Fix bug #1385272: Improved version for "Parsing of short open tags not fully compatib...
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPPartitionScanner.java
1 /**********************************************************************
2  Copyright (c) 2002  Widespace, OU  and others.
3  All rights reserved.   This program and the accompanying materials
4  are made available under the terms of the Common Public License v1.0
5  which accompanies this distribution, and is available at
6  http://solareclipse.sourceforge.net/legal/cpl-v10.html
7
8  Contributors:
9  Igor Malinin - initial contribution
10
11  $Id: PHPPartitionScanner.java,v 1.33 2005-10-10 19:56:02 axelcl Exp $
12  **********************************************************************/
13 package net.sourceforge.phpeclipse.phpeditor.php;
14
15 import java.util.HashMap;
16 import java.util.Map;
17
18 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
19 import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
20
21 import org.eclipse.jface.text.Assert;
22 import org.eclipse.jface.text.BadLocationException;
23 import org.eclipse.jface.text.IDocument;
24 import org.eclipse.jface.text.rules.ICharacterScanner;
25 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
26 import org.eclipse.jface.text.rules.IToken;
27 import org.eclipse.jface.text.rules.Token;
28
29 /**
30  *
31  *
32  * @author Igor Malinin
33  */
34 public class PHPPartitionScanner implements IPartitionTokenScanner {
35         public static final String PHP_SCRIPTING_AREA = "__php_scripting_area ";
36
37         public static final int STATE_DEFAULT = 0;
38
39         // public static final int STATE_TAG = 1;
40         // public static final int STATE_SCRIPT = 2;
41
42         private IDocument document;
43
44         // private int begin;
45
46         private int end;
47
48         private int offset;
49
50         private int length;
51
52         private int position;
53
54         // private int state;
55
56         private Map tokens = new HashMap();
57
58         public PHPPartitionScanner() {
59         }
60
61         /*
62          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
63          */
64         public IToken nextToken() {
65                 offset += length;
66
67                 /*
68                  * switch (state) { case STATE_TAG: return nextTagToken(); }
69                  */
70
71                 switch (read()) {
72                 case ICharacterScanner.EOF:
73                         // state = STATE_DEFAULT;
74                         return getToken(null);
75
76                 case '<':
77                         switch (read()) {
78                         case ICharacterScanner.EOF:
79                                 // state = STATE_DEFAULT;
80                                 return getToken(null);
81
82                         case '?': // <?
83                                 // int ch = read();
84                                 //
85                                 // switch (ch) {
86                                 // case ICharacterScanner.EOF:
87                                 // state = STATE_DEFAULT;
88                                 // return getToken(PHP_SCRIPTING_AREA);
89                                 // }
90                                 return scanUntilPHPEndToken(PHP_SCRIPTING_AREA);
91                         }
92
93                         unread();
94                 }
95
96                 loop: while (true) {
97                         switch (read()) {
98                         case ICharacterScanner.EOF:
99                                 // state = STATE_DEFAULT;
100                                 return getToken(null);
101
102                         case '<':
103                                 switch (read()) {
104                                 case ICharacterScanner.EOF:
105                                         // state = STATE_DEFAULT;
106                                         return getToken(null);
107
108                                 case '?':
109                                         unread();
110                                         break;
111
112                                 case '<':
113                                         unread();
114
115                                 default:
116                                         continue loop;
117                                 }
118
119                                 unread();
120
121                                 // state = STATE_DEFAULT;
122                                 return getToken(null);
123                         }
124                 }
125         }
126
127         private IToken scanUntilPHPEndToken(String token) {
128                 int ch = read();
129                 while (true) {
130                         switch (ch) {
131                         case ICharacterScanner.EOF:
132                                 // state = STATE_DEFAULT;
133                                 return getToken(token);
134                         case '"': // double quoted string
135                                 // read until end of double quoted string
136                                 if (!readUntilEscapedDQ()) {
137                                         // state = STATE_DEFAULT;
138                                         return getToken(token);
139                                 }
140                                 break;
141                         case '<': // heredoc string
142                                 ch = read();
143                                 switch (ch) {
144                                 case ICharacterScanner.EOF:
145                                         break;
146                                 case '<':
147                                         ch = read();
148                                         switch (ch) {
149                                         case ICharacterScanner.EOF:
150                                                 break;
151                                         case '<':
152                                                 // read until end of heredoc string
153                                                 if (!readUntilEscapedHEREDOC()) {
154                                                         // state = STATE_DEFAULT;
155                                                         return getToken(token);
156                                                 }
157                                         }
158                                 }
159                                 break;
160                         case '\'': // single quoted string
161                                 // read until end of single quoted string
162                                 if (!readUntilEscapedSQ()) {
163                                         // state = STATE_DEFAULT;
164                                         return getToken(token);
165                                 }
166                                 break;
167                         case '/': // comment start?
168                                 ch = read();
169                                 switch (ch) {
170                                 case ICharacterScanner.EOF:
171                                         break;
172                                 case '/':
173                                         // read until end of line
174                                         if (!readSingleLine()) {
175                                                 // state = STATE_DEFAULT;
176                                                 return getToken(token);
177                                         }
178                                         break;
179                                 case '*':
180                                         // read until end of comment
181                                         if (!readMultiLineComment()) {
182                                                 // state = STATE_DEFAULT;
183                                                 return getToken(token);
184                                         }
185                                         break;
186                                 default:
187                                         continue;
188                                 }
189                                 break;
190                         case '#': // line comment
191                                 // read until end of line
192                                 if (!readSingleLine()) {
193                                         // state = STATE_DEFAULT;
194                                         return getToken(token);
195                                 }
196                                 break;
197                         case '?':
198                                 ch = read();
199                                 switch (ch) {
200                                 case ICharacterScanner.EOF:
201                                 case '>':
202                                         // state = STATE_DEFAULT;
203                                         return getToken(token);
204
205                                 case '?':
206                                         continue;
207                                 default:
208                                         continue;
209                                 }
210                         }
211
212                         ch = read();
213                 }
214         }
215
216         private IToken getToken(String type) {
217                 length = position - offset;
218
219                 if (length == 0) {
220                         return Token.EOF;
221                 }
222
223                 // if (length<0) {
224                 // try {
225                 // System.out.println("Length<0:"+document.get(offset,5)+""+length);
226                 // } catch (BadLocationException e) {
227                 // e.printStackTrace();
228                 // }
229                 // }
230
231                 if (type == null) {
232                         return Token.UNDEFINED;
233                 }
234
235                 IToken token = (IToken) tokens.get(type);
236                 if (token == null) {
237                         token = new Token(type);
238                         tokens.put(type, token);
239                 }
240
241                 return token;
242         }
243
244         private int read() {
245                 if (position >= end) {
246                         return ICharacterScanner.EOF;
247                 }
248
249                 try {
250                         return document.getChar(position++);
251                 } catch (BadLocationException e) {
252                         --position;
253                         return ICharacterScanner.EOF;
254                 }
255         }
256
257         private boolean readUntilEscapedDQ() {
258                 // search last double quoted character
259                 try {
260                         char ch;
261                         while (true) {
262                                 if (position >= end) {
263                                         return false;
264                                 }
265                                 ch = document.getChar(position++);
266                                 if (ch == '\\') {
267                                         if (position >= end) {
268                                                 return false;
269                                         }
270                                         ch = document.getChar(position++); // ignore escaped character
271                                 } else if (ch == '"') {
272                                         return true;
273                                 }
274                         }
275                 } catch (BadLocationException e) {
276                         --position;
277                 }
278                 return false;
279         }
280
281         private boolean readUntilEscapedSQ() {
282                 // search last single quoted character
283                 try {
284                         char ch;
285                         while (true) {
286                                 if (position >= end) {
287                                         return false;
288                                 }
289                                 ch = document.getChar(position++);
290                                 if (ch == '\\') {
291                                         if (position >= end) {
292                                                 return false;
293                                         }
294                                         ch = document.getChar(position++); // ignore escaped character
295                                 } else if (ch == '\'') {
296                                         return true;
297                                 }
298                         }
299                 } catch (BadLocationException e) {
300                         --position;
301                 }
302                 return false;
303         }
304
305         private boolean readUntilEscapedHEREDOC() {
306                 // search until heredoc ends
307                 try {
308                         char ch;
309                         StringBuffer buf = new StringBuffer();
310                         char[] heredocIdent;
311                         if (position >= end) {
312                                 return false;
313                         }
314                         ch = document.getChar(position++);
315                         if (!Scanner.isPHPIdentifierStart(ch)) {
316                                 return false;
317                         }
318                         while (Scanner.isPHPIdentifierPart(ch)) {
319                                 buf.append(ch);
320                                 if (position >= end) {
321                                         return false;
322                                 }
323                                 ch = document.getChar(position++);
324                         }
325                         heredocIdent = buf.toString().toCharArray();
326                         while (true) {
327                                 if (position >= end) {
328                                         return false;
329                                 }
330                                 ch = document.getChar(position++);
331                                 if (ch == '\n') { // heredoc could end after a newline
332                                         int pos = 0;
333                                         while (true) {
334                                                 if (position >= end) {
335                                                         return false;
336                                                 }
337                                                 if (pos == heredocIdent.length) {
338                                                         return true;
339                                                 }
340                                                 ch = document.getChar(position++); // ignore escaped character
341                                                 if (ch != heredocIdent[pos]) {
342                                                         break;
343                                                 }
344                                                 pos++;
345                                         }
346                                 }
347                         }
348                 } catch (BadLocationException e) {
349                         --position;
350                 }
351                 return false;
352         }
353
354         private boolean readSingleLine() {
355                 try {
356                         do {
357                                 if (position >= end) {
358                                         return false;
359                                 }
360                         } while (document.getChar(position++) != '\n');
361                         return true;
362                 } catch (BadLocationException e) {
363                         --position;
364                 }
365                 return false;
366         }
367
368         private boolean readMultiLineComment() {
369                 try {
370                         char ch;
371                         while (true) {
372                                 if (position >= end) {
373                                         return false;
374                                 }
375                                 ch = document.getChar(position++);
376                                 if (ch == '*') {
377                                         if (position >= end) {
378                                                 return false;
379                                         }
380                                         if (document.getChar(position) == '/') {
381                                                 position++;
382                                                 return true;
383                                         }
384                                 }
385                         }
386                 } catch (BadLocationException e) {
387                         --position;
388                 }
389                 return false;
390         }
391
392         private void unread() {
393                 --position;
394         }
395
396         /*
397          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
398          */
399         public int getTokenOffset() {
400                 if (AbstractPartitioner.DEBUG) {
401                         Assert.isTrue(offset >= 0, Integer.toString(offset));
402                 }
403                 return offset;
404         }
405
406         /*
407          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
408          */
409         public int getTokenLength() {
410                 return length;
411         }
412
413         /*
414          * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int,
415          *      int)
416          */
417         public void setRange(IDocument document, int offset, int length) {
418                 this.document = document;
419                 // this.begin = offset;
420                 this.end = offset + length;
421
422                 this.offset = offset;
423                 this.position = offset;
424                 this.length = 0;
425         }
426
427         /*
428          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
429          */
430         public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {
431                 // state = STATE_DEFAULT;
432                 if (partitionOffset > -1) {
433                         int delta = offset - partitionOffset;
434                         if (delta > 0) {
435                                 setRange(document, partitionOffset, length + delta);
436                                 return;
437                         }
438                 }
439                 setRange(document, partitionOffset, length);
440         }
441
442         // private boolean isContinuationPartition(IDocument document, int offset) {
443         // try {
444         // String type = document.getContentType(offset - 1);
445         //
446         // if (type != IDocument.DEFAULT_CONTENT_TYPE) {
447         // return true;
448         // }
449         // } catch (BadLocationException e) {}
450         //
451         // return false;
452         // }
453 }