Fixed bug #1404228: Crash on <?php // comment ?>
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / FastJavaPartitionScanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2004 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
12
13 import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
14
15 import org.eclipse.jface.text.Assert;
16 import org.eclipse.jface.text.IDocument;
17 import org.eclipse.jface.text.rules.ICharacterScanner;
18 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
19 import org.eclipse.jface.text.rules.IToken;
20 import org.eclipse.jface.text.rules.Token;
21
/**
 * This scanner recognizes PHPDoc comments, PHP multi-line comments, PHP
 * single-line comments, and PHP single- and double-quoted strings.
 */
26 public class FastJavaPartitionScanner implements IPartitionTokenScanner, IPHPPartitions {
27
28         // states
29         private static final int PHP = 0;
30
31         private static final int SINGLE_LINE_COMMENT = 1;
32
33         private static final int MULTI_LINE_COMMENT = 2;
34
35         private static final int PHPDOC = 3;
36
37         private static final int STRING_DQ = 4;
38
39         private static final int STRING_SQ = 5;
40
41         private static final int STRING_HEREDOC = 6;
42
43         // beginning of prefixes and postfixes
44         private static final int NONE = 0;
45
46         private static final int BACKSLASH = 1; // postfix for STRING_DQ and CHARACTER
47
48         private static final int SLASH = 2; // prefix for SINGLE_LINE or MULTI_LINE or
49                                                                                                                                                         // JAVADOC
50
51         private static final int SLASH_STAR = 3; // prefix for MULTI_LINE_COMMENT or
52                                                                                                                                                                                 // JAVADOC
53
54         private static final int SLASH_STAR_STAR = 4; // prefix for MULTI_LINE_COMMENT
55                                                                                                                                                                                                 // or JAVADOC
56
57         private static final int STAR = 5; // postfix for MULTI_LINE_COMMENT or
58                                                                                                                                                         // JAVADOC
59
60         private static final int CARRIAGE_RETURN = 6; // postfix for STRING_DQ,
61                                                                                                                                                                                                 // CHARACTER and
62                                                                                                                                                                                                 // SINGLE_LINE_COMMENT
63
64         // private static final int HEREDOC = 7;
65
66         /** The scanner. */
67         private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner(1000); // faster
68                                                                                                                                                                                                                                                                                                                                                         // implementation
69
70         /** The offset of the last returned token. */
71         private int fTokenOffset;
72
73         /** The length of the last returned token. */
74         private int fTokenLength;
75
76         /** The state of the scanner. */
77         private int fState;
78
79         /** The last significant characters read. */
80         private int fLast;
81
82         /** The amount of characters already read on first call to nextToken(). */
83         private int fPrefixLength;
84
85         // emulate JavaPartitionScanner
86         private boolean fEmulate = false;
87
88         private int fJavaOffset;
89
90         private int fJavaLength;
91
92         private final IToken[] fTokens = new IToken[] { new Token(null), new Token(PHP_SINGLELINE_COMMENT),
93                         new Token(PHP_MULTILINE_COMMENT), new Token(PHP_PHPDOC_COMMENT), new Token(PHP_STRING_DQ), new Token(PHP_STRING_SQ),
94                         new Token(PHP_STRING_HEREDOC) };
95
/**
 * Creates a scanner.
 *
 * @param emulate
 *            <code>true</code> to emulate the token boundaries of
 *            JavaPartitionScanner, <code>false</code> otherwise
 */
public FastJavaPartitionScanner(boolean emulate) {
    this.fEmulate = emulate;
}

/**
 * Creates a scanner with JavaPartitionScanner emulation switched off.
 */
public FastJavaPartitionScanner() {
    this(false);
}
103
104         /*
105          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
106          */
107         public IToken nextToken() {
108
109                 // emulate JavaPartitionScanner
110                 if (fEmulate) {
111                         if (fJavaOffset != -1 && fTokenOffset + fTokenLength != fJavaOffset + fJavaLength) {
112                                 fTokenOffset += fTokenLength;
113                                 return fTokens[PHP];
114                         } else {
115                                 fJavaOffset = -1;
116                                 fJavaLength = 0;
117                         }
118                 }
119
120                 fTokenOffset += fTokenLength;
121                 fTokenLength = fPrefixLength;
122
123                 while (true) {
124                         final int ch = fScanner.read();
125
126                         // characters
127                         switch (ch) {
128                         case ICharacterScanner.EOF:
129                                 if (fTokenLength > 0) {
130                                         fLast = NONE; // ignore last
131                                         return preFix(fState, PHP, NONE, 0);
132
133                                 } else {
134                                         fLast = NONE;
135                                         fPrefixLength = 0;
136                                         return Token.EOF;
137                                 }
138
139                         case '\r':
140                                 // emulate JavaPartitionScanner
141                                 if (!fEmulate && fLast != CARRIAGE_RETURN) {
142                                         fLast = CARRIAGE_RETURN;
143                                         fTokenLength++;
144                                         continue;
145
146                                 } else {
147
148                                         switch (fState) {
149                                         case SINGLE_LINE_COMMENT:
150                                                 // case CHARACTER:
151                                                 // case STRING_DQ:
152                                                 // case STRING_SQ:
153                                                 if (fTokenLength > 0) {
154                                                         IToken token = fTokens[fState];
155
156                                                         // emulate JavaPartitionScanner
157                                                         if (fEmulate) {
158                                                                 fTokenLength++;
159                                                                 fLast = NONE;
160                                                                 fPrefixLength = 0;
161                                                         } else {
162                                                                 fLast = CARRIAGE_RETURN;
163                                                                 fPrefixLength = 1;
164                                                         }
165
166                                                         fState = PHP;
167                                                         return token;
168
169                                                 } else {
170                                                         consume();
171                                                         continue;
172                                                 }
173
174                                         default:
175                                                 consume();
176                                                 continue;
177                                         }
178                                 }
179
180                         case '\n':
181                                 switch (fState) {
182                                 case SINGLE_LINE_COMMENT:
183                                         // case CHARACTER:
184                                         // case STRING_DQ:
185                                         // case STRING_SQ:
186                                         // assert(fTokenLength > 0);
187                                         return postFix(fState);
188
189                                 default:
190                                         consume();
191                                         continue;
192                                 }
193
194                         case '?':
195                                 if (fState == SINGLE_LINE_COMMENT) {
196                                         int nextch = fScanner.read();
197                                         if (nextch == '>') {
198                                                 // <h1>This is an <?php # echo 'simple' ?> example.</h1>
199                                                 fTokenLength--;
200                                                 fScanner.unread();
201                                                 fScanner.unread();
202                                                 return postFix(fState);
203                                         }
204                                         // bug #1404228: Crash on <?php // comment ?>
205                                         // fScanner.unread();
206                                 }
207
208                         default:
209                                 if (!fEmulate && fLast == CARRIAGE_RETURN) {
210                                         switch (fState) {
211                                         case SINGLE_LINE_COMMENT:
212                                                 // case CHARACTER:
213                                                 // case STRING_DQ:
214                                                 // case STRING_SQ:
215                                                 int last;
216                                                 int newState;
217                                                 switch (ch) {
218                                                 case '/':
219                                                         last = SLASH;
220                                                         newState = PHP;
221                                                         break;
222
223                                                 case '*':
224                                                         last = STAR;
225                                                         newState = PHP;
226                                                         break;
227
228                                                 case '\'':
229                                                         last = NONE;
230                                                         newState = STRING_SQ;
231                                                         break;
232
233                                                 case '"':
234                                                         last = NONE;
235                                                         newState = STRING_DQ;
236                                                         break;
237
238                                                 case '\r':
239                                                         last = CARRIAGE_RETURN;
240                                                         newState = PHP;
241                                                         break;
242
243                                                 case '\\':
244                                                         last = BACKSLASH;
245                                                         newState = PHP;
246                                                         break;
247
248                                                 default:
249                                                         last = NONE;
250                                                         newState = PHP;
251                                                         break;
252                                                 }
253
254                                                 fLast = NONE; // ignore fLast
255                                                 return preFix(fState, newState, last, 1);
256
257                                         default:
258                                                 break;
259                                         }
260                                 }
261                         }
262
263                         // states
264                         switch (fState) {
265                         case PHP:
266                                 switch (ch) {
267                                 case '#':
268                                         if (fTokenLength > 0) {
269                                                 return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
270                                         } else {
271                                                 preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
272                                                 fTokenOffset += fTokenLength;
273                                                 fTokenLength = fPrefixLength;
274                                                 break;
275                                         }
276                                 case '/':
277                                         if (fLast == SLASH) {
278                                                 if (fTokenLength - getLastLength(fLast) > 0) {
279                                                         return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
280                                                 } else {
281                                                         preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
282                                                         fTokenOffset += fTokenLength;
283                                                         fTokenLength = fPrefixLength;
284                                                         break;
285                                                 }
286
287                                         } else {
288                                                 fTokenLength++;
289                                                 fLast = SLASH;
290                                                 break;
291                                         }
292
293                                 case '*':
294                                         if (fLast == SLASH) {
295                                                 if (fTokenLength - getLastLength(fLast) > 0)
296                                                         return preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
297                                                 else {
298                                                         preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
299                                                         fTokenOffset += fTokenLength;
300                                                         fTokenLength = fPrefixLength;
301                                                         break;
302                                                 }
303
304                                         } else {
305                                                 consume();
306                                                 break;
307                                         }
308
309                                 case '\'':
310                                         fLast = NONE; // ignore fLast
311                                         if (fTokenLength > 0)
312                                                 return preFix(PHP, STRING_SQ, NONE, 1);
313                                         else {
314                                                 preFix(PHP, STRING_SQ, NONE, 1);
315                                                 fTokenOffset += fTokenLength;
316                                                 fTokenLength = fPrefixLength;
317                                                 break;
318                                         }
319
320                                 case '"':
321                                         fLast = NONE; // ignore fLast
322                                         if (fTokenLength > 0)
323                                                 return preFix(PHP, STRING_DQ, NONE, 1);
324                                         else {
325                                                 preFix(PHP, STRING_DQ, NONE, 1);
326                                                 fTokenOffset += fTokenLength;
327                                                 fTokenLength = fPrefixLength;
328                                                 break;
329                                         }
330
331                                 default:
332                                         consume();
333                                         break;
334                                 }
335                                 break;
336
337                         case SINGLE_LINE_COMMENT:
338                                 consume();
339                                 break;
340
341                         case PHPDOC:
342                                 switch (ch) {
343                                 case '/':
344                                         switch (fLast) {
345                                         case SLASH_STAR_STAR:
346                                                 return postFix(MULTI_LINE_COMMENT);
347
348                                         case STAR:
349                                                 return postFix(PHPDOC);
350
351                                         default:
352                                                 consume();
353                                                 break;
354                                         }
355                                         break;
356
357                                 case '*':
358                                         fTokenLength++;
359                                         fLast = STAR;
360                                         break;
361
362                                 default:
363                                         consume();
364                                         break;
365                                 }
366                                 break;
367
368                         case MULTI_LINE_COMMENT:
369                                 switch (ch) {
370                                 case '*':
371                                         if (fLast == SLASH_STAR) {
372                                                 fLast = SLASH_STAR_STAR;
373                                                 fTokenLength++;
374                                                 fState = PHPDOC;
375                                         } else {
376                                                 fTokenLength++;
377                                                 fLast = STAR;
378                                         }
379                                         break;
380
381                                 case '/':
382                                         if (fLast == STAR) {
383                                                 return postFix(MULTI_LINE_COMMENT);
384                                         } else {
385                                                 consume();
386                                                 break;
387                                         }
388
389                                 default:
390                                         consume();
391                                         break;
392                                 }
393                                 break;
394
395                         case STRING_DQ:
396                                 switch (ch) {
397                                 case '\\':
398                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
399                                         fTokenLength++;
400                                         break;
401
402                                 case '\"':
403                                         if (fLast != BACKSLASH) {
404                                                 return postFix(STRING_DQ);
405
406                                         } else {
407                                                 consume();
408                                                 break;
409                                         }
410
411                                 default:
412                                         consume();
413                                         break;
414                                 }
415                                 break;
416                         case STRING_SQ:
417                                 switch (ch) {
418                                 case '\\':
419                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
420                                         fTokenLength++;
421                                         break;
422
423                                 case '\'':
424                                         if (fLast != BACKSLASH) {
425                                                 return postFix(STRING_SQ);
426
427                                         } else {
428                                                 consume();
429                                                 break;
430                                         }
431
432                                 default:
433                                         consume();
434                                         break;
435                                 }
436                                 break;
437                         // case CHARACTER:
438                         // switch (ch) {
439                         // case '\\':
440                         // fLast= (fLast == BACKSLASH) ? NONE : BACKSLASH;
441                         // fTokenLength++;
442                         // break;
443                         //
444                         // case '\'':
445                         // if (fLast != BACKSLASH) {
446                         // return postFix(CHARACTER);
447                         //
448                         // } else {
449                         // consume();
450                         // break;
451                         // }
452                         //
453                         // default:
454                         // consume();
455                         // break;
456                         // }
457                         // break;
458                         }
459                 }
460         }
461
462         private static final int getLastLength(int last) {
463                 switch (last) {
464                 default:
465                         return -1;
466
467                 case NONE:
468                         return 0;
469
470                 case CARRIAGE_RETURN:
471                 case BACKSLASH:
472                 case SLASH:
473                 case STAR:
474                         return 1;
475
476                 case SLASH_STAR:
477                         return 2;
478
479                 case SLASH_STAR_STAR:
480                         return 3;
481                 }
482         }
483
484         private final void consume() {
485                 fTokenLength++;
486                 fLast = NONE;
487         }
488
489         private final IToken postFix(int state) {
490                 fTokenLength++;
491                 fLast = NONE;
492                 fState = PHP;
493                 fPrefixLength = 0;
494                 return fTokens[state];
495         }
496
497         private final IToken preFix(int state, int newState, int last, int prefixLength) {
498                 // emulate JavaPartitionScanner
499                 if (fEmulate && state == PHP && (fTokenLength - getLastLength(fLast) > 0)) {
500                         fTokenLength -= getLastLength(fLast);
501                         fJavaOffset = fTokenOffset;
502                         fJavaLength = fTokenLength;
503                         fTokenLength = 1;
504                         fState = newState;
505                         fPrefixLength = prefixLength;
506                         fLast = last;
507                         return fTokens[state];
508
509                 } else {
510                         fTokenLength -= getLastLength(fLast);
511                         fLast = last;
512                         fPrefixLength = prefixLength;
513                         IToken token = fTokens[state];
514                         fState = newState;
515                         return token;
516                 }
517         }
518
519         private static int getState(String contentType) {
520
521                 if (contentType == null)
522                         return PHP;
523
524                 else if (contentType.equals(PHP_SINGLELINE_COMMENT))
525                         return SINGLE_LINE_COMMENT;
526
527                 else if (contentType.equals(PHP_MULTILINE_COMMENT))
528                         return MULTI_LINE_COMMENT;
529
530                 else if (contentType.equals(PHP_PHPDOC_COMMENT))
531                         return PHPDOC;
532
533                 else if (contentType.equals(PHP_STRING_DQ))
534                         return STRING_DQ;
535
536                 else if (contentType.equals(PHP_STRING_SQ))
537                         return STRING_SQ;
538
539                 else if (contentType.equals(PHP_STRING_HEREDOC))
540                         return STRING_HEREDOC;
541
542                 // else if (contentType.equals(JAVA_CHARACTER))
543                 // return CHARACTER;
544
545                 else
546                         return PHP;
547         }
548
549         /*
550          * @see IPartitionTokenScanner#setPartialRange(IDocument, int, int, String,
551          *      int)
552          */
553         public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {
554                 fScanner.setRange(document, offset, length);
555                 setRange(document, offset, length);
556                 fTokenOffset = partitionOffset;
557                 fTokenLength = 0;
558                 fPrefixLength = offset - partitionOffset;
559                 fLast = NONE;
560
561                 if (offset == partitionOffset) {
562                         // restart at beginning of partition
563                         fState = PHP;
564                 } else {
565                         fState = getState(contentType);
566                 }
567
568                 // emulate JavaPartitionScanner
569                 if (fEmulate) {
570                         fJavaOffset = -1;
571                         fJavaLength = 0;
572                 }
573         }
574
575         /*
576          * @see ITokenScanner#setRange(IDocument, int, int)
577          */
578         public void setRange(IDocument document, int offset, int length) {
579                 fScanner.setRange(document, offset, length);
580                 fTokenOffset = offset;
581                 fTokenLength = 0;
582                 fPrefixLength = 0;
583                 fLast = NONE;
584                 fState = PHP;
585
586                 // emulate JavaPartitionScanner
587                 if (fEmulate) {
588                         fJavaOffset = -1;
589                         fJavaLength = 0;
590                 }
591         }
592
593         /*
594          * @see ITokenScanner#getTokenLength()
595          */
596         public int getTokenLength() {
597                 return fTokenLength;
598         }
599
600         /*
601          * @see ITokenScanner#getTokenOffset()
602          */
603         public int getTokenOffset() {
604                 if (AbstractPartitioner.DEBUG) {
605                         Assert.isTrue(fTokenOffset >= 0, Integer.toString(fTokenOffset));
606                 }
607                 return fTokenOffset;
608         }
609
610 }