unification of bug fixes 1198893, 1404228
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / FastJavaPartitionScanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2004 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
12
13 import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
14
15 import org.eclipse.jface.text.Assert;
16 import org.eclipse.jface.text.IDocument;
17 import org.eclipse.jface.text.rules.ICharacterScanner;
18 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
19 import org.eclipse.jface.text.rules.IToken;
20 import org.eclipse.jface.text.rules.Token;
21
22 /**
23  * This scanner recognizes the JavaDoc comments, Java multi line comments, Java
24  * single line comments, Java strings.
25  */
26 public class FastJavaPartitionScanner implements IPartitionTokenScanner, IPHPPartitions {
27
28         // states
29         private static final int PHP = 0;
30
31         private static final int SINGLE_LINE_COMMENT = 1;
32
33         private static final int MULTI_LINE_COMMENT = 2;
34
35         private static final int PHPDOC = 3;
36
37         private static final int STRING_DQ = 4;
38
39         private static final int STRING_SQ = 5;
40
41         private static final int STRING_HEREDOC = 6;
42
43         // beginning of prefixes and postfixes
44         private static final int NONE = 0;
45
46         private static final int BACKSLASH = 1; // postfix for STRING_DQ and CHARACTER
47
48         private static final int SLASH = 2; // prefix for SINGLE_LINE or MULTI_LINE or
49                                                                                                                                                         // JAVADOC
50
51         private static final int SLASH_STAR = 3; // prefix for MULTI_LINE_COMMENT or
52                                                                                                                                                                                 // JAVADOC
53
54         private static final int SLASH_STAR_STAR = 4; // prefix for MULTI_LINE_COMMENT
55                                                                                                                                                                                                 // or JAVADOC
56
57         private static final int STAR = 5; // postfix for MULTI_LINE_COMMENT or
58                                                                                                                                                         // JAVADOC
59
60         private static final int CARRIAGE_RETURN = 6; // postfix for STRING_DQ,
61                                                                                                                                                                                                 // CHARACTER and
62                                                                                                                                                                                                 // SINGLE_LINE_COMMENT
63
64         // private static final int HEREDOC = 7;
65
66         /** The scanner. */
67         private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner(1000); // faster
68                                                                                                                                                                                                                                                                                                                                                         // implementation
69
70         /** The offset of the last returned token. */
71         private int fTokenOffset;
72
73         /** The length of the last returned token. */
74         private int fTokenLength;
75
76         /** The state of the scanner. */
77         private int fState;
78
79         /** The last significant characters read. */
80         private int fLast;
81
82         /** The amount of characters already read on first call to nextToken(). */
83         private int fPrefixLength;
84
85         // emulate JavaPartitionScanner
86         private boolean fEmulate = false;
87
88         private int fJavaOffset;
89
90         private int fJavaLength;
91
92         private final IToken[] fTokens = new IToken[] { new Token(null), new Token(PHP_SINGLELINE_COMMENT),
93                         new Token(PHP_MULTILINE_COMMENT), new Token(PHP_PHPDOC_COMMENT), new Token(PHP_STRING_DQ), new Token(PHP_STRING_SQ),
94                         new Token(PHP_STRING_HEREDOC) };
95
96         public FastJavaPartitionScanner(boolean emulate) {
97                 fEmulate = emulate;
98         }
99
100         public FastJavaPartitionScanner() {
101                 this(false);
102         }
103
104         /*
105          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
106          */
107         public IToken nextToken() {
108
109                 // emulate JavaPartitionScanner
110                 if (fEmulate) {
111                         if (fJavaOffset != -1 && fTokenOffset + fTokenLength != fJavaOffset + fJavaLength) {
112                                 fTokenOffset += fTokenLength;
113                                 return fTokens[PHP];
114                         } else {
115                                 fJavaOffset = -1;
116                                 fJavaLength = 0;
117                         }
118                 }
119
120                 fTokenOffset += fTokenLength;
121                 fTokenLength = fPrefixLength;
122
123                 while (true) {
124                         final int ch = fScanner.read();
125
126                         // characters
127                         switch (ch) {
128                         case ICharacterScanner.EOF:
129                                 if (fTokenLength > 0) {
130                                         fLast = NONE; // ignore last
131                                         return preFix(fState, PHP, NONE, 0);
132
133                                 } else {
134                                         fLast = NONE;
135                                         fPrefixLength = 0;
136                                         return Token.EOF;
137                                 }
138
139                         case '\r':
140                                 // emulate JavaPartitionScanner
141                                 if (!fEmulate && fLast != CARRIAGE_RETURN) {
142                                         fLast = CARRIAGE_RETURN;
143                                         fTokenLength++;
144                                         continue;
145
146                                 } else {
147
148                                         switch (fState) {
149                                         case SINGLE_LINE_COMMENT:
150                                                 // case CHARACTER:
151                                                 // case STRING_DQ:
152                                                 // case STRING_SQ:
153                                                 if (fTokenLength > 0) {
154                                                         IToken token = fTokens[fState];
155
156                                                         // emulate JavaPartitionScanner
157                                                         if (fEmulate) {
158                                                                 fTokenLength++;
159                                                                 fLast = NONE;
160                                                                 fPrefixLength = 0;
161                                                         } else {
162                                                                 fLast = CARRIAGE_RETURN;
163                                                                 fPrefixLength = 1;
164                                                         }
165
166                                                         fState = PHP;
167                                                         return token;
168
169                                                 } else {
170                                                         consume();
171                                                         continue;
172                                                 }
173
174                                         default:
175                                                 consume();
176                                                 continue;
177                                         }
178                                 }
179
180                         case '\n':
181                                 switch (fState) {
182                                 case SINGLE_LINE_COMMENT:
183                                         // case CHARACTER:
184                                         // case STRING_DQ:
185                                         // case STRING_SQ:
186                                         // assert(fTokenLength > 0);
187                                         return postFix(fState);
188
189                                 default:
190                                         consume();
191                                         continue;
192                                 }
193
194                         case '?':
195                                 if (fState == SINGLE_LINE_COMMENT) {
196                                         int nextch = fScanner.read();
197                                         if (nextch == '>') {
198                                                 // <h1>This is an <?php # echo 'simple' ?> example.</h1>
199                                                 fTokenLength--;
200                                                 fScanner.unread();
201                                                 fScanner.unread();
202                                                 return postFix(fState);
203                                         }
204                                         else {
205                                                 // bug #1404228: Crash on <?php // comment ?>
206                                                 if (nextch!=ICharacterScanner.EOF) {
207                                                         fScanner.unread();
208                                                 }
209                                         }
210                                 }
211
212                         default:
213                                 if (!fEmulate && fLast == CARRIAGE_RETURN) {
214                                         switch (fState) {
215                                         case SINGLE_LINE_COMMENT:
216                                                 // case CHARACTER:
217                                                 // case STRING_DQ:
218                                                 // case STRING_SQ:
219                                                 int last;
220                                                 int newState;
221                                                 switch (ch) {
222                                                 case '/':
223                                                         last = SLASH;
224                                                         newState = PHP;
225                                                         break;
226
227                                                 case '*':
228                                                         last = STAR;
229                                                         newState = PHP;
230                                                         break;
231
232                                                 case '\'':
233                                                         last = NONE;
234                                                         newState = STRING_SQ;
235                                                         break;
236
237                                                 case '"':
238                                                         last = NONE;
239                                                         newState = STRING_DQ;
240                                                         break;
241
242                                                 case '\r':
243                                                         last = CARRIAGE_RETURN;
244                                                         newState = PHP;
245                                                         break;
246
247                                                 case '\\':
248                                                         last = BACKSLASH;
249                                                         newState = PHP;
250                                                         break;
251
252                                                 default:
253                                                         last = NONE;
254                                                         newState = PHP;
255                                                         break;
256                                                 }
257
258                                                 fLast = NONE; // ignore fLast
259                                                 return preFix(fState, newState, last, 1);
260
261                                         default:
262                                                 break;
263                                         }
264                                 }
265                         }
266
267                         // states
268                         switch (fState) {
269                         case PHP:
270                                 switch (ch) {
271                                 case '#':
272                                         if (fTokenLength > 0) {
273                                                 return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
274                                         } else {
275                                                 preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
276                                                 fTokenOffset += fTokenLength;
277                                                 fTokenLength = fPrefixLength;
278                                                 break;
279                                         }
280                                 case '/':
281                                         if (fLast == SLASH) {
282                                                 if (fTokenLength - getLastLength(fLast) > 0) {
283                                                         return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
284                                                 } else {
285                                                         preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
286                                                         fTokenOffset += fTokenLength;
287                                                         fTokenLength = fPrefixLength;
288                                                         break;
289                                                 }
290
291                                         } else {
292                                                 fTokenLength++;
293                                                 fLast = SLASH;
294                                                 break;
295                                         }
296
297                                 case '*':
298                                         if (fLast == SLASH) {
299                                                 if (fTokenLength - getLastLength(fLast) > 0)
300                                                         return preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
301                                                 else {
302                                                         preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
303                                                         fTokenOffset += fTokenLength;
304                                                         fTokenLength = fPrefixLength;
305                                                         break;
306                                                 }
307
308                                         } else {
309                                                 consume();
310                                                 break;
311                                         }
312
313                                 case '\'':
314                                         fLast = NONE; // ignore fLast
315                                         if (fTokenLength > 0)
316                                                 return preFix(PHP, STRING_SQ, NONE, 1);
317                                         else {
318                                                 preFix(PHP, STRING_SQ, NONE, 1);
319                                                 fTokenOffset += fTokenLength;
320                                                 fTokenLength = fPrefixLength;
321                                                 break;
322                                         }
323
324                                 case '"':
325                                         fLast = NONE; // ignore fLast
326                                         if (fTokenLength > 0)
327                                                 return preFix(PHP, STRING_DQ, NONE, 1);
328                                         else {
329                                                 preFix(PHP, STRING_DQ, NONE, 1);
330                                                 fTokenOffset += fTokenLength;
331                                                 fTokenLength = fPrefixLength;
332                                                 break;
333                                         }
334
335                                 default:
336                                         consume();
337                                         break;
338                                 }
339                                 break;
340
341                         case SINGLE_LINE_COMMENT:
342                                 consume();
343                                 break;
344
345                         case PHPDOC:
346                                 switch (ch) {
347                                 case '/':
348                                         switch (fLast) {
349                                         case SLASH_STAR_STAR:
350                                                 return postFix(MULTI_LINE_COMMENT);
351
352                                         case STAR:
353                                                 return postFix(PHPDOC);
354
355                                         default:
356                                                 consume();
357                                                 break;
358                                         }
359                                         break;
360
361                                 case '*':
362                                         fTokenLength++;
363                                         fLast = STAR;
364                                         break;
365
366                                 default:
367                                         consume();
368                                         break;
369                                 }
370                                 break;
371
372                         case MULTI_LINE_COMMENT:
373                                 switch (ch) {
374                                 case '*':
375                                         if (fLast == SLASH_STAR) {
376                                                 fLast = SLASH_STAR_STAR;
377                                                 fTokenLength++;
378                                                 fState = PHPDOC;
379                                         } else {
380                                                 fTokenLength++;
381                                                 fLast = STAR;
382                                         }
383                                         break;
384
385                                 case '/':
386                                         if (fLast == STAR) {
387                                                 return postFix(MULTI_LINE_COMMENT);
388                                         } else {
389                                                 consume();
390                                                 break;
391                                         }
392
393                                 default:
394                                         consume();
395                                         break;
396                                 }
397                                 break;
398
399                         case STRING_DQ:
400                                 switch (ch) {
401                                 case '\\':
402                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
403                                         fTokenLength++;
404                                         break;
405
406                                 case '\"':
407                                         if (fLast != BACKSLASH) {
408                                                 return postFix(STRING_DQ);
409
410                                         } else {
411                                                 consume();
412                                                 break;
413                                         }
414
415                                 default:
416                                         consume();
417                                         break;
418                                 }
419                                 break;
420                         case STRING_SQ:
421                                 switch (ch) {
422                                 case '\\':
423                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
424                                         fTokenLength++;
425                                         break;
426
427                                 case '\'':
428                                         if (fLast != BACKSLASH) {
429                                                 return postFix(STRING_SQ);
430
431                                         } else {
432                                                 consume();
433                                                 break;
434                                         }
435
436                                 default:
437                                         consume();
438                                         break;
439                                 }
440                                 break;
441                         // case CHARACTER:
442                         // switch (ch) {
443                         // case '\\':
444                         // fLast= (fLast == BACKSLASH) ? NONE : BACKSLASH;
445                         // fTokenLength++;
446                         // break;
447                         //
448                         // case '\'':
449                         // if (fLast != BACKSLASH) {
450                         // return postFix(CHARACTER);
451                         //
452                         // } else {
453                         // consume();
454                         // break;
455                         // }
456                         //
457                         // default:
458                         // consume();
459                         // break;
460                         // }
461                         // break;
462                         }
463                 }
464         }
465
466         private static final int getLastLength(int last) {
467                 switch (last) {
468                 default:
469                         return -1;
470
471                 case NONE:
472                         return 0;
473
474                 case CARRIAGE_RETURN:
475                 case BACKSLASH:
476                 case SLASH:
477                 case STAR:
478                         return 1;
479
480                 case SLASH_STAR:
481                         return 2;
482
483                 case SLASH_STAR_STAR:
484                         return 3;
485                 }
486         }
487
488         private final void consume() {
489                 fTokenLength++;
490                 fLast = NONE;
491         }
492
493         private final IToken postFix(int state) {
494                 fTokenLength++;
495                 fLast = NONE;
496                 fState = PHP;
497                 fPrefixLength = 0;
498                 return fTokens[state];
499         }
500
501         private final IToken preFix(int state, int newState, int last, int prefixLength) {
502                 // emulate JavaPartitionScanner
503                 if (fEmulate && state == PHP && (fTokenLength - getLastLength(fLast) > 0)) {
504                         fTokenLength -= getLastLength(fLast);
505                         fJavaOffset = fTokenOffset;
506                         fJavaLength = fTokenLength;
507                         fTokenLength = 1;
508                         fState = newState;
509                         fPrefixLength = prefixLength;
510                         fLast = last;
511                         return fTokens[state];
512
513                 } else {
514                         fTokenLength -= getLastLength(fLast);
515                         fLast = last;
516                         fPrefixLength = prefixLength;
517                         IToken token = fTokens[state];
518                         fState = newState;
519                         return token;
520                 }
521         }
522
523         private static int getState(String contentType) {
524
525                 if (contentType == null)
526                         return PHP;
527
528                 else if (contentType.equals(PHP_SINGLELINE_COMMENT))
529                         return SINGLE_LINE_COMMENT;
530
531                 else if (contentType.equals(PHP_MULTILINE_COMMENT))
532                         return MULTI_LINE_COMMENT;
533
534                 else if (contentType.equals(PHP_PHPDOC_COMMENT))
535                         return PHPDOC;
536
537                 else if (contentType.equals(PHP_STRING_DQ))
538                         return STRING_DQ;
539
540                 else if (contentType.equals(PHP_STRING_SQ))
541                         return STRING_SQ;
542
543                 else if (contentType.equals(PHP_STRING_HEREDOC))
544                         return STRING_HEREDOC;
545
546                 // else if (contentType.equals(JAVA_CHARACTER))
547                 // return CHARACTER;
548
549                 else
550                         return PHP;
551         }
552
553         /*
554          * @see IPartitionTokenScanner#setPartialRange(IDocument, int, int, String,
555          *      int)
556          */
557         public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {
558                 fScanner.setRange(document, offset, length);
559                 setRange(document, offset, length);
560                 fTokenOffset = partitionOffset;
561                 fTokenLength = 0;
562                 fPrefixLength = offset - partitionOffset;
563                 fLast = NONE;
564
565                 if (offset == partitionOffset) {
566                         // restart at beginning of partition
567                         fState = PHP;
568                 } else {
569                         fState = getState(contentType);
570                 }
571
572                 // emulate JavaPartitionScanner
573                 if (fEmulate) {
574                         fJavaOffset = -1;
575                         fJavaLength = 0;
576                 }
577         }
578
579         /*
580          * @see ITokenScanner#setRange(IDocument, int, int)
581          */
582         public void setRange(IDocument document, int offset, int length) {
583                 fScanner.setRange(document, offset, length);
584                 fTokenOffset = offset;
585                 fTokenLength = 0;
586                 fPrefixLength = 0;
587                 fLast = NONE;
588                 fState = PHP;
589
590                 // emulate JavaPartitionScanner
591                 if (fEmulate) {
592                         fJavaOffset = -1;
593                         fJavaLength = 0;
594                 }
595         }
596
597         /*
598          * @see ITokenScanner#getTokenLength()
599          */
600         public int getTokenLength() {
601                 return fTokenLength;
602         }
603
604         /*
605          * @see ITokenScanner#getTokenOffset()
606          */
607         public int getTokenOffset() {
608                 if (AbstractPartitioner.DEBUG) {
609                         Assert.isTrue(fTokenOffset >= 0, Integer.toString(fTokenOffset));
610                 }
611                 return fTokenOffset;
612         }
613
614 }