8653b77c369f3b9fc16b8c649279e5ab3b7e9b19
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / FastJavaPartitionScanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2004 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
12
13 import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
14
15 import org.eclipse.jface.text.Assert;
16 import org.eclipse.jface.text.IDocument;
17 import org.eclipse.jface.text.rules.ICharacterScanner;
18 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
19 import org.eclipse.jface.text.rules.IToken;
20 import org.eclipse.jface.text.rules.Token;
21
22 /**
23  * This scanner recognizes the JavaDoc comments, Java multi line comments, Java
24  * single line comments, Java strings.
25  */
26 public class FastJavaPartitionScanner implements IPartitionTokenScanner,
27                 IPHPPartitions {
28
29         // states
30         private static final int PHP = 0;
31
32         private static final int SINGLE_LINE_COMMENT = 1;
33
34         private static final int MULTI_LINE_COMMENT = 2;
35
36         private static final int PHPDOC = 3;
37
38         private static final int STRING_DQ = 4;
39
40         private static final int STRING_SQ = 5;
41
42         private static final int STRING_HEREDOC = 6;
43
44         // beginning of prefixes and postfixes
45         private static final int NONE = 0;
46
47         private static final int BACKSLASH = 1; // postfix for STRING_DQ and
48                                                                                         // CHARACTER
49
50         private static final int SLASH = 2; // prefix for SINGLE_LINE or MULTI_LINE
51                                                                                 // or
52
53         // JAVADOC
54
55         private static final int SLASH_STAR = 3; // prefix for MULTI_LINE_COMMENT
56                                                                                                 // or
57
58         // JAVADOC
59
60         private static final int SLASH_STAR_STAR = 4; // prefix for
61                                                                                                         // MULTI_LINE_COMMENT
62
63         // or JAVADOC
64
65         private static final int STAR = 5; // postfix for MULTI_LINE_COMMENT or
66
67         // JAVADOC
68
69         private static final int CARRIAGE_RETURN = 6; // postfix for STRING_DQ,
70
71         // CHARACTER and
72         // SINGLE_LINE_COMMENT
73
74         // private static final int HEREDOC = 7;
75
76         /** The scanner. */
77         private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner(
78                         1000); // faster
79
80         // implementation
81
82         /** The offset of the last returned token. */
83         private int fTokenOffset;
84
85         /** The length of the last returned token. */
86         private int fTokenLength;
87
88         /** The state of the scanner. */
89         private int fState;
90
91         /** The last significant characters read. */
92         private int fLast;
93
94         /** The amount of characters already read on first call to nextToken(). */
95         private int fPrefixLength;
96
97         // emulate JavaPartitionScanner
98         private boolean fEmulate = false;
99
100         private int fJavaOffset;
101
102         private int fJavaLength;
103
104         private final IToken[] fTokens = new IToken[] { new Token(null),
105                         new Token(PHP_SINGLELINE_COMMENT),
106                         new Token(PHP_MULTILINE_COMMENT), new Token(PHP_PHPDOC_COMMENT),
107                         new Token(PHP_STRING_DQ), new Token(PHP_STRING_SQ),
108                         new Token(PHP_STRING_HEREDOC) };
109
110         public FastJavaPartitionScanner(boolean emulate) {
111                 fEmulate = emulate;
112         }
113
114         public FastJavaPartitionScanner() {
115                 this(false);
116         }
117
118         /*
119          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
120          */
121         public IToken nextToken() {
122
123                 // emulate JavaPartitionScanner
124                 if (fEmulate) {
125                         if (fJavaOffset != -1
126                                         && fTokenOffset + fTokenLength != fJavaOffset + fJavaLength) {
127                                 fTokenOffset += fTokenLength;
128                                 return fTokens[PHP];
129                         } else {
130                                 fJavaOffset = -1;
131                                 fJavaLength = 0;
132                         }
133                 }
134
135                 fTokenOffset += fTokenLength;
136                 fTokenLength = fPrefixLength;
137
138                 while (true) {
139                         final int ch = fScanner.read();
140
141                         // characters
142                         switch (ch) {
143                         case ICharacterScanner.EOF:
144                                 if (fTokenLength > 0) {
145                                         fLast = NONE; // ignore last
146                                         return preFix(fState, PHP, NONE, 0);
147
148                                 } else {
149                                         fLast = NONE;
150                                         fPrefixLength = 0;
151                                         return Token.EOF;
152                                 }
153
154                         case '\r':
155                                 // emulate JavaPartitionScanner
156                                 if (!fEmulate && fLast != CARRIAGE_RETURN) {
157                                         fLast = CARRIAGE_RETURN;
158                                         fTokenLength++;
159                                         continue;
160
161                                 } else {
162
163                                         switch (fState) {
164                                         case SINGLE_LINE_COMMENT:
165                                                 // case CHARACTER:
166                                                 // case STRING_DQ:
167                                                 // case STRING_SQ:
168                                                 if (fTokenLength > 0) {
169                                                         IToken token = fTokens[fState];
170
171                                                         // emulate JavaPartitionScanner
172                                                         if (fEmulate) {
173                                                                 fTokenLength++;
174                                                                 fLast = NONE;
175                                                                 fPrefixLength = 0;
176                                                         } else {
177                                                                 fLast = CARRIAGE_RETURN;
178                                                                 fPrefixLength = 1;
179                                                         }
180
181                                                         fState = PHP;
182                                                         return token;
183
184                                                 } else {
185                                                         consume();
186                                                         continue;
187                                                 }
188
189                                         default:
190                                                 consume();
191                                                 continue;
192                                         }
193                                 }
194
195                         case '\n':
196                                 switch (fState) {
197                                 case SINGLE_LINE_COMMENT:
198                                         // case CHARACTER:
199                                         // case STRING_DQ:
200                                         // case STRING_SQ:
201                                         // assert(fTokenLength > 0);
202                                         return postFix(fState);
203
204                                 default:
205                                         consume();
206                                         continue;
207                                 }
208
209                         case '?':
210                                 if (fState == SINGLE_LINE_COMMENT) {
211                                         int nextch = fScanner.read();
212                                         if (nextch == '>') {
213                                                 // <h1>This is an <?php # echo 'simple' ?> example.</h1>
214                                                 fTokenLength--;
215                                                 fScanner.unread();
216                                                 fScanner.unread();
217                                                 return postFix(fState);
218                                         } else {
219                                                 // bug #1404228: Crash on <?php // comment ?>
220                                                 if (nextch != ICharacterScanner.EOF) {
221                                                         fScanner.unread();
222                                                 }
223                                         }
224                                 }
225
226                         default:
227                                 if (!fEmulate && fLast == CARRIAGE_RETURN) {
228                                         switch (fState) {
229                                         case SINGLE_LINE_COMMENT:
230                                                 // case CHARACTER:
231                                                 // case STRING_DQ:
232                                                 // case STRING_SQ:
233                                                 int last;
234                                                 int newState;
235                                                 switch (ch) {
236                                                 case '/':
237                                                         last = SLASH;
238                                                         newState = PHP;
239                                                         break;
240
241                                                 case '*':
242                                                         last = STAR;
243                                                         newState = PHP;
244                                                         break;
245
246                                                 case '\'':
247                                                         last = NONE;
248                                                         newState = STRING_SQ;
249                                                         break;
250
251                                                 case '"':
252                                                         last = NONE;
253                                                         newState = STRING_DQ;
254                                                         break;
255
256                                                 case '\r':
257                                                         last = CARRIAGE_RETURN;
258                                                         newState = PHP;
259                                                         break;
260
261                                                 case '\\':
262                                                         last = BACKSLASH;
263                                                         newState = PHP;
264                                                         break;
265
266                                                 default:
267                                                         last = NONE;
268                                                         newState = PHP;
269                                                         break;
270                                                 }
271
272                                                 fLast = NONE; // ignore fLast
273                                                 return preFix(fState, newState, last, 1);
274
275                                         default:
276                                                 break;
277                                         }
278                                 }
279                         }
280
281                         // states
282                         switch (fState) {
283                         case PHP:
284                                 switch (ch) {
285                                 case '#':
286                                         if (fTokenLength > 0) {
287                                                 return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
288                                         } else {
289                                                 preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
290                                                 fTokenOffset += fTokenLength;
291                                                 fTokenLength = fPrefixLength;
292                                                 break;
293                                         }
294                                 case '/':
295                                         if (fLast == SLASH) {
296                                                 if (fTokenLength - getLastLength(fLast) > 0) {
297                                                         return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
298                                                 } else {
299                                                         preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
300                                                         fTokenOffset += fTokenLength;
301                                                         fTokenLength = fPrefixLength;
302                                                         break;
303                                                 }
304
305                                         } else {
306                                                 fTokenLength++;
307                                                 fLast = SLASH;
308                                                 break;
309                                         }
310
311                                 case '*':
312                                         if (fLast == SLASH) {
313                                                 if (fTokenLength - getLastLength(fLast) > 0)
314                                                         return preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR,
315                                                                         2);
316                                                 else {
317                                                         preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
318                                                         fTokenOffset += fTokenLength;
319                                                         fTokenLength = fPrefixLength;
320                                                         break;
321                                                 }
322
323                                         } else {
324                                                 consume();
325                                                 break;
326                                         }
327
328                                 case '\'':
329                                         fLast = NONE; // ignore fLast
330                                         if (fTokenLength > 0)
331                                                 return preFix(PHP, STRING_SQ, NONE, 1);
332                                         else {
333                                                 preFix(PHP, STRING_SQ, NONE, 1);
334                                                 fTokenOffset += fTokenLength;
335                                                 fTokenLength = fPrefixLength;
336                                                 break;
337                                         }
338
339                                 case '"':
340                                         fLast = NONE; // ignore fLast
341                                         if (fTokenLength > 0)
342                                                 return preFix(PHP, STRING_DQ, NONE, 1);
343                                         else {
344                                                 preFix(PHP, STRING_DQ, NONE, 1);
345                                                 fTokenOffset += fTokenLength;
346                                                 fTokenLength = fPrefixLength;
347                                                 break;
348                                         }
349
350                                 default:
351                                         consume();
352                                         break;
353                                 }
354                                 break;
355
356                         case SINGLE_LINE_COMMENT:
357                                 consume();
358                                 break;
359
360                         case PHPDOC:
361                                 switch (ch) {
362                                 case '/':
363                                         switch (fLast) {
364                                         case SLASH_STAR_STAR:
365                                                 return postFix(MULTI_LINE_COMMENT);
366
367                                         case STAR:
368                                                 return postFix(PHPDOC);
369
370                                         default:
371                                                 consume();
372                                                 break;
373                                         }
374                                         break;
375
376                                 case '*':
377                                         fTokenLength++;
378                                         fLast = STAR;
379                                         break;
380
381                                 default:
382                                         consume();
383                                         break;
384                                 }
385                                 break;
386
387                         case MULTI_LINE_COMMENT:
388                                 switch (ch) {
389                                 case '*':
390                                         if (fLast == SLASH_STAR) {
391                                                 fLast = SLASH_STAR_STAR;
392                                                 fTokenLength++;
393                                                 fState = PHPDOC;
394                                         } else {
395                                                 fTokenLength++;
396                                                 fLast = STAR;
397                                         }
398                                         break;
399
400                                 case '/':
401                                         if (fLast == STAR) {
402                                                 return postFix(MULTI_LINE_COMMENT);
403                                         } else {
404                                                 consume();
405                                                 break;
406                                         }
407
408                                 default:
409                                         consume();
410                                         break;
411                                 }
412                                 break;
413
414                         case STRING_DQ:
415                                 switch (ch) {
416                                 case '\\':
417                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
418                                         fTokenLength++;
419                                         break;
420
421                                 case '\"':
422                                         if (fLast != BACKSLASH) {
423                                                 return postFix(STRING_DQ);
424
425                                         } else {
426                                                 consume();
427                                                 break;
428                                         }
429
430                                 default:
431                                         consume();
432                                         break;
433                                 }
434                                 break;
435                         case STRING_SQ:
436                                 switch (ch) {
437                                 case '\\':
438                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
439                                         fTokenLength++;
440                                         break;
441
442                                 case '\'':
443                                         if (fLast != BACKSLASH) {
444                                                 return postFix(STRING_SQ);
445
446                                         } else {
447                                                 consume();
448                                                 break;
449                                         }
450
451                                 default:
452                                         consume();
453                                         break;
454                                 }
455                                 break;
456                         // case CHARACTER:
457                         // switch (ch) {
458                         // case '\\':
459                         // fLast= (fLast == BACKSLASH) ? NONE : BACKSLASH;
460                         // fTokenLength++;
461                         // break;
462                         //
463                         // case '\'':
464                         // if (fLast != BACKSLASH) {
465                         // return postFix(CHARACTER);
466                         //
467                         // } else {
468                         // consume();
469                         // break;
470                         // }
471                         //
472                         // default:
473                         // consume();
474                         // break;
475                         // }
476                         // break;
477                         }
478                 }
479         }
480
481         private static final int getLastLength(int last) {
482                 switch (last) {
483                 default:
484                         return -1;
485
486                 case NONE:
487                         return 0;
488
489                 case CARRIAGE_RETURN:
490                 case BACKSLASH:
491                 case SLASH:
492                 case STAR:
493                         return 1;
494
495                 case SLASH_STAR:
496                         return 2;
497
498                 case SLASH_STAR_STAR:
499                         return 3;
500                 }
501         }
502
503         private final void consume() {
504                 fTokenLength++;
505                 fLast = NONE;
506         }
507
508         private final IToken postFix(int state) {
509                 fTokenLength++;
510                 fLast = NONE;
511                 fState = PHP;
512                 fPrefixLength = 0;
513                 return fTokens[state];
514         }
515
516         private final IToken preFix(int state, int newState, int last,
517                         int prefixLength) {
518                 // emulate JavaPartitionScanner
519                 if (fEmulate && state == PHP
520                                 && (fTokenLength - getLastLength(fLast) > 0)) {
521                         fTokenLength -= getLastLength(fLast);
522                         fJavaOffset = fTokenOffset;
523                         fJavaLength = fTokenLength;
524                         fTokenLength = 1;
525                         fState = newState;
526                         fPrefixLength = prefixLength;
527                         fLast = last;
528                         return fTokens[state];
529
530                 } else {
531                         fTokenLength -= getLastLength(fLast);
532                         fLast = last;
533                         fPrefixLength = prefixLength;
534                         IToken token = fTokens[state];
535                         fState = newState;
536                         return token;
537                 }
538         }
539
540         private static int getState(String contentType) {
541
542                 if (contentType == null)
543                         return PHP;
544
545                 else if (contentType.equals(PHP_SINGLELINE_COMMENT))
546                         return SINGLE_LINE_COMMENT;
547
548                 else if (contentType.equals(PHP_MULTILINE_COMMENT))
549                         return MULTI_LINE_COMMENT;
550
551                 else if (contentType.equals(PHP_PHPDOC_COMMENT))
552                         return PHPDOC;
553
554                 else if (contentType.equals(PHP_STRING_DQ))
555                         return STRING_DQ;
556
557                 else if (contentType.equals(PHP_STRING_SQ))
558                         return STRING_SQ;
559
560                 else if (contentType.equals(PHP_STRING_HEREDOC))
561                         return STRING_HEREDOC;
562
563                 // else if (contentType.equals(JAVA_CHARACTER))
564                 // return CHARACTER;
565
566                 else
567                         return PHP;
568         }
569
570         /*
571          * @see IPartitionTokenScanner#setPartialRange(IDocument, int, int, String,
572          *      int)
573          */
574         public void setPartialRange(IDocument document, int offset, int length,
575                         String contentType, int partitionOffset) {
576                 fScanner.setRange(document, offset, length);
577                 setRange(document, offset, length);
578                 fTokenOffset = partitionOffset;
579                 fTokenLength = 0;
580                 fPrefixLength = offset - partitionOffset;
581                 fLast = NONE;
582
583                 if (offset == partitionOffset) {
584                         // restart at beginning of partition
585                         fState = PHP;
586                 } else {
587                         fState = getState(contentType);
588                 }
589
590                 // emulate JavaPartitionScanner
591                 if (fEmulate) {
592                         fJavaOffset = -1;
593                         fJavaLength = 0;
594                 }
595         }
596
597         /*
598          * @see ITokenScanner#setRange(IDocument, int, int)
599          */
600         public void setRange(IDocument document, int offset, int length) {
601                 fScanner.setRange(document, offset, length);
602                 fTokenOffset = offset;
603                 fTokenLength = 0;
604                 fPrefixLength = 0;
605                 fLast = NONE;
606                 fState = PHP;
607
608                 // emulate JavaPartitionScanner
609                 if (fEmulate) {
610                         fJavaOffset = -1;
611                         fJavaLength = 0;
612                 }
613         }
614
615         /*
616          * @see ITokenScanner#getTokenLength()
617          */
618         public int getTokenLength() {
619                 return fTokenLength;
620         }
621
622         /*
623          * @see ITokenScanner#getTokenOffset()
624          */
625         public int getTokenOffset() {
626                 if (AbstractPartitioner.DEBUG) {
627                         Assert.isTrue(fTokenOffset >= 0, Integer.toString(fTokenOffset));
628                 }
629                 return fTokenOffset;
630         }
631
632 }