a4239ba6e79e4ef2ddb3074e94e38774c0d0a2ce
[phpeclipse.git] / net.sourceforge.phpeclipse.ui / src / net / sourceforge / phpdt / internal / ui / text / FastJavaPartitionScanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2004 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
12
13 import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
14
15 //incastrix
16 //import org.eclipse.jface.text.Assert;
17 import org.eclipse.core.runtime.Assert;
18 import org.eclipse.jface.text.IDocument;
19 import org.eclipse.jface.text.rules.ICharacterScanner;
20 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
21 import org.eclipse.jface.text.rules.IToken;
22 import org.eclipse.jface.text.rules.Token;
23
24 /**
25  * This scanner recognizes the JavaDoc comments, Java multi line comments, Java
26  * single line comments, Java strings.
27  */
28 public class FastJavaPartitionScanner implements IPartitionTokenScanner,
29                 IPHPPartitions {
30
31         // states
32         private static final int PHP = 0;
33
34         private static final int SINGLE_LINE_COMMENT = 1;
35
36         private static final int MULTI_LINE_COMMENT = 2;
37
38         private static final int PHPDOC = 3;
39
40         private static final int STRING_DQ = 4;
41
42         private static final int STRING_SQ = 5;
43
44         private static final int STRING_HEREDOC = 6;
45
46         // beginning of prefixes and postfixes
47         private static final int NONE = 0;
48
49         private static final int BACKSLASH = 1; // postfix for STRING_DQ and
50                                                                                         // CHARACTER
51
52         private static final int SLASH = 2; // prefix for SINGLE_LINE or MULTI_LINE
53                                                                                 // or
54
55         // JAVADOC
56
57         private static final int SLASH_STAR = 3; // prefix for MULTI_LINE_COMMENT
58                                                                                                 // or
59
60         // JAVADOC
61
62         private static final int SLASH_STAR_STAR = 4; // prefix for
63                                                                                                         // MULTI_LINE_COMMENT
64
65         // or JAVADOC
66
67         private static final int STAR = 5; // postfix for MULTI_LINE_COMMENT or
68
69         // JAVADOC
70
71         private static final int CARRIAGE_RETURN = 6; // postfix for STRING_DQ,
72
73         // CHARACTER and
74         // SINGLE_LINE_COMMENT
75
76         // private static final int HEREDOC = 7;
77
78         /** The scanner. */
79         private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner(
80                         1000); // faster
81
82         // implementation
83
84         /** The offset of the last returned token. */
85         private int fTokenOffset;
86
87         /** The length of the last returned token. */
88         private int fTokenLength;
89
90         /** The state of the scanner. */
91         private int fState;
92
93         /** The last significant characters read. */
94         private int fLast;
95
96         /** The amount of characters already read on first call to nextToken(). */
97         private int fPrefixLength;
98
99         // emulate JavaPartitionScanner
100         private boolean fEmulate = false;
101
102         private int fJavaOffset;
103
104         private int fJavaLength;
105
106         private final IToken[] fTokens = new IToken[] { new Token(null),
107                         new Token(PHP_SINGLELINE_COMMENT),
108                         new Token(PHP_MULTILINE_COMMENT), new Token(PHP_PHPDOC_COMMENT),
109                         new Token(PHP_STRING_DQ), new Token(PHP_STRING_SQ),
110                         new Token(PHP_STRING_HEREDOC) };
111
112         public FastJavaPartitionScanner(boolean emulate) {
113                 fEmulate = emulate;
114         }
115
116         public FastJavaPartitionScanner() {
117                 this(false);
118         }
119
120         /*
121          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
122          */
123         public IToken nextToken() {
124
125                 // emulate JavaPartitionScanner
126                 if (fEmulate) {
127                         if (fJavaOffset != -1
128                                         && fTokenOffset + fTokenLength != fJavaOffset + fJavaLength) {
129                                 fTokenOffset += fTokenLength;
130                                 return fTokens[PHP];
131                         } else {
132                                 fJavaOffset = -1;
133                                 fJavaLength = 0;
134                         }
135                 }
136
137                 fTokenOffset += fTokenLength;
138                 fTokenLength = fPrefixLength;
139
140                 while (true) {
141                         final int ch = fScanner.read();
142
143                         // characters
144                         switch (ch) {
145                         case ICharacterScanner.EOF:
146                                 if (fTokenLength > 0) {
147                                         fLast = NONE; // ignore last
148                                         return preFix(fState, PHP, NONE, 0);
149
150                                 } else {
151                                         fLast = NONE;
152                                         fPrefixLength = 0;
153                                         return Token.EOF;
154                                 }
155
156                         case '\r':
157                                 // emulate JavaPartitionScanner
158                                 if (!fEmulate && fLast != CARRIAGE_RETURN) {
159                                         fLast = CARRIAGE_RETURN;
160                                         fTokenLength++;
161                                         continue;
162
163                                 } else {
164
165                                         switch (fState) {
166                                         case SINGLE_LINE_COMMENT:
167                                                 // case CHARACTER:
168                                                 // case STRING_DQ:
169                                                 // case STRING_SQ:
170                                                 if (fTokenLength > 0) {
171                                                         IToken token = fTokens[fState];
172
173                                                         // emulate JavaPartitionScanner
174                                                         if (fEmulate) {
175                                                                 fTokenLength++;
176                                                                 fLast = NONE;
177                                                                 fPrefixLength = 0;
178                                                         } else {
179                                                                 fLast = CARRIAGE_RETURN;
180                                                                 fPrefixLength = 1;
181                                                         }
182
183                                                         fState = PHP;
184                                                         return token;
185
186                                                 } else {
187                                                         consume();
188                                                         continue;
189                                                 }
190
191                                         default:
192                                                 consume();
193                                                 continue;
194                                         }
195                                 }
196
197                         case '\n':
198                                 switch (fState) {
199                                 case SINGLE_LINE_COMMENT:
200                                         // case CHARACTER:
201                                         // case STRING_DQ:
202                                         // case STRING_SQ:
203                                         // assert(fTokenLength > 0);
204                                         return postFix(fState);
205
206                                 default:
207                                         consume();
208                                         continue;
209                                 }
210
211                         case '?':
212                                 if (fState == SINGLE_LINE_COMMENT) {
213                                         int nextch = fScanner.read();
214                                         if (nextch == '>') {
215                                                 // <h1>This is an <?php # echo 'simple' ?> example.</h1>
216                                                 fTokenLength--;
217                                                 fScanner.unread();
218                                                 fScanner.unread();
219                                                 return postFix(fState);
220                                         } else {
221                                                 // bug #1404228: Crash on <?php // comment ?>
222                                                 if (nextch != ICharacterScanner.EOF) {
223                                                         fScanner.unread();
224                                                 }
225                                         }
226                                 }
227
228                         default:
229                                 if (!fEmulate && fLast == CARRIAGE_RETURN) {
230                                         switch (fState) {
231                                         case SINGLE_LINE_COMMENT:
232                                                 // case CHARACTER:
233                                                 // case STRING_DQ:
234                                                 // case STRING_SQ:
235                                                 int last;
236                                                 int newState;
237                                                 switch (ch) {
238                                                 case '/':
239                                                         last = SLASH;
240                                                         newState = PHP;
241                                                         break;
242
243                                                 case '*':
244                                                         last = STAR;
245                                                         newState = PHP;
246                                                         break;
247
248                                                 case '\'':
249                                                         last = NONE;
250                                                         newState = STRING_SQ;
251                                                         break;
252
253                                                 case '"':
254                                                         last = NONE;
255                                                         newState = STRING_DQ;
256                                                         break;
257
258                                                 case '\r':
259                                                         last = CARRIAGE_RETURN;
260                                                         newState = PHP;
261                                                         break;
262
263                                                 case '\\':
264                                                         last = BACKSLASH;
265                                                         newState = PHP;
266                                                         break;
267
268                                                 default:
269                                                         last = NONE;
270                                                         newState = PHP;
271                                                         break;
272                                                 }
273
274                                                 fLast = NONE; // ignore fLast
275                                                 return preFix(fState, newState, last, 1);
276
277                                         default:
278                                                 break;
279                                         }
280                                 }
281                         }
282
283                         // states
284                         switch (fState) {
285                         case PHP:
286                                 switch (ch) {
287                                 case '#':
288                                         if (fTokenLength > 0) {
289                                                 return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
290                                         } else {
291                                                 preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
292                                                 fTokenOffset += fTokenLength;
293                                                 fTokenLength = fPrefixLength;
294                                                 break;
295                                         }
296                                 case '/':
297                                         if (fLast == SLASH) {
298                                                 if (fTokenLength - getLastLength(fLast) > 0) {
299                                                         return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
300                                                 } else {
301                                                         preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
302                                                         fTokenOffset += fTokenLength;
303                                                         fTokenLength = fPrefixLength;
304                                                         break;
305                                                 }
306
307                                         } else {
308                                                 fTokenLength++;
309                                                 fLast = SLASH;
310                                                 break;
311                                         }
312
313                                 case '*':
314                                         if (fLast == SLASH) {
315                                                 if (fTokenLength - getLastLength(fLast) > 0)
316                                                         return preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR,
317                                                                         2);
318                                                 else {
319                                                         preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
320                                                         fTokenOffset += fTokenLength;
321                                                         fTokenLength = fPrefixLength;
322                                                         break;
323                                                 }
324
325                                         } else {
326                                                 consume();
327                                                 break;
328                                         }
329
330                                 case '\'':
331                                         fLast = NONE; // ignore fLast
332                                         if (fTokenLength > 0)
333                                                 return preFix(PHP, STRING_SQ, NONE, 1);
334                                         else {
335                                                 preFix(PHP, STRING_SQ, NONE, 1);
336                                                 fTokenOffset += fTokenLength;
337                                                 fTokenLength = fPrefixLength;
338                                                 break;
339                                         }
340
341                                 case '"':
342                                         fLast = NONE; // ignore fLast
343                                         if (fTokenLength > 0)
344                                                 return preFix(PHP, STRING_DQ, NONE, 1);
345                                         else {
346                                                 preFix(PHP, STRING_DQ, NONE, 1);
347                                                 fTokenOffset += fTokenLength;
348                                                 fTokenLength = fPrefixLength;
349                                                 break;
350                                         }
351
352                                 default:
353                                         consume();
354                                         break;
355                                 }
356                                 break;
357
358                         case SINGLE_LINE_COMMENT:
359                                 consume();
360                                 break;
361
362                         case PHPDOC:
363                                 switch (ch) {
364                                 case '/':
365                                         switch (fLast) {
366                                         case SLASH_STAR_STAR:
367                                                 return postFix(MULTI_LINE_COMMENT);
368
369                                         case STAR:
370                                                 return postFix(PHPDOC);
371
372                                         default:
373                                                 consume();
374                                                 break;
375                                         }
376                                         break;
377
378                                 case '*':
379                                         fTokenLength++;
380                                         fLast = STAR;
381                                         break;
382
383                                 default:
384                                         consume();
385                                         break;
386                                 }
387                                 break;
388
389                         case MULTI_LINE_COMMENT:
390                                 switch (ch) {
391                                 case '*':
392                                         if (fLast == SLASH_STAR) {
393                                                 fLast = SLASH_STAR_STAR;
394                                                 fTokenLength++;
395                                                 fState = PHPDOC;
396                                         } else {
397                                                 fTokenLength++;
398                                                 fLast = STAR;
399                                         }
400                                         break;
401
402                                 case '/':
403                                         if (fLast == STAR) {
404                                                 return postFix(MULTI_LINE_COMMENT);
405                                         } else {
406                                                 consume();
407                                                 break;
408                                         }
409
410                                 default:
411                                         consume();
412                                         break;
413                                 }
414                                 break;
415
416                         case STRING_DQ:
417                                 switch (ch) {
418                                 case '\\':
419                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
420                                         fTokenLength++;
421                                         break;
422
423                                 case '\"':
424                                         if (fLast != BACKSLASH) {
425                                                 return postFix(STRING_DQ);
426
427                                         } else {
428                                                 consume();
429                                                 break;
430                                         }
431
432                                 default:
433                                         consume();
434                                         break;
435                                 }
436                                 break;
437                         case STRING_SQ:
438                                 switch (ch) {
439                                 case '\\':
440                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
441                                         fTokenLength++;
442                                         break;
443
444                                 case '\'':
445                                         if (fLast != BACKSLASH) {
446                                                 return postFix(STRING_SQ);
447
448                                         } else {
449                                                 consume();
450                                                 break;
451                                         }
452
453                                 default:
454                                         consume();
455                                         break;
456                                 }
457                                 break;
458                         // case CHARACTER:
459                         // switch (ch) {
460                         // case '\\':
461                         // fLast= (fLast == BACKSLASH) ? NONE : BACKSLASH;
462                         // fTokenLength++;
463                         // break;
464                         //
465                         // case '\'':
466                         // if (fLast != BACKSLASH) {
467                         // return postFix(CHARACTER);
468                         //
469                         // } else {
470                         // consume();
471                         // break;
472                         // }
473                         //
474                         // default:
475                         // consume();
476                         // break;
477                         // }
478                         // break;
479                         }
480                 }
481         }
482
483         private static final int getLastLength(int last) {
484                 switch (last) {
485                 default:
486                         return -1;
487
488                 case NONE:
489                         return 0;
490
491                 case CARRIAGE_RETURN:
492                 case BACKSLASH:
493                 case SLASH:
494                 case STAR:
495                         return 1;
496
497                 case SLASH_STAR:
498                         return 2;
499
500                 case SLASH_STAR_STAR:
501                         return 3;
502                 }
503         }
504
505         private final void consume() {
506                 fTokenLength++;
507                 fLast = NONE;
508         }
509
510         private final IToken postFix(int state) {
511                 fTokenLength++;
512                 fLast = NONE;
513                 fState = PHP;
514                 fPrefixLength = 0;
515                 return fTokens[state];
516         }
517
518         private final IToken preFix(int state, int newState, int last,
519                         int prefixLength) {
520                 // emulate JavaPartitionScanner
521                 if (fEmulate && state == PHP
522                                 && (fTokenLength - getLastLength(fLast) > 0)) {
523                         fTokenLength -= getLastLength(fLast);
524                         fJavaOffset = fTokenOffset;
525                         fJavaLength = fTokenLength;
526                         fTokenLength = 1;
527                         fState = newState;
528                         fPrefixLength = prefixLength;
529                         fLast = last;
530                         return fTokens[state];
531
532                 } else {
533                         fTokenLength -= getLastLength(fLast);
534                         fLast = last;
535                         fPrefixLength = prefixLength;
536                         IToken token = fTokens[state];
537                         fState = newState;
538                         return token;
539                 }
540         }
541
542         private static int getState(String contentType) {
543
544                 if (contentType == null)
545                         return PHP;
546
547                 else if (contentType.equals(PHP_SINGLELINE_COMMENT))
548                         return SINGLE_LINE_COMMENT;
549
550                 else if (contentType.equals(PHP_MULTILINE_COMMENT))
551                         return MULTI_LINE_COMMENT;
552
553                 else if (contentType.equals(PHP_PHPDOC_COMMENT))
554                         return PHPDOC;
555
556                 else if (contentType.equals(PHP_STRING_DQ))
557                         return STRING_DQ;
558
559                 else if (contentType.equals(PHP_STRING_SQ))
560                         return STRING_SQ;
561
562                 else if (contentType.equals(PHP_STRING_HEREDOC))
563                         return STRING_HEREDOC;
564
565                 // else if (contentType.equals(JAVA_CHARACTER))
566                 // return CHARACTER;
567
568                 else
569                         return PHP;
570         }
571
572         /*
573          * @see IPartitionTokenScanner#setPartialRange(IDocument, int, int, String,
574          *      int)
575          */
576         public void setPartialRange(IDocument document, int offset, int length,
577                         String contentType, int partitionOffset) {
578                 fScanner.setRange(document, offset, length);
579                 setRange(document, offset, length);
580                 fTokenOffset = partitionOffset;
581                 fTokenLength = 0;
582                 fPrefixLength = offset - partitionOffset;
583                 fLast = NONE;
584
585                 if (offset == partitionOffset) {
586                         // restart at beginning of partition
587                         fState = PHP;
588                 } else {
589                         fState = getState(contentType);
590                 }
591
592                 // emulate JavaPartitionScanner
593                 if (fEmulate) {
594                         fJavaOffset = -1;
595                         fJavaLength = 0;
596                 }
597         }
598
599         /*
600          * @see ITokenScanner#setRange(IDocument, int, int)
601          */
602         public void setRange(IDocument document, int offset, int length) {
603                 fScanner.setRange(document, offset, length);
604                 fTokenOffset = offset;
605                 fTokenLength = 0;
606                 fPrefixLength = 0;
607                 fLast = NONE;
608                 fState = PHP;
609
610                 // emulate JavaPartitionScanner
611                 if (fEmulate) {
612                         fJavaOffset = -1;
613                         fJavaLength = 0;
614                 }
615         }
616
617         /*
618          * @see ITokenScanner#getTokenLength()
619          */
620         public int getTokenLength() {
621                 return fTokenLength;
622         }
623
624         /*
625          * @see ITokenScanner#getTokenOffset()
626          */
627         public int getTokenOffset() {
628                 if (AbstractPartitioner.DEBUG) {
629                         Assert.isTrue(fTokenOffset >= 0, Integer.toString(fTokenOffset));
630                 }
631                 return fTokenOffset;
632         }
633
634 }