559dc8804effaf0c56bc225a24ba229c6768dd3a
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / FastJavaPartitionScanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2004 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
12
13 import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
14
15 import org.eclipse.jface.text.Assert;
16 import org.eclipse.jface.text.IDocument;
17 import org.eclipse.jface.text.rules.ICharacterScanner;
18 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
19 import org.eclipse.jface.text.rules.IToken;
20 import org.eclipse.jface.text.rules.Token;
21
22 /**
23  * This scanner recognizes the JavaDoc comments, Java multi line comments, Java
24  * single line comments, Java strings.
25  */
26 public class FastJavaPartitionScanner implements IPartitionTokenScanner, IPHPPartitions {
27
28         // states
29         private static final int PHP = 0;
30
31         private static final int SINGLE_LINE_COMMENT = 1;
32
33         private static final int MULTI_LINE_COMMENT = 2;
34
35         private static final int PHPDOC = 3;
36
37         private static final int STRING_DQ = 4;
38
39         private static final int STRING_SQ = 5;
40
41         private static final int STRING_HEREDOC = 6;
42
43         // beginning of prefixes and postfixes
44         private static final int NONE = 0;
45
46         private static final int BACKSLASH = 1; // postfix for STRING_DQ and CHARACTER
47
48         private static final int SLASH = 2; // prefix for SINGLE_LINE or MULTI_LINE or
49                                                                                                                                                         // JAVADOC
50
51         private static final int SLASH_STAR = 3; // prefix for MULTI_LINE_COMMENT or
52                                                                                                                                                                                 // JAVADOC
53
54         private static final int SLASH_STAR_STAR = 4; // prefix for MULTI_LINE_COMMENT
55                                                                                                                                                                                                 // or JAVADOC
56
57         private static final int STAR = 5; // postfix for MULTI_LINE_COMMENT or
58                                                                                                                                                         // JAVADOC
59
60         private static final int CARRIAGE_RETURN = 6; // postfix for STRING_DQ,
61                                                                                                                                                                                                 // CHARACTER and
62                                                                                                                                                                                                 // SINGLE_LINE_COMMENT
63
64         // private static final int HEREDOC = 7;
65
66         /** The scanner. */
67         private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner(1000); // faster
68                                                                                                                                                                                                                                                                                                                                                         // implementation
69
70         /** The offset of the last returned token. */
71         private int fTokenOffset;
72
73         /** The length of the last returned token. */
74         private int fTokenLength;
75
76         /** The state of the scanner. */
77         private int fState;
78
79         /** The last significant characters read. */
80         private int fLast;
81
82         /** The amount of characters already read on first call to nextToken(). */
83         private int fPrefixLength;
84
85         // emulate JavaPartitionScanner
86         private boolean fEmulate = false;
87
88         private int fJavaOffset;
89
90         private int fJavaLength;
91
92         private final IToken[] fTokens = new IToken[] { new Token(null), new Token(PHP_SINGLELINE_COMMENT),
93                         new Token(PHP_MULTILINE_COMMENT), new Token(PHP_PHPDOC_COMMENT), new Token(PHP_STRING_DQ), new Token(PHP_STRING_SQ),
94                         new Token(PHP_STRING_HEREDOC) };
95
96         public FastJavaPartitionScanner(boolean emulate) {
97                 fEmulate = emulate;
98         }
99
100         public FastJavaPartitionScanner() {
101                 this(false);
102         }
103
104         /*
105          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
106          */
107         public IToken nextToken() {
108
109                 // emulate JavaPartitionScanner
110                 if (fEmulate) {
111                         if (fJavaOffset != -1 && fTokenOffset + fTokenLength != fJavaOffset + fJavaLength) {
112                                 fTokenOffset += fTokenLength;
113                                 return fTokens[PHP];
114                         } else {
115                                 fJavaOffset = -1;
116                                 fJavaLength = 0;
117                         }
118                 }
119
120                 fTokenOffset += fTokenLength;
121                 fTokenLength = fPrefixLength;
122
123                 while (true) {
124                         final int ch = fScanner.read();
125
126                         // characters
127                         switch (ch) {
128                         case ICharacterScanner.EOF:
129                                 if (fTokenLength > 0) {
130                                         fLast = NONE; // ignore last
131                                         return preFix(fState, PHP, NONE, 0);
132
133                                 } else {
134                                         fLast = NONE;
135                                         fPrefixLength = 0;
136                                         return Token.EOF;
137                                 }
138
139                         case '\r':
140                                 // emulate JavaPartitionScanner
141                                 if (!fEmulate && fLast != CARRIAGE_RETURN) {
142                                         fLast = CARRIAGE_RETURN;
143                                         fTokenLength++;
144                                         continue;
145
146                                 } else {
147
148                                         switch (fState) {
149                                         case SINGLE_LINE_COMMENT:
150                                                 // case CHARACTER:
151                                                 // case STRING_DQ:
152                                                 // case STRING_SQ:
153                                                 if (fTokenLength > 0) {
154                                                         IToken token = fTokens[fState];
155
156                                                         // emulate JavaPartitionScanner
157                                                         if (fEmulate) {
158                                                                 fTokenLength++;
159                                                                 fLast = NONE;
160                                                                 fPrefixLength = 0;
161                                                         } else {
162                                                                 fLast = CARRIAGE_RETURN;
163                                                                 fPrefixLength = 1;
164                                                         }
165
166                                                         fState = PHP;
167                                                         return token;
168
169                                                 } else {
170                                                         consume();
171                                                         continue;
172                                                 }
173
174                                         default:
175                                                 consume();
176                                                 continue;
177                                         }
178                                 }
179
180                         case '\n':
181                                 switch (fState) {
182                                 case SINGLE_LINE_COMMENT:
183                                         // case CHARACTER:
184                                         // case STRING_DQ:
185                                         // case STRING_SQ:
186                                         // assert(fTokenLength > 0);
187                                         return postFix(fState);
188
189                                 default:
190                                         consume();
191                                         continue;
192                                 }
193
194                         case '?':
195                                 if (fState == SINGLE_LINE_COMMENT) {
196                                         int nextch = fScanner.read();
197                                         if (nextch == '>') {
198                                                 // <h1>This is an <?php # echo 'simple' ?> example.</h1>
199                                                 fTokenLength--;
200                                                 fScanner.unread();
201                                                 fScanner.unread();
202                                                 return postFix(fState);
203                                         }
204                                         else {
205                                                 // bug #1404228: Crash on <?php // comment ?>
206                                                 fScanner.unread();
207                                         }
208                                 }
209
210                         default:
211                                 if (!fEmulate && fLast == CARRIAGE_RETURN) {
212                                         switch (fState) {
213                                         case SINGLE_LINE_COMMENT:
214                                                 // case CHARACTER:
215                                                 // case STRING_DQ:
216                                                 // case STRING_SQ:
217                                                 int last;
218                                                 int newState;
219                                                 switch (ch) {
220                                                 case '/':
221                                                         last = SLASH;
222                                                         newState = PHP;
223                                                         break;
224
225                                                 case '*':
226                                                         last = STAR;
227                                                         newState = PHP;
228                                                         break;
229
230                                                 case '\'':
231                                                         last = NONE;
232                                                         newState = STRING_SQ;
233                                                         break;
234
235                                                 case '"':
236                                                         last = NONE;
237                                                         newState = STRING_DQ;
238                                                         break;
239
240                                                 case '\r':
241                                                         last = CARRIAGE_RETURN;
242                                                         newState = PHP;
243                                                         break;
244
245                                                 case '\\':
246                                                         last = BACKSLASH;
247                                                         newState = PHP;
248                                                         break;
249
250                                                 default:
251                                                         last = NONE;
252                                                         newState = PHP;
253                                                         break;
254                                                 }
255
256                                                 fLast = NONE; // ignore fLast
257                                                 return preFix(fState, newState, last, 1);
258
259                                         default:
260                                                 break;
261                                         }
262                                 }
263                         }
264
265                         // states
266                         switch (fState) {
267                         case PHP:
268                                 switch (ch) {
269                                 case '#':
270                                         if (fTokenLength > 0) {
271                                                 return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
272                                         } else {
273                                                 preFix(PHP, SINGLE_LINE_COMMENT, NONE, 1);
274                                                 fTokenOffset += fTokenLength;
275                                                 fTokenLength = fPrefixLength;
276                                                 break;
277                                         }
278                                 case '/':
279                                         if (fLast == SLASH) {
280                                                 if (fTokenLength - getLastLength(fLast) > 0) {
281                                                         return preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
282                                                 } else {
283                                                         preFix(PHP, SINGLE_LINE_COMMENT, NONE, 2);
284                                                         fTokenOffset += fTokenLength;
285                                                         fTokenLength = fPrefixLength;
286                                                         break;
287                                                 }
288
289                                         } else {
290                                                 fTokenLength++;
291                                                 fLast = SLASH;
292                                                 break;
293                                         }
294
295                                 case '*':
296                                         if (fLast == SLASH) {
297                                                 if (fTokenLength - getLastLength(fLast) > 0)
298                                                         return preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
299                                                 else {
300                                                         preFix(PHP, MULTI_LINE_COMMENT, SLASH_STAR, 2);
301                                                         fTokenOffset += fTokenLength;
302                                                         fTokenLength = fPrefixLength;
303                                                         break;
304                                                 }
305
306                                         } else {
307                                                 consume();
308                                                 break;
309                                         }
310
311                                 case '\'':
312                                         fLast = NONE; // ignore fLast
313                                         if (fTokenLength > 0)
314                                                 return preFix(PHP, STRING_SQ, NONE, 1);
315                                         else {
316                                                 preFix(PHP, STRING_SQ, NONE, 1);
317                                                 fTokenOffset += fTokenLength;
318                                                 fTokenLength = fPrefixLength;
319                                                 break;
320                                         }
321
322                                 case '"':
323                                         fLast = NONE; // ignore fLast
324                                         if (fTokenLength > 0)
325                                                 return preFix(PHP, STRING_DQ, NONE, 1);
326                                         else {
327                                                 preFix(PHP, STRING_DQ, NONE, 1);
328                                                 fTokenOffset += fTokenLength;
329                                                 fTokenLength = fPrefixLength;
330                                                 break;
331                                         }
332
333                                 default:
334                                         consume();
335                                         break;
336                                 }
337                                 break;
338
339                         case SINGLE_LINE_COMMENT:
340                                 consume();
341                                 break;
342
343                         case PHPDOC:
344                                 switch (ch) {
345                                 case '/':
346                                         switch (fLast) {
347                                         case SLASH_STAR_STAR:
348                                                 return postFix(MULTI_LINE_COMMENT);
349
350                                         case STAR:
351                                                 return postFix(PHPDOC);
352
353                                         default:
354                                                 consume();
355                                                 break;
356                                         }
357                                         break;
358
359                                 case '*':
360                                         fTokenLength++;
361                                         fLast = STAR;
362                                         break;
363
364                                 default:
365                                         consume();
366                                         break;
367                                 }
368                                 break;
369
370                         case MULTI_LINE_COMMENT:
371                                 switch (ch) {
372                                 case '*':
373                                         if (fLast == SLASH_STAR) {
374                                                 fLast = SLASH_STAR_STAR;
375                                                 fTokenLength++;
376                                                 fState = PHPDOC;
377                                         } else {
378                                                 fTokenLength++;
379                                                 fLast = STAR;
380                                         }
381                                         break;
382
383                                 case '/':
384                                         if (fLast == STAR) {
385                                                 return postFix(MULTI_LINE_COMMENT);
386                                         } else {
387                                                 consume();
388                                                 break;
389                                         }
390
391                                 default:
392                                         consume();
393                                         break;
394                                 }
395                                 break;
396
397                         case STRING_DQ:
398                                 switch (ch) {
399                                 case '\\':
400                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
401                                         fTokenLength++;
402                                         break;
403
404                                 case '\"':
405                                         if (fLast != BACKSLASH) {
406                                                 return postFix(STRING_DQ);
407
408                                         } else {
409                                                 consume();
410                                                 break;
411                                         }
412
413                                 default:
414                                         consume();
415                                         break;
416                                 }
417                                 break;
418                         case STRING_SQ:
419                                 switch (ch) {
420                                 case '\\':
421                                         fLast = (fLast == BACKSLASH) ? NONE : BACKSLASH;
422                                         fTokenLength++;
423                                         break;
424
425                                 case '\'':
426                                         if (fLast != BACKSLASH) {
427                                                 return postFix(STRING_SQ);
428
429                                         } else {
430                                                 consume();
431                                                 break;
432                                         }
433
434                                 default:
435                                         consume();
436                                         break;
437                                 }
438                                 break;
439                         // case CHARACTER:
440                         // switch (ch) {
441                         // case '\\':
442                         // fLast= (fLast == BACKSLASH) ? NONE : BACKSLASH;
443                         // fTokenLength++;
444                         // break;
445                         //
446                         // case '\'':
447                         // if (fLast != BACKSLASH) {
448                         // return postFix(CHARACTER);
449                         //
450                         // } else {
451                         // consume();
452                         // break;
453                         // }
454                         //
455                         // default:
456                         // consume();
457                         // break;
458                         // }
459                         // break;
460                         }
461                 }
462         }
463
464         private static final int getLastLength(int last) {
465                 switch (last) {
466                 default:
467                         return -1;
468
469                 case NONE:
470                         return 0;
471
472                 case CARRIAGE_RETURN:
473                 case BACKSLASH:
474                 case SLASH:
475                 case STAR:
476                         return 1;
477
478                 case SLASH_STAR:
479                         return 2;
480
481                 case SLASH_STAR_STAR:
482                         return 3;
483                 }
484         }
485
486         private final void consume() {
487                 fTokenLength++;
488                 fLast = NONE;
489         }
490
491         private final IToken postFix(int state) {
492                 fTokenLength++;
493                 fLast = NONE;
494                 fState = PHP;
495                 fPrefixLength = 0;
496                 return fTokens[state];
497         }
498
499         private final IToken preFix(int state, int newState, int last, int prefixLength) {
500                 // emulate JavaPartitionScanner
501                 if (fEmulate && state == PHP && (fTokenLength - getLastLength(fLast) > 0)) {
502                         fTokenLength -= getLastLength(fLast);
503                         fJavaOffset = fTokenOffset;
504                         fJavaLength = fTokenLength;
505                         fTokenLength = 1;
506                         fState = newState;
507                         fPrefixLength = prefixLength;
508                         fLast = last;
509                         return fTokens[state];
510
511                 } else {
512                         fTokenLength -= getLastLength(fLast);
513                         fLast = last;
514                         fPrefixLength = prefixLength;
515                         IToken token = fTokens[state];
516                         fState = newState;
517                         return token;
518                 }
519         }
520
521         private static int getState(String contentType) {
522
523                 if (contentType == null)
524                         return PHP;
525
526                 else if (contentType.equals(PHP_SINGLELINE_COMMENT))
527                         return SINGLE_LINE_COMMENT;
528
529                 else if (contentType.equals(PHP_MULTILINE_COMMENT))
530                         return MULTI_LINE_COMMENT;
531
532                 else if (contentType.equals(PHP_PHPDOC_COMMENT))
533                         return PHPDOC;
534
535                 else if (contentType.equals(PHP_STRING_DQ))
536                         return STRING_DQ;
537
538                 else if (contentType.equals(PHP_STRING_SQ))
539                         return STRING_SQ;
540
541                 else if (contentType.equals(PHP_STRING_HEREDOC))
542                         return STRING_HEREDOC;
543
544                 // else if (contentType.equals(JAVA_CHARACTER))
545                 // return CHARACTER;
546
547                 else
548                         return PHP;
549         }
550
551         /*
552          * @see IPartitionTokenScanner#setPartialRange(IDocument, int, int, String,
553          *      int)
554          */
555         public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {
556                 fScanner.setRange(document, offset, length);
557                 setRange(document, offset, length);
558                 fTokenOffset = partitionOffset;
559                 fTokenLength = 0;
560                 fPrefixLength = offset - partitionOffset;
561                 fLast = NONE;
562
563                 if (offset == partitionOffset) {
564                         // restart at beginning of partition
565                         fState = PHP;
566                 } else {
567                         fState = getState(contentType);
568                 }
569
570                 // emulate JavaPartitionScanner
571                 if (fEmulate) {
572                         fJavaOffset = -1;
573                         fJavaLength = 0;
574                 }
575         }
576
577         /*
578          * @see ITokenScanner#setRange(IDocument, int, int)
579          */
580         public void setRange(IDocument document, int offset, int length) {
581                 fScanner.setRange(document, offset, length);
582                 fTokenOffset = offset;
583                 fTokenLength = 0;
584                 fPrefixLength = 0;
585                 fLast = NONE;
586                 fState = PHP;
587
588                 // emulate JavaPartitionScanner
589                 if (fEmulate) {
590                         fJavaOffset = -1;
591                         fJavaLength = 0;
592                 }
593         }
594
595         /*
596          * @see ITokenScanner#getTokenLength()
597          */
598         public int getTokenLength() {
599                 return fTokenLength;
600         }
601
602         /*
603          * @see ITokenScanner#getTokenOffset()
604          */
605         public int getTokenOffset() {
606                 if (AbstractPartitioner.DEBUG) {
607                         Assert.isTrue(fTokenOffset >= 0, Integer.toString(fTokenOffset));
608                 }
609                 return fTokenOffset;
610         }
611
612 }