1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Stack;
16 import net.sourceforge.phpdt.core.compiler.CharOperation;
17 import net.sourceforge.phpdt.core.compiler.IScanner;
18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
21 public class Scanner implements IScanner, ITerminalSymbols {
23 * The APIs are: getNextToken(), which returns the current type of the token
24 * (this value is not memorized by the scanner); getCurrentTokenSource(),
25 * which provides the token's "REAL" source (i.e. all unicode escapes have been
26 * transformed into the correct char); sourceStart, which gives the position in the
27 * stream; and currentPosition-1, which gives the sourceEnd position in the stream
30 private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
32 //flag indicating if processed source contains occurrences of keyword assert
33 public boolean containsAssertKeyword = false;
34 public boolean recordLineSeparator;
35 public boolean phpMode = false;
36 public Stack encapsedStringStack = null;
37 public char currentCharacter;
38 public int startPosition;
39 public int currentPosition;
40 public int initialPosition, eofPosition;
41 // after this position eof are generated instead of real token from the
43 public boolean tokenizeComments;
44 public boolean tokenizeWhiteSpace;
45 public boolean tokenizeStrings;
46 //source should be viewed as a window (aka a part)
47 //of an entire very large stream
50 public char[] withoutUnicodeBuffer;
51 public int withoutUnicodePtr;
52 //when == 0 ==> no unicode in the current token
53 public boolean unicodeAsBackSlash = false;
54 public boolean scanningFloatLiteral = false;
55 //support for /** comments
56 //public char[][] comments = new char[10][];
57 public int[] commentStops = new int[10];
58 public int[] commentStarts = new int[10];
59 public int commentPtr = -1; // no comment test with commentPtr value -1
60 //diet parsing support - jump over some method body when requested
61 public boolean diet = false;
62 //support for the poor-line-debuggers ....
63 //remember the position of the cr/lf
64 public int[] lineEnds = new int[250];
65 public int linePtr = -1;
66 public boolean wasAcr = false;
67 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
68 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
69 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
70 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
71 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
72 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
73 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
74 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
75 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
76 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
77 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
78 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
79 //----------------optimized identifier management------------------
80 static final char[] charArray_a = new char[]{'a'},
81 charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
82 charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
83 charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
84 charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
85 charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
86 charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
87 charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
88 charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
89 charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
90 charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
91 charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
92 charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
93 charArray_z = new char[]{'z'};
94 static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
95 '\u0000', '\u0000', '\u0000'};
96 static final int TableSize = 30, InternalTableSize = 6;
98 public static final int OptimizedLength = 6;
100 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
101 // support for detecting non-externalized string literals
102 int currentLineNr = -1;
103 int previousLineNr = -1;
104 NLSLine currentLine = null;
105 List lines = new ArrayList();
106 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
107 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
108 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
109 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
110 public StringLiteral[] nonNLSStrings = null;
111 public boolean checkNonExternalizedStringLiterals = true;
112 public boolean wasNonExternalizedStringLiteral = false;
114 for (int i = 0; i < 6; i++) {
115 for (int j = 0; j < TableSize; j++) {
116 for (int k = 0; k < InternalTableSize; k++) {
117 charArray_length[i][j][k] = initCharArray;
122 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
124 public static final int RoundBracket = 0;
125 public static final int SquareBracket = 1;
126 public static final int CurlyBracket = 2;
127 public static final int BracketKinds = 3;
129 public char[][] foundTaskTags = null;
130 public char[][] foundTaskMessages;
131 public char[][] foundTaskPriorities = null;
132 public int[][] foundTaskPositions;
133 public int foundTaskCount = 0;
134 public char[][] taskTags = null;
135 public char[][] taskPriorities = null;
136 public static final boolean DEBUG = false;
137 public static final boolean TRACE = false;
/**
 * Convenience constructor: delegates to the three-argument constructor,
 * passing false as the third argument.
 *
 * @param tokenizeComments forwarded unchanged to the delegated constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the delegated constructor
 */
141 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
142 this(tokenizeComments, tokenizeWhiteSpace, false);
145 * Determines if the specified character is permissible as the first
146 * character in a PHP identifier
148 public static boolean isPHPIdentifierStart(char ch) {
149 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
152 * Determines if the specified character may be part of a PHP identifier as
153 * other than the first character
155 public static boolean isPHPIdentifierPart(char ch) {
156 return Character.isLetterOrDigit(ch) || (ch == '_')
157 || (0x7F <= ch && ch <= 0xFF);
159 public final boolean atEnd() {
160 // This code is not relevant if source is
161 // Only a part of the real stream input
162 return source.length == currentPosition;
/**
 * Answers the characters of the current token, whose length is computed as
 * currentPosition - startPosition. The switch on that length hands off to
 * the optimizedCurrentTokenSource1..6() helpers; the arraycopy from source
 * after the switch handles the remaining lengths.
 * NOTE(review): the case labels, the declaration of result and the final
 * return are not visible in this listing -- presumably cases 1..6 (see
 * OptimizedLength) followed by "return result"; confirm against the full file.
 */
164 public char[] getCurrentIdentifierSource() {
165 //return the token REAL source (aka unicodes are precomputed)
167 // if (withoutUnicodePtr != 0)
168 // //0 is used as a fast test flag so the real first char is in position 1
170 // withoutUnicodeBuffer,
172 // result = new char[withoutUnicodePtr],
174 // withoutUnicodePtr);
176 int length = currentPosition - startPosition;
177 switch (length) { // see OptimizedLength
179 return optimizedCurrentTokenSource1();
181 return optimizedCurrentTokenSource2();
183 return optimizedCurrentTokenSource3();
185 return optimizedCurrentTokenSource4();
187 return optimizedCurrentTokenSource5();
189 return optimizedCurrentTokenSource6();
// Fallback: plain copy of the token characters out of source.
192 System.arraycopy(source, startPosition, result = new char[length], 0,
197 public int getCurrentTokenEndPosition() {
198 return this.currentPosition - 1;
/**
 * Copies the characters source[startPosition .. currentPosition - 1] into a
 * freshly allocated array. The unicode-buffer variant is commented out, so
 * the copy is always taken from source.
 * NOTE(review): the declarations of result and length and the return
 * statement are not visible in this listing -- presumably the new array is
 * returned; confirm.
 */
200 public final char[] getCurrentTokenSource() {
201 // Return the token REAL source (aka unicodes are precomputed)
203 // if (withoutUnicodePtr != 0)
204 // // 0 is used as a fast test flag so the real first char is in position 1
206 // withoutUnicodeBuffer,
208 // result = new char[withoutUnicodePtr],
210 // withoutUnicodePtr);
// length is assigned inside the allocation expression and reused as the copy count.
213 System.arraycopy(source, startPosition,
214 result = new char[length = currentPosition - startPosition], 0, length);
/**
 * Same as the no-argument variant, but the copy starts at the caller-supplied
 * position: copies source[startPos .. currentPosition - 1] into a freshly
 * allocated array.
 * NOTE(review): the declarations of result and length and the return
 * statement are not visible in this listing -- presumably the new array is
 * returned; confirm.
 *
 * @param startPos index in source of the first character to copy
 */
218 public final char[] getCurrentTokenSource(int startPos) {
219 // Return the token REAL source (aka unicodes are precomputed)
221 // if (withoutUnicodePtr != 0)
222 // // 0 is used as a fast test flag so the real first char is in position 1
224 // withoutUnicodeBuffer,
226 // result = new char[withoutUnicodePtr],
228 // withoutUnicodePtr);
// length is assigned inside the allocation expression and reused as the copy count.
231 System.arraycopy(source, startPos,
232 result = new char[length = currentPosition - startPos], 0, length);
/**
 * Answers the current string token without its first and last character
 * (the delimiters). When withoutUnicodePtr is non-zero the characters come
 * from withoutUnicodeBuffer (index 2 onwards, withoutUnicodePtr - 2 of them);
 * the second arraycopy takes them from source instead (startPosition + 1
 * onwards, currentPosition - startPosition - 2 of them).
 * NOTE(review): the "else", the local declarations and the return are not
 * visible in this listing -- presumably the second copy is the else branch
 * and the new array is returned; confirm.
 */
236 public final char[] getCurrentTokenSourceString() {
237 //return the token REAL source (aka unicodes are precomputed).
238 //REMOVE the two " that are at the beginning and the end.
240 if (withoutUnicodePtr != 0)
241 //0 is used as a fast test flag so the real first char is in position 1
242 System.arraycopy(withoutUnicodeBuffer, 2,
243 //2 is 1 (real start) + 1 (to jump over the ")
244 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
247 System.arraycopy(source, startPosition + 1,
248 result = new char[length = currentPosition - startPosition - 2], 0,
253 public int getCurrentTokenStartPosition() {
254 return this.startPosition;
/**
 * Copies the current token from source while leaving out its first and last
 * character: the copy starts at startPosition + 1 and its length is
 * currentPosition - startPosition - 2. Unlike getCurrentTokenSourceString(),
 * the visible code never consults withoutUnicodeBuffer.
 * NOTE(review): the local declarations, the end of the arraycopy call and
 * the return are not visible in this listing; confirm against the full file.
 */
256 public final char[] getCurrentStringLiteralSource() {
257 // Return the token REAL source (aka unicodes are precomputed)
260 System.arraycopy(source, startPosition + 1,
261 result = new char[length = currentPosition - startPosition - 2], 0,
267 * Search the source position corresponding to the end of a given line number
269 * Line numbers are 1-based, and relative to the scanner initialPosition.
270 * Character positions are 0-based.
272 * In case the given line number is inconsistent, answers -1.
274 public final int getLineEnd(int lineNumber) {
// Guards against a missing line-end table and an out-of-range line number.
// NOTE(review): the guarded statements are not visible in this listing;
// presumably each answers -1, as the method's documentation states -- confirm.
275 if (lineEnds == null)
277 if (lineNumber >= lineEnds.length)
// NOTE(review): the statement guarded by the next test is also not visible
// here; the return below is the general case, not the body of this if.
281 if (lineNumber == lineEnds.length - 1)
// Line numbers are 1-based, so the end of line N is stored at index N - 1.
283 return lineEnds[lineNumber - 1];
284 // next line start one character behind the lineEnd of the previous line
287 * Search the source position corresponding to the beginning of a given line
290 * Line numbers are 1-based, and relative to the scanner initialPosition.
291 * Character positions are 0-based.
293 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
295 * In case the given line number is inconsistent, answers -1.
297 public final int getLineStart(int lineNumber) {
// Guards against a missing line-end table and an out-of-range line number.
// NOTE(review): the guarded statements are not visible in this listing;
// presumably each answers -1, as the method's documentation states -- confirm.
298 if (lineEnds == null)
300 if (lineNumber >= lineEnds.length)
// NOTE(review): the condition selecting this return is not visible here;
// presumably it is the first line, which starts at initialPosition -- confirm.
305 return initialPosition;
// Line N starts right after the end of line N - 1, stored at index N - 2.
306 return lineEnds[lineNumber - 2] + 1;
307 // next line start one character behind the lineEnd of the previous line
/**
 * Tries to consume the next source character if it equals testedChar.
 * The visible code remembers currentPosition in temp, reads
 * source[currentPosition++] into currentCharacter, and rewinds
 * currentPosition to temp when the character differs or when the read runs
 * past the end of source (IndexOutOfBoundsException). The unicode-escape
 * handling is entirely commented out.
 * NOTE(review): the try keyword and the return statements are not visible in
 * this listing -- presumably false on mismatch/end of input and true
 * otherwise; confirm.
 * NOTE(review): on mismatch the visible code restores currentPosition only;
 * currentCharacter keeps the character that was read, which does not match
 * the "no side effect" remark below -- confirm whether callers rely on it.
 *
 * @param testedChar the character expected next in source
 */
309 public final boolean getNextChar(char testedChar) {
311 //handle the case of unicode.
312 //when a unicode appears then we must use a buffer that holds char
314 //At the end of this method currentCharacter holds the new visited char
315 //and currentPosition points right next after it
316 //Both previous lines are true if the currentCharacter is == to the
318 //On false, no side effect has occurred.
319 //ALL getNextChar.... ARE OPTIMIZED COPIES
320 int temp = currentPosition;
322 currentCharacter = source[currentPosition++];
323 // if (((currentCharacter = source[currentPosition++]) == '\\')
324 // && (source[currentPosition] == 'u')) {
325 // //-------------unicode traitement ------------
326 // int c1, c2, c3, c4;
327 // int unicodeSize = 6;
328 // currentPosition++;
329 // while (source[currentPosition] == 'u') {
330 // currentPosition++;
334 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
336 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
338 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
340 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
342 // currentPosition = temp;
346 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
347 // if (currentCharacter != testedChar) {
348 // currentPosition = temp;
351 // unicodeAsBackSlash = currentCharacter == '\\';
353 // //need the unicode buffer
354 // if (withoutUnicodePtr == 0) {
355 // //buffer all the entries that have been left aside....
356 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
360 // withoutUnicodeBuffer,
362 // withoutUnicodePtr);
364 // //fill the buffer with the char
365 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
368 // } //-------------end unicode traitement--------------
// Mismatch: un-consume the character by rewinding the position.
370 if (currentCharacter != testedChar) {
371 currentPosition = temp;
374 unicodeAsBackSlash = false;
375 // if (withoutUnicodePtr != 0)
376 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is handled like a mismatch.
379 } catch (IndexOutOfBoundsException e) {
380 unicodeAsBackSlash = false;
381 currentPosition = temp;
/**
 * Two-candidate variant of getNextChar: reads source[currentPosition++] into
 * currentCharacter and compares it with testedChar1, then testedChar2. When
 * neither matches, or when the read runs past the end of source
 * (IndexOutOfBoundsException), currentPosition is rewound to its value on
 * entry. The unicode-escape handling is entirely commented out.
 * NOTE(review): the try keyword and the return statements are not visible in
 * this listing -- per the first comment below presumably 0 for testedChar1,
 * 1 for testedChar2 and -1 otherwise; confirm.
 *
 * @param testedChar1 first accepted character
 * @param testedChar2 second accepted character
 */
385 public final int getNextChar(char testedChar1, char testedChar2) {
386 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
387 //test can be done with (x==0) for the first and (x>0) for the second
388 //handle the case of unicode.
389 //when a unicode appears then we must use a buffer that holds char
391 //At the end of this method currentCharacter holds the new visited char
392 //and currentPosition points right next after it
393 //Both previous lines are true if the currentCharacter is == to the
395 //On false, no side effect has occurred.
396 //ALL getNextChar.... ARE OPTIMIZED COPIES
397 int temp = currentPosition;
400 currentCharacter = source[currentPosition++];
401 // if (((currentCharacter = source[currentPosition++]) == '\\')
402 // && (source[currentPosition] == 'u')) {
403 // //-------------unicode traitement ------------
404 // int c1, c2, c3, c4;
405 // int unicodeSize = 6;
406 // currentPosition++;
407 // while (source[currentPosition] == 'u') {
408 // currentPosition++;
412 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
414 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
416 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
418 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
420 // currentPosition = temp;
424 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
425 // if (currentCharacter == testedChar1)
427 // else if (currentCharacter == testedChar2)
430 // currentPosition = temp;
434 // //need the unicode buffer
435 // if (withoutUnicodePtr == 0) {
436 // //buffer all the entries that have been left aside....
437 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
441 // withoutUnicodeBuffer,
443 // withoutUnicodePtr);
445 // //fill the buffer with the char
446 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
448 // } //-------------end unicode traitement--------------
450 if (currentCharacter == testedChar1)
452 else if (currentCharacter == testedChar2)
// Neither candidate matched: un-consume the character.
455 currentPosition = temp;
458 // if (withoutUnicodePtr != 0)
459 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is handled like a mismatch.
462 } catch (IndexOutOfBoundsException e) {
463 currentPosition = temp;
/**
 * Tries to consume the next source character if Character.isDigit accepts it.
 * Reads source[currentPosition++] into currentCharacter and rewinds
 * currentPosition to its value on entry when the character is not a digit or
 * when the read runs past the end of source (IndexOutOfBoundsException).
 * The unicode-escape handling is entirely commented out.
 * NOTE(review): the try keyword and the return statements are not visible in
 * this listing -- presumably true for a digit and false otherwise; confirm.
 */
467 public final boolean getNextCharAsDigit() {
469 //handle the case of unicode.
470 //when a unicode appears then we must use a buffer that holds char
472 //At the end of this method currentCharacter holds the new visited char
473 //and currentPosition points right next after it
474 //Both previous lines are true if the currentCharacter is a digit
475 //On false, no side effect has occurred.
476 //ALL getNextChar.... ARE OPTIMIZED COPIES
477 int temp = currentPosition;
479 currentCharacter = source[currentPosition++];
480 // if (((currentCharacter = source[currentPosition++]) == '\\')
481 // && (source[currentPosition] == 'u')) {
482 // //-------------unicode traitement ------------
483 // int c1, c2, c3, c4;
484 // int unicodeSize = 6;
485 // currentPosition++;
486 // while (source[currentPosition] == 'u') {
487 // currentPosition++;
491 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
493 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
495 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
497 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
499 // currentPosition = temp;
503 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
504 // if (!Character.isDigit(currentCharacter)) {
505 // currentPosition = temp;
509 // //need the unicode buffer
510 // if (withoutUnicodePtr == 0) {
511 // //buffer all the entries that have been left aside....
512 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
516 // withoutUnicodeBuffer,
518 // withoutUnicodePtr);
520 // //fill the buffer with the char
521 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
523 // } //-------------end unicode traitement--------------
// Not a digit: un-consume the character.
525 if (!Character.isDigit(currentCharacter)) {
526 currentPosition = temp;
529 // if (withoutUnicodePtr != 0)
530 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is handled like a non-digit.
533 } catch (IndexOutOfBoundsException e) {
534 currentPosition = temp;
/**
 * Radix-aware variant of getNextCharAsDigit: the next source character is
 * accepted when Character.digit(currentCharacter, radix) is not -1. Otherwise,
 * or when the read runs past the end of source (IndexOutOfBoundsException),
 * currentPosition is rewound to its value on entry. The unicode-escape
 * handling is entirely commented out.
 * NOTE(review): the try keyword and the return statements are not visible in
 * this listing -- presumably true for a valid digit and false otherwise;
 * confirm.
 *
 * @param radix the radix handed to Character.digit
 */
538 public final boolean getNextCharAsDigit(int radix) {
540 //handle the case of unicode.
541 //when a unicode appears then we must use a buffer that holds char
543 //At the end of this method currentCharacter holds the new visited char
544 //and currentPosition points right next after it
545 //Both previous lines are true if the currentCharacter is a digit base on
547 //On false, no side effect has occurred.
548 //ALL getNextChar.... ARE OPTIMIZED COPIES
549 int temp = currentPosition;
551 currentCharacter = source[currentPosition++];
552 // if (((currentCharacter = source[currentPosition++]) == '\\')
553 // && (source[currentPosition] == 'u')) {
554 // //-------------unicode traitement ------------
555 // int c1, c2, c3, c4;
556 // int unicodeSize = 6;
557 // currentPosition++;
558 // while (source[currentPosition] == 'u') {
559 // currentPosition++;
563 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
565 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
567 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
569 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
571 // currentPosition = temp;
575 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
576 // if (Character.digit(currentCharacter, radix) == -1) {
577 // currentPosition = temp;
581 // //need the unicode buffer
582 // if (withoutUnicodePtr == 0) {
583 // //buffer all the entries that have been left aside....
584 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
588 // withoutUnicodeBuffer,
590 // withoutUnicodePtr);
592 // //fill the buffer with the char
593 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
595 // } //-------------end unicode traitement--------------
// Not a digit in this radix: un-consume the character.
597 if (Character.digit(currentCharacter, radix) == -1) {
598 currentPosition = temp;
601 // if (withoutUnicodePtr != 0)
602 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is handled like a non-digit.
605 } catch (IndexOutOfBoundsException e) {
606 currentPosition = temp;
/**
 * Tries to consume the next source character if it can be part of an
 * identifier. Despite the method name, the test applied is
 * isPHPIdentifierPart, not the Java rule. When the character is rejected, or
 * when the read runs past the end of source (IndexOutOfBoundsException),
 * currentPosition is rewound to its value on entry. The unicode-escape
 * handling is entirely commented out.
 * NOTE(review): the try keyword and the return statements are not visible in
 * this listing -- presumably true for an identifier part and false otherwise;
 * confirm.
 */
610 public boolean getNextCharAsJavaIdentifierPart() {
612 //handle the case of unicode.
613 //when a unicode appears then we must use a buffer that holds char
615 //At the end of this method currentCharacter holds the new visited char
616 //and currentPosition points right next after it
617 //Both previous lines are true if the currentCharacter is a
618 // JavaIdentifierPart
619 //On false, no side effect has occurred.
620 //ALL getNextChar.... ARE OPTIMIZED COPIES
621 int temp = currentPosition;
623 currentCharacter = source[currentPosition++];
624 // if (((currentCharacter = source[currentPosition++]) == '\\')
625 // && (source[currentPosition] == 'u')) {
626 // //-------------unicode traitement ------------
627 // int c1, c2, c3, c4;
628 // int unicodeSize = 6;
629 // currentPosition++;
630 // while (source[currentPosition] == 'u') {
631 // currentPosition++;
635 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
637 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
639 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
641 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
643 // currentPosition = temp;
647 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
648 // if (!isPHPIdentifierPart(currentCharacter)) {
649 // currentPosition = temp;
653 // //need the unicode buffer
654 // if (withoutUnicodePtr == 0) {
655 // //buffer all the entries that have been left aside....
656 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
660 // withoutUnicodeBuffer,
662 // withoutUnicodePtr);
664 // //fill the buffer with the char
665 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
667 // } //-------------end unicode traitement--------------
// Not an identifier part: un-consume the character.
669 if (!isPHPIdentifierPart(currentCharacter)) {
670 currentPosition = temp;
673 // if (withoutUnicodePtr != 0)
674 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is handled like a rejected character.
677 } catch (IndexOutOfBoundsException e) {
678 currentPosition = temp;
/**
 * Looks ahead to decide whether the text that follows is a cast such as
 * "int )" or just belongs to a plain parenthesis.
 * Visible steps: remember currentPosition and currentCharacter; skip spaces
 * and tabs; collect a run of ASCII letters into buf; when the run is 3 to 7
 * letters long, compare it with the cast names int, bool, real, array, unset,
 * float, object, string, double, boolean and integer and store the matching
 * TokenName...CAST constant in tempToken (real, float and double all map to
 * TokenNamedoubleCAST; bool/boolean to TokenNameboolCAST; int/integer to
 * TokenNameintCAST); skip spaces and tabs again and test for ')'.
 * On the path shown at the end of the method, currentCharacter and
 * currentPosition are restored and TokenNameLPAREN is returned.
 * NOTE(review): the try keyword, the case labels, the use of "found" and the
 * body of the ')' test are not visible in this listing -- presumably
 * tempToken is returned when a cast name followed by ')' was found; confirm.
 * NOTE(review): the visible comparisons use lower-case letters only, while
 * PHP cast names are case-insensitive -- confirm whether the collected name
 * is lower-cased somewhere in the elided lines.
 *
 * @return TokenNameLPAREN on the restoring path shown at the end
 */
682 public int getCastOrParen() {
683 int tempPosition = currentPosition;
684 char tempCharacter = currentCharacter;
685 int tempToken = TokenNameLPAREN;
686 boolean found = false;
687 StringBuffer buf = new StringBuffer();
// Skip blanks in front of the candidate type name.
690 currentCharacter = source[currentPosition++];
691 } while (currentCharacter == ' ' || currentCharacter == '\t');
// Collect the candidate type name (ASCII letters only).
692 while ((currentCharacter >= 'a' && currentCharacter <= 'z')
693 || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
694 buf.append(currentCharacter);
695 currentCharacter = source[currentPosition++];
// Every cast name tested below is between 3 and 7 characters long.
697 if (buf.length() >= 3 && buf.length() <= 7) {
698 char[] data = buf.toString().toCharArray();
700 switch (data.length) {
703 if ((data[index] == 'i') && (data[++index] == 'n')
704 && (data[++index] == 't')) {
706 tempToken = TokenNameintCAST;
711 if ((data[index] == 'b') && (data[++index] == 'o')
712 && (data[++index] == 'o') && (data[++index] == 'l')) {
714 tempToken = TokenNameboolCAST;
717 if ((data[index] == 'r') && (data[++index] == 'e')
718 && (data[++index] == 'a') && (data[++index] == 'l')) {
720 tempToken = TokenNamedoubleCAST;
726 if ((data[index] == 'a') && (data[++index] == 'r')
727 && (data[++index] == 'r') && (data[++index] == 'a')
728 && (data[++index] == 'y')) {
730 tempToken = TokenNamearrayCAST;
733 if ((data[index] == 'u') && (data[++index] == 'n')
734 && (data[++index] == 's') && (data[++index] == 'e')
735 && (data[++index] == 't')) {
737 tempToken = TokenNameunsetCAST;
740 if ((data[index] == 'f') && (data[++index] == 'l')
741 && (data[++index] == 'o') && (data[++index] == 'a')
742 && (data[++index] == 't')) {
744 tempToken = TokenNamedoubleCAST;
750 // object string double
751 if ((data[index] == 'o') && (data[++index] == 'b')
752 && (data[++index] == 'j') && (data[++index] == 'e')
753 && (data[++index] == 'c') && (data[++index] == 't')) {
755 tempToken = TokenNameobjectCAST;
758 if ((data[index] == 's') && (data[++index] == 't')
759 && (data[++index] == 'r') && (data[++index] == 'i')
760 && (data[++index] == 'n') && (data[++index] == 'g')) {
762 tempToken = TokenNamestringCAST;
765 if ((data[index] == 'd') && (data[++index] == 'o')
766 && (data[++index] == 'u') && (data[++index] == 'b')
767 && (data[++index] == 'l') && (data[++index] == 'e')) {
769 tempToken = TokenNamedoubleCAST;
776 if ((data[index] == 'b') && (data[++index] == 'o')
777 && (data[++index] == 'o') && (data[++index] == 'l')
778 && (data[++index] == 'e') && (data[++index] == 'a')
779 && (data[++index] == 'n')) {
781 tempToken = TokenNameboolCAST;
784 if ((data[index] == 'i') && (data[++index] == 'n')
785 && (data[++index] == 't') && (data[++index] == 'e')
786 && (data[++index] == 'g') && (data[++index] == 'e')
787 && (data[++index] == 'r')) {
789 tempToken = TokenNameintCAST;
// Skip blanks between the type name and the expected closing parenthesis.
795 while (currentCharacter == ' ' || currentCharacter == '\t') {
796 currentCharacter = source[currentPosition++];
798 if (currentCharacter == ')') {
803 } catch (IndexOutOfBoundsException e) {
// Undo the look-ahead: restore the scanner state saved on entry.
805 currentCharacter = tempCharacter;
806 currentPosition = tempPosition;
807 return TokenNameLPAREN;
/**
 * Consumes a backquote-delimited string: characters are read from source
 * until a backquote is found. A backslash triggers
 * scanDoubleQuotedEscapeCharacter(), after which the scanned character is
 * written into withoutUnicodeBuffer (the buffer is first seeded from source
 * when withoutUnicodePtr is still 0). Line breaks inside the string are
 * accepted; the check that used to reject them is commented out, as is the
 * unicode-escape handling.
 * Running past the end of source (IndexOutOfBoundsException) is reported as
 * InvalidInputException(UNTERMINATED_STRING). When an InvalidInputException
 * carrying the INVALID_ESCAPE message is caught, the code looks up to 50
 * characters ahead, apparently stopping at the end of source or a newline,
 * and moves currentPosition past a closing backquote if it finds one.
 * Finally, when checkNonExternalizedStringLiterals is set, a StringLiteral
 * for the token is added to currentLine (created and registered in lines on
 * first use).
 * NOTE(review): the try keyword, several closing braces, the loop exits and
 * the rethrow after the look-ahead are not visible in this listing; confirm
 * the exact control flow against the full file.
 *
 * @throws InvalidInputException with UNTERMINATED_STRING when source ends
 *         before the closing backquote
 */
809 public void consumeStringInterpolated() throws InvalidInputException {
811 // consume next character
812 unicodeAsBackSlash = false;
813 currentCharacter = source[currentPosition++];
814 // if (((currentCharacter = source[currentPosition++]) == '\\')
815 // && (source[currentPosition] == 'u')) {
816 // getNextUnicodeChar();
818 // if (withoutUnicodePtr != 0) {
819 // withoutUnicodeBuffer[++withoutUnicodePtr] =
823 while (currentCharacter != '`') {
824 /** ** in PHP \r and \n are valid in string literals *** */
825 // if ((currentCharacter == '\n')
826 // || (currentCharacter == '\r')) {
827 // // relocate if finding another quote fairly close: thus unicode
828 '/u000D' will be fully consumed
829 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
830 // if (currentPosition + lookAhead == source.length)
832 // if (source[currentPosition + lookAhead] == '\n')
834 // if (source[currentPosition + lookAhead] == '\"') {
835 // currentPosition += lookAhead + 1;
839 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
841 if (currentCharacter == '\\') {
// escapeSize first holds the position before the escape, then its length.
842 int escapeSize = currentPosition;
843 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
844 //scanEscapeCharacter make a side effect on this value and we need
845 // the previous value few lines down this one
846 scanDoubleQuotedEscapeCharacter();
847 escapeSize = currentPosition - escapeSize;
848 if (withoutUnicodePtr == 0) {
849 //buffer all the entries that have been left aside....
850 withoutUnicodePtr = currentPosition - escapeSize - 1
852 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
854 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
855 } else { //overwrite the / in the buffer
856 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
857 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
858 // where only one is correct
863 // consume next character
864 unicodeAsBackSlash = false;
865 currentCharacter = source[currentPosition++];
866 // if (((currentCharacter = source[currentPosition++]) == '\\')
867 // && (source[currentPosition] == 'u')) {
868 // getNextUnicodeChar();
870 if (withoutUnicodePtr != 0) {
871 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// End of source reached before the closing backquote.
875 } catch (IndexOutOfBoundsException e) {
876 throw new InvalidInputException(UNTERMINATED_STRING);
877 } catch (InvalidInputException e) {
878 if (e.getMessage().equals(INVALID_ESCAPE)) {
879 // relocate if finding another quote fairly close: thus unicode
880 // '/u000D' will be fully consumed
881 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
882 if (currentPosition + lookAhead == source.length)
884 if (source[currentPosition + lookAhead] == '\n')
886 if (source[currentPosition + lookAhead] == '`') {
887 currentPosition += lookAhead + 1;
894 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
895 // //$NON-NLS-?$ where ? is an
897 if (currentLine == null) {
898 currentLine = new NLSLine();
899 lines.add(currentLine);
901 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
902 startPosition, currentPosition - 1));
/**
 * Consumes a single-quoted string: characters are read from source until a
 * single quote is found. A backslash triggers
 * scanSingleQuotedEscapeCharacter(), after which the scanned character is
 * written into withoutUnicodeBuffer (the buffer is first seeded from source
 * when withoutUnicodePtr is still 0). Line breaks inside the string are
 * accepted; the check that used to reject them is commented out, as is the
 * unicode-escape handling.
 * Running past the end of source (IndexOutOfBoundsException) is reported as
 * InvalidInputException(UNTERMINATED_STRING). When an InvalidInputException
 * carrying the INVALID_ESCAPE message is caught, the code looks up to 50
 * characters ahead, apparently stopping at the end of source or a newline,
 * and moves currentPosition past a closing single quote if it finds one.
 * Finally, when checkNonExternalizedStringLiterals is set, a StringLiteral
 * for the token is added to currentLine (created and registered in lines on
 * first use).
 * NOTE(review): the try keyword, several closing braces, the loop exits and
 * the rethrow after the look-ahead are not visible in this listing; confirm
 * the exact control flow against the full file.
 *
 * @throws InvalidInputException with UNTERMINATED_STRING when source ends
 *         before the closing quote
 */
905 public void consumeStringConstant() throws InvalidInputException {
907 // consume next character
908 unicodeAsBackSlash = false;
909 currentCharacter = source[currentPosition++];
910 // if (((currentCharacter = source[currentPosition++]) == '\\')
911 // && (source[currentPosition] == 'u')) {
912 // getNextUnicodeChar();
914 // if (withoutUnicodePtr != 0) {
915 // withoutUnicodeBuffer[++withoutUnicodePtr] =
919 while (currentCharacter != '\'') {
920 /** ** in PHP \r and \n are valid in string literals *** */
921 // if ((currentCharacter == '\n')
922 // || (currentCharacter == '\r')) {
923 // // relocate if finding another quote fairly close: thus unicode
924 '/u000D' will be fully consumed
925 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
926 // if (currentPosition + lookAhead == source.length)
928 // if (source[currentPosition + lookAhead] == '\n')
930 // if (source[currentPosition + lookAhead] == '\"') {
931 // currentPosition += lookAhead + 1;
935 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
937 if (currentCharacter == '\\') {
// escapeSize first holds the position before the escape, then its length.
938 int escapeSize = currentPosition;
939 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
940 //scanEscapeCharacter make a side effect on this value and we need
941 // the previous value few lines down this one
942 scanSingleQuotedEscapeCharacter();
943 escapeSize = currentPosition - escapeSize;
944 if (withoutUnicodePtr == 0) {
945 //buffer all the entries that have been left aside....
946 withoutUnicodePtr = currentPosition - escapeSize - 1
948 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
950 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
951 } else { //overwrite the / in the buffer
952 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
953 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
954 // where only one is correct
959 // consume next character
960 unicodeAsBackSlash = false;
961 currentCharacter = source[currentPosition++];
962 // if (((currentCharacter = source[currentPosition++]) == '\\')
963 // && (source[currentPosition] == 'u')) {
964 // getNextUnicodeChar();
966 if (withoutUnicodePtr != 0) {
967 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// End of source reached before the closing quote.
971 } catch (IndexOutOfBoundsException e) {
972 throw new InvalidInputException(UNTERMINATED_STRING);
973 } catch (InvalidInputException e) {
974 if (e.getMessage().equals(INVALID_ESCAPE)) {
975 // relocate if finding another quote fairly close: thus unicode
976 // '/u000D' will be fully consumed
977 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
978 if (currentPosition + lookAhead == source.length)
980 if (source[currentPosition + lookAhead] == '\n')
982 if (source[currentPosition + lookAhead] == '\'') {
983 currentPosition += lookAhead + 1;
990 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
991 // //$NON-NLS-?$ where ? is an
993 if (currentLine == null) {
994 currentLine = new NLSLine();
995 lines.add(currentLine);
997 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
998 startPosition, currentPosition - 1));
/**
 * Consumes the body of a double-quoted string literal, reading characters
 * from source until the closing '"' is reached.
 * <p>
 * A backslash is handed to scanDoubleQuotedEscapeCharacter(); once the first
 * escape has been seen, the token text is accumulated in withoutUnicodeBuffer
 * instead of being read directly from source. Line breaks inside the literal
 * are accepted (see the PHP note in the loop).
 * <p>
 * When checkNonExternalizedStringLiterals is set, the consumed token is
 * recorded as a StringLiteral on the current NLSLine.
 *
 * @throws InvalidInputException with UNTERMINATED_STRING when the scan runs
 *         past the end of source
 */
1001 public void consumeStringLiteral() throws InvalidInputException {
1003 // consume next character
1004 unicodeAsBackSlash = false;
1005 currentCharacter = source[currentPosition++];
1006 // if (((currentCharacter = source[currentPosition++]) == '\\')
1007 // && (source[currentPosition] == 'u')) {
1008 // getNextUnicodeChar();
1010 // if (withoutUnicodePtr != 0) {
1011 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1012 // currentCharacter;
// Scan until the closing double quote.
1015 while (currentCharacter != '"') {
1016 /** ** in PHP \r and \n are valid in string literals *** */
1017 // if ((currentCharacter == '\n')
1018 // || (currentCharacter == '\r')) {
1019 // // relocate if finding another quote fairly close: thus unicode
1020 // '/u000D' will be fully consumed
1021 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1022 // if (currentPosition + lookAhead == source.length)
1024 // if (source[currentPosition + lookAhead] == '\n')
1026 // if (source[currentPosition + lookAhead] == '\"') {
1027 // currentPosition += lookAhead + 1;
1031 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
// Escape sequence: let scanDoubleQuotedEscapeCharacter() process it, then
// store the resulting currentCharacter in withoutUnicodeBuffer.
1033 if (currentCharacter == '\\') {
1034 int escapeSize = currentPosition;
1035 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1036 //scanEscapeCharacter make a side effect on this value and we need
1037 // the previous value few lines down this one
1038 scanDoubleQuotedEscapeCharacter();
// escapeSize now holds the number of source characters the escape consumed.
1039 escapeSize = currentPosition - escapeSize;
1040 if (withoutUnicodePtr == 0) {
1041 //buffer all the entries that have been left aside....
1042 withoutUnicodePtr = currentPosition - escapeSize - 1
1044 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1046 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1047 } else { //overwrite the / in the buffer
1048 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1049 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1050 // where only one is correct
1051 withoutUnicodePtr--;
1055 // consume next character
1056 unicodeAsBackSlash = false;
1057 currentCharacter = source[currentPosition++];
1058 // if (((currentCharacter = source[currentPosition++]) == '\\')
1059 // && (source[currentPosition] == 'u')) {
1060 // getNextUnicodeChar();
// Buffered mode is active (an escape was seen earlier): keep appending.
1062 if (withoutUnicodePtr != 0) {
1063 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source means the literal was never closed.
1067 } catch (IndexOutOfBoundsException e) {
1068 throw new InvalidInputException(UNTERMINATED_STRING);
1069 } catch (InvalidInputException e) {
// Error recovery for an invalid escape: look at most 50 characters ahead for
// a closing double quote and, if found, move currentPosition just past it.
// NOTE(review): e.getMessage() can be null for an exception created without
// a message; INVALID_ESCAPE.equals(e.getMessage()) would be null-safe.
1070 if (e.getMessage().equals(INVALID_ESCAPE)) {
1071 // relocate if finding another quote fairly close: thus unicode
1072 // '/u000D' will be fully consumed
1073 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1074 if (currentPosition + lookAhead == source.length)
1076 if (source[currentPosition + lookAhead] == '\n')
1078 if (source[currentPosition + lookAhead] == '\"') {
1079 currentPosition += lookAhead + 1;
// Optional NLS bookkeeping: remember this literal on the current NLSLine,
// creating and registering the line in 'lines' on first use.
1086 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1087 // //$NON-NLS-?$ where ? is an
1089 if (currentLine == null) {
1090 currentLine = new NLSLine();
1091 lines.add(currentLine);
1093 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1094 startPosition, currentPosition - 1));
/**
 * Scans the next token starting at currentPosition and returns its
 * TokenName* constant.
 * <p>
 * Outline of the visible logic:
 * <ul>
 * <li>The top of encapsedStringStack decides the scanning mode: a quote
 * character means the scanner is inside an encapsed string delimited by
 * that character, '$' means it is inside a variable reference embedded in
 * such a string, and an empty stack (represented by ' ') means ordinary
 * code.</li>
 * <li>In ordinary code, leading white space is skipped (or returned as
 * TokenNameWHITESPACE when tokenizeWhiteSpace is set), then the token is
 * selected by a switch on currentCharacter: operators, heredoc, string
 * delimiters, comments, variables, identifiers and numbers.</li>
 * <li>TokenNameEOF is returned once currentPosition passes eofPosition, or
 * when an IndexOutOfBoundsException reaches the outermost handler.</li>
 * </ul>
 *
 * @return the TokenName* constant of the token that was scanned
 * @throws InvalidInputException for malformed input, e.g.
 *         UNTERMINATED_COMMENT when a block comment is still open at the
 *         end of source
 */
1097 public int getNextToken() throws InvalidInputException {
// NOTE(review): presumably the path taken while the scanner is outside PHP
// code (phpMode == false) — confirm against the guarding condition.
1099 return getInlinedHTML(currentPosition);
1102 this.wasAcr = false;
1104 jumpOverMethodBody();
// After skipping a method body: EOF if the scan ran past the source,
// otherwise report the closing brace.
1106 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1110 withoutUnicodePtr = 0;
1111 //start with a new token
// encapsedChar is the character on top of encapsedStringStack, or ' ' when
// the stack is empty.
1112 char encapsedChar = ' ';
1113 if (!encapsedStringStack.isEmpty()) {
1114 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// Inside an encapsed string: encapsedChar is the string's delimiter.
1116 if (encapsedChar != '$' && encapsedChar != ' ') {
1117 currentCharacter = source[currentPosition++];
// The delimiter itself comes next: return the matching EncapsedString token.
1118 if (currentCharacter == encapsedChar) {
1119 switch (currentCharacter) {
1121 return TokenNameEncapsedString0;
1123 return TokenNameEncapsedString1;
1125 return TokenNameEncapsedString2;
// Otherwise consume literal text up to the delimiter; the text is returned
// as TokenNameSTRING.
1128 while (currentCharacter != encapsedChar) {
1129 /** ** in PHP \r and \n are valid in string literals *** */
1130 switch (currentCharacter) {
1132 int escapeSize = currentPosition;
1133 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1134 //scanEscapeCharacter make a side effect on this value and
1135 // we need the previous value few lines down this one
1136 scanDoubleQuotedEscapeCharacter();
1137 escapeSize = currentPosition - escapeSize;
1138 if (withoutUnicodePtr == 0) {
1139 //buffer all the entries that have been left aside....
1140 withoutUnicodePtr = currentPosition - escapeSize - 1
1142 System.arraycopy(source, startPosition,
1143 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1144 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1145 } else { //overwrite the / in the buffer
1146 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1147 if (backSlashAsUnicodeInString) { //there are TWO \ in
1148 withoutUnicodePtr--;
// An identifier start or '{' follows: push '$' so that the next call scans
// in embedded-variable mode, and return the text collected so far.
1153 if (isPHPIdentifierStart(source[currentPosition])
1154 || source[currentPosition] == '{') {
1156 encapsedStringStack.push(new Character('$'));
1157 return TokenNameSTRING;
1161 if (source[currentPosition] == '$') { // CURLY_OPEN
1163 encapsedStringStack.push(new Character('$'));
1164 return TokenNameSTRING;
1167 // consume next character
1168 unicodeAsBackSlash = false;
1169 currentCharacter = source[currentPosition++];
1170 if (withoutUnicodePtr != 0) {
1171 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1176 return TokenNameSTRING;
1178 // ---------Consume white space and handles startPosition---------
1179 int whiteStart = currentPosition;
1180 startPosition = currentPosition;
1181 currentCharacter = source[currentPosition++];
// Embedded-variable mode: only the small token set below is recognised;
// anything else yields TokenNameERROR.
1182 if (encapsedChar == '$') {
1183 switch (currentCharacter) {
1185 currentCharacter = source[currentPosition++];
1186 return TokenNameSTRING;
1188 if (encapsedChar == '$') {
1189 if (getNextChar('$'))
1190 return TokenNameCURLY_OPEN;
1192 return TokenNameLBRACE;
1194 return TokenNameRBRACE;
1196 return TokenNameLBRACKET;
1198 return TokenNameRBRACKET;
1200 if (tokenizeStrings) {
1201 consumeStringConstant();
1202 return TokenNameStringConstant;
1204 return TokenNameEncapsedString1;
1206 return TokenNameEncapsedString2;
1208 if (tokenizeStrings) {
1209 consumeStringInterpolated();
1210 return TokenNameStringInterpolated;
1212 return TokenNameEncapsedString0;
1214 if (getNextChar('>'))
1215 return TokenNameMINUS_GREATER;
1216 return TokenNameSTRING;
// '$' followed by '{' is DOLLAR_LBRACE, '$' followed by an identifier start
// is a variable; otherwise the position is restored and '$' is plain text.
1218 if (currentCharacter == '$') {
1219 int oldPosition = currentPosition;
1221 currentCharacter = source[currentPosition++];
1222 if (currentCharacter == '{') {
1223 return TokenNameDOLLAR_LBRACE;
1225 if (isPHPIdentifierStart(currentCharacter)) {
1226 return scanIdentifierOrKeyword(true);
1228 currentPosition = oldPosition;
1229 return TokenNameSTRING;
1231 } catch (IndexOutOfBoundsException e) {
1232 currentPosition = oldPosition;
1233 return TokenNameSTRING;
1236 if (isPHPIdentifierStart(currentCharacter))
1237 return scanIdentifierOrKeyword(false);
1238 if (Character.isDigit(currentCharacter))
1239 return scanNumber(false);
1240 return TokenNameERROR;
1243 // boolean isWhiteSpace;
// Ordinary code: skip white space, moving startPosition along and recording
// line separators when requested.
1245 while ((currentCharacter == ' ')
1246 || Character.isWhitespace(currentCharacter)) {
1247 startPosition = currentPosition;
1248 currentCharacter = source[currentPosition++];
1249 // if (((currentCharacter = source[currentPosition++]) == '\\')
1250 // && (source[currentPosition] == 'u')) {
1251 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1253 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1254 checkNonExternalizeString();
1255 if (recordLineSeparator) {
1256 pushLineSeparator();
1261 // isWhiteSpace = (currentCharacter == ' ')
1262 // || Character.isWhitespace(currentCharacter);
1265 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1266 // reposition scanner in case we are interested by spaces as tokens
1268 startPosition = whiteStart;
1269 return TokenNameWHITESPACE;
1271 //little trick to get out in the middle of a source computation
1272 if (currentPosition > eofPosition)
1273 return TokenNameEOF;
1274 // ---------Identify the next token-------------
1275 switch (currentCharacter) {
1277 return getCastOrParen();
1279 return TokenNameRPAREN;
1281 return TokenNameLBRACE;
1283 return TokenNameRBRACE;
1285 return TokenNameLBRACKET;
1287 return TokenNameRBRACKET;
1289 return TokenNameSEMICOLON;
1291 return TokenNameCOMMA;
1293 if (getNextChar('='))
1294 return TokenNameDOT_EQUAL;
1295 if (getNextCharAsDigit())
1296 return scanNumber(true);
1297 return TokenNameDOT;
1301 if ((test = getNextChar('+', '=')) == 0)
1302 return TokenNamePLUS_PLUS;
1304 return TokenNamePLUS_EQUAL;
1305 return TokenNamePLUS;
1310 if ((test = getNextChar('-', '=')) == 0)
1311 return TokenNameMINUS_MINUS;
1313 return TokenNameMINUS_EQUAL;
1314 if (getNextChar('>'))
1315 return TokenNameMINUS_GREATER;
1316 return TokenNameMINUS;
1319 if (getNextChar('='))
1320 return TokenNameTWIDDLE_EQUAL;
1321 return TokenNameTWIDDLE;
1323 if (getNextChar('=')) {
1324 if (getNextChar('=')) {
1325 return TokenNameNOT_EQUAL_EQUAL;
1327 return TokenNameNOT_EQUAL;
1329 return TokenNameNOT;
1331 if (getNextChar('='))
1332 return TokenNameMULTIPLY_EQUAL;
1333 return TokenNameMULTIPLY;
1335 if (getNextChar('='))
1336 return TokenNameREMAINDER_EQUAL;
1337 return TokenNameREMAINDER;
// '<' family: peek one character; a lone '<' at the very end of source is
// TokenNameLESS.
1340 int oldPosition = currentPosition;
1342 currentCharacter = source[currentPosition++];
1343 } catch (IndexOutOfBoundsException e) {
1344 currentPosition = oldPosition;
1345 return TokenNameLESS;
1347 switch (currentCharacter) {
1349 return TokenNameLESS_EQUAL;
1351 return TokenNameNOT_EQUAL;
1353 if (getNextChar('='))
1354 return TokenNameLEFT_SHIFT_EQUAL;
// Third '<': heredoc. Skip white space, read the label identifier
// (TokenNameERROR if none), then scan forward until a line break is
// followed by the same label text.
1355 if (getNextChar('<')) {
1356 currentCharacter = source[currentPosition++];
1357 while (Character.isWhitespace(currentCharacter)) {
1358 currentCharacter = source[currentPosition++];
1360 int heredocStart = currentPosition - 1;
1361 int heredocLength = 0;
1362 if (isPHPIdentifierStart(currentCharacter)) {
1363 currentCharacter = source[currentPosition++];
1365 return TokenNameERROR;
1367 while (isPHPIdentifierPart(currentCharacter)) {
1368 currentCharacter = source[currentPosition++];
1370 heredocLength = currentPosition - heredocStart - 1;
1371 // heredoc end-tag determination
1372 boolean endTag = true;
1375 ch = source[currentPosition++];
1376 if (ch == '\r' || ch == '\n') {
1377 if (recordLineSeparator) {
1378 pushLineSeparator();
1382 for (int i = 0; i < heredocLength; i++) {
1383 if (source[currentPosition + i] != source[heredocStart
1390 currentPosition += heredocLength - 1;
1391 currentCharacter = source[currentPosition++];
1392 break; // do...while loop
1398 return TokenNameHEREDOC;
1400 return TokenNameLEFT_SHIFT;
1402 currentPosition = oldPosition;
1403 return TokenNameLESS;
1408 if ((test = getNextChar('=', '>')) == 0)
1409 return TokenNameGREATER_EQUAL;
1411 if ((test = getNextChar('=', '>')) == 0)
1412 return TokenNameRIGHT_SHIFT_EQUAL;
1413 return TokenNameRIGHT_SHIFT;
1415 return TokenNameGREATER;
1418 if (getNextChar('=')) {
1419 if (getNextChar('=')) {
1420 return TokenNameEQUAL_EQUAL_EQUAL;
1422 return TokenNameEQUAL_EQUAL;
1424 if (getNextChar('>'))
1425 return TokenNameEQUAL_GREATER;
1426 return TokenNameEQUAL;
1430 if ((test = getNextChar('&', '=')) == 0)
1431 return TokenNameAND_AND;
1433 return TokenNameAND_EQUAL;
1434 return TokenNameAND;
1439 if ((test = getNextChar('|', '=')) == 0)
1440 return TokenNameOR_OR;
1442 return TokenNameOR_EQUAL;
1446 if (getNextChar('='))
1447 return TokenNameXOR_EQUAL;
1448 return TokenNameXOR;
// "?>" hands scanning over to getInlinedHTML (or returns INLINE_HTML
// directly when it is the last thing in source); a lone '?' is QUESTION.
1450 if (getNextChar('>')) {
1452 if (currentPosition == source.length) {
1454 return TokenNameINLINE_HTML;
1456 return getInlinedHTML(currentPosition - 2);
1458 return TokenNameQUESTION;
1460 if (getNextChar(':'))
1461 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1462 return TokenNameCOLON;
1466 consumeStringConstant();
1467 return TokenNameStringConstant;
1469 if (tokenizeStrings) {
1470 consumeStringLiteral();
1471 return TokenNameStringLiteral;
1473 return TokenNameEncapsedString2;
1475 if (tokenizeStrings) {
1476 consumeStringInterpolated();
1477 return TokenNameStringInterpolated;
1479 return TokenNameEncapsedString0;
// Shared handling of division and comments: "/=" is DIVIDE_EQUAL, a '#'
// start or "//" is a line comment, the remaining getNextChar('/', '*')
// outcome is a block comment, and otherwise the token is DIVIDE.
1483 char startChar = currentCharacter;
1484 if (getNextChar('=')) {
1485 return TokenNameDIVIDE_EQUAL;
1488 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1490 int endPositionForLineComment = 0;
1491 try { //get the next char
1492 currentCharacter = source[currentPosition++];
1493 // if (((currentCharacter = source[currentPosition++])
1495 // && (source[currentPosition] == 'u')) {
1496 // //-------------unicode traitement ------------
1497 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1498 // currentPosition++;
1499 // while (source[currentPosition] == 'u') {
1500 // currentPosition++;
1503 // Character.getNumericValue(source[currentPosition++]))
1507 // Character.getNumericValue(source[currentPosition++]))
1511 // Character.getNumericValue(source[currentPosition++]))
1515 // Character.getNumericValue(source[currentPosition++]))
1519 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1521 // currentCharacter =
1522 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1525 //handle the \\u case manually into comment
1526 // if (currentCharacter == '\\') {
1527 // if (source[currentPosition] == '\\')
1528 // currentPosition++;
1529 // } //jump over the \\
1530 boolean isUnicode = false;
// Read to the end of the line; a "?>" inside the line comment stops the
// scan and is reported as TokenNameINLINE_HTML.
1531 while (currentCharacter != '\r' && currentCharacter != '\n') {
1532 if (currentCharacter == '?') {
1533 if (getNextChar('>')) {
1534 startPosition = currentPosition - 2;
1536 return TokenNameINLINE_HTML;
1541 currentCharacter = source[currentPosition++];
1542 // if (((currentCharacter = source[currentPosition++])
1544 // && (source[currentPosition] == 'u')) {
1545 // isUnicode = true;
1546 // //-------------unicode traitement ------------
1547 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1548 // currentPosition++;
1549 // while (source[currentPosition] == 'u') {
1550 // currentPosition++;
1553 // Character.getNumericValue(source[currentPosition++]))
1557 // Character.getNumericValue(
1558 // source[currentPosition++]))
1562 // Character.getNumericValue(
1563 // source[currentPosition++]))
1567 // Character.getNumericValue(
1568 // source[currentPosition++]))
1572 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1574 // currentCharacter =
1575 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1578 //handle the \\u case manually into comment
1579 // if (currentCharacter == '\\') {
1580 // if (source[currentPosition] == '\\')
1581 // currentPosition++;
1582 // } //jump over the \\
1585 endPositionForLineComment = currentPosition - 6;
1587 endPositionForLineComment = currentPosition - 1;
1589 recordComment(false);
1590 if ((currentCharacter == '\r')
1591 || (currentCharacter == '\n')) {
1592 checkNonExternalizeString();
1593 if (recordLineSeparator) {
1595 pushUnicodeLineSeparator();
1597 pushLineSeparator();
1603 if (tokenizeComments) {
1605 currentPosition = endPositionForLineComment;
1606 // reset one character behind
1608 return TokenNameCOMMENT_LINE;
1610 } catch (IndexOutOfBoundsException e) { //an eof will then
1612 if (tokenizeComments) {
1614 // reset one character behind
1615 return TokenNameCOMMENT_LINE;
1621 //traditional and annotation comment
1622 boolean isJavadoc = false, star = false;
1623 // consume next character
1624 unicodeAsBackSlash = false;
1625 currentCharacter = source[currentPosition++];
1626 // if (((currentCharacter = source[currentPosition++]) ==
1628 // && (source[currentPosition] == 'u')) {
1629 // getNextUnicodeChar();
1631 // if (withoutUnicodePtr != 0) {
1632 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1633 // currentCharacter;
1636 if (currentCharacter == '*') {
1640 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1641 checkNonExternalizeString();
1642 if (recordLineSeparator) {
1643 pushLineSeparator();
1648 try { //get the next char
1649 currentCharacter = source[currentPosition++];
1650 // if (((currentCharacter = source[currentPosition++])
1652 // && (source[currentPosition] == 'u')) {
1653 // //-------------unicode traitement ------------
1654 // getNextUnicodeChar();
1656 //handle the \\u case manually into comment
1657 // if (currentCharacter == '\\') {
1658 // if (source[currentPosition] == '\\')
1659 // currentPosition++;
1660 // //jump over the \\
1662 // empty comment is not a javadoc /**/
1663 if (currentCharacter == '/') {
1666 //loop until end of comment */
// 'star' remembers whether the previous character was '*', so the loop ends
// only on the two-character sequence "*/".
1667 while ((currentCharacter != '/') || (!star)) {
1668 if ((currentCharacter == '\r')
1669 || (currentCharacter == '\n')) {
1670 checkNonExternalizeString();
1671 if (recordLineSeparator) {
1672 pushLineSeparator();
1677 star = currentCharacter == '*';
1679 currentCharacter = source[currentPosition++];
1680 // if (((currentCharacter = source[currentPosition++])
1682 // && (source[currentPosition] == 'u')) {
1683 // //-------------unicode traitement ------------
1684 // getNextUnicodeChar();
1686 //handle the \\u case manually into comment
1687 // if (currentCharacter == '\\') {
1688 // if (source[currentPosition] == '\\')
1689 // currentPosition++;
1690 // } //jump over the \\
1692 recordComment(isJavadoc);
1693 if (tokenizeComments) {
1695 return TokenNameCOMMENT_PHPDOC;
1696 return TokenNameCOMMENT_BLOCK;
// Running off the end of source inside a block comment is an error.
1698 } catch (IndexOutOfBoundsException e) {
1699 throw new InvalidInputException(UNTERMINATED_COMMENT);
1703 return TokenNameDIVIDE;
1707 return TokenNameEOF;
1708 //the atEnd may not be <currentPosition == source.length> if
1709 // source is only some part of a real (external) stream
1710 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// '$' followed by an identifier start is a variable; otherwise the position
// is restored and a bare TokenNameDOLLAR is returned.
1712 if (currentCharacter == '$') {
1713 int oldPosition = currentPosition;
1715 currentCharacter = source[currentPosition++];
1716 if (isPHPIdentifierStart(currentCharacter)) {
1717 return scanIdentifierOrKeyword(true);
1719 currentPosition = oldPosition;
1720 return TokenNameDOLLAR;
1722 } catch (IndexOutOfBoundsException e) {
1723 currentPosition = oldPosition;
1724 return TokenNameDOLLAR;
1727 if (isPHPIdentifierStart(currentCharacter))
1728 return scanIdentifierOrKeyword(false);
1729 if (Character.isDigit(currentCharacter))
1730 return scanNumber(false);
1731 return TokenNameERROR;
1734 } //-----------------end switch while try--------------------
// Reading past the end of source anywhere above ends up here and is
// reported as end of file.
1735 catch (IndexOutOfBoundsException e) {
1738 return TokenNameEOF;
1742 * InvalidInputException
1744 private int getInlinedHTML(int start) throws InvalidInputException {
// Scans text outside PHP code, looking for a PHP open tag.
//
// 'start' becomes startPosition of the returned token. If currentPosition is
// already past the end of source, it is clamped to source.length and
// TokenNameEOF is returned. Otherwise characters are read one at a time:
// "<?" followed by white space, or by "php" in any letter case, ends the
// scan with TokenNameINLINE_HTML. Line separators met on the way are
// recorded when recordLineSeparator is set. Reaching the end of source
// (IndexOutOfBoundsException) also yields TokenNameINLINE_HTML.
// NOTE(review): presumably phpMode is switched on when an open tag is
// recognised — confirm.
1745 // int htmlPosition = start;
1746 if (currentPosition > source.length) {
1747 currentPosition = source.length;
1748 return TokenNameEOF;
1750 startPosition = start;
1753 currentCharacter = source[currentPosition++];
1754 if (currentCharacter == '<') {
1755 if (getNextChar('?')) {
1756 currentCharacter = source[currentPosition++];
// Short open tag: "<?" immediately followed by white space.
1757 if ((currentCharacter == ' ')
1758 || Character.isWhitespace(currentCharacter)) {
1761 return TokenNameINLINE_HTML;
// Long open tag: "<?php", each letter accepted in upper or lower case.
1763 boolean phpStart = (currentCharacter == 'P')
1764 || (currentCharacter == 'p');
1766 int test = getNextChar('H', 'h');
1768 test = getNextChar('P', 'p');
1772 return TokenNameINLINE_HTML;
1779 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1780 if (recordLineSeparator) {
1781 pushLineSeparator();
1786 } //-----------------while--------------------
1788 return TokenNameINLINE_HTML;
1789 } //-----------------try--------------------
// End of source reached without finding an open tag: the remainder is HTML.
1790 catch (IndexOutOfBoundsException e) {
1791 startPosition = start;
1795 return TokenNameINLINE_HTML;
1797 // public final void getNextUnicodeChar()
1798 // throws IndexOutOfBoundsException, InvalidInputException {
1800 // //handle the case of unicode.
1801 // //when a unicode appears then we must use a buffer that holds char
1803 // //At the end of this method currentCharacter holds the new visited char
1804 // //and currentPosition points right next after it
1806 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1808 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1809 // currentPosition++;
1810 // while (source[currentPosition] == 'u') {
1811 // currentPosition++;
1815 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1817 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1819 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1821 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1823 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1825 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1826 // //need the unicode buffer
1827 // if (withoutUnicodePtr == 0) {
1828 // //buffer all the entries that have been left aside....
1829 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1830 // System.arraycopy(
1833 // withoutUnicodeBuffer,
1835 // withoutUnicodePtr);
1837 // //fill the buffer with the char
1838 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1840 // unicodeAsBackSlash = currentCharacter == '\\';
1843 * Tokenize a method body, assuming that curly brackets are properly
1846 public final void jumpOverMethodBody() {
// Skips ahead through source without producing tokens. Each pass of the
// outer loop skips white space (recording line separators when
// recordLineSeparator is set) and then consumes one lexical element:
// quoted text, a line comment, a block comment, an identifier or variable,
// or a number. Scanner errors raised by the helpers are caught locally, and
// an IndexOutOfBoundsException from reading past the end of source ends the
// method quietly.
1847 this.wasAcr = false;
1850 while (true) { //loop for jumping over comments
1851 // ---------Consume white space and handles startPosition---------
1852 boolean isWhiteSpace;
1854 startPosition = currentPosition;
1855 currentCharacter = source[currentPosition++];
1856 // if (((currentCharacter = source[currentPosition++]) == '\\')
1857 // && (source[currentPosition] == 'u')) {
1858 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1860 if (recordLineSeparator
1861 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1862 pushLineSeparator();
1863 isWhiteSpace = Character.isWhitespace(currentCharacter);
1865 } while (isWhiteSpace);
1866 // -------consume token until } is found---------
1867 switch (currentCharacter) {
1879 test = getNextChar('\\');
1882 scanDoubleQuotedEscapeCharacter();
1883 } catch (InvalidInputException ex) {
1886 // try { // consume next character
1887 unicodeAsBackSlash = false;
1888 currentCharacter = source[currentPosition++];
1889 // if (((currentCharacter = source[currentPosition++]) == '\\')
1890 // && (source[currentPosition] == 'u')) {
1891 // getNextUnicodeChar();
1893 if (withoutUnicodePtr != 0) {
1894 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1897 // } catch (InvalidInputException ex) {
1905 // try { // consume next character
1906 unicodeAsBackSlash = false;
1907 currentCharacter = source[currentPosition++];
1908 // if (((currentCharacter = source[currentPosition++]) == '\\')
1909 // && (source[currentPosition] == 'u')) {
1910 // getNextUnicodeChar();
1912 if (withoutUnicodePtr != 0) {
1913 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1916 // } catch (InvalidInputException ex) {
// Skip a double-quoted string: read up to the closing quote, handing
// backslash escapes to scanDoubleQuotedEscapeCharacter().
1918 while (currentCharacter != '"') {
1919 if (currentCharacter == '\r') {
1920 if (source[currentPosition] == '\n')
1923 // the string cannot go further that the line
1925 if (currentCharacter == '\n') {
1927 // the string cannot go further that the line
1929 if (currentCharacter == '\\') {
1931 scanDoubleQuotedEscapeCharacter();
1932 } catch (InvalidInputException ex) {
1935 // try { // consume next character
1936 unicodeAsBackSlash = false;
1937 currentCharacter = source[currentPosition++];
1938 // if (((currentCharacter = source[currentPosition++]) == '\\')
1939 // && (source[currentPosition] == 'u')) {
1940 // getNextUnicodeChar();
1942 if (withoutUnicodePtr != 0) {
1943 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1946 // } catch (InvalidInputException ex) {
1949 } catch (IndexOutOfBoundsException e) {
// Comments: getNextChar('/', '*') == 0 selects the line-comment branch; the
// block-comment branch follows further down.
1956 if ((test = getNextChar('/', '*')) == 0) {
1960 currentCharacter = source[currentPosition++];
1961 // if (((currentCharacter = source[currentPosition++]) ==
1963 // && (source[currentPosition] == 'u')) {
1964 // //-------------unicode traitement ------------
1965 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1966 // currentPosition++;
1967 // while (source[currentPosition] == 'u') {
1968 // currentPosition++;
1971 // Character.getNumericValue(source[currentPosition++]))
1975 // Character.getNumericValue(source[currentPosition++]))
1979 // Character.getNumericValue(source[currentPosition++]))
1983 // Character.getNumericValue(source[currentPosition++]))
1986 // //error don't care of the value
1987 // currentCharacter = 'A';
1988 // } //something different from \n and \r
1990 // currentCharacter =
1991 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// Line comment: read up to the end of the line.
1994 while (currentCharacter != '\r' && currentCharacter != '\n') {
1996 currentCharacter = source[currentPosition++];
1997 // if (((currentCharacter = source[currentPosition++])
1999 // && (source[currentPosition] == 'u')) {
2000 // //-------------unicode traitement ------------
2001 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2002 // currentPosition++;
2003 // while (source[currentPosition] == 'u') {
2004 // currentPosition++;
2007 // Character.getNumericValue(source[currentPosition++]))
2011 // Character.getNumericValue(source[currentPosition++]))
2015 // Character.getNumericValue(source[currentPosition++]))
2019 // Character.getNumericValue(source[currentPosition++]))
2022 // //error don't care of the value
2023 // currentCharacter = 'A';
2024 // } //something different from \n and \r
2026 // currentCharacter =
2027 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2031 if (recordLineSeparator
2032 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2033 pushLineSeparator();
// Deliberately empty handler: a line comment that runs to the end of source
// is not treated as an error here.
2034 } catch (IndexOutOfBoundsException e) {
2035 } //an eof will then be generated
2039 //traditional and annotation comment
2040 boolean star = false;
2041 // try { // consume next character
2042 unicodeAsBackSlash = false;
2043 currentCharacter = source[currentPosition++];
2044 // if (((currentCharacter = source[currentPosition++]) == '\\')
2045 // && (source[currentPosition] == 'u')) {
2046 // getNextUnicodeChar();
2048 if (withoutUnicodePtr != 0) {
2049 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2052 // } catch (InvalidInputException ex) {
2054 if (currentCharacter == '*') {
2057 if (recordLineSeparator
2058 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2059 pushLineSeparator();
2060 try { //get the next char
2061 currentCharacter = source[currentPosition++];
2062 // if (((currentCharacter = source[currentPosition++]) ==
2064 // && (source[currentPosition] == 'u')) {
2065 // //-------------unicode traitement ------------
2066 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2067 // currentPosition++;
2068 // while (source[currentPosition] == 'u') {
2069 // currentPosition++;
2072 // Character.getNumericValue(source[currentPosition++]))
2076 // Character.getNumericValue(source[currentPosition++]))
2080 // Character.getNumericValue(source[currentPosition++]))
2084 // Character.getNumericValue(source[currentPosition++]))
2087 // //error don't care of the value
2088 // currentCharacter = 'A';
2089 // } //something different from * and /
2091 // currentCharacter =
2092 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2095 //loop until end of comment */
// 'star' remembers whether the previous character was '*', so the loop ends
// only on the two-character sequence "*/".
2096 while ((currentCharacter != '/') || (!star)) {
2097 if (recordLineSeparator
2098 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2099 pushLineSeparator();
2100 star = currentCharacter == '*';
2102 currentCharacter = source[currentPosition++];
2103 // if (((currentCharacter = source[currentPosition++])
2105 // && (source[currentPosition] == 'u')) {
2106 // //-------------unicode traitement ------------
2107 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2108 // currentPosition++;
2109 // while (source[currentPosition] == 'u') {
2110 // currentPosition++;
2113 // Character.getNumericValue(source[currentPosition++]))
2117 // Character.getNumericValue(source[currentPosition++]))
2121 // Character.getNumericValue(source[currentPosition++]))
2125 // Character.getNumericValue(source[currentPosition++]))
2128 // //error don't care of the value
2129 // currentCharacter = 'A';
2130 // } //something different from * and /
2132 // currentCharacter =
2133 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2137 } catch (IndexOutOfBoundsException e) {
// Identifiers and variables: the boolean argument tells
// scanIdentifierOrKeyword whether the token started with '$'.
2145 if (isPHPIdentifierStart(currentCharacter)
2146 || currentCharacter == '$') {
2148 scanIdentifierOrKeyword((currentCharacter == '$'));
2149 } catch (InvalidInputException ex) {
2153 if (Character.isDigit(currentCharacter)) {
2156 } catch (InvalidInputException ex) {
2162 //-----------------end switch while try--------------------
// Running past the end of source is ignored on purpose (empty handler).
// NOTE(review): this also hides any genuine indexing bug in the code above.
2163 } catch (IndexOutOfBoundsException e) {
2164 } catch (InvalidInputException e) {
2168 // public final boolean jumpOverUnicodeWhiteSpace()
2169 // throws InvalidInputException {
2171 // //handle the case of unicode. Jump over the next whiteSpace
2172 // //making startPosition pointing on the next available char
2173 // //On false, the currentCharacter is filled up with a potential
2177 // this.wasAcr = false;
2178 // int c1, c2, c3, c4;
2179 // int unicodeSize = 6;
2180 // currentPosition++;
2181 // while (source[currentPosition] == 'u') {
2182 // currentPosition++;
2186 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2188 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2190 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2192 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2194 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2197 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2198 // if (recordLineSeparator
2199 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2200 // pushLineSeparator();
2201 // if (Character.isWhitespace(currentCharacter))
2204 // //buffer the new char which is not a white space
2205 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2206 // //withoutUnicodePtr == 1 is true here
2208 // } catch (IndexOutOfBoundsException e) {
2209 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Gives callers a copy of the lineEnds array truncated to its first
 * linePtr + 1 entries, so the scanner's own array is not exposed.
 *
 * @return a new int[] of length linePtr + 1 filled from lineEnds
 */
2212 public final int[] getLineEnds() {
2213 //return a bounded copy of this.lineEnds
// The destination array is allocated inline as the third arraycopy argument.
2215 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor for the characters being scanned.
 * NOTE(review): presumably returns the 'source' field itself rather than a
 * copy — confirm before letting callers modify the result.
 */
2218 public char[] getSource() {
/**
 * Returns the source of a one-character token as a char[].
 * The character is read from source[startPosition]; the fallback at the end
 * allocates a fresh one-element array for it.
 * NOTE(review): presumably frequently used characters are answered from
 * shared, pre-built arrays before the fallback is reached — confirm.
 */
2221 final char[] optimizedCurrentTokenSource1() {
2222 //return always the same char[] build only once
2223 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2224 char charOne = source[startPosition];
2279 return new char[]{charOne};
/**
 * Returns the source of a two-character token, reusing a cached char[] when
 * one with the same characters already exists.
 * <p>
 * The two characters at startPosition are combined into a hash that selects
 * a bucket in charArray_length[0]. The bucket is searched in two passes (up
 * to InternalTableSize, then up to newEntry2); if no entry matches, a new
 * array is created and stored in the bucket.
 */
2282 final char[] optimizedCurrentTokenSource2() {
2283 //try to return the same char[] build only once
2285 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2287 char[][] table = charArray_length[0][hash];
2289 while (++i < InternalTableSize) {
2290 char[] charArray = table[i];
2291 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2294 //---------other side---------
2296 int max = newEntry2;
2297 while (++i <= max) {
2298 char[] charArray = table[i];
2299 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2302 //--------add the entry-------
// No cached entry matched: pick the next slot and store a new array there.
2303 if (++max >= InternalTableSize)
2306 table[max] = (r = new char[]{c0, c1});
/**
 * Returns the source of a three-character token, reusing a cached char[]
 * when one with the same characters already exists.
 * <p>
 * The three characters at startPosition are combined into a hash that
 * selects a bucket in charArray_length[1]. The bucket is searched in two
 * passes (up to InternalTableSize, then up to newEntry3); if no entry
 * matches, a new array is created and stored in the bucket.
 */
2310 final char[] optimizedCurrentTokenSource3() {
2311 //try to return the same char[] build only once
2313 int hash = (((c0 = source[startPosition]) << 12)
2314 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2316 char[][] table = charArray_length[1][hash];
2318 while (++i < InternalTableSize) {
2319 char[] charArray = table[i];
2320 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2323 //---------other side---------
2325 int max = newEntry3;
2326 while (++i <= max) {
2327 char[] charArray = table[i];
2328 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2331 //--------add the entry-------
// No cached entry matched: pick the next slot and store a new array there.
2332 if (++max >= InternalTableSize)
2335 table[max] = (r = new char[]{c0, c1, c2});
/**
 * Returns the source of a four-character token, reusing a cached char[]
 * when one with the same characters already exists.
 * <p>
 * The four characters at startPosition are combined into a hash (computed
 * as a long, then narrowed to int for indexing) that selects a bucket in
 * charArray_length[2]. The bucket is searched in two passes (up to
 * InternalTableSize, then up to newEntry4); if no entry matches, a new
 * array is created and stored in the bucket.
 */
2339 final char[] optimizedCurrentTokenSource4() {
2340 //try to return the same char[] build only once
2341 char c0, c1, c2, c3;
2342 long hash = ((((long) (c0 = source[startPosition])) << 18)
2343 + ((c1 = source[startPosition + 1]) << 12)
2344 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2346 char[][] table = charArray_length[2][(int) hash];
2348 while (++i < InternalTableSize) {
2349 char[] charArray = table[i];
2350 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2351 && (c3 == charArray[3]))
2354 //---------other side---------
2356 int max = newEntry4;
2357 while (++i <= max) {
2358 char[] charArray = table[i];
2359 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2360 && (c3 == charArray[3]))
2363 //--------add the entry-------
// No cached entry matched: pick the next slot and store a new array there.
2364 if (++max >= InternalTableSize)
2367 table[max] = (r = new char[]{c0, c1, c2, c3});
/**
 * Returns the source of a five-character token, reusing a cached char[]
 * when one with the same characters already exists.
 * <p>
 * The five characters at startPosition are combined into a hash (computed
 * as a long, then narrowed to int for indexing) that selects a bucket in
 * charArray_length[3]. The bucket is searched in two passes (up to
 * InternalTableSize, then up to newEntry5); if no entry matches, a new
 * array is created and stored in the bucket.
 */
2371 final char[] optimizedCurrentTokenSource5() {
2372 //try to return the same char[] build only once
2373 char c0, c1, c2, c3, c4;
2374 long hash = ((((long) (c0 = source[startPosition])) << 24)
2375 + (((long) (c1 = source[startPosition + 1])) << 18)
2376 + ((c2 = source[startPosition + 2]) << 12)
2377 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2379 char[][] table = charArray_length[3][(int) hash];
2381 while (++i < InternalTableSize) {
2382 char[] charArray = table[i];
2383 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2384 && (c3 == charArray[3]) && (c4 == charArray[4]))
2387 //---------other side---------
2389 int max = newEntry5;
2390 while (++i <= max) {
2391 char[] charArray = table[i];
2392 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2393 && (c3 == charArray[3]) && (c4 == charArray[4]))
2396 //--------add the entry-------
// No cached entry matched: pick the next slot and store a new array there.
2397 if (++max >= InternalTableSize)
2400 table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2404 final char[] optimizedCurrentTokenSource6() {
2405 //try to return the same char[] build only once
2406 char c0, c1, c2, c3, c4, c5;
2407 long hash = ((((long) (c0 = source[startPosition])) << 32)
2408 + (((long) (c1 = source[startPosition + 1])) << 24)
2409 + (((long) (c2 = source[startPosition + 2])) << 18)
2410 + ((c3 = source[startPosition + 3]) << 12)
2411 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2413 char[][] table = charArray_length[4][(int) hash];
2415 while (++i < InternalTableSize) {
2416 char[] charArray = table[i];
2417 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2418 && (c3 == charArray[3]) && (c4 == charArray[4])
2419 && (c5 == charArray[5]))
2422 //---------other side---------
2424 int max = newEntry6;
2425 while (++i <= max) {
2426 char[] charArray = table[i];
2427 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2428 && (c3 == charArray[3]) && (c4 == charArray[4])
2429 && (c5 == charArray[5]))
2432 //--------add the entry-------
2433 if (++max >= InternalTableSize)
2436 table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2440 public final void pushLineSeparator() throws InvalidInputException {
2441 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2442 final int INCREMENT = 250;
2443 if (this.checkNonExternalizedStringLiterals) {
2444 // reinitialize the current line for non externalize strings purpose
2447 //currentCharacter is at position currentPosition-1
2449 if (currentCharacter == '\r') {
2450 int separatorPos = currentPosition - 1;
2451 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2453 //System.out.println("CR-" + separatorPos);
2455 lineEnds[++linePtr] = separatorPos;
2456 } catch (IndexOutOfBoundsException e) {
2457 //linePtr value is correct
2458 int oldLength = lineEnds.length;
2459 int[] old = lineEnds;
2460 lineEnds = new int[oldLength + INCREMENT];
2461 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2462 lineEnds[linePtr] = separatorPos;
2464 // look-ahead for merged cr+lf
2466 if (source[currentPosition] == '\n') {
2467 //System.out.println("look-ahead LF-" + currentPosition);
2468 lineEnds[linePtr] = currentPosition;
2474 } catch (IndexOutOfBoundsException e) {
2479 if (currentCharacter == '\n') {
2480 //must merge eventual cr followed by lf
2481 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2482 //System.out.println("merge LF-" + (currentPosition - 1));
2483 lineEnds[linePtr] = currentPosition - 1;
2485 int separatorPos = currentPosition - 1;
2486 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2488 // System.out.println("LF-" + separatorPos);
2490 lineEnds[++linePtr] = separatorPos;
2491 } catch (IndexOutOfBoundsException e) {
2492 //linePtr value is correct
2493 int oldLength = lineEnds.length;
2494 int[] old = lineEnds;
2495 lineEnds = new int[oldLength + INCREMENT];
2496 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2497 lineEnds[linePtr] = separatorPos;
2504 public final void pushUnicodeLineSeparator() {
2505 // isUnicode means that the \r or \n has been read as a unicode character
2506 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2507 final int INCREMENT = 250;
2508 //currentCharacter is at position currentPosition-1
2509 if (this.checkNonExternalizedStringLiterals) {
2510 // reinitialize the current line for non externalize strings purpose
2514 if (currentCharacter == '\r') {
2515 int separatorPos = currentPosition - 6;
2516 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2518 //System.out.println("CR-" + separatorPos);
2520 lineEnds[++linePtr] = separatorPos;
2521 } catch (IndexOutOfBoundsException e) {
2522 //linePtr value is correct
2523 int oldLength = lineEnds.length;
2524 int[] old = lineEnds;
2525 lineEnds = new int[oldLength + INCREMENT];
2526 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2527 lineEnds[linePtr] = separatorPos;
2529 // look-ahead for merged cr+lf
2530 if (source[currentPosition] == '\n') {
2531 //System.out.println("look-ahead LF-" + currentPosition);
2532 lineEnds[linePtr] = currentPosition;
2540 if (currentCharacter == '\n') {
2541 //must merge eventual cr followed by lf
2542 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2543 //System.out.println("merge LF-" + (currentPosition - 1));
2544 lineEnds[linePtr] = currentPosition - 6;
2546 int separatorPos = currentPosition - 6;
2547 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2549 // System.out.println("LF-" + separatorPos);
2551 lineEnds[++linePtr] = separatorPos;
2552 } catch (IndexOutOfBoundsException e) {
2553 //linePtr value is correct
2554 int oldLength = lineEnds.length;
2555 int[] old = lineEnds;
2556 lineEnds = new int[oldLength + INCREMENT];
2557 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2558 lineEnds[linePtr] = separatorPos;
2565 public final void recordComment(boolean isJavadoc) {
2566 // a new annotation comment is recorded
2568 commentStops[++commentPtr] = isJavadoc
2571 } catch (IndexOutOfBoundsException e) {
2572 int oldStackLength = commentStops.length;
2573 int[] oldStack = commentStops;
2574 commentStops = new int[oldStackLength + 30];
2575 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2576 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2577 //grows the positions buffers too
2578 int[] old = commentStarts;
2579 commentStarts = new int[oldStackLength + 30];
2580 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2582 //the buffer is of a correct size here
2583 commentStarts[commentPtr] = startPosition;
2585 public void resetTo(int begin, int end) {
2586 //reset the scanner to a given position where it may rescan again
2588 initialPosition = startPosition = currentPosition = begin;
2589 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2590 commentPtr = -1; // reset comment stack
2592 public final void scanSingleQuotedEscapeCharacter()
2593 throws InvalidInputException {
2594 // the string with "\\u" is a legal string of two chars \ and u
2595 //thus we use a direct access to the source (for regular cases).
2596 // if (unicodeAsBackSlash) {
2597 // // consume next character
2598 // unicodeAsBackSlash = false;
2599 // if (((currentCharacter = source[currentPosition++]) == '\\')
2600 // && (source[currentPosition] == 'u')) {
2601 // getNextUnicodeChar();
2603 // if (withoutUnicodePtr != 0) {
2604 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2608 currentCharacter = source[currentPosition++];
2609 switch (currentCharacter) {
2611 currentCharacter = '\'';
2614 currentCharacter = '\\';
2617 currentCharacter = '\\';
2621 public final void scanDoubleQuotedEscapeCharacter()
2622 throws InvalidInputException {
2623 // the string with "\\u" is a legal string of two chars \ and u
2624 //thus we use a direct access to the source (for regular cases).
2625 // if (unicodeAsBackSlash) {
2626 // // consume next character
2627 // unicodeAsBackSlash = false;
2628 // if (((currentCharacter = source[currentPosition++]) == '\\')
2629 // && (source[currentPosition] == 'u')) {
2630 // getNextUnicodeChar();
2632 // if (withoutUnicodePtr != 0) {
2633 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2637 currentCharacter = source[currentPosition++];
2638 switch (currentCharacter) {
2640 // currentCharacter = '\b';
2643 currentCharacter = '\t';
2646 currentCharacter = '\n';
2649 // currentCharacter = '\f';
2652 currentCharacter = '\r';
2655 currentCharacter = '\"';
2658 currentCharacter = '\'';
2661 currentCharacter = '\\';
2664 currentCharacter = '$';
2667 // -----------octal escape--------------
2669 // OctalDigit OctalDigit
2670 // ZeroToThree OctalDigit OctalDigit
2671 int number = Character.getNumericValue(currentCharacter);
2672 if (number >= 0 && number <= 7) {
2673 boolean zeroToThreeNot = number > 3;
2674 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2675 int digit = Character.getNumericValue(currentCharacter);
2676 if (digit >= 0 && digit <= 7) {
2677 number = (number * 8) + digit;
2679 .isDigit(currentCharacter = source[currentPosition++])) {
2680 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2681 // Digit --> ignore last character
2684 digit = Character.getNumericValue(currentCharacter);
2685 if (digit >= 0 && digit <= 7) {
2686 // has read \ZeroToThree OctalDigit OctalDigit
2687 number = (number * 8) + digit;
2688 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2689 // --> ignore last character
2693 } else { // has read \OctalDigit NonDigit--> ignore last
2697 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2701 } else { // has read \OctalDigit --> ignore last character
2705 throw new InvalidInputException(INVALID_ESCAPE);
2706 currentCharacter = (char) number;
2709 // throw new InvalidInputException(INVALID_ESCAPE);
2712 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2713 // return scanIdentifierOrKeyword( false );
2715 public int scanIdentifierOrKeyword(boolean isVariable)
2716 throws InvalidInputException {
2718 //first dispatch on the first char.
2719 //then the length. If there are several
2720 //keywors with the same length AND the same first char, then do another
2721 //disptach on the second char :-)...cool....but fast !
2722 useAssertAsAnIndentifier = false;
2723 while (getNextCharAsJavaIdentifierPart()) {
2726 // if (new String(getCurrentTokenSource()).equals("$this")) {
2727 // return TokenNamethis;
2729 return TokenNameVariable;
2734 // if (withoutUnicodePtr == 0)
2735 //quick test on length == 1 but not on length > 12 while most identifier
2736 //have a length which is <= 12...but there are lots of identifier with
2739 if ((length = currentPosition - startPosition) == 1)
2740 return TokenNameIdentifier;
2742 data = new char[length];
2743 index = startPosition;
2744 for (int i = 0; i < length; i++) {
2745 data[i] = Character.toLowerCase(source[index + i]);
2749 // if ((length = withoutUnicodePtr) == 1)
2750 // return TokenNameIdentifier;
2751 // // data = withoutUnicodeBuffer;
2752 // data = new char[withoutUnicodeBuffer.length];
2753 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2754 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2758 firstLetter = data[index];
2759 switch (firstLetter) {
2764 if ((data[++index] == '_') && (data[++index] == 'f')
2765 && (data[++index] == 'i') && (data[++index] == 'l')
2766 && (data[++index] == 'e') && (data[++index] == '_')
2767 && (data[++index] == '_'))
2768 return TokenNameFILE;
2769 index = 0; //__LINE__
2770 if ((data[++index] == '_') && (data[++index] == 'l')
2771 && (data[++index] == 'i') && (data[++index] == 'n')
2772 && (data[++index] == 'e') && (data[++index] == '_')
2773 && (data[++index] == '_'))
2774 return TokenNameLINE;
2778 if ((data[++index] == '_') && (data[++index] == 'c')
2779 && (data[++index] == 'l') && (data[++index] == 'a')
2780 && (data[++index] == 's') && (data[++index] == 's')
2781 && (data[++index] == '_') && (data[++index] == '_'))
2782 return TokenNameCLASS_C;
2786 if ((data[++index] == '_') && (data[++index] == 'm')
2787 && (data[++index] == 'e') && (data[++index] == 't')
2788 && (data[++index] == 'h') && (data[++index] == 'o')
2789 && (data[++index] == 'd') && (data[++index] == '_')
2790 && (data[++index] == '_'))
2791 return TokenNameMETHOD_C;
2795 if ((data[++index] == '_') && (data[++index] == 'f')
2796 && (data[++index] == 'u') && (data[++index] == 'n')
2797 && (data[++index] == 'c') && (data[++index] == 't')
2798 && (data[++index] == 'i') && (data[++index] == 'o')
2799 && (data[++index] == 'n') && (data[++index] == '_')
2800 && (data[++index] == '_'))
2801 return TokenNameFUNC_C;
2804 return TokenNameIdentifier;
2806 // as and array abstract
2810 if ((data[++index] == 's')) {
2813 return TokenNameIdentifier;
2817 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2818 return TokenNameand;
2820 return TokenNameIdentifier;
2824 if ((data[++index] == 'r') && (data[++index] == 'r')
2825 && (data[++index] == 'a') && (data[++index] == 'y'))
2826 return TokenNamearray;
2828 return TokenNameIdentifier;
2830 if ((data[++index] == 'b') && (data[++index] == 's')
2831 && (data[++index] == 't') && (data[++index] == 'r')
2832 && (data[++index] == 'a') && (data[++index] == 'c')
2833 && (data[++index] == 't'))
2834 return TokenNameabstract;
2836 return TokenNameIdentifier;
2838 return TokenNameIdentifier;
2844 if ((data[++index] == 'r') && (data[++index] == 'e')
2845 && (data[++index] == 'a') && (data[++index] == 'k'))
2846 return TokenNamebreak;
2848 return TokenNameIdentifier;
2850 return TokenNameIdentifier;
2853 //case catch class clone const continue
2856 if ((data[++index] == 'a') && (data[++index] == 's')
2857 && (data[++index] == 'e'))
2858 return TokenNamecase;
2860 return TokenNameIdentifier;
2862 if ((data[++index] == 'a') && (data[++index] == 't')
2863 && (data[++index] == 'c') && (data[++index] == 'h'))
2864 return TokenNamecatch;
2866 if ((data[++index] == 'l') && (data[++index] == 'a')
2867 && (data[++index] == 's') && (data[++index] == 's'))
2868 return TokenNameclass;
2870 if ((data[++index] == 'l') && (data[++index] == 'o')
2871 && (data[++index] == 'n') && (data[++index] == 'e'))
2872 return TokenNameclone;
2874 if ((data[++index] == 'o') && (data[++index] == 'n')
2875 && (data[++index] == 's') && (data[++index] == 't'))
2876 return TokenNameconst;
2878 return TokenNameIdentifier;
2880 if ((data[++index] == 'o') && (data[++index] == 'n')
2881 && (data[++index] == 't') && (data[++index] == 'i')
2882 && (data[++index] == 'n') && (data[++index] == 'u')
2883 && (data[++index] == 'e'))
2884 return TokenNamecontinue;
2886 return TokenNameIdentifier;
2888 return TokenNameIdentifier;
2891 // declare default do die
2892 // TODO delete define ==> no keyword !
2895 if ((data[++index] == 'o'))
2898 return TokenNameIdentifier;
2900 // if ((data[++index] == 'e')
2901 // && (data[++index] == 'f')
2902 // && (data[++index] == 'i')
2903 // && (data[++index] == 'n')
2904 // && (data[++index] == 'e'))
2905 // return TokenNamedefine;
2907 // return TokenNameIdentifier;
2909 if ((data[++index] == 'e') && (data[++index] == 'c')
2910 && (data[++index] == 'l') && (data[++index] == 'a')
2911 && (data[++index] == 'r') && (data[++index] == 'e'))
2912 return TokenNamedeclare;
2914 if ((data[++index] == 'e') && (data[++index] == 'f')
2915 && (data[++index] == 'a') && (data[++index] == 'u')
2916 && (data[++index] == 'l') && (data[++index] == 't'))
2917 return TokenNamedefault;
2919 return TokenNameIdentifier;
2921 return TokenNameIdentifier;
2924 //echo else exit elseif extends eval
2927 if ((data[++index] == 'c') && (data[++index] == 'h')
2928 && (data[++index] == 'o'))
2929 return TokenNameecho;
2930 else if ((data[index] == 'l') && (data[++index] == 's')
2931 && (data[++index] == 'e'))
2932 return TokenNameelse;
2933 else if ((data[index] == 'x') && (data[++index] == 'i')
2934 && (data[++index] == 't'))
2935 return TokenNameexit;
2936 else if ((data[index] == 'v') && (data[++index] == 'a')
2937 && (data[++index] == 'l'))
2938 return TokenNameeval;
2940 return TokenNameIdentifier;
2943 if ((data[++index] == 'n') && (data[++index] == 'd')
2944 && (data[++index] == 'i') && (data[++index] == 'f'))
2945 return TokenNameendif;
2946 if ((data[index] == 'm') && (data[++index] == 'p')
2947 && (data[++index] == 't') && (data[++index] == 'y'))
2948 return TokenNameempty;
2950 return TokenNameIdentifier;
2953 if ((data[++index] == 'n') && (data[++index] == 'd')
2954 && (data[++index] == 'f') && (data[++index] == 'o')
2955 && (data[++index] == 'r'))
2956 return TokenNameendfor;
2957 else if ((data[index] == 'l') && (data[++index] == 's')
2958 && (data[++index] == 'e') && (data[++index] == 'i')
2959 && (data[++index] == 'f'))
2960 return TokenNameelseif;
2962 return TokenNameIdentifier;
2964 if ((data[++index] == 'x') && (data[++index] == 't')
2965 && (data[++index] == 'e') && (data[++index] == 'n')
2966 && (data[++index] == 'd') && (data[++index] == 's'))
2967 return TokenNameextends;
2969 return TokenNameIdentifier;
2972 if ((data[++index] == 'n') && (data[++index] == 'd')
2973 && (data[++index] == 'w') && (data[++index] == 'h')
2974 && (data[++index] == 'i') && (data[++index] == 'l')
2975 && (data[++index] == 'e'))
2976 return TokenNameendwhile;
2978 return TokenNameIdentifier;
2981 if ((data[++index] == 'n') && (data[++index] == 'd')
2982 && (data[++index] == 's') && (data[++index] == 'w')
2983 && (data[++index] == 'i') && (data[++index] == 't')
2984 && (data[++index] == 'c') && (data[++index] == 'h'))
2985 return TokenNameendswitch;
2987 return TokenNameIdentifier;
2990 if ((data[++index] == 'n') && (data[++index] == 'd')
2991 && (data[++index] == 'd') && (data[++index] == 'e')
2992 && (data[++index] == 'c') && (data[++index] == 'l')
2993 && (data[++index] == 'a') && (data[++index] == 'r')
2994 && (data[++index] == 'e'))
2995 return TokenNameendforeach;
2997 if ((data[++index] == 'n') // endforeach
2998 && (data[++index] == 'd') && (data[++index] == 'f')
2999 && (data[++index] == 'o') && (data[++index] == 'r')
3000 && (data[++index] == 'e') && (data[++index] == 'a')
3001 && (data[++index] == 'c') && (data[++index] == 'h'))
3002 return TokenNameendforeach;
3004 return TokenNameIdentifier;
3006 return TokenNameIdentifier;
3009 //for false final function
3012 if ((data[++index] == 'o') && (data[++index] == 'r'))
3013 return TokenNamefor;
3015 return TokenNameIdentifier;
3017 // if ((data[++index] == 'a') && (data[++index] == 'l')
3018 // && (data[++index] == 's') && (data[++index] == 'e'))
3019 // return TokenNamefalse;
3020 if ((data[++index] == 'i') && (data[++index] == 'n')
3021 && (data[++index] == 'a') && (data[++index] == 'l'))
3022 return TokenNamefinal;
3024 return TokenNameIdentifier;
3027 if ((data[++index] == 'o') && (data[++index] == 'r')
3028 && (data[++index] == 'e') && (data[++index] == 'a')
3029 && (data[++index] == 'c') && (data[++index] == 'h'))
3030 return TokenNameforeach;
3032 return TokenNameIdentifier;
3035 if ((data[++index] == 'u') && (data[++index] == 'n')
3036 && (data[++index] == 'c') && (data[++index] == 't')
3037 && (data[++index] == 'i') && (data[++index] == 'o')
3038 && (data[++index] == 'n'))
3039 return TokenNamefunction;
3041 return TokenNameIdentifier;
3043 return TokenNameIdentifier;
3048 if ((data[++index] == 'l') && (data[++index] == 'o')
3049 && (data[++index] == 'b') && (data[++index] == 'a')
3050 && (data[++index] == 'l')) {
3051 return TokenNameglobal;
3054 return TokenNameIdentifier;
3056 //if int isset include include_once instanceof interface implements
3059 if (data[++index] == 'f')
3062 return TokenNameIdentifier;
3064 // if ((data[++index] == 'n') && (data[++index] == 't'))
3065 // return TokenNameint;
3067 // return TokenNameIdentifier;
3069 if ((data[++index] == 's') && (data[++index] == 's')
3070 && (data[++index] == 'e') && (data[++index] == 't'))
3071 return TokenNameisset;
3073 return TokenNameIdentifier;
3075 if ((data[++index] == 'n') && (data[++index] == 'c')
3076 && (data[++index] == 'l') && (data[++index] == 'u')
3077 && (data[++index] == 'd') && (data[++index] == 'e'))
3078 return TokenNameinclude;
3080 return TokenNameIdentifier;
3083 if ((data[++index] == 'n') && (data[++index] == 't')
3084 && (data[++index] == 'e') && (data[++index] == 'r')
3085 && (data[++index] == 'f') && (data[++index] == 'a')
3086 && (data[++index] == 'c') && (data[++index] == 'e'))
3087 return TokenNameinterface;
3089 return TokenNameIdentifier;
3092 if ((data[++index] == 'n') && (data[++index] == 's')
3093 && (data[++index] == 't') && (data[++index] == 'a')
3094 && (data[++index] == 'n') && (data[++index] == 'c')
3095 && (data[++index] == 'e') && (data[++index] == 'o')
3096 && (data[++index] == 'f'))
3097 return TokenNameinstanceof;
3098 if ((data[index] == 'm') && (data[++index] == 'p')
3099 && (data[++index] == 'l') && (data[++index] == 'e')
3100 && (data[++index] == 'm') && (data[++index] == 'e')
3101 && (data[++index] == 'n') && (data[++index] == 't')
3102 && (data[++index] == 's'))
3103 return TokenNameimplements;
3105 return TokenNameIdentifier;
3107 if ((data[++index] == 'n') && (data[++index] == 'c')
3108 && (data[++index] == 'l') && (data[++index] == 'u')
3109 && (data[++index] == 'd') && (data[++index] == 'e')
3110 && (data[++index] == '_') && (data[++index] == 'o')
3111 && (data[++index] == 'n') && (data[++index] == 'c')
3112 && (data[++index] == 'e'))
3113 return TokenNameinclude_once;
3115 return TokenNameIdentifier;
3117 return TokenNameIdentifier;
3122 if ((data[++index] == 'i') && (data[++index] == 's')
3123 && (data[++index] == 't')) {
3124 return TokenNamelist;
3127 return TokenNameIdentifier;
3132 if ((data[++index] == 'e') && (data[++index] == 'w'))
3133 return TokenNamenew;
3135 return TokenNameIdentifier;
3137 // if ((data[++index] == 'u') && (data[++index] == 'l')
3138 // && (data[++index] == 'l'))
3139 // return TokenNamenull;
3141 // return TokenNameIdentifier;
3143 return TokenNameIdentifier;
3148 if (data[++index] == 'r') {
3152 // if (length == 12) {
3153 // if ((data[++index] == 'l')
3154 // && (data[++index] == 'd')
3155 // && (data[++index] == '_')
3156 // && (data[++index] == 'f')
3157 // && (data[++index] == 'u')
3158 // && (data[++index] == 'n')
3159 // && (data[++index] == 'c')
3160 // && (data[++index] == 't')
3161 // && (data[++index] == 'i')
3162 // && (data[++index] == 'o')
3163 // && (data[++index] == 'n')) {
3164 // return TokenNameold_function;
3167 return TokenNameIdentifier;
3169 // print public private protected
3172 if ((data[++index] == 'r') && (data[++index] == 'i')
3173 && (data[++index] == 'n') && (data[++index] == 't')) {
3174 return TokenNameprint;
3176 return TokenNameIdentifier;
3178 if ((data[++index] == 'u') && (data[++index] == 'b')
3179 && (data[++index] == 'l') && (data[++index] == 'i')
3180 && (data[++index] == 'c')) {
3181 return TokenNamepublic;
3183 return TokenNameIdentifier;
3185 if ((data[++index] == 'r') && (data[++index] == 'i')
3186 && (data[++index] == 'v') && (data[++index] == 'a')
3187 && (data[++index] == 't') && (data[++index] == 'e')) {
3188 return TokenNameprivate;
3190 return TokenNameIdentifier;
3192 if ((data[++index] == 'r') && (data[++index] == 'o')
3193 && (data[++index] == 't') && (data[++index] == 'e')
3194 && (data[++index] == 'c') && (data[++index] == 't')
3195 && (data[++index] == 'e') && (data[++index] == 'd')) {
3196 return TokenNameprotected;
3198 return TokenNameIdentifier;
3200 return TokenNameIdentifier;
3202 //return require require_once
3204 if ((data[++index] == 'e') && (data[++index] == 't')
3205 && (data[++index] == 'u') && (data[++index] == 'r')
3206 && (data[++index] == 'n')) {
3207 return TokenNamereturn;
3209 } else if (length == 7) {
3210 if ((data[++index] == 'e') && (data[++index] == 'q')
3211 && (data[++index] == 'u') && (data[++index] == 'i')
3212 && (data[++index] == 'r') && (data[++index] == 'e')) {
3213 return TokenNamerequire;
3215 } else if (length == 12) {
3216 if ((data[++index] == 'e') && (data[++index] == 'q')
3217 && (data[++index] == 'u') && (data[++index] == 'i')
3218 && (data[++index] == 'r') && (data[++index] == 'e')
3219 && (data[++index] == '_') && (data[++index] == 'o')
3220 && (data[++index] == 'n') && (data[++index] == 'c')
3221 && (data[++index] == 'e')) {
3222 return TokenNamerequire_once;
3225 return TokenNameIdentifier;
3230 if (data[++index] == 't')
3231 if ((data[++index] == 'a') && (data[++index] == 't')
3232 && (data[++index] == 'i') && (data[++index] == 'c')) {
3233 return TokenNamestatic;
3235 return TokenNameIdentifier;
3236 else if ((data[index] == 'w') && (data[++index] == 'i')
3237 && (data[++index] == 't') && (data[++index] == 'c')
3238 && (data[++index] == 'h'))
3239 return TokenNameswitch;
3241 return TokenNameIdentifier;
3243 return TokenNameIdentifier;
3249 if ((data[++index] == 'r') && (data[++index] == 'y'))
3250 return TokenNametry;
3252 return TokenNameIdentifier;
3254 // if ((data[++index] == 'r') && (data[++index] == 'u')
3255 // && (data[++index] == 'e'))
3256 // return TokenNametrue;
3258 // return TokenNameIdentifier;
3260 if ((data[++index] == 'h') && (data[++index] == 'r')
3261 && (data[++index] == 'o') && (data[++index] == 'w'))
3262 return TokenNamethrow;
3264 return TokenNameIdentifier;
3266 return TokenNameIdentifier;
3272 if ((data[++index] == 's') && (data[++index] == 'e'))
3273 return TokenNameuse;
3275 return TokenNameIdentifier;
3277 if ((data[++index] == 'n') && (data[++index] == 's')
3278 && (data[++index] == 'e') && (data[++index] == 't'))
3279 return TokenNameunset;
3281 return TokenNameIdentifier;
3283 return TokenNameIdentifier;
3289 if ((data[++index] == 'a') && (data[++index] == 'r'))
3290 return TokenNamevar;
3292 return TokenNameIdentifier;
3294 return TokenNameIdentifier;
3300 if ((data[++index] == 'h') && (data[++index] == 'i')
3301 && (data[++index] == 'l') && (data[++index] == 'e'))
3302 return TokenNamewhile;
3304 return TokenNameIdentifier;
3305 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3306 // (data[++index]=='e') && (data[++index]=='f')&&
3307 // (data[++index]=='p'))
3308 //return TokenNamewidefp ;
3310 //return TokenNameIdentifier;
3312 return TokenNameIdentifier;
3318 if ((data[++index] == 'o') && (data[++index] == 'r'))
3319 return TokenNamexor;
3321 return TokenNameIdentifier;
3323 return TokenNameIdentifier;
3326 return TokenNameIdentifier;
3329 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3330 //when entering this method the currentCharacter is the firt
3331 //digit of the number , i.e. it may be preceeded by a . when
3333 boolean floating = dotPrefix;
3334 if ((!dotPrefix) && (currentCharacter == '0')) {
3335 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3336 //force the first char of the hexa number do exist...
3337 // consume next character
3338 unicodeAsBackSlash = false;
3339 currentCharacter = source[currentPosition++];
3340 // if (((currentCharacter = source[currentPosition++]) == '\\')
3341 // && (source[currentPosition] == 'u')) {
3342 // getNextUnicodeChar();
3344 // if (withoutUnicodePtr != 0) {
3345 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3348 if (Character.digit(currentCharacter, 16) == -1)
3349 throw new InvalidInputException(INVALID_HEXA);
3351 while (getNextCharAsDigit(16)) {
3353 // if (getNextChar('l', 'L') >= 0)
3354 // return TokenNameLongLiteral;
3356 return TokenNameIntegerLiteral;
3358 //there is x or X in the number
3359 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3360 // 00078.0 is true !!!!! crazy language
3361 if (getNextCharAsDigit()) {
3362 //-------------potential octal-----------------
3363 while (getNextCharAsDigit()) {
3365 // if (getNextChar('l', 'L') >= 0) {
3366 // return TokenNameLongLiteral;
3369 // if (getNextChar('f', 'F') >= 0) {
3370 // return TokenNameFloatingPointLiteral;
3372 if (getNextChar('d', 'D') >= 0) {
3373 return TokenNameDoubleLiteral;
3374 } else { //make the distinction between octal and float ....
3375 if (getNextChar('.')) { //bingo ! ....
3376 while (getNextCharAsDigit()) {
3378 if (getNextChar('e', 'E') >= 0) {
3379 // consume next character
3380 unicodeAsBackSlash = false;
3381 currentCharacter = source[currentPosition++];
3382 // if (((currentCharacter = source[currentPosition++]) == '\\')
3383 // && (source[currentPosition] == 'u')) {
3384 // getNextUnicodeChar();
3386 // if (withoutUnicodePtr != 0) {
3387 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3390 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3391 // consume next character
3392 unicodeAsBackSlash = false;
3393 currentCharacter = source[currentPosition++];
3394 // if (((currentCharacter = source[currentPosition++]) == '\\')
3395 // && (source[currentPosition] == 'u')) {
3396 // getNextUnicodeChar();
3398 // if (withoutUnicodePtr != 0) {
3399 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3400 // currentCharacter;
3404 if (!Character.isDigit(currentCharacter))
3405 throw new InvalidInputException(INVALID_FLOAT);
3406 while (getNextCharAsDigit()) {
3409 // if (getNextChar('f', 'F') >= 0)
3410 // return TokenNameFloatingPointLiteral;
3411 getNextChar('d', 'D'); //jump over potential d or D
3412 return TokenNameDoubleLiteral;
3414 return TokenNameIntegerLiteral;
3421 while (getNextCharAsDigit()) {
3423 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3424 // return TokenNameLongLiteral;
3425 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3426 while (getNextCharAsDigit()) {
3430 //if floating is true both exponant and suffix may be optional
3431 if (getNextChar('e', 'E') >= 0) {
3433 // consume next character
3434 unicodeAsBackSlash = false;
3435 currentCharacter = source[currentPosition++];
3436 // if (((currentCharacter = source[currentPosition++]) == '\\')
3437 // && (source[currentPosition] == 'u')) {
3438 // getNextUnicodeChar();
3440 // if (withoutUnicodePtr != 0) {
3441 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3444 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3447 unicodeAsBackSlash = false;
3448 currentCharacter = source[currentPosition++];
3449 // if (((currentCharacter = source[currentPosition++]) == '\\')
3450 // && (source[currentPosition] == 'u')) {
3451 // getNextUnicodeChar();
3453 // if (withoutUnicodePtr != 0) {
3454 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3458 if (!Character.isDigit(currentCharacter))
3459 throw new InvalidInputException(INVALID_FLOAT);
3460 while (getNextCharAsDigit()) {
3463 if (getNextChar('d', 'D') >= 0)
3464 return TokenNameDoubleLiteral;
3465 // if (getNextChar('f', 'F') >= 0)
3466 // return TokenNameFloatingPointLiteral;
3467 //the long flag has been tested before
3468 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3471 * Search the line number corresponding to a specific position
3474 public final int getLineNumber(int position) {
3475 if (lineEnds == null)
3477 int length = linePtr + 1;
3480 int g = 0, d = length - 1;
3484 if (position < lineEnds[m]) {
3486 } else if (position > lineEnds[m]) {
3492 if (position < lineEnds[m]) {
3497 public void setPHPMode(boolean mode) {
3500 public final void setSource(char[] source) {
3501 //the source-buffer is set to sourceString
3502 if (source == null) {
3503 this.source = new char[0];
3505 this.source = source;
3508 initialPosition = currentPosition = 0;
3509 containsAssertKeyword = false;
3510 withoutUnicodeBuffer = new char[this.source.length];
3511 encapsedStringStack = new Stack();
// Debug rendering of the scanner state: the source text split into the part
// before the current token, the current token itself (startPosition up to
// currentPosition - 1) wrapped in Starts here / Ends here markers, and the
// remaining text.
3513 public String toString() {
// token starts exactly at the end of the source: report EOF plus the source
3514 if (startPosition == source.length)
3515 return "EOF\n\n" + new String(source); //$NON-NLS-1$
// scanner position is past the end of the source
3516 if (currentPosition > source.length)
3517 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// front: everything before the current token
3518 char front[] = new char[startPosition];
3519 System.arraycopy(source, 0, front, 0, startPosition);
// middle: the current token, i.e. currentPosition - startPosition characters
3520 int middleLength = (currentPosition - 1) - startPosition + 1;
3522 if (middleLength > -1) {
3523 middle = new char[middleLength];
3524 System.arraycopy(source, startPosition, middle, 0, middleLength);
// negative length: fall back to an empty token text
3526 middle = new char[0];
// end: everything from currentPosition onwards.
// NOTE(review): the array is allocated with source.length - currentPosition + 1
// elements but only source.length - currentPosition characters are copied, so
// the last element keeps its default value - confirm this is intended.
3528 char end[] = new char[source.length - (currentPosition - 1)];
3529 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3530 - (currentPosition - 1) - 1);
3531 return new String(front)
3532 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3533 + new String(middle)
3534 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
// Returns a printable description of the token kind passed in act.
// Fixed tokens (keywords, operators, punctuation, casts, magic constants) map
// to their spelling. Tokens whose text matters (inline HTML, identifiers,
// variables, literals, strings, heredocs, whitespace, comments) are rendered
// as a kind name followed by the current token source in parentheses.
// The final return builds a not-a-token text from act and the current token
// source.
// NOTE(review): the switch header, some case labels and some return
// statements are not visible here; the final return is presumably the default
// branch - confirm.
3537 public final String toStringAction(int act) {
3539 case TokenNameERROR :
3540 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3542 case TokenNameINLINE_HTML :
3543 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3544 case TokenNameIdentifier :
3545 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3546 case TokenNameVariable :
3547 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords ----
3548 case TokenNameabstract :
3549 return "abstract"; //$NON-NLS-1$
3551 return "AND"; //$NON-NLS-1$
3552 case TokenNamearray :
3553 return "array"; //$NON-NLS-1$
3555 return "as"; //$NON-NLS-1$
3556 case TokenNamebreak :
3557 return "break"; //$NON-NLS-1$
3558 case TokenNamecase :
3559 return "case"; //$NON-NLS-1$
3560 case TokenNameclass :
3561 return "class"; //$NON-NLS-1$
3562 case TokenNamecatch :
3563 return "catch"; //$NON-NLS-1$
3564 case TokenNameclone :
3567 case TokenNameconst :
3570 case TokenNamecontinue :
3571 return "continue"; //$NON-NLS-1$
3572 case TokenNamedefault :
3573 return "default"; //$NON-NLS-1$
3574 // case TokenNamedefine :
3575 // return "define"; //$NON-NLS-1$
3577 return "do"; //$NON-NLS-1$
3578 case TokenNameecho :
3579 return "echo"; //$NON-NLS-1$
3580 case TokenNameelse :
3581 return "else"; //$NON-NLS-1$
3582 case TokenNameelseif :
3583 return "elseif"; //$NON-NLS-1$
3584 case TokenNameendfor :
3585 return "endfor"; //$NON-NLS-1$
3586 case TokenNameendforeach :
3587 return "endforeach"; //$NON-NLS-1$
3588 case TokenNameendif :
3589 return "endif"; //$NON-NLS-1$
3590 case TokenNameendswitch :
3591 return "endswitch"; //$NON-NLS-1$
3592 case TokenNameendwhile :
3593 return "endwhile"; //$NON-NLS-1$
3596 case TokenNameextends :
3597 return "extends"; //$NON-NLS-1$
3598 // case TokenNamefalse :
3599 // return "false"; //$NON-NLS-1$
3600 case TokenNamefinal :
3601 return "final"; //$NON-NLS-1$
3603 return "for"; //$NON-NLS-1$
3604 case TokenNameforeach :
3605 return "foreach"; //$NON-NLS-1$
3606 case TokenNamefunction :
3607 return "function"; //$NON-NLS-1$
3608 case TokenNameglobal :
3609 return "global"; //$NON-NLS-1$
3611 return "if"; //$NON-NLS-1$
3612 case TokenNameimplements :
3613 return "implements"; //$NON-NLS-1$
3614 case TokenNameinclude :
3615 return "include"; //$NON-NLS-1$
3616 case TokenNameinclude_once :
3617 return "include_once"; //$NON-NLS-1$
3618 case TokenNameinstanceof :
3619 return "instanceof"; //$NON-NLS-1$
3620 case TokenNameinterface :
3621 return "interface"; //$NON-NLS-1$
3622 case TokenNameisset :
3623 return "isset"; //$NON-NLS-1$
3624 case TokenNamelist :
3625 return "list"; //$NON-NLS-1$
3627 return "new"; //$NON-NLS-1$
3628 // case TokenNamenull :
3629 // return "null"; //$NON-NLS-1$
3631 return "OR"; //$NON-NLS-1$
3632 case TokenNameprint :
3633 return "print"; //$NON-NLS-1$
3634 case TokenNameprivate :
3635 return "private"; //$NON-NLS-1$
3636 case TokenNameprotected :
3637 return "protected"; //$NON-NLS-1$
3638 case TokenNamepublic :
3639 return "public"; //$NON-NLS-1$
3640 case TokenNamerequire :
3641 return "require"; //$NON-NLS-1$
3642 case TokenNamerequire_once :
3643 return "require_once"; //$NON-NLS-1$
3644 case TokenNamereturn :
3645 return "return"; //$NON-NLS-1$
3646 case TokenNamestatic :
3647 return "static"; //$NON-NLS-1$
3648 case TokenNameswitch :
3649 return "switch"; //$NON-NLS-1$
3650 // case TokenNametrue :
3651 // return "true"; //$NON-NLS-1$
3652 case TokenNameunset :
3653 return "unset"; //$NON-NLS-1$
3655 return "var"; //$NON-NLS-1$
3656 case TokenNamewhile :
3657 return "while"; //$NON-NLS-1$
3659 return "XOR"; //$NON-NLS-1$
3660 // case TokenNamethis :
3661 // return "$this"; //$NON-NLS-1$
// ---- literals and strings: kind name plus the current token text ----
3662 case TokenNameIntegerLiteral :
3663 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3664 case TokenNameDoubleLiteral :
3665 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3666 case TokenNameStringLiteral :
3667 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3668 case TokenNameStringConstant :
3669 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3670 case TokenNameStringInterpolated :
3671 return "StringInterpolated(" + new String(getCurrentTokenSource())
3672 + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// the three encapsed-string tokens map to their delimiter character
3673 case TokenNameEncapsedString0 :
3674 return "`"; //$NON-NLS-1$
3675 case TokenNameEncapsedString1 :
3676 return "\'"; //$NON-NLS-1$
3677 case TokenNameEncapsedString2 :
3678 return "\""; //$NON-NLS-1$
3679 case TokenNameSTRING :
3680 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3681 case TokenNameHEREDOC :
3682 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// ---- operators ----
3683 case TokenNamePLUS_PLUS :
3684 return "++"; //$NON-NLS-1$
3685 case TokenNameMINUS_MINUS :
3686 return "--"; //$NON-NLS-1$
3687 case TokenNameEQUAL_EQUAL :
3688 return "=="; //$NON-NLS-1$
3689 case TokenNameEQUAL_EQUAL_EQUAL :
3690 return "==="; //$NON-NLS-1$
3691 case TokenNameEQUAL_GREATER :
3692 return "=>"; //$NON-NLS-1$
3693 case TokenNameLESS_EQUAL :
3694 return "<="; //$NON-NLS-1$
3695 case TokenNameGREATER_EQUAL :
3696 return ">="; //$NON-NLS-1$
3697 case TokenNameNOT_EQUAL :
3698 return "!="; //$NON-NLS-1$
3699 case TokenNameNOT_EQUAL_EQUAL :
3700 return "!=="; //$NON-NLS-1$
3701 case TokenNameLEFT_SHIFT :
3702 return "<<"; //$NON-NLS-1$
3703 case TokenNameRIGHT_SHIFT :
3704 return ">>"; //$NON-NLS-1$
3705 case TokenNamePLUS_EQUAL :
3706 return "+="; //$NON-NLS-1$
3707 case TokenNameMINUS_EQUAL :
3708 return "-="; //$NON-NLS-1$
3709 case TokenNameMULTIPLY_EQUAL :
3710 return "*="; //$NON-NLS-1$
3711 case TokenNameDIVIDE_EQUAL :
3712 return "/="; //$NON-NLS-1$
3713 case TokenNameAND_EQUAL :
3714 return "&="; //$NON-NLS-1$
3715 case TokenNameOR_EQUAL :
3716 return "|="; //$NON-NLS-1$
3717 case TokenNameXOR_EQUAL :
3718 return "^="; //$NON-NLS-1$
3719 case TokenNameREMAINDER_EQUAL :
3720 return "%="; //$NON-NLS-1$
3721 case TokenNameDOT_EQUAL :
3722 return ".="; //$NON-NLS-1$
3723 case TokenNameLEFT_SHIFT_EQUAL :
3724 return "<<="; //$NON-NLS-1$
3725 case TokenNameRIGHT_SHIFT_EQUAL :
3726 return ">>="; //$NON-NLS-1$
3727 case TokenNameOR_OR :
3728 return "||"; //$NON-NLS-1$
3729 case TokenNameAND_AND :
3730 return "&&"; //$NON-NLS-1$
3731 case TokenNamePLUS :
3732 return "+"; //$NON-NLS-1$
3733 case TokenNameMINUS :
3734 return "-"; //$NON-NLS-1$
3735 case TokenNameMINUS_GREATER :
3738 return "!"; //$NON-NLS-1$
3739 case TokenNameREMAINDER :
3740 return "%"; //$NON-NLS-1$
3742 return "^"; //$NON-NLS-1$
3744 return "&"; //$NON-NLS-1$
3745 case TokenNameMULTIPLY :
3746 return "*"; //$NON-NLS-1$
3748 return "|"; //$NON-NLS-1$
3749 case TokenNameTWIDDLE :
3750 return "~"; //$NON-NLS-1$
3751 case TokenNameTWIDDLE_EQUAL :
3752 return "~="; //$NON-NLS-1$
3753 case TokenNameDIVIDE :
3754 return "/"; //$NON-NLS-1$
3755 case TokenNameGREATER :
3756 return ">"; //$NON-NLS-1$
3757 case TokenNameLESS :
3758 return "<"; //$NON-NLS-1$
// ---- punctuation ----
3759 case TokenNameLPAREN :
3760 return "("; //$NON-NLS-1$
3761 case TokenNameRPAREN :
3762 return ")"; //$NON-NLS-1$
3763 case TokenNameLBRACE :
3764 return "{"; //$NON-NLS-1$
3765 case TokenNameRBRACE :
3766 return "}"; //$NON-NLS-1$
3767 case TokenNameLBRACKET :
3768 return "["; //$NON-NLS-1$
3769 case TokenNameRBRACKET :
3770 return "]"; //$NON-NLS-1$
3771 case TokenNameSEMICOLON :
3772 return ";"; //$NON-NLS-1$
3773 case TokenNameQUESTION :
3774 return "?"; //$NON-NLS-1$
3775 case TokenNameCOLON :
3776 return ":"; //$NON-NLS-1$
3777 case TokenNameCOMMA :
3778 return ","; //$NON-NLS-1$
3780 return "."; //$NON-NLS-1$
3781 case TokenNameEQUAL :
3782 return "="; //$NON-NLS-1$
3785 case TokenNameDOLLAR :
3787 case TokenNameDOLLAR_LBRACE :
3790 return "EOF"; //$NON-NLS-1$
// ---- whitespace and comments: kind name plus the current token text ----
3791 case TokenNameWHITESPACE :
3792 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3793 case TokenNameCOMMENT_LINE :
3794 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3795 case TokenNameCOMMENT_BLOCK :
3796 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3797 case TokenNameCOMMENT_PHPDOC :
3798 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3799 // case TokenNameHTML :
3800 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---- magic constants ----
3802 case TokenNameFILE :
3803 return "__FILE__"; //$NON-NLS-1$
3804 case TokenNameLINE :
3805 return "__LINE__"; //$NON-NLS-1$
3806 case TokenNameCLASS_C :
3807 return "__CLASS__"; //$NON-NLS-1$
3808 case TokenNameMETHOD_C :
3809 return "__METHOD__"; //$NON-NLS-1$
3810 case TokenNameFUNC_C :
3811 return "__FUNCTION__"; //$NON-NLS-1$
// ---- casts ----
3812 case TokenNameboolCAST :
3813 return "( bool )"; //$NON-NLS-1$
3814 case TokenNameintCAST :
3815 return "( int )"; //$NON-NLS-1$
3816 case TokenNamedoubleCAST :
3817 return "( double )"; //$NON-NLS-1$
3818 case TokenNameobjectCAST :
3819 return "( object )"; //$NON-NLS-1$
3820 case TokenNamestringCAST :
3821 return "( string )"; //$NON-NLS-1$
// fallback text: the numeric value of act followed by the current token source
3823 return "not-a-token(" + (new Integer(act)) + ") "
3824 + new String(getCurrentTokenSource()); //$NON-NLS-1$
// Convenience constructor: forwards its three flags to the four-argument
// constructor and passes false for assertMode.
3827 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3828 boolean checkNonExternalizedStringLiterals) {
3829 this(tokenizeComments, tokenizeWhiteSpace,
3830 checkNonExternalizedStringLiterals, false);
// Convenience constructor: forwards its four flags to the five-argument
// constructor and passes false for tokenizeStrings.
3832 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3833 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
3834 this(tokenizeComments, tokenizeWhiteSpace,
3835 checkNonExternalizedStringLiterals, assertMode, false);
// Main constructor: stores the five configuration flags in the fields of the
// same name. No source is installed here; encapsedStringStack starts as null
// and is only created by setSource.
3837 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3838 boolean checkNonExternalizedStringLiterals, boolean assertMode,
3839 boolean tokenizeStrings) {
// largest int value, so no real source offset lies beyond the EOF position
3840 this.eofPosition = Integer.MAX_VALUE;
3841 this.tokenizeComments = tokenizeComments;
3842 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3843 this.tokenizeStrings = tokenizeStrings;
3844 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3845 this.assertMode = assertMode;
3846 this.encapsedStringStack = null;
// Runs the NLS tag parser on currentLine.
// NOTE(review): the statement guarded by the null check is not visible here;
// presumably it returns early when there is no current line - confirm.
3848 private void checkNonExternalizeString() throws InvalidInputException {
3849 if (currentLine == null)
3851 parseTags(currentLine);
// Reads the NLS tags contained in the current token text and matches them
// against the string literals recorded in line. Each tag is TAG_PREFIX, a
// one-based index, then TAG_POSTFIX. Literals that are still present in line
// afterwards are collected into nonNLSStrings; nonNLSStrings becomes null
// when none remain.
// NOTE(review): the loop header, the action taken when line.exists(i) is true
// and the declaration of i are not visible here - confirm against the full
// method.
3853 private void parseTags(NLSLine line) throws InvalidInputException {
3854 String s = new String(getCurrentTokenSource());
3855 int pos = s.indexOf(TAG_PREFIX);
3856 int lineLength = line.size();
// the index text sits between the end of the prefix and the next postfix
3858 int start = pos + TAG_PREFIX_LENGTH;
3859 int end = s.indexOf(TAG_POSTFIX, start);
// NOTE(review): indexOf returns -1 when TAG_POSTFIX is missing, in which case
// substring(start, end) throws StringIndexOutOfBoundsException unless a guard
// exists on a line not shown here - confirm.
3860 String index = s.substring(start, end);
3863 i = Integer.parseInt(index) - 1;
3864 // Tags are one based not zero based.
3865 } catch (NumberFormatException e) {
3866 i = -1; // we don't want to consider this as a valid NLS tag
3868 if (line.exists(i)) {
// continue with the next tag after the one just handled
3871 pos = s.indexOf(TAG_PREFIX, start);
// collect the non-null literals left in line, packed at the front
3873 this.nonNLSStrings = new StringLiteral[lineLength];
3874 int nonNLSCounter = 0;
3875 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3876 StringLiteral literal = (StringLiteral) iterator.next();
3877 if (literal != null) {
3878 this.nonNLSStrings[nonNLSCounter++] = literal;
3881 if (nonNLSCounter == 0) {
3882 this.nonNLSStrings = null;
3886 this.wasNonExternalizedStringLiteral = true;
// shrink the array to the number of literals actually collected
3887 if (nonNLSCounter != lineLength) {
3888 System.arraycopy(this.nonNLSStrings, 0,
3889 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
// Decodes the escape sequence whose backslash has already been consumed:
// reads the next source character and replaces currentCharacter with the
// character the escape stands for. Up to three digits are read as an octal
// escape. Throws InvalidInputException(INVALID_ESCAPE) for an unsupported
// escape.
// NOTE(review): the case labels, the else branches and the statements that
// step currentPosition back are not visible here - confirm against the full
// method.
3894 public final void scanEscapeCharacter() throws InvalidInputException {
3895 // the string with "\\u" is a legal string of two chars \ and u
3896 //thus we use a direct access to the source (for regular cases).
3897 if (unicodeAsBackSlash) {
3898 // consume next character
3899 unicodeAsBackSlash = false;
3900 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
3901 // (source[currentPosition] == 'u')) {
3902 // getNextUnicodeChar();
3904 if (withoutUnicodePtr != 0) {
3905 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// regular case: take the character following the backslash from the source
3909 currentCharacter = source[currentPosition++];
3910 switch (currentCharacter) {
// single-character escapes: currentCharacter becomes the escaped character
3912 currentCharacter = '\b';
3915 currentCharacter = '\t';
3918 currentCharacter = '\n';
3921 currentCharacter = '\f';
3924 currentCharacter = '\r';
3927 currentCharacter = '\"';
3930 currentCharacter = '\'';
3933 currentCharacter = '\\';
3936 // -----------octal escape--------------
3938 // OctalDigit OctalDigit
3939 // ZeroToThree OctalDigit OctalDigit
3940 int number = Character.getNumericValue(currentCharacter);
3941 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7, i.e. a third octal digit is not allowed
3942 boolean zeroToThreeNot = number > 3;
3943 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3944 int digit = Character.getNumericValue(currentCharacter);
3945 if (digit >= 0 && digit <= 7) {
// accumulate the value in base 8
3946 number = (number * 8) + digit;
3948 .isDigit(currentCharacter = source[currentPosition++])) {
3949 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3950 // Digit --> ignore last character
3953 digit = Character.getNumericValue(currentCharacter);
3954 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3955 // OctalDigit OctalDigit
3956 number = (number * 8) + digit;
3957 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3958 // --> ignore last character
3962 } else { // has read \OctalDigit NonDigit--> ignore last
3966 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3970 } else { // has read \OctalDigit --> ignore last character
3974 throw new InvalidInputException(INVALID_ESCAPE);
// the decoded octal value becomes the current character
3975 currentCharacter = (char) number;
3977 throw new InvalidInputException(INVALID_ESCAPE);
3980 // check for the presence of task: tags
3981 public void checkTaskTag(int commentStart, int commentEnd) {
3982 // only look for newer task: tags
3983 if (this.foundTaskCount > 0
3984 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3987 int foundTaskIndex = this.foundTaskCount;
3988 nextChar : for (int i = commentStart; i < commentEnd
3989 && i < this.eofPosition; i++) {
3991 char[] priority = null;
3992 // check for tag occurrence
3993 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3994 tag = this.taskTags[itag];
3995 priority = this.taskPriorities != null
3996 && itag < this.taskPriorities.length
3997 ? this.taskPriorities[itag]
3999 int tagLength = tag.length;
4000 for (int t = 0; t < tagLength; t++) {
4001 if (this.source[i + t] != tag[t])
4004 if (this.foundTaskTags == null) {
4005 this.foundTaskTags = new char[5][];
4006 this.foundTaskMessages = new char[5][];
4007 this.foundTaskPriorities = new char[5][];
4008 this.foundTaskPositions = new int[5][];
4009 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4010 System.arraycopy(this.foundTaskTags, 0,
4011 this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4012 this.foundTaskCount);
4013 System.arraycopy(this.foundTaskMessages, 0,
4014 this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4015 this.foundTaskCount);
4016 System.arraycopy(this.foundTaskPriorities, 0,
4017 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4018 0, this.foundTaskCount);
4019 System.arraycopy(this.foundTaskPositions, 0,
4020 this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4021 this.foundTaskCount);
4023 this.foundTaskTags[this.foundTaskCount] = tag;
4024 this.foundTaskPriorities[this.foundTaskCount] = priority;
4025 this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
4027 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4028 this.foundTaskCount++;
4029 i += tagLength - 1; // will be incremented when looping
4032 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4033 // retrieve message start and end positions
4034 int msgStart = this.foundTaskPositions[i][0]
4035 + this.foundTaskTags[i].length;
4036 int max_value = i + 1 < this.foundTaskCount
4037 ? this.foundTaskPositions[i + 1][0] - 1
4039 // at most beginning of next task
4040 if (max_value < msgStart)
4041 max_value = msgStart; // would only occur if tag is before EOF.
4044 for (int j = msgStart; j < max_value; j++) {
4045 if ((c = this.source[j]) == '\n' || c == '\r') {
4051 for (int j = max_value; j > msgStart; j--) {
4052 if ((c = this.source[j]) == '*') {
4060 if (msgStart == end)
4063 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4065 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4067 // update the end position of the task
4068 this.foundTaskPositions[i][1] = end;
4069 // get the message source
4070 final int messageLength = end - msgStart + 1;
4071 char[] message = new char[messageLength];
4072 System.arraycopy(source, msgStart, message, 0, messageLength);
4073 this.foundTaskMessages[i] = message;