1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
23 - getNextToken() which returns the type of the current token
24 (this value is not memorized by the scanner)
25 - getCurrentTokenSource() which provides the token's "REAL" source
26 (i.e. all unicode escapes have been transformed into the corresponding char)
27 - sourceStart gives the position into the stream
28 - currentPosition-1 gives the sourceEnd position into the stream
// Flags related to the 'assert' keyword.
// NOTE(review): where assertMode is assigned is not visible in this part of the file.
32 private boolean assertMode;
33 public boolean useAssertAsAnIndentifier = false;
34 //flag indicating if processed source contains occurrences of keyword assert
35 public boolean containsAssertKeyword = false;
// Checked by getNextToken() whenever it reads a '\r' or '\n'.
37 public boolean recordLineSeparator;
// presumably true while scanning PHP code rather than the surrounding HTML - TODO confirm
38 public boolean phpMode = false;
// Cursor state: currentCharacter is the character last read from the source,
// startPosition is where the current token begins, and currentPosition is the
// index of the next character to read (so the current token ends at currentPosition - 1).
40 public char currentCharacter;
41 public int startPosition;
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after this position eof are generated instead of real token from the source
// When tokenizeWhiteSpace is set, getNextToken() reports whitespace runs as tokens
// instead of silently skipping them.
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (aka a part)
50 //of an entire very large stream
// Side buffer holding the current token with escapes already resolved; it is filled
// starting at index 1 because withoutUnicodePtr == 0 doubles as the "buffer unused" flag.
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr;
56 //when == 0 ==> no unicode in the current token
57 public boolean unicodeAsBackSlash = false;
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // no comment test with commentPtr value -1
67 //diet parsing support - jump over some method body when requested
68 public boolean diet = false;
70 //support for the poor-line-debuggers ....
71 //remember the position of the cr/lf
72 public int[] lineEnds = new int[250];
73 public int linePtr = -1;
74 public boolean wasAcr = false;
// Message strings carried by InvalidInputException; getNextToken() throws
// UNTERMINATED_STRING and compares a caught exception's message against INVALID_ESCAPE.
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
// Preallocated one-character arrays, one per lowercase ASCII letter.
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
// Shared placeholder entry (six '\u0000' characters) and the dimensions of the table below.
119 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
120 static final int TableSize = 30, InternalTableSize = 6;
122 public static final int OptimizedLength = 6;
// Table of char[] entries indexed [OptimizedLength][TableSize][InternalTableSize].
// NOTE(review): presumably a cache of short token sources used by the
// optimizedCurrentTokenSourceN() helpers - confirm against their definitions.
124 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
125 // support for detecting non-externalized string literals
126 int currentLineNr = -1;
127 int previousLineNr = -1;
// getNextToken() lazily creates currentLine and appends it to lines when it records a string literal.
128 NLSLine currentLine = null;
129 List lines = new ArrayList();
130 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
131 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
132 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
133 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
134 public StringLiteral[] nonNLSStrings = null;
135 public boolean checkNonExternalizedStringLiterals = true;
136 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: point every slot of charArray_length at the shared
// initCharArray placeholder. The outer bound is OptimizedLength (the constant that
// sizes the first dimension of the table) rather than a repeated literal, so the
// loop cannot drift out of step with the table declaration.
{
  for (int i = 0; i < OptimizedLength; i++) {
    for (int j = 0; j < TableSize; j++) {
      for (int k = 0; k < InternalTableSize; k++) {
        charArray_length[i][j][k] = initCharArray;
      }
    }
  }
}
// Static counters, one per token length from 2 to 6.
// NOTE(review): presumably the next insertion slot for each length in the identifier
// cache table - their use is not visible in this part of the file; confirm.
147 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
// Bracket kind identifiers; BracketKinds is the number of distinct kinds.
149 public static final int RoundBracket = 0;
150 public static final int SquareBracket = 1;
151 public static final int CurlyBracket = 2;
152 public static final int BracketKinds = 3;
// Compile-time debug switch, disabled.
154 public static final boolean DEBUG = false;
/**
 * Creates a scanner by delegating to the three-argument constructor, passing
 * {@code false} as its third argument.
 *
 * @param tokenizeComments forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
164 * Determines if the specified character is
165 * permissible as the first character in a PHP identifier
167 public static boolean isPHPIdentifierStart(char ch) {
168 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
172 * Determines if the specified character may be part of a PHP identifier as
173 * other than the first character
175 public static boolean isPHPIdentifierPart(char ch) {
176 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
179 public final boolean atEnd() {
180 // This code is not relevant if source is
181 // Only a part of the real stream input
183 return source.length == currentPosition;
// Returns the characters of the current token, i.e. the currentPosition - startPosition
// characters of source beginning at startPosition. The switch hands short tokens to the
// optimizedCurrentTokenSourceN() helpers; the arraycopy at the end copies the token into
// a freshly allocated array.
// NOTE(review): the switch's case labels, the declaration of `result` and the final
// return statement are not visible in this view. Presumably case N dispatches to
// optimizedCurrentTokenSourceN() for N = 1..OptimizedLength - confirm against the full file.
185 public char[] getCurrentIdentifierSource() {
186 //return the token REAL source (aka unicodes are precomputed)
189 // if (withoutUnicodePtr != 0)
190 // //0 is used as a fast test flag so the real first char is in position 1
192 // withoutUnicodeBuffer,
194 // result = new char[withoutUnicodePtr],
196 // withoutUnicodePtr);
// Token length in characters; selects the helper below.
198 int length = currentPosition - startPosition;
199 switch (length) { // see OptimizedLength
201 return optimizedCurrentTokenSource1();
203 return optimizedCurrentTokenSource2();
205 return optimizedCurrentTokenSource3();
207 return optimizedCurrentTokenSource4();
209 return optimizedCurrentTokenSource5();
211 return optimizedCurrentTokenSource6();
// Plain copy of the token characters into a new array.
214 System.arraycopy(source, startPosition, result = new char[length], 0, length);
218 public int getCurrentTokenEndPosition() {
219 return this.currentPosition - 1;
221 public final char[] getCurrentTokenSource() {
222 // Return the token REAL source (aka unicodes are precomputed)
225 // if (withoutUnicodePtr != 0)
226 // // 0 is used as a fast test flag so the real first char is in position 1
228 // withoutUnicodeBuffer,
230 // result = new char[withoutUnicodePtr],
232 // withoutUnicodePtr);
235 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
240 public final char[] getCurrentTokenSource(int startPos) {
241 // Return the token REAL source (aka unicodes are precomputed)
244 // if (withoutUnicodePtr != 0)
245 // // 0 is used as a fast test flag so the real first char is in position 1
247 // withoutUnicodeBuffer,
249 // result = new char[withoutUnicodePtr],
251 // withoutUnicodePtr);
254 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
259 public final char[] getCurrentTokenSourceString() {
260 //return the token REAL source (aka unicodes are precomputed).
261 //REMOVE the two " that are at the beginning and the end.
264 if (withoutUnicodePtr != 0)
265 //0 is used as a fast test flag so the real first char is in position 1
266 System.arraycopy(withoutUnicodeBuffer, 2,
267 //2 is 1 (real start) + 1 (to jump over the ")
268 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
271 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
275 public int getCurrentTokenStartPosition() {
276 return this.startPosition;
279 * Search the source position corresponding to the end of a given line number
281 * Line numbers are 1-based, and relative to the scanner initialPosition.
282 * Character positions are 0-based.
284 * In case the given line number is inconsistent, answers -1.
// Looks up the end position recorded for a 1-based line number; the normal case
// returns the entry stored at lineEnds[lineNumber - 1].
// NOTE(review): the statements guarded by the three checks below are not visible in
// this view. The method's documentation states that an inconsistent line number
// answers -1; what is returned when lineNumber == lineEnds.length - 1 cannot be
// determined from here - confirm against the complete file.
286 public final int getLineEnd(int lineNumber) {
// Guard: no line-end table.
288 if (lineEnds == null)
// Guard: line number beyond the capacity of the table.
290 if (lineNumber >= lineEnds.length)
// Special case: line number equal to the last index of the table.
295 if (lineNumber == lineEnds.length - 1)
297 return lineEnds[lineNumber - 1];
298 // next line start one character behind the lineEnd of the previous line
301 * Search the source position corresponding to the beginning of a given line number
303 * Line numbers are 1-based, and relative to the scanner initialPosition.
304 * Character positions are 0-based.
306 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
308 * In case the given line number is inconsistent, answers -1.
// Looks up the start position of a 1-based line number. The general case is one
// character past the recorded end of the previous line: lineEnds[lineNumber - 2] + 1.
// NOTE(review): the statements guarded by the two checks below, and the condition in
// front of `return initialPosition;`, are not visible in this view. Per the method's
// documentation the first line starts at the scanner's initial position and an
// inconsistent line number answers -1 - confirm against the complete file.
310 public final int getLineStart(int lineNumber) {
// Guard: no line-end table.
312 if (lineEnds == null)
// Guard: line number beyond the capacity of the table.
314 if (lineNumber >= lineEnds.length)
320 return initialPosition;
321 return lineEnds[lineNumber - 2] + 1;
322 // next line start one character behind the lineEnd of the previous line
324 public final boolean getNextChar(char testedChar) {
326 //handle the case of unicode.
327 //when a unicode appears then we must use a buffer that holds char internal values
328 //At the end of this method currentCharacter holds the new visited char
329 //and currentPosition points right next after it
330 //Both previous lines are true if the currentCharacter is == to the testedChar
331 //On false, no side effect has occured.
333 //ALL getNextChar.... ARE OPTIMIZED COPIES
335 int temp = currentPosition;
337 currentCharacter = source[currentPosition++];
338 // if (((currentCharacter = source[currentPosition++]) == '\\')
339 // && (source[currentPosition] == 'u')) {
340 // //-------------unicode traitement ------------
341 // int c1, c2, c3, c4;
342 // int unicodeSize = 6;
343 // currentPosition++;
344 // while (source[currentPosition] == 'u') {
345 // currentPosition++;
349 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
351 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
353 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
355 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
357 // currentPosition = temp;
361 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
362 // if (currentCharacter != testedChar) {
363 // currentPosition = temp;
366 // unicodeAsBackSlash = currentCharacter == '\\';
368 // //need the unicode buffer
369 // if (withoutUnicodePtr == 0) {
370 // //buffer all the entries that have been left aside....
371 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
375 // withoutUnicodeBuffer,
377 // withoutUnicodePtr);
379 // //fill the buffer with the char
380 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
383 // } //-------------end unicode traitement--------------
385 if (currentCharacter != testedChar) {
386 currentPosition = temp;
389 unicodeAsBackSlash = false;
390 // if (withoutUnicodePtr != 0)
391 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
394 } catch (IndexOutOfBoundsException e) {
395 unicodeAsBackSlash = false;
396 currentPosition = temp;
400 public final int getNextChar(char testedChar1, char testedChar2) {
401 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
402 //test can be done with (x==0) for the first and (x>0) for the second
403 //handle the case of unicode.
404 //when a unicode appears then we must use a buffer that holds char internal values
405 //At the end of this method currentCharacter holds the new visited char
406 //and currentPosition points right next after it
407 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
408 //On false, no side effect has occured.
410 //ALL getNextChar.... ARE OPTIMIZED COPIES
412 int temp = currentPosition;
415 currentCharacter = source[currentPosition++];
416 // if (((currentCharacter = source[currentPosition++]) == '\\')
417 // && (source[currentPosition] == 'u')) {
418 // //-------------unicode traitement ------------
419 // int c1, c2, c3, c4;
420 // int unicodeSize = 6;
421 // currentPosition++;
422 // while (source[currentPosition] == 'u') {
423 // currentPosition++;
427 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
429 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
431 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
433 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
435 // currentPosition = temp;
439 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
440 // if (currentCharacter == testedChar1)
442 // else if (currentCharacter == testedChar2)
445 // currentPosition = temp;
449 // //need the unicode buffer
450 // if (withoutUnicodePtr == 0) {
451 // //buffer all the entries that have been left aside....
452 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
456 // withoutUnicodeBuffer,
458 // withoutUnicodePtr);
460 // //fill the buffer with the char
461 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
463 // } //-------------end unicode traitement--------------
465 if (currentCharacter == testedChar1)
467 else if (currentCharacter == testedChar2)
470 currentPosition = temp;
474 // if (withoutUnicodePtr != 0)
475 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
478 } catch (IndexOutOfBoundsException e) {
479 currentPosition = temp;
483 public final boolean getNextCharAsDigit() {
485 //handle the case of unicode.
486 //when a unicode appears then we must use a buffer that holds char internal values
487 //At the end of this method currentCharacter holds the new visited char
488 //and currentPosition points right next after it
489 //Both previous lines are true if the currentCharacter is a digit
490 //On false, no side effect has occured.
492 //ALL getNextChar.... ARE OPTIMIZED COPIES
494 int temp = currentPosition;
496 currentCharacter = source[currentPosition++];
497 // if (((currentCharacter = source[currentPosition++]) == '\\')
498 // && (source[currentPosition] == 'u')) {
499 // //-------------unicode traitement ------------
500 // int c1, c2, c3, c4;
501 // int unicodeSize = 6;
502 // currentPosition++;
503 // while (source[currentPosition] == 'u') {
504 // currentPosition++;
508 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
510 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
512 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
514 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
516 // currentPosition = temp;
520 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
521 // if (!Character.isDigit(currentCharacter)) {
522 // currentPosition = temp;
526 // //need the unicode buffer
527 // if (withoutUnicodePtr == 0) {
528 // //buffer all the entries that have been left aside....
529 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
533 // withoutUnicodeBuffer,
535 // withoutUnicodePtr);
537 // //fill the buffer with the char
538 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
540 // } //-------------end unicode traitement--------------
542 if (!Character.isDigit(currentCharacter)) {
543 currentPosition = temp;
546 // if (withoutUnicodePtr != 0)
547 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
550 } catch (IndexOutOfBoundsException e) {
551 currentPosition = temp;
555 public final boolean getNextCharAsDigit(int radix) {
557 //handle the case of unicode.
558 //when a unicode appears then we must use a buffer that holds char internal values
559 //At the end of this method currentCharacter holds the new visited char
560 //and currentPosition points right next after it
561 //Both previous lines are true if the currentCharacter is a digit base on radix
562 //On false, no side effect has occured.
564 //ALL getNextChar.... ARE OPTIMIZED COPIES
566 int temp = currentPosition;
568 currentCharacter = source[currentPosition++];
569 // if (((currentCharacter = source[currentPosition++]) == '\\')
570 // && (source[currentPosition] == 'u')) {
571 // //-------------unicode traitement ------------
572 // int c1, c2, c3, c4;
573 // int unicodeSize = 6;
574 // currentPosition++;
575 // while (source[currentPosition] == 'u') {
576 // currentPosition++;
580 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
582 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
584 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
586 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
588 // currentPosition = temp;
592 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
593 // if (Character.digit(currentCharacter, radix) == -1) {
594 // currentPosition = temp;
598 // //need the unicode buffer
599 // if (withoutUnicodePtr == 0) {
600 // //buffer all the entries that have been left aside....
601 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
605 // withoutUnicodeBuffer,
607 // withoutUnicodePtr);
609 // //fill the buffer with the char
610 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
612 // } //-------------end unicode traitement--------------
614 if (Character.digit(currentCharacter, radix) == -1) {
615 currentPosition = temp;
618 // if (withoutUnicodePtr != 0)
619 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
622 } catch (IndexOutOfBoundsException e) {
623 currentPosition = temp;
627 public boolean getNextCharAsJavaIdentifierPart() {
629 //handle the case of unicode.
630 //when a unicode appears then we must use a buffer that holds char internal values
631 //At the end of this method currentCharacter holds the new visited char
632 //and currentPosition points right next after it
633 //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
634 //On false, no side effect has occured.
636 //ALL getNextChar.... ARE OPTIMIZED COPIES
638 int temp = currentPosition;
640 currentCharacter = source[currentPosition++];
641 // if (((currentCharacter = source[currentPosition++]) == '\\')
642 // && (source[currentPosition] == 'u')) {
643 // //-------------unicode traitement ------------
644 // int c1, c2, c3, c4;
645 // int unicodeSize = 6;
646 // currentPosition++;
647 // while (source[currentPosition] == 'u') {
648 // currentPosition++;
652 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
654 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
656 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
658 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
660 // currentPosition = temp;
664 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
665 // if (!isPHPIdentifierPart(currentCharacter)) {
666 // currentPosition = temp;
670 // //need the unicode buffer
671 // if (withoutUnicodePtr == 0) {
672 // //buffer all the entries that have been left aside....
673 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
677 // withoutUnicodeBuffer,
679 // withoutUnicodePtr);
681 // //fill the buffer with the char
682 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
684 // } //-------------end unicode traitement--------------
686 if (!isPHPIdentifierPart(currentCharacter)) {
687 currentPosition = temp;
691 // if (withoutUnicodePtr != 0)
692 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
695 } catch (IndexOutOfBoundsException e) {
696 currentPosition = temp;
701 public int getNextToken() throws InvalidInputException {
702 int htmlPosition = currentPosition;
705 currentCharacter = source[currentPosition++];
706 if (currentCharacter == '<') {
707 if (getNextChar('?')) {
708 currentCharacter = source[currentPosition++];
709 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
711 startPosition = currentPosition;
713 if (tokenizeWhiteSpace) {
714 // && (whiteStart != currentPosition - 1)) {
715 // reposition scanner in case we are interested by spaces as tokens
716 startPosition = htmlPosition;
717 return TokenNameHTML;
720 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
722 int test = getNextChar('H', 'h');
724 test = getNextChar('P', 'p');
727 startPosition = currentPosition;
730 if (tokenizeWhiteSpace) {
731 // && (whiteStart != currentPosition - 1)) {
732 // reposition scanner in case we are interested by spaces as tokens
733 startPosition = htmlPosition;
734 return TokenNameHTML;
743 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
744 if (recordLineSeparator) {
751 } //-----------------end switch while try--------------------
752 catch (IndexOutOfBoundsException e) {
753 if (tokenizeWhiteSpace) {
754 // && (whiteStart != currentPosition - 1)) {
755 // reposition scanner in case we are interested by spaces as tokens
756 startPosition = htmlPosition;
764 jumpOverMethodBody();
766 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
769 while (true) { //loop for jumping over comments
770 withoutUnicodePtr = 0;
771 //start with a new token (even comment written with unicode )
773 // ---------Consume white space and handles startPosition---------
774 int whiteStart = currentPosition;
775 boolean isWhiteSpace;
777 startPosition = currentPosition;
778 currentCharacter = source[currentPosition++];
779 // if (((currentCharacter = source[currentPosition++]) == '\\')
780 // && (source[currentPosition] == 'u')) {
781 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
783 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
784 checkNonExternalizeString();
785 if (recordLineSeparator) {
791 isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
793 } while (isWhiteSpace);
794 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
795 // reposition scanner in case we are interested by spaces as tokens
797 startPosition = whiteStart;
798 return TokenNameWHITESPACE;
800 //little trick to get out in the middle of a source computation
801 if (currentPosition > eofPosition)
804 // ---------Identify the next token-------------
806 switch (currentCharacter) {
808 return TokenNameLPAREN;
810 return TokenNameRPAREN;
812 return TokenNameLBRACE;
814 return TokenNameRBRACE;
816 return TokenNameLBRACKET;
818 return TokenNameRBRACKET;
820 return TokenNameSEMICOLON;
822 return TokenNameCOMMA;
825 if (getNextCharAsDigit())
826 return scanNumber(true);
831 if ((test = getNextChar('+', '=')) == 0)
832 return TokenNamePLUS_PLUS;
834 return TokenNamePLUS_EQUAL;
835 return TokenNamePLUS;
840 if ((test = getNextChar('-', '=')) == 0)
841 return TokenNameMINUS_MINUS;
843 return TokenNameMINUS_EQUAL;
844 if (getNextChar('>'))
845 return TokenNameMINUS_GREATER;
847 return TokenNameMINUS;
850 if (getNextChar('='))
851 return TokenNameTWIDDLE_EQUAL;
852 return TokenNameTWIDDLE;
854 if (getNextChar('='))
855 return TokenNameNOT_EQUAL;
858 if (getNextChar('='))
859 return TokenNameMULTIPLY_EQUAL;
860 return TokenNameMULTIPLY;
862 if (getNextChar('='))
863 return TokenNameREMAINDER_EQUAL;
864 return TokenNameREMAINDER;
868 if ((test = getNextChar('=', '<')) == 0)
869 return TokenNameLESS_EQUAL;
871 if (getNextChar('='))
872 return TokenNameLEFT_SHIFT_EQUAL;
873 if (getNextChar('<')) {
874 int heredocStart = currentPosition;
875 int heredocLength = 0;
876 currentCharacter = source[currentPosition++];
877 if (isPHPIdentifierStart(currentCharacter)) {
878 currentCharacter = source[currentPosition++];
880 return TokenNameERROR;
882 while (isPHPIdentifierPart(currentCharacter)) {
883 currentCharacter = source[currentPosition++];
886 heredocLength = currentPosition - heredocStart - 1;
888 // heredoc end-tag determination
889 boolean endTag = true;
892 ch = source[currentPosition++];
893 if (ch == '\r' || ch == '\n') {
894 if (recordLineSeparator) {
899 for (int i = 0; i < heredocLength; i++) {
900 if (source[currentPosition + i] != source[heredocStart + i]) {
906 currentPosition += heredocLength - 1;
907 currentCharacter = source[currentPosition++];
908 break; // do...while loop
916 return TokenNameHEREDOC;
918 return TokenNameLEFT_SHIFT;
920 return TokenNameLESS;
925 if ((test = getNextChar('=', '>')) == 0)
926 return TokenNameGREATER_EQUAL;
928 if ((test = getNextChar('=', '>')) == 0)
929 return TokenNameRIGHT_SHIFT_EQUAL;
930 return TokenNameRIGHT_SHIFT;
932 return TokenNameGREATER;
935 if (getNextChar('='))
936 return TokenNameEQUAL_EQUAL;
937 if (getNextChar('>'))
938 return TokenNameEQUAL_GREATER;
939 return TokenNameEQUAL;
943 if ((test = getNextChar('&', '=')) == 0)
944 return TokenNameAND_AND;
946 return TokenNameAND_EQUAL;
952 if ((test = getNextChar('|', '=')) == 0)
953 return TokenNameOR_OR;
955 return TokenNameOR_EQUAL;
959 if (getNextChar('='))
960 return TokenNameXOR_EQUAL;
963 if (getNextChar('>')) {
965 return TokenNameStopPHP;
967 return TokenNameQUESTION;
969 if (getNextChar(':'))
970 return TokenNameCOLON_COLON;
971 return TokenNameCOLON;
977 // if ((test = getNextChar('\n', '\r')) == 0) {
978 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
981 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
982 // for (int lookAhead = 0;
985 // if (currentPosition + lookAhead
988 // if (source[currentPosition + lookAhead]
991 // if (source[currentPosition + lookAhead]
993 // currentPosition += lookAhead + 1;
997 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1000 // if (getNextChar('\'')) {
1001 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1002 // for (int lookAhead = 0;
1005 // if (currentPosition + lookAhead
1006 // == source.length)
1008 // if (source[currentPosition + lookAhead]
1011 // if (source[currentPosition + lookAhead]
1013 // currentPosition += lookAhead + 1;
1017 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1019 // if (getNextChar('\\'))
1020 // scanEscapeCharacter();
1021 // else { // consume next character
1022 // unicodeAsBackSlash = false;
1023 // if (((currentCharacter = source[currentPosition++])
1025 // && (source[currentPosition] == 'u')) {
1026 // getNextUnicodeChar();
1028 // if (withoutUnicodePtr != 0) {
1029 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1030 // currentCharacter;
1034 // // if (getNextChar('\''))
1035 // // return TokenNameCharacterLiteral;
1036 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1037 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1038 // if (currentPosition + lookAhead == source.length)
1040 // if (source[currentPosition + lookAhead] == '\n')
1042 // if (source[currentPosition + lookAhead] == '\'') {
1043 // currentPosition += lookAhead + 1;
1047 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1050 // consume next character
1051 unicodeAsBackSlash = false;
1052 currentCharacter = source[currentPosition++];
1053 // if (((currentCharacter = source[currentPosition++]) == '\\')
1054 // && (source[currentPosition] == 'u')) {
1055 // getNextUnicodeChar();
1057 // if (withoutUnicodePtr != 0) {
1058 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1059 // currentCharacter;
1063 while (currentCharacter != '\'') {
1065 /**** in PHP \r and \n are valid in string literals ****/
1066 // if ((currentCharacter == '\n')
1067 // || (currentCharacter == '\r')) {
1068 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1069 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1070 // if (currentPosition + lookAhead == source.length)
1072 // if (source[currentPosition + lookAhead] == '\n')
1074 // if (source[currentPosition + lookAhead] == '\"') {
1075 // currentPosition += lookAhead + 1;
1079 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1081 if (currentCharacter == '\\') {
1082 int escapeSize = currentPosition;
1083 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1084 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1085 scanSingleQuotedEscapeCharacter();
1086 escapeSize = currentPosition - escapeSize;
1087 if (withoutUnicodePtr == 0) {
1088 //buffer all the entries that have been left aside....
1089 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1090 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1091 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1092 } else { //overwrite the / in the buffer
1093 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1094 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1095 withoutUnicodePtr--;
1099 // consume next character
1100 unicodeAsBackSlash = false;
1101 currentCharacter = source[currentPosition++];
1102 // if (((currentCharacter = source[currentPosition++]) == '\\')
1103 // && (source[currentPosition] == 'u')) {
1104 // getNextUnicodeChar();
1106 if (withoutUnicodePtr != 0) {
1107 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1112 } catch (IndexOutOfBoundsException e) {
1113 throw new InvalidInputException(UNTERMINATED_STRING);
1114 } catch (InvalidInputException e) {
1115 if (e.getMessage().equals(INVALID_ESCAPE)) {
1116 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1117 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1118 if (currentPosition + lookAhead == source.length)
1120 if (source[currentPosition + lookAhead] == '\n')
1122 if (source[currentPosition + lookAhead] == '\'') {
1123 currentPosition += lookAhead + 1;
1131 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1132 if (currentLine == null) {
1133 currentLine = new NLSLine();
1134 lines.add(currentLine);
1136 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1138 return TokenNameStringConstant;
1141 // consume next character
1142 unicodeAsBackSlash = false;
1143 currentCharacter = source[currentPosition++];
1144 // if (((currentCharacter = source[currentPosition++]) == '\\')
1145 // && (source[currentPosition] == 'u')) {
1146 // getNextUnicodeChar();
1148 // if (withoutUnicodePtr != 0) {
1149 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1150 // currentCharacter;
1154 while (currentCharacter != '"') {
1156 /**** in PHP \r and \n are valid in string literals ****/
1157 // if ((currentCharacter == '\n')
1158 // || (currentCharacter == '\r')) {
1159 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1160 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1161 // if (currentPosition + lookAhead == source.length)
1163 // if (source[currentPosition + lookAhead] == '\n')
1165 // if (source[currentPosition + lookAhead] == '\"') {
1166 // currentPosition += lookAhead + 1;
1170 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1172 if (currentCharacter == '\\') {
1173 int escapeSize = currentPosition;
1174 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1175 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1176 scanDoubleQuotedEscapeCharacter();
1177 escapeSize = currentPosition - escapeSize;
1178 if (withoutUnicodePtr == 0) {
1179 //buffer all the entries that have been left aside....
1180 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1181 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1182 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1183 } else { //overwrite the / in the buffer
1184 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1185 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1186 withoutUnicodePtr--;
1190 // consume next character
1191 unicodeAsBackSlash = false;
1192 currentCharacter = source[currentPosition++];
1193 // if (((currentCharacter = source[currentPosition++]) == '\\')
1194 // && (source[currentPosition] == 'u')) {
1195 // getNextUnicodeChar();
1197 if (withoutUnicodePtr != 0) {
1198 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1203 } catch (IndexOutOfBoundsException e) {
1204 throw new InvalidInputException(UNTERMINATED_STRING);
1205 } catch (InvalidInputException e) {
1206 if (e.getMessage().equals(INVALID_ESCAPE)) {
1207 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1208 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1209 if (currentPosition + lookAhead == source.length)
1211 if (source[currentPosition + lookAhead] == '\n')
1213 if (source[currentPosition + lookAhead] == '\"') {
1214 currentPosition += lookAhead + 1;
1222 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1223 if (currentLine == null) {
1224 currentLine = new NLSLine();
1225 lines.add(currentLine);
1227 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1229 return TokenNameStringLiteral;
1232 // consume next character
1233 unicodeAsBackSlash = false;
1234 currentCharacter = source[currentPosition++];
1235 // if (((currentCharacter = source[currentPosition++]) == '\\')
1236 // && (source[currentPosition] == 'u')) {
1237 // getNextUnicodeChar();
1239 // if (withoutUnicodePtr != 0) {
1240 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1241 // currentCharacter;
1245 while (currentCharacter != '`') {
1247 /**** in PHP \r and \n are valid in string literals ****/
1248 // if ((currentCharacter == '\n')
1249 // || (currentCharacter == '\r')) {
1250 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1251 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1252 // if (currentPosition + lookAhead == source.length)
1254 // if (source[currentPosition + lookAhead] == '\n')
1256 // if (source[currentPosition + lookAhead] == '\"') {
1257 // currentPosition += lookAhead + 1;
1261 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1263 if (currentCharacter == '\\') {
1264 int escapeSize = currentPosition;
1265 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1266 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1267 scanDoubleQuotedEscapeCharacter();
1268 escapeSize = currentPosition - escapeSize;
1269 if (withoutUnicodePtr == 0) {
1270 //buffer all the entries that have been left aside....
1271 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1272 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1273 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1274 } else { //overwrite the / in the buffer
1275 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1276 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1277 withoutUnicodePtr--;
1281 // consume next character
1282 unicodeAsBackSlash = false;
1283 currentCharacter = source[currentPosition++];
1284 // if (((currentCharacter = source[currentPosition++]) == '\\')
1285 // && (source[currentPosition] == 'u')) {
1286 // getNextUnicodeChar();
1288 if (withoutUnicodePtr != 0) {
1289 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1294 } catch (IndexOutOfBoundsException e) {
1295 throw new InvalidInputException(UNTERMINATED_STRING);
1296 } catch (InvalidInputException e) {
1297 if (e.getMessage().equals(INVALID_ESCAPE)) {
1298 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1299 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1300 if (currentPosition + lookAhead == source.length)
1302 if (source[currentPosition + lookAhead] == '\n')
1304 if (source[currentPosition + lookAhead] == '`') {
1305 currentPosition += lookAhead + 1;
1313 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1314 if (currentLine == null) {
1315 currentLine = new NLSLine();
1316 lines.add(currentLine);
1318 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1320 return TokenNameStringInterpolated;
1325 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1327 int endPositionForLineComment = 0;
1328 try { //get the next char
1329 currentCharacter = source[currentPosition++];
1330 // if (((currentCharacter = source[currentPosition++])
1332 // && (source[currentPosition] == 'u')) {
1333 // //-------------unicode traitement ------------
1334 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1335 // currentPosition++;
1336 // while (source[currentPosition] == 'u') {
1337 // currentPosition++;
1340 // Character.getNumericValue(source[currentPosition++]))
1344 // Character.getNumericValue(source[currentPosition++]))
1348 // Character.getNumericValue(source[currentPosition++]))
1352 // Character.getNumericValue(source[currentPosition++]))
1355 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1357 // currentCharacter =
1358 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1362 //handle the \\u case manually into comment
1363 // if (currentCharacter == '\\') {
1364 // if (source[currentPosition] == '\\')
1365 // currentPosition++;
1366 // } //jump over the \\
1367 boolean isUnicode = false;
1368 while (currentCharacter != '\r' && currentCharacter != '\n') {
1369 if (currentCharacter == '?') {
1370 if (getNextChar('>')) {
1371 startPosition = currentPosition - 2;
1373 return TokenNameStopPHP;
1379 currentCharacter = source[currentPosition++];
1380 // if (((currentCharacter = source[currentPosition++])
1382 // && (source[currentPosition] == 'u')) {
1383 // isUnicode = true;
1384 // //-------------unicode traitement ------------
1385 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1386 // currentPosition++;
1387 // while (source[currentPosition] == 'u') {
1388 // currentPosition++;
1391 // Character.getNumericValue(source[currentPosition++]))
1395 // Character.getNumericValue(
1396 // source[currentPosition++]))
1400 // Character.getNumericValue(
1401 // source[currentPosition++]))
1405 // Character.getNumericValue(
1406 // source[currentPosition++]))
1409 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1411 // currentCharacter =
1412 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1415 //handle the \\u case manually into comment
1416 // if (currentCharacter == '\\') {
1417 // if (source[currentPosition] == '\\')
1418 // currentPosition++;
1419 // } //jump over the \\
1422 endPositionForLineComment = currentPosition - 6;
1424 endPositionForLineComment = currentPosition - 1;
1426 recordComment(false);
1427 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1428 checkNonExternalizeString();
1429 if (recordLineSeparator) {
1431 pushUnicodeLineSeparator();
1433 pushLineSeparator();
1439 if (tokenizeComments) {
1441 currentPosition = endPositionForLineComment;
1442 // reset one character behind
1444 return TokenNameCOMMENT_LINE;
1446 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1447 if (tokenizeComments) {
1449 // reset one character behind
1450 return TokenNameCOMMENT_LINE;
1456 //traditional and annotation comment
1457 boolean isJavadoc = false, star = false;
1458 // consume next character
1459 unicodeAsBackSlash = false;
1460 currentCharacter = source[currentPosition++];
1461 // if (((currentCharacter = source[currentPosition++]) == '\\')
1462 // && (source[currentPosition] == 'u')) {
1463 // getNextUnicodeChar();
1465 // if (withoutUnicodePtr != 0) {
1466 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1467 // currentCharacter;
1471 if (currentCharacter == '*') {
1475 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1476 checkNonExternalizeString();
1477 if (recordLineSeparator) {
1478 pushLineSeparator();
1483 try { //get the next char
1484 currentCharacter = source[currentPosition++];
1485 // if (((currentCharacter = source[currentPosition++])
1487 // && (source[currentPosition] == 'u')) {
1488 // //-------------unicode traitement ------------
1489 // getNextUnicodeChar();
1491 //handle the \\u case manually into comment
1492 // if (currentCharacter == '\\') {
1493 // if (source[currentPosition] == '\\')
1494 // currentPosition++;
1495 // //jump over the \\
1497 // empty comment is not a javadoc /**/
1498 if (currentCharacter == '/') {
1501 //loop until end of comment */
1502 while ((currentCharacter != '/') || (!star)) {
1503 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1504 checkNonExternalizeString();
1505 if (recordLineSeparator) {
1506 pushLineSeparator();
1511 star = currentCharacter == '*';
1513 currentCharacter = source[currentPosition++];
1514 // if (((currentCharacter = source[currentPosition++])
1516 // && (source[currentPosition] == 'u')) {
1517 // //-------------unicode traitement ------------
1518 // getNextUnicodeChar();
1520 //handle the \\u case manually into comment
1521 // if (currentCharacter == '\\') {
1522 // if (source[currentPosition] == '\\')
1523 // currentPosition++;
1524 // } //jump over the \\
1526 recordComment(isJavadoc);
1527 if (tokenizeComments) {
1529 return TokenNameCOMMENT_PHPDOC;
1530 return TokenNameCOMMENT_BLOCK;
1532 } catch (IndexOutOfBoundsException e) {
1533 throw new InvalidInputException(UNTERMINATED_COMMENT);
1537 if (getNextChar('='))
1538 return TokenNameDIVIDE_EQUAL;
1539 return TokenNameDIVIDE;
1543 return TokenNameEOF;
1544 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1545 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1548 if (currentCharacter == '$') {
1549 while ((currentCharacter = source[currentPosition++]) == '$') {
1551 if (currentCharacter == '{')
1552 return TokenNameDOLLAR_LBRACE;
1553 if (isPHPIdentifierStart(currentCharacter))
1554 return scanIdentifierOrKeyword(true);
1555 return TokenNameERROR;
1557 if (isPHPIdentifierStart(currentCharacter))
1558 return scanIdentifierOrKeyword(false);
1559 if (Character.isDigit(currentCharacter))
1560 return scanNumber(false);
1561 return TokenNameERROR;
1564 } //-----------------end switch while try--------------------
1565 catch (IndexOutOfBoundsException e) {
1568 return TokenNameEOF;
1571 // public final void getNextUnicodeChar()
1572 // throws IndexOutOfBoundsException, InvalidInputException {
1574 // //handle the case of unicode.
1575 // //when a unicode appears then we must use a buffer that holds char internal values
1576 // //At the end of this method currentCharacter holds the new visited char
1577 // //and currentPosition points right next after it
1579 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1581 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1582 // currentPosition++;
1583 // while (source[currentPosition] == 'u') {
1584 // currentPosition++;
1588 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1590 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1592 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1594 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1596 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1598 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1599 // //need the unicode buffer
1600 // if (withoutUnicodePtr == 0) {
1601 // //buffer all the entries that have been left aside....
1602 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1603 // System.arraycopy(
1606 // withoutUnicodeBuffer,
1608 // withoutUnicodePtr);
1610 // //fill the buffer with the char
1611 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1613 // unicodeAsBackSlash = currentCharacter == '\\';
1615 /* Tokenize a method body, assuming that curly brackets are properly balanced.
// Skips over the tokens of a method body without returning them.
// Clears wasAcr, then loops: consumes whitespace (recording line separators
// through pushLineSeparator() when recordLineSeparator is set) and dispatches
// on currentCharacter. The visible branches step over escape sequences
// (scanDoubleQuotedEscapeCharacter), double-quoted strings, line comments,
// block comments, identifiers/variables (scanIdentifierOrKeyword) and numbers.
// IndexOutOfBoundsException and InvalidInputException are caught after the
// main loop, so neither propagates to the caller.
// NOTE(review): the case labels of the switch are not visible here; the
// branch descriptions below are inferred from the statements they contain.
1617 public final void jumpOverMethodBody() {
1619 this.wasAcr = false;
1622 while (true) { //loop for jumping over comments
1623 // ---------Consume white space and handles startPosition---------
1624 boolean isWhiteSpace;
// every pass restarts the token at the current position before reading a char
1626 startPosition = currentPosition;
1627 currentCharacter = source[currentPosition++];
1628 // if (((currentCharacter = source[currentPosition++]) == '\\')
1629 // && (source[currentPosition] == 'u')) {
1630 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1632 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1633 pushLineSeparator();
1634 isWhiteSpace = Character.isWhitespace(currentCharacter);
1636 } while (isWhiteSpace);
1638 // -------consume token until } is found---------
1639 switch (currentCharacter) {
// presumably the quoted-character branch: probe for a backslash, then let the
// escape scanner consume the escape sequence - confirm against the full source
1651 test = getNextChar('\\');
1654 scanDoubleQuotedEscapeCharacter();
1655 } catch (InvalidInputException ex) {
1658 // try { // consume next character
1659 unicodeAsBackSlash = false;
1660 currentCharacter = source[currentPosition++];
1661 // if (((currentCharacter = source[currentPosition++]) == '\\')
1662 // && (source[currentPosition] == 'u')) {
1663 // getNextUnicodeChar();
// a non-zero withoutUnicodePtr means a token buffer is in use: keep it in sync
1665 if (withoutUnicodePtr != 0) {
1666 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1669 // } catch (InvalidInputException ex) {
1677 // try { // consume next character
1678 unicodeAsBackSlash = false;
1679 currentCharacter = source[currentPosition++];
1680 // if (((currentCharacter = source[currentPosition++]) == '\\')
1681 // && (source[currentPosition] == 'u')) {
1682 // getNextUnicodeChar();
1684 if (withoutUnicodePtr != 0) {
1685 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1688 // } catch (InvalidInputException ex) {
// advance to the closing double quote, stepping over escape sequences
1690 while (currentCharacter != '"') {
1691 if (currentCharacter == '\r') {
1692 if (source[currentPosition] == '\n')
1695 // the string cannot go further than the line
1697 if (currentCharacter == '\n') {
1699 // the string cannot go further than the line
1701 if (currentCharacter == '\\') {
1703 scanDoubleQuotedEscapeCharacter();
1704 } catch (InvalidInputException ex) {
1707 // try { // consume next character
1708 unicodeAsBackSlash = false;
1709 currentCharacter = source[currentPosition++];
1710 // if (((currentCharacter = source[currentPosition++]) == '\\')
1711 // && (source[currentPosition] == 'u')) {
1712 // getNextUnicodeChar();
1714 if (withoutUnicodePtr != 0) {
1715 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1718 // } catch (InvalidInputException ex) {
1721 } catch (IndexOutOfBoundsException e) {
// a result of 0 from getNextChar('/', '*') selects the line-comment path:
// characters are consumed up to the next '\r' or '\n'
1728 if ((test = getNextChar('/', '*')) == 0) {
1732 currentCharacter = source[currentPosition++];
1733 // if (((currentCharacter = source[currentPosition++]) == '\\')
1734 // && (source[currentPosition] == 'u')) {
1735 // //-------------unicode traitement ------------
1736 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1737 // currentPosition++;
1738 // while (source[currentPosition] == 'u') {
1739 // currentPosition++;
1742 // Character.getNumericValue(source[currentPosition++]))
1746 // Character.getNumericValue(source[currentPosition++]))
1750 // Character.getNumericValue(source[currentPosition++]))
1754 // Character.getNumericValue(source[currentPosition++]))
1757 // //error don't care of the value
1758 // currentCharacter = 'A';
1759 // } //something different from \n and \r
1761 // currentCharacter =
1762 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1766 while (currentCharacter != '\r' && currentCharacter != '\n') {
1768 currentCharacter = source[currentPosition++];
1769 // if (((currentCharacter = source[currentPosition++])
1771 // && (source[currentPosition] == 'u')) {
1772 // //-------------unicode traitement ------------
1773 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1774 // currentPosition++;
1775 // while (source[currentPosition] == 'u') {
1776 // currentPosition++;
1779 // Character.getNumericValue(source[currentPosition++]))
1783 // Character.getNumericValue(source[currentPosition++]))
1787 // Character.getNumericValue(source[currentPosition++]))
1791 // Character.getNumericValue(source[currentPosition++]))
1794 // //error don't care of the value
1795 // currentCharacter = 'A';
1796 // } //something different from \n and \r
1798 // currentCharacter =
1799 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1803 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1804 pushLineSeparator();
1805 } catch (IndexOutOfBoundsException e) {
1806 } //an eof will then be generated
1810 //traditional and annotation comment
1811 boolean star = false;
1812 // try { // consume next character
1813 unicodeAsBackSlash = false;
1814 currentCharacter = source[currentPosition++];
1815 // if (((currentCharacter = source[currentPosition++]) == '\\')
1816 // && (source[currentPosition] == 'u')) {
1817 // getNextUnicodeChar();
1819 if (withoutUnicodePtr != 0) {
1820 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1823 // } catch (InvalidInputException ex) {
1825 if (currentCharacter == '*') {
1828 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1829 pushLineSeparator();
1830 try { //get the next char
1831 currentCharacter = source[currentPosition++];
1832 // if (((currentCharacter = source[currentPosition++]) == '\\')
1833 // && (source[currentPosition] == 'u')) {
1834 // //-------------unicode traitement ------------
1835 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1836 // currentPosition++;
1837 // while (source[currentPosition] == 'u') {
1838 // currentPosition++;
1841 // Character.getNumericValue(source[currentPosition++]))
1845 // Character.getNumericValue(source[currentPosition++]))
1849 // Character.getNumericValue(source[currentPosition++]))
1853 // Character.getNumericValue(source[currentPosition++]))
1856 // //error don't care of the value
1857 // currentCharacter = 'A';
1858 // } //something different from * and /
1860 // currentCharacter =
1861 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1864 //loop until end of comment */
// star remembers whether the previously read character was '*', so the loop
// ends only on a '/' that directly follows a '*'
1865 while ((currentCharacter != '/') || (!star)) {
1866 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1867 pushLineSeparator();
1868 star = currentCharacter == '*';
1870 currentCharacter = source[currentPosition++];
1871 // if (((currentCharacter = source[currentPosition++])
1873 // && (source[currentPosition] == 'u')) {
1874 // //-------------unicode traitement ------------
1875 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1876 // currentPosition++;
1877 // while (source[currentPosition] == 'u') {
1878 // currentPosition++;
1881 // Character.getNumericValue(source[currentPosition++]))
1885 // Character.getNumericValue(source[currentPosition++]))
1889 // Character.getNumericValue(source[currentPosition++]))
1893 // Character.getNumericValue(source[currentPosition++]))
1896 // //error don't care of the value
1897 // currentCharacter = 'A';
1898 // } //something different from * and /
1900 // currentCharacter =
1901 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1905 } catch (IndexOutOfBoundsException e) {
// identifiers and '$' variables share one scanner; the boolean argument is
// true when the token starts with '$'
1914 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1916 scanIdentifierOrKeyword((currentCharacter == '$'));
1917 } catch (InvalidInputException ex) {
1921 if (Character.isDigit(currentCharacter)) {
1924 } catch (InvalidInputException ex) {
1930 //-----------------end switch while try--------------------
1931 } catch (IndexOutOfBoundsException e) {
1932 } catch (InvalidInputException e) {
1936 // public final boolean jumpOverUnicodeWhiteSpace()
1937 // throws InvalidInputException {
1939 // //handle the case of unicode. Jump over the next whiteSpace
1940 // //making startPosition pointing on the next available char
1941 // //On false, the currentCharacter is filled up with a potential
1945 // this.wasAcr = false;
1946 // int c1, c2, c3, c4;
1947 // int unicodeSize = 6;
1948 // currentPosition++;
1949 // while (source[currentPosition] == 'u') {
1950 // currentPosition++;
1954 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1956 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1958 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1960 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1962 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1965 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1966 // if (recordLineSeparator
1967 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1968 // pushLineSeparator();
1969 // if (Character.isWhitespace(currentCharacter))
1972 // //buffer the new char which is not a white space
1973 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1974 // //withoutUnicodePtr == 1 is true here
1976 // } catch (IndexOutOfBoundsException e) {
1977 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Returns the recorded line-end positions as a new array holding exactly the
// entries in use, i.e. lineEnds[0..linePtr] (linePtr + 1 elements).
1980 public final int[] getLineEnds() {
1981 //return a bounded copy of this.lineEnds
// the copy is allocated inside the arraycopy call and filled from lineEnds
1984 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor for the character buffer being scanned.
// NOTE(review): the body is not visible here; presumably it returns the
// source field itself (not a copy) - confirm before relying on that.
1988 public char[] getSource() {
// Returns the source of the current one-character token, i.e. the character
// at source[startPosition]. Per the comments below, a shared pre-built array
// is returned for the common cases; the visible final statement is the
// fallback, which allocates a fresh one-element array.
// NOTE(review): the selection of shared arrays is not visible here.
1991 final char[] optimizedCurrentTokenSource1() {
1992 //return always the same char[] build only once
1994 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
1995 char charOne = source[startPosition];
// fallback: no shared array applies, build a new single-char array
2050 return new char[] { charOne };
// Returns a char[] for the two-character token starting at startPosition,
// reusing a previously cached array with the same characters when one is
// found. The two characters are hashed (modulo TableSize) to pick a bucket of
// charArray_length[0]; the bucket is scanned by two loops, the second bounded
// by newEntry2. On a miss a new array is created and stored in the bucket.
2054 final char[] optimizedCurrentTokenSource2() {
2055 //try to return the same char[] build only once
// c0 and c1 are captured while the hash is computed
2058 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2059 char[][] table = charArray_length[0][hash];
// first scan: compare the token against cached entries up to InternalTableSize
2061 while (++i < InternalTableSize) {
2062 char[] charArray = table[i];
2063 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2066 //---------other side---------
// second scan: entries up to and including index newEntry2
2068 int max = newEntry2;
2069 while (++i <= max) {
2070 char[] charArray = table[i];
2071 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2074 //--------add the entry-------
// the next slot index is checked against the bucket size
// (the handling of that case is not visible here)
2075 if (++max >= InternalTableSize)
// miss: allocate the array, remember it in r and cache it at slot max
2078 table[max] = (r = new char[] { c0, c1 });
// Returns a char[] for the three-character token starting at startPosition,
// reusing a previously cached array with the same characters when one is
// found. A hash of the three characters picks a bucket of
// charArray_length[1]; the bucket is scanned by two loops, the second bounded
// by newEntry3. On a miss a new array is created and stored in the bucket.
2083 final char[] optimizedCurrentTokenSource3() {
2084 //try to return the same char[] build only once
// hash expression: each character is shifted by a further 6 bits;
// c0, c1 and c2 are captured as a side effect
2088 (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2090 char[][] table = charArray_length[1][hash];
// first scan: compare the token against cached entries up to InternalTableSize
2092 while (++i < InternalTableSize) {
2093 char[] charArray = table[i];
2094 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2097 //---------other side---------
// second scan: entries up to and including index newEntry3
2099 int max = newEntry3;
2100 while (++i <= max) {
2101 char[] charArray = table[i];
2102 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2105 //--------add the entry-------
// the next slot index is checked against the bucket size
// (the handling of that case is not visible here)
2106 if (++max >= InternalTableSize)
// miss: allocate the array, remember it in r and cache it at slot max
2109 table[max] = (r = new char[] { c0, c1, c2 });
// Returns a char[] for the four-character token starting at startPosition,
// reusing a previously cached array with the same characters when one is
// found. A hash of the four characters picks a bucket of
// charArray_length[2]; the bucket is scanned by two loops, the second bounded
// by newEntry4. On a miss a new array is created and stored in the bucket.
2114 final char[] optimizedCurrentTokenSource4() {
2115 //try to return the same char[] build only once
2117 char c0, c1, c2, c3;
// hash expression: the leading character is widened to long before shifting,
// so the sum is computed as a long; it is narrowed to int for the lookup
2119 ((((long) (c0 = source[startPosition])) << 18)
2120 + ((c1 = source[startPosition + 1]) << 12)
2121 + ((c2 = source[startPosition + 2]) << 6)
2122 + (c3 = source[startPosition + 3]))
2124 char[][] table = charArray_length[2][(int) hash];
// first scan: compare the token against cached entries up to InternalTableSize
2126 while (++i < InternalTableSize) {
2127 char[] charArray = table[i];
2128 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2131 //---------other side---------
// second scan: entries up to and including index newEntry4
2133 int max = newEntry4;
2134 while (++i <= max) {
2135 char[] charArray = table[i];
2136 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2139 //--------add the entry-------
// the next slot index is checked against the bucket size
// (the handling of that case is not visible here)
2140 if (++max >= InternalTableSize)
// miss: allocate the array, remember it in r and cache it at slot max
2143 table[max] = (r = new char[] { c0, c1, c2, c3 });
// Returns a char[] for the five-character token starting at startPosition,
// reusing a previously cached array with the same characters when one is
// found. A hash of the five characters picks a bucket of
// charArray_length[3]; the bucket is scanned by two loops, the second bounded
// by newEntry5. On a miss a new array is created and stored in the bucket.
2149 final char[] optimizedCurrentTokenSource5() {
2150 //try to return the same char[] build only once
2152 char c0, c1, c2, c3, c4;
// hash expression: the two leading characters are widened to long before
// shifting; the result is narrowed to int for the bucket lookup
2154 ((((long) (c0 = source[startPosition])) << 24)
2155 + (((long) (c1 = source[startPosition + 1])) << 18)
2156 + ((c2 = source[startPosition + 2]) << 12)
2157 + ((c3 = source[startPosition + 3]) << 6)
2158 + (c4 = source[startPosition + 4]))
2160 char[][] table = charArray_length[3][(int) hash];
// first scan: compare the token against cached entries up to InternalTableSize
2162 while (++i < InternalTableSize) {
2163 char[] charArray = table[i];
2164 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2167 //---------other side---------
// second scan: entries up to and including index newEntry5
2169 int max = newEntry5;
2170 while (++i <= max) {
2171 char[] charArray = table[i];
2172 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2175 //--------add the entry-------
// the next slot index is checked against the bucket size
// (the handling of that case is not visible here)
2176 if (++max >= InternalTableSize)
// miss: allocate the array, remember it in r and cache it at slot max
2179 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
// Returns a char[] for the six-character token starting at startPosition,
// reusing a previously cached array with the same characters when one is
// found. A hash of the six characters picks a bucket of
// charArray_length[4]; the bucket is scanned by two loops, the second bounded
// by newEntry6. On a miss a new array is created and stored in the bucket.
2185 final char[] optimizedCurrentTokenSource6() {
2186 //try to return the same char[] build only once
2188 char c0, c1, c2, c3, c4, c5;
// hash expression: the three leading characters are widened to long before
// shifting; the result is narrowed to int for the bucket lookup.
// NOTE(review): the shifts are 32/24/18/12/6, whereas the shorter variants
// step by 6 bits throughout; this only influences hash distribution, but
// confirm whether 30 was intended for the first term.
2190 ((((long) (c0 = source[startPosition])) << 32)
2191 + (((long) (c1 = source[startPosition + 1])) << 24)
2192 + (((long) (c2 = source[startPosition + 2])) << 18)
2193 + ((c3 = source[startPosition + 3]) << 12)
2194 + ((c4 = source[startPosition + 4]) << 6)
2195 + (c5 = source[startPosition + 5]))
2197 char[][] table = charArray_length[4][(int) hash];
// first scan: compare the token against cached entries up to InternalTableSize
2199 while (++i < InternalTableSize) {
2200 char[] charArray = table[i];
2201 if ((c0 == charArray[0])
2202 && (c1 == charArray[1])
2203 && (c2 == charArray[2])
2204 && (c3 == charArray[3])
2205 && (c4 == charArray[4])
2206 && (c5 == charArray[5]))
2209 //---------other side---------
// second scan: entries up to and including index newEntry6
2211 int max = newEntry6;
2212 while (++i <= max) {
2213 char[] charArray = table[i];
2214 if ((c0 == charArray[0])
2215 && (c1 == charArray[1])
2216 && (c2 == charArray[2])
2217 && (c3 == charArray[3])
2218 && (c4 == charArray[4])
2219 && (c5 == charArray[5]))
2222 //--------add the entry-------
// the next slot index is checked against the bucket size
// (the handling of that case is not visible here)
2223 if (++max >= InternalTableSize)
// miss: allocate the array, remember it in r and cache it at slot max
2226 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
// Records the line separator held in currentCharacter (located at
// currentPosition - 1) in the lineEnds table.
// For '\r' the position is appended and, when the next source character is
// '\n', the stored line end is moved onto that '\n' so a CR+LF pair counts as
// one separator. For '\n' the previous entry is updated in place when it was
// a '\r' directly before it (wasAcr); otherwise the position is appended.
// lineEnds grows by INCREMENT entries whenever an append overflows it.
// Declared to throw InvalidInputException; no throw is visible in this block.
2231 public final void pushLineSeparator() throws InvalidInputException {
2232 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2233 final int INCREMENT = 250;
2235 if (this.checkNonExternalizedStringLiterals) {
2236 // reinitialize the current line for non externalize strings purpose
2239 //currentCharacter is at position currentPosition-1
2242 if (currentCharacter == '\r') {
2243 int separatorPos = currentPosition - 1;
// guard: the last recorded line end is already at or beyond this position
// (the action taken in that case is not visible here)
2244 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2246 //System.out.println("CR-" + separatorPos);
2248 lineEnds[++linePtr] = separatorPos;
// table full: linePtr was incremented before the failed store, so after
// growing the array the same index is simply written again
2249 } catch (IndexOutOfBoundsException e) {
2250 //linePtr value is correct
2251 int oldLength = lineEnds.length;
2252 int[] old = lineEnds;
2253 lineEnds = new int[oldLength + INCREMENT];
2254 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2255 lineEnds[linePtr] = separatorPos;
2257 // look-ahead for merged cr+lf
2259 if (source[currentPosition] == '\n') {
2260 //System.out.println("look-ahead LF-" + currentPosition);
// the line end just recorded is moved from the CR onto the following LF
2261 lineEnds[linePtr] = currentPosition;
// reading past the end of source during the look-ahead lands here
2267 } catch (IndexOutOfBoundsException e) {
2272 if (currentCharacter == '\n') {
2273 //must merge eventual cr followed by lf
// the previous entry is the CR immediately before this LF: overwrite it
2274 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2275 //System.out.println("merge LF-" + (currentPosition - 1));
2276 lineEnds[linePtr] = currentPosition - 1;
2278 int separatorPos = currentPosition - 1;
// same guard as in the CR branch
2279 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2281 // System.out.println("LF-" + separatorPos);
2283 lineEnds[++linePtr] = separatorPos;
// table full: grow by INCREMENT and store at the already incremented index
2284 } catch (IndexOutOfBoundsException e) {
2285 //linePtr value is correct
2286 int oldLength = lineEnds.length;
2287 int[] old = lineEnds;
2288 lineEnds = new int[oldLength + INCREMENT];
2289 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2290 lineEnds[linePtr] = separatorPos;
// Variant of pushLineSeparator() for a '\r' or '\n' that was read as a
// unicode escape. The recorded position is currentPosition - 6 instead of
// currentPosition - 1 (presumably the start of the six-character escape -
// confirm). As in pushLineSeparator(), a CR followed by LF is merged into one
// entry and lineEnds grows by INCREMENT entries when an append overflows it.
2297 public final void pushUnicodeLineSeparator() {
2298 // isUnicode means that the \r or \n has been read as a unicode character
2300 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2302 final int INCREMENT = 250;
2303 //currentCharacter is at position currentPosition-1
2305 if (this.checkNonExternalizedStringLiterals) {
2306 // reinitialize the current line for non externalize strings purpose
2311 if (currentCharacter == '\r') {
2312 int separatorPos = currentPosition - 6;
// guard: the last recorded line end is already at or beyond this position
// (the action taken in that case is not visible here)
2313 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2315 //System.out.println("CR-" + separatorPos);
2317 lineEnds[++linePtr] = separatorPos;
// table full: linePtr was incremented before the failed store, so after
// growing the array the same index is simply written again
2318 } catch (IndexOutOfBoundsException e) {
2319 //linePtr value is correct
2320 int oldLength = lineEnds.length;
2321 int[] old = lineEnds;
2322 lineEnds = new int[oldLength + INCREMENT];
2323 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2324 lineEnds[linePtr] = separatorPos;
2326 // look-ahead for merged cr+lf
// NOTE(review): pushLineSeparator() has a catch for IndexOutOfBoundsException
// after this look-ahead; none is visible here, so a CR escape at the very end
// of source may throw from this read - confirm.
2327 if (source[currentPosition] == '\n') {
2328 //System.out.println("look-ahead LF-" + currentPosition);
2329 lineEnds[linePtr] = currentPosition;
2337 if (currentCharacter == '\n') {
2338 //must merge eventual cr followed by lf
// the previous entry is a CR located just before this LF escape: overwrite it
2339 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2340 //System.out.println("merge LF-" + (currentPosition - 1));
2341 lineEnds[linePtr] = currentPosition - 6;
2343 int separatorPos = currentPosition - 6;
// same guard as in the CR branch
2344 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2346 // System.out.println("LF-" + separatorPos);
2348 lineEnds[++linePtr] = separatorPos;
// table full: grow by INCREMENT and store at the already incremented index
2349 } catch (IndexOutOfBoundsException e) {
2350 //linePtr value is correct
2351 int oldLength = lineEnds.length;
2352 int[] old = lineEnds;
2353 lineEnds = new int[oldLength + INCREMENT];
2354 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2355 lineEnds[linePtr] = separatorPos;
// Pushes the comment that was just scanned onto the comment stack.
// commentStops receives the current position, stored as a positive value
// when isJavadoc is true and negated otherwise; commentStarts receives
// startPosition at the same index. Both arrays grow by 30 entries when the
// push overflows commentStops.
//
// isJavadoc: selects the sign of the stored stop position.
2362 public final void recordComment(boolean isJavadoc) {
2364 // a new annotation comment is recorded
2366 commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
// stack full: commentPtr was incremented before the failed store, so the
// value is written again at the same index once the array has been enlarged
2367 } catch (IndexOutOfBoundsException e) {
2368 int oldStackLength = commentStops.length;
2369 int[] oldStack = commentStops;
2370 commentStops = new int[oldStackLength + 30];
2371 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2372 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2373 //grows the positions buffers too
// commentStarts is sized from the old commentStops length, keeping the two
// arrays the same length
2374 int[] old = commentStarts;
2375 commentStarts = new int[oldStackLength + 30];
2376 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2379 //the buffer is of a correct size here
2380 commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can start again from begin.
 *
 * @param begin position assigned to initialPosition, startPosition and
 *              currentPosition
 * @param end   position used to derive eofPosition (see below)
 */
2382 public void resetTo(int begin, int end) {
2383 //reset the scanner to a given position where it may rescan again
// NOTE(review): lines between the comment above and the assignment below are
// not visible in this excerpt.
2386 initialPosition = startPosition = currentPosition = begin;
// eofPosition is end + 1, except when end is Integer.MAX_VALUE, where adding
// one would overflow and end is used unchanged.
2387 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2388 commentPtr = -1; // reset comment stack
/**
 * Handles an escape sequence inside a single-quoted string.
 *
 * Reads the next character directly from source (advancing currentPosition)
 * and switches on it; the visible branches replace currentCharacter with a
 * single quote or a backslash.
 *
 * NOTE(review): the case labels of the switch are not visible in this
 * excerpt, so which input character maps to which assignment must be
 * confirmed against the full file.
 *
 * @throws InvalidInputException declared by the signature; no throw statement
 *         is visible in this excerpt
 */
2391 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2392 // the string with "\\u" is a legal string of two chars \ and u
2393 //thus we use a direct access to the source (for regular cases).
// The commented-out lines below are disabled unicode-escape handling.
2395 // if (unicodeAsBackSlash) {
2396 // // consume next character
2397 // unicodeAsBackSlash = false;
2398 // if (((currentCharacter = source[currentPosition++]) == '\\')
2399 // && (source[currentPosition] == 'u')) {
2400 // getNextUnicodeChar();
2402 // if (withoutUnicodePtr != 0) {
2403 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Consume the character that follows the backslash.
2407 currentCharacter = source[currentPosition++];
2408 switch (currentCharacter) {
2410 currentCharacter = '\'';
2413 currentCharacter = '\\';
2416 currentCharacter = '\\';
/**
 * Handles an escape sequence inside a double-quoted string.
 *
 * Reads the next character directly from source (advancing currentPosition)
 * and replaces currentCharacter with the escaped value. The visible branches
 * produce tab, line feed, carriage return, double quote, single quote,
 * backslash and dollar sign; the backspace and form-feed assignments are
 * commented out. Any other character is tried as an octal escape of up to
 * three digits.
 *
 * NOTE(review): the case labels and several closing/else lines are not
 * visible in this excerpt; confirm the exact mapping in the full file.
 *
 * @throws InvalidInputException with INVALID_ESCAPE; the condition guarding
 *         the throw is not visible in this excerpt
 */
2421 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2422 // the string with "\\u" is a legal string of two chars \ and u
2423 //thus we use a direct access to the source (for regular cases).
// The commented-out lines below are disabled unicode-escape handling.
2425 // if (unicodeAsBackSlash) {
2426 // // consume next character
2427 // unicodeAsBackSlash = false;
2428 // if (((currentCharacter = source[currentPosition++]) == '\\')
2429 // && (source[currentPosition] == 'u')) {
2430 // getNextUnicodeChar();
2432 // if (withoutUnicodePtr != 0) {
2433 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Consume the character that follows the backslash.
2437 currentCharacter = source[currentPosition++];
2438 switch (currentCharacter) {
2440 // currentCharacter = '\b';
2443 currentCharacter = '\t';
2446 currentCharacter = '\n';
2449 // currentCharacter = '\f';
2452 currentCharacter = '\r';
2455 currentCharacter = '\"';
2458 currentCharacter = '\'';
2461 currentCharacter = '\\';
2464 currentCharacter = '$';
2467 // -----------octal escape--------------
2469 // OctalDigit OctalDigit
2470 // ZeroToThree OctalDigit OctalDigit
// number accumulates the octal value; it starts as the numeric value of the
// first character after the backslash.
2472 int number = Character.getNumericValue(currentCharacter);
2473 if (number >= 0 && number <= 7) {
// A first digit above 3 allows at most one more octal digit.
2474 boolean zeroToThreeNot = number > 3;
2475 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2476 int digit = Character.getNumericValue(currentCharacter);
2477 if (digit >= 0 && digit <= 7) {
2478 number = (number * 8) + digit;
2479 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2480 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2483 digit = Character.getNumericValue(currentCharacter);
2484 if (digit >= 0 && digit <= 7) {
2485 // has read \ZeroToThree OctalDigit OctalDigit
2486 number = (number * 8) + digit;
2487 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2491 } else { // has read \OctalDigit NonDigit--> ignore last character
2494 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2497 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): the condition guarding this throw is not visible here.
2501 throw new InvalidInputException(INVALID_ESCAPE);
2502 currentCharacter = (char) number;
2505 // throw new InvalidInputException(INVALID_ESCAPE);
2509 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2510 // return scanIdentifierOrKeyword( false );
/**
 * Scans the remainder of an identifier and classifies it as a variable, a
 * PHP keyword, or a plain identifier.
 *
 * All remaining identifier characters are consumed first. The token text
 * (source from startPosition up to currentPosition) is then copied into a
 * lower-cased buffer, so keyword recognition is case-insensitive, and
 * matched by dispatching on the first letter and then comparing the
 * remaining letters one by one.
 *
 * NOTE(review): several control-flow lines (the length dispatch, some case
 * labels and some returns) are not visible in this excerpt; the comments
 * below describe only what the visible lines show.
 *
 * @param isVariable presumably true when the token is a variable name, in
 *                   which case TokenNameVariable is returned -- the guarding
 *                   condition is not visible here; confirm
 * @return TokenNameVariable, a keyword token constant, or TokenNameIdentifier
 * @throws InvalidInputException declared by the signature; no throw statement
 *         is visible in this excerpt
 */
2513 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2516 //first dispatch on the first char.
2517 //then the length. If there are several
2518 //keywords with the same length AND the same first char, then do another
2519 //dispatch on the second char :-)...cool....but fast !
2521 useAssertAsAnIndentifier = false;
// Consume every remaining identifier character of the token.
2523 while (getNextCharAsJavaIdentifierPart()) {
2527 return TokenNameVariable;
2532 // if (withoutUnicodePtr == 0)
2534 //quick test on length == 1 but not on length > 12 while most identifier
2535 //have a length which is <= 12...but there are lots of identifier with
// A one-character token cannot be a keyword.
2539 if ((length = currentPosition - startPosition) == 1)
2540 return TokenNameIdentifier;
// Lower-cased copy of the token text; all comparisons below use this copy.
2542 data = new char[length];
2543 index = startPosition;
2544 for (int i = 0; i < length; i++) {
2545 data[i] = Character.toLowerCase(source[index + i]);
2549 // if ((length = withoutUnicodePtr) == 1)
2550 // return TokenNameIdentifier;
2551 // // data = withoutUnicodeBuffer;
2552 // data = new char[withoutUnicodeBuffer.length];
2553 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2554 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
// NOTE(review): index is presumably reset to 0 on a line not visible here;
// the data[++index] comparisons below rely on it pointing at the first letter.
2559 firstLetter = data[index];
2560 switch (firstLetter) {
2562 case 'a' : // as, and (array is commented out below)
2565 if ((data[++index] == 's')) {
2568 return TokenNameIdentifier;
2571 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2572 return TokenNameAND;
2574 return TokenNameIdentifier;
2577 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2578 // return TokenNamearray;
2580 // return TokenNameIdentifier;
2582 return TokenNameIdentifier;
2587 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2588 return TokenNamebreak;
2590 return TokenNameIdentifier;
2592 return TokenNameIdentifier;
2595 case 'c' : //case class continue
2598 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2599 return TokenNamecase;
2601 return TokenNameIdentifier;
2603 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2604 return TokenNameclass;
2606 return TokenNameIdentifier;
2608 if ((data[++index] == 'o')
2609 && (data[++index] == 'n')
2610 && (data[++index] == 't')
2611 && (data[++index] == 'i')
2612 && (data[++index] == 'n')
2613 && (data[++index] == 'u')
2614 && (data[++index] == 'e'))
2615 return TokenNamecontinue;
2617 return TokenNameIdentifier;
2619 return TokenNameIdentifier;
2622 case 'd' : //define default do
2625 if ((data[++index] == 'o'))
2628 return TokenNameIdentifier;
2630 if ((data[++index] == 'e')
2631 && (data[++index] == 'f')
2632 && (data[++index] == 'i')
2633 && (data[++index] == 'n')
2634 && (data[++index] == 'e'))
2635 return TokenNamedefine;
2637 return TokenNameIdentifier;
2639 if ((data[++index] == 'e')
2640 && (data[++index] == 'f')
2641 && (data[++index] == 'a')
2642 && (data[++index] == 'u')
2643 && (data[++index] == 'l')
2644 && (data[++index] == 't'))
2645 return TokenNamedefault;
2647 return TokenNameIdentifier;
2649 return TokenNameIdentifier;
2651 case 'e' : //echo else elseif extends endif endfor endwhile endswitch endforeach
2654 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2655 return TokenNameecho;
// NOTE(review): this fallback reads data[index] without resetting index. It is
// only at offset 1 when the 'c' test itself failed; if 'c' matched but a later
// letter did not, index has advanced further, the 'l','s','e' comparisons
// start at the wrong offset, and the last data[++index] may run past the end
// of data (e.g. a 4-letter input "echl") -- confirm and consider resetting.
2656 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2657 return TokenNameelse;
2659 return TokenNameIdentifier;
2661 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2662 return TokenNameendif;
2664 return TokenNameIdentifier;
2666 if ((data[++index] == 'n')
2667 && (data[++index] == 'd')
2668 && (data[++index] == 'f')
2669 && (data[++index] == 'o')
2670 && (data[++index] == 'r'))
2671 return TokenNameendfor;
// NOTE(review): same pattern as the echo/else fallback above -- index is not
// reset before the elseif comparison, so a partial "en..." match shifts the
// offsets used below; confirm.
2673 (data[index] == 'l')
2674 && (data[++index] == 's')
2675 && (data[++index] == 'e')
2676 && (data[++index] == 'i')
2677 && (data[++index] == 'f'))
2678 return TokenNameelseif;
2680 return TokenNameIdentifier;
2682 if ((data[++index] == 'x')
2683 && (data[++index] == 't')
2684 && (data[++index] == 'e')
2685 && (data[++index] == 'n')
2686 && (data[++index] == 'd')
2687 && (data[++index] == 's'))
2688 return TokenNameextends;
2690 return TokenNameIdentifier;
2691 case 8 : // endwhile
2692 if ((data[++index] == 'n')
2693 && (data[++index] == 'd')
2694 && (data[++index] == 'w')
2695 && (data[++index] == 'h')
2696 && (data[++index] == 'i')
2697 && (data[++index] == 'l')
2698 && (data[++index] == 'e'))
2699 return TokenNameendwhile;
2701 return TokenNameIdentifier;
2702 case 9 : // endswitch
2703 if ((data[++index] == 'n')
2704 && (data[++index] == 'd')
2705 && (data[++index] == 's')
2706 && (data[++index] == 'w')
2707 && (data[++index] == 'i')
2708 && (data[++index] == 't')
2709 && (data[++index] == 'c')
2710 && (data[++index] == 'h'))
2711 return TokenNameendswitch;
2713 return TokenNameIdentifier;
2714 case 10 : // endforeach
2715 if ((data[++index] == 'n')
2716 && (data[++index] == 'd')
2717 && (data[++index] == 'f')
2718 && (data[++index] == 'o')
2719 && (data[++index] == 'r')
2720 && (data[++index] == 'e')
2721 && (data[++index] == 'a')
2722 && (data[++index] == 'c')
2723 && (data[++index] == 'h'))
2724 return TokenNameendforeach;
2726 return TokenNameIdentifier;
2729 return TokenNameIdentifier;
2732 case 'f' : //for false foreach function
2735 if ((data[++index] == 'o') && (data[++index] == 'r'))
2736 return TokenNamefor;
2738 return TokenNameIdentifier;
2740 if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2741 return TokenNamefalse;
2743 return TokenNameIdentifier;
2744 case 7 : // foreach
2745 if ((data[++index] == 'o')
2746 && (data[++index] == 'r')
2747 && (data[++index] == 'e')
2748 && (data[++index] == 'a')
2749 && (data[++index] == 'c')
2750 && (data[++index] == 'h'))
2751 return TokenNameforeach;
2753 return TokenNameIdentifier;
2754 case 8 : // function
2755 if ((data[++index] == 'u')
2756 && (data[++index] == 'n')
2757 && (data[++index] == 'c')
2758 && (data[++index] == 't')
2759 && (data[++index] == 'i')
2760 && (data[++index] == 'o')
2761 && (data[++index] == 'n'))
2762 return TokenNamefunction;
2764 return TokenNameIdentifier;
2766 return TokenNameIdentifier;
2770 if ((data[++index] == 'l')
2771 && (data[++index] == 'o')
2772 && (data[++index] == 'b')
2773 && (data[++index] == 'a')
2774 && (data[++index] == 'l')) {
2775 return TokenNameglobal;
2778 return TokenNameIdentifier;
2783 if (data[++index] == 'f')
2786 return TokenNameIdentifier;
2788 // if ((data[++index] == 'n') && (data[++index] == 't'))
2789 // return TokenNameint;
2791 // return TokenNameIdentifier;
2793 if ((data[++index] == 'n')
2794 && (data[++index] == 'c')
2795 && (data[++index] == 'l')
2796 && (data[++index] == 'u')
2797 && (data[++index] == 'd')
2798 && (data[++index] == 'e'))
2799 return TokenNameinclude;
2801 return TokenNameIdentifier;
2803 if ((data[++index] == 'n')
2804 && (data[++index] == 'c')
2805 && (data[++index] == 'l')
2806 && (data[++index] == 'u')
2807 && (data[++index] == 'd')
2808 && (data[++index] == 'e')
2809 && (data[++index] == '_')
2810 && (data[++index] == 'o')
2811 && (data[++index] == 'n')
2812 && (data[++index] == 'c')
2813 && (data[++index] == 'e'))
2814 return TokenNameinclude_once;
2816 return TokenNameIdentifier;
2818 return TokenNameIdentifier;
2823 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2824 return TokenNamelist;
2827 return TokenNameIdentifier;
2829 case 'n' : // new null
2832 if ((data[++index] == 'e') && (data[++index] == 'w'))
2833 return TokenNamenew;
2835 return TokenNameIdentifier;
2837 if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2838 return TokenNamenull;
2840 return TokenNameIdentifier;
2843 return TokenNameIdentifier;
2845 case 'o' : // or (old_function is commented out below)
2847 if (data[++index] == 'r') {
2851 // if (length == 12) {
2852 // if ((data[++index] == 'l')
2853 // && (data[++index] == 'd')
2854 // && (data[++index] == '_')
2855 // && (data[++index] == 'f')
2856 // && (data[++index] == 'u')
2857 // && (data[++index] == 'n')
2858 // && (data[++index] == 'c')
2859 // && (data[++index] == 't')
2860 // && (data[++index] == 'i')
2861 // && (data[++index] == 'o')
2862 // && (data[++index] == 'n')) {
2863 // return TokenNameold_function;
2866 return TokenNameIdentifier;
2870 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2871 return TokenNameprint;
2874 return TokenNameIdentifier;
2875 case 'r' : //return require require_once
2877 if ((data[++index] == 'e')
2878 && (data[++index] == 't')
2879 && (data[++index] == 'u')
2880 && (data[++index] == 'r')
2881 && (data[++index] == 'n')) {
2882 return TokenNamereturn;
2884 } else if (length == 7) {
2885 if ((data[++index] == 'e')
2886 && (data[++index] == 'q')
2887 && (data[++index] == 'u')
2888 && (data[++index] == 'i')
2889 && (data[++index] == 'r')
2890 && (data[++index] == 'e')) {
2891 return TokenNamerequire;
2893 } else if (length == 12) {
2894 if ((data[++index] == 'e')
2895 && (data[++index] == 'q')
2896 && (data[++index] == 'u')
2897 && (data[++index] == 'i')
2898 && (data[++index] == 'r')
2899 && (data[++index] == 'e')
2900 && (data[++index] == '_')
2901 && (data[++index] == 'o')
2902 && (data[++index] == 'n')
2903 && (data[++index] == 'c')
2904 && (data[++index] == 'e')) {
2905 return TokenNamerequire_once;
2908 return TokenNameIdentifier;
2910 case 's' : //static switch
// Only the single 't' test advances index before the 'w' fallback below, so
// data[index] there still refers to the second letter.
2913 if (data[++index] == 't')
2914 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2915 return TokenNamestatic;
2917 return TokenNameIdentifier;
2919 (data[index] == 'w')
2920 && (data[++index] == 'i')
2921 && (data[++index] == 't')
2922 && (data[++index] == 'c')
2923 && (data[++index] == 'h'))
2924 return TokenNameswitch;
2926 return TokenNameIdentifier;
2928 return TokenNameIdentifier;
2935 if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2936 return TokenNametrue;
2938 return TokenNameIdentifier;
2939 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2940 // return TokenNamethis;
2943 return TokenNameIdentifier;
2949 if ((data[++index] == 'a') && (data[++index] == 'r'))
2950 return TokenNamevar;
2952 return TokenNameIdentifier;
2955 return TokenNameIdentifier;
2961 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2962 return TokenNamewhile;
2964 return TokenNameIdentifier;
2965 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2966 //return TokenNamewidefp ;
2968 //return TokenNameIdentifier;
2970 return TokenNameIdentifier;
2976 if ((data[++index] == 'o') && (data[++index] == 'r'))
2977 return TokenNameXOR;
2979 return TokenNameIdentifier;
2982 return TokenNameIdentifier;
2985 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already in currentCharacter.
 *
 * Visible paths: a leading 0 followed by 'x' or 'X' is scanned as a
 * hexadecimal integer; a leading 0 followed by more digits is a potential
 * octal that becomes a double when a 'd'/'D' suffix or a '.' follows;
 * otherwise digits, an optional fraction, an optional exponent and an
 * optional 'd'/'D' suffix are consumed. An exponent may carry a '+' or '-'
 * sign and must contain at least one digit.
 *
 * NOTE(review): several closing/else lines are not visible in this excerpt,
 * so the exact nesting of the branches must be confirmed in the full file.
 *
 * @param dotPrefix true when the number was introduced by '.', which makes
 *                  the literal floating from the start and disables the
 *                  hexadecimal/octal handling
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException INVALID_HEXA when the character after the
 *         x/X prefix is not a hexadecimal digit; INVALID_FLOAT when an
 *         exponent is not followed by a digit
 */
2988 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
2990 //when entering this method the currentCharacter is the first
2991 //digit of the number , i.e. it may be preceded by a . when
2994 boolean floating = dotPrefix;
2995 if ((!dotPrefix) && (currentCharacter == '0')) {
2996 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
2997 //force the first char of the hexa number to exist...
2998 // consume next character
2999 unicodeAsBackSlash = false;
3000 currentCharacter = source[currentPosition++];
3001 // if (((currentCharacter = source[currentPosition++]) == '\\')
3002 // && (source[currentPosition] == 'u')) {
3003 // getNextUnicodeChar();
3005 // if (withoutUnicodePtr != 0) {
3006 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3009 if (Character.digit(currentCharacter, 16) == -1)
3010 throw new InvalidInputException(INVALID_HEXA);
// Consume the remaining hexadecimal digits.
3012 while (getNextCharAsDigit(16)) {
3014 // if (getNextChar('l', 'L') >= 0)
3015 // return TokenNameLongLiteral;
3017 return TokenNameIntegerLiteral;
3020 //the hexadecimal branch above returns, so presumably no x or X follows the leading 0 here (NOTE(review): original comment said "there is x or X in the number" -- confirm)
3021 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3022 if (getNextCharAsDigit()) {
3023 //-------------potential octal-----------------
3024 while (getNextCharAsDigit()) {
3027 // if (getNextChar('l', 'L') >= 0) {
3028 // return TokenNameLongLiteral;
3031 // if (getNextChar('f', 'F') >= 0) {
3032 // return TokenNameFloatingPointLiteral;
3035 if (getNextChar('d', 'D') >= 0) {
3036 return TokenNameDoubleLiteral;
3037 } else { //make the distinction between octal and float ....
3038 if (getNextChar('.')) { //bingo ! ....
3039 while (getNextCharAsDigit()) {
3041 if (getNextChar('e', 'E') >= 0) {
3042 // consume next character
3043 unicodeAsBackSlash = false;
3044 currentCharacter = source[currentPosition++];
3045 // if (((currentCharacter = source[currentPosition++]) == '\\')
3046 // && (source[currentPosition] == 'u')) {
3047 // getNextUnicodeChar();
3049 // if (withoutUnicodePtr != 0) {
3050 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Optional exponent sign.
3054 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3055 // consume next character
3056 unicodeAsBackSlash = false;
3057 currentCharacter = source[currentPosition++];
3058 // if (((currentCharacter = source[currentPosition++]) == '\\')
3059 // && (source[currentPosition] == 'u')) {
3060 // getNextUnicodeChar();
3062 // if (withoutUnicodePtr != 0) {
3063 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3064 // currentCharacter;
// The exponent needs at least one digit.
3068 if (!Character.isDigit(currentCharacter))
3069 throw new InvalidInputException(INVALID_FLOAT);
3070 while (getNextCharAsDigit()) {
3073 // if (getNextChar('f', 'F') >= 0)
3074 // return TokenNameFloatingPointLiteral;
3075 getNextChar('d', 'D'); //jump over potential d or D
3076 return TokenNameDoubleLiteral;
3078 return TokenNameIntegerLiteral;
// Integer part of a number that is neither hexadecimal nor potential octal.
3086 while (getNextCharAsDigit()) {
3089 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3090 // return TokenNameLongLiteral;
3092 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3093 while (getNextCharAsDigit()) {
3098 //if floating is true both exponent and suffix may be optional
3100 if (getNextChar('e', 'E') >= 0) {
3102 // consume next character
3103 unicodeAsBackSlash = false;
3104 currentCharacter = source[currentPosition++];
3105 // if (((currentCharacter = source[currentPosition++]) == '\\')
3106 // && (source[currentPosition] == 'u')) {
3107 // getNextUnicodeChar();
3109 // if (withoutUnicodePtr != 0) {
3110 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3114 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3115 unicodeAsBackSlash = false;
3116 currentCharacter = source[currentPosition++];
3117 // if (((currentCharacter = source[currentPosition++]) == '\\')
3118 // && (source[currentPosition] == 'u')) {
3119 // getNextUnicodeChar();
3121 // if (withoutUnicodePtr != 0) {
3122 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// The exponent needs at least one digit.
3126 if (!Character.isDigit(currentCharacter))
3127 throw new InvalidInputException(INVALID_FLOAT);
3128 while (getNextCharAsDigit()) {
3132 if (getNextChar('d', 'D') >= 0)
3133 return TokenNameDoubleLiteral;
3134 // if (getNextChar('f', 'F') >= 0)
3135 // return TokenNameFloatingPointLiteral;
3137 //the long flag has been tested before
3139 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3142 * Search the line number corresponding to a specific position
3145 public final int getLineNumber(int position) {
// Looks position up in lineEnds; only the first linePtr + 1 entries are
// considered (see length below).
// NOTE(review): the return statements and the loop header are not visible in
// this excerpt; the visible comparisons look like a binary search over
// lineEnds with bounds g (low) and d (high) -- confirm in the full file.
3147 if (lineEnds == null)
3149 int length = linePtr + 1;
3152 int g = 0, d = length - 1;
// Compare position with the line end at index m (m is declared on a line not
// visible here) to choose the half to continue in.
3156 if (position < lineEnds[m]) {
3158 } else if (position > lineEnds[m]) {
3164 if (position < lineEnds[m]) {
// Setter taking the new PHP-mode flag.
// NOTE(review): the body is not visible in this excerpt; presumably it stores
// mode in the phpMode field -- confirm in the full file.
3170 public void setPHPMode(boolean mode) {
/**
 * Installs a new source buffer and rewinds the scanner to its beginning.
 *
 * A null argument is replaced by an empty array, so this.source is never
 * null afterwards. initialPosition and currentPosition are reset to 0,
 * containsAssertKeyword is cleared, and withoutUnicodeBuffer is reallocated
 * with the same length as the new source.
 *
 * NOTE(review): some lines of this method are not visible in this excerpt
 * and may reset further fields.
 *
 * @param source the characters to scan, or null for an empty source
 */
3174 public final void setSource(char[] source) {
3175 //the source-buffer is set to sourceString
3177 if (source == null) {
3178 this.source = new char[0];
3180 this.source = source;
3183 initialPosition = currentPosition = 0;
3184 containsAssertKeyword = false;
3185 withoutUnicodeBuffer = new char[this.source.length];
/**
 * Builds a debugging dump of the source with the current token marked.
 *
 * Returns a short "EOF" or "behind the EOF" message followed by the whole
 * source when the scanner is at or past the end. Otherwise the source is
 * split into the text before startPosition, the current token
 * (startPosition up to currentPosition) and the text after it, and the
 * token is framed by "Starts here -->" and "<-- Ends here" markers.
 *
 * @return the textual dump described above
 */
3189 public String toString() {
3190 if (startPosition == source.length)
3191 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3192 if (currentPosition > source.length)
3193 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// Everything before the current token.
3195 char front[] = new char[startPosition];
3196 System.arraycopy(source, 0, front, 0, startPosition);
// Length of the current token; this simplifies to currentPosition - startPosition.
3198 int middleLength = (currentPosition - 1) - startPosition + 1;
3200 if (middleLength > -1) {
3201 middle = new char[middleLength];
3202 System.arraycopy(source, startPosition, middle, 0, middleLength);
3204 middle = new char[0];
// Everything after the current token.
// NOTE(review): end is allocated with one more slot than the number of chars
// copied into it, so its last element stays '\u0000'; if end is appended to
// the result (that line is not visible here) the dump ends with a NUL char.
3207 char end[] = new char[source.length - (currentPosition - 1)];
3208 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3210 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3211 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of a token kind, intended for debugging.
 *
 * Tokens that carry text (error, identifier, variable, numeric and string
 * literals, heredoc, whitespace, comments, HTML) are rendered as
 * Kind(source) using getCurrentTokenSource(); keywords and operators are
 * rendered as their literal spelling. The last visible return produces
 * "not-a-token(act) " followed by the current token source, presumably for
 * any value without its own case.
 *
 * NOTE(review): the switch header and a number of case labels and returns
 * are not visible in this excerpt.
 *
 * @param act the token constant to describe
 * @return the textual form of the token
 */
3214 public final String toStringAction(int act) {
3217 case TokenNameERROR :
3218 return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3219 case TokenNameStopPHP :
3220 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3221 case TokenNameIdentifier :
3222 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3223 case TokenNameVariable :
3224 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3226 return "as"; //$NON-NLS-1$
3227 case TokenNamebreak :
3228 return "break"; //$NON-NLS-1$
3229 case TokenNamecase :
3230 return "case"; //$NON-NLS-1$
3231 case TokenNameclass :
3232 return "class"; //$NON-NLS-1$
3233 case TokenNamecontinue :
3234 return "continue"; //$NON-NLS-1$
3235 case TokenNamedefault :
3236 return "default"; //$NON-NLS-1$
3237 case TokenNamedefine :
3238 return "define"; //$NON-NLS-1$
3240 return "do"; //$NON-NLS-1$
3241 case TokenNameecho :
3242 return "echo"; //$NON-NLS-1$
3243 case TokenNameelse :
3244 return "else"; //$NON-NLS-1$
3245 case TokenNameelseif :
3246 return "elseif"; //$NON-NLS-1$
3247 case TokenNameendfor :
3248 return "endfor"; //$NON-NLS-1$
3249 case TokenNameendforeach :
3250 return "endforeach"; //$NON-NLS-1$
3251 case TokenNameendif :
3252 return "endif"; //$NON-NLS-1$
3253 case TokenNameendswitch :
3254 return "endswitch"; //$NON-NLS-1$
3255 case TokenNameendwhile :
3256 return "endwhile"; //$NON-NLS-1$
3257 case TokenNameextends :
3258 return "extends"; //$NON-NLS-1$
3259 case TokenNamefalse :
3260 return "false"; //$NON-NLS-1$
3262 return "for"; //$NON-NLS-1$
3263 case TokenNameforeach :
3264 return "foreach"; //$NON-NLS-1$
3265 case TokenNamefunction :
3266 return "function"; //$NON-NLS-1$
3267 case TokenNameglobal :
3268 return "global"; //$NON-NLS-1$
3270 return "if"; //$NON-NLS-1$
3271 case TokenNameinclude :
3272 return "include"; //$NON-NLS-1$
3273 case TokenNameinclude_once :
3274 return "include_once"; //$NON-NLS-1$
3275 case TokenNamelist :
3276 return "list"; //$NON-NLS-1$
3278 return "new"; //$NON-NLS-1$
3279 case TokenNamenull :
3280 return "null"; //$NON-NLS-1$
3281 case TokenNameprint :
3282 return "print"; //$NON-NLS-1$
3283 case TokenNamerequire :
3284 return "require"; //$NON-NLS-1$
3285 case TokenNamerequire_once :
3286 return "require_once"; //$NON-NLS-1$
3287 case TokenNamereturn :
3288 return "return"; //$NON-NLS-1$
3289 case TokenNamestatic :
3290 return "static"; //$NON-NLS-1$
3291 case TokenNameswitch :
3292 return "switch"; //$NON-NLS-1$
3293 case TokenNametrue :
3294 return "true"; //$NON-NLS-1$
3296 return "var"; //$NON-NLS-1$
3297 case TokenNamewhile :
3298 return "while"; //$NON-NLS-1$
3299 case TokenNameIntegerLiteral :
3300 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3301 case TokenNameDoubleLiteral :
3302 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3303 case TokenNameStringLiteral :
3304 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3305 case TokenNameStringConstant :
3306 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3307 case TokenNameStringInterpolated :
3308 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3309 case TokenNameHEREDOC :
3310 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3312 case TokenNamePLUS_PLUS :
3313 return "++"; //$NON-NLS-1$
3314 case TokenNameMINUS_MINUS :
3315 return "--"; //$NON-NLS-1$
3316 case TokenNameEQUAL_EQUAL :
3317 return "=="; //$NON-NLS-1$
3318 case TokenNameEQUAL_GREATER :
3319 return "=>"; //$NON-NLS-1$
3320 case TokenNameLESS_EQUAL :
3321 return "<="; //$NON-NLS-1$
3322 case TokenNameGREATER_EQUAL :
3323 return ">="; //$NON-NLS-1$
3324 case TokenNameNOT_EQUAL :
3325 return "!="; //$NON-NLS-1$
3326 case TokenNameLEFT_SHIFT :
3327 return "<<"; //$NON-NLS-1$
3328 case TokenNameRIGHT_SHIFT :
3329 return ">>"; //$NON-NLS-1$
3330 case TokenNamePLUS_EQUAL :
3331 return "+="; //$NON-NLS-1$
3332 case TokenNameMINUS_EQUAL :
3333 return "-="; //$NON-NLS-1$
3334 case TokenNameMULTIPLY_EQUAL :
3335 return "*="; //$NON-NLS-1$
3336 case TokenNameDIVIDE_EQUAL :
3337 return "/="; //$NON-NLS-1$
3338 case TokenNameAND_EQUAL :
3339 return "&="; //$NON-NLS-1$
3340 case TokenNameOR_EQUAL :
3341 return "|="; //$NON-NLS-1$
3342 case TokenNameXOR_EQUAL :
3343 return "^="; //$NON-NLS-1$
3344 case TokenNameREMAINDER_EQUAL :
3345 return "%="; //$NON-NLS-1$
3346 case TokenNameLEFT_SHIFT_EQUAL :
3347 return "<<="; //$NON-NLS-1$
3348 case TokenNameRIGHT_SHIFT_EQUAL :
3349 return ">>="; //$NON-NLS-1$
3350 case TokenNameOR_OR :
3351 return "||"; //$NON-NLS-1$
3352 case TokenNameAND_AND :
3353 return "&&"; //$NON-NLS-1$
3354 case TokenNamePLUS :
3355 return "+"; //$NON-NLS-1$
3356 case TokenNameMINUS :
3357 return "-"; //$NON-NLS-1$
3358 case TokenNameMINUS_GREATER :
3361 return "!"; //$NON-NLS-1$
3362 case TokenNameREMAINDER :
3363 return "%"; //$NON-NLS-1$
3365 return "^"; //$NON-NLS-1$
3367 return "&"; //$NON-NLS-1$
3368 case TokenNameMULTIPLY :
3369 return "*"; //$NON-NLS-1$
3371 return "|"; //$NON-NLS-1$
3372 case TokenNameTWIDDLE :
3373 return "~"; //$NON-NLS-1$
3374 case TokenNameTWIDDLE_EQUAL :
3375 return "~="; //$NON-NLS-1$
3376 case TokenNameDIVIDE :
3377 return "/"; //$NON-NLS-1$
3378 case TokenNameGREATER :
3379 return ">"; //$NON-NLS-1$
3380 case TokenNameLESS :
3381 return "<"; //$NON-NLS-1$
3382 case TokenNameLPAREN :
3383 return "("; //$NON-NLS-1$
3384 case TokenNameRPAREN :
3385 return ")"; //$NON-NLS-1$
3386 case TokenNameLBRACE :
3387 return "{"; //$NON-NLS-1$
3388 case TokenNameRBRACE :
3389 return "}"; //$NON-NLS-1$
3390 case TokenNameLBRACKET :
3391 return "["; //$NON-NLS-1$
3392 case TokenNameRBRACKET :
3393 return "]"; //$NON-NLS-1$
3394 case TokenNameSEMICOLON :
3395 return ";"; //$NON-NLS-1$
3396 case TokenNameQUESTION :
3397 return "?"; //$NON-NLS-1$
3398 case TokenNameCOLON :
3399 return ":"; //$NON-NLS-1$
3400 case TokenNameCOMMA :
3401 return ","; //$NON-NLS-1$
3403 return "."; //$NON-NLS-1$
3404 case TokenNameEQUAL :
3405 return "="; //$NON-NLS-1$
3408 case TokenNameDOLLAR_LBRACE :
3411 return "EOF"; //$NON-NLS-1$
3412 case TokenNameWHITESPACE :
3413 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3414 case TokenNameCOMMENT_LINE :
3415 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3416 case TokenNameCOMMENT_BLOCK :
3417 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3418 case TokenNameCOMMENT_PHPDOC :
3419 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3420 case TokenNameHTML :
3421 return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3423 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with assert mode disabled.
 *
 * Delegates to the four-argument constructor, passing false as the last
 * (assertMode) argument.
 *
 * @param tokenizeComments forwarded unchanged to the four-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the four-argument constructor
 * @param checkNonExternalizedStringLiterals forwarded unchanged to the
 *        four-argument constructor
 */
3427 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3428 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3432 boolean tokenizeComments,
3433 boolean tokenizeWhiteSpace,
3434 boolean checkNonExternalizedStringLiterals,
3435 boolean assertMode) {
// Four-argument constructor (its header line is not visible in this excerpt):
// stores each flag in the field of the same name.
// eofPosition starts at Integer.MAX_VALUE.
3436 this.eofPosition = Integer.MAX_VALUE;
3437 this.tokenizeComments = tokenizeComments;
3438 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3439 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3440 this.assertMode = assertMode;
/**
 * Runs the NLS tag check for the current line, if one is being tracked.
 *
 * Passes currentLine to parseTags.
 * NOTE(review): the statement controlled by the null test is not visible in
 * this excerpt; presumably it returns early when currentLine is null.
 *
 * @throws InvalidInputException propagated from parseTags
 */
3443 private void checkNonExternalizeString() throws InvalidInputException {
3444 if (currentLine == null)
3446 parseTags(currentLine);
3449 private void parseTags(NLSLine line) throws InvalidInputException {
3450 String s = new String(getCurrentTokenSource());
3451 int pos = s.indexOf(TAG_PREFIX);
3452 int lineLength = line.size();
3454 int start = pos + TAG_PREFIX_LENGTH;
3455 int end = s.indexOf(TAG_POSTFIX, start);
3456 String index = s.substring(start, end);
3459 i = Integer.parseInt(index) - 1;
3460 // Tags are one based not zero based.
3461 } catch (NumberFormatException e) {
3462 i = -1; // we don't want to consider this as a valid NLS tag
3464 if (line.exists(i)) {
3467 pos = s.indexOf(TAG_PREFIX, start);
3470 this.nonNLSStrings = new StringLiteral[lineLength];
3471 int nonNLSCounter = 0;
3472 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3473 StringLiteral literal = (StringLiteral) iterator.next();
3474 if (literal != null) {
3475 this.nonNLSStrings[nonNLSCounter++] = literal;
3478 if (nonNLSCounter == 0) {
3479 this.nonNLSStrings = null;
3483 this.wasNonExternalizedStringLiteral = true;
3484 if (nonNLSCounter != lineLength) {
3485 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);