1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
23 - getNextToken() which return the current type of the token
24 (this value is not memorized by the scanner)
25 - getCurrentTokenSource() which provides with the token "REAL" source
26 (aka all unicode have been transformed into a correct char)
27 - sourceStart gives the position into the stream
28 - currentPosition-1 gives the sourceEnd position into the stream
// Scanner state and constants.
// NOTE(review): this listing omits some lines, so a few members referenced by the
// methods of this class (for example the source array) are not declared in the
// visible part of this section.
// --- assert keyword handling ---
32 private boolean assertMode;
33 public boolean useAssertAsAnIndentifier = false;
34 //flag indicating if processed source contains occurrences of keyword assert
35 public boolean containsAssertKeyword = false;
// --- scanning mode flags ---
37 public boolean recordLineSeparator;
38 public boolean phpMode = false;
// --- cursor: last character read and positions into the source ---
40 public char currentCharacter;
41 public int startPosition;
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after this position eof are generated instead of real token from the source
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (aka a part)
50 //of a entire very large stream
// --- scratch buffer holding a token's characters once escapes have been resolved ---
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr;
56 //when == 0 ==> no unicode in the current token
57 public boolean unicodeAsBackSlash = false;
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
// presumably parallel arrays of comment boundaries, filled up to commentPtr - TODO confirm
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // no comment test with commentPtr value -1
67 //diet parsing support - jump over some method body when requested
68 public boolean diet = false;
70 //support for the poor-line-debuggers ....
71 //remember the position of the cr/lf
72 public int[] lineEnds = new int[250];
73 public int linePtr = -1;
74 public boolean wasAcr = false;
// --- message keys for scan errors (e.g. UNTERMINATED_STRING is passed to
// InvalidInputException in getNextToken) ---
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
// One shared single-character array per lowercase letter a..z.
// presumably handed out for one-letter identifiers instead of allocating - TODO confirm
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
// Placeholder entry (six NUL characters) used to pre-fill every slot of charArray_length.
119 static final char[] initCharArray =
120 new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
121 static final int TableSize = 30, InternalTableSize = 6;
123 public static final int OptimizedLength = 6;
// Per-instance table of char arrays with dimensions
// [OptimizedLength][TableSize][InternalTableSize].
125 final char[][][][] charArray_length =
126 new char[OptimizedLength][TableSize][InternalTableSize][];
127 // support for detecting non-externalized string literals
128 int currentLineNr = -1;
129 int previousLineNr = -1;
130 NLSLine currentLine = null;
131 List lines = new ArrayList();
132 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136 public StringLiteral[] nonNLSStrings = null;
137 public boolean checkNonExternalizedStringLiterals = true;
138 public boolean wasNonExternalizedStringLiteral = false;
// Fills every slot of charArray_length with initCharArray. The outer bound 6 equals
// OptimizedLength.
// NOTE(review): the header of the enclosing initializer and its closing braces are
// not visible in this listing.
141 for (int i = 0; i < 6; i++) {
142 for (int j = 0; j < TableSize; j++) {
143 for (int k = 0; k < InternalTableSize; k++) {
144 charArray_length[i][j][k] = initCharArray;
// NOTE(review): the declaration below ends with a comma; its continuation lines are
// not visible in this listing.
149 static int newEntry2 = 0,
// Bracket kind indices; BracketKinds is one past the largest index.
155 public static final int RoundBracket = 0;
156 public static final int SquareBracket = 1;
157 public static final int CurlyBracket = 2;
158 public static final int BracketKinds = 3;
160 public static final boolean DEBUG = false;
// Two-argument convenience constructor: forwards both flags to the three-argument
// constructor, passing false as the third argument.
// NOTE(review): the three-argument constructor is not visible in this listing, so
// the meaning of that third flag must be confirmed there.
164 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
165 this(tokenizeComments, tokenizeWhiteSpace, false);
169 * Determines if the specified character is
170 * permissible as the first character in a PHP identifier
// Accepts any character for which Character.isLetter(ch) is true, and any character
// in the range 0x7F..0xFF inclusive.
// NOTE(review): listing line 174 is absent, so the boolean expression below may have
// one more alternative than is shown here - confirm against the full source.
172 public static boolean isPHPIdentifierStart(char ch) {
173 return Character.isLetter(ch)
175 || (0x7F <= ch && ch <= 0xFF);
179 * Determines if the specified character may be part of a PHP identifier as
180 * other than the first character
// Accepts any character for which Character.isLetterOrDigit(ch) is true, and any
// character in the range 0x7F..0xFF inclusive.
// NOTE(review): listing line 184 is absent, so the boolean expression below may have
// one more alternative than is shown here - confirm against the full source.
182 public static boolean isPHPIdentifierPart(char ch) {
183 return Character.isLetterOrDigit(ch)
185 || (0x7F <= ch && ch <= 0xFF);
// Reports whether currentPosition equals the length of the source array.
188 public final boolean atEnd() {
189 // This code is not relevant if source is
190 // Only a part of the real stream input
192 return source.length == currentPosition;
// Returns the characters of the current token, whose length is
// currentPosition - startPosition.
// NOTE(review): the case labels of the switch and the copy that fills result are
// not visible in this listing.
194 public char[] getCurrentIdentifierSource() {
195 //return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer path below is disabled (commented out).
198 // if (withoutUnicodePtr != 0)
199 // //0 is used as a fast test flag so the real first char is in position 1
201 // withoutUnicodeBuffer,
203 // result = new char[withoutUnicodePtr],
205 // withoutUnicodePtr);
207 int length = currentPosition - startPosition;
// Six switch branches delegate to optimizedCurrentTokenSource1..6.
// presumably one branch per length 1..6, matching OptimizedLength - TODO confirm
208 switch (length) { // see OptimizedLength
210 return optimizedCurrentTokenSource1();
212 return optimizedCurrentTokenSource2();
214 return optimizedCurrentTokenSource3();
216 return optimizedCurrentTokenSource4();
218 return optimizedCurrentTokenSource5();
220 return optimizedCurrentTokenSource6();
// Otherwise a fresh array of exactly length characters is allocated for the result.
226 result = new char[length],
// Returns the position of the last character of the current token, i.e. one before
// currentPosition.
232 public int getCurrentTokenEndPosition() {
233 return this.currentPosition - 1;
// Returns the characters of the current token in a newly allocated array of
// currentPosition - startPosition elements.
// NOTE(review): the copy call that fills result and the return statement are not
// fully visible in this listing.
235 public final char[] getCurrentTokenSource() {
236 // Return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer path below is disabled (commented out).
239 // if (withoutUnicodePtr != 0)
240 // // 0 is used as a fast test flag so the real first char is in position 1
242 // withoutUnicodeBuffer,
244 // result = new char[withoutUnicodePtr],
246 // withoutUnicodePtr);
252 result = new char[length = currentPosition - startPosition],
// Variant of getCurrentTokenSource() that measures the token from the caller-supplied
// startPos instead of startPosition: the result array has currentPosition - startPos
// elements.
// NOTE(review): the copy call that fills result and the return statement are not
// fully visible in this listing.
259 public final char[] getCurrentTokenSource(int startPos) {
260 // Return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer path below is disabled (commented out).
263 // if (withoutUnicodePtr != 0)
264 // // 0 is used as a fast test flag so the real first char is in position 1
266 // withoutUnicodeBuffer,
268 // result = new char[withoutUnicodePtr],
270 // withoutUnicodePtr);
276 result = new char[length = currentPosition - startPos],
// Returns the content of the current string token without its two enclosing quote
// characters.
283 public final char[] getCurrentTokenSourceString() {
284 //return the token REAL source (aka unicodes are precomputed).
285 //REMOVE the two " that are at the beginning and the end.
// When the scratch buffer is in use (withoutUnicodePtr != 0), copy
// withoutUnicodePtr - 2 characters from it, starting at index 2.
288 if (withoutUnicodePtr != 0)
289 //0 is used as a fast test flag so the real first char is in position 1
290 System.arraycopy(withoutUnicodeBuffer, 2,
291 //2 is 1 (real start) + 1 (to jump over the ")
292 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// Otherwise the result is sized from the token span minus the two quote characters.
// NOTE(review): the copy call for this branch is only partly visible in this listing.
298 result = new char[length = currentPosition - startPosition - 2],
// Returns the position at which the current token starts (startPosition).
304 public int getCurrentTokenStartPosition() {
305 return this.startPosition;
308 * Search the source position corresponding to the end of a given line number
310 * Line numbers are 1-based, and relative to the scanner initialPosition.
311 * Character positions are 0-based.
313 * In case the given line number is inconsistent, answers -1.
// For an accepted line number the answer is lineEnds[lineNumber - 1].
// NOTE(review): the values returned by the three guards below are not visible in
// this listing.
// NOTE(review): the guards compare against lineEnds.length (the array capacity,
// initially 250) and do not consult linePtr - confirm that slots above linePtr are
// meant to be readable here.
315 public final int getLineEnd(int lineNumber) {
317 if (lineEnds == null)
319 if (lineNumber >= lineEnds.length)
324 if (lineNumber == lineEnds.length - 1)
326 return lineEnds[lineNumber - 1];
327 // next line start one character behind the lineEnd of the previous line
// NOTE(review): the comment above matches the one in getLineStart and looks copied
// from there; it does not describe the return statement of this method.
330 * Search the source position corresponding to the beginning of a given line number
332 * Line numbers are 1-based, and relative to the scanner initialPosition.
333 * Character positions are 0-based.
335 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
337 * In case the given line number is inconsistent, answers -1.
// Answers either initialPosition or lineEnds[lineNumber - 2] + 1, i.e. the character
// following the recorded end of the previous line.
// NOTE(review): the condition selecting initialPosition and the values returned by
// the guards are not visible in this listing; presumably initialPosition is the
// answer for the first line - TODO confirm.
// NOTE(review): the bound is lineEnds.length (array capacity), not linePtr.
339 public final int getLineStart(int lineNumber) {
341 if (lineEnds == null)
343 if (lineNumber >= lineEnds.length)
349 return initialPosition;
350 return lineEnds[lineNumber - 2] + 1;
351 // next line start one character behind the lineEnd of the previous line
// Conditionally consumes one character: reads source[currentPosition] and keeps the
// advance only if it equals testedChar; otherwise currentPosition is restored.
// NOTE(review): the try header and the return statements are not visible in this
// listing.
353 public final boolean getNextChar(char testedChar) {
355 //handle the case of unicode.
356 //when a unicode appears then we must use a buffer that holds char internal values
357 //At the end of this method currentCharacter holds the new visited char
358 //and currentPosition points right next after it
359 //Both previous lines are true if the currentCharacter is == to the testedChar
360 //On false, no side effect has occurred.
// NOTE(review): currentCharacter is assigned before the comparison, so it is
// overwritten even when currentPosition is rolled back.
362 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the position to roll back to.
364 int temp = currentPosition;
366 currentCharacter = source[currentPosition++];
// Disabled (commented-out) handling of \\u escapes follows.
367 // if (((currentCharacter = source[currentPosition++]) == '\\')
368 // && (source[currentPosition] == 'u')) {
369 // //-------------unicode handling ------------
370 // int c1, c2, c3, c4;
371 // int unicodeSize = 6;
372 // currentPosition++;
373 // while (source[currentPosition] == 'u') {
374 // currentPosition++;
378 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
380 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
382 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
384 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
386 // currentPosition = temp;
390 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
391 // if (currentCharacter != testedChar) {
392 // currentPosition = temp;
395 // unicodeAsBackSlash = currentCharacter == '\\';
397 // //need the unicode buffer
398 // if (withoutUnicodePtr == 0) {
399 // //buffer all the entries that have been left aside....
400 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
404 // withoutUnicodeBuffer,
406 // withoutUnicodePtr);
408 // //fill the buffer with the char
409 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
412 // } //-------------end unicode handling--------------
// Mismatch: undo the advance.
414 if (currentCharacter != testedChar) {
415 currentPosition = temp;
418 unicodeAsBackSlash = false;
419 // if (withoutUnicodePtr != 0)
420 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: clear the flag and undo the advance.
423 } catch (IndexOutOfBoundsException e) {
424 unicodeAsBackSlash = false;
425 currentPosition = temp;
// Two-candidate variant of getNextChar(char): reads source[currentPosition] and
// compares it with testedChar1, then testedChar2; the position is restored when
// neither matches or when the read runs past the end of source.
// NOTE(review): the try header and the return statements are not visible in this
// listing; the result encoding is described by the first comment line below.
429 public final int getNextChar(char testedChar1, char testedChar2) {
430 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
431 //test can be done with (x==0) for the first and (x>0) for the second
432 //handle the case of unicode.
433 //when a unicode appears then we must use a buffer that holds char internal values
434 //At the end of this method currentCharacter holds the new visited char
435 //and currentPosition points right next after it
436 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
437 //On false, no side effect has occurred.
// NOTE(review): currentCharacter is assigned before the comparisons, so it is
// overwritten even when currentPosition is rolled back.
439 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the position to roll back to.
441 int temp = currentPosition;
444 currentCharacter = source[currentPosition++];
// Disabled (commented-out) handling of \\u escapes follows.
445 // if (((currentCharacter = source[currentPosition++]) == '\\')
446 // && (source[currentPosition] == 'u')) {
447 // //-------------unicode handling ------------
448 // int c1, c2, c3, c4;
449 // int unicodeSize = 6;
450 // currentPosition++;
451 // while (source[currentPosition] == 'u') {
452 // currentPosition++;
456 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
458 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
460 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
462 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
464 // currentPosition = temp;
468 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
469 // if (currentCharacter == testedChar1)
471 // else if (currentCharacter == testedChar2)
474 // currentPosition = temp;
478 // //need the unicode buffer
479 // if (withoutUnicodePtr == 0) {
480 // //buffer all the entries that have been left aside....
481 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
485 // withoutUnicodeBuffer,
487 // withoutUnicodePtr);
489 // //fill the buffer with the char
490 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
492 // } //-------------end unicode handling--------------
494 if (currentCharacter == testedChar1)
496 else if (currentCharacter == testedChar2)
// presumably the else branch (neither candidate matched): undo the advance - TODO confirm
499 currentPosition = temp;
503 // if (withoutUnicodePtr != 0)
504 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: undo the advance.
507 } catch (IndexOutOfBoundsException e) {
508 currentPosition = temp;
// Conditionally consumes one character: reads source[currentPosition] and keeps the
// advance only if Character.isDigit accepts it; otherwise currentPosition is restored.
// NOTE(review): the try header and the return statements are not visible in this
// listing.
512 public final boolean getNextCharAsDigit() {
514 //handle the case of unicode.
515 //when a unicode appears then we must use a buffer that holds char internal values
516 //At the end of this method currentCharacter holds the new visited char
517 //and currentPosition points right next after it
518 //Both previous lines are true if the currentCharacter is a digit
519 //On false, no side effect has occurred.
// NOTE(review): currentCharacter is assigned before the test, so it is overwritten
// even when currentPosition is rolled back.
521 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the position to roll back to.
523 int temp = currentPosition;
525 currentCharacter = source[currentPosition++];
// Disabled (commented-out) handling of \\u escapes follows.
526 // if (((currentCharacter = source[currentPosition++]) == '\\')
527 // && (source[currentPosition] == 'u')) {
528 // //-------------unicode handling ------------
529 // int c1, c2, c3, c4;
530 // int unicodeSize = 6;
531 // currentPosition++;
532 // while (source[currentPosition] == 'u') {
533 // currentPosition++;
537 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
539 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
541 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
543 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
545 // currentPosition = temp;
549 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
550 // if (!Character.isDigit(currentCharacter)) {
551 // currentPosition = temp;
555 // //need the unicode buffer
556 // if (withoutUnicodePtr == 0) {
557 // //buffer all the entries that have been left aside....
558 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
562 // withoutUnicodeBuffer,
564 // withoutUnicodePtr);
566 // //fill the buffer with the char
567 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
569 // } //-------------end unicode handling--------------
// Not a digit: undo the advance.
571 if (!Character.isDigit(currentCharacter)) {
572 currentPosition = temp;
575 // if (withoutUnicodePtr != 0)
576 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: undo the advance.
579 } catch (IndexOutOfBoundsException e) {
580 currentPosition = temp;
// Radix-aware variant of getNextCharAsDigit(): reads source[currentPosition] and
// keeps the advance only if Character.digit(currentCharacter, radix) is not -1;
// otherwise currentPosition is restored.
// NOTE(review): the try header and the return statements are not visible in this
// listing.
584 public final boolean getNextCharAsDigit(int radix) {
586 //handle the case of unicode.
587 //when a unicode appears then we must use a buffer that holds char internal values
588 //At the end of this method currentCharacter holds the new visited char
589 //and currentPosition points right next after it
590 //Both previous lines are true if the currentCharacter is a digit base on radix
591 //On false, no side effect has occurred.
// NOTE(review): currentCharacter is assigned before the test, so it is overwritten
// even when currentPosition is rolled back.
593 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the position to roll back to.
595 int temp = currentPosition;
597 currentCharacter = source[currentPosition++];
// Disabled (commented-out) handling of \\u escapes follows.
598 // if (((currentCharacter = source[currentPosition++]) == '\\')
599 // && (source[currentPosition] == 'u')) {
600 // //-------------unicode handling ------------
601 // int c1, c2, c3, c4;
602 // int unicodeSize = 6;
603 // currentPosition++;
604 // while (source[currentPosition] == 'u') {
605 // currentPosition++;
609 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
611 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
613 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
615 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
617 // currentPosition = temp;
621 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
622 // if (Character.digit(currentCharacter, radix) == -1) {
623 // currentPosition = temp;
627 // //need the unicode buffer
628 // if (withoutUnicodePtr == 0) {
629 // //buffer all the entries that have been left aside....
630 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
634 // withoutUnicodeBuffer,
636 // withoutUnicodePtr);
638 // //fill the buffer with the char
639 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
641 // } //-------------end unicode handling--------------
// Not a digit in this radix: undo the advance.
643 if (Character.digit(currentCharacter, radix) == -1) {
644 currentPosition = temp;
647 // if (withoutUnicodePtr != 0)
648 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: undo the advance.
651 } catch (IndexOutOfBoundsException e) {
652 currentPosition = temp;
// Conditionally consumes one character: reads source[currentPosition] and keeps the
// advance only if isPHPIdentifierPart accepts it; otherwise currentPosition is
// restored. Despite the method name, the test applied is the PHP identifier rule.
// NOTE(review): the try header and the return statements are not visible in this
// listing.
656 public boolean getNextCharAsJavaIdentifierPart() {
658 //handle the case of unicode.
659 //when a unicode appears then we must use a buffer that holds char internal values
660 //At the end of this method currentCharacter holds the new visited char
661 //and currentPosition points right next after it
662 //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
663 //On false, no side effect has occurred.
// NOTE(review): currentCharacter is assigned before the test, so it is overwritten
// even when currentPosition is rolled back.
665 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the position to roll back to.
667 int temp = currentPosition;
669 currentCharacter = source[currentPosition++];
// Disabled (commented-out) handling of \\u escapes follows.
670 // if (((currentCharacter = source[currentPosition++]) == '\\')
671 // && (source[currentPosition] == 'u')) {
672 // //-------------unicode handling ------------
673 // int c1, c2, c3, c4;
674 // int unicodeSize = 6;
675 // currentPosition++;
676 // while (source[currentPosition] == 'u') {
677 // currentPosition++;
681 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
683 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
685 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
687 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
689 // currentPosition = temp;
693 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
694 // if (!isPHPIdentifierPart(currentCharacter)) {
695 // currentPosition = temp;
699 // //need the unicode buffer
700 // if (withoutUnicodePtr == 0) {
701 // //buffer all the entries that have been left aside....
702 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
706 // withoutUnicodeBuffer,
708 // withoutUnicodePtr);
710 // //fill the buffer with the char
711 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
713 // } //-------------end unicode handling--------------
// Not an identifier part: undo the advance.
715 if (!isPHPIdentifierPart(currentCharacter)) {
716 currentPosition = temp;
720 // if (withoutUnicodePtr != 0)
721 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: undo the advance.
724 } catch (IndexOutOfBoundsException e) {
725 currentPosition = temp;
730 public int getNextToken() throws InvalidInputException {
731 int htmlPosition = currentPosition;
734 currentCharacter = source[currentPosition++];
735 if (currentCharacter == '<') {
736 if (getNextChar('?')) {
737 currentCharacter = source[currentPosition++];
738 if ((currentCharacter == ' ')
739 || Character.isWhitespace(currentCharacter)) {
741 startPosition = currentPosition;
743 if (tokenizeWhiteSpace) {
744 // && (whiteStart != currentPosition - 1)) {
745 // reposition scanner in case we are interested by spaces as tokens
746 startPosition = htmlPosition;
747 return TokenNameHTML;
751 (currentCharacter == 'P') || (currentCharacter == 'p');
753 int test = getNextChar('H', 'h');
755 test = getNextChar('P', 'p');
758 startPosition = currentPosition;
761 if (tokenizeWhiteSpace) {
762 // && (whiteStart != currentPosition - 1)) {
763 // reposition scanner in case we are interested by spaces as tokens
764 startPosition = htmlPosition;
765 return TokenNameHTML;
774 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
775 if (recordLineSeparator) {
782 } //-----------------end switch while try--------------------
783 catch (IndexOutOfBoundsException e) {
784 if (tokenizeWhiteSpace) {
785 // && (whiteStart != currentPosition - 1)) {
786 // reposition scanner in case we are interested by spaces as tokens
787 startPosition = htmlPosition;
795 jumpOverMethodBody();
797 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
800 while (true) { //loop for jumping over comments
801 withoutUnicodePtr = 0;
802 //start with a new token (even comment written with unicode )
804 // ---------Consume white space and handles startPosition---------
805 int whiteStart = currentPosition;
806 boolean isWhiteSpace;
808 startPosition = currentPosition;
809 currentCharacter = source[currentPosition++];
810 // if (((currentCharacter = source[currentPosition++]) == '\\')
811 // && (source[currentPosition] == 'u')) {
812 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
814 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
815 checkNonExternalizeString();
816 if (recordLineSeparator) {
823 (currentCharacter == ' ')
824 || Character.isWhitespace(currentCharacter);
826 } while (isWhiteSpace);
827 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
828 // reposition scanner in case we are interested by spaces as tokens
830 startPosition = whiteStart;
831 return TokenNameWHITESPACE;
833 //little trick to get out in the middle of a source computation
834 if (currentPosition > eofPosition)
837 // ---------Identify the next token-------------
839 switch (currentCharacter) {
841 return TokenNameLPAREN;
843 return TokenNameRPAREN;
845 return TokenNameLBRACE;
847 return TokenNameRBRACE;
849 return TokenNameLBRACKET;
851 return TokenNameRBRACKET;
853 return TokenNameSEMICOLON;
855 return TokenNameCOMMA;
858 if (getNextCharAsDigit())
859 return scanNumber(true);
864 if ((test = getNextChar('+', '=')) == 0)
865 return TokenNamePLUS_PLUS;
867 return TokenNamePLUS_EQUAL;
868 return TokenNamePLUS;
873 if ((test = getNextChar('-', '=')) == 0)
874 return TokenNameMINUS_MINUS;
876 return TokenNameMINUS_EQUAL;
877 if (getNextChar('>'))
878 return TokenNameMINUS_GREATER;
880 return TokenNameMINUS;
883 if (getNextChar('='))
884 return TokenNameTWIDDLE_EQUAL;
885 return TokenNameTWIDDLE;
887 if (getNextChar('='))
888 return TokenNameNOT_EQUAL;
891 if (getNextChar('='))
892 return TokenNameMULTIPLY_EQUAL;
893 return TokenNameMULTIPLY;
895 if (getNextChar('='))
896 return TokenNameREMAINDER_EQUAL;
897 return TokenNameREMAINDER;
901 if ((test = getNextChar('=', '<')) == 0)
902 return TokenNameLESS_EQUAL;
904 if (getNextChar('='))
905 return TokenNameLEFT_SHIFT_EQUAL;
906 if (getNextChar('<')) {
907 int heredocStart = currentPosition;
908 int heredocLength = 0;
909 currentCharacter = source[currentPosition++];
910 if (isPHPIdentifierStart(currentCharacter)) {
911 currentCharacter = source[currentPosition++];
913 return TokenNameERROR;
915 while (isPHPIdentifierPart(currentCharacter)) {
916 currentCharacter = source[currentPosition++];
919 heredocLength = currentPosition - heredocStart - 1;
921 // heredoc end-tag determination
922 boolean endTag = true;
925 ch = source[currentPosition++];
926 if (ch == '\r' || ch == '\n') {
927 if (recordLineSeparator) {
932 for (int i = 0; i < heredocLength; i++) {
933 if (source[currentPosition + i]
934 != source[heredocStart + i]) {
940 currentPosition += heredocLength - 1;
941 currentCharacter = source[currentPosition++];
942 break; // do...while loop
950 return TokenNameHEREDOC;
952 return TokenNameLEFT_SHIFT;
954 return TokenNameLESS;
959 if ((test = getNextChar('=', '>')) == 0)
960 return TokenNameGREATER_EQUAL;
962 if ((test = getNextChar('=', '>')) == 0)
963 return TokenNameRIGHT_SHIFT_EQUAL;
964 return TokenNameRIGHT_SHIFT;
966 return TokenNameGREATER;
969 if (getNextChar('='))
970 return TokenNameEQUAL_EQUAL;
971 if (getNextChar('>'))
972 return TokenNameEQUAL_GREATER;
973 return TokenNameEQUAL;
977 if ((test = getNextChar('&', '=')) == 0)
978 return TokenNameAND_AND;
980 return TokenNameAND_EQUAL;
986 if ((test = getNextChar('|', '=')) == 0)
987 return TokenNameOR_OR;
989 return TokenNameOR_EQUAL;
993 if (getNextChar('='))
994 return TokenNameXOR_EQUAL;
997 if (getNextChar('>')) {
999 return TokenNameStopPHP;
1001 return TokenNameQUESTION;
1003 if (getNextChar(':'))
1004 return TokenNameCOLON_COLON;
1005 return TokenNameCOLON;
1011 // if ((test = getNextChar('\n', '\r')) == 0) {
1012 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1015 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1016 // for (int lookAhead = 0;
1019 // if (currentPosition + lookAhead
1020 // == source.length)
1022 // if (source[currentPosition + lookAhead]
1025 // if (source[currentPosition + lookAhead]
1027 // currentPosition += lookAhead + 1;
1031 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1034 // if (getNextChar('\'')) {
1035 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1036 // for (int lookAhead = 0;
1039 // if (currentPosition + lookAhead
1040 // == source.length)
1042 // if (source[currentPosition + lookAhead]
1045 // if (source[currentPosition + lookAhead]
1047 // currentPosition += lookAhead + 1;
1051 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1053 // if (getNextChar('\\'))
1054 // scanEscapeCharacter();
1055 // else { // consume next character
1056 // unicodeAsBackSlash = false;
1057 // if (((currentCharacter = source[currentPosition++])
1059 // && (source[currentPosition] == 'u')) {
1060 // getNextUnicodeChar();
1062 // if (withoutUnicodePtr != 0) {
1063 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1064 // currentCharacter;
1068 // // if (getNextChar('\''))
1069 // // return TokenNameCharacterLiteral;
1070 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1071 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1072 // if (currentPosition + lookAhead == source.length)
1074 // if (source[currentPosition + lookAhead] == '\n')
1076 // if (source[currentPosition + lookAhead] == '\'') {
1077 // currentPosition += lookAhead + 1;
1081 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1084 // consume next character
1085 unicodeAsBackSlash = false;
1086 currentCharacter = source[currentPosition++];
1087 // if (((currentCharacter = source[currentPosition++]) == '\\')
1088 // && (source[currentPosition] == 'u')) {
1089 // getNextUnicodeChar();
1091 // if (withoutUnicodePtr != 0) {
1092 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1093 // currentCharacter;
1097 while (currentCharacter != '\'') {
1099 /**** in PHP \r and \n are valid in string literals ****/
1100 // if ((currentCharacter == '\n')
1101 // || (currentCharacter == '\r')) {
1102 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1103 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1104 // if (currentPosition + lookAhead == source.length)
1106 // if (source[currentPosition + lookAhead] == '\n')
1108 // if (source[currentPosition + lookAhead] == '\"') {
1109 // currentPosition += lookAhead + 1;
1113 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1115 if (currentCharacter == '\\') {
1116 int escapeSize = currentPosition;
1117 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1118 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1119 scanSingleQuotedEscapeCharacter();
1120 escapeSize = currentPosition - escapeSize;
1121 if (withoutUnicodePtr == 0) {
1122 //buffer all the entries that have been left aside....
1124 currentPosition - escapeSize - 1 - startPosition;
1128 withoutUnicodeBuffer,
1131 withoutUnicodeBuffer[++withoutUnicodePtr] =
1133 } else { //overwrite the / in the buffer
1134 withoutUnicodeBuffer[withoutUnicodePtr] =
1136 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1137 withoutUnicodePtr--;
1141 // consume next character
1142 unicodeAsBackSlash = false;
1143 currentCharacter = source[currentPosition++];
1144 // if (((currentCharacter = source[currentPosition++]) == '\\')
1145 // && (source[currentPosition] == 'u')) {
1146 // getNextUnicodeChar();
1148 if (withoutUnicodePtr != 0) {
1149 withoutUnicodeBuffer[++withoutUnicodePtr] =
1155 } catch (IndexOutOfBoundsException e) {
1156 throw new InvalidInputException(UNTERMINATED_STRING);
1157 } catch (InvalidInputException e) {
1158 if (e.getMessage().equals(INVALID_ESCAPE)) {
1159 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1160 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1161 if (currentPosition + lookAhead == source.length)
1163 if (source[currentPosition + lookAhead] == '\n')
1165 if (source[currentPosition + lookAhead] == '\'') {
1166 currentPosition += lookAhead + 1;
1174 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1175 if (currentLine == null) {
1176 currentLine = new NLSLine();
1177 lines.add(currentLine);
1181 getCurrentTokenSourceString(),
1183 currentPosition - 1));
1185 return TokenNameStringConstant;
1188 // consume next character
1189 unicodeAsBackSlash = false;
1190 currentCharacter = source[currentPosition++];
1191 // if (((currentCharacter = source[currentPosition++]) == '\\')
1192 // && (source[currentPosition] == 'u')) {
1193 // getNextUnicodeChar();
1195 // if (withoutUnicodePtr != 0) {
1196 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1197 // currentCharacter;
1201 while (currentCharacter != '"') {
1203 /**** in PHP \r and \n are valid in string literals ****/
1204 // if ((currentCharacter == '\n')
1205 // || (currentCharacter == '\r')) {
1206 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1207 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1208 // if (currentPosition + lookAhead == source.length)
1210 // if (source[currentPosition + lookAhead] == '\n')
1212 // if (source[currentPosition + lookAhead] == '\"') {
1213 // currentPosition += lookAhead + 1;
1217 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1219 if (currentCharacter == '\\') {
1220 int escapeSize = currentPosition;
1221 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1222 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1223 scanDoubleQuotedEscapeCharacter();
1224 escapeSize = currentPosition - escapeSize;
1225 if (withoutUnicodePtr == 0) {
1226 //buffer all the entries that have been left aside....
1228 currentPosition - escapeSize - 1 - startPosition;
1232 withoutUnicodeBuffer,
1235 withoutUnicodeBuffer[++withoutUnicodePtr] =
1237 } else { //overwrite the / in the buffer
1238 withoutUnicodeBuffer[withoutUnicodePtr] =
1240 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1241 withoutUnicodePtr--;
1245 // consume next character
1246 unicodeAsBackSlash = false;
1247 currentCharacter = source[currentPosition++];
1248 // if (((currentCharacter = source[currentPosition++]) == '\\')
1249 // && (source[currentPosition] == 'u')) {
1250 // getNextUnicodeChar();
1252 if (withoutUnicodePtr != 0) {
1253 withoutUnicodeBuffer[++withoutUnicodePtr] =
1259 } catch (IndexOutOfBoundsException e) {
1260 throw new InvalidInputException(UNTERMINATED_STRING);
1261 } catch (InvalidInputException e) {
1262 if (e.getMessage().equals(INVALID_ESCAPE)) {
1263 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1264 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1265 if (currentPosition + lookAhead == source.length)
1267 if (source[currentPosition + lookAhead] == '\n')
1269 if (source[currentPosition + lookAhead] == '\"') {
1270 currentPosition += lookAhead + 1;
1278 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1279 if (currentLine == null) {
1280 currentLine = new NLSLine();
1281 lines.add(currentLine);
1285 getCurrentTokenSourceString(),
1287 currentPosition - 1));
1289 return TokenNameStringLiteral;
1292 // consume next character
1293 unicodeAsBackSlash = false;
1294 currentCharacter = source[currentPosition++];
1295 // if (((currentCharacter = source[currentPosition++]) == '\\')
1296 // && (source[currentPosition] == 'u')) {
1297 // getNextUnicodeChar();
1299 // if (withoutUnicodePtr != 0) {
1300 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1301 // currentCharacter;
1305 while (currentCharacter != '`') {
1307 /**** in PHP \r and \n are valid in string literals ****/
1308 // if ((currentCharacter == '\n')
1309 // || (currentCharacter == '\r')) {
1310 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1311 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1312 // if (currentPosition + lookAhead == source.length)
1314 // if (source[currentPosition + lookAhead] == '\n')
1316 // if (source[currentPosition + lookAhead] == '\"') {
1317 // currentPosition += lookAhead + 1;
1321 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1323 if (currentCharacter == '\\') {
1324 int escapeSize = currentPosition;
1325 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1326 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1327 scanDoubleQuotedEscapeCharacter();
1328 escapeSize = currentPosition - escapeSize;
1329 if (withoutUnicodePtr == 0) {
1330 //buffer all the entries that have been left aside....
1332 currentPosition - escapeSize - 1 - startPosition;
1336 withoutUnicodeBuffer,
1339 withoutUnicodeBuffer[++withoutUnicodePtr] =
1341 } else { //overwrite the / in the buffer
1342 withoutUnicodeBuffer[withoutUnicodePtr] =
1344 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1345 withoutUnicodePtr--;
1349 // consume next character
1350 unicodeAsBackSlash = false;
1351 currentCharacter = source[currentPosition++];
1352 // if (((currentCharacter = source[currentPosition++]) == '\\')
1353 // && (source[currentPosition] == 'u')) {
1354 // getNextUnicodeChar();
1356 if (withoutUnicodePtr != 0) {
1357 withoutUnicodeBuffer[++withoutUnicodePtr] =
1363 } catch (IndexOutOfBoundsException e) {
1364 throw new InvalidInputException(UNTERMINATED_STRING);
1365 } catch (InvalidInputException e) {
1366 if (e.getMessage().equals(INVALID_ESCAPE)) {
1367 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1368 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1369 if (currentPosition + lookAhead == source.length)
1371 if (source[currentPosition + lookAhead] == '\n')
1373 if (source[currentPosition + lookAhead] == '`') {
1374 currentPosition += lookAhead + 1;
1382 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1383 if (currentLine == null) {
1384 currentLine = new NLSLine();
1385 lines.add(currentLine);
1389 getCurrentTokenSourceString(),
1391 currentPosition - 1));
1393 return TokenNameStringInterpolated;
1398 if ((currentCharacter == '#')
1399 || (test = getNextChar('/', '*')) == 0) {
1401 int endPositionForLineComment = 0;
1402 try { //get the next char
1403 currentCharacter = source[currentPosition++];
1404 // if (((currentCharacter = source[currentPosition++])
1406 // && (source[currentPosition] == 'u')) {
1407 // //-------------unicode traitement ------------
1408 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1409 // currentPosition++;
1410 // while (source[currentPosition] == 'u') {
1411 // currentPosition++;
1414 // Character.getNumericValue(source[currentPosition++]))
1418 // Character.getNumericValue(source[currentPosition++]))
1422 // Character.getNumericValue(source[currentPosition++]))
1426 // Character.getNumericValue(source[currentPosition++]))
1429 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1431 // currentCharacter =
1432 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1436 //handle the \\u case manually into comment
1437 // if (currentCharacter == '\\') {
1438 // if (source[currentPosition] == '\\')
1439 // currentPosition++;
1440 // } //jump over the \\
1441 boolean isUnicode = false;
1442 while (currentCharacter != '\r'
1443 && currentCharacter != '\n') {
1444 if (currentCharacter == '?') {
1445 if (getNextChar('>')) {
1446 startPosition = currentPosition - 2;
1448 return TokenNameStopPHP;
1454 currentCharacter = source[currentPosition++];
1455 // if (((currentCharacter = source[currentPosition++])
1457 // && (source[currentPosition] == 'u')) {
1458 // isUnicode = true;
1459 // //-------------unicode traitement ------------
1460 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1461 // currentPosition++;
1462 // while (source[currentPosition] == 'u') {
1463 // currentPosition++;
1466 // Character.getNumericValue(source[currentPosition++]))
1470 // Character.getNumericValue(
1471 // source[currentPosition++]))
1475 // Character.getNumericValue(
1476 // source[currentPosition++]))
1480 // Character.getNumericValue(
1481 // source[currentPosition++]))
1484 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1486 // currentCharacter =
1487 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1490 //handle the \\u case manually into comment
1491 // if (currentCharacter == '\\') {
1492 // if (source[currentPosition] == '\\')
1493 // currentPosition++;
1494 // } //jump over the \\
1497 endPositionForLineComment = currentPosition - 6;
1499 endPositionForLineComment = currentPosition - 1;
1501 recordComment(false);
1502 if ((currentCharacter == '\r')
1503 || (currentCharacter == '\n')) {
1504 checkNonExternalizeString();
1505 if (recordLineSeparator) {
1507 pushUnicodeLineSeparator();
1509 pushLineSeparator();
1515 if (tokenizeComments) {
1517 currentPosition = endPositionForLineComment;
1518 // reset one character behind
1520 return TokenNameCOMMENT_LINE;
1522 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1523 if (tokenizeComments) {
1525 // reset one character behind
1526 return TokenNameCOMMENT_LINE;
1532 //traditional and annotation comment
1533 boolean isJavadoc = false, star = false;
1534 // consume next character
1535 unicodeAsBackSlash = false;
1536 currentCharacter = source[currentPosition++];
1537 // if (((currentCharacter = source[currentPosition++]) == '\\')
1538 // && (source[currentPosition] == 'u')) {
1539 // getNextUnicodeChar();
1541 // if (withoutUnicodePtr != 0) {
1542 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1543 // currentCharacter;
1547 if (currentCharacter == '*') {
1551 if ((currentCharacter == '\r')
1552 || (currentCharacter == '\n')) {
1553 checkNonExternalizeString();
1554 if (recordLineSeparator) {
1555 pushLineSeparator();
1560 try { //get the next char
1561 currentCharacter = source[currentPosition++];
1562 // if (((currentCharacter = source[currentPosition++])
1564 // && (source[currentPosition] == 'u')) {
1565 // //-------------unicode traitement ------------
1566 // getNextUnicodeChar();
1568 //handle the \\u case manually into comment
1569 // if (currentCharacter == '\\') {
1570 // if (source[currentPosition] == '\\')
1571 // currentPosition++;
1572 // //jump over the \\
1574 // empty comment is not a javadoc /**/
1575 if (currentCharacter == '/') {
1578 //loop until end of comment */
1579 while ((currentCharacter != '/') || (!star)) {
1580 if ((currentCharacter == '\r')
1581 || (currentCharacter == '\n')) {
1582 checkNonExternalizeString();
1583 if (recordLineSeparator) {
1584 pushLineSeparator();
1589 star = currentCharacter == '*';
1591 currentCharacter = source[currentPosition++];
1592 // if (((currentCharacter = source[currentPosition++])
1594 // && (source[currentPosition] == 'u')) {
1595 // //-------------unicode traitement ------------
1596 // getNextUnicodeChar();
1598 //handle the \\u case manually into comment
1599 // if (currentCharacter == '\\') {
1600 // if (source[currentPosition] == '\\')
1601 // currentPosition++;
1602 // } //jump over the \\
1604 recordComment(isJavadoc);
1605 if (tokenizeComments) {
1607 return TokenNameCOMMENT_PHPDOC;
1608 return TokenNameCOMMENT_BLOCK;
1610 } catch (IndexOutOfBoundsException e) {
1611 throw new InvalidInputException(UNTERMINATED_COMMENT);
1615 if (getNextChar('='))
1616 return TokenNameDIVIDE_EQUAL;
1617 return TokenNameDIVIDE;
1621 return TokenNameEOF;
1622 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1623 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1626 if (currentCharacter == '$') {
1627 while ((currentCharacter = source[currentPosition++]) == '$') {
1629 if (currentCharacter == '{')
1630 return TokenNameDOLLAR_LBRACE;
1631 if (isPHPIdentifierStart(currentCharacter))
1632 return scanIdentifierOrKeyword(true);
1633 return TokenNameERROR;
1635 if (isPHPIdentifierStart(currentCharacter))
1636 return scanIdentifierOrKeyword(false);
1637 if (Character.isDigit(currentCharacter))
1638 return scanNumber(false);
1639 return TokenNameERROR;
1642 } //-----------------end switch while try--------------------
1643 catch (IndexOutOfBoundsException e) {
1646 return TokenNameEOF;
1649 // public final void getNextUnicodeChar()
1650 // throws IndexOutOfBoundsException, InvalidInputException {
1652 // //handle the case of unicode.
1653 // //when a unicode appears then we must use a buffer that holds char internal values
1654 // //At the end of this method currentCharacter holds the new visited char
1655 // //and currentPosition points right next after it
1657 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1659 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1660 // currentPosition++;
1661 // while (source[currentPosition] == 'u') {
1662 // currentPosition++;
1666 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1668 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1670 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1672 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1674 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1676 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1677 // //need the unicode buffer
1678 // if (withoutUnicodePtr == 0) {
1679 // //buffer all the entries that have been left aside....
1680 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1681 // System.arraycopy(
1684 // withoutUnicodeBuffer,
1686 // withoutUnicodePtr);
1688 // //fill the buffer with the char
1689 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1691 // unicodeAsBackSlash = currentCharacter == '\\';
/* Tokenizes a method body, assuming that curly brackets are properly balanced. */
1695 public final void jumpOverMethodBody() {
1697 this.wasAcr = false;
1700 while (true) { //loop for jumping over comments
1701 // ---------Consume white space and handles startPosition---------
1702 boolean isWhiteSpace;
1704 startPosition = currentPosition;
1705 currentCharacter = source[currentPosition++];
1706 // if (((currentCharacter = source[currentPosition++]) == '\\')
1707 // && (source[currentPosition] == 'u')) {
1708 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1710 if (recordLineSeparator
1711 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1712 pushLineSeparator();
1713 isWhiteSpace = Character.isWhitespace(currentCharacter);
1715 } while (isWhiteSpace);
1717 // -------consume token until } is found---------
1718 switch (currentCharacter) {
1730 test = getNextChar('\\');
1733 scanDoubleQuotedEscapeCharacter();
1734 } catch (InvalidInputException ex) {
1737 // try { // consume next character
1738 unicodeAsBackSlash = false;
1739 currentCharacter = source[currentPosition++];
1740 // if (((currentCharacter = source[currentPosition++]) == '\\')
1741 // && (source[currentPosition] == 'u')) {
1742 // getNextUnicodeChar();
1744 if (withoutUnicodePtr != 0) {
1745 withoutUnicodeBuffer[++withoutUnicodePtr] =
1749 // } catch (InvalidInputException ex) {
1757 // try { // consume next character
1758 unicodeAsBackSlash = false;
1759 currentCharacter = source[currentPosition++];
1760 // if (((currentCharacter = source[currentPosition++]) == '\\')
1761 // && (source[currentPosition] == 'u')) {
1762 // getNextUnicodeChar();
1764 if (withoutUnicodePtr != 0) {
1765 withoutUnicodeBuffer[++withoutUnicodePtr] =
1769 // } catch (InvalidInputException ex) {
1771 while (currentCharacter != '"') {
1772 if (currentCharacter == '\r') {
1773 if (source[currentPosition] == '\n')
1776 // the string cannot go further that the line
1778 if (currentCharacter == '\n') {
1780 // the string cannot go further that the line
1782 if (currentCharacter == '\\') {
1784 scanDoubleQuotedEscapeCharacter();
1785 } catch (InvalidInputException ex) {
1788 // try { // consume next character
1789 unicodeAsBackSlash = false;
1790 currentCharacter = source[currentPosition++];
1791 // if (((currentCharacter = source[currentPosition++]) == '\\')
1792 // && (source[currentPosition] == 'u')) {
1793 // getNextUnicodeChar();
1795 if (withoutUnicodePtr != 0) {
1796 withoutUnicodeBuffer[++withoutUnicodePtr] =
1800 // } catch (InvalidInputException ex) {
1803 } catch (IndexOutOfBoundsException e) {
1810 if ((test = getNextChar('/', '*')) == 0) {
1814 currentCharacter = source[currentPosition++];
1815 // if (((currentCharacter = source[currentPosition++]) == '\\')
1816 // && (source[currentPosition] == 'u')) {
1817 // //-------------unicode traitement ------------
1818 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1819 // currentPosition++;
1820 // while (source[currentPosition] == 'u') {
1821 // currentPosition++;
1824 // Character.getNumericValue(source[currentPosition++]))
1828 // Character.getNumericValue(source[currentPosition++]))
1832 // Character.getNumericValue(source[currentPosition++]))
1836 // Character.getNumericValue(source[currentPosition++]))
1839 // //error don't care of the value
1840 // currentCharacter = 'A';
1841 // } //something different from \n and \r
1843 // currentCharacter =
1844 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1848 while (currentCharacter != '\r'
1849 && currentCharacter != '\n') {
1851 currentCharacter = source[currentPosition++];
1852 // if (((currentCharacter = source[currentPosition++])
1854 // && (source[currentPosition] == 'u')) {
1855 // //-------------unicode traitement ------------
1856 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1857 // currentPosition++;
1858 // while (source[currentPosition] == 'u') {
1859 // currentPosition++;
1862 // Character.getNumericValue(source[currentPosition++]))
1866 // Character.getNumericValue(source[currentPosition++]))
1870 // Character.getNumericValue(source[currentPosition++]))
1874 // Character.getNumericValue(source[currentPosition++]))
1877 // //error don't care of the value
1878 // currentCharacter = 'A';
1879 // } //something different from \n and \r
1881 // currentCharacter =
1882 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1886 if (recordLineSeparator
1887 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1888 pushLineSeparator();
1889 } catch (IndexOutOfBoundsException e) {
1890 } //an eof will them be generated
1894 //traditional and annotation comment
1895 boolean star = false;
1896 // try { // consume next character
1897 unicodeAsBackSlash = false;
1898 currentCharacter = source[currentPosition++];
1899 // if (((currentCharacter = source[currentPosition++]) == '\\')
1900 // && (source[currentPosition] == 'u')) {
1901 // getNextUnicodeChar();
1903 if (withoutUnicodePtr != 0) {
1904 withoutUnicodeBuffer[++withoutUnicodePtr] =
1908 // } catch (InvalidInputException ex) {
1910 if (currentCharacter == '*') {
1913 if (recordLineSeparator
1914 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1915 pushLineSeparator();
1916 try { //get the next char
1917 currentCharacter = source[currentPosition++];
1918 // if (((currentCharacter = source[currentPosition++]) == '\\')
1919 // && (source[currentPosition] == 'u')) {
1920 // //-------------unicode traitement ------------
1921 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1922 // currentPosition++;
1923 // while (source[currentPosition] == 'u') {
1924 // currentPosition++;
1927 // Character.getNumericValue(source[currentPosition++]))
1931 // Character.getNumericValue(source[currentPosition++]))
1935 // Character.getNumericValue(source[currentPosition++]))
1939 // Character.getNumericValue(source[currentPosition++]))
1942 // //error don't care of the value
1943 // currentCharacter = 'A';
1944 // } //something different from * and /
1946 // currentCharacter =
1947 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1950 //loop until end of comment */
1951 while ((currentCharacter != '/') || (!star)) {
1952 if (recordLineSeparator
1953 && ((currentCharacter == '\r')
1954 || (currentCharacter == '\n')))
1955 pushLineSeparator();
1956 star = currentCharacter == '*';
1958 currentCharacter = source[currentPosition++];
1959 // if (((currentCharacter = source[currentPosition++])
1961 // && (source[currentPosition] == 'u')) {
1962 // //-------------unicode traitement ------------
1963 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1964 // currentPosition++;
1965 // while (source[currentPosition] == 'u') {
1966 // currentPosition++;
1969 // Character.getNumericValue(source[currentPosition++]))
1973 // Character.getNumericValue(source[currentPosition++]))
1977 // Character.getNumericValue(source[currentPosition++]))
1981 // Character.getNumericValue(source[currentPosition++]))
1984 // //error don't care of the value
1985 // currentCharacter = 'A';
1986 // } //something different from * and /
1988 // currentCharacter =
1989 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1993 } catch (IndexOutOfBoundsException e) {
2002 if (isPHPIdentifierStart(currentCharacter)
2003 || currentCharacter == '$') {
2005 scanIdentifierOrKeyword((currentCharacter == '$'));
2006 } catch (InvalidInputException ex) {
2010 if (Character.isDigit(currentCharacter)) {
2013 } catch (InvalidInputException ex) {
2019 //-----------------end switch while try--------------------
2020 } catch (IndexOutOfBoundsException e) {
2021 } catch (InvalidInputException e) {
2025 // public final boolean jumpOverUnicodeWhiteSpace()
2026 // throws InvalidInputException {
2028 // //handle the case of unicode. Jump over the next whiteSpace
2029 // //making startPosition pointing on the next available char
2030 // //On false, the currentCharacter is filled up with a potential
2034 // this.wasAcr = false;
2035 // int c1, c2, c3, c4;
2036 // int unicodeSize = 6;
2037 // currentPosition++;
2038 // while (source[currentPosition] == 'u') {
2039 // currentPosition++;
2043 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2045 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2047 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2049 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2051 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2054 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2055 // if (recordLineSeparator
2056 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2057 // pushLineSeparator();
2058 // if (Character.isWhitespace(currentCharacter))
2061 // //buffer the new char which is not a white space
2062 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2063 // //withoutUnicodePtr == 1 is true here
2065 // } catch (IndexOutOfBoundsException e) {
2066 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2069 public final int[] getLineEnds() {
2070 //return a bounded copy of this.lineEnds
2073 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2077 public char[] getSource() {
2080 final char[] optimizedCurrentTokenSource1() {
2081 //return always the same char[] build only once
2083 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2084 char charOne = source[startPosition];
2139 return new char[] { charOne };
2143 final char[] optimizedCurrentTokenSource2() {
2144 //try to return the same char[] build only once
2148 (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2150 char[][] table = charArray_length[0][hash];
2152 while (++i < InternalTableSize) {
2153 char[] charArray = table[i];
2154 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2157 //---------other side---------
2159 int max = newEntry2;
2160 while (++i <= max) {
2161 char[] charArray = table[i];
2162 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2165 //--------add the entry-------
2166 if (++max >= InternalTableSize)
2169 table[max] = (r = new char[] { c0, c1 });
2174 final char[] optimizedCurrentTokenSource3() {
2175 //try to return the same char[] build only once
2179 (((c0 = source[startPosition]) << 12)
2180 + ((c1 = source[startPosition + 1]) << 6)
2181 + (c2 = source[startPosition + 2]))
2183 char[][] table = charArray_length[1][hash];
2185 while (++i < InternalTableSize) {
2186 char[] charArray = table[i];
2187 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2190 //---------other side---------
2192 int max = newEntry3;
2193 while (++i <= max) {
2194 char[] charArray = table[i];
2195 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2198 //--------add the entry-------
2199 if (++max >= InternalTableSize)
2202 table[max] = (r = new char[] { c0, c1, c2 });
2207 final char[] optimizedCurrentTokenSource4() {
2208 //try to return the same char[] build only once
2210 char c0, c1, c2, c3;
2212 ((((long) (c0 = source[startPosition])) << 18)
2213 + ((c1 = source[startPosition + 1]) << 12)
2214 + ((c2 = source[startPosition + 2]) << 6)
2215 + (c3 = source[startPosition + 3]))
2217 char[][] table = charArray_length[2][(int) hash];
2219 while (++i < InternalTableSize) {
2220 char[] charArray = table[i];
2221 if ((c0 == charArray[0])
2222 && (c1 == charArray[1])
2223 && (c2 == charArray[2])
2224 && (c3 == charArray[3]))
2227 //---------other side---------
2229 int max = newEntry4;
2230 while (++i <= max) {
2231 char[] charArray = table[i];
2232 if ((c0 == charArray[0])
2233 && (c1 == charArray[1])
2234 && (c2 == charArray[2])
2235 && (c3 == charArray[3]))
2238 //--------add the entry-------
2239 if (++max >= InternalTableSize)
2242 table[max] = (r = new char[] { c0, c1, c2, c3 });
2248 final char[] optimizedCurrentTokenSource5() {
2249 //try to return the same char[] build only once
2251 char c0, c1, c2, c3, c4;
2253 ((((long) (c0 = source[startPosition])) << 24)
2254 + (((long) (c1 = source[startPosition + 1])) << 18)
2255 + ((c2 = source[startPosition + 2]) << 12)
2256 + ((c3 = source[startPosition + 3]) << 6)
2257 + (c4 = source[startPosition + 4]))
2259 char[][] table = charArray_length[3][(int) hash];
2261 while (++i < InternalTableSize) {
2262 char[] charArray = table[i];
2263 if ((c0 == charArray[0])
2264 && (c1 == charArray[1])
2265 && (c2 == charArray[2])
2266 && (c3 == charArray[3])
2267 && (c4 == charArray[4]))
2270 //---------other side---------
2272 int max = newEntry5;
2273 while (++i <= max) {
2274 char[] charArray = table[i];
2275 if ((c0 == charArray[0])
2276 && (c1 == charArray[1])
2277 && (c2 == charArray[2])
2278 && (c3 == charArray[3])
2279 && (c4 == charArray[4]))
2282 //--------add the entry-------
2283 if (++max >= InternalTableSize)
2286 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2292 final char[] optimizedCurrentTokenSource6() {
2293 //try to return the same char[] build only once
2295 char c0, c1, c2, c3, c4, c5;
2297 ((((long) (c0 = source[startPosition])) << 32)
2298 + (((long) (c1 = source[startPosition + 1])) << 24)
2299 + (((long) (c2 = source[startPosition + 2])) << 18)
2300 + ((c3 = source[startPosition + 3]) << 12)
2301 + ((c4 = source[startPosition + 4]) << 6)
2302 + (c5 = source[startPosition + 5]))
2304 char[][] table = charArray_length[4][(int) hash];
2306 while (++i < InternalTableSize) {
2307 char[] charArray = table[i];
2308 if ((c0 == charArray[0])
2309 && (c1 == charArray[1])
2310 && (c2 == charArray[2])
2311 && (c3 == charArray[3])
2312 && (c4 == charArray[4])
2313 && (c5 == charArray[5]))
2316 //---------other side---------
2318 int max = newEntry6;
2319 while (++i <= max) {
2320 char[] charArray = table[i];
2321 if ((c0 == charArray[0])
2322 && (c1 == charArray[1])
2323 && (c2 == charArray[2])
2324 && (c3 == charArray[3])
2325 && (c4 == charArray[4])
2326 && (c5 == charArray[5]))
2329 //--------add the entry-------
2330 if (++max >= InternalTableSize)
2333 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2338 public final void pushLineSeparator() throws InvalidInputException {
2339 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2340 final int INCREMENT = 250;
2342 if (this.checkNonExternalizedStringLiterals) {
2343 // reinitialize the current line for non externalize strings purpose
2346 //currentCharacter is at position currentPosition-1
2349 if (currentCharacter == '\r') {
2350 int separatorPos = currentPosition - 1;
2351 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2353 //System.out.println("CR-" + separatorPos);
2355 lineEnds[++linePtr] = separatorPos;
2356 } catch (IndexOutOfBoundsException e) {
2357 //linePtr value is correct
2358 int oldLength = lineEnds.length;
2359 int[] old = lineEnds;
2360 lineEnds = new int[oldLength + INCREMENT];
2361 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2362 lineEnds[linePtr] = separatorPos;
2364 // look-ahead for merged cr+lf
2366 if (source[currentPosition] == '\n') {
2367 //System.out.println("look-ahead LF-" + currentPosition);
2368 lineEnds[linePtr] = currentPosition;
2374 } catch (IndexOutOfBoundsException e) {
2379 if (currentCharacter == '\n') {
2380 //must merge eventual cr followed by lf
2381 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2382 //System.out.println("merge LF-" + (currentPosition - 1));
2383 lineEnds[linePtr] = currentPosition - 1;
2385 int separatorPos = currentPosition - 1;
2386 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2388 // System.out.println("LF-" + separatorPos);
2390 lineEnds[++linePtr] = separatorPos;
2391 } catch (IndexOutOfBoundsException e) {
2392 //linePtr value is correct
2393 int oldLength = lineEnds.length;
2394 int[] old = lineEnds;
2395 lineEnds = new int[oldLength + INCREMENT];
2396 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2397 lineEnds[linePtr] = separatorPos;
2404 public final void pushUnicodeLineSeparator() {
2405 // isUnicode means that the \r or \n has been read as a unicode character
2407 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2409 final int INCREMENT = 250;
2410 //currentCharacter is at position currentPosition-1
2412 if (this.checkNonExternalizedStringLiterals) {
2413 // reinitialize the current line for non externalize strings purpose
2418 if (currentCharacter == '\r') {
2419 int separatorPos = currentPosition - 6;
2420 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2422 //System.out.println("CR-" + separatorPos);
2424 lineEnds[++linePtr] = separatorPos;
2425 } catch (IndexOutOfBoundsException e) {
2426 //linePtr value is correct
2427 int oldLength = lineEnds.length;
2428 int[] old = lineEnds;
2429 lineEnds = new int[oldLength + INCREMENT];
2430 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2431 lineEnds[linePtr] = separatorPos;
2433 // look-ahead for merged cr+lf
2434 if (source[currentPosition] == '\n') {
2435 //System.out.println("look-ahead LF-" + currentPosition);
2436 lineEnds[linePtr] = currentPosition;
2444 if (currentCharacter == '\n') {
2445 //must merge eventual cr followed by lf
2446 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2447 //System.out.println("merge LF-" + (currentPosition - 1));
2448 lineEnds[linePtr] = currentPosition - 6;
2450 int separatorPos = currentPosition - 6;
2451 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2453 // System.out.println("LF-" + separatorPos);
2455 lineEnds[++linePtr] = separatorPos;
2456 } catch (IndexOutOfBoundsException e) {
2457 //linePtr value is correct
2458 int oldLength = lineEnds.length;
2459 int[] old = lineEnds;
2460 lineEnds = new int[oldLength + INCREMENT];
2461 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2462 lineEnds[linePtr] = separatorPos;
2469 public final void recordComment(boolean isJavadoc) {
2471 // a new annotation comment is recorded
2473 commentStops[++commentPtr] =
2474 isJavadoc ? currentPosition : -currentPosition;
2475 } catch (IndexOutOfBoundsException e) {
2476 int oldStackLength = commentStops.length;
2477 int[] oldStack = commentStops;
2478 commentStops = new int[oldStackLength + 30];
2479 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2480 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2481 //grows the positions buffers too
2482 int[] old = commentStarts;
2483 commentStarts = new int[oldStackLength + 30];
2484 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2487 //the buffer is of a correct size here
2488 commentStarts[commentPtr] = startPosition;
2490 public void resetTo(int begin, int end) {
2491 //reset the scanner to a given position where it may rescan again
2494 initialPosition = startPosition = currentPosition = begin;
2495 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2496 commentPtr = -1; // reset comment stack
/**
 * Decodes the character that follows a backslash inside a single-quoted string.
 *
 * The next character is read straight from the source buffer (the former
 * unicode-escape handling is commented out below) and currentCharacter is
 * replaced by the decoded value: the visible branches produce either a single
 * quote or a backslash.
 *
 * NOTE(review): the case labels are not visible in this excerpt; presumably an
 * escaped quote yields a quote and any other character yields a literal
 * backslash -- confirm against the full switch.
 *
 * @throws InvalidInputException declared by the signature; no throw is visible in this excerpt
 */
2499 public final void scanSingleQuotedEscapeCharacter()
2500 throws InvalidInputException {
2501 // the string with "\\u" is a legal string of two chars \ and u
2502 //thus we use a direct access to the source (for regular cases).
2504 // if (unicodeAsBackSlash) {
2505 // // consume next character
2506 // unicodeAsBackSlash = false;
2507 // if (((currentCharacter = source[currentPosition++]) == '\\')
2508 // && (source[currentPosition] == 'u')) {
2509 // getNextUnicodeChar();
2511 // if (withoutUnicodePtr != 0) {
2512 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the character following the backslash and dispatch on it
2516 currentCharacter = source[currentPosition++];
2517 switch (currentCharacter) {
2519 currentCharacter = '\'';
2522 currentCharacter = '\\';
2525 currentCharacter = '\\';
/**
 * Decodes the escape sequence that follows a backslash inside a double-quoted string.
 *
 * The next character is read straight from the source buffer (the former
 * unicode-escape handling is commented out below). The visible branches map the
 * escape to a tab, newline, carriage return, double quote, single quote,
 * backslash or dollar sign; the assignments for '\b' and '\f' are commented out.
 * Any other character is tried as an octal escape of up to three digits, where a
 * third digit is only accepted when the first digit is in the range 0-3. The
 * decoded value is stored in currentCharacter.
 *
 * NOTE(review): the case labels and the statements that "ignore last character"
 * are not visible in this excerpt -- confirm them against the full method.
 *
 * @throws InvalidInputException with INVALID_ESCAPE from the octal branch; the
 *         guarding condition is not visible here (presumably an out-of-range value)
 */
2530 public final void scanDoubleQuotedEscapeCharacter()
2531 throws InvalidInputException {
2532 // the string with "\\u" is a legal string of two chars \ and u
2533 //thus we use a direct access to the source (for regular cases).
2535 // if (unicodeAsBackSlash) {
2536 // // consume next character
2537 // unicodeAsBackSlash = false;
2538 // if (((currentCharacter = source[currentPosition++]) == '\\')
2539 // && (source[currentPosition] == 'u')) {
2540 // getNextUnicodeChar();
2542 // if (withoutUnicodePtr != 0) {
2543 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the character following the backslash and dispatch on it
2547 currentCharacter = source[currentPosition++];
2548 switch (currentCharacter) {
2550 // currentCharacter = '\b';
2553 currentCharacter = '\t';
2556 currentCharacter = '\n';
2559 // currentCharacter = '\f';
2562 currentCharacter = '\r';
2565 currentCharacter = '\"';
2568 currentCharacter = '\'';
2571 currentCharacter = '\\';
2574 currentCharacter = '$';
2577 // -----------octal escape--------------
2579 // OctalDigit OctalDigit
2580 // ZeroToThree OctalDigit OctalDigit
// first octal digit; number accumulates the value of the whole escape
2582 int number = Character.getNumericValue(currentCharacter);
2583 if (number >= 0 && number <= 7) {
// a leading digit above 3 allows at most two digits in total
2584 boolean zeroToThreeNot = number > 3;
2586 .isDigit(currentCharacter = source[currentPosition++])) {
2587 int digit = Character.getNumericValue(currentCharacter);
2588 if (digit >= 0 && digit <= 7) {
2589 number = (number * 8) + digit;
2591 .isDigit(currentCharacter = source[currentPosition++])) {
2592 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2595 digit = Character.getNumericValue(currentCharacter);
2596 if (digit >= 0 && digit <= 7) {
2597 // has read \ZeroToThree OctalDigit OctalDigit
2598 number = (number * 8) + digit;
2599 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2603 } else { // has read \OctalDigit NonDigit--> ignore last character
2606 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2609 } else { // has read \OctalDigit --> ignore last character
2613 throw new InvalidInputException(INVALID_ESCAPE);
2614 currentCharacter = (char) number;
// the rejection of unknown (non-octal) escapes is disabled here
2617 // throw new InvalidInputException(INVALID_ESCAPE);
2621 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2622 // return scanIdentifierOrKeyword( false );
/**
 * Scans the rest of an identifier and classifies it as a variable, a keyword
 * or a plain identifier.
 *
 * All following identifier-part characters are consumed first. The method can
 * then return TokenNameVariable (NOTE(review): the guarding condition is not
 * visible in this excerpt; presumably it tests isVariable -- confirm). A token
 * of length 1 is always TokenNameIdentifier. Otherwise a lower-cased copy of the
 * token text is matched against the keyword table, so keywords are recognized
 * case-insensitively; the lookup dispatches on the first letter, then on the
 * token length, and finally compares the remaining characters one by one.
 *
 * @param isVariable flag supplied by the caller; see the note above
 * @return TokenNameVariable, the token id of the matched keyword, or
 *         TokenNameIdentifier when no keyword matches
 * @throws InvalidInputException declared by the signature; no throw is visible in this excerpt
 */
2625 public int scanIdentifierOrKeyword(boolean isVariable)
2626 throws InvalidInputException {
2629 //first dispatch on the first char.
2630 //then the length. If there are several
2631 //keywords with the same length AND the same first char, then do another
2632 //dispatch on the second char :-)...cool....but fast !
2634 useAssertAsAnIndentifier = false;
// consume every remaining identifier character of the token
2636 while (getNextCharAsJavaIdentifierPart()) {
2640 return TokenNameVariable;
2645 // if (withoutUnicodePtr == 0)
2647 //quick test on length == 1 but not on length > 12 while most identifier
2648 //have a length which is <= 12...but there are lots of identifier with
2652 if ((length = currentPosition - startPosition) == 1)
2653 return TokenNameIdentifier;
// lower-cased copy of the token text: keyword matching is case-insensitive
2655 data = new char[length];
2656 index = startPosition;
2657 for (int i = 0; i < length; i++) {
2658 data[i] = Character.toLowerCase(source[index + i]);
2662 // if ((length = withoutUnicodePtr) == 1)
2663 // return TokenNameIdentifier;
2664 // // data = withoutUnicodeBuffer;
2665 // data = new char[withoutUnicodeBuffer.length];
2666 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2667 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2672 firstLetter = data[index];
2673 switch (firstLetter) {
2675 case 'a' : // as and (array is disabled)
2678 if ((data[++index] == 's')) {
2681 return TokenNameIdentifier;
2684 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2685 return TokenNameAND;
2687 return TokenNameIdentifier;
2690 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2691 // return TokenNamearray;
2693 // return TokenNameIdentifier;
2695 return TokenNameIdentifier;
2700 if ((data[++index] == 'r')
2701 && (data[++index] == 'e')
2702 && (data[++index] == 'a')
2703 && (data[++index] == 'k'))
2704 return TokenNamebreak;
2706 return TokenNameIdentifier;
2708 return TokenNameIdentifier;
2711 case 'c' : //case class continue
2714 if ((data[++index] == 'a')
2715 && (data[++index] == 's')
2716 && (data[++index] == 'e'))
2717 return TokenNamecase;
2719 return TokenNameIdentifier;
2721 if ((data[++index] == 'l')
2722 && (data[++index] == 'a')
2723 && (data[++index] == 's')
2724 && (data[++index] == 's'))
2725 return TokenNameclass;
2727 return TokenNameIdentifier;
2729 if ((data[++index] == 'o')
2730 && (data[++index] == 'n')
2731 && (data[++index] == 't')
2732 && (data[++index] == 'i')
2733 && (data[++index] == 'n')
2734 && (data[++index] == 'u')
2735 && (data[++index] == 'e'))
2736 return TokenNamecontinue;
2738 return TokenNameIdentifier;
2740 return TokenNameIdentifier;
2743 case 'd' : //define default do
2746 if ((data[++index] == 'o'))
2749 return TokenNameIdentifier;
2751 if ((data[++index] == 'e')
2752 && (data[++index] == 'f')
2753 && (data[++index] == 'i')
2754 && (data[++index] == 'n')
2755 && (data[++index] == 'e'))
2756 return TokenNamedefine;
2758 return TokenNameIdentifier;
2760 if ((data[++index] == 'e')
2761 && (data[++index] == 'f')
2762 && (data[++index] == 'a')
2763 && (data[++index] == 'u')
2764 && (data[++index] == 'l')
2765 && (data[++index] == 't'))
2766 return TokenNamedefault;
2768 return TokenNameIdentifier;
2770 return TokenNameIdentifier;
2772 case 'e' : //echo else elseif endif endfor endwhile endswitch endforeach extends
2775 if ((data[++index] == 'c')
2776 && (data[++index] == 'h')
2777 && (data[++index] == 'o'))
2778 return TokenNameecho;
// index was already advanced by the failed test above, hence data[index] without ++
2780 (data[index] == 'l')
2781 && (data[++index] == 's')
2782 && (data[++index] == 'e'))
2783 return TokenNameelse;
2785 return TokenNameIdentifier;
2787 if ((data[++index] == 'n')
2788 && (data[++index] == 'd')
2789 && (data[++index] == 'i')
2790 && (data[++index] == 'f'))
2791 return TokenNameendif;
2793 return TokenNameIdentifier;
2795 if ((data[++index] == 'n')
2796 && (data[++index] == 'd')
2797 && (data[++index] == 'f')
2798 && (data[++index] == 'o')
2799 && (data[++index] == 'r'))
2800 return TokenNameendfor;
// index was already advanced by the failed test above, hence data[index] without ++
2802 (data[index] == 'l')
2803 && (data[++index] == 's')
2804 && (data[++index] == 'e')
2805 && (data[++index] == 'i')
2806 && (data[++index] == 'f'))
2807 return TokenNameelseif;
2809 return TokenNameIdentifier;
2811 if ((data[++index] == 'x')
2812 && (data[++index] == 't')
2813 && (data[++index] == 'e')
2814 && (data[++index] == 'n')
2815 && (data[++index] == 'd')
2816 && (data[++index] == 's'))
2817 return TokenNameextends;
2819 return TokenNameIdentifier;
2820 case 8 : // endwhile
2821 if ((data[++index] == 'n')
2822 && (data[++index] == 'd')
2823 && (data[++index] == 'w')
2824 && (data[++index] == 'h')
2825 && (data[++index] == 'i')
2826 && (data[++index] == 'l')
2827 && (data[++index] == 'e'))
2828 return TokenNameendwhile;
2830 return TokenNameIdentifier;
2831 case 9 : // endswitch
2832 if ((data[++index] == 'n')
2833 && (data[++index] == 'd')
2834 && (data[++index] == 's')
2835 && (data[++index] == 'w')
2836 && (data[++index] == 'i')
2837 && (data[++index] == 't')
2838 && (data[++index] == 'c')
2839 && (data[++index] == 'h'))
2840 return TokenNameendswitch;
2842 return TokenNameIdentifier;
2843 case 10 : // endforeach
2844 if ((data[++index] == 'n')
2845 && (data[++index] == 'd')
2846 && (data[++index] == 'f')
2847 && (data[++index] == 'o')
2848 && (data[++index] == 'r')
2849 && (data[++index] == 'e')
2850 && (data[++index] == 'a')
2851 && (data[++index] == 'c')
2852 && (data[++index] == 'h'))
2853 return TokenNameendforeach;
2855 return TokenNameIdentifier;
2858 return TokenNameIdentifier;
2861 case 'f' : //for false foreach function
2864 if ((data[++index] == 'o') && (data[++index] == 'r'))
2865 return TokenNamefor;
2867 return TokenNameIdentifier;
2869 if ((data[++index] == 'a')
2870 && (data[++index] == 'l')
2871 && (data[++index] == 's')
2872 && (data[++index] == 'e'))
2873 return TokenNamefalse;
2875 return TokenNameIdentifier;
2876 case 7 : // foreach
2877 if ((data[++index] == 'o')
2878 && (data[++index] == 'r')
2879 && (data[++index] == 'e')
2880 && (data[++index] == 'a')
2881 && (data[++index] == 'c')
2882 && (data[++index] == 'h'))
2883 return TokenNameforeach;
2885 return TokenNameIdentifier;
2886 case 8 : // function
2887 if ((data[++index] == 'u')
2888 && (data[++index] == 'n')
2889 && (data[++index] == 'c')
2890 && (data[++index] == 't')
2891 && (data[++index] == 'i')
2892 && (data[++index] == 'o')
2893 && (data[++index] == 'n'))
2894 return TokenNamefunction;
2896 return TokenNameIdentifier;
2898 return TokenNameIdentifier;
2902 if ((data[++index] == 'l')
2903 && (data[++index] == 'o')
2904 && (data[++index] == 'b')
2905 && (data[++index] == 'a')
2906 && (data[++index] == 'l')) {
2907 return TokenNameglobal;
2910 return TokenNameIdentifier;
2915 if (data[++index] == 'f')
2918 return TokenNameIdentifier;
2920 // if ((data[++index] == 'n') && (data[++index] == 't'))
2921 // return TokenNameint;
2923 // return TokenNameIdentifier;
2925 if ((data[++index] == 'n')
2926 && (data[++index] == 'c')
2927 && (data[++index] == 'l')
2928 && (data[++index] == 'u')
2929 && (data[++index] == 'd')
2930 && (data[++index] == 'e'))
2931 return TokenNameinclude;
2933 return TokenNameIdentifier;
2935 if ((data[++index] == 'n')
2936 && (data[++index] == 'c')
2937 && (data[++index] == 'l')
2938 && (data[++index] == 'u')
2939 && (data[++index] == 'd')
2940 && (data[++index] == 'e')
2941 && (data[++index] == '_')
2942 && (data[++index] == 'o')
2943 && (data[++index] == 'n')
2944 && (data[++index] == 'c')
2945 && (data[++index] == 'e'))
2946 return TokenNameinclude_once;
2948 return TokenNameIdentifier;
2950 return TokenNameIdentifier;
2955 if ((data[++index] == 'i')
2956 && (data[++index] == 's')
2957 && (data[++index] == 't')) {
2958 return TokenNamelist;
2961 return TokenNameIdentifier;
2963 case 'n' : // new null
2966 if ((data[++index] == 'e') && (data[++index] == 'w'))
2967 return TokenNamenew;
2969 return TokenNameIdentifier;
2971 if ((data[++index] == 'u')
2972 && (data[++index] == 'l')
2973 && (data[++index] == 'l'))
2974 return TokenNamenull;
2976 return TokenNameIdentifier;
2979 return TokenNameIdentifier;
2981 case 'o' : // or (old_function is disabled)
2983 if (data[++index] == 'r') {
2987 // if (length == 12) {
2988 // if ((data[++index] == 'l')
2989 // && (data[++index] == 'd')
2990 // && (data[++index] == '_')
2991 // && (data[++index] == 'f')
2992 // && (data[++index] == 'u')
2993 // && (data[++index] == 'n')
2994 // && (data[++index] == 'c')
2995 // && (data[++index] == 't')
2996 // && (data[++index] == 'i')
2997 // && (data[++index] == 'o')
2998 // && (data[++index] == 'n')) {
2999 // return TokenNameold_function;
3002 return TokenNameIdentifier;
3006 if ((data[++index] == 'r')
3007 && (data[++index] == 'i')
3008 && (data[++index] == 'n')
3009 && (data[++index] == 't')) {
3010 return TokenNameprint;
3013 return TokenNameIdentifier;
3014 case 'r' : //return require require_once
3016 if ((data[++index] == 'e')
3017 && (data[++index] == 't')
3018 && (data[++index] == 'u')
3019 && (data[++index] == 'r')
3020 && (data[++index] == 'n')) {
3021 return TokenNamereturn;
3023 } else if (length == 7) {
3024 if ((data[++index] == 'e')
3025 && (data[++index] == 'q')
3026 && (data[++index] == 'u')
3027 && (data[++index] == 'i')
3028 && (data[++index] == 'r')
3029 && (data[++index] == 'e')) {
3030 return TokenNamerequire;
3032 } else if (length == 12) {
3033 if ((data[++index] == 'e')
3034 && (data[++index] == 'q')
3035 && (data[++index] == 'u')
3036 && (data[++index] == 'i')
3037 && (data[++index] == 'r')
3038 && (data[++index] == 'e')
3039 && (data[++index] == '_')
3040 && (data[++index] == 'o')
3041 && (data[++index] == 'n')
3042 && (data[++index] == 'c')
3043 && (data[++index] == 'e')) {
3044 return TokenNamerequire_once;
3047 return TokenNameIdentifier;
3049 case 's' : //static switch
3052 if (data[++index] == 't')
3053 if ((data[++index] == 'a')
3054 && (data[++index] == 't')
3055 && (data[++index] == 'i')
3056 && (data[++index] == 'c')) {
3057 return TokenNamestatic;
3059 return TokenNameIdentifier;
// index already points at the second character here, hence data[index] without ++
3061 (data[index] == 'w')
3062 && (data[++index] == 'i')
3063 && (data[++index] == 't')
3064 && (data[++index] == 'c')
3065 && (data[++index] == 'h'))
3066 return TokenNameswitch;
3068 return TokenNameIdentifier;
3070 return TokenNameIdentifier;
3077 if ((data[++index] == 'r')
3078 && (data[++index] == 'u')
3079 && (data[++index] == 'e'))
3080 return TokenNametrue;
3082 return TokenNameIdentifier;
3083 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3084 // return TokenNamethis;
3087 return TokenNameIdentifier;
3093 if ((data[++index] == 'a') && (data[++index] == 'r'))
3094 return TokenNamevar;
3096 return TokenNameIdentifier;
3099 return TokenNameIdentifier;
3105 if ((data[++index] == 'h')
3106 && (data[++index] == 'i')
3107 && (data[++index] == 'l')
3108 && (data[++index] == 'e'))
3109 return TokenNamewhile;
3111 return TokenNameIdentifier;
3112 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3113 //return TokenNamewidefp ;
3115 //return TokenNameIdentifier;
3117 return TokenNameIdentifier;
3123 if ((data[++index] == 'o') && (data[++index] == 'r'))
3124 return TokenNameXOR;
3126 return TokenNameIdentifier;
3129 return TokenNameIdentifier;
3132 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already in currentCharacter.
 *
 * A leading "0x"/"0X" starts a hexadecimal literal, which must contain at least
 * one hex digit. Any other literal is a run of digits optionally followed by a
 * fractional part, an exponent (with optional sign) and a 'd'/'D' suffix. The
 * former long ('l') and float ('f') suffix checks are commented out.
 *
 * @param dotPrefix true when the digits were preceded by a '.', in which case
 *        the literal is floating from the start and no further '.' is accepted
 * @return TokenNameIntegerLiteral for hexadecimal and plain integer literals,
 *         TokenNameDoubleLiteral for literals with a fraction, exponent or d/D suffix
 * @throws InvalidInputException with INVALID_HEXA when no hex digit follows the
 *         x/X, or with INVALID_FLOAT when an exponent has no digit
 */
3135 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3137 //when entering this method the currentCharacter is the first
3138 //digit of the number , i.e. it may be preceded by a . when
3141 boolean floating = dotPrefix;
3142 if ((!dotPrefix) && (currentCharacter == '0')) {
3143 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3144 //force the first char of the hexa number to exist...
3145 // consume next character
3146 unicodeAsBackSlash = false;
3147 currentCharacter = source[currentPosition++];
3148 // if (((currentCharacter = source[currentPosition++]) == '\\')
3149 // && (source[currentPosition] == 'u')) {
3150 // getNextUnicodeChar();
3152 // if (withoutUnicodePtr != 0) {
3153 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3156 if (Character.digit(currentCharacter, 16) == -1)
3157 throw new InvalidInputException(INVALID_HEXA);
// consume the remaining hex digits
3159 while (getNextCharAsDigit(16)) {
3161 // if (getNextChar('l', 'L') >= 0)
3162 // return TokenNameLongLiteral;
3164 return TokenNameIntegerLiteral;
3167 //there is no x or X in the number
3168 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3169 if (getNextCharAsDigit()) {
3170 //-------------potential octal-----------------
3171 while (getNextCharAsDigit()) {
3174 // if (getNextChar('l', 'L') >= 0) {
3175 // return TokenNameLongLiteral;
3178 // if (getNextChar('f', 'F') >= 0) {
3179 // return TokenNameFloatingPointLiteral;
3182 if (getNextChar('d', 'D') >= 0) {
3183 return TokenNameDoubleLiteral;
3184 } else { //make the distinction between octal and float ....
3185 if (getNextChar('.')) { //bingo ! ....
3186 while (getNextCharAsDigit()) {
3188 if (getNextChar('e', 'E') >= 0) {
3189 // consume next character
3190 unicodeAsBackSlash = false;
3191 currentCharacter = source[currentPosition++];
3192 // if (((currentCharacter = source[currentPosition++]) == '\\')
3193 // && (source[currentPosition] == 'u')) {
3194 // getNextUnicodeChar();
3196 // if (withoutUnicodePtr != 0) {
3197 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3201 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3202 // consume next character
3203 unicodeAsBackSlash = false;
3204 currentCharacter = source[currentPosition++];
3205 // if (((currentCharacter = source[currentPosition++]) == '\\')
3206 // && (source[currentPosition] == 'u')) {
3207 // getNextUnicodeChar();
3209 // if (withoutUnicodePtr != 0) {
3210 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3211 // currentCharacter;
// the exponent needs at least one digit
3215 if (!Character.isDigit(currentCharacter))
3216 throw new InvalidInputException(INVALID_FLOAT);
3217 while (getNextCharAsDigit()) {
3220 // if (getNextChar('f', 'F') >= 0)
3221 // return TokenNameFloatingPointLiteral;
3222 getNextChar('d', 'D'); //jump over potential d or D
3223 return TokenNameDoubleLiteral;
3225 return TokenNameIntegerLiteral;
// integer part of a literal that does not start with 0 (or follows a '.')
3233 while (getNextCharAsDigit()) {
3236 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3237 // return TokenNameLongLiteral;
3239 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3240 while (getNextCharAsDigit()) {
3245 //if floating is true both exponent and suffix may be optional
3247 if (getNextChar('e', 'E') >= 0) {
3249 // consume next character
3250 unicodeAsBackSlash = false;
3251 currentCharacter = source[currentPosition++];
3252 // if (((currentCharacter = source[currentPosition++]) == '\\')
3253 // && (source[currentPosition] == 'u')) {
3254 // getNextUnicodeChar();
3256 // if (withoutUnicodePtr != 0) {
3257 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3261 if ((currentCharacter == '-')
3262 || (currentCharacter == '+')) { // consume next character
3263 unicodeAsBackSlash = false;
3264 currentCharacter = source[currentPosition++];
3265 // if (((currentCharacter = source[currentPosition++]) == '\\')
3266 // && (source[currentPosition] == 'u')) {
3267 // getNextUnicodeChar();
3269 // if (withoutUnicodePtr != 0) {
3270 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the exponent needs at least one digit
3274 if (!Character.isDigit(currentCharacter))
3275 throw new InvalidInputException(INVALID_FLOAT);
3276 while (getNextCharAsDigit()) {
3280 if (getNextChar('d', 'D') >= 0)
3281 return TokenNameDoubleLiteral;
3282 // if (getNextChar('f', 'F') >= 0)
3283 // return TokenNameFloatingPointLiteral;
3285 //the long flag has been tested before
3287 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3290 * Search the line number corresponding to a specific position
// Looks up the line containing the given source position by comparing it with
// the recorded line-end positions lineEnds[0..linePtr]; g and d are the lower
// and upper bounds of the searched index range and m the probed index.
// NOTE(review): the return statements and the loop header are not visible in
// this excerpt; this looks like a binary search over lineEnds -- confirm.
3293 public final int getLineNumber(int position) {
// no line ends have been recorded at all
3295 if (lineEnds == null)
// number of line ends recorded so far
3297 int length = linePtr + 1;
3300 int g = 0, d = length - 1;
3304 if (position < lineEnds[m]) {
3306 } else if (position > lineEnds[m]) {
// final comparison against the last probed line end
3312 if (position < lineEnds[m]) {
// Setter taking the new PHP-mode flag.
// NOTE(review): the body is not visible in this excerpt; presumably it assigns
// the public phpMode field -- confirm.
3318 public void setPHPMode(boolean mode) {
/**
 * Installs a new source buffer and rewinds the scanner to its beginning.
 *
 * A null argument is replaced by an empty array, so this.source is never null
 * afterwards. initialPosition and currentPosition are reset to 0,
 * containsAssertKeyword is cleared, and withoutUnicodeBuffer is reallocated
 * with the same length as the new source.
 *
 * @param source the characters to scan, or null for an empty source
 */
3322 public final void setSource(char[] source) {
3323 //the source-buffer is set to sourceString
3325 if (source == null) {
3326 this.source = new char[0];
3328 this.source = source;
3331 initialPosition = currentPosition = 0;
3332 containsAssertKeyword = false;
// sized from this.source because the parameter may have been null
3333 withoutUnicodeBuffer = new char[this.source.length];
/**
 * Returns a debugging view of the scanner state.
 *
 * When startPosition is at the end of the source, or currentPosition is beyond
 * it, a short marker followed by the whole source is returned. Otherwise the
 * source is split into the text before the current token, the current token
 * (startPosition up to currentPosition - 1) and the remaining text, and the
 * token is framed by "Starts here -->" and "<-- Ends here" markers.
 */
3337 public String toString() {
3338 if (startPosition == source.length)
3339 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3340 if (currentPosition > source.length)
3341 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// everything before the current token
3343 char front[] = new char[startPosition];
3344 System.arraycopy(source, 0, front, 0, startPosition);
// the current token; its length simplifies to currentPosition - startPosition
3346 int middleLength = (currentPosition - 1) - startPosition + 1;
3348 if (middleLength > -1) {
3349 middle = new char[middleLength];
3350 System.arraycopy(source, startPosition, middle, 0, middleLength);
3352 middle = new char[0];
// everything after the current token
3355 char end[] = new char[source.length - (currentPosition - 1)];
3358 (currentPosition - 1) + 1,
3361 source.length - (currentPosition - 1) - 1);
3363 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3364 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable form of a token id, for debugging output.
 *
 * Tokens that carry a value (error, identifier, variable, numeric and string
 * literals, heredoc, ...) are rendered as the token kind followed by the current
 * token source in parentheses. Keywords and operators are rendered as their
 * literal text. The last visible return builds a "not-a-token(<id>)" message
 * followed by the current token source.
 *
 * NOTE(review): several case labels, including the one guarding the
 * "not-a-token" result, are not visible in this excerpt; presumably that result
 * is the default branch -- confirm.
 *
 * @param act the token id to describe
 * @return the textual representation of the token
 */
3367 public final String toStringAction(int act) {
3369 case TokenNameERROR :
3370 return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3371 case TokenNameStopPHP :
3372 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3373 case TokenNameIdentifier :
3374 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3375 case TokenNameVariable :
3376 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3378 return "as"; //$NON-NLS-1$
3379 case TokenNamebreak :
3380 return "break"; //$NON-NLS-1$
3381 case TokenNamecase :
3382 return "case"; //$NON-NLS-1$
3383 case TokenNameclass :
3384 return "class"; //$NON-NLS-1$
3385 case TokenNamecontinue :
3386 return "continue"; //$NON-NLS-1$
3387 case TokenNamedefault :
3388 return "default"; //$NON-NLS-1$
3389 case TokenNamedefine :
3390 return "define"; //$NON-NLS-1$
3392 return "do"; //$NON-NLS-1$
3393 case TokenNameecho :
3394 return "echo"; //$NON-NLS-1$
3395 case TokenNameelse :
3396 return "else"; //$NON-NLS-1$
3397 case TokenNameelseif :
3398 return "elseif"; //$NON-NLS-1$
3399 case TokenNameendfor :
3400 return "endfor"; //$NON-NLS-1$
3401 case TokenNameendforeach :
3402 return "endforeach"; //$NON-NLS-1$
3403 case TokenNameendif :
3404 return "endif"; //$NON-NLS-1$
3405 case TokenNameendswitch :
3406 return "endswitch"; //$NON-NLS-1$
3407 case TokenNameendwhile :
3408 return "endwhile"; //$NON-NLS-1$
3409 case TokenNameextends :
3410 return "extends"; //$NON-NLS-1$
3411 case TokenNamefalse :
3412 return "false"; //$NON-NLS-1$
3414 return "for"; //$NON-NLS-1$
3415 case TokenNameforeach :
3416 return "foreach"; //$NON-NLS-1$
3417 case TokenNamefunction :
3418 return "function"; //$NON-NLS-1$
3419 case TokenNameglobal :
3420 return "global"; //$NON-NLS-1$
3422 return "if"; //$NON-NLS-1$
3423 case TokenNameinclude :
3424 return "include"; //$NON-NLS-1$
3425 case TokenNameinclude_once :
3426 return "include_once"; //$NON-NLS-1$
3427 case TokenNamelist :
3428 return "list"; //$NON-NLS-1$
3430 return "new"; //$NON-NLS-1$
3431 case TokenNamenull :
3432 return "null"; //$NON-NLS-1$
3433 case TokenNameprint :
3434 return "print"; //$NON-NLS-1$
3435 case TokenNamerequire :
3436 return "require"; //$NON-NLS-1$
3437 case TokenNamerequire_once :
3438 return "require_once"; //$NON-NLS-1$
3439 case TokenNamereturn :
3440 return "return"; //$NON-NLS-1$
3441 case TokenNamestatic :
3442 return "static"; //$NON-NLS-1$
3443 case TokenNameswitch :
3444 return "switch"; //$NON-NLS-1$
3445 case TokenNametrue :
3446 return "true"; //$NON-NLS-1$
3448 return "var"; //$NON-NLS-1$
3449 case TokenNamewhile :
3450 return "while"; //$NON-NLS-1$
3451 case TokenNameIntegerLiteral :
3452 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3453 case TokenNameDoubleLiteral :
3454 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3455 case TokenNameStringLiteral :
3456 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3457 case TokenNameStringConstant :
3458 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3459 case TokenNameStringInterpolated :
3460 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3461 case TokenNameHEREDOC :
3462 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3464 case TokenNamePLUS_PLUS :
3465 return "++"; //$NON-NLS-1$
3466 case TokenNameMINUS_MINUS :
3467 return "--"; //$NON-NLS-1$
3468 case TokenNameEQUAL_EQUAL :
3469 return "=="; //$NON-NLS-1$
3470 case TokenNameEQUAL_GREATER :
3471 return "=>"; //$NON-NLS-1$
3472 case TokenNameLESS_EQUAL :
3473 return "<="; //$NON-NLS-1$
3474 case TokenNameGREATER_EQUAL :
3475 return ">="; //$NON-NLS-1$
3476 case TokenNameNOT_EQUAL :
3477 return "!="; //$NON-NLS-1$
3478 case TokenNameLEFT_SHIFT :
3479 return "<<"; //$NON-NLS-1$
3480 case TokenNameRIGHT_SHIFT :
3481 return ">>"; //$NON-NLS-1$
3482 case TokenNamePLUS_EQUAL :
3483 return "+="; //$NON-NLS-1$
3484 case TokenNameMINUS_EQUAL :
3485 return "-="; //$NON-NLS-1$
3486 case TokenNameMULTIPLY_EQUAL :
3487 return "*="; //$NON-NLS-1$
3488 case TokenNameDIVIDE_EQUAL :
3489 return "/="; //$NON-NLS-1$
3490 case TokenNameAND_EQUAL :
3491 return "&="; //$NON-NLS-1$
3492 case TokenNameOR_EQUAL :
3493 return "|="; //$NON-NLS-1$
3494 case TokenNameXOR_EQUAL :
3495 return "^="; //$NON-NLS-1$
3496 case TokenNameREMAINDER_EQUAL :
3497 return "%="; //$NON-NLS-1$
3498 case TokenNameLEFT_SHIFT_EQUAL :
3499 return "<<="; //$NON-NLS-1$
3500 case TokenNameRIGHT_SHIFT_EQUAL :
3501 return ">>="; //$NON-NLS-1$
3502 case TokenNameOR_OR :
3503 return "||"; //$NON-NLS-1$
3504 case TokenNameAND_AND :
3505 return "&&"; //$NON-NLS-1$
3506 case TokenNamePLUS :
3507 return "+"; //$NON-NLS-1$
3508 case TokenNameMINUS :
3509 return "-"; //$NON-NLS-1$
3510 case TokenNameMINUS_GREATER :
3513 return "!"; //$NON-NLS-1$
3514 case TokenNameREMAINDER :
3515 return "%"; //$NON-NLS-1$
3517 return "^"; //$NON-NLS-1$
3519 return "&"; //$NON-NLS-1$
3520 case TokenNameMULTIPLY :
3521 return "*"; //$NON-NLS-1$
3523 return "|"; //$NON-NLS-1$
3524 case TokenNameTWIDDLE :
3525 return "~"; //$NON-NLS-1$
3526 case TokenNameTWIDDLE_EQUAL :
3527 return "~="; //$NON-NLS-1$
3528 case TokenNameDIVIDE :
3529 return "/"; //$NON-NLS-1$
3530 case TokenNameGREATER :
3531 return ">"; //$NON-NLS-1$
3532 case TokenNameLESS :
3533 return "<"; //$NON-NLS-1$
3534 case TokenNameLPAREN :
3535 return "("; //$NON-NLS-1$
3536 case TokenNameRPAREN :
3537 return ")"; //$NON-NLS-1$
3538 case TokenNameLBRACE :
3539 return "{"; //$NON-NLS-1$
3540 case TokenNameRBRACE :
3541 return "}"; //$NON-NLS-1$
3542 case TokenNameLBRACKET :
3543 return "["; //$NON-NLS-1$
3544 case TokenNameRBRACKET :
3545 return "]"; //$NON-NLS-1$
3546 case TokenNameSEMICOLON :
3547 return ";"; //$NON-NLS-1$
3548 case TokenNameQUESTION :
3549 return "?"; //$NON-NLS-1$
3550 case TokenNameCOLON :
3551 return ":"; //$NON-NLS-1$
3552 case TokenNameCOMMA :
3553 return ","; //$NON-NLS-1$
3555 return "."; //$NON-NLS-1$
3556 case TokenNameEQUAL :
3557 return "="; //$NON-NLS-1$
3560 case TokenNameDOLLAR_LBRACE :
3563 return "EOF"; //$NON-NLS-1$
3565 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$ //$NON-NLS-2$
// Constructor overload taking the three tokenizing flags without an assert mode.
// NOTE(review): the body is only partly visible in this excerpt; it appears to
// forward its arguments to the four-argument constructor -- confirm the value
// passed for assertMode.
3570 boolean tokenizeComments,
3571 boolean tokenizeWhiteSpace,
3572 boolean checkNonExternalizedStringLiterals) {
3576 checkNonExternalizedStringLiterals,
// Constructor storing the scanner configuration: each argument is copied to the
// field of the same name, and eofPosition starts at Integer.MAX_VALUE, i.e. no
// end-of-file limit is set until the scanner is repositioned.
3581 boolean tokenizeComments,
3582 boolean tokenizeWhiteSpace,
3583 boolean checkNonExternalizedStringLiterals,
3584 boolean assertMode) {
3585 this.eofPosition = Integer.MAX_VALUE;
3586 this.tokenizeComments = tokenizeComments;
3587 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3588 this.checkNonExternalizedStringLiterals =
3589 checkNonExternalizedStringLiterals;
3590 this.assertMode = assertMode;
/**
 * Runs parseTags on the NLS line currently being collected (currentLine).
 *
 * NOTE(review): the statement guarded by the null test is not visible in this
 * excerpt; presumably the method returns early when currentLine is null -- confirm.
 *
 * @throws InvalidInputException propagated from parseTags
 */
3593 private void checkNonExternalizeString() throws InvalidInputException {
3594 if (currentLine == null)
3596 parseTags(currentLine);
3599 private void parseTags(NLSLine line) throws InvalidInputException {
3600 String s = new String(getCurrentTokenSource());
3601 int pos = s.indexOf(TAG_PREFIX);
3602 int lineLength = line.size();
3604 int start = pos + TAG_PREFIX_LENGTH;
3605 int end = s.indexOf(TAG_POSTFIX, start);
3606 String index = s.substring(start, end);
3609 i = Integer.parseInt(index) - 1;
3610 // Tags are one based not zero based.
3611 } catch (NumberFormatException e) {
3612 i = -1; // we don't want to consider this as a valid NLS tag
3614 if (line.exists(i)) {
3617 pos = s.indexOf(TAG_PREFIX, start);
3620 this.nonNLSStrings = new StringLiteral[lineLength];
3621 int nonNLSCounter = 0;
3622 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3623 StringLiteral literal = (StringLiteral) iterator.next();
3624 if (literal != null) {
3625 this.nonNLSStrings[nonNLSCounter++] = literal;
3628 if (nonNLSCounter == 0) {
3629 this.nonNLSStrings = null;
3633 this.wasNonExternalizedStringLiteral = true;
3634 if (nonNLSCounter != lineLength) {
3638 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),