1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Stack;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
21 public class Scanner implements IScanner, ITerminalSymbols {
/**
 * APIs are:
 * - getNextToken(), which returns the type of the current token (this value is not memorized by the scanner);
 * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been transformed into the correct char);
 * - sourceStart, which gives the position into the stream;
 * - currentPosition - 1, which gives the sourceEnd position into the stream.
 */
28 private boolean assertMode;
30 public boolean useAssertAsAnIndentifier = false;
32 //flag indicating if processed source contains occurrences of keyword assert
33 public boolean containsAssertKeyword = false;
35 public boolean recordLineSeparator;
37 public boolean ignorePHPOneLiner = false;
39 public boolean phpMode = false;
41 public Stack encapsedStringStack = null;
43 public char currentCharacter;
45 public int startPosition;
47 public int currentPosition;
49 public int initialPosition, eofPosition;
// after this position, EOF tokens are generated instead of real tokens from the source
53 public boolean tokenizeComments;
55 public boolean tokenizeWhiteSpace;
57 public boolean tokenizeStrings;
// source should be viewed as a window (aka a part)
// of an entire, very large stream
64 public char[] withoutUnicodeBuffer;
66 public int withoutUnicodePtr;
68 //when == 0 ==> no unicode in the current token
69 public boolean unicodeAsBackSlash = false;
71 public boolean scanningFloatLiteral = false;
73 //support for /** comments
74 public int[] commentStops = new int[10];
76 public int[] commentStarts = new int[10];
78 public int commentPtr = -1; // no comment test with commentPtr value -1
80 protected int lastCommentLinePosition = -1;
82 //diet parsing support - jump over some method body when requested
83 public boolean diet = false;
85 //support for the poor-line-debuggers ....
86 //remember the position of the cr/lf
87 public int[] lineEnds = new int[250];
89 public int linePtr = -1;
91 public boolean wasAcr = false;
93 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
95 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
97 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
99 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
101 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
103 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
105 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
107 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
109 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
111 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
113 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
115 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
//----------------optimized identifier management------------------
118 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
119 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
120 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
121 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
122 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
123 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
124 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
125 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
126 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
128 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
130 static final int TableSize = 30, InternalTableSize = 6;
133 public static final int OptimizedLength = 6;
136 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
138 // support for detecting non-externalized string literals
139 int currentLineNr = -1;
141 int previousLineNr = -1;
143 NLSLine currentLine = null;
145 List lines = new ArrayList();
147 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
149 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
151 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
153 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
155 public StringLiteral[] nonNLSStrings = null;
157 public boolean checkNonExternalizedStringLiterals = true;
159 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: fills every slot of the charArray_length table with the shared
// initCharArray placeholder. The outer bound is OptimizedLength (the constant used to
// dimension the table) instead of a hard-coded 6, so the two cannot drift apart.
{
  for (int i = 0; i < OptimizedLength; i++) {
    for (int j = 0; j < TableSize; j++) {
      for (int k = 0; k < InternalTableSize; k++) {
        charArray_length[i][j][k] = initCharArray;
      }
    }
  }
}
170 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
172 public static final int RoundBracket = 0;
174 public static final int SquareBracket = 1;
176 public static final int CurlyBracket = 2;
178 public static final int BracketKinds = 3;
181 public char[][] foundTaskTags = null;
183 public char[][] foundTaskMessages;
185 public char[][] foundTaskPriorities = null;
187 public int[][] foundTaskPositions;
189 public int foundTaskCount = 0;
191 public char[][] taskTags = null;
193 public char[][] taskPriorities = null;
195 public static final boolean DEBUG = false;
197 public static final boolean TRACE = false;
200 * Determines if the specified character is permissible as the first character in a PHP identifier
202 public static boolean isPHPIdentifierStart(char ch) {
203 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
207 * Determines if the specified character may be part of a PHP identifier as other than the first character
209 public static boolean isPHPIdentifierPart(char ch) {
210 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
213 public final boolean atEnd() {
214 // This code is not relevant if source is
215 // Only a part of the real stream input
216 return source.length == currentPosition;
219 public char[] getCurrentIdentifierSource() {
220 //return the token REAL source (aka unicodes are precomputed)
222 // if (withoutUnicodePtr != 0)
223 // //0 is used as a fast test flag so the real first char is in position 1
225 // withoutUnicodeBuffer,
227 // result = new char[withoutUnicodePtr],
229 // withoutUnicodePtr);
231 int length = currentPosition - startPosition;
232 switch (length) { // see OptimizedLength
234 return optimizedCurrentTokenSource1();
236 return optimizedCurrentTokenSource2();
238 return optimizedCurrentTokenSource3();
240 return optimizedCurrentTokenSource4();
242 return optimizedCurrentTokenSource5();
244 return optimizedCurrentTokenSource6();
247 System.arraycopy(source, startPosition, result = new char[length], 0, length);
252 public int getCurrentTokenEndPosition() {
253 return this.currentPosition - 1;
256 public final char[] getCurrentTokenSource() {
257 // Return the token REAL source (aka unicodes are precomputed)
259 // if (withoutUnicodePtr != 0)
260 // // 0 is used as a fast test flag so the real first char is in position 1
262 // withoutUnicodeBuffer,
264 // result = new char[withoutUnicodePtr],
266 // withoutUnicodePtr);
269 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
274 public final char[] getCurrentTokenSource(int startPos) {
275 // Return the token REAL source (aka unicodes are precomputed)
277 // if (withoutUnicodePtr != 0)
278 // // 0 is used as a fast test flag so the real first char is in position 1
280 // withoutUnicodeBuffer,
282 // result = new char[withoutUnicodePtr],
284 // withoutUnicodePtr);
287 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
292 public final char[] getCurrentTokenSourceString() {
293 //return the token REAL source (aka unicodes are precomputed).
294 //REMOVE the two " that are at the beginning and the end.
296 if (withoutUnicodePtr != 0)
297 //0 is used as a fast test flag so the real first char is in position 1
298 System.arraycopy(withoutUnicodeBuffer, 2,
299 //2 is 1 (real start) + 1 (to jump over the ")
300 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
303 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
308 public int getCurrentTokenStartPosition() {
309 return this.startPosition;
312 public final char[] getCurrentStringLiteralSource() {
313 // Return the token REAL source (aka unicodes are precomputed)
316 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
/**
 * Search the source position corresponding to the end of a given line number.
 *
 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 *
 * In case the given line number is inconsistent, answers -1.
 */
// Looks up the recorded end position of a line in lineEnds; the regular case answers
// lineEnds[lineNumber - 1] (line numbers are 1-based, the table is 0-based).
// NOTE(review): the statements guarded by the three `if` conditions below are not visible in
// this view (presumably `return -1;` for the first two, and a dedicated value for the last
// table slot) — confirm against the pristine file before relying on this method.
328 public final int getLineEnd(int lineNumber) {
// no line table recorded
329 if (lineEnds == null)
// line number beyond the capacity of the table
331 if (lineNumber >= lineEnds.length)
// line number designating the last slot of the table
335 if (lineNumber == lineEnds.length - 1)
337 return lineEnds[lineNumber - 1];
338 // next line start one character behind the lineEnd of the previous line
342 * Search the source position corresponding to the beginning of a given line number
344 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
346 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
348 * In case the given line number is inconsistent, answers -1.
350 public final int getLineStart(int lineNumber) {
351 if (lineEnds == null)
353 if (lineNumber >= lineEnds.length)
358 return initialPosition;
359 return lineEnds[lineNumber - 2] + 1;
360 // next line start one character behind the lineEnd of the previous line
363 public final boolean getNextChar(char testedChar) {
365 //handle the case of unicode.
366 //when a unicode appears then we must use a buffer that holds char
368 //At the end of this method currentCharacter holds the new visited char
369 //and currentPosition points right next after it
370 //Both previous lines are true if the currentCharacter is == to the
372 //On false, no side effect has occured.
373 //ALL getNextChar.... ARE OPTIMIZED COPIES
374 int temp = currentPosition;
376 currentCharacter = source[currentPosition++];
377 // if (((currentCharacter = source[currentPosition++]) == '\\')
378 // && (source[currentPosition] == 'u')) {
379 // //-------------unicode traitement ------------
380 // int c1, c2, c3, c4;
381 // int unicodeSize = 6;
382 // currentPosition++;
383 // while (source[currentPosition] == 'u') {
384 // currentPosition++;
388 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
390 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
392 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
394 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
396 // currentPosition = temp;
400 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
401 // if (currentCharacter != testedChar) {
402 // currentPosition = temp;
405 // unicodeAsBackSlash = currentCharacter == '\\';
407 // //need the unicode buffer
408 // if (withoutUnicodePtr == 0) {
409 // //buffer all the entries that have been left aside....
410 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
414 // withoutUnicodeBuffer,
416 // withoutUnicodePtr);
418 // //fill the buffer with the char
419 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
422 // } //-------------end unicode traitement--------------
424 if (currentCharacter != testedChar) {
425 currentPosition = temp;
428 unicodeAsBackSlash = false;
429 // if (withoutUnicodePtr != 0)
430 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
433 } catch (IndexOutOfBoundsException e) {
434 unicodeAsBackSlash = false;
435 currentPosition = temp;
440 public final int getNextChar(char testedChar1, char testedChar2) {
441 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
442 //test can be done with (x==0) for the first and (x>0) for the second
443 //handle the case of unicode.
444 //when a unicode appears then we must use a buffer that holds char
446 //At the end of this method currentCharacter holds the new visited char
447 //and currentPosition points right next after it
448 //Both previous lines are true if the currentCharacter is == to the
450 //On false, no side effect has occured.
451 //ALL getNextChar.... ARE OPTIMIZED COPIES
452 int temp = currentPosition;
455 currentCharacter = source[currentPosition++];
456 // if (((currentCharacter = source[currentPosition++]) == '\\')
457 // && (source[currentPosition] == 'u')) {
458 // //-------------unicode traitement ------------
459 // int c1, c2, c3, c4;
460 // int unicodeSize = 6;
461 // currentPosition++;
462 // while (source[currentPosition] == 'u') {
463 // currentPosition++;
467 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
469 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
471 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
473 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
475 // currentPosition = temp;
479 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
480 // if (currentCharacter == testedChar1)
482 // else if (currentCharacter == testedChar2)
485 // currentPosition = temp;
489 // //need the unicode buffer
490 // if (withoutUnicodePtr == 0) {
491 // //buffer all the entries that have been left aside....
492 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
496 // withoutUnicodeBuffer,
498 // withoutUnicodePtr);
500 // //fill the buffer with the char
501 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
503 // } //-------------end unicode traitement--------------
505 if (currentCharacter == testedChar1)
507 else if (currentCharacter == testedChar2)
510 currentPosition = temp;
513 // if (withoutUnicodePtr != 0)
514 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
517 } catch (IndexOutOfBoundsException e) {
518 currentPosition = temp;
523 public final boolean getNextCharAsDigit() {
525 //handle the case of unicode.
526 //when a unicode appears then we must use a buffer that holds char
528 //At the end of this method currentCharacter holds the new visited char
529 //and currentPosition points right next after it
530 //Both previous lines are true if the currentCharacter is a digit
531 //On false, no side effect has occured.
532 //ALL getNextChar.... ARE OPTIMIZED COPIES
533 int temp = currentPosition;
535 currentCharacter = source[currentPosition++];
536 // if (((currentCharacter = source[currentPosition++]) == '\\')
537 // && (source[currentPosition] == 'u')) {
538 // //-------------unicode traitement ------------
539 // int c1, c2, c3, c4;
540 // int unicodeSize = 6;
541 // currentPosition++;
542 // while (source[currentPosition] == 'u') {
543 // currentPosition++;
547 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
549 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
551 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
553 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
555 // currentPosition = temp;
559 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
560 // if (!Character.isDigit(currentCharacter)) {
561 // currentPosition = temp;
565 // //need the unicode buffer
566 // if (withoutUnicodePtr == 0) {
567 // //buffer all the entries that have been left aside....
568 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
572 // withoutUnicodeBuffer,
574 // withoutUnicodePtr);
576 // //fill the buffer with the char
577 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
579 // } //-------------end unicode traitement--------------
581 if (!Character.isDigit(currentCharacter)) {
582 currentPosition = temp;
585 // if (withoutUnicodePtr != 0)
586 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
589 } catch (IndexOutOfBoundsException e) {
590 currentPosition = temp;
595 public final boolean getNextCharAsDigit(int radix) {
597 //handle the case of unicode.
598 //when a unicode appears then we must use a buffer that holds char
600 //At the end of this method currentCharacter holds the new visited char
601 //and currentPosition points right next after it
602 //Both previous lines are true if the currentCharacter is a digit base on
604 //On false, no side effect has occured.
605 //ALL getNextChar.... ARE OPTIMIZED COPIES
606 int temp = currentPosition;
608 currentCharacter = source[currentPosition++];
609 // if (((currentCharacter = source[currentPosition++]) == '\\')
610 // && (source[currentPosition] == 'u')) {
611 // //-------------unicode traitement ------------
612 // int c1, c2, c3, c4;
613 // int unicodeSize = 6;
614 // currentPosition++;
615 // while (source[currentPosition] == 'u') {
616 // currentPosition++;
620 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
622 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
624 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
626 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
628 // currentPosition = temp;
632 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
633 // if (Character.digit(currentCharacter, radix) == -1) {
634 // currentPosition = temp;
638 // //need the unicode buffer
639 // if (withoutUnicodePtr == 0) {
640 // //buffer all the entries that have been left aside....
641 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
645 // withoutUnicodeBuffer,
647 // withoutUnicodePtr);
649 // //fill the buffer with the char
650 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
652 // } //-------------end unicode traitement--------------
654 if (Character.digit(currentCharacter, radix) == -1) {
655 currentPosition = temp;
658 // if (withoutUnicodePtr != 0)
659 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
662 } catch (IndexOutOfBoundsException e) {
663 currentPosition = temp;
668 public boolean getNextCharAsJavaIdentifierPart() {
670 //handle the case of unicode.
671 //when a unicode appears then we must use a buffer that holds char
673 //At the end of this method currentCharacter holds the new visited char
674 //and currentPosition points right next after it
675 //Both previous lines are true if the currentCharacter is a
676 // JavaIdentifierPart
677 //On false, no side effect has occured.
678 //ALL getNextChar.... ARE OPTIMIZED COPIES
679 int temp = currentPosition;
681 currentCharacter = source[currentPosition++];
682 // if (((currentCharacter = source[currentPosition++]) == '\\')
683 // && (source[currentPosition] == 'u')) {
684 // //-------------unicode traitement ------------
685 // int c1, c2, c3, c4;
686 // int unicodeSize = 6;
687 // currentPosition++;
688 // while (source[currentPosition] == 'u') {
689 // currentPosition++;
693 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
695 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
697 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
699 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
701 // currentPosition = temp;
705 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
706 // if (!isPHPIdentifierPart(currentCharacter)) {
707 // currentPosition = temp;
711 // //need the unicode buffer
712 // if (withoutUnicodePtr == 0) {
713 // //buffer all the entries that have been left aside....
714 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
718 // withoutUnicodeBuffer,
720 // withoutUnicodePtr);
722 // //fill the buffer with the char
723 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
725 // } //-------------end unicode traitement--------------
727 if (!isPHPIdentifierPart(currentCharacter)) {
728 currentPosition = temp;
731 // if (withoutUnicodePtr != 0)
732 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
735 } catch (IndexOutOfBoundsException e) {
736 currentPosition = temp;
// Looks ahead after an opening parenthesis to decide whether it starts a PHP type cast.
// Skips blanks/tabs, collects a run of ASCII letters, and compares a 3..7 letter word with the
// cast keywords int, bool, real, array, unset, float, object, string, double, boolean and
// integer, selecting the matching TokenName...CAST constant in tempToken.
// The final statements restore currentCharacter/currentPosition to the values saved on entry
// and answer TokenNameLPAREN.
// NOTE(review): several short lines (try/do, case labels, index/found bookkeeping, break/return
// statements, closing braces) are not visible in this view — confirm against the pristine file.
// NOTE(review): the letter loop accepts 'A'..'Z', but every comparison below is against
// lowercase letters only, so e.g. "(INT)" would not be recognised as a cast — confirm whether
// cast keywords should be matched case-insensitively here.
741 public int getCastOrParen() {
// remember the scanner state so it can be restored when no cast is found
742 int tempPosition = currentPosition;
743 char tempCharacter = currentCharacter;
744 int tempToken = TokenNameLPAREN;
745 boolean found = false;
746 StringBuffer buf = new StringBuffer();
// skip blanks and tabs after the parenthesis
749 currentCharacter = source[currentPosition++];
750 } while (currentCharacter == ' ' || currentCharacter == '\t');
// collect the candidate keyword (ASCII letters only)
751 while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
752 buf.append(currentCharacter);
753 currentCharacter = source[currentPosition++];
// only words of 3 to 7 letters can be a cast keyword
755 if (buf.length() >= 3 && buf.length() <= 7) {
756 char[] data = buf.toString().toCharArray();
758 switch (data.length) {
761 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
763 tempToken = TokenNameintCAST;
768 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
770 tempToken = TokenNameboolCAST;
// "real" maps to the double cast token
773 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
775 tempToken = TokenNamedoubleCAST;
781 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
782 && (data[++index] == 'y')) {
784 tempToken = TokenNamearrayCAST;
787 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
788 && (data[++index] == 't')) {
790 tempToken = TokenNameunsetCAST;
// "float" maps to the double cast token
793 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
794 && (data[++index] == 't')) {
796 tempToken = TokenNamedoubleCAST;
802 // object string double
803 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
804 && (data[++index] == 'c') && (data[++index] == 't')) {
806 tempToken = TokenNameobjectCAST;
809 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
810 && (data[++index] == 'n') && (data[++index] == 'g')) {
812 tempToken = TokenNamestringCAST;
815 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
816 && (data[++index] == 'l') && (data[++index] == 'e')) {
818 tempToken = TokenNamedoubleCAST;
// "boolean" maps to the same token as "bool"
825 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
826 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
828 tempToken = TokenNameboolCAST;
// "integer" maps to the same token as "int"
831 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
832 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
834 tempToken = TokenNameintCAST;
// skip blanks and tabs before the expected closing parenthesis
840 while (currentCharacter == ' ' || currentCharacter == '\t') {
841 currentCharacter = source[currentPosition++];
843 if (currentCharacter == ')') {
848 } catch (IndexOutOfBoundsException e) {
// no cast recognised: restore the state saved on entry and report a plain parenthesis
850 currentCharacter = tempCharacter;
851 currentPosition = tempPosition;
852 return TokenNameLPAREN;
// Consumes characters from source until a closing backtick (`) is read.
// A backslash hands control to scanDoubleQuotedEscapeCharacter(); the resulting character is
// stored in withoutUnicodeBuffer, which is first seeded with the token characters read so far
// when it was still unused (withoutUnicodePtr == 0).
// Running past the end of source moves currentPosition back by 2 and throws
// InvalidInputException(UNTERMINATED_STRING). For an INVALID_ESCAPE error the handler looks up to
// 50 characters ahead for a closing backtick and moves currentPosition behind it.
// When checkNonExternalizedStringLiterals is set, a StringLiteral built from
// getCurrentTokenSourceString() is added to currentLine (created and registered in lines if null).
// NOTE(review): several short lines (try, else, break, closing braces, the statement after the
// INVALID_ESCAPE handling) are not visible in this view — confirm against the pristine file.
// NOTE(review): e.getMessage().equals(INVALID_ESCAPE) throws a NullPointerException if the
// exception carries no message; INVALID_ESCAPE.equals(e.getMessage()) would be null-safe.
855 public void consumeStringInterpolated() throws InvalidInputException {
857 // consume next character
858 unicodeAsBackSlash = false;
859 currentCharacter = source[currentPosition++];
860 // if (((currentCharacter = source[currentPosition++]) == '\\')
861 // && (source[currentPosition] == 'u')) {
862 // getNextUnicodeChar();
864 // if (withoutUnicodePtr != 0) {
865 // withoutUnicodeBuffer[++withoutUnicodePtr] =
// scan up to the closing backtick
869 while (currentCharacter != '`') {
870 /** ** in PHP \r and \n are valid in string literals *** */
871 // if ((currentCharacter == '\n')
872 // || (currentCharacter == '\r')) {
873 // // relocate if finding another quote fairly close: thus unicode
874 '/u000D' will be fully consumed
875 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
876 // if (currentPosition + lookAhead == source.length)
878 // if (source[currentPosition + lookAhead] == '\n')
880 // if (source[currentPosition + lookAhead] == '\"') {
881 // currentPosition += lookAhead + 1;
885 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
887 if (currentCharacter == '\\') {
// escapeSize first holds the position before the escape, then the number of characters consumed by it
888 int escapeSize = currentPosition;
889 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
890 //scanEscapeCharacter make a side effect on this value and we need
891 // the previous value few lines down this one
892 scanDoubleQuotedEscapeCharacter();
893 escapeSize = currentPosition - escapeSize;
894 if (withoutUnicodePtr == 0) {
895 //buffer all the entries that have been left aside....
896 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
897 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
898 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
899 } else { //overwrite the / in the buffer
900 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
901 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
902 // where only one is correct
907 // consume next character
908 unicodeAsBackSlash = false;
909 currentCharacter = source[currentPosition++];
910 // if (((currentCharacter = source[currentPosition++]) == '\\')
911 // && (source[currentPosition] == 'u')) {
912 // getNextUnicodeChar();
// once the buffer is in use, every consumed character is appended to it
914 if (withoutUnicodePtr != 0) {
915 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
919 } catch (IndexOutOfBoundsException e) {
920 // reset end position for error reporting
921 currentPosition -= 2;
922 throw new InvalidInputException(UNTERMINATED_STRING);
923 } catch (InvalidInputException e) {
924 if (e.getMessage().equals(INVALID_ESCAPE)) {
925 // relocate if finding another quote fairly close: thus unicode
926 // '/u000D' will be fully consumed
927 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
928 if (currentPosition + lookAhead == source.length)
930 if (source[currentPosition + lookAhead] == '\n')
932 if (source[currentPosition + lookAhead] == '`') {
933 currentPosition += lookAhead + 1;
940 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
941 // //$NON-NLS-?$ where ? is an
943 if (currentLine == null) {
944 currentLine = new NLSLine();
945 lines.add(currentLine);
947 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// Consumes characters from source until a closing single quote (') is read.
// A backslash hands control to scanSingleQuotedEscapeCharacter(); the resulting character is
// stored in withoutUnicodeBuffer, which is first seeded with the token characters read so far
// when it was still unused (withoutUnicodePtr == 0).
// Running past the end of source moves currentPosition back by 2 and throws
// InvalidInputException(UNTERMINATED_STRING). For an INVALID_ESCAPE error the handler looks up to
// 50 characters ahead for a closing single quote and moves currentPosition behind it.
// When checkNonExternalizedStringLiterals is set, a StringLiteral built from
// getCurrentTokenSourceString() is added to currentLine (created and registered in lines if null).
// NOTE(review): several short lines (try, else, break, closing braces, the statement after the
// INVALID_ESCAPE handling) are not visible in this view — confirm against the pristine file.
// NOTE(review): e.getMessage().equals(INVALID_ESCAPE) throws a NullPointerException if the
// exception carries no message; INVALID_ESCAPE.equals(e.getMessage()) would be null-safe.
951 public void consumeStringConstant() throws InvalidInputException {
953 // consume next character
954 unicodeAsBackSlash = false;
955 currentCharacter = source[currentPosition++];
956 // if (((currentCharacter = source[currentPosition++]) == '\\')
957 // && (source[currentPosition] == 'u')) {
958 // getNextUnicodeChar();
960 // if (withoutUnicodePtr != 0) {
961 // withoutUnicodeBuffer[++withoutUnicodePtr] =
// scan up to the closing single quote
965 while (currentCharacter != '\'') {
966 /** ** in PHP \r and \n are valid in string literals *** */
967 // if ((currentCharacter == '\n')
968 // || (currentCharacter == '\r')) {
969 // // relocate if finding another quote fairly close: thus unicode
970 '/u000D' will be fully consumed
971 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
972 // if (currentPosition + lookAhead == source.length)
974 // if (source[currentPosition + lookAhead] == '\n')
976 // if (source[currentPosition + lookAhead] == '\"') {
977 // currentPosition += lookAhead + 1;
981 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
983 if (currentCharacter == '\\') {
// escapeSize first holds the position before the escape, then the number of characters consumed by it
984 int escapeSize = currentPosition;
985 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
986 //scanEscapeCharacter make a side effect on this value and we need
987 // the previous value few lines down this one
988 scanSingleQuotedEscapeCharacter();
989 escapeSize = currentPosition - escapeSize;
990 if (withoutUnicodePtr == 0) {
991 //buffer all the entries that have been left aside....
992 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
993 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
994 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
995 } else { //overwrite the / in the buffer
996 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
997 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
998 // where only one is correct
1003 // consume next character
1004 unicodeAsBackSlash = false;
1005 currentCharacter = source[currentPosition++];
1006 // if (((currentCharacter = source[currentPosition++]) == '\\')
1007 // && (source[currentPosition] == 'u')) {
1008 // getNextUnicodeChar();
// once the buffer is in use, every consumed character is appended to it
1010 if (withoutUnicodePtr != 0) {
1011 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1015 } catch (IndexOutOfBoundsException e) {
1016 // reset end position for error reporting
1017 currentPosition -= 2;
1018 throw new InvalidInputException(UNTERMINATED_STRING);
1019 } catch (InvalidInputException e) {
1020 if (e.getMessage().equals(INVALID_ESCAPE)) {
1021 // relocate if finding another quote fairly close: thus unicode
1022 // '/u000D' will be fully consumed
1023 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1024 if (currentPosition + lookAhead == source.length)
1026 if (source[currentPosition + lookAhead] == '\n')
1028 if (source[currentPosition + lookAhead] == '\'') {
1029 currentPosition += lookAhead + 1;
1036 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1037 // //$NON-NLS-?$ where ? is an
1039 if (currentLine == null) {
1040 currentLine = new NLSLine();
1041 lines.add(currentLine);
1043 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes a double-quoted string literal up to its closing double quote.
 * <p>
 * Characters are read one at a time from {@code source}, advancing {@code currentPosition}. Each backslash is handed to
 * {@code scanDoubleQuotedEscapeCharacter()}; once {@code withoutUnicodePtr} is non-zero every consumed character is also
 * appended to {@code withoutUnicodeBuffer}. Line breaks do not end the literal: the former check for them is commented
 * out because PHP allows them inside strings.
 * <p>
 * When {@code checkNonExternalizedStringLiterals} is set, a {@code StringLiteral} spanning
 * {@code startPosition} to {@code currentPosition - 1} is added to {@code currentLine}, which is created and appended
 * to {@code lines} first if it was {@code null}.
 * <p>
 * NOTE(review): some structural lines of this method (the try statement, closing braces and the statements under the
 * look-ahead checks) are not visible in this view; remarks marked "presumably" rely on them.
 *
 * @throws InvalidInputException with {@code UNTERMINATED_STRING} when the source ends before the closing quote. An
 *         {@code InvalidInputException} raised while scanning an escape is caught here as well and is presumably
 *         rethrown after the relocation step (the rethrow is not visible).
 */
1047 public void consumeStringLiteral() throws InvalidInputException {
1049 // consume next character
1050 unicodeAsBackSlash = false;
1051 currentCharacter = source[currentPosition++];
1052 // if (((currentCharacter = source[currentPosition++]) == '\\')
1053 // && (source[currentPosition] == 'u')) {
1054 // getNextUnicodeChar();
1056 // if (withoutUnicodePtr != 0) {
1057 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1058 // currentCharacter;
// keep reading until the closing double quote has been consumed
1061 while (currentCharacter != '"') {
1062 /** ** in PHP \r and \n are valid in string literals *** */
1063 // if ((currentCharacter == '\n')
1064 // || (currentCharacter == '\r')) {
1065 // // relocate if finding another quote fairly close: thus unicode
1066 // '/u000D' will be fully consumed
1067 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1068 // if (currentPosition + lookAhead == source.length)
1070 // if (source[currentPosition + lookAhead] == '\n')
1072 // if (source[currentPosition + lookAhead] == '\"') {
1073 // currentPosition += lookAhead + 1;
1077 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1079 if (currentCharacter == '\\') {
// escapeSize first remembers the position just after the backslash ...
1080 int escapeSize = currentPosition;
1081 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1082 //scanEscapeCharacter make a side effect on this value and we need
1083 // the previous value few lines down this one
1084 scanDoubleQuotedEscapeCharacter();
// ... and is then turned into the number of characters the escape scan consumed
1085 escapeSize = currentPosition - escapeSize;
1086 if (withoutUnicodePtr == 0) {
1087 //buffer all the entries that have been left aside....
// number of characters between startPosition and the backslash (exclusive)
1088 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
// the buffer is filled starting at index 1
1089 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1090 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1091 } else { //overwrite the / in the buffer
1092 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1093 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1094 // where only one is correct
1095 withoutUnicodePtr--;
1099 // consume next character
1100 unicodeAsBackSlash = false;
1101 currentCharacter = source[currentPosition++];
1102 // if (((currentCharacter = source[currentPosition++]) == '\\')
1103 // && (source[currentPosition] == 'u')) {
1104 // getNextUnicodeChar();
// once buffering has started, every consumed character is appended to the buffer
1106 if (withoutUnicodePtr != 0) {
1107 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the literal was never closed
1111 } catch (IndexOutOfBoundsException e) {
1112 // reset end position for error reporting
1113 currentPosition -= 2;
1114 throw new InvalidInputException(UNTERMINATED_STRING);
1115 } catch (InvalidInputException e) {
1116 if (e.getMessage().equals(INVALID_ESCAPE)) {
1117 // relocate if finding another quote fairly close: thus unicode
1118 // '/u000D' will be fully consumed
// inspect at most the next 50 characters; presumably the scan stops at the end of source or at a newline
// (the statements under those two checks are not visible here)
1119 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1120 if (currentPosition + lookAhead == source.length)
1122 if (source[currentPosition + lookAhead] == '\n')
1124 if (source[currentPosition + lookAhead] == '\"') {
// move just past the closing quote that was found
1125 currentPosition += lookAhead + 1;
1132 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1133 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line record and register it in lines
1135 if (currentLine == null) {
1136 currentLine = new NLSLine();
1137 lines.add(currentLine);
1139 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans and returns the type of the next token, starting at {@code currentPosition}.
 * <p>
 * Visible flow:
 * <ul>
 * <li>inline HTML is delegated to {@code getInlinedHTML(currentPosition)} (the guarding condition is not visible here;
 * presumably it is taken when {@code phpMode} is off);</li>
 * <li>if the top of {@code encapsedStringStack} is neither the {@code '$'} marker nor the blank default, the text of an
 * encapsed string is scanned up to that delimiter or to the start of an interpolation and reported as
 * {@code TokenNameSTRING};</li>
 * <li>if the top is the {@code '$'} marker, a reduced set of tokens for interpolated expressions is recognised;</li>
 * <li>otherwise white space is skipped (or reported as {@code TokenNameWHITESPACE} when {@code tokenizeWhiteSpace} is
 * set) and a {@code switch} on {@code currentCharacter} identifies operators, strings, heredocs, comments, variables,
 * identifiers and numbers.</li>
 * </ul>
 * Reading past the end of {@code source} raises an {@code IndexOutOfBoundsException}; the outermost handler shown at
 * the end of the method returns {@code TokenNameEOF} for it.
 * <p>
 * NOTE(review): the case labels, several braces and break statements of this method are not visible in this view;
 * remarks marked "presumably" rely on them.
 *
 * @return the token type, one of the {@code TokenName*} constants
 * @throws InvalidInputException with {@code UNTERMINATED_COMMENT} when a block comment is not closed before the end of
 *         the source, with the message {@code "Ctrl-Z"} in the branch near the end of the switch, and possibly as
 *         propagated from the string, identifier and number scanning helpers called here
 */
1143 public int getNextToken() throws InvalidInputException {
1145 return getInlinedHTML(currentPosition);
1148 this.wasAcr = false;
1150 jumpOverMethodBody();
// after skipping a method body: EOF if the skip ran past the source, otherwise report the closing brace
1152 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1156 withoutUnicodePtr = 0;
1157 //start with a new token
// encapsedChar stays ' ' when the stack is empty, otherwise it is the character on top of the stack
1158 char encapsedChar = ' ';
1159 if (!encapsedStringStack.isEmpty()) {
1160 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// top of stack is a delimiter character: we are inside the text of an encapsed string
1162 if (encapsedChar != '$' && encapsedChar != ' ') {
1163 currentCharacter = source[currentPosition++];
// the delimiter itself: report the matching encapsed-string token (case labels not visible)
1164 if (currentCharacter == encapsedChar) {
1165 switch (currentCharacter) {
1167 return TokenNameEncapsedString0;
1169 return TokenNameEncapsedString1;
1171 return TokenNameEncapsedString2;
// consume string text until the delimiter shows up again
1174 while (currentCharacter != encapsedChar) {
1175 /** ** in PHP \r and \n are valid in string literals *** */
1176 switch (currentCharacter) {
// presumably the backslash case: same escape handling as in consumeStringLiteral()
1178 int escapeSize = currentPosition;
1179 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1180 //scanEscapeCharacter make a side effect on this value and
1181 // we need the previous value few lines down this one
1182 scanDoubleQuotedEscapeCharacter();
1183 escapeSize = currentPosition - escapeSize;
1184 if (withoutUnicodePtr == 0) {
1185 //buffer all the entries that have been left aside....
1186 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1187 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1188 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1189 } else { //overwrite the / in the buffer
1190 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1191 if (backSlashAsUnicodeInString) { //there are TWO \ in
1192 withoutUnicodePtr--;
// an identifier start or '{' follows (presumably after a '$'): push the '$' marker so that the following
// calls scan in interpolation mode, and report the text read so far
1197 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1199 encapsedStringStack.push(new Character('$'));
1200 return TokenNameSTRING;
1204 if (source[currentPosition] == '$') { // CURLY_OPEN
1206 encapsedStringStack.push(new Character('$'));
1207 return TokenNameSTRING;
1210 // consume next character
1211 unicodeAsBackSlash = false;
1212 currentCharacter = source[currentPosition++];
1213 if (withoutUnicodePtr != 0) {
1214 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1219 return TokenNameSTRING;
1221 // ---------Consume white space and handles startPosition---------
1222 int whiteStart = currentPosition;
1223 startPosition = currentPosition;
1224 currentCharacter = source[currentPosition++];
// interpolation mode: only the tokens that can occur inside an interpolated expression are recognised
1225 if (encapsedChar == '$') {
1226 switch (currentCharacter) {
1228 currentCharacter = source[currentPosition++];
1229 return TokenNameSTRING;
1231 if (encapsedChar == '$') {
1232 if (getNextChar('$'))
1233 return TokenNameLBRACE_DOLLAR;
1235 return TokenNameLBRACE;
1237 return TokenNameRBRACE;
1239 return TokenNameLBRACKET;
1241 return TokenNameRBRACKET;
1243 if (tokenizeStrings) {
1244 consumeStringConstant();
1245 return TokenNameStringSingleQuote;
1247 return TokenNameEncapsedString1;
1249 return TokenNameEncapsedString2;
1251 if (tokenizeStrings) {
1252 consumeStringInterpolated();
1253 return TokenNameStringInterpolated;
1255 return TokenNameEncapsedString0;
1257 if (getNextChar('>'))
1258 return TokenNameMINUS_GREATER;
1259 return TokenNameSTRING;
1261 if (currentCharacter == '$') {
// remember the position so the scanner can back up when no variable or '{' follows the '$'
1262 int oldPosition = currentPosition;
1264 currentCharacter = source[currentPosition++];
1265 if (currentCharacter == '{') {
1266 return TokenNameDOLLAR_LBRACE;
1268 if (isPHPIdentifierStart(currentCharacter)) {
1269 return scanIdentifierOrKeyword(true);
1271 currentPosition = oldPosition;
1272 return TokenNameSTRING;
1274 } catch (IndexOutOfBoundsException e) {
1275 currentPosition = oldPosition;
1276 return TokenNameSTRING;
1279 if (isPHPIdentifierStart(currentCharacter))
1280 return scanIdentifierOrKeyword(false);
1281 if (Character.isDigit(currentCharacter))
1282 return scanNumber(false);
1283 return TokenNameERROR;
1286 // boolean isWhiteSpace;
// regular PHP mode: skip white space, moving startPosition along with it
1288 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1289 startPosition = currentPosition;
1290 currentCharacter = source[currentPosition++];
1291 // if (((currentCharacter = source[currentPosition++]) == '\\')
1292 // && (source[currentPosition] == 'u')) {
1293 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1295 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1296 checkNonExternalizeString();
1297 if (recordLineSeparator) {
1298 pushLineSeparator();
1303 // isWhiteSpace = (currentCharacter == ' ')
1304 // || Character.isWhitespace(currentCharacter);
// at least one white space character was skipped and the caller wants white space tokens
1307 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1308 // reposition scanner in case we are interested by spaces as tokens
1310 startPosition = whiteStart;
1311 return TokenNameWHITESPACE;
1313 //little trick to get out in the middle of a source computation
1314 if (currentPosition > eofPosition)
1315 return TokenNameEOF;
1316 // ---------Identify the next token-------------
// main dispatch on the first character of the token (case labels are not visible in this view)
1317 switch (currentCharacter) {
1319 return getCastOrParen();
1321 return TokenNameRPAREN;
1323 return TokenNameLBRACE;
1325 return TokenNameRBRACE;
1327 return TokenNameLBRACKET;
1329 return TokenNameRBRACKET;
1331 return TokenNameSEMICOLON;
1333 return TokenNameCOMMA;
1335 if (getNextChar('='))
1336 return TokenNameDOT_EQUAL;
1337 if (getNextCharAsDigit())
1338 return scanNumber(true);
1339 return TokenNameDOT;
// two-argument getNextChar: a result of 0 selects the token for the first candidate character
// (presumably another value, tested on a line not visible here, selects the second)
1342 if ((test = getNextChar('+', '=')) == 0)
1343 return TokenNamePLUS_PLUS;
1345 return TokenNamePLUS_EQUAL;
1346 return TokenNamePLUS;
1350 if ((test = getNextChar('-', '=')) == 0)
1351 return TokenNameMINUS_MINUS;
1353 return TokenNameMINUS_EQUAL;
1354 if (getNextChar('>'))
1355 return TokenNameMINUS_GREATER;
1356 return TokenNameMINUS;
1359 if (getNextChar('='))
1360 return TokenNameTWIDDLE_EQUAL;
1361 return TokenNameTWIDDLE;
1363 if (getNextChar('=')) {
1364 if (getNextChar('=')) {
1365 return TokenNameNOT_EQUAL_EQUAL;
1367 return TokenNameNOT_EQUAL;
1369 return TokenNameNOT;
1371 if (getNextChar('='))
1372 return TokenNameMULTIPLY_EQUAL;
1373 return TokenNameMULTIPLY;
1375 if (getNextChar('='))
1376 return TokenNameREMAINDER_EQUAL;
1377 return TokenNameREMAINDER;
// tokens starting with '<' (judging by the results): look at the following character, backing up to
// oldPosition when it does not continue the operator
1379 int oldPosition = currentPosition;
1381 currentCharacter = source[currentPosition++];
1382 } catch (IndexOutOfBoundsException e) {
1383 currentPosition = oldPosition;
1384 return TokenNameLESS;
1386 switch (currentCharacter) {
1388 return TokenNameLESS_EQUAL;
1390 return TokenNameNOT_EQUAL;
1392 if (getNextChar('='))
1393 return TokenNameLEFT_SHIFT_EQUAL;
// a further '<': heredoc start; skip white space in front of the heredoc identifier
1394 if (getNextChar('<')) {
1395 currentCharacter = source[currentPosition++];
1396 while (Character.isWhitespace(currentCharacter)) {
1397 currentCharacter = source[currentPosition++];
// heredocStart is the index of the first character of the identifier
1399 int heredocStart = currentPosition - 1;
1400 int heredocLength = 0;
1401 if (isPHPIdentifierStart(currentCharacter)) {
1402 currentCharacter = source[currentPosition++];
1404 return TokenNameERROR;
1406 while (isPHPIdentifierPart(currentCharacter)) {
1407 currentCharacter = source[currentPosition++];
// length of the identifier that was just scanned
1409 heredocLength = currentPosition - heredocStart - 1;
1410 // heredoc end-tag determination
1411 boolean endTag = true;
// after each line break, compare the following characters with the heredoc identifier
1414 ch = source[currentPosition++];
1415 if (ch == '\r' || ch == '\n') {
1416 if (recordLineSeparator) {
1417 pushLineSeparator();
1421 for (int i = 0; i < heredocLength; i++) {
1422 if (source[currentPosition + i] != source[heredocStart + i]) {
// move past the end tag that was found
1428 currentPosition += heredocLength - 1;
1429 currentCharacter = source[currentPosition++];
1430 break; // do...while loop
1436 return TokenNameHEREDOC;
1438 return TokenNameLEFT_SHIFT;
1440 currentPosition = oldPosition;
1441 return TokenNameLESS;
1445 if ((test = getNextChar('=', '>')) == 0)
1446 return TokenNameGREATER_EQUAL;
1448 if ((test = getNextChar('=', '>')) == 0)
1449 return TokenNameRIGHT_SHIFT_EQUAL;
1450 return TokenNameRIGHT_SHIFT;
1452 return TokenNameGREATER;
1455 if (getNextChar('=')) {
1456 if (getNextChar('=')) {
1457 return TokenNameEQUAL_EQUAL_EQUAL;
1459 return TokenNameEQUAL_EQUAL;
1461 if (getNextChar('>'))
1462 return TokenNameEQUAL_GREATER;
1463 return TokenNameEQUAL;
1466 if ((test = getNextChar('&', '=')) == 0)
1467 return TokenNameAND_AND;
1469 return TokenNameAND_EQUAL;
1470 return TokenNameAND;
1474 if ((test = getNextChar('|', '=')) == 0)
1475 return TokenNameOR_OR;
1477 return TokenNameOR_EQUAL;
1481 if (getNextChar('='))
1482 return TokenNameXOR_EQUAL;
1483 return TokenNameXOR;
// '>' directly after the current character (presumably '?'): end of the PHP section, what follows is
// inline HTML
1485 if (getNextChar('>')) {
1487 if (currentPosition == source.length) {
1489 return TokenNameINLINE_HTML;
// the reported HTML token starts at the two characters that were just consumed
1491 return getInlinedHTML(currentPosition - 2);
1493 return TokenNameQUESTION;
1495 if (getNextChar(':'))
1496 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1497 return TokenNameCOLON;
1501 consumeStringConstant();
1502 return TokenNameStringSingleQuote;
// with tokenizeStrings the whole literal is consumed in one go; the other visible return reports only an
// encapsed-string token
1504 if (tokenizeStrings) {
1505 consumeStringLiteral();
1506 return TokenNameStringDoubleQuote;
1508 return TokenNameEncapsedString2;
1510 if (tokenizeStrings) {
1511 consumeStringInterpolated();
1512 return TokenNameStringInterpolated;
1514 return TokenNameEncapsedString0;
// shared branch for '/' and '#' (startChar distinguishes them)
1517 char startChar = currentCharacter;
1518 if (getNextChar('=')) {
1519 return TokenNameDIVIDE_EQUAL;
// line comment: started by '#' or by a second '/'
1522 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1524 this.lastCommentLinePosition = this.currentPosition;
1525 int endPositionForLineComment = 0;
1526 try { //get the next char
1527 currentCharacter = source[currentPosition++];
1528 // if (((currentCharacter = source[currentPosition++])
1530 // && (source[currentPosition] == 'u')) {
1531 // //-------------unicode processing ------------
1532 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1533 // currentPosition++;
1534 // while (source[currentPosition] == 'u') {
1535 // currentPosition++;
1538 // Character.getNumericValue(source[currentPosition++]))
1542 // Character.getNumericValue(source[currentPosition++]))
1546 // Character.getNumericValue(source[currentPosition++]))
1550 // Character.getNumericValue(source[currentPosition++]))
1554 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1556 // currentCharacter =
1557 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1560 //handle the \\u case manually into comment
1561 // if (currentCharacter == '\\') {
1562 // if (source[currentPosition] == '\\')
1563 // currentPosition++;
1564 // } //jump over the \\
1565 boolean isUnicode = false;
// the comment runs to the end of the line ...
1566 while (currentCharacter != '\r' && currentCharacter != '\n') {
1567 this.lastCommentLinePosition = this.currentPosition;
// ... unless "?>" appears first, in which case the scanner reports inline HTML from there
1568 if (currentCharacter == '?') {
1569 if (getNextChar('>')) {
1570 startPosition = currentPosition - 2;
1572 return TokenNameINLINE_HTML;
1577 currentCharacter = source[currentPosition++];
1578 // if (((currentCharacter = source[currentPosition++])
1580 // && (source[currentPosition] == 'u')) {
1581 // isUnicode = true;
1582 // //-------------unicode processing ------------
1583 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1584 // currentPosition++;
1585 // while (source[currentPosition] == 'u') {
1586 // currentPosition++;
1589 // Character.getNumericValue(source[currentPosition++]))
1593 // Character.getNumericValue(
1594 // source[currentPosition++]))
1598 // Character.getNumericValue(
1599 // source[currentPosition++]))
1603 // Character.getNumericValue(
1604 // source[currentPosition++]))
1608 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1610 // currentCharacter =
1611 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1614 //handle the \\u case manually into comment
1615 // if (currentCharacter == '\\') {
1616 // if (source[currentPosition] == '\\')
1617 // currentPosition++;
1618 // } //jump over the \\
// presumably selected by isUnicode: 6 characters back for a unicode escape, 1 otherwise
1621 endPositionForLineComment = currentPosition - 6;
1623 endPositionForLineComment = currentPosition - 1;
1625 // recordComment(false);
1626 recordComment(TokenNameCOMMENT_LINE);
1627 if (this.taskTags != null)
1628 checkTaskTag(this.startPosition, this.currentPosition);
1629 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1630 checkNonExternalizeString();
1631 if (recordLineSeparator) {
1633 pushUnicodeLineSeparator();
1635 pushLineSeparator();
1641 if (tokenizeComments) {
1643 currentPosition = endPositionForLineComment;
1644 // reset one character behind
1646 return TokenNameCOMMENT_LINE;
1648 } catch (IndexOutOfBoundsException e) { //an eof will then
1650 if (tokenizeComments) {
1652 // reset one character behind
1653 return TokenNameCOMMENT_LINE;
1659 //traditional and annotation comment
1660 boolean isJavadoc = false, star = false;
1661 // consume next character
1662 unicodeAsBackSlash = false;
1663 currentCharacter = source[currentPosition++];
1664 // if (((currentCharacter = source[currentPosition++]) ==
1666 // && (source[currentPosition] == 'u')) {
1667 // getNextUnicodeChar();
1669 // if (withoutUnicodePtr != 0) {
1670 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1671 // currentCharacter;
1674 if (currentCharacter == '*') {
1678 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1679 checkNonExternalizeString();
1680 if (recordLineSeparator) {
1681 pushLineSeparator();
1686 try { //get the next char
1687 currentCharacter = source[currentPosition++];
1688 // if (((currentCharacter = source[currentPosition++])
1690 // && (source[currentPosition] == 'u')) {
1691 // //-------------unicode processing ------------
1692 // getNextUnicodeChar();
1694 //handle the \\u case manually into comment
1695 // if (currentCharacter == '\\') {
1696 // if (source[currentPosition] == '\\')
1697 // currentPosition++;
1698 // //jump over the \\
1700 // empty comment is not a javadoc /**/
1701 if (currentCharacter == '/') {
1704 //loop until end of comment */
// star records whether the previously read character was '*', so the loop ends on a '/' that follows a '*'
1705 while ((currentCharacter != '/') || (!star)) {
1706 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1707 checkNonExternalizeString();
1708 if (recordLineSeparator) {
1709 pushLineSeparator();
1714 star = currentCharacter == '*';
1716 currentCharacter = source[currentPosition++];
1717 // if (((currentCharacter = source[currentPosition++])
1719 // && (source[currentPosition] == 'u')) {
1720 // //-------------unicode processing ------------
1721 // getNextUnicodeChar();
1723 //handle the \\u case manually into comment
1724 // if (currentCharacter == '\\') {
1725 // if (source[currentPosition] == '\\')
1726 // currentPosition++;
1727 // } //jump over the \\
1729 //recordComment(isJavadoc);
// presumably chosen by isJavadoc (the condition is not visible here)
1731 recordComment(TokenNameCOMMENT_PHPDOC);
1733 recordComment(TokenNameCOMMENT_BLOCK);
1736 if (tokenizeComments) {
1738 return TokenNameCOMMENT_PHPDOC;
1739 return TokenNameCOMMENT_BLOCK;
// the source ended inside the block comment
1741 } catch (IndexOutOfBoundsException e) {
1742 // reset end position for error reporting
1743 currentPosition -= 2;
1744 throw new InvalidInputException(UNTERMINATED_COMMENT);
1748 return TokenNameDIVIDE;
1752 return TokenNameEOF;
1753 //the atEnd may not be <currentPosition == source.length> if
1754 // source is only some part of a real (external) stream
1755 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// remaining characters: variables ('$' followed by an identifier), identifiers/keywords and numbers
1757 if (currentCharacter == '$') {
1758 int oldPosition = currentPosition;
1760 currentCharacter = source[currentPosition++];
1761 if (isPHPIdentifierStart(currentCharacter)) {
1762 return scanIdentifierOrKeyword(true);
// a lone '$': back up to just after it and report it as its own token
1764 currentPosition = oldPosition;
1765 return TokenNameDOLLAR;
1767 } catch (IndexOutOfBoundsException e) {
1768 currentPosition = oldPosition;
1769 return TokenNameDOLLAR;
1772 if (isPHPIdentifierStart(currentCharacter))
1773 return scanIdentifierOrKeyword(false);
1774 if (Character.isDigit(currentCharacter))
1775 return scanNumber(false);
1776 return TokenNameERROR;
1779 } //-----------------end switch while try--------------------
// reading past the end of source anywhere above ends up here
1780 catch (IndexOutOfBoundsException e) {
1783 return TokenNameEOF;
/**
 * Scans inline HTML by delegating to {@code getInlinedHTMLToken(start)}.
 * <p>
 * The only visible work after the delegation is a block of commented-out code that would have checked the scanned HTML
 * for unbalanced tags when the token is {@code TokenNameINLINE_HTML}. The return statement is not visible in this view;
 * presumably the token obtained from {@code getInlinedHTMLToken} is returned.
 *
 * @param start position passed on to {@code getInlinedHTMLToken}
 * @throws InvalidInputException declared by the delegate
 */
1786 private int getInlinedHTML(int start) throws InvalidInputException {
1787 int token = getInlinedHTMLToken(start);
1788 if (token == TokenNameINLINE_HTML) {
1789 // Stack stack = new Stack();
1790 // // scan html for errors
1791 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1792 // int lastPHPEndPos=0;
1793 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1794 // Tag tag=(Tag)i.next();
1796 // if (tag instanceof StartTag) {
1797 // StartTag startTag=(StartTag)tag;
1798 // // System.out.println("startTag: "+tag);
1799 // if (startTag.isServerTag()) {
1800 // // TODO : what to do with a server tag ?
1802 // // do whatever with HTML start tag
1803 // // use startTag.getElement() to find the element corresponding
1804 // // to this start tag which may be useful if you implement code
1806 // stack.push(startTag);
1809 // EndTag endTag=(EndTag)tag;
1810 // StartTag stag = (StartTag) stack.peek();
1811 //// System.out.println("endTag: "+tag);
1812 // // do whatever with HTML end tag.
1821 * @throws InvalidInputException
// Scans inline HTML starting at currentPosition and looks for a PHP open tag.
// - Returns TokenNameEOF (after clamping currentPosition to source.length) when the position is already past the source.
// - Sets startPosition to start, then reads characters; for "<?" followed by white space, or by "php" in upper or
//   lower case, it returns TokenNameINLINE_HTML. When ignorePHPOneLiner is set, lookAheadLinePHPTag() is consulted
//   first.
// - Line breaks met on the way are recorded through pushLineSeparator() when recordLineSeparator is set.
// - Reaching the end of source (IndexOutOfBoundsException) also yields TokenNameINLINE_HTML.
// NOTE(review): the loop header, the statements that react to the test results and presumably the phpMode switch are
// not visible in this view.
1823 private int getInlinedHTMLToken(int start) throws InvalidInputException {
1824 // int htmlPosition = start;
1825 if (currentPosition > source.length) {
1826 currentPosition = source.length;
1827 return TokenNameEOF;
1829 startPosition = start;
1832 currentCharacter = source[currentPosition++];
1833 if (currentCharacter == '<') {
1834 if (getNextChar('?')) {
1835 currentCharacter = source[currentPosition++];
// short open tag: "<?" followed by white space
1836 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1838 if (ignorePHPOneLiner) {
1839 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1841 return TokenNameINLINE_HTML;
1845 return TokenNameINLINE_HTML;
// long open tag: "<?" followed by 'p', 'h', 'p' in either case
1848 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1850 int test = getNextChar('H', 'h');
1852 test = getNextChar('P', 'p');
1855 if (ignorePHPOneLiner) {
1856 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1858 return TokenNameINLINE_HTML;
1862 return TokenNameINLINE_HTML;
1870 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1871 if (recordLineSeparator) {
1872 pushLineSeparator();
1877 } //-----------------while--------------------
1879 return TokenNameINLINE_HTML;
1880 } //-----------------try--------------------
// the source ended while still inside HTML
1881 catch (IndexOutOfBoundsException e) {
1882 startPosition = start;
1886 return TokenNameINLINE_HTML;
/**
 * Looks ahead from {@code currentPosition} to decide whether a PHP section is confined to the current line.
 * <p>
 * The scan works on a local copy of the position and tracks whether it is inside a single- or double-quoted string; a
 * quote preceded by a backslash does not close its string. When a character preceded by {@code '?'} is found in the
 * first visible case (presumably {@code '>'}, i.e. the closing tag), {@code currentPosition} is moved behind it and
 * {@code TokenNameEOF} is returned as a dummy token. The other visible exits return {@code TokenNameINLINE_HTML};
 * their case labels are not visible here, presumably they are a line break and comment starts outside of strings.
 *
 * @return {@code TokenNameEOF} as a marker when the closing tag was found by the look-ahead, otherwise
 *         {@code TokenNameINLINE_HTML}
 */
1892 private int lookAheadLinePHPTag() {
1893 // check if the PHP is only in this line (for CodeFormatter)
1894 int currentPositionInLine = currentPosition;
1895 char previousCharInLine = ' ';
1896 char currentCharInLine = ' ';
1897 boolean singleQuotedStringActive = false;
1898 boolean doubleQuotedStringActive = false;
1901 // look ahead in this line
1903 previousCharInLine = currentCharInLine;
1904 currentCharInLine = source[currentPositionInLine++];
1905 switch (currentCharInLine) {
1907 if (previousCharInLine == '?') {
1908 // update the scanner's current Position in the source
1909 currentPosition = currentPositionInLine;
1910 // use as "dummy" token
1911 return TokenNameEOF;
// double quote: closes an open double-quoted string unless escaped, otherwise opens one when no single-quoted
// string is active
1915 if (doubleQuotedStringActive) {
1916 if (previousCharInLine != '\\') {
1917 doubleQuotedStringActive = false;
1920 if (!singleQuotedStringActive) {
1921 doubleQuotedStringActive = true;
// single quote: mirror image of the double quote handling
1926 if (singleQuotedStringActive) {
1927 if (previousCharInLine != '\\') {
1928 singleQuotedStringActive = false;
1931 if (!doubleQuotedStringActive) {
1932 singleQuotedStringActive = true;
1938 return TokenNameINLINE_HTML;
1940 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1942 return TokenNameINLINE_HTML;
1946 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1948 return TokenNameINLINE_HTML;
1952 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1954 return TokenNameINLINE_HTML;
// the source ended during the look-ahead: keep the position that was reached
1959 } catch (IndexOutOfBoundsException e) {
1961 currentPosition = currentPositionInLine;
1962 return TokenNameINLINE_HTML;
1966 // public final void getNextUnicodeChar()
1967 // throws IndexOutOfBoundsException, InvalidInputException {
1969 // //handle the case of unicode.
1970 // //when a unicode appears then we must use a buffer that holds char
1972 // //At the end of this method currentCharacter holds the new visited char
1973 // //and currentPosition points right next after it
1975 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1977 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1978 // currentPosition++;
1979 // while (source[currentPosition] == 'u') {
1980 // currentPosition++;
1984 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1986 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1988 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1990 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1992 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1994 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1995 // //need the unicode buffer
1996 // if (withoutUnicodePtr == 0) {
1997 // //buffer all the entries that have been left aside....
1998 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1999 // System.arraycopy(
2002 // withoutUnicodeBuffer,
2004 // withoutUnicodePtr);
2006 // //fill the buffer with the char
2007 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2009 // unicodeAsBackSlash = currentCharacter == '\\';
2012 * Tokenize a method body, assuming that curly brackets are properly balanced.
// Skips over a method body without producing tokens.
// Visible behaviour: resets wasAcr, then loops: white space is skipped (line separators are recorded when
// recordLineSeparator is set) and a switch on currentCharacter consumes character and string literals (escapes go
// through scanDoubleQuotedEscapeCharacter()), line comments, block comments, identifiers/variables and numbers.
// InvalidInputException and IndexOutOfBoundsException raised on the way are caught inside the method; the handler
// bodies are not visible here.
// NOTE(review): the case labels, the brace counting and several braces/returns are not visible in this view; remarks
// marked "presumably" rely on them.
2014 public final void jumpOverMethodBody() {
2015 this.wasAcr = false;
2018 while (true) { //loop for jumping over comments
2019 // ---------Consume white space and handles startPosition---------
2020 boolean isWhiteSpace;
2022 startPosition = currentPosition;
2023 currentCharacter = source[currentPosition++];
2024 // if (((currentCharacter = source[currentPosition++]) == '\\')
2025 // && (source[currentPosition] == 'u')) {
2026 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2028 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2029 pushLineSeparator();
2030 isWhiteSpace = Character.isWhitespace(currentCharacter);
2032 } while (isWhiteSpace);
2033 // -------consume token until } is found---------
2034 switch (currentCharacter) {
// presumably the single-quote case: an escape or a plain character is consumed
2045 test = getNextChar('\\');
2048 scanDoubleQuotedEscapeCharacter();
2049 } catch (InvalidInputException ex) {
2053 // try { // consume next character
2054 unicodeAsBackSlash = false;
2055 currentCharacter = source[currentPosition++];
2056 // if (((currentCharacter = source[currentPosition++]) == '\\')
2057 // && (source[currentPosition] == 'u')) {
2058 // getNextUnicodeChar();
2060 if (withoutUnicodePtr != 0) {
2061 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2064 // } catch (InvalidInputException ex) {
2072 // try { // consume next character
2073 unicodeAsBackSlash = false;
2074 currentCharacter = source[currentPosition++];
2075 // if (((currentCharacter = source[currentPosition++]) == '\\')
2076 // && (source[currentPosition] == 'u')) {
2077 // getNextUnicodeChar();
2079 if (withoutUnicodePtr != 0) {
2080 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2083 // } catch (InvalidInputException ex) {
// double-quoted string: read up to the closing quote
2085 while (currentCharacter != '"') {
2086 if (currentCharacter == '\r') {
2087 if (source[currentPosition] == '\n')
2090 // the string cannot go further than the line
2092 if (currentCharacter == '\n') {
2094 // the string cannot go further than the line
2096 if (currentCharacter == '\\') {
2098 scanDoubleQuotedEscapeCharacter();
2099 } catch (InvalidInputException ex) {
2103 // try { // consume next character
2104 unicodeAsBackSlash = false;
2105 currentCharacter = source[currentPosition++];
2106 // if (((currentCharacter = source[currentPosition++]) == '\\')
2107 // && (source[currentPosition] == 'u')) {
2108 // getNextUnicodeChar();
2110 if (withoutUnicodePtr != 0) {
2111 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2114 // } catch (InvalidInputException ex) {
2117 } catch (IndexOutOfBoundsException e) {
// a second '/' starts a line comment (result 0 of the two-argument getNextChar)
2123 if ((test = getNextChar('/', '*')) == 0) {
2127 currentCharacter = source[currentPosition++];
2128 // if (((currentCharacter = source[currentPosition++]) ==
2130 // && (source[currentPosition] == 'u')) {
2131 // //-------------unicode processing ------------
2132 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2133 // currentPosition++;
2134 // while (source[currentPosition] == 'u') {
2135 // currentPosition++;
2138 // Character.getNumericValue(source[currentPosition++]))
2142 // Character.getNumericValue(source[currentPosition++]))
2146 // Character.getNumericValue(source[currentPosition++]))
2150 // Character.getNumericValue(source[currentPosition++]))
2153 // //error don't care of the value
2154 // currentCharacter = 'A';
2155 // } //something different from \n and \r
2157 // currentCharacter =
2158 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// skip to the end of the line
2161 while (currentCharacter != '\r' && currentCharacter != '\n') {
2163 currentCharacter = source[currentPosition++];
2164 // if (((currentCharacter = source[currentPosition++])
2166 // && (source[currentPosition] == 'u')) {
2167 // //-------------unicode processing ------------
2168 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2169 // currentPosition++;
2170 // while (source[currentPosition] == 'u') {
2171 // currentPosition++;
2174 // Character.getNumericValue(source[currentPosition++]))
2178 // Character.getNumericValue(source[currentPosition++]))
2182 // Character.getNumericValue(source[currentPosition++]))
2186 // Character.getNumericValue(source[currentPosition++]))
2189 // //error don't care of the value
2190 // currentCharacter = 'A';
2191 // } //something different from \n and \r
2193 // currentCharacter =
2194 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2198 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2199 pushLineSeparator();
2200 } catch (IndexOutOfBoundsException e) {
2201 } //an eof will then be generated
2205 //traditional and annotation comment
2206 boolean star = false;
2207 // try { // consume next character
2208 unicodeAsBackSlash = false;
2209 currentCharacter = source[currentPosition++];
2210 // if (((currentCharacter = source[currentPosition++]) == '\\')
2211 // && (source[currentPosition] == 'u')) {
2212 // getNextUnicodeChar();
2214 if (withoutUnicodePtr != 0) {
2215 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2218 // } catch (InvalidInputException ex) {
2220 if (currentCharacter == '*') {
2223 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2224 pushLineSeparator();
2225 try { //get the next char
2226 currentCharacter = source[currentPosition++];
2227 // if (((currentCharacter = source[currentPosition++]) ==
2229 // && (source[currentPosition] == 'u')) {
2230 // //-------------unicode processing ------------
2231 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2232 // currentPosition++;
2233 // while (source[currentPosition] == 'u') {
2234 // currentPosition++;
2237 // Character.getNumericValue(source[currentPosition++]))
2241 // Character.getNumericValue(source[currentPosition++]))
2245 // Character.getNumericValue(source[currentPosition++]))
2249 // Character.getNumericValue(source[currentPosition++]))
2252 // //error don't care of the value
2253 // currentCharacter = 'A';
2254 // } //something different from * and /
2256 // currentCharacter =
2257 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2260 //loop until end of comment */
// star records whether the previously read character was '*'
2261 while ((currentCharacter != '/') || (!star)) {
2262 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2263 pushLineSeparator();
2264 star = currentCharacter == '*';
2266 currentCharacter = source[currentPosition++];
2267 // if (((currentCharacter = source[currentPosition++])
2269 // && (source[currentPosition] == 'u')) {
2270 // //-------------unicode processing ------------
2271 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2272 // currentPosition++;
2273 // while (source[currentPosition] == 'u') {
2274 // currentPosition++;
2277 // Character.getNumericValue(source[currentPosition++]))
2281 // Character.getNumericValue(source[currentPosition++]))
2285 // Character.getNumericValue(source[currentPosition++]))
2289 // Character.getNumericValue(source[currentPosition++]))
2292 // //error don't care of the value
2293 // currentCharacter = 'A';
2294 // } //something different from * and /
2296 // currentCharacter =
2297 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2301 } catch (IndexOutOfBoundsException e) {
// identifiers and variables: the argument tells scanIdentifierOrKeyword whether the token started with '$'
2309 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2311 scanIdentifierOrKeyword((currentCharacter == '$'));
2312 } catch (InvalidInputException ex) {
2317 if (Character.isDigit(currentCharacter)) {
2320 } catch (InvalidInputException ex) {
2327 //-----------------end switch while try--------------------
2328 } catch (IndexOutOfBoundsException e) {
2329 } catch (InvalidInputException e) {
2334 // public final boolean jumpOverUnicodeWhiteSpace()
2335 // throws InvalidInputException {
2337 // //handle the case of unicode. Jump over the next whiteSpace
2338 // //making startPosition pointing on the next available char
2339 // //On false, the currentCharacter is filled up with a potential
2343 // this.wasAcr = false;
2344 // int c1, c2, c3, c4;
2345 // int unicodeSize = 6;
2346 // currentPosition++;
2347 // while (source[currentPosition] == 'u') {
2348 // currentPosition++;
2352 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2354 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2356 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2358 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2360 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2363 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2364 // if (recordLineSeparator
2365 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2366 // pushLineSeparator();
2367 // if (Character.isWhitespace(currentCharacter))
2370 // //buffer the new char which is not a white space
2371 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2372 // //withoutUnicodePtr == 1 is true here
2374 // } catch (IndexOutOfBoundsException e) {
2375 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Copies the first {@code linePtr + 1} entries of {@code lineEnds} into a newly allocated array of
 * exactly that length, so callers do not see the unused tail of the internal buffer.
 * Per the inline comment, this bounded copy is the method's result.
 */
2378 public final int[] getLineEnds() {
2379 //return a bounded copy of this.lineEnds
// the destination array is allocated inline (assigned to copy) as part of the arraycopy call
2381 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// NOTE(review): presumably hands back the character buffer being scanned (the source array) - confirm against the body.
2385 public char[] getSource() {
/**
 * Tells whether a token code denotes an identifier or a keyword.
 *
 * @param token a token code as defined by the TokenName constants
 * @return true when token equals TokenNameIdentifier or is numerically greater than TokenNameKEYWORD
 */
2389 public static boolean isIdentifierOrKeyword(int token) {
// NOTE(review): relies on every keyword constant being numbered above TokenNameKEYWORD - confirm in ITerminalSymbols
2390 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Produces the source of a one-character token, i.e. the character at source[startPosition].
 * Per the inline comments the intent is to hand back a shared char[] for the common single-character
 * identifiers; the last statement is the fallback that allocates a fresh one-element array.
 */
2393 final char[] optimizedCurrentTokenSource1() {
2394 //return always the same char[] build only once
2395 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2396 char charOne = source[startPosition];
// fallback: build a new single-element array holding the token character
2451 return new char[] { charOne };
/**
 * Produces the source of the two-character token starting at source[startPosition], consulting the
 * charArray_length[0] cache first so that (per the inline comment) an already built char[] can be reused.
 * The bucket is selected by a hash of both characters taken modulo TableSize; when neither scan of the
 * bucket finds the same two characters, a new array is created and stored in the bucket.
 */
2455 final char[] optimizedCurrentTokenSource2() {
2456 //try to return the same char[] build only once
// c0 and c1 are assigned as a side effect of computing the hash
2458 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2459 char[][] table = charArray_length[0][hash];
// first scan of the bucket: compare both characters against each cached entry
2461 while (++i < InternalTableSize) {
2462 char[] charArray = table[i];
2463 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2466 //---------other side---------
// second scan, bounded by the newEntry2 insertion cursor
2468 int max = newEntry2;
2469 while (++i <= max) {
2470 char[] charArray = table[i];
2471 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2474 //--------add the entry-------
// advance the insertion cursor; reaching InternalTableSize is handled specially
2475 if (++max >= InternalTableSize)
// store the newly built array in the bucket and keep it in r
2478 table[max] = (r = new char[] { c0, c1 });
/**
 * Produces the source of the three-character token starting at source[startPosition], consulting the
 * charArray_length[1] cache first so that (per the inline comment) an already built char[] can be reused.
 * When neither scan of the selected bucket finds the same three characters, a new array is created and
 * stored in the bucket.
 */
2483 final char[] optimizedCurrentTokenSource3() {
2484 //try to return the same char[] build only once
// c0, c1 and c2 are assigned as a side effect of computing the hash (shifts of 12, 6 and 0 bits)
2486 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2488 char[][] table = charArray_length[1][hash];
// first scan of the bucket: compare all three characters against each cached entry
2490 while (++i < InternalTableSize) {
2491 char[] charArray = table[i];
2492 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2495 //---------other side---------
// second scan, bounded by the newEntry3 insertion cursor
2497 int max = newEntry3;
2498 while (++i <= max) {
2499 char[] charArray = table[i];
2500 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2503 //--------add the entry-------
// advance the insertion cursor; reaching InternalTableSize is handled specially
2504 if (++max >= InternalTableSize)
// store the newly built array in the bucket and keep it in r
2507 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Produces the source of the four-character token starting at source[startPosition], consulting the
 * charArray_length[2] cache first so that (per the inline comment) an already built char[] can be reused.
 * When neither scan of the selected bucket finds the same four characters, a new array is created and
 * stored in the bucket.
 */
2512 final char[] optimizedCurrentTokenSource4() {
2513 //try to return the same char[] build only once
2514 char c0, c1, c2, c3;
// the hash is accumulated as a long (first character widened before the 18-bit shift) and
// narrowed to int only when used as the bucket index below
2515 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2516 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2518 char[][] table = charArray_length[2][(int) hash];
// first scan of the bucket: compare all four characters against each cached entry
2520 while (++i < InternalTableSize) {
2521 char[] charArray = table[i];
2522 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2525 //---------other side---------
// second scan, bounded by the newEntry4 insertion cursor
2527 int max = newEntry4;
2528 while (++i <= max) {
2529 char[] charArray = table[i];
2530 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2533 //--------add the entry-------
// advance the insertion cursor; reaching InternalTableSize is handled specially
2534 if (++max >= InternalTableSize)
// store the newly built array in the bucket and keep it in r
2537 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Produces the source of the five-character token starting at source[startPosition], consulting the
 * charArray_length[3] cache first so that (per the inline comment) an already built char[] can be reused.
 * When neither scan of the selected bucket finds the same five characters, a new array is created and
 * stored in the bucket.
 */
2542 final char[] optimizedCurrentTokenSource5() {
2543 //try to return the same char[] build only once
2544 char c0, c1, c2, c3, c4;
// the hash is accumulated as a long (the two leading characters are widened before shifting) and
// narrowed to int only when used as the bucket index below
2545 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2546 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2548 char[][] table = charArray_length[3][(int) hash];
// first scan of the bucket: compare all five characters against each cached entry
2550 while (++i < InternalTableSize) {
2551 char[] charArray = table[i];
2552 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2555 //---------other side---------
// second scan, bounded by the newEntry5 insertion cursor
2557 int max = newEntry5;
2558 while (++i <= max) {
2559 char[] charArray = table[i];
2560 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2563 //--------add the entry-------
// advance the insertion cursor; reaching InternalTableSize is handled specially
2564 if (++max >= InternalTableSize)
// store the newly built array in the bucket and keep it in r
2567 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Produces the source of the six-character token starting at source[startPosition], consulting the
 * charArray_length[4] cache first so that (per the inline comment) an already built char[] can be reused.
 * When neither scan of the selected bucket finds the same six characters, a new array is created and
 * stored in the bucket.
 */
2572 final char[] optimizedCurrentTokenSource6() {
2573 //try to return the same char[] build only once
2574 char c0, c1, c2, c3, c4, c5;
// the hash is accumulated as a long (the three leading characters are widened before shifting) and
// narrowed to int only when used as the bucket index below
2575 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2576 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2577 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2579 char[][] table = charArray_length[4][(int) hash];
// first scan of the bucket: compare all six characters against each cached entry
2581 while (++i < InternalTableSize) {
2582 char[] charArray = table[i];
2583 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2584 && (c5 == charArray[5]))
2587 //---------other side---------
// second scan, bounded by the newEntry6 insertion cursor
2589 int max = newEntry6;
2590 while (++i <= max) {
2591 char[] charArray = table[i];
2592 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2593 && (c5 == charArray[5]))
2596 //--------add the entry-------
// advance the insertion cursor; reaching InternalTableSize is handled specially
2597 if (++max >= InternalTableSize)
// store the newly built array in the bucket and keep it in r
2600 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records in lineEnds the position of the line terminator currently held in currentCharacter
 * (which sits at currentPosition - 1), growing lineEnds by INCREMENT slots when it is full.
 * A carriage return directly followed by a line feed is stored as one line end at the line feed
 * position; a line feed that directly follows a recorded carriage return (wasAcr) overwrites that
 * entry instead of adding a second one.
 *
 * @throws InvalidInputException declared by the signature; no throw site is visible in this block
 */
2605 public final void pushLineSeparator() throws InvalidInputException {
2606 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2607 final int INCREMENT = 250;
2608 if (this.checkNonExternalizedStringLiterals) {
2609 // reinitialize the current line for non externalize strings purpose
2612 //currentCharacter is at position currentPosition-1
2614 if (currentCharacter == '\r') {
2615 int separatorPos = currentPosition - 1;
// guard against recording a position at or before the last recorded line end
2616 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2618 //System.out.println("CR-" + separatorPos);
2620 lineEnds[++linePtr] = separatorPos;
2621 } catch (IndexOutOfBoundsException e) {
// the array was full: ++linePtr already took effect before the store failed, so after
// growing the array linePtr indexes the slot to fill
2622 //linePtr value is correct
2623 int oldLength = lineEnds.length;
2624 int[] old = lineEnds;
2625 lineEnds = new int[oldLength + INCREMENT];
2626 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2627 lineEnds[linePtr] = separatorPos;
2629 // look-ahead for merged cr+lf
2631 if (source[currentPosition] == '\n') {
2632 //System.out.println("look-ahead LF-" + currentPosition);
// move the entry just recorded to the line feed so the pair counts as one line end
2633 lineEnds[linePtr] = currentPosition;
2639 } catch (IndexOutOfBoundsException e) {
2644 if (currentCharacter == '\n') {
2645 //must merge eventual cr followed by lf
// currentPosition - 2 is where a carriage return directly preceding this line feed would be
2646 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2647 //System.out.println("merge LF-" + (currentPosition - 1));
2648 lineEnds[linePtr] = currentPosition - 1;
2650 int separatorPos = currentPosition - 1;
// same duplicate guard and grow-on-overflow handling as in the carriage return branch
2651 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2653 // System.out.println("LF-" + separatorPos);
2655 lineEnds[++linePtr] = separatorPos;
2656 } catch (IndexOutOfBoundsException e) {
2657 //linePtr value is correct
2658 int oldLength = lineEnds.length;
2659 int[] old = lineEnds;
2660 lineEnds = new int[oldLength + INCREMENT];
2661 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2662 lineEnds[linePtr] = separatorPos;
/**
 * Variant of pushLineSeparator for a line terminator that, per the inline comment, was read as a
 * unicode character. The recorded position is currentPosition - 6 rather than currentPosition - 1,
 * and the carriage-return merge test looks at currentPosition - 7.
 * NOTE(review): the offsets presumably correspond to a six-character escape sequence - confirm.
 * lineEnds grows by INCREMENT slots when it is full.
 */
2670 public final void pushUnicodeLineSeparator() {
2671 // isUnicode means that the \r or \n has been read as a unicode character
2672 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2673 final int INCREMENT = 250;
2674 //currentCharacter is at position currentPosition-1
2675 if (this.checkNonExternalizedStringLiterals) {
2676 // reinitialize the current line for non externalize strings purpose
2680 if (currentCharacter == '\r') {
2681 int separatorPos = currentPosition - 6;
// guard against recording a position at or before the last recorded line end
2682 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2684 //System.out.println("CR-" + separatorPos);
2686 lineEnds[++linePtr] = separatorPos;
2687 } catch (IndexOutOfBoundsException e) {
// the array was full: ++linePtr already took effect before the store failed, so after
// growing the array linePtr indexes the slot to fill
2688 //linePtr value is correct
2689 int oldLength = lineEnds.length;
2690 int[] old = lineEnds;
2691 lineEnds = new int[oldLength + INCREMENT];
2692 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2693 lineEnds[linePtr] = separatorPos;
2695 // look-ahead for merged cr+lf
2696 if (source[currentPosition] == '\n') {
2697 //System.out.println("look-ahead LF-" + currentPosition);
// move the entry just recorded to the line feed so the pair counts as one line end
2698 lineEnds[linePtr] = currentPosition;
2706 if (currentCharacter == '\n') {
2707 //must merge eventual cr followed by lf
2708 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2709 //System.out.println("merge LF-" + (currentPosition - 1));
2710 lineEnds[linePtr] = currentPosition - 6;
2712 int separatorPos = currentPosition - 6;
// same duplicate guard and grow-on-overflow handling as in the carriage return branch
2713 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2715 // System.out.println("LF-" + separatorPos);
2717 lineEnds[++linePtr] = separatorPos;
2718 } catch (IndexOutOfBoundsException e) {
2719 //linePtr value is correct
2720 int oldLength = lineEnds.length;
2721 int[] old = lineEnds;
2722 lineEnds = new int[oldLength + INCREMENT];
2723 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2724 lineEnds[linePtr] = separatorPos;
/**
 * Records the start and stop positions of a comment in commentStarts / commentStops.
 * The stop position defaults to currentPosition; for TokenNameCOMMENT_LINE it is stored as the
 * negated lastCommentLinePosition and for TokenNameCOMMENT_BLOCK as the negated currentPosition.
 * Both arrays are enlarged by 30 entries when commentPtr runs past their length.
 *
 * @param token the comment token kind (presumably the value the case labels below are matched against)
 */
2732 public void recordComment(int token) {
2734 int stopPosition = this.currentPosition;
2736 case TokenNameCOMMENT_LINE:
2737 stopPosition = -this.lastCommentLinePosition;
2739 case TokenNameCOMMENT_BLOCK:
2740 stopPosition = -this.currentPosition;
2744 // a new comment is recorded
2745 int length = this.commentStops.length;
2746 if (++this.commentPtr >= length) {
// arraycopy reads the old array as its source argument before the inline assignment swaps in
// the larger one, so the existing entries are carried over
2747 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2748 //grows the positions buffers too
2749 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2751 this.commentStops[this.commentPtr] = stopPosition;
2752 this.commentStarts[this.commentPtr] = this.startPosition;
2755 // public final void recordComment(boolean isJavadoc) {
2756 // // a new annotation comment is recorded
2758 // commentStops[++commentPtr] = isJavadoc
2759 // ? currentPosition
2760 // : -currentPosition;
2761 // } catch (IndexOutOfBoundsException e) {
2762 // int oldStackLength = commentStops.length;
2763 // int[] oldStack = commentStops;
2764 // commentStops = new int[oldStackLength + 30];
2765 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2766 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2767 // //grows the positions buffers too
2768 // int[] old = commentStarts;
2769 // commentStarts = new int[oldStackLength + 30];
2770 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2772 // //the buffer is of a correct size here
2773 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart from a given offset.
 * Sets initialPosition, startPosition and currentPosition to begin, sets eofPosition to one past
 * end, and empties the recorded-comment stack.
 *
 * @param begin offset at which scanning restarts
 * @param end last offset to scan (inclusive); Integer.MAX_VALUE is kept as is
 */
2775 public void resetTo(int begin, int end) {
2776 //reset the scanner to a given position where it may rescan again
2778 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, so that value is used unchanged
2779 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2780 commentPtr = -1; // reset comment stack
/**
 * Handles an escape sequence inside a single-quoted string: reads the next character from source,
 * advances currentPosition past it, and switches on it to decide the value left in currentCharacter.
 * The visible branches leave either a single quote or a backslash in currentCharacter.
 *
 * @throws InvalidInputException declared by the signature; no throw site is visible in this block
 */
2783 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2784 // the string with "\\u" is a legal string of two chars \ and u
2785 //thus we use a direct access to the source (for regular cases).
2786 // if (unicodeAsBackSlash) {
2787 // // consume next character
2788 // unicodeAsBackSlash = false;
2789 // if (((currentCharacter = source[currentPosition++]) == '\\')
2790 // && (source[currentPosition] == 'u')) {
2791 // getNextUnicodeChar();
2793 // if (withoutUnicodePtr != 0) {
2794 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2798 currentCharacter = source[currentPosition++];
2799 switch (currentCharacter) {
2801 currentCharacter = '\'';
2804 currentCharacter = '\\';
2807 currentCharacter = '\\';
/**
 * Handles an escape sequence inside a double-quoted string: reads the next character from source,
 * advances currentPosition past it, and replaces currentCharacter with the character the escape
 * denotes. The visible branches produce tab, line feed, carriage return, double quote, single quote,
 * backslash and dollar sign. Otherwise the character is tried as the first digit of an octal escape:
 * up to three octal digits are folded into number (a third digit only when the first one is 0-3, as
 * tracked by zeroToThreeNot) and the result is cast to char.
 *
 * @throws InvalidInputException with INVALID_ESCAPE, raised from the octal branch
 */
2812 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2813 // the string with "\\u" is a legal string of two chars \ and u
2814 //thus we use a direct access to the source (for regular cases).
2815 // if (unicodeAsBackSlash) {
2816 // // consume next character
2817 // unicodeAsBackSlash = false;
2818 // if (((currentCharacter = source[currentPosition++]) == '\\')
2819 // && (source[currentPosition] == 'u')) {
2820 // getNextUnicodeChar();
2822 // if (withoutUnicodePtr != 0) {
2823 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2827 currentCharacter = source[currentPosition++];
2828 switch (currentCharacter) {
2830 // currentCharacter = '\b';
2833 currentCharacter = '\t';
2836 currentCharacter = '\n';
2839 // currentCharacter = '\f';
2842 currentCharacter = '\r';
2845 currentCharacter = '\"';
2848 currentCharacter = '\'';
2851 currentCharacter = '\\';
2854 currentCharacter = '$';
2857 // -----------octal escape--------------
2859 // OctalDigit OctalDigit
2860 // ZeroToThree OctalDigit OctalDigit
2861 int number = Character.getNumericValue(currentCharacter);
2862 if (number >= 0 && number <= 7) {
2863 boolean zeroToThreeNot = number > 3;
2864 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2865 int digit = Character.getNumericValue(currentCharacter);
2866 if (digit >= 0 && digit <= 7) {
2867 number = (number * 8) + digit;
2868 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2869 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2870 // Digit --> ignore last character
2873 digit = Character.getNumericValue(currentCharacter);
2874 if (digit >= 0 && digit <= 7) {
2875 // has read \ZeroToThree OctalDigit OctalDigit
2876 number = (number * 8) + digit;
2877 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2878 // --> ignore last character
2882 } else { // has read \OctalDigit NonDigit--> ignore last
2886 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2890 } else { // has read \OctalDigit --> ignore last character
2894 throw new InvalidInputException(INVALID_ESCAPE);
2895 currentCharacter = (char) number;
2898 // throw new InvalidInputException(INVALID_ESCAPE);
2902 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2903 // return scanIdentifierOrKeyword( false );
/**
 * Scans the remainder of an identifier and classifies it as a variable, a PHP keyword or a plain
 * identifier.
 * <p>
 * Characters are consumed while getNextCharAsJavaIdentifierPart() succeeds. A one-character token is
 * always TokenNameIdentifier. Otherwise the token text is lower-cased into a local array (so keyword
 * matching ignores case) and matched by first letter, then by length, then character by character
 * against the known keyword spellings; any mismatch yields TokenNameIdentifier.
 *
 * @param isVariable callers pass true when the token starts with a dollar sign; presumably this is
 *          what selects the TokenNameVariable result - confirm against the guard around that return
 * @return TokenNameVariable, one of the keyword token constants, or TokenNameIdentifier
 * @throws InvalidInputException declared by the signature; no throw site is visible in this block
 */
2905 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2907 //first dispatch on the first char.
2908 //then the length. If there are several
2909 //keywords with the same length AND the same first char, then do another
2910 //dispatch on the second char :-)...cool....but fast !
2911 useAssertAsAnIndentifier = false;
// consume every following identifier character; currentPosition ends just past the token
2912 while (getNextCharAsJavaIdentifierPart()) {
2916 // if (new String(getCurrentTokenSource()).equals("$this")) {
2917 // return TokenNamethis;
2919 return TokenNameVariable;
2924 // if (withoutUnicodePtr == 0)
2925 //quick test on length == 1 but not on length > 12 while most identifier
2926 //have a length which is <= 12...but there are lots of identifier with
2929 if ((length = currentPosition - startPosition) == 1)
2930 return TokenNameIdentifier;
// lower-cased copy of the token: all keyword comparisons below run against this copy
2932 data = new char[length];
2933 index = startPosition;
2934 for (int i = 0; i < length; i++) {
2935 data[i] = Character.toLowerCase(source[index + i]);
2939 // if ((length = withoutUnicodePtr) == 1)
2940 // return TokenNameIdentifier;
2941 // // data = withoutUnicodeBuffer;
2942 // data = new char[withoutUnicodeBuffer.length];
2943 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2944 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2948 firstLetter = data[index];
2949 switch (firstLetter) {
// each comparison chain below advances index with ++index and stops at the first mismatch
// (short-circuit &&), so index is left wherever the chain failed
2954 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
2955 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2956 return TokenNameFILE;
2957 index = 0; //__LINE__
2958 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
2959 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2960 return TokenNameLINE;
2964 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
2965 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
2966 return TokenNameCLASS_C;
2970 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
2971 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
2972 && (data[++index] == '_'))
2973 return TokenNameMETHOD_C;
2977 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
2978 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
2979 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
2980 return TokenNameFUNC_C;
2983 return TokenNameIdentifier;
2985 // as and array abstract
2989 if ((data[++index] == 's')) {
2992 return TokenNameIdentifier;
2996 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2997 return TokenNameand;
2999 return TokenNameIdentifier;
3003 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3004 return TokenNamearray;
3006 return TokenNameIdentifier;
3008 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3009 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3010 return TokenNameabstract;
3012 return TokenNameIdentifier;
3014 return TokenNameIdentifier;
3020 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3021 return TokenNamebreak;
3023 return TokenNameIdentifier;
3025 return TokenNameIdentifier;
3028 //case catch class clone const continue
3031 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3032 return TokenNamecase;
3034 return TokenNameIdentifier;
3036 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3037 return TokenNamecatch;
3039 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3040 return TokenNameclass;
3042 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3043 return TokenNameclone;
3045 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3046 return TokenNameconst;
3048 return TokenNameIdentifier;
3050 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3051 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3052 return TokenNamecontinue;
3054 return TokenNameIdentifier;
3056 return TokenNameIdentifier;
3059 // declare default do die
3060 // TODO delete define ==> no keyword !
3063 if ((data[++index] == 'o'))
3066 return TokenNameIdentifier;
3068 // if ((data[++index] == 'e')
3069 // && (data[++index] == 'f')
3070 // && (data[++index] == 'i')
3071 // && (data[++index] == 'n')
3072 // && (data[++index] == 'e'))
3073 // return TokenNamedefine;
3075 // return TokenNameIdentifier;
3077 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3078 && (data[++index] == 'r') && (data[++index] == 'e'))
3079 return TokenNamedeclare;
3081 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3082 && (data[++index] == 'l') && (data[++index] == 't'))
3083 return TokenNamedefault;
3085 return TokenNameIdentifier;
3087 return TokenNameIdentifier;
3090 //echo else exit elseif extends eval
3093 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3094 return TokenNameecho;
// NOTE(review): the fallbacks below read data[index] without resetting index. If the chain above
// matched some characters before failing, index is no longer 1, so the wrong position is compared
// and a following ++index may run past the token - verify with an input such as "ecls".
3095 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3096 return TokenNameelse;
3097 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3098 return TokenNameexit;
3099 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3100 return TokenNameeval;
3102 return TokenNameIdentifier;
3105 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3106 return TokenNameendif;
// NOTE(review): same un-reset index concern as for the else/exit/eval fallbacks
3107 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3108 return TokenNameempty;
3110 return TokenNameIdentifier;
3113 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3114 && (data[++index] == 'r'))
3115 return TokenNameendfor;
// NOTE(review): same un-reset index concern as for the else/exit/eval fallbacks
3116 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3117 && (data[++index] == 'f'))
3118 return TokenNameelseif;
3120 return TokenNameIdentifier;
3122 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3123 && (data[++index] == 'd') && (data[++index] == 's'))
3124 return TokenNameextends;
3126 return TokenNameIdentifier;
3129 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3130 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3131 return TokenNameendwhile;
3133 return TokenNameIdentifier;
3136 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3137 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3138 return TokenNameendswitch;
3140 return TokenNameIdentifier;
// NOTE(review): the chain below spells "enddeclare" but returns TokenNameendforeach, the same
// token as the "endforeach" chain that follows. This looks like a copy/paste slip; presumably a
// dedicated enddeclare token is intended - confirm against ITerminalSymbols and the parser.
3143 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3144 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3145 && (data[++index] == 'e'))
3146 return TokenNameendforeach;
3148 if ((data[++index] == 'n') // endforeach
3149 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3150 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3151 return TokenNameendforeach;
3153 return TokenNameIdentifier;
3155 return TokenNameIdentifier;
3158 //for false final function
3161 if ((data[++index] == 'o') && (data[++index] == 'r'))
3162 return TokenNamefor;
3164 return TokenNameIdentifier;
3166 // if ((data[++index] == 'a') && (data[++index] == 'l')
3167 // && (data[++index] == 's') && (data[++index] == 'e'))
3168 // return TokenNamefalse;
3169 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3170 return TokenNamefinal;
3172 return TokenNameIdentifier;
3175 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3176 && (data[++index] == 'c') && (data[++index] == 'h'))
3177 return TokenNameforeach;
3179 return TokenNameIdentifier;
3182 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3183 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3184 return TokenNamefunction;
3186 return TokenNameIdentifier;
3188 return TokenNameIdentifier;
3193 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3194 && (data[++index] == 'l')) {
3195 return TokenNameglobal;
3198 return TokenNameIdentifier;
3200 //if int isset include include_once instanceof interface implements
3203 if (data[++index] == 'f')
3206 return TokenNameIdentifier;
3208 // if ((data[++index] == 'n') && (data[++index] == 't'))
3209 // return TokenNameint;
3211 // return TokenNameIdentifier;
3213 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3214 return TokenNameisset;
3216 return TokenNameIdentifier;
3218 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3219 && (data[++index] == 'd') && (data[++index] == 'e'))
3220 return TokenNameinclude;
3222 return TokenNameIdentifier;
3225 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3226 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3227 return TokenNameinterface;
3229 return TokenNameIdentifier;
3232 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3233 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3234 && (data[++index] == 'f'))
3235 return TokenNameinstanceof;
// NOTE(review): same un-reset index concern as for the else/exit/eval fallbacks
3236 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3237 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3238 && (data[++index] == 's'))
3239 return TokenNameimplements;
3241 return TokenNameIdentifier;
3243 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3244 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3245 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3246 return TokenNameinclude_once;
3248 return TokenNameIdentifier;
3250 return TokenNameIdentifier;
3255 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3256 return TokenNamelist;
3259 return TokenNameIdentifier;
3264 if ((data[++index] == 'e') && (data[++index] == 'w'))
3265 return TokenNamenew;
3267 return TokenNameIdentifier;
3269 // if ((data[++index] == 'u') && (data[++index] == 'l')
3270 // && (data[++index] == 'l'))
3271 // return TokenNamenull;
3273 // return TokenNameIdentifier;
3275 return TokenNameIdentifier;
3280 if (data[++index] == 'r') {
3284 // if (length == 12) {
3285 // if ((data[++index] == 'l')
3286 // && (data[++index] == 'd')
3287 // && (data[++index] == '_')
3288 // && (data[++index] == 'f')
3289 // && (data[++index] == 'u')
3290 // && (data[++index] == 'n')
3291 // && (data[++index] == 'c')
3292 // && (data[++index] == 't')
3293 // && (data[++index] == 'i')
3294 // && (data[++index] == 'o')
3295 // && (data[++index] == 'n')) {
3296 // return TokenNameold_function;
3299 return TokenNameIdentifier;
3301 // print public private protected
3304 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3305 return TokenNameprint;
3307 return TokenNameIdentifier;
3309 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3310 && (data[++index] == 'c')) {
3311 return TokenNamepublic;
3313 return TokenNameIdentifier;
3315 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3316 && (data[++index] == 't') && (data[++index] == 'e')) {
3317 return TokenNameprivate;
3319 return TokenNameIdentifier;
3321 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3322 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3323 return TokenNameprotected;
3325 return TokenNameIdentifier;
3327 return TokenNameIdentifier;
3329 //return require require_once
3331 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3332 && (data[++index] == 'n')) {
3333 return TokenNamereturn;
3335 } else if (length == 7) {
3336 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3337 && (data[++index] == 'r') && (data[++index] == 'e')) {
3338 return TokenNamerequire;
3340 } else if (length == 12) {
3341 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3342 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3343 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3344 return TokenNamerequire_once;
3347 return TokenNameIdentifier;
3352 if (data[++index] == 't')
3353 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3354 return TokenNamestatic;
3356 return TokenNameIdentifier;
3357 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3358 && (data[++index] == 'h'))
3359 return TokenNameswitch;
3361 return TokenNameIdentifier;
3363 return TokenNameIdentifier;
3369 if ((data[++index] == 'r') && (data[++index] == 'y'))
3370 return TokenNametry;
3372 return TokenNameIdentifier;
3374 // if ((data[++index] == 'r') && (data[++index] == 'u')
3375 // && (data[++index] == 'e'))
3376 // return TokenNametrue;
3378 // return TokenNameIdentifier;
3380 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3381 return TokenNamethrow;
3383 return TokenNameIdentifier;
3385 return TokenNameIdentifier;
3391 if ((data[++index] == 's') && (data[++index] == 'e'))
3392 return TokenNameuse;
3394 return TokenNameIdentifier;
3396 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3397 return TokenNameunset;
3399 return TokenNameIdentifier;
3401 return TokenNameIdentifier;
3407 if ((data[++index] == 'a') && (data[++index] == 'r'))
3408 return TokenNamevar;
3410 return TokenNameIdentifier;
3412 return TokenNameIdentifier;
3418 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3419 return TokenNamewhile;
3421 return TokenNameIdentifier;
3422 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3423 // (data[++index]=='e') && (data[++index]=='f')&&
3424 // (data[++index]=='p'))
3425 //return TokenNamewidefp ;
3427 //return TokenNameIdentifier;
3429 return TokenNameIdentifier;
3435 if ((data[++index] == 'o') && (data[++index] == 'r'))
3436 return TokenNamexor;
3438 return TokenNameIdentifier;
3440 return TokenNameIdentifier;
3443 return TokenNameIdentifier;
3447 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3448 //when entering this method the currentCharacter is the first
3449 //digit of the number, i.e. it may be preceded by a . when
3451 boolean floating = dotPrefix;
3452 if ((!dotPrefix) && (currentCharacter == '0')) {
3453 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3454 //force the first char of the hexa number do exist...
3455 // consume next character
3456 unicodeAsBackSlash = false;
3457 currentCharacter = source[currentPosition++];
3458 // if (((currentCharacter = source[currentPosition++]) == '\\')
3459 // && (source[currentPosition] == 'u')) {
3460 // getNextUnicodeChar();
3462 // if (withoutUnicodePtr != 0) {
3463 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3466 if (Character.digit(currentCharacter, 16) == -1)
3467 throw new InvalidInputException(INVALID_HEXA);
3469 while (getNextCharAsDigit(16)) {
3472 // if (getNextChar('l', 'L') >= 0)
3473 // return TokenNameLongLiteral;
3475 return TokenNameIntegerLiteral;
3477 //there is x or X in the number
3478 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3479 // 00078.0 is true !!!!! crazy language
3480 if (getNextCharAsDigit()) {
3481 //-------------potential octal-----------------
3482 while (getNextCharAsDigit()) {
3485 // if (getNextChar('l', 'L') >= 0) {
3486 // return TokenNameLongLiteral;
3489 // if (getNextChar('f', 'F') >= 0) {
3490 // return TokenNameFloatingPointLiteral;
3492 if (getNextChar('d', 'D') >= 0) {
3493 return TokenNameDoubleLiteral;
3494 } else { //make the distinction between octal and float ....
3495 if (getNextChar('.')) { //bingo ! ....
3496 while (getNextCharAsDigit()) {
3499 if (getNextChar('e', 'E') >= 0) {
3500 // consume next character
3501 unicodeAsBackSlash = false;
3502 currentCharacter = source[currentPosition++];
3503 // if (((currentCharacter = source[currentPosition++]) == '\\')
3504 // && (source[currentPosition] == 'u')) {
3505 // getNextUnicodeChar();
3507 // if (withoutUnicodePtr != 0) {
3508 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3511 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3512 // consume next character
3513 unicodeAsBackSlash = false;
3514 currentCharacter = source[currentPosition++];
3515 // if (((currentCharacter = source[currentPosition++]) == '\\')
3516 // && (source[currentPosition] == 'u')) {
3517 // getNextUnicodeChar();
3519 // if (withoutUnicodePtr != 0) {
3520 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3521 // currentCharacter;
3525 if (!Character.isDigit(currentCharacter))
3526 throw new InvalidInputException(INVALID_FLOAT);
3527 while (getNextCharAsDigit()) {
3531 // if (getNextChar('f', 'F') >= 0)
3532 // return TokenNameFloatingPointLiteral;
3533 getNextChar('d', 'D'); //jump over potential d or D
3534 return TokenNameDoubleLiteral;
3536 return TokenNameIntegerLiteral;
3543 while (getNextCharAsDigit()) {
3546 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3547 // return TokenNameLongLiteral;
3548 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3549 while (getNextCharAsDigit()) {
3554 //if floating is true both exponant and suffix may be optional
3555 if (getNextChar('e', 'E') >= 0) {
3557 // consume next character
3558 unicodeAsBackSlash = false;
3559 currentCharacter = source[currentPosition++];
3560 // if (((currentCharacter = source[currentPosition++]) == '\\')
3561 // && (source[currentPosition] == 'u')) {
3562 // getNextUnicodeChar();
3564 // if (withoutUnicodePtr != 0) {
3565 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3568 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3571 unicodeAsBackSlash = false;
3572 currentCharacter = source[currentPosition++];
3573 // if (((currentCharacter = source[currentPosition++]) == '\\')
3574 // && (source[currentPosition] == 'u')) {
3575 // getNextUnicodeChar();
3577 // if (withoutUnicodePtr != 0) {
3578 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3582 if (!Character.isDigit(currentCharacter))
3583 throw new InvalidInputException(INVALID_FLOAT);
3584 while (getNextCharAsDigit()) {
3588 if (getNextChar('d', 'D') >= 0)
3589 return TokenNameDoubleLiteral;
3590 // if (getNextChar('f', 'F') >= 0)
3591 // return TokenNameFloatingPointLiteral;
3592 //the long flag has been tested before
3593 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3597 * Search the line number corresponding to a specific position
// Looks up the line that contains the given source position by comparing the position
// with the entries of lineEnds; only the first linePtr + 1 entries are considered.
// NOTE(review): the loop header, the computation of the probe index m and every return
// statement are not visible in this listing. The g / d / m pattern looks like a binary
// search (g = low bound, d = high bound, m = probe) - confirm against the full source.
3600 public final int getLineNumber(int position) {
// no line-end table has been recorded
3601 if (lineEnds == null)
// number of line ends in use
3603 int length = linePtr + 1;
// g and d delimit the index range of lineEnds still under consideration
3606 int g = 0, d = length - 1;
// position lies before the line end stored at m
3610 if (position < lineEnds[m]) {
// position lies after the line end stored at m
3612 } else if (position > lineEnds[m]) {
// final comparison against the last probed line end
3618 if (position < lineEnds[m]) {
// Setter that receives the PHP mode flag as a boolean.
// NOTE(review): the body is not visible in this listing; presumably it stores mode in
// the public phpMode field - confirm.
3624 public void setPHPMode(boolean mode) {
// Installs a new character buffer to scan and resets the per-source scanner state.
// A null argument is replaced by an empty array, so this.source is never null afterwards.
// NOTE(review): one numbered line (3635) between the null handling and the position
// reset is missing from this listing; any state it resets is not described here.
3628 public final void setSource(char[] source) {
3629 //the source-buffer is set to sourceString
3630 if (source == null) {
// scan an empty buffer instead of keeping a null reference
3631 this.source = new char[0];
3633 this.source = source;
// restart at the beginning of the new buffer
3636 initialPosition = currentPosition = 0;
3637 containsAssertKeyword = false;
// same capacity as the source buffer
3638 withoutUnicodeBuffer = new char[this.source.length];
// start with a fresh, empty stack for every source
3639 encapsedStringStack = new Stack();
3642 public String toString() {
3643 if (startPosition == source.length)
3644 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3645 if (currentPosition > source.length)
3646 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3647 char front[] = new char[startPosition];
3648 System.arraycopy(source, 0, front, 0, startPosition);
3649 int middleLength = (currentPosition - 1) - startPosition + 1;
3651 if (middleLength > -1) {
3652 middle = new char[middleLength];
3653 System.arraycopy(source, startPosition, middle, 0, middleLength);
3655 middle = new char[0];
3657 char end[] = new char[source.length - (currentPosition - 1)];
3658 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3659 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3660 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of the token kind given in act.
 * Token kinds that carry text (inline HTML, identifiers, variables, number and string
 * literals, heredocs, whitespace, comments) are rendered as Name(source) using
 * getCurrentTokenSource(), so the result reflects the token the scanner is currently
 * positioned on. Keywords, operators, separators, magic constants and casts are
 * rendered as fixed text. Any other value yields "not-a-token(act) " followed by the
 * current token source.
 * NOTE(review): several short case labels and return statements are not visible in this
 * listing; the comments below only describe the visible lines.
 *
 * @param act the token kind to describe
 * @return the description, never null for the visible branches
 */
3664 public final String toStringAction(int act) {
3666 case TokenNameERROR:
3667 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
// ---- tokens rendered together with their source text ----
3669 case TokenNameINLINE_HTML:
3670 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3671 case TokenNameIdentifier:
3672 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3673 case TokenNameVariable:
3674 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords ----
3675 case TokenNameabstract:
3676 return "abstract"; //$NON-NLS-1$
3678 return "AND"; //$NON-NLS-1$
3679 case TokenNamearray:
3680 return "array"; //$NON-NLS-1$
3682 return "as"; //$NON-NLS-1$
3683 case TokenNamebreak:
3684 return "break"; //$NON-NLS-1$
3686 return "case"; //$NON-NLS-1$
3687 case TokenNameclass:
3688 return "class"; //$NON-NLS-1$
3689 case TokenNamecatch:
3690 return "catch"; //$NON-NLS-1$
3691 case TokenNameclone:
3694 case TokenNameconst:
3697 case TokenNamecontinue:
3698 return "continue"; //$NON-NLS-1$
3699 case TokenNamedefault:
3700 return "default"; //$NON-NLS-1$
3701 // case TokenNamedefine :
3702 // return "define"; //$NON-NLS-1$
3704 return "do"; //$NON-NLS-1$
3706 return "echo"; //$NON-NLS-1$
3708 return "else"; //$NON-NLS-1$
3709 case TokenNameelseif:
3710 return "elseif"; //$NON-NLS-1$
3711 case TokenNameendfor:
3712 return "endfor"; //$NON-NLS-1$
3713 case TokenNameendforeach:
3714 return "endforeach"; //$NON-NLS-1$
3715 case TokenNameendif:
3716 return "endif"; //$NON-NLS-1$
3717 case TokenNameendswitch:
3718 return "endswitch"; //$NON-NLS-1$
3719 case TokenNameendwhile:
3720 return "endwhile"; //$NON-NLS-1$
3723 case TokenNameextends:
3724 return "extends"; //$NON-NLS-1$
3725 // case TokenNamefalse :
3726 // return "false"; //$NON-NLS-1$
3727 case TokenNamefinal:
3728 return "final"; //$NON-NLS-1$
3730 return "for"; //$NON-NLS-1$
3731 case TokenNameforeach:
3732 return "foreach"; //$NON-NLS-1$
3733 case TokenNamefunction:
3734 return "function"; //$NON-NLS-1$
3735 case TokenNameglobal:
3736 return "global"; //$NON-NLS-1$
3738 return "if"; //$NON-NLS-1$
3739 case TokenNameimplements:
3740 return "implements"; //$NON-NLS-1$
3741 case TokenNameinclude:
3742 return "include"; //$NON-NLS-1$
3743 case TokenNameinclude_once:
3744 return "include_once"; //$NON-NLS-1$
3745 case TokenNameinstanceof:
3746 return "instanceof"; //$NON-NLS-1$
3747 case TokenNameinterface:
3748 return "interface"; //$NON-NLS-1$
3749 case TokenNameisset:
3750 return "isset"; //$NON-NLS-1$
3752 return "list"; //$NON-NLS-1$
3754 return "new"; //$NON-NLS-1$
3755 // case TokenNamenull :
3756 // return "null"; //$NON-NLS-1$
3758 return "OR"; //$NON-NLS-1$
3759 case TokenNameprint:
3760 return "print"; //$NON-NLS-1$
3761 case TokenNameprivate:
3762 return "private"; //$NON-NLS-1$
3763 case TokenNameprotected:
3764 return "protected"; //$NON-NLS-1$
3765 case TokenNamepublic:
3766 return "public"; //$NON-NLS-1$
3767 case TokenNamerequire:
3768 return "require"; //$NON-NLS-1$
3769 case TokenNamerequire_once:
3770 return "require_once"; //$NON-NLS-1$
3771 case TokenNamereturn:
3772 return "return"; //$NON-NLS-1$
3773 case TokenNamestatic:
3774 return "static"; //$NON-NLS-1$
3775 case TokenNameswitch:
3776 return "switch"; //$NON-NLS-1$
3777 // case TokenNametrue :
3778 // return "true"; //$NON-NLS-1$
3779 case TokenNameunset:
3780 return "unset"; //$NON-NLS-1$
3782 return "var"; //$NON-NLS-1$
3783 case TokenNamewhile:
3784 return "while"; //$NON-NLS-1$
3786 return "XOR"; //$NON-NLS-1$
3787 // case TokenNamethis :
3788 // return "$this"; //$NON-NLS-1$
// ---- literals, rendered together with their source text ----
3789 case TokenNameIntegerLiteral:
3790 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3791 case TokenNameDoubleLiteral:
3792 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3793 case TokenNameStringDoubleQuote:
3794 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3795 case TokenNameStringSingleQuote:
3796 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3797 case TokenNameStringInterpolated:
3798 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// the three encapsed-string tokens are rendered as their quote character
3799 case TokenNameEncapsedString0:
3800 return "`"; //$NON-NLS-1$
3801 case TokenNameEncapsedString1:
3802 return "\'"; //$NON-NLS-1$
3803 case TokenNameEncapsedString2:
3804 return "\""; //$NON-NLS-1$
3805 case TokenNameSTRING:
3806 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3807 case TokenNameHEREDOC:
3808 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// ---- operators and separators ----
3809 case TokenNamePLUS_PLUS:
3810 return "++"; //$NON-NLS-1$
3811 case TokenNameMINUS_MINUS:
3812 return "--"; //$NON-NLS-1$
3813 case TokenNameEQUAL_EQUAL:
3814 return "=="; //$NON-NLS-1$
3815 case TokenNameEQUAL_EQUAL_EQUAL:
3816 return "==="; //$NON-NLS-1$
3817 case TokenNameEQUAL_GREATER:
3818 return "=>"; //$NON-NLS-1$
3819 case TokenNameLESS_EQUAL:
3820 return "<="; //$NON-NLS-1$
3821 case TokenNameGREATER_EQUAL:
3822 return ">="; //$NON-NLS-1$
3823 case TokenNameNOT_EQUAL:
3824 return "!="; //$NON-NLS-1$
3825 case TokenNameNOT_EQUAL_EQUAL:
3826 return "!=="; //$NON-NLS-1$
3827 case TokenNameLEFT_SHIFT:
3828 return "<<"; //$NON-NLS-1$
3829 case TokenNameRIGHT_SHIFT:
3830 return ">>"; //$NON-NLS-1$
3831 case TokenNamePLUS_EQUAL:
3832 return "+="; //$NON-NLS-1$
3833 case TokenNameMINUS_EQUAL:
3834 return "-="; //$NON-NLS-1$
3835 case TokenNameMULTIPLY_EQUAL:
3836 return "*="; //$NON-NLS-1$
3837 case TokenNameDIVIDE_EQUAL:
3838 return "/="; //$NON-NLS-1$
3839 case TokenNameAND_EQUAL:
3840 return "&="; //$NON-NLS-1$
3841 case TokenNameOR_EQUAL:
3842 return "|="; //$NON-NLS-1$
3843 case TokenNameXOR_EQUAL:
3844 return "^="; //$NON-NLS-1$
3845 case TokenNameREMAINDER_EQUAL:
3846 return "%="; //$NON-NLS-1$
3847 case TokenNameDOT_EQUAL:
3848 return ".="; //$NON-NLS-1$
3849 case TokenNameLEFT_SHIFT_EQUAL:
3850 return "<<="; //$NON-NLS-1$
3851 case TokenNameRIGHT_SHIFT_EQUAL:
3852 return ">>="; //$NON-NLS-1$
3853 case TokenNameOR_OR:
3854 return "||"; //$NON-NLS-1$
3855 case TokenNameAND_AND:
3856 return "&&"; //$NON-NLS-1$
3858 return "+"; //$NON-NLS-1$
3859 case TokenNameMINUS:
3860 return "-"; //$NON-NLS-1$
3861 case TokenNameMINUS_GREATER:
3864 return "!"; //$NON-NLS-1$
3865 case TokenNameREMAINDER:
3866 return "%"; //$NON-NLS-1$
3868 return "^"; //$NON-NLS-1$
3870 return "&"; //$NON-NLS-1$
3871 case TokenNameMULTIPLY:
3872 return "*"; //$NON-NLS-1$
3874 return "|"; //$NON-NLS-1$
3875 case TokenNameTWIDDLE:
3876 return "~"; //$NON-NLS-1$
3877 case TokenNameTWIDDLE_EQUAL:
3878 return "~="; //$NON-NLS-1$
3879 case TokenNameDIVIDE:
3880 return "/"; //$NON-NLS-1$
3881 case TokenNameGREATER:
3882 return ">"; //$NON-NLS-1$
3884 return "<"; //$NON-NLS-1$
3885 case TokenNameLPAREN:
3886 return "("; //$NON-NLS-1$
3887 case TokenNameRPAREN:
3888 return ")"; //$NON-NLS-1$
3889 case TokenNameLBRACE:
3890 return "{"; //$NON-NLS-1$
3891 case TokenNameRBRACE:
3892 return "}"; //$NON-NLS-1$
3893 case TokenNameLBRACKET:
3894 return "["; //$NON-NLS-1$
3895 case TokenNameRBRACKET:
3896 return "]"; //$NON-NLS-1$
3897 case TokenNameSEMICOLON:
3898 return ";"; //$NON-NLS-1$
3899 case TokenNameQUESTION:
3900 return "?"; //$NON-NLS-1$
3901 case TokenNameCOLON:
3902 return ":"; //$NON-NLS-1$
3903 case TokenNameCOMMA:
3904 return ","; //$NON-NLS-1$
3906 return "."; //$NON-NLS-1$
3907 case TokenNameEQUAL:
3908 return "="; //$NON-NLS-1$
3911 case TokenNameDOLLAR:
3913 case TokenNameDOLLAR_LBRACE:
3915 case TokenNameLBRACE_DOLLAR:
3918 return "EOF"; //$NON-NLS-1$
// ---- whitespace and comments, rendered together with their source text ----
3919 case TokenNameWHITESPACE:
3920 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3921 case TokenNameCOMMENT_LINE:
3922 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3923 case TokenNameCOMMENT_BLOCK:
3924 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3925 case TokenNameCOMMENT_PHPDOC:
3926 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3927 // case TokenNameHTML :
3928 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---- magic constants ----
3931 return "__FILE__"; //$NON-NLS-1$
3933 return "__LINE__"; //$NON-NLS-1$
3934 case TokenNameCLASS_C:
3935 return "__CLASS__"; //$NON-NLS-1$
3936 case TokenNameMETHOD_C:
3937 return "__METHOD__"; //$NON-NLS-1$
3938 case TokenNameFUNC_C:
3939 return "__FUNCTION__"; //$NON-NLS-1$
// ---- casts ----
3940 case TokenNameboolCAST:
3941 return "( bool )"; //$NON-NLS-1$
3942 case TokenNameintCAST:
3943 return "( int )"; //$NON-NLS-1$
3944 case TokenNamedoubleCAST:
3945 return "( double )"; //$NON-NLS-1$
3946 case TokenNameobjectCAST:
3947 return "( object )"; //$NON-NLS-1$
3948 case TokenNamestringCAST:
3949 return "( string )"; //$NON-NLS-1$
// fallback for values without a dedicated rendering
3951 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with the given comment and whitespace tokenizing flags. Every other
 * option is switched off: no check for non-externalized string literals, no assert
 * mode, no string tokenizing, and neither task tags nor task priorities.
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false, false, false, null, null);
}
/**
 * Creates a scanner with the given comment, whitespace and non-externalized string
 * literal flags. Assert mode and string tokenizing are switched off, and neither task
 * tags nor task priorities are configured.
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false, false, null, null);
}
/**
 * Creates a scanner with the given comment, whitespace, non-externalized string literal
 * and assert mode flags. String tokenizing is switched off, and neither task tags nor
 * task priorities are configured.
 */
public Scanner(
    boolean tokenizeComments,
    boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals,
      assertMode,
      false,
      null,
      null);
}
/**
 * Creates a fully configured scanner. All arguments are stored unchanged in the fields
 * of the same name; in addition the end-of-file position starts at
 * {@link Integer#MAX_VALUE} and no encapsed-string stack is allocated yet.
 *
 * @param taskTags may be null
 * @param taskPriorities may be null
 */
public Scanner(
    boolean tokenizeComments,
    boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals,
    boolean assertMode,
    boolean tokenizeStrings,
    char[][] taskTags,
    char[][] taskPriorities) {
  // what the scanner reports as separate tokens
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;

  // optional checks and language mode
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;

  // task tag configuration
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;

  // initial scanning state
  this.eofPosition = Integer.MAX_VALUE;
  this.encapsedStringStack = null;
}
3985 private void checkNonExternalizeString() throws InvalidInputException {
3986 if (currentLine == null)
3988 parseTags(currentLine);
// Reads the NLS tags (TAG_PREFIX <number> TAG_POSTFIX) contained in the current token
// source and then collects the non-null string literals of the given line into
// nonNLSStrings.
// NOTE(review): the loop header around the tag search, the statement executed when
// line.exists(i) holds, and some statements of the final section are not visible in
// this listing; they are not described here.
3991 private void parseTags(NLSLine line) throws InvalidInputException {
3992 String s = new String(getCurrentTokenSource());
// position of the first tag, -1 when the token source holds none
3993 int pos = s.indexOf(TAG_PREFIX);
3994 int lineLength = line.size();
// the tag number sits between the end of the prefix and the postfix
3996 int start = pos + TAG_PREFIX_LENGTH;
3997 int end = s.indexOf(TAG_POSTFIX, start);
// NOTE(review): when TAG_POSTFIX is missing, end is -1 and substring(start, -1) throws
// StringIndexOutOfBoundsException - confirm whether callers guarantee a postfix.
3998 String index = s.substring(start, end);
4001 i = Integer.parseInt(index) - 1;
4002 // Tags are one based not zero based.
4003 } catch (NumberFormatException e) {
4004 i = -1; // we don't want to consider this as a valid NLS tag
4006 if (line.exists(i)) {
// continue the search behind the tag just handled
4009 pos = s.indexOf(TAG_PREFIX, start);
// gather the literals that are still present on the line
4011 this.nonNLSStrings = new StringLiteral[lineLength];
4012 int nonNLSCounter = 0;
4013 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4014 StringLiteral literal = (StringLiteral) iterator.next();
4015 if (literal != null) {
4016 this.nonNLSStrings[nonNLSCounter++] = literal;
// nothing left to report
4019 if (nonNLSCounter == 0) {
4020 this.nonNLSStrings = null;
4024 this.wasNonExternalizedStringLiteral = true;
// shrink the array so that it holds exactly the collected literals
4025 if (nonNLSCounter != lineLength) {
4026 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
// Translates the escape sequence at the current position into the character it stands
// for and leaves the result in currentCharacter. The visible branches produce
// backspace, tab, line feed, form feed, carriage return, double quote, single quote
// and backslash; anything else is treated as an octal escape of up to three digits.
// Throws InvalidInputException(INVALID_ESCAPE) for an invalid escape.
// NOTE(review): the case labels and the statements of the "ignore last character"
// branches are not visible in this listing.
4031 public final void scanEscapeCharacter() throws InvalidInputException {
4032 // the string with "\\u" is a legal string of two chars \ and u
4033 //thus we use a direct access to the source (for regular cases).
4034 if (unicodeAsBackSlash) {
4035 // consume next character
4036 unicodeAsBackSlash = false;
4037 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4038 // (source[currentPosition] == 'u')) {
4039 // getNextUnicodeChar();
4041 if (withoutUnicodePtr != 0) {
4042 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// regular case: take the character following the backslash straight from the source
4046 currentCharacter = source[currentPosition++];
4047 switch (currentCharacter) {
4049 currentCharacter = '\b';
4052 currentCharacter = '\t';
4055 currentCharacter = '\n';
4058 currentCharacter = '\f';
4061 currentCharacter = '\r';
4064 currentCharacter = '\"';
4067 currentCharacter = '\'';
4070 currentCharacter = '\\';
4073 // -----------octal escape--------------
4075 // OctalDigit OctalDigit
4076 // ZeroToThree OctalDigit OctalDigit
// NOTE(review): Character.getNumericValue also yields 0..9 for non-ASCII digits, so
// such characters would pass the 0..7 range checks below - confirm this is acceptable.
4077 int number = Character.getNumericValue(currentCharacter);
4078 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out the three-digit form
4079 boolean zeroToThreeNot = number > 3;
4080 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4081 int digit = Character.getNumericValue(currentCharacter);
4082 if (digit >= 0 && digit <= 7) {
4083 number = (number * 8) + digit;
4084 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4085 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4086 // Digit --> ignore last character
4089 digit = Character.getNumericValue(currentCharacter);
4090 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4091 // OctalDigit OctalDigit
4092 number = (number * 8) + digit;
4093 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4094 // --> ignore last character
4098 } else { // has read \OctalDigit NonDigit--> ignore last
4102 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4106 } else { // has read \OctalDigit --> ignore last character
4110 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
4111 currentCharacter = (char) number;
// the character after the backslash is neither a known escape nor an octal digit
4113 throw new InvalidInputException(INVALID_ESCAPE);
4117 // check presence of task: tags
// Searches the comment between commentStart and commentEnd for the configured task
// tags. Every occurrence is recorded in the parallel arrays foundTaskTags,
// foundTaskPriorities, foundTaskPositions and foundTaskMessages; afterwards the message
// text following each newly found tag is extracted from the source.
// NOTE(review): several loop bodies, declarations and the closing lines of this method
// are not visible in this listing; only the visible lines are described.
4118 public void checkTaskTag(int commentStart, int commentEnd) {
4119 // only look for newer task: tags
4120 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// index of the first task that this call may add
4123 int foundTaskIndex = this.foundTaskCount;
4124 nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4126 char[] priority = null;
4127 // check for tag occurrence
4128 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4129 tag = this.taskTags[itag];
// a tag without a matching priority entry gets no priority
4130 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4131 int tagLength = tag.length;
// compare the tag character by character with the source at position i
// NOTE(review): i + t is not checked against the source length here - confirm that a
// tag cannot extend past the end of the buffer.
4132 for (int t = 0; t < tagLength; t++) {
4133 if (this.source[i + t] != tag[t])
// first hit: allocate the result arrays; later on double them when they are full
4136 if (this.foundTaskTags == null) {
4137 this.foundTaskTags = new char[5][];
4138 this.foundTaskMessages = new char[5][];
4139 this.foundTaskPriorities = new char[5][];
4140 this.foundTaskPositions = new int[5][];
4141 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4142 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4143 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4144 this.foundTaskCount);
4145 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4146 this.foundTaskCount);
4147 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4148 this.foundTaskCount);
// record the occurrence; the message starts out empty and is filled in further down
4150 this.foundTaskTags[this.foundTaskCount] = tag;
4151 this.foundTaskPriorities[this.foundTaskCount] = priority;
4152 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4153 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4154 this.foundTaskCount++;
4155 i += tagLength - 1; // will be incremented when looping
// second pass: determine the message of every task found by this call
4158 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4159 // retrieve message start and end positions
4160 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4161 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4162 // at most beginning of next task
4163 if (max_value < msgStart)
4164 max_value = msgStart; // would only occur if tag is before EOF.
// look for a line break between the tag and the upper bound
4167 for (int j = msgStart; j < max_value; j++) {
4168 if ((c = this.source[j]) == '\n' || c == '\r') {
// scan backwards from the upper bound for a '*' character
4174 for (int j = max_value; j > msgStart; j--) {
4175 if ((c = this.source[j]) == '*') {
4183 if (msgStart == end)
// trim whitespace on both sides of the message
// NOTE(review): source[...] is read before the msgStart <= end bound is tested in both
// loops below - confirm the indices cannot leave the buffer.
4186 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4188 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4190 // update the end position of the task
4191 this.foundTaskPositions[i][1] = end;
4192 // get the message source
4193 final int messageLength = end - msgStart + 1;
4194 char[] message = new char[messageLength];
4195 System.arraycopy(source, msgStart, message, 0, messageLength);
4196 this.foundTaskMessages[i] = message;