/***********************************************************************************************************************************
 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
 * and is available at http://www.eclipse.org/legal/cpl-v05.html
 *
 * Contributors: IBM Corporation - initial API and implementation
 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Stack;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
/*
 * APIs are:
 * - getNextToken(), which returns the current type of the token (this value is not memorized by the scanner)
 * - getCurrentTokenSource(), which provides the token "REAL" source (aka all unicode have been transformed into a correct char)
 * - sourceStart gives the position into the stream
 * - currentPosition-1 gives the sourceEnd position into the stream
 */
private boolean assertMode;

public boolean useAssertAsAnIndentifier = false;

// flag indicating if processed source contains occurrences of keyword assert
public boolean containsAssertKeyword = false;

public boolean recordLineSeparator;

public boolean ignorePHPOneLiner = false;

public boolean phpMode = false;

public Stack encapsedStringStack = null;

// character most recently read from the source
public char currentCharacter;

// index in source of the first character of the current token
public int startPosition;

// index in source of the next character to read
public int currentPosition;

// after eofPosition, eof are generated instead of real token from the source
public int initialPosition, eofPosition;

public boolean tokenizeComments;

public boolean tokenizeWhiteSpace;

public boolean tokenizeStrings;

// source should be viewed as a window (aka a part)
// of a entire very large stream
// NOTE(review): this declaration was missing from the damaged listing and has been restored from its
// uses (indexed as a char array, source.length, System.arraycopy) - confirm the intended visibility.
public char source[];

public char[] withoutUnicodeBuffer;

// when == 0 ==> no unicode in the current token
public int withoutUnicodePtr;

public boolean unicodeAsBackSlash = false;

public boolean scanningFloatLiteral = false;

// support for /** comments
public int[] commentStops = new int[10];

public int[] commentStarts = new int[10];

public int commentPtr = -1; // no comment test with commentPtr value -1

protected int lastCommentLinePosition = -1;

// diet parsing support - jump over some method body when requested
public boolean diet = false;

// support for the poor-line-debuggers ....
// remember the position of the cr/lf
public int[] lineEnds = new int[250];

public int linePtr = -1;

public boolean wasAcr = false;

public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$

public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$

public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$

public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$

public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$

public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$

public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$

public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$

public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$

public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$

public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$

public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$

//----------------optimized identifier management------------------
static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
    charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
    charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
    charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
    charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
    charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
    charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
    charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
    charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };

// placeholder stored in every slot of charArray_length by the initializer below
static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };

static final int TableSize = 30, InternalTableSize = 6;

public static final int OptimizedLength = 6;

final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];

// support for detecting non-externalized string literals
int currentLineNr = -1;

int previousLineNr = -1;

NLSLine currentLine = null;

List lines = new ArrayList();

public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$

public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();

public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$

public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();

public StringLiteral[] nonNLSStrings = null;

public boolean checkNonExternalizedStringLiterals = true;

public boolean wasNonExternalizedStringLiteral = false;

// Instance initializer: charArray_length is an instance field, so every scanner fills its own table.
{
  for (int i = 0; i < OptimizedLength; i++) {
    for (int j = 0; j < TableSize; j++) {
      for (int k = 0; k < InternalTableSize; k++) {
        charArray_length[i][j][k] = initCharArray;
      }
    }
  }
}

static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;

public static final int RoundBracket = 0;

public static final int SquareBracket = 1;

public static final int CurlyBracket = 2;

public static final int BracketKinds = 3;

public char[][] foundTaskTags = null;

public char[][] foundTaskMessages;

public char[][] foundTaskPriorities = null;

public int[][] foundTaskPositions;

public int foundTaskCount = 0;

public char[][] taskTags = null;

public char[][] taskPriorities = null;

public static final boolean DEBUG = false;

public static final boolean TRACE = false;

public ICompilationUnit compilationUnit = null;
202 * Determines if the specified character is permissible as the first character in a PHP identifier
204 public static boolean isPHPIdentifierStart(char ch) {
205 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
209 * Determines if the specified character may be part of a PHP identifier as other than the first character
211 public static boolean isPHPIdentifierPart(char ch) {
212 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
215 public final boolean atEnd() {
216 // This code is not relevant if source is
217 // Only a part of the real stream input
218 return source.length == currentPosition;
221 public char[] getCurrentIdentifierSource() {
222 //return the token REAL source (aka unicodes are precomputed)
224 // if (withoutUnicodePtr != 0)
225 // //0 is used as a fast test flag so the real first char is in position 1
227 // withoutUnicodeBuffer,
229 // result = new char[withoutUnicodePtr],
231 // withoutUnicodePtr);
233 int length = currentPosition - startPosition;
234 switch (length) { // see OptimizedLength
236 return optimizedCurrentTokenSource1();
238 return optimizedCurrentTokenSource2();
240 return optimizedCurrentTokenSource3();
242 return optimizedCurrentTokenSource4();
244 return optimizedCurrentTokenSource5();
246 return optimizedCurrentTokenSource6();
249 System.arraycopy(source, startPosition, result = new char[length], 0, length);
254 public int getCurrentTokenEndPosition() {
255 return this.currentPosition - 1;
258 public final char[] getCurrentTokenSource() {
259 // Return the token REAL source (aka unicodes are precomputed)
261 // if (withoutUnicodePtr != 0)
262 // // 0 is used as a fast test flag so the real first char is in position 1
264 // withoutUnicodeBuffer,
266 // result = new char[withoutUnicodePtr],
268 // withoutUnicodePtr);
271 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
276 public final char[] getCurrentTokenSource(int startPos) {
277 // Return the token REAL source (aka unicodes are precomputed)
279 // if (withoutUnicodePtr != 0)
280 // // 0 is used as a fast test flag so the real first char is in position 1
282 // withoutUnicodeBuffer,
284 // result = new char[withoutUnicodePtr],
286 // withoutUnicodePtr);
289 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
294 public final char[] getCurrentTokenSourceString() {
295 //return the token REAL source (aka unicodes are precomputed).
296 //REMOVE the two " that are at the beginning and the end.
298 if (withoutUnicodePtr != 0)
299 //0 is used as a fast test flag so the real first char is in position 1
300 System.arraycopy(withoutUnicodeBuffer, 2,
301 //2 is 1 (real start) + 1 (to jump over the ")
302 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
305 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
310 public int getCurrentTokenStartPosition() {
311 return this.startPosition;
314 public final char[] getCurrentStringLiteralSource() {
315 // Return the token REAL source (aka unicodes are precomputed)
316 if (startPosition + 1 >= currentPosition) {
321 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
326 public final char[] getCurrentStringLiteralSource(int startPos) {
327 // Return the token REAL source (aka unicodes are precomputed)
330 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
335 * Search the source position corresponding to the end of a given line number
337 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
339 * In case the given line number is inconsistent, answers -1.
341 public final int getLineEnd(int lineNumber) {
342 if (lineEnds == null)
344 if (lineNumber >= lineEnds.length)
348 if (lineNumber == lineEnds.length - 1)
350 return lineEnds[lineNumber - 1];
351 // next line start one character behind the lineEnd of the previous line
355 * Search the source position corresponding to the beginning of a given line number
357 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
359 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
361 * In case the given line number is inconsistent, answers -1.
363 public final int getLineStart(int lineNumber) {
364 if (lineEnds == null)
366 if (lineNumber >= lineEnds.length)
371 return initialPosition;
372 return lineEnds[lineNumber - 2] + 1;
373 // next line start one character behind the lineEnd of the previous line
376 public final boolean getNextChar(char testedChar) {
378 //handle the case of unicode.
379 //when a unicode appears then we must use a buffer that holds char
381 //At the end of this method currentCharacter holds the new visited char
382 //and currentPosition points right next after it
383 //Both previous lines are true if the currentCharacter is == to the
385 //On false, no side effect has occured.
386 //ALL getNextChar.... ARE OPTIMIZED COPIES
387 int temp = currentPosition;
389 currentCharacter = source[currentPosition++];
390 // if (((currentCharacter = source[currentPosition++]) == '\\')
391 // && (source[currentPosition] == 'u')) {
392 // //-------------unicode traitement ------------
393 // int c1, c2, c3, c4;
394 // int unicodeSize = 6;
395 // currentPosition++;
396 // while (source[currentPosition] == 'u') {
397 // currentPosition++;
401 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
403 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
405 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
407 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
409 // currentPosition = temp;
413 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
414 // if (currentCharacter != testedChar) {
415 // currentPosition = temp;
418 // unicodeAsBackSlash = currentCharacter == '\\';
420 // //need the unicode buffer
421 // if (withoutUnicodePtr == 0) {
422 // //buffer all the entries that have been left aside....
423 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
427 // withoutUnicodeBuffer,
429 // withoutUnicodePtr);
431 // //fill the buffer with the char
432 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
435 // } //-------------end unicode traitement--------------
437 if (currentCharacter != testedChar) {
438 currentPosition = temp;
441 unicodeAsBackSlash = false;
442 // if (withoutUnicodePtr != 0)
443 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
446 } catch (IndexOutOfBoundsException e) {
447 unicodeAsBackSlash = false;
448 currentPosition = temp;
453 public final int getNextChar(char testedChar1, char testedChar2) {
454 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
455 //test can be done with (x==0) for the first and (x>0) for the second
456 //handle the case of unicode.
457 //when a unicode appears then we must use a buffer that holds char
459 //At the end of this method currentCharacter holds the new visited char
460 //and currentPosition points right next after it
461 //Both previous lines are true if the currentCharacter is == to the
463 //On false, no side effect has occured.
464 //ALL getNextChar.... ARE OPTIMIZED COPIES
465 int temp = currentPosition;
468 currentCharacter = source[currentPosition++];
469 // if (((currentCharacter = source[currentPosition++]) == '\\')
470 // && (source[currentPosition] == 'u')) {
471 // //-------------unicode traitement ------------
472 // int c1, c2, c3, c4;
473 // int unicodeSize = 6;
474 // currentPosition++;
475 // while (source[currentPosition] == 'u') {
476 // currentPosition++;
480 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
482 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
484 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
486 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
488 // currentPosition = temp;
492 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
493 // if (currentCharacter == testedChar1)
495 // else if (currentCharacter == testedChar2)
498 // currentPosition = temp;
502 // //need the unicode buffer
503 // if (withoutUnicodePtr == 0) {
504 // //buffer all the entries that have been left aside....
505 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
509 // withoutUnicodeBuffer,
511 // withoutUnicodePtr);
513 // //fill the buffer with the char
514 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
516 // } //-------------end unicode traitement--------------
518 if (currentCharacter == testedChar1)
520 else if (currentCharacter == testedChar2)
523 currentPosition = temp;
526 // if (withoutUnicodePtr != 0)
527 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
530 } catch (IndexOutOfBoundsException e) {
531 currentPosition = temp;
536 public final boolean getNextCharAsDigit() {
538 //handle the case of unicode.
539 //when a unicode appears then we must use a buffer that holds char
541 //At the end of this method currentCharacter holds the new visited char
542 //and currentPosition points right next after it
543 //Both previous lines are true if the currentCharacter is a digit
544 //On false, no side effect has occured.
545 //ALL getNextChar.... ARE OPTIMIZED COPIES
546 int temp = currentPosition;
548 currentCharacter = source[currentPosition++];
549 // if (((currentCharacter = source[currentPosition++]) == '\\')
550 // && (source[currentPosition] == 'u')) {
551 // //-------------unicode traitement ------------
552 // int c1, c2, c3, c4;
553 // int unicodeSize = 6;
554 // currentPosition++;
555 // while (source[currentPosition] == 'u') {
556 // currentPosition++;
560 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
562 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
564 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
566 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
568 // currentPosition = temp;
572 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
573 // if (!Character.isDigit(currentCharacter)) {
574 // currentPosition = temp;
578 // //need the unicode buffer
579 // if (withoutUnicodePtr == 0) {
580 // //buffer all the entries that have been left aside....
581 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
585 // withoutUnicodeBuffer,
587 // withoutUnicodePtr);
589 // //fill the buffer with the char
590 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
592 // } //-------------end unicode traitement--------------
594 if (!Character.isDigit(currentCharacter)) {
595 currentPosition = temp;
598 // if (withoutUnicodePtr != 0)
599 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
602 } catch (IndexOutOfBoundsException e) {
603 currentPosition = temp;
608 public final boolean getNextCharAsDigit(int radix) {
610 //handle the case of unicode.
611 //when a unicode appears then we must use a buffer that holds char
613 //At the end of this method currentCharacter holds the new visited char
614 //and currentPosition points right next after it
615 //Both previous lines are true if the currentCharacter is a digit base on
617 //On false, no side effect has occured.
618 //ALL getNextChar.... ARE OPTIMIZED COPIES
619 int temp = currentPosition;
621 currentCharacter = source[currentPosition++];
622 // if (((currentCharacter = source[currentPosition++]) == '\\')
623 // && (source[currentPosition] == 'u')) {
624 // //-------------unicode traitement ------------
625 // int c1, c2, c3, c4;
626 // int unicodeSize = 6;
627 // currentPosition++;
628 // while (source[currentPosition] == 'u') {
629 // currentPosition++;
633 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
635 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
637 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
639 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
641 // currentPosition = temp;
645 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
646 // if (Character.digit(currentCharacter, radix) == -1) {
647 // currentPosition = temp;
651 // //need the unicode buffer
652 // if (withoutUnicodePtr == 0) {
653 // //buffer all the entries that have been left aside....
654 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
658 // withoutUnicodeBuffer,
660 // withoutUnicodePtr);
662 // //fill the buffer with the char
663 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
665 // } //-------------end unicode traitement--------------
667 if (Character.digit(currentCharacter, radix) == -1) {
668 currentPosition = temp;
671 // if (withoutUnicodePtr != 0)
672 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
675 } catch (IndexOutOfBoundsException e) {
676 currentPosition = temp;
681 public boolean getNextCharAsJavaIdentifierPart() {
683 //handle the case of unicode.
684 //when a unicode appears then we must use a buffer that holds char
686 //At the end of this method currentCharacter holds the new visited char
687 //and currentPosition points right next after it
688 //Both previous lines are true if the currentCharacter is a
689 // JavaIdentifierPart
690 //On false, no side effect has occured.
691 //ALL getNextChar.... ARE OPTIMIZED COPIES
692 int temp = currentPosition;
694 currentCharacter = source[currentPosition++];
695 // if (((currentCharacter = source[currentPosition++]) == '\\')
696 // && (source[currentPosition] == 'u')) {
697 // //-------------unicode traitement ------------
698 // int c1, c2, c3, c4;
699 // int unicodeSize = 6;
700 // currentPosition++;
701 // while (source[currentPosition] == 'u') {
702 // currentPosition++;
706 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
708 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
710 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
712 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
714 // currentPosition = temp;
718 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
719 // if (!isPHPIdentifierPart(currentCharacter)) {
720 // currentPosition = temp;
724 // //need the unicode buffer
725 // if (withoutUnicodePtr == 0) {
726 // //buffer all the entries that have been left aside....
727 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
731 // withoutUnicodeBuffer,
733 // withoutUnicodePtr);
735 // //fill the buffer with the char
736 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
738 // } //-------------end unicode traitement--------------
740 if (!isPHPIdentifierPart(currentCharacter)) {
741 currentPosition = temp;
744 // if (withoutUnicodePtr != 0)
745 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
748 } catch (IndexOutOfBoundsException e) {
749 currentPosition = temp;
754 public int getCastOrParen() {
755 int tempPosition = currentPosition;
756 char tempCharacter = currentCharacter;
757 int tempToken = TokenNameLPAREN;
758 boolean found = false;
759 StringBuffer buf = new StringBuffer();
762 currentCharacter = source[currentPosition++];
763 } while (currentCharacter == ' ' || currentCharacter == '\t');
764 while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
765 buf.append(currentCharacter);
766 currentCharacter = source[currentPosition++];
768 if (buf.length() >= 3 && buf.length() <= 7) {
769 char[] data = buf.toString().toCharArray();
771 switch (data.length) {
774 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
776 tempToken = TokenNameintCAST;
781 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
783 tempToken = TokenNameboolCAST;
786 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
788 tempToken = TokenNamedoubleCAST;
794 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
795 && (data[++index] == 'y')) {
797 tempToken = TokenNamearrayCAST;
800 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
801 && (data[++index] == 't')) {
803 tempToken = TokenNameunsetCAST;
806 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
807 && (data[++index] == 't')) {
809 tempToken = TokenNamedoubleCAST;
815 // object string double
816 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
817 && (data[++index] == 'c') && (data[++index] == 't')) {
819 tempToken = TokenNameobjectCAST;
822 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
823 && (data[++index] == 'n') && (data[++index] == 'g')) {
825 tempToken = TokenNamestringCAST;
828 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
829 && (data[++index] == 'l') && (data[++index] == 'e')) {
831 tempToken = TokenNamedoubleCAST;
838 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
839 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
841 tempToken = TokenNameboolCAST;
844 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
845 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
847 tempToken = TokenNameintCAST;
853 while (currentCharacter == ' ' || currentCharacter == '\t') {
854 currentCharacter = source[currentPosition++];
856 if (currentCharacter == ')') {
861 } catch (IndexOutOfBoundsException e) {
863 currentCharacter = tempCharacter;
864 currentPosition = tempPosition;
865 return TokenNameLPAREN;
868 public void consumeStringInterpolated() throws InvalidInputException {
870 // consume next character
871 unicodeAsBackSlash = false;
872 currentCharacter = source[currentPosition++];
873 // if (((currentCharacter = source[currentPosition++]) == '\\')
874 // && (source[currentPosition] == 'u')) {
875 // getNextUnicodeChar();
877 // if (withoutUnicodePtr != 0) {
878 // withoutUnicodeBuffer[++withoutUnicodePtr] =
882 while (currentCharacter != '`') {
883 /** ** in PHP \r and \n are valid in string literals *** */
884 // if ((currentCharacter == '\n')
885 // || (currentCharacter == '\r')) {
886 // // relocate if finding another quote fairly close: thus unicode
887 // '/u000D' will be fully consumed
888 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
889 // if (currentPosition + lookAhead == source.length)
891 // if (source[currentPosition + lookAhead] == '\n')
893 // if (source[currentPosition + lookAhead] == '\"') {
894 // currentPosition += lookAhead + 1;
898 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
900 if (currentCharacter == '\\') {
901 int escapeSize = currentPosition;
902 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
903 //scanEscapeCharacter make a side effect on this value and we need
904 // the previous value few lines down this one
905 scanDoubleQuotedEscapeCharacter();
906 escapeSize = currentPosition - escapeSize;
907 if (withoutUnicodePtr == 0) {
908 //buffer all the entries that have been left aside....
909 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
910 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
911 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
912 } else { //overwrite the / in the buffer
913 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
914 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
915 // where only one is correct
920 // consume next character
921 unicodeAsBackSlash = false;
922 currentCharacter = source[currentPosition++];
923 // if (((currentCharacter = source[currentPosition++]) == '\\')
924 // && (source[currentPosition] == 'u')) {
925 // getNextUnicodeChar();
927 if (withoutUnicodePtr != 0) {
928 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
932 } catch (IndexOutOfBoundsException e) {
933 // reset end position for error reporting
934 currentPosition -= 2;
935 throw new InvalidInputException(UNTERMINATED_STRING);
936 } catch (InvalidInputException e) {
937 if (e.getMessage().equals(INVALID_ESCAPE)) {
938 // relocate if finding another quote fairly close: thus unicode
939 // '/u000D' will be fully consumed
940 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
941 if (currentPosition + lookAhead == source.length)
943 if (source[currentPosition + lookAhead] == '\n')
945 if (source[currentPosition + lookAhead] == '`') {
946 currentPosition += lookAhead + 1;
953 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
954 // //$NON-NLS-?$ where ? is an
956 if (currentLine == null) {
957 currentLine = new NLSLine();
958 lines.add(currentLine);
960 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
964 public void consumeStringConstant() throws InvalidInputException {
966 // consume next character
967 unicodeAsBackSlash = false;
968 currentCharacter = source[currentPosition++];
969 // if (((currentCharacter = source[currentPosition++]) == '\\')
970 // && (source[currentPosition] == 'u')) {
971 // getNextUnicodeChar();
973 // if (withoutUnicodePtr != 0) {
974 // withoutUnicodeBuffer[++withoutUnicodePtr] =
978 while (currentCharacter != '\'') {
979 /** ** in PHP \r and \n are valid in string literals *** */
980 // if ((currentCharacter == '\n')
981 // || (currentCharacter == '\r')) {
982 // // relocate if finding another quote fairly close: thus unicode
983 // '/u000D' will be fully consumed
984 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
985 // if (currentPosition + lookAhead == source.length)
987 // if (source[currentPosition + lookAhead] == '\n')
989 // if (source[currentPosition + lookAhead] == '\"') {
990 // currentPosition += lookAhead + 1;
994 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
996 if (currentCharacter == '\\') {
997 int escapeSize = currentPosition;
998 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
999 //scanEscapeCharacter make a side effect on this value and we need
1000 // the previous value few lines down this one
1001 scanSingleQuotedEscapeCharacter();
1002 escapeSize = currentPosition - escapeSize;
1003 if (withoutUnicodePtr == 0) {
1004 //buffer all the entries that have been left aside....
1005 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1006 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1007 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1008 } else { //overwrite the / in the buffer
1009 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1010 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1011 // where only one is correct
1012 withoutUnicodePtr--;
1016 // consume next character
1017 unicodeAsBackSlash = false;
1018 currentCharacter = source[currentPosition++];
1019 // if (((currentCharacter = source[currentPosition++]) == '\\')
1020 // && (source[currentPosition] == 'u')) {
1021 // getNextUnicodeChar();
1023 if (withoutUnicodePtr != 0) {
1024 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1028 } catch (IndexOutOfBoundsException e) {
1029 // reset end position for error reporting
1030 currentPosition -= 2;
1031 throw new InvalidInputException(UNTERMINATED_STRING);
1032 } catch (InvalidInputException e) {
1033 if (e.getMessage().equals(INVALID_ESCAPE)) {
1034 // relocate if finding another quote fairly close: thus unicode
1035 // '/u000D' will be fully consumed
1036 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1037 if (currentPosition + lookAhead == source.length)
1039 if (source[currentPosition + lookAhead] == '\n')
1041 if (source[currentPosition + lookAhead] == '\'') {
1042 currentPosition += lookAhead + 1;
1049 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1050 // //$NON-NLS-?$ where ? is an
1052 if (currentLine == null) {
1053 currentLine = new NLSLine();
1054 lines.add(currentLine);
1056 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes the rest of a double-quoted string literal from <code>source</code>.
 * <p>
 * Characters are read from <code>currentPosition</code> until a <code>"</code> has been read. A backslash is
 * handed to <code>scanDoubleQuotedEscapeCharacter()</code>, after which the current character is stored in
 * <code>withoutUnicodeBuffer</code>; once that buffer is in use (<code>withoutUnicodePtr != 0</code>) every
 * further character is appended to it as well. Raw \r and \n inside the literal are accepted (the Java-style
 * rejection is commented out).
 * <p>
 * When <code>checkNonExternalizedStringLiterals</code> is set, the literal is added as a
 * <code>StringLiteral</code> to <code>currentLine</code>, which is created and registered in <code>lines</code>
 * on demand.
 *
 * @throws InvalidInputException
 *           with <code>UNTERMINATED_STRING</code> when the end of <code>source</code> is reached before the
 *           closing quote. NOTE(review): the <code>INVALID_ESCAPE</code> handler relocates to a nearby quote and
 *           presumably rethrows - the rethrow is not visible here, confirm.
 */
1060 public void consumeStringLiteral() throws InvalidInputException {
1062 // consume next character
1063 unicodeAsBackSlash = false;
1064 currentCharacter = source[currentPosition++];
1065 // if (((currentCharacter = source[currentPosition++]) == '\\')
1066 // && (source[currentPosition] == 'u')) {
1067 // getNextUnicodeChar();
1069 // if (withoutUnicodePtr != 0) {
1070 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1071 // currentCharacter;
1074 while (currentCharacter != '"') {
1075 /** ** in PHP \r and \n are valid in string literals *** */
1076 // if ((currentCharacter == '\n')
1077 // || (currentCharacter == '\r')) {
1078 // // relocate if finding another quote fairly close: thus unicode
1079 // '/u000D' will be fully consumed
1080 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1081 // if (currentPosition + lookAhead == source.length)
1083 // if (source[currentPosition + lookAhead] == '\n')
1085 // if (source[currentPosition + lookAhead] == '\"') {
1086 // currentPosition += lookAhead + 1;
1090 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1092 if (currentCharacter == '\\') {
// escapeSize first remembers where the escape starts, then becomes the number of characters it consumed
1093 int escapeSize = currentPosition;
1094 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1095 // the escape scan below has a side effect on unicodeAsBackSlash and we need
1096 // the previous value a few lines further down
1097 scanDoubleQuotedEscapeCharacter();
1098 escapeSize = currentPosition - escapeSize;
1099 if (withoutUnicodePtr == 0) {
1100 //buffer all the entries that have been left aside....
1101 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1102 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1103 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1104 } else { //overwrite the / in the buffer
1105 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1106 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1107 // where only one is correct
1108 withoutUnicodePtr--;
1112 // consume next character
1113 unicodeAsBackSlash = false;
1114 currentCharacter = source[currentPosition++];
1115 // if (((currentCharacter = source[currentPosition++]) == '\\')
1116 // && (source[currentPosition] == 'u')) {
1117 // getNextUnicodeChar();
1119 if (withoutUnicodePtr != 0) {
1120 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1124 } catch (IndexOutOfBoundsException e) {
1125 // reset end position for error reporting
1126 currentPosition -= 2;
1127 throw new InvalidInputException(UNTERMINATED_STRING);
1128 } catch (InvalidInputException e) {
1129 if (e.getMessage().equals(INVALID_ESCAPE)) {
1130 // relocate if finding another quote fairly close: thus unicode
1131 // '/u000D' will be fully consumed
// the search is limited to the next 50 characters
1132 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1133 if (currentPosition + lookAhead == source.length)
1135 if (source[currentPosition + lookAhead] == '\n')
1137 if (source[currentPosition + lookAhead] == '\"') {
1138 currentPosition += lookAhead + 1;
1145 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1146 // //$NON-NLS-?$ where ? is an
1148 if (currentLine == null) {
1149 currentLine = new NLSLine();
1150 lines.add(currentLine);
1152 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans and returns the type of the next token (one of the <code>TokenName*</code> constants).
 * <p>
 * Visible flow:
 * <ul>
 * <li>early exits that delegate to <code>getInlinedHTML(currentPosition)</code> or to
 * <code>jumpOverMethodBody()</code> (NOTE(review): the guard conditions of these exits are not visible here -
 * confirm);</li>
 * <li>while the top of <code>encapsedStringStack</code> is a character other than <code>'$'</code> and
 * <code>' '</code>, the content of that encapsed string is scanned: reading the terminator right away yields
 * <code>TokenNameEncapsedString0/1/2</code>, other content yields <code>TokenNameSTRING</code>, and a
 * <code>'$'</code> marker is pushed when the next character starts an identifier or is <code>'{'</code> or
 * <code>'$'</code>;</li>
 * <li>while the top of the stack is <code>'$'</code>, the embedded expression is tokenized with its own, smaller
 * switch;</li>
 * <li>otherwise white space is skipped (recording line separators when <code>recordLineSeparator</code> is set,
 * and returning <code>TokenNameWHITESPACE</code> when <code>tokenizeWhiteSpace</code> is set) and the token is
 * selected by a switch on <code>currentCharacter</code>: operators, strings, heredoc, comments, variables,
 * identifiers/keywords and numbers.</li>
 * </ul>
 * Reading past the end of <code>source</code> is caught at the end of the method and reported as
 * <code>TokenNameEOF</code>.
 *
 * @return the token type
 * @throws InvalidInputException
 *           for malformed input, e.g. <code>UNTERMINATED_COMMENT</code>
 */
1156 public int getNextToken() throws InvalidInputException {
1158 return getInlinedHTML(currentPosition);
1161 this.wasAcr = false;
1163 jumpOverMethodBody();
1165 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1169 withoutUnicodePtr = 0;
1170 //start with a new token
// ' ' means "not inside an encapsed string"; otherwise the stack top is the string delimiter or the '$' marker
1171 char encapsedChar = ' ';
1172 if (!encapsedStringStack.isEmpty()) {
1173 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1175 if (encapsedChar != '$' && encapsedChar != ' ') {
1176 currentCharacter = source[currentPosition++];
1177 if (currentCharacter == encapsedChar) {
1178 switch (currentCharacter) {
1180 return TokenNameEncapsedString0;
1182 return TokenNameEncapsedString1;
1184 return TokenNameEncapsedString2;
1187 while (currentCharacter != encapsedChar) {
1188 /** ** in PHP \r and \n are valid in string literals *** */
1189 switch (currentCharacter) {
1191 int escapeSize = currentPosition;
1192 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1193 // the escape scan below has a side effect on unicodeAsBackSlash and
1194 // we need the previous value a few lines further down
1195 scanDoubleQuotedEscapeCharacter();
1196 escapeSize = currentPosition - escapeSize;
1197 if (withoutUnicodePtr == 0) {
1198 //buffer all the entries that have been left aside....
1199 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1200 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1201 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1202 } else { //overwrite the / in the buffer
1203 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1204 if (backSlashAsUnicodeInString) { //there are TWO \ in
1205 withoutUnicodePtr--;
1210 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1212 encapsedStringStack.push(new Character('$'));
1213 return TokenNameSTRING;
1217 if (source[currentPosition] == '$') { // CURLY_OPEN
1219 encapsedStringStack.push(new Character('$'));
1220 return TokenNameSTRING;
1223 // consume next character
1224 unicodeAsBackSlash = false;
1225 currentCharacter = source[currentPosition++];
1226 if (withoutUnicodePtr != 0) {
1227 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1232 return TokenNameSTRING;
1234 // ---------Consume white space and handles startPosition---------
1235 int whiteStart = currentPosition;
1236 startPosition = currentPosition;
1237 currentCharacter = source[currentPosition++];
1238 if (encapsedChar == '$') {
1239 switch (currentCharacter) {
1241 currentCharacter = source[currentPosition++];
1242 return TokenNameSTRING;
1244 if (encapsedChar == '$') {
1245 if (getNextChar('$'))
1246 return TokenNameLBRACE_DOLLAR;
1248 return TokenNameLBRACE;
1250 return TokenNameRBRACE;
1252 return TokenNameLBRACKET;
1254 return TokenNameRBRACKET;
1256 if (tokenizeStrings) {
1257 consumeStringConstant();
1258 return TokenNameStringSingleQuote;
1260 return TokenNameEncapsedString1;
1262 return TokenNameEncapsedString2;
1264 if (tokenizeStrings) {
1265 consumeStringInterpolated();
1266 return TokenNameStringInterpolated;
1268 return TokenNameEncapsedString0;
1270 if (getNextChar('>'))
1271 return TokenNameMINUS_GREATER;
1272 return TokenNameSTRING;
1274 if (currentCharacter == '$') {
// remember the position so the scanner can rewind when no variable or "${" follows
1275 int oldPosition = currentPosition;
1277 currentCharacter = source[currentPosition++];
1278 if (currentCharacter == '{') {
1279 return TokenNameDOLLAR_LBRACE;
1281 if (isPHPIdentifierStart(currentCharacter)) {
1282 return scanIdentifierOrKeyword(true);
1284 currentPosition = oldPosition;
1285 return TokenNameSTRING;
1287 } catch (IndexOutOfBoundsException e) {
1288 currentPosition = oldPosition;
1289 return TokenNameSTRING;
1292 if (isPHPIdentifierStart(currentCharacter))
1293 return scanIdentifierOrKeyword(false);
1294 if (Character.isDigit(currentCharacter))
1295 return scanNumber(false);
1296 return TokenNameERROR;
1299 // boolean isWhiteSpace;
1301 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1302 startPosition = currentPosition;
1303 currentCharacter = source[currentPosition++];
1304 // if (((currentCharacter = source[currentPosition++]) == '\\')
1305 // && (source[currentPosition] == 'u')) {
1306 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1308 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1309 checkNonExternalizeString();
1310 if (recordLineSeparator) {
1311 pushLineSeparator();
1316 // isWhiteSpace = (currentCharacter == ' ')
1317 // || Character.isWhitespace(currentCharacter);
1320 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1321 // reposition scanner in case we are interested by spaces as tokens
1323 startPosition = whiteStart;
1324 return TokenNameWHITESPACE;
1326 //little trick to get out in the middle of a source computation
1327 if (currentPosition > eofPosition)
1328 return TokenNameEOF;
1329 // ---------Identify the next token-------------
1330 switch (currentCharacter) {
1332 return getCastOrParen();
1334 return TokenNameRPAREN;
1336 return TokenNameLBRACE;
1338 return TokenNameRBRACE;
1340 return TokenNameLBRACKET;
1342 return TokenNameRBRACKET;
1344 return TokenNameSEMICOLON;
1346 return TokenNameCOMMA;
1348 if (getNextChar('='))
1349 return TokenNameDOT_EQUAL;
1350 if (getNextCharAsDigit())
1351 return scanNumber(true);
1352 return TokenNameDOT;
1355 if ((test = getNextChar('+', '=')) == 0)
1356 return TokenNamePLUS_PLUS;
1358 return TokenNamePLUS_EQUAL;
1359 return TokenNamePLUS;
1363 if ((test = getNextChar('-', '=')) == 0)
1364 return TokenNameMINUS_MINUS;
1366 return TokenNameMINUS_EQUAL;
1367 if (getNextChar('>'))
1368 return TokenNameMINUS_GREATER;
1369 return TokenNameMINUS;
1372 if (getNextChar('='))
1373 return TokenNameTWIDDLE_EQUAL;
1374 return TokenNameTWIDDLE;
1376 if (getNextChar('=')) {
1377 if (getNextChar('=')) {
1378 return TokenNameNOT_EQUAL_EQUAL;
1380 return TokenNameNOT_EQUAL;
1382 return TokenNameNOT;
1384 if (getNextChar('='))
1385 return TokenNameMULTIPLY_EQUAL;
1386 return TokenNameMULTIPLY;
1388 if (getNextChar('='))
1389 return TokenNameREMAINDER_EQUAL;
1390 return TokenNameREMAINDER;
1392 int oldPosition = currentPosition;
1394 currentCharacter = source[currentPosition++];
1395 } catch (IndexOutOfBoundsException e) {
1396 currentPosition = oldPosition;
1397 return TokenNameLESS;
1399 switch (currentCharacter) {
1401 return TokenNameLESS_EQUAL;
1403 return TokenNameNOT_EQUAL;
1405 if (getNextChar('='))
1406 return TokenNameLEFT_SHIFT_EQUAL;
1407 if (getNextChar('<')) {
// third '<' in a row: heredoc; skip white space, then read the identifier that names the end tag
1408 currentCharacter = source[currentPosition++];
1409 while (Character.isWhitespace(currentCharacter)) {
1410 currentCharacter = source[currentPosition++];
1412 int heredocStart = currentPosition - 1;
1413 int heredocLength = 0;
1414 if (isPHPIdentifierStart(currentCharacter)) {
1415 currentCharacter = source[currentPosition++];
1417 return TokenNameERROR;
1419 while (isPHPIdentifierPart(currentCharacter)) {
1420 currentCharacter = source[currentPosition++];
1422 heredocLength = currentPosition - heredocStart - 1;
1423 // heredoc end-tag determination
1424 boolean endTag = true;
1427 ch = source[currentPosition++];
1428 if (ch == '\r' || ch == '\n') {
1429 if (recordLineSeparator) {
1430 pushLineSeparator();
// after a line break, compare the following characters with the identifier read above
1434 for (int i = 0; i < heredocLength; i++) {
1435 if (source[currentPosition + i] != source[heredocStart + i]) {
1441 currentPosition += heredocLength - 1;
1442 currentCharacter = source[currentPosition++];
1443 break; // do...while loop
1449 return TokenNameHEREDOC;
1451 return TokenNameLEFT_SHIFT;
1453 currentPosition = oldPosition;
1454 return TokenNameLESS;
1458 if ((test = getNextChar('=', '>')) == 0)
1459 return TokenNameGREATER_EQUAL;
1461 if ((test = getNextChar('=', '>')) == 0)
1462 return TokenNameRIGHT_SHIFT_EQUAL;
1463 return TokenNameRIGHT_SHIFT;
1465 return TokenNameGREATER;
1468 if (getNextChar('=')) {
1469 if (getNextChar('=')) {
1470 return TokenNameEQUAL_EQUAL_EQUAL;
1472 return TokenNameEQUAL_EQUAL;
1474 if (getNextChar('>'))
1475 return TokenNameEQUAL_GREATER;
1476 return TokenNameEQUAL;
1479 if ((test = getNextChar('&', '=')) == 0)
1480 return TokenNameAND_AND;
1482 return TokenNameAND_EQUAL;
1483 return TokenNameAND;
1487 if ((test = getNextChar('|', '=')) == 0)
1488 return TokenNameOR_OR;
1490 return TokenNameOR_EQUAL;
1494 if (getNextChar('='))
1495 return TokenNameXOR_EQUAL;
1496 return TokenNameXOR;
1498 if (getNextChar('>')) {
1500 if (currentPosition == source.length) {
1502 return TokenNameINLINE_HTML;
1504 return getInlinedHTML(currentPosition - 2);
1506 return TokenNameQUESTION;
1508 if (getNextChar(':'))
1509 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1510 return TokenNameCOLON;
1514 consumeStringConstant();
1515 return TokenNameStringSingleQuote;
1517 if (tokenizeStrings) {
1518 consumeStringLiteral();
1519 return TokenNameStringDoubleQuote;
1521 return TokenNameEncapsedString2;
1523 if (tokenizeStrings) {
1524 consumeStringInterpolated();
1525 return TokenNameStringInterpolated;
1527 return TokenNameEncapsedString0;
1530 char startChar = currentCharacter;
1531 if (getNextChar('=') && startChar=='/') {
1532 return TokenNameDIVIDE_EQUAL;
// a '#', or a second '/' (getNextChar result 0), starts a line comment
1535 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1537 this.lastCommentLinePosition = this.currentPosition;
1538 int endPositionForLineComment = 0;
1539 try { //get the next char
1540 currentCharacter = source[currentPosition++];
1541 // if (((currentCharacter = source[currentPosition++])
1543 // && (source[currentPosition] == 'u')) {
1544 // //-------------unicode handling ------------
1545 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1546 // currentPosition++;
1547 // while (source[currentPosition] == 'u') {
1548 // currentPosition++;
1551 // Character.getNumericValue(source[currentPosition++]))
1555 // Character.getNumericValue(source[currentPosition++]))
1559 // Character.getNumericValue(source[currentPosition++]))
1563 // Character.getNumericValue(source[currentPosition++]))
1567 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1569 // currentCharacter =
1570 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1573 //handle the \\u case manually into comment
1574 // if (currentCharacter == '\\') {
1575 // if (source[currentPosition] == '\\')
1576 // currentPosition++;
1577 // } //jump over the \\
1578 boolean isUnicode = false;
1579 while (currentCharacter != '\r' && currentCharacter != '\n') {
1580 this.lastCommentLinePosition = this.currentPosition;
// a "?>" inside a line comment is returned as TokenNameINLINE_HTML starting at the "?>"
1581 if (currentCharacter == '?') {
1582 if (getNextChar('>')) {
1583 startPosition = currentPosition - 2;
1585 return TokenNameINLINE_HTML;
1590 currentCharacter = source[currentPosition++];
1591 // if (((currentCharacter = source[currentPosition++])
1593 // && (source[currentPosition] == 'u')) {
1594 // isUnicode = true;
1595 // //-------------unicode handling ------------
1596 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1597 // currentPosition++;
1598 // while (source[currentPosition] == 'u') {
1599 // currentPosition++;
1602 // Character.getNumericValue(source[currentPosition++]))
1606 // Character.getNumericValue(
1607 // source[currentPosition++]))
1611 // Character.getNumericValue(
1612 // source[currentPosition++]))
1616 // Character.getNumericValue(
1617 // source[currentPosition++]))
1621 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1623 // currentCharacter =
1624 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1627 //handle the \\u case manually into comment
1628 // if (currentCharacter == '\\') {
1629 // if (source[currentPosition] == '\\')
1630 // currentPosition++;
1631 // } //jump over the \\
1634 endPositionForLineComment = currentPosition - 6;
1636 endPositionForLineComment = currentPosition - 1;
1638 // recordComment(false);
1639 recordComment(TokenNameCOMMENT_LINE);
1640 if (this.taskTags != null)
1641 checkTaskTag(this.startPosition, this.currentPosition);
1642 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1643 checkNonExternalizeString();
1644 if (recordLineSeparator) {
1646 pushUnicodeLineSeparator();
1648 pushLineSeparator();
1654 if (tokenizeComments) {
1656 currentPosition = endPositionForLineComment;
1657 // reset one character behind
1659 return TokenNameCOMMENT_LINE;
1661 } catch (IndexOutOfBoundsException e) { //an eof will then
1663 if (tokenizeComments) {
1665 // reset one character behind
1666 return TokenNameCOMMENT_LINE;
1672 //traditional and annotation comment
1673 boolean isJavadoc = false, star = false;
1674 // consume next character
1675 unicodeAsBackSlash = false;
1676 currentCharacter = source[currentPosition++];
1677 // if (((currentCharacter = source[currentPosition++]) ==
1679 // && (source[currentPosition] == 'u')) {
1680 // getNextUnicodeChar();
1682 // if (withoutUnicodePtr != 0) {
1683 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1684 // currentCharacter;
1687 if (currentCharacter == '*') {
1691 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1692 checkNonExternalizeString();
1693 if (recordLineSeparator) {
1694 pushLineSeparator();
1699 try { //get the next char
1700 currentCharacter = source[currentPosition++];
1701 // if (((currentCharacter = source[currentPosition++])
1703 // && (source[currentPosition] == 'u')) {
1704 // //-------------unicode handling ------------
1705 // getNextUnicodeChar();
1707 //handle the \\u case manually into comment
1708 // if (currentCharacter == '\\') {
1709 // if (source[currentPosition] == '\\')
1710 // currentPosition++;
1711 // //jump over the \\
1713 // empty comment is not a javadoc /**/
1714 if (currentCharacter == '/') {
1717 //loop until end of comment */
// 'star' records whether the previous character was '*', so the loop ends on the '/' of "*/"
1718 while ((currentCharacter != '/') || (!star)) {
1719 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1720 checkNonExternalizeString();
1721 if (recordLineSeparator) {
1722 pushLineSeparator();
1727 star = currentCharacter == '*';
1729 currentCharacter = source[currentPosition++];
1730 // if (((currentCharacter = source[currentPosition++])
1732 // && (source[currentPosition] == 'u')) {
1733 // //-------------unicode handling ------------
1734 // getNextUnicodeChar();
1736 //handle the \\u case manually into comment
1737 // if (currentCharacter == '\\') {
1738 // if (source[currentPosition] == '\\')
1739 // currentPosition++;
1740 // } //jump over the \\
1742 //recordComment(isJavadoc);
1744 recordComment(TokenNameCOMMENT_PHPDOC);
1746 recordComment(TokenNameCOMMENT_BLOCK);
1749 if (tokenizeComments) {
1751 return TokenNameCOMMENT_PHPDOC;
1752 return TokenNameCOMMENT_BLOCK;
1754 } catch (IndexOutOfBoundsException e) {
1755 // reset end position for error reporting
1756 currentPosition -= 2;
1757 throw new InvalidInputException(UNTERMINATED_COMMENT);
1761 return TokenNameDIVIDE;
1765 return TokenNameEOF;
1766 //the atEnd may not be <currentPosition == source.length> if
1767 // source is only some part of a real (external) stream
1768 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1770 if (currentCharacter == '$') {
// '$' followed by an identifier start is scanned as a variable; otherwise rewind and return a bare TokenNameDOLLAR
1771 int oldPosition = currentPosition;
1773 currentCharacter = source[currentPosition++];
1774 if (isPHPIdentifierStart(currentCharacter)) {
1775 return scanIdentifierOrKeyword(true);
1777 currentPosition = oldPosition;
1778 return TokenNameDOLLAR;
1780 } catch (IndexOutOfBoundsException e) {
1781 currentPosition = oldPosition;
1782 return TokenNameDOLLAR;
1785 if (isPHPIdentifierStart(currentCharacter))
1786 return scanIdentifierOrKeyword(false);
1787 if (Character.isDigit(currentCharacter))
1788 return scanNumber(false);
1789 return TokenNameERROR;
1792 } //-----------------end switch while try--------------------
1793 catch (IndexOutOfBoundsException e) {
1796 return TokenNameEOF;
/**
 * Scans inline HTML by delegating to <code>getInlinedHTMLToken(start)</code>.
 * <p>
 * When the delegate reports <code>TokenNameINLINE_HTML</code>, the only code visible in that branch is a
 * commented-out sketch that would walk the HTML tags of the scanned range and keep a stack of open tags.
 * NOTE(review): presumably the delegate's token is returned unchanged - the return statement is not visible
 * here, confirm.
 *
 * @param start
 *          passed through to <code>getInlinedHTMLToken</code>
 * @throws InvalidInputException
 *           declared because <code>getInlinedHTMLToken</code> declares it
 */
1799 private int getInlinedHTML(int start) throws InvalidInputException {
1800 int token = getInlinedHTMLToken(start);
1801 if (token == TokenNameINLINE_HTML) {
1802 // Stack stack = new Stack();
1803 // // scan html for errors
1804 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1805 // int lastPHPEndPos=0;
1806 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1807 // Tag tag=(Tag)i.next();
1809 // if (tag instanceof StartTag) {
1810 // StartTag startTag=(StartTag)tag;
1811 // // System.out.println("startTag: "+tag);
1812 // if (startTag.isServerTag()) {
1813 // // TODO : what to do with a server tag ?
1815 // // do whatever with HTML start tag
1816 // // use startTag.getElement() to find the element corresponding
1817 // // to this start tag which may be useful if you implement code
1819 // stack.push(startTag);
1822 // EndTag endTag=(EndTag)tag;
1823 // StartTag stag = (StartTag) stack.peek();
1824 //// System.out.println("endTag: "+tag);
1825 // // do whatever with HTML end tag.
1834 * @throws InvalidInputException
1836 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Scans inline HTML from currentPosition; 'start' becomes startPosition of the reported token.
// Returns TokenNameEOF (after clamping currentPosition to source.length) when the scanner is already past
// the end of source, and TokenNameINLINE_HTML on every other visible path.
// The loop looks for a PHP open tag: "<?" followed by white space, or "<?" followed by P/p, H/h, P/p.
// With ignorePHPOneLiner set, lookAheadLinePHPTag() is consulted first.
// NOTE(review): the statements that switch the scanner into PHP mode are not visible here - confirm.
1837 // int htmlPosition = start;
1838 if (currentPosition > source.length) {
1839 currentPosition = source.length;
1840 return TokenNameEOF;
1842 startPosition = start;
1845 currentCharacter = source[currentPosition++];
1846 if (currentCharacter == '<') {
1847 if (getNextChar('?')) {
1848 currentCharacter = source[currentPosition++];
// short open tag: "<?" followed by white space
1849 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1851 if (ignorePHPOneLiner) {
1852 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1854 return TokenNameINLINE_HTML;
1858 return TokenNameINLINE_HTML;
// long open tag: "<?php", matched case-insensitively one letter at a time
1861 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1863 int test = getNextChar('H', 'h');
1865 test = getNextChar('P', 'p');
1868 if (ignorePHPOneLiner) {
1869 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1871 return TokenNameINLINE_HTML;
1875 return TokenNameINLINE_HTML;
1883 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1884 if (recordLineSeparator) {
1885 pushLineSeparator();
1890 } //-----------------while--------------------
1892 return TokenNameINLINE_HTML;
1893 } //-----------------try--------------------
// running off the end of source: the remaining text is reported as inline HTML
1894 catch (IndexOutOfBoundsException e) {
1895 startPosition = start;
1899 return TokenNameINLINE_HTML;
1905 private int lookAheadLinePHPTag() {
1906 // check if the PHP is only in this line (for CodeFormatter)
// Reads ahead from currentPosition with a private cursor (currentPositionInLine), tracking whether it is
// inside a single- or double-quoted string (a quote preceded by a backslash does not close the string).
// Returns TokenNameEOF as a dummy token, with currentPosition moved behind the match, when a character
// preceded by '?' is found (presumably the '>' of "?>" - the case label is not visible here, confirm).
// Every other visible exit returns TokenNameINLINE_HTML.
1907 int currentPositionInLine = currentPosition;
1908 char previousCharInLine = ' ';
1909 char currentCharInLine = ' ';
1910 boolean singleQuotedStringActive = false;
1911 boolean doubleQuotedStringActive = false;
1914 // look ahead in this line
1916 previousCharInLine = currentCharInLine;
1917 currentCharInLine = source[currentPositionInLine++];
1918 switch (currentCharInLine) {
1920 if (previousCharInLine == '?') {
1921 // update the scanner's current Position in the source
1922 currentPosition = currentPositionInLine;
1923 // use as "dummy" token
1924 return TokenNameEOF;
// toggle the double-quoted state, unless inside a single-quoted string
1928 if (doubleQuotedStringActive) {
1929 if (previousCharInLine != '\\') {
1930 doubleQuotedStringActive = false;
1933 if (!singleQuotedStringActive) {
1934 doubleQuotedStringActive = true;
// toggle the single-quoted state, unless inside a double-quoted string
1939 if (singleQuotedStringActive) {
1940 if (previousCharInLine != '\\') {
1941 singleQuotedStringActive = false;
1944 if (!doubleQuotedStringActive) {
1945 singleQuotedStringActive = true;
1951 return TokenNameINLINE_HTML;
1953 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1955 return TokenNameINLINE_HTML;
// NOTE(review): a character preceded by '/' outside of strings - presumably the start of a comment; confirm
1959 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1961 return TokenNameINLINE_HTML;
1965 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1967 return TokenNameINLINE_HTML;
1972 } catch (IndexOutOfBoundsException e) {
1974 currentPosition = currentPositionInLine;
1975 return TokenNameINLINE_HTML;
1979 // public final void getNextUnicodeChar()
1980 // throws IndexOutOfBoundsException, InvalidInputException {
1982 // //handle the case of unicode.
1983 // //when a unicode appears then we must use a buffer that holds char
1985 // //At the end of this method currentCharacter holds the new visited char
1986 // //and currentPosition points right next after it
1988 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1990 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1991 // currentPosition++;
1992 // while (source[currentPosition] == 'u') {
1993 // currentPosition++;
1997 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1999 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2001 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2003 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2005 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2007 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2008 // //need the unicode buffer
2009 // if (withoutUnicodePtr == 0) {
2010 // //buffer all the entries that have been left aside....
2011 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2012 // System.arraycopy(
2015 // withoutUnicodeBuffer,
2017 // withoutUnicodePtr);
2019 // //fill the buffer with the char
2020 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2022 // unicodeAsBackSlash = currentCharacter == '\\';
2025 * Tokenize a method body, assuming that curly brackets are properly balanced.
2027 public final void jumpOverMethodBody() {
// Each pass of the outer loop skips white space (recording line separators when recordLineSeparator is
// set) and then dispatches on currentCharacter. The visible branches consume character and string
// literals (escapes go through scanDoubleQuotedEscapeCharacter()), line and block comments, identifiers
// or '$' variables, and numbers.
// NOTE(review): the brace-counting branches that end the scan are not visible here - confirm.
2028 this.wasAcr = false;
2031 while (true) { //loop for jumping over comments
2032 // ---------Consume white space and handles startPosition---------
2033 boolean isWhiteSpace;
2035 startPosition = currentPosition;
2036 currentCharacter = source[currentPosition++];
2037 // if (((currentCharacter = source[currentPosition++]) == '\\')
2038 // && (source[currentPosition] == 'u')) {
2039 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2041 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2042 pushLineSeparator();
2043 isWhiteSpace = Character.isWhitespace(currentCharacter);
2045 } while (isWhiteSpace);
2046 // -------consume token until } is found---------
2047 switch (currentCharacter) {
2058 test = getNextChar('\\');
2061 scanDoubleQuotedEscapeCharacter();
2062 } catch (InvalidInputException ex) {
2066 // try { // consume next character
2067 unicodeAsBackSlash = false;
2068 currentCharacter = source[currentPosition++];
2069 // if (((currentCharacter = source[currentPosition++]) == '\\')
2070 // && (source[currentPosition] == 'u')) {
2071 // getNextUnicodeChar();
2073 if (withoutUnicodePtr != 0) {
2074 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2077 // } catch (InvalidInputException ex) {
2085 // try { // consume next character
2086 unicodeAsBackSlash = false;
2087 currentCharacter = source[currentPosition++];
2088 // if (((currentCharacter = source[currentPosition++]) == '\\')
2089 // && (source[currentPosition] == 'u')) {
2090 // getNextUnicodeChar();
2092 if (withoutUnicodePtr != 0) {
2093 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2096 // } catch (InvalidInputException ex) {
2098 while (currentCharacter != '"') {
2099 if (currentCharacter == '\r') {
2100 if (source[currentPosition] == '\n')
2103 // the string cannot go further that the line
2105 if (currentCharacter == '\n') {
2107 // the string cannot go further that the line
2109 if (currentCharacter == '\\') {
2111 scanDoubleQuotedEscapeCharacter();
2112 } catch (InvalidInputException ex) {
2116 // try { // consume next character
2117 unicodeAsBackSlash = false;
2118 currentCharacter = source[currentPosition++];
2119 // if (((currentCharacter = source[currentPosition++]) == '\\')
2120 // && (source[currentPosition] == 'u')) {
2121 // getNextUnicodeChar();
2123 if (withoutUnicodePtr != 0) {
2124 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2127 // } catch (InvalidInputException ex) {
2130 } catch (IndexOutOfBoundsException e) {
// getNextChar('/', '*') == 0 means a second '/' followed: line comment
2136 if ((test = getNextChar('/', '*')) == 0) {
2140 currentCharacter = source[currentPosition++];
2141 // if (((currentCharacter = source[currentPosition++]) ==
2143 // && (source[currentPosition] == 'u')) {
2144 // //-------------unicode handling ------------
2145 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2146 // currentPosition++;
2147 // while (source[currentPosition] == 'u') {
2148 // currentPosition++;
2151 // Character.getNumericValue(source[currentPosition++]))
2155 // Character.getNumericValue(source[currentPosition++]))
2159 // Character.getNumericValue(source[currentPosition++]))
2163 // Character.getNumericValue(source[currentPosition++]))
2166 // //error don't care of the value
2167 // currentCharacter = 'A';
2168 // } //something different from \n and \r
2170 // currentCharacter =
2171 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2174 while (currentCharacter != '\r' && currentCharacter != '\n') {
2176 currentCharacter = source[currentPosition++];
2177 // if (((currentCharacter = source[currentPosition++])
2179 // && (source[currentPosition] == 'u')) {
2180 // //-------------unicode handling ------------
2181 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2182 // currentPosition++;
2183 // while (source[currentPosition] == 'u') {
2184 // currentPosition++;
2187 // Character.getNumericValue(source[currentPosition++]))
2191 // Character.getNumericValue(source[currentPosition++]))
2195 // Character.getNumericValue(source[currentPosition++]))
2199 // Character.getNumericValue(source[currentPosition++]))
2202 // //error don't care of the value
2203 // currentCharacter = 'A';
2204 // } //something different from \n and \r
2206 // currentCharacter =
2207 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2211 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2212 pushLineSeparator();
2213 } catch (IndexOutOfBoundsException e) {
2214 } //an eof will then be generated
2218 //traditional and annotation comment
2219 boolean star = false;
2220 // try { // consume next character
2221 unicodeAsBackSlash = false;
2222 currentCharacter = source[currentPosition++];
2223 // if (((currentCharacter = source[currentPosition++]) == '\\')
2224 // && (source[currentPosition] == 'u')) {
2225 // getNextUnicodeChar();
2227 if (withoutUnicodePtr != 0) {
2228 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2231 // } catch (InvalidInputException ex) {
2233 if (currentCharacter == '*') {
2236 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2237 pushLineSeparator();
2238 try { //get the next char
2239 currentCharacter = source[currentPosition++];
2240 // if (((currentCharacter = source[currentPosition++]) ==
2242 // && (source[currentPosition] == 'u')) {
2243 // //-------------unicode handling ------------
2244 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2245 // currentPosition++;
2246 // while (source[currentPosition] == 'u') {
2247 // currentPosition++;
2250 // Character.getNumericValue(source[currentPosition++]))
2254 // Character.getNumericValue(source[currentPosition++]))
2258 // Character.getNumericValue(source[currentPosition++]))
2262 // Character.getNumericValue(source[currentPosition++]))
2265 // //error don't care of the value
2266 // currentCharacter = 'A';
2267 // } //something different from * and /
2269 // currentCharacter =
2270 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2273 //loop until end of comment */
// 'star' records whether the previous character was '*', so the loop ends on the '/' of "*/"
2274 while ((currentCharacter != '/') || (!star)) {
2275 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2276 pushLineSeparator();
2277 star = currentCharacter == '*';
2279 currentCharacter = source[currentPosition++];
2280 // if (((currentCharacter = source[currentPosition++])
2282 // && (source[currentPosition] == 'u')) {
2283 // //-------------unicode handling ------------
2284 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2285 // currentPosition++;
2286 // while (source[currentPosition] == 'u') {
2287 // currentPosition++;
2290 // Character.getNumericValue(source[currentPosition++]))
2294 // Character.getNumericValue(source[currentPosition++]))
2298 // Character.getNumericValue(source[currentPosition++]))
2302 // Character.getNumericValue(source[currentPosition++]))
2305 // //error don't care of the value
2306 // currentCharacter = 'A';
2307 // } //something different from * and /
2309 // currentCharacter =
2310 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2314 } catch (IndexOutOfBoundsException e) {
// identifiers and '$' variables are consumed through the regular identifier scanner
2322 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2324 scanIdentifierOrKeyword((currentCharacter == '$'));
2325 } catch (InvalidInputException ex) {
2330 if (Character.isDigit(currentCharacter)) {
2333 } catch (InvalidInputException ex) {
2340 //-----------------end switch while try--------------------
2341 } catch (IndexOutOfBoundsException e) {
2342 } catch (InvalidInputException e) {
// NOTE(review): no handler statements are visible for either exception - presumably both simply end the
// jump and leave the error to the next regular scan; confirm.
2347 // public final boolean jumpOverUnicodeWhiteSpace()
2348 // throws InvalidInputException {
2350 // //handle the case of unicode. Jump over the next whiteSpace
2351 // //making startPosition pointing on the next available char
2352 // //On false, the currentCharacter is filled up with a potential
2356 // this.wasAcr = false;
2357 // int c1, c2, c3, c4;
2358 // int unicodeSize = 6;
2359 // currentPosition++;
2360 // while (source[currentPosition] == 'u') {
2361 // currentPosition++;
2365 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2367 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2369 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2371 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2373 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2376 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2377 // if (recordLineSeparator
2378 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2379 // pushLineSeparator();
2380 // if (Character.isWhitespace(currentCharacter))
2383 // //buffer the new char which is not a white space
2384 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2385 // //withoutUnicodePtr == 1 is true here
2387 // } catch (IndexOutOfBoundsException e) {
2388 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2391 public final int[] getLineEnds() {
2392 //return a bounded copy of this.lineEnds
2394 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor returning a char[] for this scanner.
 * NOTE(review): the method body is not visible in this excerpt; presumably it
 * returns the source buffer being scanned -- confirm.
 */
2398 public char[] getSource() {
2402 public static boolean isIdentifierOrKeyword(int token) {
2403 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Returns the source of a one-character token, i.e. the character found at
 * startPosition, as a char[].
 * NOTE(review): most of the body is not visible in this excerpt; judging from
 * the comments below it presumably hands out shared, pre-built arrays for
 * common characters -- confirm. Only the fallback allocation is visible.
 */
2406 final char[] optimizedCurrentTokenSource1() {
2407 //return always the same char[] build only once
2408 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2409 char charOne = source[startPosition];
// fallback: allocate a new single-element array for this character
2464 return new char[] { charOne };
2468 final char[] optimizedCurrentTokenSource2() {
2469 //try to return the same char[] build only once
2471 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2472 char[][] table = charArray_length[0][hash];
2474 while (++i < InternalTableSize) {
2475 char[] charArray = table[i];
2476 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2479 //---------other side---------
2481 int max = newEntry2;
2482 while (++i <= max) {
2483 char[] charArray = table[i];
2484 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2487 //--------add the entry-------
2488 if (++max >= InternalTableSize)
2491 table[max] = (r = new char[] { c0, c1 });
2496 final char[] optimizedCurrentTokenSource3() {
2497 //try to return the same char[] build only once
2499 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2501 char[][] table = charArray_length[1][hash];
2503 while (++i < InternalTableSize) {
2504 char[] charArray = table[i];
2505 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2508 //---------other side---------
2510 int max = newEntry3;
2511 while (++i <= max) {
2512 char[] charArray = table[i];
2513 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2516 //--------add the entry-------
2517 if (++max >= InternalTableSize)
2520 table[max] = (r = new char[] { c0, c1, c2 });
2525 final char[] optimizedCurrentTokenSource4() {
2526 //try to return the same char[] build only once
2527 char c0, c1, c2, c3;
2528 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2529 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2531 char[][] table = charArray_length[2][(int) hash];
2533 while (++i < InternalTableSize) {
2534 char[] charArray = table[i];
2535 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2538 //---------other side---------
2540 int max = newEntry4;
2541 while (++i <= max) {
2542 char[] charArray = table[i];
2543 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2546 //--------add the entry-------
2547 if (++max >= InternalTableSize)
2550 table[max] = (r = new char[] { c0, c1, c2, c3 });
2555 final char[] optimizedCurrentTokenSource5() {
2556 //try to return the same char[] build only once
2557 char c0, c1, c2, c3, c4;
2558 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2559 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2561 char[][] table = charArray_length[3][(int) hash];
2563 while (++i < InternalTableSize) {
2564 char[] charArray = table[i];
2565 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2568 //---------other side---------
2570 int max = newEntry5;
2571 while (++i <= max) {
2572 char[] charArray = table[i];
2573 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2576 //--------add the entry-------
2577 if (++max >= InternalTableSize)
2580 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2585 final char[] optimizedCurrentTokenSource6() {
2586 //try to return the same char[] build only once
2587 char c0, c1, c2, c3, c4, c5;
2588 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2589 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2590 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2592 char[][] table = charArray_length[4][(int) hash];
2594 while (++i < InternalTableSize) {
2595 char[] charArray = table[i];
2596 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2597 && (c5 == charArray[5]))
2600 //---------other side---------
2602 int max = newEntry6;
2603 while (++i <= max) {
2604 char[] charArray = table[i];
2605 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2606 && (c5 == charArray[5]))
2609 //--------add the entry-------
2610 if (++max >= InternalTableSize)
2613 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2618 public final void pushLineSeparator() throws InvalidInputException {
2619 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2620 final int INCREMENT = 250;
2621 if (this.checkNonExternalizedStringLiterals) {
2622 // reinitialize the current line for non externalize strings purpose
2625 //currentCharacter is at position currentPosition-1
2627 if (currentCharacter == '\r') {
2628 int separatorPos = currentPosition - 1;
2629 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2631 //System.out.println("CR-" + separatorPos);
2633 lineEnds[++linePtr] = separatorPos;
2634 } catch (IndexOutOfBoundsException e) {
2635 //linePtr value is correct
2636 int oldLength = lineEnds.length;
2637 int[] old = lineEnds;
2638 lineEnds = new int[oldLength + INCREMENT];
2639 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2640 lineEnds[linePtr] = separatorPos;
2642 // look-ahead for merged cr+lf
2644 if (source[currentPosition] == '\n') {
2645 //System.out.println("look-ahead LF-" + currentPosition);
2646 lineEnds[linePtr] = currentPosition;
2652 } catch (IndexOutOfBoundsException e) {
2657 if (currentCharacter == '\n') {
2658 //must merge eventual cr followed by lf
2659 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2660 //System.out.println("merge LF-" + (currentPosition - 1));
2661 lineEnds[linePtr] = currentPosition - 1;
2663 int separatorPos = currentPosition - 1;
2664 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2666 // System.out.println("LF-" + separatorPos);
2668 lineEnds[++linePtr] = separatorPos;
2669 } catch (IndexOutOfBoundsException e) {
2670 //linePtr value is correct
2671 int oldLength = lineEnds.length;
2672 int[] old = lineEnds;
2673 lineEnds = new int[oldLength + INCREMENT];
2674 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2675 lineEnds[linePtr] = separatorPos;
2683 public final void pushUnicodeLineSeparator() {
2684 // isUnicode means that the \r or \n has been read as a unicode character
2685 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2686 final int INCREMENT = 250;
2687 //currentCharacter is at position currentPosition-1
2688 if (this.checkNonExternalizedStringLiterals) {
2689 // reinitialize the current line for non externalize strings purpose
2693 if (currentCharacter == '\r') {
2694 int separatorPos = currentPosition - 6;
2695 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2697 //System.out.println("CR-" + separatorPos);
2699 lineEnds[++linePtr] = separatorPos;
2700 } catch (IndexOutOfBoundsException e) {
2701 //linePtr value is correct
2702 int oldLength = lineEnds.length;
2703 int[] old = lineEnds;
2704 lineEnds = new int[oldLength + INCREMENT];
2705 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2706 lineEnds[linePtr] = separatorPos;
2708 // look-ahead for merged cr+lf
2709 if (source[currentPosition] == '\n') {
2710 //System.out.println("look-ahead LF-" + currentPosition);
2711 lineEnds[linePtr] = currentPosition;
2719 if (currentCharacter == '\n') {
2720 //must merge eventual cr followed by lf
2721 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2722 //System.out.println("merge LF-" + (currentPosition - 1));
2723 lineEnds[linePtr] = currentPosition - 6;
2725 int separatorPos = currentPosition - 6;
2726 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2728 // System.out.println("LF-" + separatorPos);
2730 lineEnds[++linePtr] = separatorPos;
2731 } catch (IndexOutOfBoundsException e) {
2732 //linePtr value is correct
2733 int oldLength = lineEnds.length;
2734 int[] old = lineEnds;
2735 lineEnds = new int[oldLength + INCREMENT];
2736 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2737 lineEnds[linePtr] = separatorPos;
2745 public void recordComment(int token) {
2747 int stopPosition = this.currentPosition;
2749 case TokenNameCOMMENT_LINE:
2750 stopPosition = -this.lastCommentLinePosition;
2752 case TokenNameCOMMENT_BLOCK:
2753 stopPosition = -this.currentPosition;
2757 // a new comment is recorded
2758 int length = this.commentStops.length;
2759 if (++this.commentPtr >= length) {
2760 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2761 //grows the positions buffers too
2762 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2764 this.commentStops[this.commentPtr] = stopPosition;
2765 this.commentStarts[this.commentPtr] = this.startPosition;
2768 // public final void recordComment(boolean isJavadoc) {
2769 // // a new annotation comment is recorded
2771 // commentStops[++commentPtr] = isJavadoc
2772 // ? currentPosition
2773 // : -currentPosition;
2774 // } catch (IndexOutOfBoundsException e) {
2775 // int oldStackLength = commentStops.length;
2776 // int[] oldStack = commentStops;
2777 // commentStops = new int[oldStackLength + 30];
2778 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2779 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2780 // //grows the positions buffers too
2781 // int[] old = commentStarts;
2782 // commentStarts = new int[oldStackLength + 30];
2783 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2785 // //the buffer is of a correct size here
2786 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart from begin.
 *
 * @param begin position assigned to initialPosition, startPosition and
 *          currentPosition
 * @param end position used to derive eofPosition (end + 1, see below)
 */
2788 public void resetTo(int begin, int end) {
2789 //reset the scanner to a given position where it may rescan again
2791 initialPosition = startPosition = currentPosition = begin;
// eofPosition is end + 1; Integer.MAX_VALUE is stored unchanged so the addition cannot overflow
2792 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2793 commentPtr = -1; // reset comment stack
/**
 * Handles the character following a backslash inside a single-quoted string:
 * reads the next source character into currentCharacter (advancing
 * currentPosition) and then replaces it, depending on its value, by a quote or
 * a backslash.
 * NOTE(review): the case labels and the end of the switch are not visible in
 * this excerpt -- confirm which input selects which assignment.
 *
 * @throws InvalidInputException declared by the signature; no throw statement
 *           is visible in this excerpt
 */
2796 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2797 // the string with "\\u" is a legal string of two chars \ and u
2798 //thus we use a direct access to the source (for regular cases).
// the unicode-aware variant below is disabled; the source is read directly
2799 // if (unicodeAsBackSlash) {
2800 // // consume next character
2801 // unicodeAsBackSlash = false;
2802 // if (((currentCharacter = source[currentPosition++]) == '\\')
2803 // && (source[currentPosition] == 'u')) {
2804 // getNextUnicodeChar();
2806 // if (withoutUnicodePtr != 0) {
2807 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the character that follows the backslash
2811 currentCharacter = source[currentPosition++];
2812 switch (currentCharacter) {
2814 currentCharacter = '\'';
2817 currentCharacter = '\\';
2820 currentCharacter = '\\';
/**
 * Handles the character following a backslash inside a double-quoted string:
 * reads the next source character into currentCharacter (advancing
 * currentPosition) and replaces it by the character the escape stands for.
 * The visible assignments produce tab, newline, carriage return, double quote,
 * single quote, backslash and '$'; the remaining branch decodes an octal escape
 * of up to three digits. The assignments for '\b' and '\f' are commented out.
 * NOTE(review): the case labels and several short statements are not visible
 * in this excerpt -- confirm before relying on the details.
 *
 * @throws InvalidInputException with INVALID_ESCAPE from the octal branch
 *           (the guarding condition is not visible here)
 */
2825 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2826 // the string with "\\u" is a legal string of two chars \ and u
2827 //thus we use a direct access to the source (for regular cases).
// the unicode-aware variant below is disabled; the source is read directly
2828 // if (unicodeAsBackSlash) {
2829 // // consume next character
2830 // unicodeAsBackSlash = false;
2831 // if (((currentCharacter = source[currentPosition++]) == '\\')
2832 // && (source[currentPosition] == 'u')) {
2833 // getNextUnicodeChar();
2835 // if (withoutUnicodePtr != 0) {
2836 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the character that follows the backslash
2840 currentCharacter = source[currentPosition++];
2841 switch (currentCharacter) {
2843 // currentCharacter = '\b';
2846 currentCharacter = '\t';
2849 currentCharacter = '\n';
2852 // currentCharacter = '\f';
2855 currentCharacter = '\r';
2858 currentCharacter = '\"';
2861 currentCharacter = '\'';
2864 currentCharacter = '\\';
2867 currentCharacter = '$';
2870 // -----------octal escape--------------
2872 // OctalDigit OctalDigit
2873 // ZeroToThree OctalDigit OctalDigit
// first digit: only values 0..7 start an octal escape
2874 int number = Character.getNumericValue(currentCharacter);
2875 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third octal digit
2876 boolean zeroToThreeNot = number > 3;
2877 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2878 int digit = Character.getNumericValue(currentCharacter);
2879 if (digit >= 0 && digit <= 7) {
// accumulate the value in base 8
2880 number = (number * 8) + digit;
2881 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2882 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2883 // Digit --> ignore last character
2886 digit = Character.getNumericValue(currentCharacter);
2887 if (digit >= 0 && digit <= 7) {
2888 // has read \ZeroToThree OctalDigit OctalDigit
2889 number = (number * 8) + digit;
2890 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2891 // --> ignore last character
2895 } else { // has read \OctalDigit NonDigit--> ignore last
2899 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2903 } else { // has read \OctalDigit --> ignore last character
2907 throw new InvalidInputException(INVALID_ESCAPE);
// the decoded octal value becomes the current character
2908 currentCharacter = (char) number;
2911 // throw new InvalidInputException(INVALID_ESCAPE);
2915 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2916 // return scanIdentifierOrKeyword( false );
2918 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2920 //first dispatch on the first char.
2921 //then the length. If there are several
2922 //keywors with the same length AND the same first char, then do another
2923 //disptach on the second char :-)...cool....but fast !
2924 useAssertAsAnIndentifier = false;
2925 while (getNextCharAsJavaIdentifierPart()) {
2929 // if (new String(getCurrentTokenSource()).equals("$this")) {
2930 // return TokenNamethis;
2932 return TokenNameVariable;
2937 // if (withoutUnicodePtr == 0)
2938 //quick test on length == 1 but not on length > 12 while most identifier
2939 //have a length which is <= 12...but there are lots of identifier with
2942 if ((length = currentPosition - startPosition) == 1)
2943 return TokenNameIdentifier;
2945 data = new char[length];
2946 index = startPosition;
2947 for (int i = 0; i < length; i++) {
2948 data[i] = Character.toLowerCase(source[index + i]);
2952 // if ((length = withoutUnicodePtr) == 1)
2953 // return TokenNameIdentifier;
2954 // // data = withoutUnicodeBuffer;
2955 // data = new char[withoutUnicodeBuffer.length];
2956 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2957 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2961 firstLetter = data[index];
2962 switch (firstLetter) {
2967 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
2968 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2969 return TokenNameFILE;
2970 index = 0; //__LINE__
2971 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
2972 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2973 return TokenNameLINE;
2977 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
2978 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
2979 return TokenNameCLASS_C;
2983 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
2984 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
2985 && (data[++index] == '_'))
2986 return TokenNameMETHOD_C;
2990 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
2991 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
2992 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
2993 return TokenNameFUNC_C;
2996 return TokenNameIdentifier;
2998 // as and array abstract
3002 if ((data[++index] == 's')) {
3005 return TokenNameIdentifier;
3009 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3010 return TokenNameand;
3012 return TokenNameIdentifier;
3016 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3017 return TokenNamearray;
3019 return TokenNameIdentifier;
3021 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3022 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3023 return TokenNameabstract;
3025 return TokenNameIdentifier;
3027 return TokenNameIdentifier;
3033 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3034 return TokenNamebreak;
3036 return TokenNameIdentifier;
3038 return TokenNameIdentifier;
3041 //case catch class clone const continue
3044 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3045 return TokenNamecase;
3047 return TokenNameIdentifier;
3049 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3050 return TokenNamecatch;
3052 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3053 return TokenNameclass;
3055 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3056 return TokenNameclone;
3058 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3059 return TokenNameconst;
3061 return TokenNameIdentifier;
3063 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3064 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3065 return TokenNamecontinue;
3067 return TokenNameIdentifier;
3069 return TokenNameIdentifier;
3072 // declare default do die
3073 // TODO delete define ==> no keyword !
3076 if ((data[++index] == 'o'))
3079 return TokenNameIdentifier;
3081 // if ((data[++index] == 'e')
3082 // && (data[++index] == 'f')
3083 // && (data[++index] == 'i')
3084 // && (data[++index] == 'n')
3085 // && (data[++index] == 'e'))
3086 // return TokenNamedefine;
3088 // return TokenNameIdentifier;
3090 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3091 && (data[++index] == 'r') && (data[++index] == 'e'))
3092 return TokenNamedeclare;
3094 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3095 && (data[++index] == 'l') && (data[++index] == 't'))
3096 return TokenNamedefault;
3098 return TokenNameIdentifier;
3100 return TokenNameIdentifier;
3103 //echo else exit elseif extends eval
3106 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3107 return TokenNameecho;
3108 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3109 return TokenNameelse;
3110 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3111 return TokenNameexit;
3112 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3113 return TokenNameeval;
3115 return TokenNameIdentifier;
3118 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3119 return TokenNameendif;
3120 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3121 return TokenNameempty;
3123 return TokenNameIdentifier;
3126 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3127 && (data[++index] == 'r'))
3128 return TokenNameendfor;
3129 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3130 && (data[++index] == 'f'))
3131 return TokenNameelseif;
3133 return TokenNameIdentifier;
3135 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3136 && (data[++index] == 'd') && (data[++index] == 's'))
3137 return TokenNameextends;
3139 return TokenNameIdentifier;
3142 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3143 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3144 return TokenNameendwhile;
3146 return TokenNameIdentifier;
3149 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3150 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3151 return TokenNameendswitch;
3153 return TokenNameIdentifier;
3156 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3157 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3158 && (data[++index] == 'e'))
3159 return TokenNameenddeclare;
3161 if ((data[++index] == 'n') // endforeach
3162 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3163 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3164 return TokenNameendforeach;
3166 return TokenNameIdentifier;
3168 return TokenNameIdentifier;
3171 //for false final function
3174 if ((data[++index] == 'o') && (data[++index] == 'r'))
3175 return TokenNamefor;
3177 return TokenNameIdentifier;
3179 // if ((data[++index] == 'a') && (data[++index] == 'l')
3180 // && (data[++index] == 's') && (data[++index] == 'e'))
3181 // return TokenNamefalse;
3182 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3183 return TokenNamefinal;
3185 return TokenNameIdentifier;
3188 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3189 && (data[++index] == 'c') && (data[++index] == 'h'))
3190 return TokenNameforeach;
3192 return TokenNameIdentifier;
3195 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3196 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3197 return TokenNamefunction;
3199 return TokenNameIdentifier;
3201 return TokenNameIdentifier;
3206 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3207 && (data[++index] == 'l')) {
3208 return TokenNameglobal;
3211 return TokenNameIdentifier;
3213 //if int isset include include_once instanceof interface implements
3216 if (data[++index] == 'f')
3219 return TokenNameIdentifier;
3221 // if ((data[++index] == 'n') && (data[++index] == 't'))
3222 // return TokenNameint;
3224 // return TokenNameIdentifier;
3226 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3227 return TokenNameisset;
3229 return TokenNameIdentifier;
3231 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3232 && (data[++index] == 'd') && (data[++index] == 'e'))
3233 return TokenNameinclude;
3235 return TokenNameIdentifier;
3238 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3239 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3240 return TokenNameinterface;
3242 return TokenNameIdentifier;
3245 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3246 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3247 && (data[++index] == 'f'))
3248 return TokenNameinstanceof;
3249 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3250 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3251 && (data[++index] == 's'))
3252 return TokenNameimplements;
3254 return TokenNameIdentifier;
3256 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3257 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3258 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3259 return TokenNameinclude_once;
3261 return TokenNameIdentifier;
3263 return TokenNameIdentifier;
3268 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3269 return TokenNamelist;
3272 return TokenNameIdentifier;
3277 if ((data[++index] == 'e') && (data[++index] == 'w'))
3278 return TokenNamenew;
3280 return TokenNameIdentifier;
3282 // if ((data[++index] == 'u') && (data[++index] == 'l')
3283 // && (data[++index] == 'l'))
3284 // return TokenNamenull;
3286 // return TokenNameIdentifier;
3288 return TokenNameIdentifier;
3293 if (data[++index] == 'r') {
3297 // if (length == 12) {
3298 // if ((data[++index] == 'l')
3299 // && (data[++index] == 'd')
3300 // && (data[++index] == '_')
3301 // && (data[++index] == 'f')
3302 // && (data[++index] == 'u')
3303 // && (data[++index] == 'n')
3304 // && (data[++index] == 'c')
3305 // && (data[++index] == 't')
3306 // && (data[++index] == 'i')
3307 // && (data[++index] == 'o')
3308 // && (data[++index] == 'n')) {
3309 // return TokenNameold_function;
3312 return TokenNameIdentifier;
3314 // print public private protected
3317 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3318 return TokenNameprint;
3320 return TokenNameIdentifier;
3322 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3323 && (data[++index] == 'c')) {
3324 return TokenNamepublic;
3326 return TokenNameIdentifier;
3328 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3329 && (data[++index] == 't') && (data[++index] == 'e')) {
3330 return TokenNameprivate;
3332 return TokenNameIdentifier;
3334 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3335 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3336 return TokenNameprotected;
3338 return TokenNameIdentifier;
3340 return TokenNameIdentifier;
3342 //return require require_once
3344 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3345 && (data[++index] == 'n')) {
3346 return TokenNamereturn;
3348 } else if (length == 7) {
3349 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3350 && (data[++index] == 'r') && (data[++index] == 'e')) {
3351 return TokenNamerequire;
3353 } else if (length == 12) {
3354 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3355 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3356 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3357 return TokenNamerequire_once;
3360 return TokenNameIdentifier;
3365 if (data[++index] == 't')
3366 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3367 return TokenNamestatic;
3369 return TokenNameIdentifier;
3370 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3371 && (data[++index] == 'h'))
3372 return TokenNameswitch;
3374 return TokenNameIdentifier;
3376 return TokenNameIdentifier;
3382 if ((data[++index] == 'r') && (data[++index] == 'y'))
3383 return TokenNametry;
3385 return TokenNameIdentifier;
3387 // if ((data[++index] == 'r') && (data[++index] == 'u')
3388 // && (data[++index] == 'e'))
3389 // return TokenNametrue;
3391 // return TokenNameIdentifier;
3393 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3394 return TokenNamethrow;
3396 return TokenNameIdentifier;
3398 return TokenNameIdentifier;
3404 if ((data[++index] == 's') && (data[++index] == 'e'))
3405 return TokenNameuse;
3407 return TokenNameIdentifier;
3409 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3410 return TokenNameunset;
3412 return TokenNameIdentifier;
3414 return TokenNameIdentifier;
3420 if ((data[++index] == 'a') && (data[++index] == 'r'))
3421 return TokenNamevar;
3423 return TokenNameIdentifier;
3425 return TokenNameIdentifier;
3431 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3432 return TokenNamewhile;
3434 return TokenNameIdentifier;
3435 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3436 // (data[++index]=='e') && (data[++index]=='f')&&
3437 // (data[++index]=='p'))
3438 //return TokenNamewidefp ;
3440 //return TokenNameIdentifier;
3442 return TokenNameIdentifier;
3448 if ((data[++index] == 'o') && (data[++index] == 'r'))
3449 return TokenNamexor;
3451 return TokenNameIdentifier;
3453 return TokenNameIdentifier;
3456 return TokenNameIdentifier;
/**
 * Scans the remainder of a numeric literal and reports which literal token it forms.
 * <p>
 * Three shapes are handled: a hexadecimal integer (0x / 0X followed by at least one hex digit),
 * a number with a leading zero that is an integer unless a '.' follows its digits, and a plain
 * decimal number with optional fraction, exponent and d/D suffix.
 *
 * @param dotPrefix true when the number was introduced by a '.'; the literal is then floating
 *          from the start, the leading-zero handling is skipped and no further '.' is accepted
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when no hex digit follows 0x / 0X, or with
 *           INVALID_FLOAT when an exponent marker (and optional sign) is not followed by a digit
 */
3460 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3461 //when entering this method the currentCharacter is the first
3462 //digit of the number , i.e. it may be preceded by a . when
3464 boolean floating = dotPrefix;
3465 if ((!dotPrefix) && (currentCharacter == '0')) {
3466 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3467 //force the first char of the hexa number to exist...
3468 // consume next character
3469 unicodeAsBackSlash = false;
3470 currentCharacter = source[currentPosition++];
3471 // if (((currentCharacter = source[currentPosition++]) == '\\')
3472 // && (source[currentPosition] == 'u')) {
3473 // getNextUnicodeChar();
3475 // if (withoutUnicodePtr != 0) {
3476 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3479 if (Character.digit(currentCharacter, 16) == -1)
3480 throw new InvalidInputException(INVALID_HEXA);
// swallow the remaining hexadecimal digits
3482 while (getNextCharAsDigit(16)) {
3485 // if (getNextChar('l', 'L') >= 0)
3486 // return TokenNameLongLiteral;
3488 return TokenNameIntegerLiteral;
3490 //no x or X followed the leading 0 (the hexadecimal branch above has already returned)
3491 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3492 // 00078.0 is true !!!!! crazy language
3493 if (getNextCharAsDigit()) {
3494 //-------------potential octal-----------------
3495 while (getNextCharAsDigit()) {
3498 // if (getNextChar('l', 'L') >= 0) {
3499 // return TokenNameLongLiteral;
3502 // if (getNextChar('f', 'F') >= 0) {
3503 // return TokenNameFloatingPointLiteral;
3505 if (getNextChar('d', 'D') >= 0) {
3506 return TokenNameDoubleLiteral;
3507 } else { //make the distinction between octal and float ....
3508 if (getNextChar('.')) { //bingo ! ....
// fraction digits after the '.'
3509 while (getNextCharAsDigit()) {
3512 if (getNextChar('e', 'E') >= 0) {
3513 // consume next character
3514 unicodeAsBackSlash = false;
3515 currentCharacter = source[currentPosition++];
3516 // if (((currentCharacter = source[currentPosition++]) == '\\')
3517 // && (source[currentPosition] == 'u')) {
3518 // getNextUnicodeChar();
3520 // if (withoutUnicodePtr != 0) {
3521 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3524 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3525 // consume next character
3526 unicodeAsBackSlash = false;
3527 currentCharacter = source[currentPosition++];
3528 // if (((currentCharacter = source[currentPosition++]) == '\\')
3529 // && (source[currentPosition] == 'u')) {
3530 // getNextUnicodeChar();
3532 // if (withoutUnicodePtr != 0) {
3533 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3534 // currentCharacter;
// an exponent needs at least one digit
3538 if (!Character.isDigit(currentCharacter))
3539 throw new InvalidInputException(INVALID_FLOAT);
3540 while (getNextCharAsDigit()) {
3544 // if (getNextChar('f', 'F') >= 0)
3545 // return TokenNameFloatingPointLiteral;
3546 getNextChar('d', 'D'); //jump over potential d or D
3547 return TokenNameDoubleLiteral;
3549 return TokenNameIntegerLiteral;
// integer part of a plain decimal number
3556 while (getNextCharAsDigit()) {
3559 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3560 // return TokenNameLongLiteral;
3561 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3562 while (getNextCharAsDigit()) {
// NOTE(review): the embedded numbering skips 3563-3566 here; presumably 'floating' is set
// once a '.' has been consumed -- confirm against the complete file
3567 //if floating is true both exponent and suffix may be optional
3568 if (getNextChar('e', 'E') >= 0) {
3570 // consume next character
3571 unicodeAsBackSlash = false;
3572 currentCharacter = source[currentPosition++];
3573 // if (((currentCharacter = source[currentPosition++]) == '\\')
3574 // && (source[currentPosition] == 'u')) {
3575 // getNextUnicodeChar();
3577 // if (withoutUnicodePtr != 0) {
3578 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3581 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3584 unicodeAsBackSlash = false;
3585 currentCharacter = source[currentPosition++];
3586 // if (((currentCharacter = source[currentPosition++]) == '\\')
3587 // && (source[currentPosition] == 'u')) {
3588 // getNextUnicodeChar();
3590 // if (withoutUnicodePtr != 0) {
3591 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// an exponent needs at least one digit
3595 if (!Character.isDigit(currentCharacter))
3596 throw new InvalidInputException(INVALID_FLOAT);
3597 while (getNextCharAsDigit()) {
// an explicit d/D suffix always makes the literal a double
3601 if (getNextChar('d', 'D') >= 0)
3602 return TokenNameDoubleLiteral;
3603 // if (getNextChar('f', 'F') >= 0)
3604 // return TokenNameFloatingPointLiteral;
3605 //the long flag has been tested before
3606 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3610 * Search the line number corresponding to a specific position
3613 public final int getLineNumber(int position) {
// Looks up the line for a source position using the line-end positions recorded in lineEnds;
// only the first linePtr + 1 entries of lineEnds are considered.
// NOTE(review): the embedded numbering skips several lines in this method, so the loop header,
// the updates of g / d / m and every return statement are not visible in this listing. The
// comparisons below look like a binary search between the bounds g and d -- confirm against
// the complete file before relying on this description.
3614 if (lineEnds == null)
3616 int length = linePtr + 1;
// g and d are the lower and upper bounds of the searched index range
3619 int g = 0, d = length - 1;
3623 if (position < lineEnds[m]) {
3625 } else if (position > lineEnds[m]) {
// final comparison against the last probed entry
3631 if (position < lineEnds[m]) {
/**
 * Setter for the scanner's PHP-mode flag.
 * NOTE(review): the method body is not visible in this listing; presumably it stores
 * <code>mode</code> in the public <code>phpMode</code> field -- confirm.
 *
 * @param mode the new PHP-mode value
 */
3637 public void setPHPMode(boolean mode) {
/**
 * Sets the source to scan without an associated compilation unit; delegates to
 * {@link #setSource(ICompilationUnit, char[])} with a null compilation unit.
 *
 * @param source the characters to scan
 */
3641 public final void setSource(char[] source) {
3642 setSource(null, source);
/**
 * Installs a new source buffer and resets the per-source scanning state.
 *
 * @param compilationUnit the unit the source belongs to; stored as given (the one-argument
 *          overload passes null)
 * @param source the characters to scan; null is replaced by an empty array
 */
3645 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3646 //the source-buffer is set to sourceString
3647 this.compilationUnit = compilationUnit;
3648 if (source == null) {
// never keep a null buffer: fall back to an empty source
3649 this.source = new char[0];
3651 this.source = source;
// restart scanning at the beginning of the new buffer
3654 initialPosition = currentPosition = 0;
3655 containsAssertKeyword = false;
// scratch buffer sized to the source (uses this.source, which is never null at this point)
3656 withoutUnicodeBuffer = new char[this.source.length];
3657 encapsedStringStack = new Stack();
3660 public String toString() {
3661 if (startPosition == source.length)
3662 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3663 if (currentPosition > source.length)
3664 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3665 char front[] = new char[startPosition];
3666 System.arraycopy(source, 0, front, 0, startPosition);
3667 int middleLength = (currentPosition - 1) - startPosition + 1;
3669 if (middleLength > -1) {
3670 middle = new char[middleLength];
3671 System.arraycopy(source, startPosition, middle, 0, middleLength);
3673 middle = new char[0];
3675 char end[] = new char[source.length - (currentPosition - 1)];
3676 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3677 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3678 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of a token kind, intended for debugging and tracing.
 * Keywords, operators and punctuation map to fixed text; identifiers, variables, literals,
 * inline HTML, whitespace and comments additionally embed the current token source. Any other
 * value yields a "not-a-token(...)" string that contains the numeric value and the current
 * token source.
 * NOTE(review): this listing omits the switch header and the shorter case labels (the embedded
 * numbering skips), so some return statements appear without their label.
 *
 * @param act the token kind to describe
 * @return the description of <code>act</code>
 */
3682 public final String toStringAction(int act) {
3684 case TokenNameERROR:
3685 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3687 case TokenNameINLINE_HTML:
3688 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3689 case TokenNameIdentifier:
3690 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3691 case TokenNameVariable:
3692 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3693 case TokenNameabstract:
3694 return "abstract"; //$NON-NLS-1$
3696 return "AND"; //$NON-NLS-1$
3697 case TokenNamearray:
3698 return "array"; //$NON-NLS-1$
3700 return "as"; //$NON-NLS-1$
3701 case TokenNamebreak:
3702 return "break"; //$NON-NLS-1$
3704 return "case"; //$NON-NLS-1$
3705 case TokenNameclass:
3706 return "class"; //$NON-NLS-1$
3707 case TokenNamecatch:
3708 return "catch"; //$NON-NLS-1$
3709 case TokenNameclone:
3712 case TokenNameconst:
3715 case TokenNamecontinue:
3716 return "continue"; //$NON-NLS-1$
3717 case TokenNamedefault:
3718 return "default"; //$NON-NLS-1$
3719 // case TokenNamedefine :
3720 // return "define"; //$NON-NLS-1$
3722 return "do"; //$NON-NLS-1$
3724 return "echo"; //$NON-NLS-1$
3726 return "else"; //$NON-NLS-1$
3727 case TokenNameelseif:
3728 return "elseif"; //$NON-NLS-1$
3729 case TokenNameendfor:
3730 return "endfor"; //$NON-NLS-1$
3731 case TokenNameendforeach:
3732 return "endforeach"; //$NON-NLS-1$
3733 case TokenNameendif:
3734 return "endif"; //$NON-NLS-1$
3735 case TokenNameendswitch:
3736 return "endswitch"; //$NON-NLS-1$
3737 case TokenNameendwhile:
3738 return "endwhile"; //$NON-NLS-1$
3741 case TokenNameextends:
3742 return "extends"; //$NON-NLS-1$
3743 // case TokenNamefalse :
3744 // return "false"; //$NON-NLS-1$
3745 case TokenNamefinal:
3746 return "final"; //$NON-NLS-1$
3748 return "for"; //$NON-NLS-1$
3749 case TokenNameforeach:
3750 return "foreach"; //$NON-NLS-1$
3751 case TokenNamefunction:
3752 return "function"; //$NON-NLS-1$
3753 case TokenNameglobal:
3754 return "global"; //$NON-NLS-1$
3756 return "if"; //$NON-NLS-1$
3757 case TokenNameimplements:
3758 return "implements"; //$NON-NLS-1$
3759 case TokenNameinclude:
3760 return "include"; //$NON-NLS-1$
3761 case TokenNameinclude_once:
3762 return "include_once"; //$NON-NLS-1$
3763 case TokenNameinstanceof:
3764 return "instanceof"; //$NON-NLS-1$
3765 case TokenNameinterface:
3766 return "interface"; //$NON-NLS-1$
3767 case TokenNameisset:
3768 return "isset"; //$NON-NLS-1$
3770 return "list"; //$NON-NLS-1$
3772 return "new"; //$NON-NLS-1$
3773 // case TokenNamenull :
3774 // return "null"; //$NON-NLS-1$
3776 return "OR"; //$NON-NLS-1$
3777 case TokenNameprint:
3778 return "print"; //$NON-NLS-1$
3779 case TokenNameprivate:
3780 return "private"; //$NON-NLS-1$
3781 case TokenNameprotected:
3782 return "protected"; //$NON-NLS-1$
3783 case TokenNamepublic:
3784 return "public"; //$NON-NLS-1$
3785 case TokenNamerequire:
3786 return "require"; //$NON-NLS-1$
3787 case TokenNamerequire_once:
3788 return "require_once"; //$NON-NLS-1$
3789 case TokenNamereturn:
3790 return "return"; //$NON-NLS-1$
3791 case TokenNamestatic:
3792 return "static"; //$NON-NLS-1$
3793 case TokenNameswitch:
3794 return "switch"; //$NON-NLS-1$
3795 // case TokenNametrue :
3796 // return "true"; //$NON-NLS-1$
3797 case TokenNameunset:
3798 return "unset"; //$NON-NLS-1$
3800 return "var"; //$NON-NLS-1$
3801 case TokenNamewhile:
3802 return "while"; //$NON-NLS-1$
3804 return "XOR"; //$NON-NLS-1$
3805 // case TokenNamethis :
3806 // return "$this"; //$NON-NLS-1$
3807 case TokenNameIntegerLiteral:
3808 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3809 case TokenNameDoubleLiteral:
3810 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3811 case TokenNameStringDoubleQuote:
3812 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3813 case TokenNameStringSingleQuote:
3814 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3815 case TokenNameStringInterpolated:
3816 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3817 case TokenNameEncapsedString0:
3818 return "`"; //$NON-NLS-1$
3819 case TokenNameEncapsedString1:
3820 return "\'"; //$NON-NLS-1$
3821 case TokenNameEncapsedString2:
3822 return "\""; //$NON-NLS-1$
3823 case TokenNameSTRING:
3824 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3825 case TokenNameHEREDOC:
3826 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3827 case TokenNamePLUS_PLUS:
3828 return "++"; //$NON-NLS-1$
3829 case TokenNameMINUS_MINUS:
3830 return "--"; //$NON-NLS-1$
3831 case TokenNameEQUAL_EQUAL:
3832 return "=="; //$NON-NLS-1$
3833 case TokenNameEQUAL_EQUAL_EQUAL:
3834 return "==="; //$NON-NLS-1$
3835 case TokenNameEQUAL_GREATER:
3836 return "=>"; //$NON-NLS-1$
3837 case TokenNameLESS_EQUAL:
3838 return "<="; //$NON-NLS-1$
3839 case TokenNameGREATER_EQUAL:
3840 return ">="; //$NON-NLS-1$
3841 case TokenNameNOT_EQUAL:
3842 return "!="; //$NON-NLS-1$
3843 case TokenNameNOT_EQUAL_EQUAL:
3844 return "!=="; //$NON-NLS-1$
3845 case TokenNameLEFT_SHIFT:
3846 return "<<"; //$NON-NLS-1$
3847 case TokenNameRIGHT_SHIFT:
3848 return ">>"; //$NON-NLS-1$
3849 case TokenNamePLUS_EQUAL:
3850 return "+="; //$NON-NLS-1$
3851 case TokenNameMINUS_EQUAL:
3852 return "-="; //$NON-NLS-1$
3853 case TokenNameMULTIPLY_EQUAL:
3854 return "*="; //$NON-NLS-1$
3855 case TokenNameDIVIDE_EQUAL:
3856 return "/="; //$NON-NLS-1$
3857 case TokenNameAND_EQUAL:
3858 return "&="; //$NON-NLS-1$
3859 case TokenNameOR_EQUAL:
3860 return "|="; //$NON-NLS-1$
3861 case TokenNameXOR_EQUAL:
3862 return "^="; //$NON-NLS-1$
3863 case TokenNameREMAINDER_EQUAL:
3864 return "%="; //$NON-NLS-1$
3865 case TokenNameDOT_EQUAL:
3866 return ".="; //$NON-NLS-1$
3867 case TokenNameLEFT_SHIFT_EQUAL:
3868 return "<<="; //$NON-NLS-1$
3869 case TokenNameRIGHT_SHIFT_EQUAL:
3870 return ">>="; //$NON-NLS-1$
3871 case TokenNameOR_OR:
3872 return "||"; //$NON-NLS-1$
3873 case TokenNameAND_AND:
3874 return "&&"; //$NON-NLS-1$
3876 return "+"; //$NON-NLS-1$
3877 case TokenNameMINUS:
3878 return "-"; //$NON-NLS-1$
3879 case TokenNameMINUS_GREATER:
3882 return "!"; //$NON-NLS-1$
3883 case TokenNameREMAINDER:
3884 return "%"; //$NON-NLS-1$
3886 return "^"; //$NON-NLS-1$
3888 return "&"; //$NON-NLS-1$
3889 case TokenNameMULTIPLY:
3890 return "*"; //$NON-NLS-1$
3892 return "|"; //$NON-NLS-1$
3893 case TokenNameTWIDDLE:
3894 return "~"; //$NON-NLS-1$
3895 case TokenNameTWIDDLE_EQUAL:
3896 return "~="; //$NON-NLS-1$
3897 case TokenNameDIVIDE:
3898 return "/"; //$NON-NLS-1$
3899 case TokenNameGREATER:
3900 return ">"; //$NON-NLS-1$
3902 return "<"; //$NON-NLS-1$
3903 case TokenNameLPAREN:
3904 return "("; //$NON-NLS-1$
3905 case TokenNameRPAREN:
3906 return ")"; //$NON-NLS-1$
3907 case TokenNameLBRACE:
3908 return "{"; //$NON-NLS-1$
3909 case TokenNameRBRACE:
3910 return "}"; //$NON-NLS-1$
3911 case TokenNameLBRACKET:
3912 return "["; //$NON-NLS-1$
3913 case TokenNameRBRACKET:
3914 return "]"; //$NON-NLS-1$
3915 case TokenNameSEMICOLON:
3916 return ";"; //$NON-NLS-1$
3917 case TokenNameQUESTION:
3918 return "?"; //$NON-NLS-1$
3919 case TokenNameCOLON:
3920 return ":"; //$NON-NLS-1$
3921 case TokenNameCOMMA:
3922 return ","; //$NON-NLS-1$
3924 return "."; //$NON-NLS-1$
3925 case TokenNameEQUAL:
3926 return "="; //$NON-NLS-1$
3929 case TokenNameDOLLAR:
3931 case TokenNameDOLLAR_LBRACE:
3933 case TokenNameLBRACE_DOLLAR:
3936 return "EOF"; //$NON-NLS-1$
3937 case TokenNameWHITESPACE:
3938 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3939 case TokenNameCOMMENT_LINE:
3940 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3941 case TokenNameCOMMENT_BLOCK:
3942 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3943 case TokenNameCOMMENT_PHPDOC:
3944 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3945 // case TokenNameHTML :
3946 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
3949 return "__FILE__"; //$NON-NLS-1$
3951 return "__LINE__"; //$NON-NLS-1$
3952 case TokenNameCLASS_C:
3953 return "__CLASS__"; //$NON-NLS-1$
3954 case TokenNameMETHOD_C:
3955 return "__METHOD__"; //$NON-NLS-1$
3956 case TokenNameFUNC_C:
3957 return "__FUNCTION__"; //$NON-NLS-1$
3958 case TokenNameboolCAST:
3959 return "( bool )"; //$NON-NLS-1$
3960 case TokenNameintCAST:
3961 return "( int )"; //$NON-NLS-1$
3962 case TokenNamedoubleCAST:
3963 return "( double )"; //$NON-NLS-1$
3964 case TokenNameobjectCAST:
3965 return "( object )"; //$NON-NLS-1$
3966 case TokenNamestringCAST:
3967 return "( string )"; //$NON-NLS-1$
// fallback for values without a description above
// NOTE(review): new Integer(int) is deprecated in current JDKs; string concatenation with the
// int itself would produce the same text
3969 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$ //$NON-NLS-2$
/**
 * Creates a scanner that does not check for non-externalized string literals; delegates to the
 * three-argument constructor with <code>checkNonExternalizedStringLiterals</code> = false.
 *
 * @param tokenizeComments value stored in the tokenizeComments flag
 * @param tokenizeWhiteSpace value stored in the tokenizeWhiteSpace flag
 */
3977 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
3978 this(tokenizeComments, tokenizeWhiteSpace, false);
/**
 * Creates a scanner with assert mode switched off; delegates to the four-argument constructor
 * with <code>assertMode</code> = false.
 *
 * @param tokenizeComments value stored in the tokenizeComments flag
 * @param tokenizeWhiteSpace value stored in the tokenizeWhiteSpace flag
 * @param checkNonExternalizedStringLiterals value stored in the
 *          checkNonExternalizedStringLiterals flag
 */
3981 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3982 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner without string tokenizing and without task tags; delegates to the
 * seven-argument constructor with <code>tokenizeStrings</code> = false and null for both
 * <code>taskTags</code> and <code>taskPriorities</code>.
 *
 * @param tokenizeComments value stored in the tokenizeComments flag
 * @param tokenizeWhiteSpace value stored in the tokenizeWhiteSpace flag
 * @param checkNonExternalizedStringLiterals value stored in the
 *          checkNonExternalizedStringLiterals flag
 * @param assertMode value stored in the assertMode flag
 */
3985 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
3986 boolean assertMode) {
3987 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null);
/**
 * Creates a scanner and stores every configuration value in the field of the same name. No
 * source is installed here.
 *
 * @param tokenizeComments value stored in the tokenizeComments flag
 * @param tokenizeWhiteSpace value stored in the tokenizeWhiteSpace flag
 * @param checkNonExternalizedStringLiterals value stored in the
 *          checkNonExternalizedStringLiterals flag
 * @param assertMode value stored in the assertMode flag
 * @param tokenizeStrings value stored in the tokenizeStrings flag
 * @param taskTags the task tags looked for by checkTaskTag; may be null (the shorter
 *          constructors pass null)
 * @param taskPriorities priorities matched to <code>taskTags</code> by index; may be null
 */
3990 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
3991 boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities) {
// no explicit end-of-file limit until one is set
3992 this.eofPosition = Integer.MAX_VALUE;
3993 this.tokenizeComments = tokenizeComments;
3994 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3995 this.tokenizeStrings = tokenizeStrings;
3996 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3997 this.assertMode = assertMode;
// the stack is created when a source is installed (see setSource)
3998 this.encapsedStringStack = null;
3999 this.taskTags = taskTags;
4000 this.taskPriorities = taskPriorities;
/**
 * Runs the NLS tag analysis for the current line by handing <code>currentLine</code> to
 * {@link #parseTags(NLSLine)}.
 * NOTE(review): the statement guarded by the null check is not visible in this listing;
 * presumably the method returns early when there is no current line -- confirm.
 *
 * @throws InvalidInputException propagated from parseTags
 */
4003 private void checkNonExternalizeString() throws InvalidInputException {
4004 if (currentLine == null)
4006 parseTags(currentLine);
/**
 * Reads the NLS tags contained in the current token source and collects the string literals of
 * <code>line</code> that are still non-null afterwards into <code>nonNLSStrings</code>.
 * <p>
 * A tag is the text between TAG_PREFIX and TAG_POSTFIX and is parsed as a one-based index; a
 * tag that is not a number is treated as index -1. When no literal is left,
 * <code>nonNLSStrings</code> is set to null; otherwise
 * <code>wasNonExternalizedStringLiteral</code> is set and the array is trimmed to the number of
 * collected literals.
 * NOTE(review): the embedded numbering skips lines in this method; the loop header over the
 * tags and the statement executed when <code>line.exists(i)</code> holds are not visible here.
 *
 * @param line the literals of the current line -- presumably one slot per string literal; verify
 * @throws InvalidInputException declared by the signature; no throw statement is visible in this
 *           listing
 */
4009 private void parseTags(NLSLine line) throws InvalidInputException {
4010 String s = new String(getCurrentTokenSource());
4011 int pos = s.indexOf(TAG_PREFIX);
4012 int lineLength = line.size();
4014 int start = pos + TAG_PREFIX_LENGTH;
4015 int end = s.indexOf(TAG_POSTFIX, start);
// NOTE(review): when TAG_POSTFIX is missing, end is -1 and substring throws a
// StringIndexOutOfBoundsException; no guard is visible between these two statements
4016 String index = s.substring(start, end);
4019 i = Integer.parseInt(index) - 1;
4020 // Tags are one based not zero based.
4021 } catch (NumberFormatException e) {
4022 i = -1; // we don't want to consider this as a valid NLS tag
4024 if (line.exists(i)) {
// continue with the next tag after the one just handled
4027 pos = s.indexOf(TAG_PREFIX, start);
// collect the literals that are still present on the line
4029 this.nonNLSStrings = new StringLiteral[lineLength];
4030 int nonNLSCounter = 0;
4031 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4032 StringLiteral literal = (StringLiteral) iterator.next();
4033 if (literal != null) {
4034 this.nonNLSStrings[nonNLSCounter++] = literal;
4037 if (nonNLSCounter == 0) {
4038 this.nonNLSStrings = null;
4042 this.wasNonExternalizedStringLiteral = true;
// shrink the array when some slots of the line were empty
4043 if (nonNLSCounter != lineLength) {
4044 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Replaces <code>currentCharacter</code> by the character denoted by the escape sequence being
 * scanned.
 * <p>
 * The simple escapes yield backspace, tab, line feed, form feed, carriage return, double quote,
 * single quote and backslash. Anything else is tried as an octal escape of up to three octal
 * digits, where a third digit is only accepted when the first digit is at most 3.
 * NOTE(review): this listing omits the case labels, the else / break lines and the position
 * adjustments of the "ignore last character" branches (the embedded numbering skips), so the
 * exact source characters of the simple escapes and the exact conditions of the two throws are
 * not visible here.
 *
 * @throws InvalidInputException with INVALID_ESCAPE for an escape that is not accepted
 */
4049 public final void scanEscapeCharacter() throws InvalidInputException {
4050 // the string with "\\u" is a legal string of two chars \ and u
4051 //thus we use a direct access to the source (for regular cases).
4052 if (unicodeAsBackSlash) {
4053 // consume next character
4054 unicodeAsBackSlash = false;
4055 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4056 // (source[currentPosition] == 'u')) {
4057 // getNextUnicodeChar();
4059 if (withoutUnicodePtr != 0) {
4060 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the character that selects the escape
4064 currentCharacter = source[currentPosition++];
4065 switch (currentCharacter) {
4067 currentCharacter = '\b';
4070 currentCharacter = '\t';
4073 currentCharacter = '\n';
4076 currentCharacter = '\f';
4079 currentCharacter = '\r';
4082 currentCharacter = '\"';
4085 currentCharacter = '\'';
4088 currentCharacter = '\\';
4091 // -----------octal escape--------------
4093 // OctalDigit OctalDigit
4094 // ZeroToThree OctalDigit OctalDigit
4095 int number = Character.getNumericValue(currentCharacter);
4096 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7, i.e. a third digit must not be consumed
4097 boolean zeroToThreeNot = number > 3;
4098 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4099 int digit = Character.getNumericValue(currentCharacter);
4100 if (digit >= 0 && digit <= 7) {
4101 number = (number * 8) + digit;
4102 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4103 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4104 // Digit --> ignore last character
4107 digit = Character.getNumericValue(currentCharacter);
4108 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4109 // OctalDigit OctalDigit
4110 number = (number * 8) + digit;
4111 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4112 // --> ignore last character
4116 } else { // has read \OctalDigit NonDigit--> ignore last
4120 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4124 } else { // has read \OctalDigit --> ignore last character
4128 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the scanned character
4129 currentCharacter = (char) number;
4131 throw new InvalidInputException(INVALID_ESCAPE);
4135 // check presence of task: tags
// Scans the comment between commentStart and commentEnd for the configured task tags and
// records tag, priority, positions and message text of every occurrence in the foundTask...
// arrays.
//   commentStart - first source position examined
//   commentEnd   - position at which the scan stops (exclusive); the scan also stops at
//                  eofPosition
// First pass: every position is compared against each entry of taskTags; a match is appended
// to the foundTask... arrays (created with 5 slots, doubled when full) with an empty message.
// Second pass: for every task appended by this call the message is taken from the text after
// the tag, ending at the first line break or, failing that, before a '*' found scanning
// backwards, then trimmed of surrounding whitespace.
// NOTE(review): the embedded numbering skips lines in this method (for example the statement
// guarded by the first if, and the assignments to 'end' inside the search loops).
// NOTE(review): taskTags is dereferenced without a null check although the shorter
// constructors pass null for it -- presumably callers test for null first; verify.
4136 public void checkTaskTag(int commentStart, int commentEnd) {
4137 // only look for newer task: tags
4138 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// tasks from this index on are the ones added by this call
4141 int foundTaskIndex = this.foundTaskCount;
4142 nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4144 char[] priority = null;
4145 // check for tag occurrence
4146 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4147 tag = this.taskTags[itag];
// a priority is optional: use the one at the same index when it exists
4148 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4149 int tagLength = tag.length;
4150 for (int t = 0; t < tagLength; t++) {
// NOTE(review): i + t is not checked against source.length in the visible code
4151 if (this.source[i + t] != tag[t])
// make room for the new entry: create the arrays on first use, double them when full
4154 if (this.foundTaskTags == null) {
4155 this.foundTaskTags = new char[5][];
4156 this.foundTaskMessages = new char[5][];
4157 this.foundTaskPriorities = new char[5][];
4158 this.foundTaskPositions = new int[5][];
4159 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4160 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4161 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4162 this.foundTaskCount);
4163 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4164 this.foundTaskCount);
4165 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4166 this.foundTaskCount);
// record the occurrence; the message is filled in by the second pass
4168 this.foundTaskTags[this.foundTaskCount] = tag;
4169 this.foundTaskPriorities[this.foundTaskCount] = priority;
4170 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4171 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4172 this.foundTaskCount++;
4173 i += tagLength - 1; // will be incremented when looping
4176 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4177 // retrieve message start and end positions
4178 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4179 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4180 // at most beginning of next task
4181 if (max_value < msgStart)
4182 max_value = msgStart; // would only occur if tag is before EOF.
// the message ends at the first line break after the tag ...
4185 for (int j = msgStart; j < max_value; j++) {
4186 if ((c = this.source[j]) == '\n' || c == '\r') {
// ... or is cut before a '*' found when scanning backwards from max_value
4192 for (int j = max_value; j > msgStart; j--) {
4193 if ((c = this.source[j]) == '*') {
4201 if (msgStart == end)
// trim whitespace at both ends of the message
// NOTE(review): both loops read the source before testing msgStart <= end
4204 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4206 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4208 // update the end position of the task
4209 this.foundTaskPositions[i][1] = end;
4210 // get the message source
4211 final int messageLength = end - msgStart + 1;
4212 char[] message = new char[messageLength];
4213 System.arraycopy(source, msgStart, message, 0, messageLength);
4214 this.foundTaskMessages[i] = message;