1 /*******************************************************************************
 
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
 
   3  * All rights reserved. This program and the accompanying materials 
 
   4  * are made available under the terms of the Common Public License v0.5 
 
   5  * which accompanies this distribution, and is available at
 
   6  * http://www.eclipse.org/legal/cpl-v05.html
 
   9  *     IBM Corporation - initial API and implementation
 
  10  ******************************************************************************/
 
  11 package net.sourceforge.phpdt.internal.compiler.parser;
 
  13 import java.util.ArrayList;
 
  14 import java.util.Iterator;
 
  15 import java.util.List;
 
  17 import net.sourceforge.phpdt.core.compiler.IScanner;
 
  18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
 
  19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
 
  20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
 
  22 public class Scanner implements IScanner, ITerminalSymbols {
 
   25    - getNextToken() which returns the current type of the token
 
  26      (this value is not memorized by the scanner)
 
  27    - getCurrentTokenSource() which provides with the token "REAL" source
 
  28      (aka all unicode have been transformed into a correct char)
 
  29    - sourceStart gives the position into the stream
 
  30    - currentPosition-1 gives the sourceEnd position into the stream 
 
  34   private boolean assertMode;
 
  35   public boolean useAssertAsAnIndentifier = false;
 
  36   //flag indicating if processed source contains occurrences of keyword assert 
 
  37   public boolean containsAssertKeyword = false;
 
  39   public boolean recordLineSeparator;
 
  40   public boolean phpMode = false;
 
  42   public char currentCharacter;
 
  43   public int startPosition;
 
  44   public int currentPosition;
 
  45   public int initialPosition, eofPosition;
 
   46   // after this position EOF tokens are generated instead of real tokens from the source
 
  48   public boolean tokenizeComments;
 
  49   public boolean tokenizeWhiteSpace;
 
  51   //source should be viewed as a window (aka a part)
 
  52   //of a entire very large stream
 
  56   public char[] withoutUnicodeBuffer;
 
  57   public int withoutUnicodePtr;
 
  58   //when == 0 ==> no unicode in the current token
 
  59   public boolean unicodeAsBackSlash = false;
 
  61   public boolean scanningFloatLiteral = false;
 
  63   //support for /** comments
 
  64   //public char[][] comments = new char[10][];
 
  65   public int[] commentStops = new int[10];
 
  66   public int[] commentStarts = new int[10];
 
  67   public int commentPtr = -1; // no comment test with commentPtr value -1
 
  69   //diet parsing support - jump over some method body when requested
 
  70   public boolean diet = false;
 
  72   //support for the  poor-line-debuggers ....
 
  73   //remember the position of the cr/lf
 
  74   public int[] lineEnds = new int[250];
 
  75   public int linePtr = -1;
 
  76   public boolean wasAcr = false;
 
  78   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
 
  80   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
 
  81   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
 
  82   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
 
  83   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
 
  84   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
 
  85   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
 
  86   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
 
  88   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
 
  89   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
 
  90   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
 
  91   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 
   93   //----------------optimized identifier management------------------
 
  94   static final char[] charArray_a = new char[] { 'a' },
 
  95     charArray_b = new char[] { 'b' },
 
  96     charArray_c = new char[] { 'c' },
 
  97     charArray_d = new char[] { 'd' },
 
  98     charArray_e = new char[] { 'e' },
 
  99     charArray_f = new char[] { 'f' },
 
 100     charArray_g = new char[] { 'g' },
 
 101     charArray_h = new char[] { 'h' },
 
 102     charArray_i = new char[] { 'i' },
 
 103     charArray_j = new char[] { 'j' },
 
 104     charArray_k = new char[] { 'k' },
 
 105     charArray_l = new char[] { 'l' },
 
 106     charArray_m = new char[] { 'm' },
 
 107     charArray_n = new char[] { 'n' },
 
 108     charArray_o = new char[] { 'o' },
 
 109     charArray_p = new char[] { 'p' },
 
 110     charArray_q = new char[] { 'q' },
 
 111     charArray_r = new char[] { 'r' },
 
 112     charArray_s = new char[] { 's' },
 
 113     charArray_t = new char[] { 't' },
 
 114     charArray_u = new char[] { 'u' },
 
 115     charArray_v = new char[] { 'v' },
 
 116     charArray_w = new char[] { 'w' },
 
 117     charArray_x = new char[] { 'x' },
 
 118     charArray_y = new char[] { 'y' },
 
 119     charArray_z = new char[] { 'z' };
 
 121   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 
 122   static final int TableSize = 30, InternalTableSize = 6;
 
 124   public static final int OptimizedLength = 6;
 
 126   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 
 127   // support for detecting non-externalized string literals
 
 128   int currentLineNr = -1;
 
 129   int previousLineNr = -1;
 
 130   NLSLine currentLine = null;
 
 131   List lines = new ArrayList();
 
 132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 
 133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 
 134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 
 135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 
 136   public StringLiteral[] nonNLSStrings = null;
 
 137   public boolean checkNonExternalizedStringLiterals = true;
 
 138   public boolean wasNonExternalizedStringLiteral = false;
 
 141     for (int i = 0; i < 6; i++) {
 
 142       for (int j = 0; j < TableSize; j++) {
 
 143         for (int k = 0; k < InternalTableSize; k++) {
 
 144           charArray_length[i][j][k] = initCharArray;
 
 149   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 
 151   public static final int RoundBracket = 0;
 
 152   public static final int SquareBracket = 1;
 
 153   public static final int CurlyBracket = 2;
 
 154   public static final int BracketKinds = 3;
 
 156   public static final boolean DEBUG = false;
 
  /**
   * Creates a scanner from the two tokenizing flags by delegating to the
   * three-argument constructor, passing {@code false} as its last argument.
   *
   * @param tokenizeComments forwarded unchanged to the three-argument constructor
   * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
   */
  161   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 
  162     this(tokenizeComments, tokenizeWhiteSpace, false);
 
 166    * Determines if the specified character is
 
 167    * permissible as the first character in a PHP identifier
 
 169   public static boolean isPHPIdentifierStart(char ch) {
 
 170     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 
 174    * Determines if the specified character may be part of a PHP identifier as
 
 175    * other than the first character
 
 177   public static boolean isPHPIdentifierPart(char ch) {
 
 178     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 
 181   public final boolean atEnd() {
 
 182     // This code is not relevant if source is 
 
 183     // Only a part of the real stream input
 
 185     return source.length == currentPosition;
 
 187   public char[] getCurrentIdentifierSource() {
 
 188     //return the token REAL source (aka unicodes are precomputed)
 
 191     //    if (withoutUnicodePtr != 0)
 
 192     //      //0 is used as a fast test flag so the real first char is in position 1
 
 194     //        withoutUnicodeBuffer,
 
 196     //        result = new char[withoutUnicodePtr],
 
 198     //        withoutUnicodePtr);
 
 200     int length = currentPosition - startPosition;
 
 201     switch (length) { // see OptimizedLength
 
 203         return optimizedCurrentTokenSource1();
 
 205         return optimizedCurrentTokenSource2();
 
 207         return optimizedCurrentTokenSource3();
 
 209         return optimizedCurrentTokenSource4();
 
 211         return optimizedCurrentTokenSource5();
 
 213         return optimizedCurrentTokenSource6();
 
 216     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 
 220   public int getCurrentTokenEndPosition() {
 
 221     return this.currentPosition - 1;
 
 224   public final char[] getCurrentTokenSource() {
 
 225     // Return the token REAL source (aka unicodes are precomputed)
 
 228     //    if (withoutUnicodePtr != 0)
 
 229     //      // 0 is used as a fast test flag so the real first char is in position 1
 
 231     //        withoutUnicodeBuffer,
 
 233     //        result = new char[withoutUnicodePtr],
 
 235     //        withoutUnicodePtr);
 
 238     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 
 243   public final char[] getCurrentTokenSource(int startPos) {
 
 244     // Return the token REAL source (aka unicodes are precomputed)
 
 247     //    if (withoutUnicodePtr != 0)
 
 248     //      // 0 is used as a fast test flag so the real first char is in position 1
 
 250     //        withoutUnicodeBuffer,
 
 252     //        result = new char[withoutUnicodePtr],
 
 254     //        withoutUnicodePtr);
 
 257     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 
 262   public final char[] getCurrentTokenSourceString() {
 
 263     //return the token REAL source (aka unicodes are precomputed).
 
 264     //REMOVE the two " that are at the beginning and the end.
 
 267     if (withoutUnicodePtr != 0)
 
 268       //0 is used as a fast test flag so the real first char is in position 1
 
 269       System.arraycopy(withoutUnicodeBuffer, 2,
 
 270       //2 is 1 (real start) + 1 (to jump over the ")
 
 271       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 
 274       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 
 278   public int getCurrentTokenStartPosition() {
 
 279     return this.startPosition;
 
  // Copies the characters of the current token, leaving out its first and its last character.
  // NOTE(review): presumably those two characters are the quotes of a string literal - confirm.
  282   public final char[] getCurrentStringLiteralSource() {
 
  283     // Return the token REAL source (aka unicodes are precomputed)
 
  // the copy starts one past startPosition and is two characters shorter than the token
  288     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 
 294    * Search the source position corresponding to the end of a given line number
 
 296    * Line numbers are 1-based, and relative to the scanner initialPosition. 
 
 297    * Character positions are 0-based.
 
 299    * In case the given line number is inconsistent, answers -1.
 
  // Looks up, in the lineEnds table, the position recorded for the end of the given 1-based line.
  // NOTE(review): presumably every rejected case below answers -1 - confirm.
  301   public final int getLineEnd(int lineNumber) {
 
  // no line table available
  303     if (lineEnds == null)
 
  // line number at or beyond the capacity of the table
  305     if (lineNumber >= lineEnds.length)
 
  // special case: the line number addresses the last slot of the table
  310     if (lineNumber == lineEnds.length - 1)
 
  // lineNumber is 1-based while the table is indexed from 0
  312     return lineEnds[lineNumber - 1];
 
  313     // next line start one character behind the lineEnd of the previous line
 
 316    * Search the source position corresponding to the beginning of a given line number
 
 318    * Line numbers are 1-based, and relative to the scanner initialPosition. 
 
 319    * Character positions are 0-based.
 
 321    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 
 323    * In case the given line number is inconsistent, answers -1.
 
  // Computes the start position of the given 1-based line from the lineEnds table.
  // NOTE(review): presumably every rejected case below answers -1 - confirm.
  325   public final int getLineStart(int lineNumber) {
 
  // no line table available
  327     if (lineEnds == null)
 
  // line number at or beyond the capacity of the table
  329     if (lineNumber >= lineEnds.length)
 
  // NOTE(review): presumably the answer for the first line - confirm
  335       return initialPosition;
 
  // the end of the previous line is stored at index lineNumber - 2; this line starts right after it
  336     return lineEnds[lineNumber - 2] + 1;
 
  337     // next line start one character behind the lineEnd of the previous line
 
 339   public final boolean getNextChar(char testedChar) {
 
 341     //handle the case of unicode.
 
 342     //when a unicode appears then we must use a buffer that holds char internal values
 
 343     //At the end of this method currentCharacter holds the new visited char
 
 344     //and currentPosition points right next after it
 
 345     //Both previous lines are true if the currentCharacter is == to the testedChar
 
 346     //On false, no side effect has occured.
 
 348     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 350     int temp = currentPosition;
 
 352       currentCharacter = source[currentPosition++];
 
 353       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 354       //        && (source[currentPosition] == 'u')) {
 
 355       //        //-------------unicode traitement ------------
 
 356       //        int c1, c2, c3, c4;
 
 357       //        int unicodeSize = 6;
 
 358       //        currentPosition++;
 
 359       //        while (source[currentPosition] == 'u') {
 
 360       //          currentPosition++;
 
 364       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 366       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 368       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 370       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 372       //          currentPosition = temp;
 
 376       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 377       //        if (currentCharacter != testedChar) {
 
 378       //          currentPosition = temp;
 
 381       //        unicodeAsBackSlash = currentCharacter == '\\';
 
 383       //        //need the unicode buffer
 
 384       //        if (withoutUnicodePtr == 0) {
 
 385       //          //buffer all the entries that have been left aside....
 
 386       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 390       //            withoutUnicodeBuffer,
 
 392       //            withoutUnicodePtr);
 
 394       //        //fill the buffer with the char
 
 395       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 398       //      } //-------------end unicode traitement--------------
 
 400       if (currentCharacter != testedChar) {
 
 401         currentPosition = temp;
 
 404       unicodeAsBackSlash = false;
 
 405       //        if (withoutUnicodePtr != 0)
 
 406       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 409     } catch (IndexOutOfBoundsException e) {
 
 410       unicodeAsBackSlash = false;
 
 411       currentPosition = temp;
 
 415   public final int getNextChar(char testedChar1, char testedChar2) {
 
 416     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 
 417     //test can be done with (x==0) for the first and (x>0) for the second
 
 418     //handle the case of unicode.
 
 419     //when a unicode appears then we must use a buffer that holds char internal values
 
 420     //At the end of this method currentCharacter holds the new visited char
 
 421     //and currentPosition points right next after it
 
 422     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 
 423     //On false, no side effect has occured.
 
 425     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 427     int temp = currentPosition;
 
 430       currentCharacter = source[currentPosition++];
 
 431       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 432       //        && (source[currentPosition] == 'u')) {
 
 433       //        //-------------unicode traitement ------------
 
 434       //        int c1, c2, c3, c4;
 
 435       //        int unicodeSize = 6;
 
 436       //        currentPosition++;
 
 437       //        while (source[currentPosition] == 'u') {
 
 438       //          currentPosition++;
 
 442       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 444       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 446       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 448       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 450       //          currentPosition = temp;
 
 454       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 455       //        if (currentCharacter == testedChar1)
 
 457       //        else if (currentCharacter == testedChar2)
 
 460       //          currentPosition = temp;
 
 464       //        //need the unicode buffer
 
 465       //        if (withoutUnicodePtr == 0) {
 
 466       //          //buffer all the entries that have been left aside....
 
 467       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 471       //            withoutUnicodeBuffer,
 
 473       //            withoutUnicodePtr);
 
 475       //        //fill the buffer with the char
 
 476       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 478       //      } //-------------end unicode traitement--------------
 
 480       if (currentCharacter == testedChar1)
 
 482       else if (currentCharacter == testedChar2)
 
 485         currentPosition = temp;
 
 489       //        if (withoutUnicodePtr != 0)
 
 490       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 493     } catch (IndexOutOfBoundsException e) {
 
 494       currentPosition = temp;
 
 498   public final boolean getNextCharAsDigit() {
 
 500     //handle the case of unicode.
 
 501     //when a unicode appears then we must use a buffer that holds char internal values
 
 502     //At the end of this method currentCharacter holds the new visited char
 
 503     //and currentPosition points right next after it
 
 504     //Both previous lines are true if the currentCharacter is a digit
 
 505     //On false, no side effect has occured.
 
 507     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 509     int temp = currentPosition;
 
 511       currentCharacter = source[currentPosition++];
 
 512       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 513       //        && (source[currentPosition] == 'u')) {
 
 514       //        //-------------unicode traitement ------------
 
 515       //        int c1, c2, c3, c4;
 
 516       //        int unicodeSize = 6;
 
 517       //        currentPosition++;
 
 518       //        while (source[currentPosition] == 'u') {
 
 519       //          currentPosition++;
 
 523       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 525       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 527       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 529       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 531       //          currentPosition = temp;
 
 535       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 536       //        if (!Character.isDigit(currentCharacter)) {
 
 537       //          currentPosition = temp;
 
 541       //        //need the unicode buffer
 
 542       //        if (withoutUnicodePtr == 0) {
 
 543       //          //buffer all the entries that have been left aside....
 
 544       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 548       //            withoutUnicodeBuffer,
 
 550       //            withoutUnicodePtr);
 
 552       //        //fill the buffer with the char
 
 553       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 555       //      } //-------------end unicode traitement--------------
 
 557       if (!Character.isDigit(currentCharacter)) {
 
 558         currentPosition = temp;
 
 561       //        if (withoutUnicodePtr != 0)
 
 562       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 565     } catch (IndexOutOfBoundsException e) {
 
 566       currentPosition = temp;
 
 570   public final boolean getNextCharAsDigit(int radix) {
 
 572     //handle the case of unicode.
 
 573     //when a unicode appears then we must use a buffer that holds char internal values
 
 574     //At the end of this method currentCharacter holds the new visited char
 
 575     //and currentPosition points right next after it
 
 576     //Both previous lines are true if the currentCharacter is a digit base on radix
 
 577     //On false, no side effect has occured.
 
 579     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 581     int temp = currentPosition;
 
 583       currentCharacter = source[currentPosition++];
 
 584       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 585       //        && (source[currentPosition] == 'u')) {
 
 586       //        //-------------unicode traitement ------------
 
 587       //        int c1, c2, c3, c4;
 
 588       //        int unicodeSize = 6;
 
 589       //        currentPosition++;
 
 590       //        while (source[currentPosition] == 'u') {
 
 591       //          currentPosition++;
 
 595       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 597       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 599       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 601       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 603       //          currentPosition = temp;
 
 607       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 608       //        if (Character.digit(currentCharacter, radix) == -1) {
 
 609       //          currentPosition = temp;
 
 613       //        //need the unicode buffer
 
 614       //        if (withoutUnicodePtr == 0) {
 
 615       //          //buffer all the entries that have been left aside....
 
 616       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 620       //            withoutUnicodeBuffer,
 
 622       //            withoutUnicodePtr);
 
 624       //        //fill the buffer with the char
 
 625       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 627       //      } //-------------end unicode traitement--------------
 
 629       if (Character.digit(currentCharacter, radix) == -1) {
 
 630         currentPosition = temp;
 
 633       //        if (withoutUnicodePtr != 0)
 
 634       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 637     } catch (IndexOutOfBoundsException e) {
 
 638       currentPosition = temp;
 
 642   public boolean getNextCharAsJavaIdentifierPart() {
 
 644     //handle the case of unicode.
 
 645     //when a unicode appears then we must use a buffer that holds char internal values
 
 646     //At the end of this method currentCharacter holds the new visited char
 
 647     //and currentPosition points right next after it
 
 648     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
 
 649     //On false, no side effect has occured.
 
 651     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 653     int temp = currentPosition;
 
 655       currentCharacter = source[currentPosition++];
 
 656       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 657       //        && (source[currentPosition] == 'u')) {
 
 658       //        //-------------unicode traitement ------------
 
 659       //        int c1, c2, c3, c4;
 
 660       //        int unicodeSize = 6;
 
 661       //        currentPosition++;
 
 662       //        while (source[currentPosition] == 'u') {
 
 663       //          currentPosition++;
 
 667       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 669       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 671       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 673       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 675       //          currentPosition = temp;
 
 679       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 680       //        if (!isPHPIdentifierPart(currentCharacter)) {
 
 681       //          currentPosition = temp;
 
 685       //        //need the unicode buffer
 
 686       //        if (withoutUnicodePtr == 0) {
 
 687       //          //buffer all the entries that have been left aside....
 
 688       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 692       //            withoutUnicodeBuffer,
 
 694       //            withoutUnicodePtr);
 
 696       //        //fill the buffer with the char
 
 697       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 699       //      } //-------------end unicode traitement--------------
 
 701       if (!isPHPIdentifierPart(currentCharacter)) {
 
 702         currentPosition = temp;
 
 706       //        if (withoutUnicodePtr != 0)
 
 707       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 710     } catch (IndexOutOfBoundsException e) {
 
 711       currentPosition = temp;
 
 716   public int getNextToken() throws InvalidInputException {
 
 717     int htmlPosition = currentPosition;
 
 720         currentCharacter = source[currentPosition++];
 
 721         if (currentCharacter == '<') {
 
 722           if (getNextChar('?')) {
 
 723             currentCharacter = source[currentPosition++];
 
 724             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
 
 726               startPosition = currentPosition;
 
 728               if (tokenizeWhiteSpace) {
 
 729                 // && (whiteStart != currentPosition - 1)) {
 
 730                 // reposition scanner in case we are interested by spaces as tokens
 
 731                 startPosition = htmlPosition;
 
 732                 return TokenNameHTML;
 
 735               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
 
 737                 int test = getNextChar('H', 'h');
 
 739                   test = getNextChar('P', 'p');
 
 742                     startPosition = currentPosition;
 
 745                     if (tokenizeWhiteSpace) {
 
 746                       // && (whiteStart != currentPosition - 1)) {
 
 747                       // reposition scanner in case we are interested by spaces as tokens
 
 748                       startPosition = htmlPosition;
 
 749                       return TokenNameHTML;
 
 758         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 
 759           if (recordLineSeparator) {
 
 766     } //-----------------end switch while try--------------------
 
 767     catch (IndexOutOfBoundsException e) {
 
 768       if (tokenizeWhiteSpace) {
 
 769         // && (whiteStart != currentPosition - 1)) {
 
 770         // reposition scanner in case we are interested by spaces as tokens
 
 771         startPosition = htmlPosition;
 
 779         jumpOverMethodBody();
 
 781         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 
 784         while (true) { //loop for jumping over comments
 
 785           withoutUnicodePtr = 0;
 
 786           //start with a new token (even comment written with unicode )
 
 788           // ---------Consume white space and handles startPosition---------
 
 789           int whiteStart = currentPosition;
 
 790           boolean isWhiteSpace;
 
 792             startPosition = currentPosition;
 
 793             currentCharacter = source[currentPosition++];
 
 794             //            if (((currentCharacter = source[currentPosition++]) == '\\')
 
 795             //              && (source[currentPosition] == 'u')) {
 
 796             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 
 798             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 
 799               checkNonExternalizeString();
 
 800               if (recordLineSeparator) {
 
 806             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
 
 808           } while (isWhiteSpace);
 
 809           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 
 810             // reposition scanner in case we are interested by spaces as tokens
 
 812             startPosition = whiteStart;
 
 813             return TokenNameWHITESPACE;
 
  815           //little trick to get out in the middle of a source computation
 
 816           if (currentPosition > eofPosition)
 
 819           // ---------Identify the next token-------------
 
 821           switch (currentCharacter) {
 
 823               return TokenNameLPAREN;
 
 825               return TokenNameRPAREN;
 
 827               return TokenNameLBRACE;
 
 829               return TokenNameRBRACE;
 
 831               return TokenNameLBRACKET;
 
 833               return TokenNameRBRACKET;
 
 835               return TokenNameSEMICOLON;
 
 837               return TokenNameCOMMA;
 
 840               if (getNextCharAsDigit())
 
 841                 return scanNumber(true);
 
 846                 if ((test = getNextChar('+', '=')) == 0)
 
 847                   return TokenNamePLUS_PLUS;
 
 849                   return TokenNamePLUS_EQUAL;
 
 850                 return TokenNamePLUS;
 
 855                 if ((test = getNextChar('-', '=')) == 0)
 
 856                   return TokenNameMINUS_MINUS;
 
 858                   return TokenNameMINUS_EQUAL;
 
 859                 if (getNextChar('>'))
 
 860                   return TokenNameMINUS_GREATER;
 
 862                 return TokenNameMINUS;
 
 865               if (getNextChar('='))
 
 866                 return TokenNameTWIDDLE_EQUAL;
 
 867               return TokenNameTWIDDLE;
 
 869               if (getNextChar('=')) {
 
 870                 if (getNextChar('=')) {
 
 871                   return TokenNameNOT_EQUAL_EQUAL;
 
 873                 return TokenNameNOT_EQUAL;
 
 877               if (getNextChar('='))
 
 878                 return TokenNameMULTIPLY_EQUAL;
 
 879               return TokenNameMULTIPLY;
 
 881               if (getNextChar('='))
 
 882                 return TokenNameREMAINDER_EQUAL;
 
 883               return TokenNameREMAINDER;
 
 887                 if ((test = getNextChar('=', '<')) == 0)
 
 888                   return TokenNameLESS_EQUAL;
 
 890                   if (getNextChar('='))
 
 891                     return TokenNameLEFT_SHIFT_EQUAL;
 
 892                   if (getNextChar('<')) {
 
 893                     int heredocStart = currentPosition;
 
 894                     int heredocLength = 0;
 
 895                     currentCharacter = source[currentPosition++];
 
 896                     if (isPHPIdentifierStart(currentCharacter)) {
 
 897                       currentCharacter = source[currentPosition++];
 
 899                       return TokenNameERROR;
 
 901                     while (isPHPIdentifierPart(currentCharacter)) {
 
 902                       currentCharacter = source[currentPosition++];
 
 905                     heredocLength = currentPosition - heredocStart - 1;
 
 907                     // heredoc end-tag determination
 
 908                     boolean endTag = true;
 
 911                       ch = source[currentPosition++];
 
 912                       if (ch == '\r' || ch == '\n') {
 
 913                         if (recordLineSeparator) {
 
 918                         for (int i = 0; i < heredocLength; i++) {
 
 919                           if (source[currentPosition + i] != source[heredocStart + i]) {
 
 925                           currentPosition += heredocLength - 1;
 
 926                           currentCharacter = source[currentPosition++];
 
 927                           break; // do...while loop
 
 935                     return TokenNameHEREDOC;
 
 937                   return TokenNameLEFT_SHIFT;
 
 939                 return TokenNameLESS;
 
 944                 if ((test = getNextChar('=', '>')) == 0)
 
 945                   return TokenNameGREATER_EQUAL;
 
 947                   if ((test = getNextChar('=', '>')) == 0)
 
 948                     return TokenNameRIGHT_SHIFT_EQUAL;
 
 949                   return TokenNameRIGHT_SHIFT;
 
 951                 return TokenNameGREATER;
 
 954               if (getNextChar('=')) {
 
 955                 if (getNextChar('=')) {
 
 956                   return TokenNameEQUAL_EQUAL_EQUAL;
 
 958                 return TokenNameEQUAL_EQUAL;
 
 960               if (getNextChar('>'))
 
 961                 return TokenNameEQUAL_GREATER;
 
 962               return TokenNameEQUAL;
 
 966                 if ((test = getNextChar('&', '=')) == 0)
 
 967                   return TokenNameAND_AND;
 
 969                   return TokenNameAND_EQUAL;
 
 975                 if ((test = getNextChar('|', '=')) == 0)
 
 976                   return TokenNameOR_OR;
 
 978                   return TokenNameOR_EQUAL;
 
 982               if (getNextChar('='))
 
 983                 return TokenNameXOR_EQUAL;
 
 986               if (getNextChar('>')) {
 
 988                 return TokenNameStopPHP;
 
 990               return TokenNameQUESTION;
 
 992               if (getNextChar(':'))
 
 993                 return TokenNameCOLON_COLON;
 
 994               return TokenNameCOLON;
 
1000               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
 
1001               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 
1004               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1005               //                                                                for (int lookAhead = 0;
 
1008               //                                                                        if (currentPosition + lookAhead
 
1009               //                                                                                == source.length)
 
1011               //                                                                        if (source[currentPosition + lookAhead]
 
1014               //                                                                        if (source[currentPosition + lookAhead]
 
1016               //                                                                                currentPosition += lookAhead + 1;
 
1020               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 
1023               //                                                if (getNextChar('\'')) {
 
1024               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1025               //                                                        for (int lookAhead = 0;
 
1028               //                                                                if (currentPosition + lookAhead
 
1029               //                                                                        == source.length)
 
1031               //                                                                if (source[currentPosition + lookAhead]
 
1034               //                                                                if (source[currentPosition + lookAhead]
 
1036               //                                                                        currentPosition += lookAhead + 1;
 
1040               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 
1042               //                                                if (getNextChar('\\'))
 
1043               //                                                        scanEscapeCharacter();
 
1044               //                                                else { // consume next character
 
1045               //                                                        unicodeAsBackSlash = false;
 
1046               //                                                        if (((currentCharacter = source[currentPosition++])
 
1048               //                                                                && (source[currentPosition] == 'u')) {
 
1049               //                                                                getNextUnicodeChar();
 
1051               //                                                                if (withoutUnicodePtr != 0) {
 
1052               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1053               //                                                                                currentCharacter;
 
1057               //                                                //            if (getNextChar('\''))
 
1058               //                                                //              return TokenNameCharacterLiteral;
 
1059               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1060               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
 
1061               //                                                        if (currentPosition + lookAhead == source.length)
 
1063               //                                                        if (source[currentPosition + lookAhead] == '\n')
 
1065               //                                                        if (source[currentPosition + lookAhead] == '\'') {
 
1066               //                                                                currentPosition += lookAhead + 1;
 
1070               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 
1073                 // consume next character
 
1074                 unicodeAsBackSlash = false;
 
1075                 currentCharacter = source[currentPosition++];
 
1076                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
 
1077                 //                  && (source[currentPosition] == 'u')) {
 
1078                 //                  getNextUnicodeChar();
 
1080                 //                  if (withoutUnicodePtr != 0) {
 
1081                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1082                 //                      currentCharacter;
 
1086                 while (currentCharacter != '\'') {
 
1088                   /**** in PHP \r and \n are valid in string literals ****/
 
1089                   //                  if ((currentCharacter == '\n')
 
1090                   //                    || (currentCharacter == '\r')) {
 
1091                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1092                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1093                   //                      if (currentPosition + lookAhead == source.length)
 
1095                   //                      if (source[currentPosition + lookAhead] == '\n')
 
1097                   //                      if (source[currentPosition + lookAhead] == '\"') {
 
1098                   //                        currentPosition += lookAhead + 1;
 
1102                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 
1104                   if (currentCharacter == '\\') {
 
1105                     int escapeSize = currentPosition;
 
1106                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 
1107                     // scanSingleQuotedEscapeCharacter() has a side effect on this value, and the previous value is needed a few lines further down
 
1108                     scanSingleQuotedEscapeCharacter();
 
1109                     escapeSize = currentPosition - escapeSize;
 
1110                     if (withoutUnicodePtr == 0) {
 
1111                       //buffer all the entries that have been left aside....
 
1112                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 
1113                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 
1114                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1115                     } else { //overwrite the backslash in the buffer
 
1116                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 
1117                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
 
1118                         withoutUnicodePtr--;
 
1122                   // consume next character
 
1123                   unicodeAsBackSlash = false;
 
1124                   currentCharacter = source[currentPosition++];
 
1125                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1126                   //                    && (source[currentPosition] == 'u')) {
 
1127                   //                    getNextUnicodeChar();
 
1129                   if (withoutUnicodePtr != 0) {
 
1130                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1135               } catch (IndexOutOfBoundsException e) {
 
1136                 throw new InvalidInputException(UNTERMINATED_STRING);
 
1137               } catch (InvalidInputException e) {
 
1138                 if (e.getMessage().equals(INVALID_ESCAPE)) {
 
1139                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1140                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1141                     if (currentPosition + lookAhead == source.length)
 
1143                     if (source[currentPosition + lookAhead] == '\n')
 
1145                     if (source[currentPosition + lookAhead] == '\'') {
 
1146                       currentPosition += lookAhead + 1;
 
1154               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
 
1155                 if (currentLine == null) {
 
1156                   currentLine = new NLSLine();
 
1157                   lines.add(currentLine);
 
1159                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
 
1161               return TokenNameStringConstant;
 
1164                 // consume next character
 
1165                 unicodeAsBackSlash = false;
 
1166                 currentCharacter = source[currentPosition++];
 
1167                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
 
1168                 //                  && (source[currentPosition] == 'u')) {
 
1169                 //                  getNextUnicodeChar();
 
1171                 //                  if (withoutUnicodePtr != 0) {
 
1172                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1173                 //                      currentCharacter;
 
1177                 while (currentCharacter != '"') {
 
1179                   /**** in PHP \r and \n are valid in string literals ****/
 
1180                   //                  if ((currentCharacter == '\n')
 
1181                   //                    || (currentCharacter == '\r')) {
 
1182                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1183                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1184                   //                      if (currentPosition + lookAhead == source.length)
 
1186                   //                      if (source[currentPosition + lookAhead] == '\n')
 
1188                   //                      if (source[currentPosition + lookAhead] == '\"') {
 
1189                   //                        currentPosition += lookAhead + 1;
 
1193                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 
1195                   if (currentCharacter == '\\') {
 
1196                     int escapeSize = currentPosition;
 
1197                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 
1198                     // scanDoubleQuotedEscapeCharacter() has a side effect on this value, and the previous value is needed a few lines further down
 
1199                     scanDoubleQuotedEscapeCharacter();
 
1200                     escapeSize = currentPosition - escapeSize;
 
1201                     if (withoutUnicodePtr == 0) {
 
1202                       //buffer all the entries that have been left aside....
 
1203                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 
1204                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 
1205                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1206                     } else { //overwrite the backslash in the buffer
 
1207                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 
1208                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
 
1209                         withoutUnicodePtr--;
 
1213                   // consume next character
 
1214                   unicodeAsBackSlash = false;
 
1215                   currentCharacter = source[currentPosition++];
 
1216                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1217                   //                    && (source[currentPosition] == 'u')) {
 
1218                   //                    getNextUnicodeChar();
 
1220                   if (withoutUnicodePtr != 0) {
 
1221                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1226               } catch (IndexOutOfBoundsException e) {
 
1227                 throw new InvalidInputException(UNTERMINATED_STRING);
 
1228               } catch (InvalidInputException e) {
 
1229                 if (e.getMessage().equals(INVALID_ESCAPE)) {
 
1230                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1231                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1232                     if (currentPosition + lookAhead == source.length)
 
1234                     if (source[currentPosition + lookAhead] == '\n')
 
1236                     if (source[currentPosition + lookAhead] == '\"') {
 
1237                       currentPosition += lookAhead + 1;
 
1245               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
 
1246                 if (currentLine == null) {
 
1247                   currentLine = new NLSLine();
 
1248                   lines.add(currentLine);
 
1250                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
 
1252               return TokenNameStringLiteral;
 
1255                 // consume next character
 
1256                 unicodeAsBackSlash = false;
 
1257                 currentCharacter = source[currentPosition++];
 
1258                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
 
1259                 //                  && (source[currentPosition] == 'u')) {
 
1260                 //                  getNextUnicodeChar();
 
1262                 //                  if (withoutUnicodePtr != 0) {
 
1263                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1264                 //                      currentCharacter;
 
1268                 while (currentCharacter != '`') {
 
1270                   /**** in PHP \r and \n are valid in string literals ****/
 
1271                   //                if ((currentCharacter == '\n')
 
1272                   //                  || (currentCharacter == '\r')) {
 
1273                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1274                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1275                   //                    if (currentPosition + lookAhead == source.length)
 
1277                   //                    if (source[currentPosition + lookAhead] == '\n')
 
1279                   //                    if (source[currentPosition + lookAhead] == '\"') {
 
1280                   //                      currentPosition += lookAhead + 1;
 
1284                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 
1286                   if (currentCharacter == '\\') {
 
1287                     int escapeSize = currentPosition;
 
1288                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 
1289                     // scanDoubleQuotedEscapeCharacter() has a side effect on this value, and the previous value is needed a few lines further down
 
1290                     scanDoubleQuotedEscapeCharacter();
 
1291                     escapeSize = currentPosition - escapeSize;
 
1292                     if (withoutUnicodePtr == 0) {
 
1293                       //buffer all the entries that have been left aside....
 
1294                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 
1295                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 
1296                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1297                     } else { //overwrite the backslash in the buffer
 
1298                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 
1299                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
 
1300                         withoutUnicodePtr--;
 
1304                   // consume next character
 
1305                   unicodeAsBackSlash = false;
 
1306                   currentCharacter = source[currentPosition++];
 
1307                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1308                   //                    && (source[currentPosition] == 'u')) {
 
1309                   //                    getNextUnicodeChar();
 
1311                   if (withoutUnicodePtr != 0) {
 
1312                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1317               } catch (IndexOutOfBoundsException e) {
 
1318                 throw new InvalidInputException(UNTERMINATED_STRING);
 
1319               } catch (InvalidInputException e) {
 
1320                 if (e.getMessage().equals(INVALID_ESCAPE)) {
 
1321                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1322                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1323                     if (currentPosition + lookAhead == source.length)
 
1325                     if (source[currentPosition + lookAhead] == '\n')
 
1327                     if (source[currentPosition + lookAhead] == '`') {
 
1328                       currentPosition += lookAhead + 1;
 
1336               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
 
1337                 if (currentLine == null) {
 
1338                   currentLine = new NLSLine();
 
1339                   lines.add(currentLine);
 
1341                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
 
1343               return TokenNameStringInterpolated;
 
1348                 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
 
1350                   int endPositionForLineComment = 0;
 
1351                   try { //get the next char 
 
1352                     currentCharacter = source[currentPosition++];
 
1353                     //                    if (((currentCharacter = source[currentPosition++])
 
1355                     //                      && (source[currentPosition] == 'u')) {
 
1356                     //                      //-------------unicode processing ------------
 
1357                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1358                     //                      currentPosition++;
 
1359                     //                      while (source[currentPosition] == 'u') {
 
1360                     //                        currentPosition++;
 
1363                     //                        Character.getNumericValue(source[currentPosition++]))
 
1367                     //                          Character.getNumericValue(source[currentPosition++]))
 
1371                     //                          Character.getNumericValue(source[currentPosition++]))
 
1375                     //                          Character.getNumericValue(source[currentPosition++]))
 
1378                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
1380                     //                        currentCharacter =
 
1381                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1385                     //handle the \\u case manually into comment
 
1386                     //                    if (currentCharacter == '\\') {
 
1387                     //                      if (source[currentPosition] == '\\')
 
1388                     //                        currentPosition++;
 
1389                     //                    } //jump over the \\
 
1390                     boolean isUnicode = false;
 
1391                     while (currentCharacter != '\r' && currentCharacter != '\n') {
 
1392                       if (currentCharacter == '?') {
 
1393                         if (getNextChar('>')) {
 
1394                           startPosition = currentPosition - 2;
 
1396                           return TokenNameStopPHP;
 
1402                       currentCharacter = source[currentPosition++];
 
1403                       //                      if (((currentCharacter = source[currentPosition++])
 
1405                       //                        && (source[currentPosition] == 'u')) {
 
1406                       //                        isUnicode = true;
 
1407                       //                        //-------------unicode processing ------------
 
1408                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1409                       //                        currentPosition++;
 
1410                       //                        while (source[currentPosition] == 'u') {
 
1411                       //                          currentPosition++;
 
1414                       //                          Character.getNumericValue(source[currentPosition++]))
 
1418                       //                            Character.getNumericValue(
 
1419                       //                              source[currentPosition++]))
 
1423                       //                            Character.getNumericValue(
 
1424                       //                              source[currentPosition++]))
 
1428                       //                            Character.getNumericValue(
 
1429                       //                              source[currentPosition++]))
 
1432                       //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
1434                       //                          currentCharacter =
 
1435                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1438                       //handle the \\u case manually into comment
 
1439                       //                      if (currentCharacter == '\\') {
 
1440                       //                        if (source[currentPosition] == '\\')
 
1441                       //                          currentPosition++;
 
1442                       //                      } //jump over the \\
 
1445                       endPositionForLineComment = currentPosition - 6;
 
1447                       endPositionForLineComment = currentPosition - 1;
 
1449                     recordComment(false);
 
1450                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 
1451                       checkNonExternalizeString();
 
1452                       if (recordLineSeparator) {
 
1454                           pushUnicodeLineSeparator();
 
1456                           pushLineSeparator();
 
1462                     if (tokenizeComments) {
 
1464                         currentPosition = endPositionForLineComment;
 
1465                         // reset one character behind
 
1467                       return TokenNameCOMMENT_LINE;
 
1469                   } catch (IndexOutOfBoundsException e) { //an eof will then be generated
 
1470                     if (tokenizeComments) {
 
1472                       // reset one character behind
 
1473                       return TokenNameCOMMENT_LINE;
 
1479                   //traditional and annotation comment
 
1480                   boolean isJavadoc = false, star = false;
 
1481                   // consume next character
 
1482                   unicodeAsBackSlash = false;
 
1483                   currentCharacter = source[currentPosition++];
 
1484                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1485                   //                    && (source[currentPosition] == 'u')) {
 
1486                   //                    getNextUnicodeChar();
 
1488                   //                    if (withoutUnicodePtr != 0) {
 
1489                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1490                   //                        currentCharacter;
 
1494                   if (currentCharacter == '*') {
 
1498                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 
1499                     checkNonExternalizeString();
 
1500                     if (recordLineSeparator) {
 
1501                       pushLineSeparator();
 
1506                   try { //get the next char 
 
1507                     currentCharacter = source[currentPosition++];
 
1508                     //                    if (((currentCharacter = source[currentPosition++])
 
1510                     //                      && (source[currentPosition] == 'u')) {
 
1511                     //                      //-------------unicode processing ------------
 
1512                     //                      getNextUnicodeChar();
 
1514                     //handle the \\u case manually into comment
 
1515                     //                    if (currentCharacter == '\\') {
 
1516                     //                      if (source[currentPosition] == '\\')
 
1517                     //                        currentPosition++;
 
1518                     //                      //jump over the \\
 
1520                     // empty comment is not a javadoc /**/
 
1521                     if (currentCharacter == '/') {
 
1524                     //loop until end of comment */
 
1525                     while ((currentCharacter != '/') || (!star)) {
 
1526                       if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 
1527                         checkNonExternalizeString();
 
1528                         if (recordLineSeparator) {
 
1529                           pushLineSeparator();
 
1534                       star = currentCharacter == '*';
 
1536                       currentCharacter = source[currentPosition++];
 
1537                       //                      if (((currentCharacter = source[currentPosition++])
 
1539                       //                        && (source[currentPosition] == 'u')) {
 
1540                       //                        //-------------unicode processing ------------
 
1541                       //                        getNextUnicodeChar();
 
1543                       //handle the \\u case manually into comment
 
1544                       //                      if (currentCharacter == '\\') {
 
1545                       //                        if (source[currentPosition] == '\\')
 
1546                       //                          currentPosition++;
 
1547                       //                      } //jump over the \\
 
1549                     recordComment(isJavadoc);
 
1550                     if (tokenizeComments) {
 
1552                         return TokenNameCOMMENT_PHPDOC;
 
1553                       return TokenNameCOMMENT_BLOCK;
 
1555                   } catch (IndexOutOfBoundsException e) {
 
1556                     throw new InvalidInputException(UNTERMINATED_COMMENT);
 
1560                 if (getNextChar('='))
 
1561                   return TokenNameDIVIDE_EQUAL;
 
1562                 return TokenNameDIVIDE;
 
1566                 return TokenNameEOF;
 
1567               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
 
1568               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
 
1571               if (currentCharacter == '$') {
 
1572                 while ((currentCharacter = source[currentPosition++]) == '$') {
 
1574                 if (currentCharacter == '{')
 
1575                   return TokenNameDOLLAR_LBRACE;
 
1576                 if (isPHPIdentifierStart(currentCharacter))
 
1577                   return scanIdentifierOrKeyword(true);
 
1578                 return TokenNameERROR;
 
1580               if (isPHPIdentifierStart(currentCharacter))
 
1581                 return scanIdentifierOrKeyword(false);
 
1582               if (Character.isDigit(currentCharacter))
 
1583                 return scanNumber(false);
 
1584               return TokenNameERROR;
 
1587       } //-----------------end switch while try--------------------
 
1588       catch (IndexOutOfBoundsException e) {
 
1591     return TokenNameEOF;
 
1594   //  public final void getNextUnicodeChar()
 
1595   //    throws IndexOutOfBoundsException, InvalidInputException {
 
1597   //    //handle the case of unicode.
 
1598   //    //when a unicode appears then we must use a buffer that holds char internal values
 
1599   //    //At the end of this method currentCharacter holds the new visited char
 
1600   //    //and currentPosition points right next after it
 
1602   //    //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
1604   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
 
1605   //    currentPosition++;
 
1606   //    while (source[currentPosition] == 'u') {
 
1607   //      currentPosition++;
 
1611   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
1613   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
1615   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
1617   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
1619   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
1621   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1622   //      //need the unicode buffer
 
1623   //      if (withoutUnicodePtr == 0) {
 
1624   //        //buffer all the entries that have been left aside....
 
1625   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
1626   //        System.arraycopy(
 
1629   //          withoutUnicodeBuffer,
 
1631   //          withoutUnicodePtr);
 
1633   //      //fill the buffer with the char
 
1634   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1636   //    unicodeAsBackSlash = currentCharacter == '\\';
 
1638   /* Tokenize a method body, assuming that curly brackets are properly balanced.
 
1640   public final void jumpOverMethodBody() {
 
1642     this.wasAcr = false;
 
1645       while (true) { //loop for jumping over comments
 
1646         // ---------Consume white space and handles startPosition---------
 
1647         boolean isWhiteSpace;
 
1649           startPosition = currentPosition;
 
1650           currentCharacter = source[currentPosition++];
 
1651           //          if (((currentCharacter = source[currentPosition++]) == '\\')
 
1652           //            && (source[currentPosition] == 'u')) {
 
1653           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
 
1655           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
 
1656             pushLineSeparator();
 
1657           isWhiteSpace = Character.isWhitespace(currentCharacter);
 
1659         } while (isWhiteSpace);
 
1661         // -------consume token until } is found---------
 
1662         switch (currentCharacter) {
 
1674               test = getNextChar('\\');
 
1677                   scanDoubleQuotedEscapeCharacter();
 
1678                 } catch (InvalidInputException ex) {
 
1681                 //                try { // consume next character
 
1682                 unicodeAsBackSlash = false;
 
1683                 currentCharacter = source[currentPosition++];
 
1684                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1685                 //                    && (source[currentPosition] == 'u')) {
 
1686                 //                    getNextUnicodeChar();
 
1688                 if (withoutUnicodePtr != 0) {
 
1689                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1692                 //                } catch (InvalidInputException ex) {
 
1700               //              try { // consume next character
 
1701               unicodeAsBackSlash = false;
 
1702               currentCharacter = source[currentPosition++];
 
1703               //                if (((currentCharacter = source[currentPosition++]) == '\\')
 
1704               //                  && (source[currentPosition] == 'u')) {
 
1705               //                  getNextUnicodeChar();
 
1707               if (withoutUnicodePtr != 0) {
 
1708                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1711               //              } catch (InvalidInputException ex) {
 
1713               while (currentCharacter != '"') {
 
1714                 if (currentCharacter == '\r') {
 
1715                   if (source[currentPosition] == '\n')
 
1718                   // the string cannot go further than the line
 
1720                 if (currentCharacter == '\n') {
 
1722                   // the string cannot go further than the line
 
1724                 if (currentCharacter == '\\') {
 
1726                     scanDoubleQuotedEscapeCharacter();
 
1727                   } catch (InvalidInputException ex) {
 
1730                 //                try { // consume next character
 
1731                 unicodeAsBackSlash = false;
 
1732                 currentCharacter = source[currentPosition++];
 
1733                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1734                 //                    && (source[currentPosition] == 'u')) {
 
1735                 //                    getNextUnicodeChar();
 
1737                 if (withoutUnicodePtr != 0) {
 
1738                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1741                 //                } catch (InvalidInputException ex) {
 
1744             } catch (IndexOutOfBoundsException e) {
 
1751               if ((test = getNextChar('/', '*')) == 0) {
 
1755                   currentCharacter = source[currentPosition++];
 
1756                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1757                   //                    && (source[currentPosition] == 'u')) {
 
1758                   //                    //-------------unicode treatment ------------
 
1759                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1760                   //                    currentPosition++;
 
1761                   //                    while (source[currentPosition] == 'u') {
 
1762                   //                      currentPosition++;
 
1765                   //                      Character.getNumericValue(source[currentPosition++]))
 
1769                   //                        Character.getNumericValue(source[currentPosition++]))
 
1773                   //                        Character.getNumericValue(source[currentPosition++]))
 
1777                   //                        Character.getNumericValue(source[currentPosition++]))
 
1780                   //                      //error don't care of the value
 
1781                   //                      currentCharacter = 'A';
 
1782                   //                    } //something different from \n and \r
 
1784                   //                      currentCharacter =
 
1785                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1789                   while (currentCharacter != '\r' && currentCharacter != '\n') {
 
1791                     currentCharacter = source[currentPosition++];
 
1792                     //                    if (((currentCharacter = source[currentPosition++])
 
1794                     //                      && (source[currentPosition] == 'u')) {
 
1795                     //                      //-------------unicode treatment ------------
 
1796                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1797                     //                      currentPosition++;
 
1798                     //                      while (source[currentPosition] == 'u') {
 
1799                     //                        currentPosition++;
 
1802                     //                        Character.getNumericValue(source[currentPosition++]))
 
1806                     //                          Character.getNumericValue(source[currentPosition++]))
 
1810                     //                          Character.getNumericValue(source[currentPosition++]))
 
1814                     //                          Character.getNumericValue(source[currentPosition++]))
 
1817                     //                        //error don't care of the value
 
1818                     //                        currentCharacter = 'A';
 
1819                     //                      } //something different from \n and \r
 
1821                     //                        currentCharacter =
 
1822                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1826                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
 
1827                     pushLineSeparator();
 
1828                 } catch (IndexOutOfBoundsException e) {
 
1829                 } //an eof will then be generated
 
1833                 //traditional and annotation comment
 
1834                 boolean star = false;
 
1835                 //                try { // consume next character
 
1836                 unicodeAsBackSlash = false;
 
1837                 currentCharacter = source[currentPosition++];
 
1838                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1839                 //                    && (source[currentPosition] == 'u')) {
 
1840                 //                    getNextUnicodeChar();
 
1842                 if (withoutUnicodePtr != 0) {
 
1843                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1846                 //                } catch (InvalidInputException ex) {
 
1848                 if (currentCharacter == '*') {
 
1851                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
 
1852                   pushLineSeparator();
 
1853                 try { //get the next char 
 
1854                   currentCharacter = source[currentPosition++];
 
1855                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1856                   //                    && (source[currentPosition] == 'u')) {
 
1857                   //                    //-------------unicode treatment ------------
 
1858                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1859                   //                    currentPosition++;
 
1860                   //                    while (source[currentPosition] == 'u') {
 
1861                   //                      currentPosition++;
 
1864                   //                      Character.getNumericValue(source[currentPosition++]))
 
1868                   //                        Character.getNumericValue(source[currentPosition++]))
 
1872                   //                        Character.getNumericValue(source[currentPosition++]))
 
1876                   //                        Character.getNumericValue(source[currentPosition++]))
 
1879                   //                      //error don't care of the value
 
1880                   //                      currentCharacter = 'A';
 
1881                   //                    } //something different from * and /
 
1883                   //                      currentCharacter =
 
1884                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1887                   //loop until end of comment */ 
 
1888                   while ((currentCharacter != '/') || (!star)) {
 
1889                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
 
1890                       pushLineSeparator();
 
1891                     star = currentCharacter == '*';
 
1893                     currentCharacter = source[currentPosition++];
 
1894                     //                    if (((currentCharacter = source[currentPosition++])
 
1896                     //                      && (source[currentPosition] == 'u')) {
 
1897                     //                      //-------------unicode treatment ------------
 
1898                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1899                     //                      currentPosition++;
 
1900                     //                      while (source[currentPosition] == 'u') {
 
1901                     //                        currentPosition++;
 
1904                     //                        Character.getNumericValue(source[currentPosition++]))
 
1908                     //                          Character.getNumericValue(source[currentPosition++]))
 
1912                     //                          Character.getNumericValue(source[currentPosition++]))
 
1916                     //                          Character.getNumericValue(source[currentPosition++]))
 
1919                     //                        //error don't care of the value
 
1920                     //                        currentCharacter = 'A';
 
1921                     //                      } //something different from * and /
 
1923                     //                        currentCharacter =
 
1924                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1928                 } catch (IndexOutOfBoundsException e) {
 
1937             if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
 
1939                 scanIdentifierOrKeyword((currentCharacter == '$'));
 
1940               } catch (InvalidInputException ex) {
 
1944             if (Character.isDigit(currentCharacter)) {
 
1947               } catch (InvalidInputException ex) {
 
1953       //-----------------end switch while try--------------------
 
1954     } catch (IndexOutOfBoundsException e) {
 
1955     } catch (InvalidInputException e) {
 
1959   //  public final boolean jumpOverUnicodeWhiteSpace()
 
1960   //    throws InvalidInputException {
 
1962   //    //handle the case of unicode. Jump over the next whiteSpace
 
1963   //    //making startPosition pointing on the next available char
 
1964   //    //On false, the currentCharacter is filled up with a potential
 
1968   //      this.wasAcr = false;
 
1969   //      int c1, c2, c3, c4;
 
1970   //      int unicodeSize = 6;
 
1971   //      currentPosition++;
 
1972   //      while (source[currentPosition] == 'u') {
 
1973   //        currentPosition++;
 
1977   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
1979   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
1981   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
1983   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
1985   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
1988   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1989   //      if (recordLineSeparator
 
1990   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
 
1991   //        pushLineSeparator();
 
1992   //      if (Character.isWhitespace(currentCharacter))
 
1995   //      //buffer the new char which is not a white space
 
1996   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1997   //      //withoutUnicodePtr == 1 is true here
 
1999   //    } catch (IndexOutOfBoundsException e) {
 
2000   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
2003   public final int[] getLineEnds() {
 
2004     //return a bounded copy of this.lineEnds 
 
2007     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
 
2011   public char[] getSource() {
 
2014   final char[] optimizedCurrentTokenSource1() {
 
2015     //always return the same char[], built only once
 
2017     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
 
2018     char charOne = source[startPosition];
 
2073         return new char[] { charOne };
 
2077   final char[] optimizedCurrentTokenSource2() {
 
2078     //try to return the same char[], built only once
 
2081     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
 
2082     char[][] table = charArray_length[0][hash];
 
2084     while (++i < InternalTableSize) {
 
2085       char[] charArray = table[i];
 
2086       if ((c0 == charArray[0]) && (c1 == charArray[1]))
 
2089     //---------other side---------
 
2091     int max = newEntry2;
 
2092     while (++i <= max) {
 
2093       char[] charArray = table[i];
 
2094       if ((c0 == charArray[0]) && (c1 == charArray[1]))
 
2097     //--------add the entry-------
 
2098     if (++max >= InternalTableSize)
 
2101     table[max] = (r = new char[] { c0, c1 });
 
2106   final char[] optimizedCurrentTokenSource3() {
 
2107     //try to return the same char[], built only once
 
2111       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
 
2113     char[][] table = charArray_length[1][hash];
 
2115     while (++i < InternalTableSize) {
 
2116       char[] charArray = table[i];
 
2117       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
 
2120     //---------other side---------
 
2122     int max = newEntry3;
 
2123     while (++i <= max) {
 
2124       char[] charArray = table[i];
 
2125       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
 
2128     //--------add the entry-------
 
2129     if (++max >= InternalTableSize)
 
2132     table[max] = (r = new char[] { c0, c1, c2 });
 
2137   final char[] optimizedCurrentTokenSource4() {
 
2138     //try to return the same char[], built only once
 
2140     char c0, c1, c2, c3;
 
2142       ((((long) (c0 = source[startPosition])) << 18)
 
2143         + ((c1 = source[startPosition + 1]) << 12)
 
2144         + ((c2 = source[startPosition + 2]) << 6)
 
2145         + (c3 = source[startPosition + 3]))
 
2147     char[][] table = charArray_length[2][(int) hash];
 
2149     while (++i < InternalTableSize) {
 
2150       char[] charArray = table[i];
 
2151       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
 
2154     //---------other side---------
 
2156     int max = newEntry4;
 
2157     while (++i <= max) {
 
2158       char[] charArray = table[i];
 
2159       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
 
2162     //--------add the entry-------
 
2163     if (++max >= InternalTableSize)
 
2166     table[max] = (r = new char[] { c0, c1, c2, c3 });
 
2172   final char[] optimizedCurrentTokenSource5() {
 
2173     //try to return the same char[], built only once
 
2175     char c0, c1, c2, c3, c4;
 
2177       ((((long) (c0 = source[startPosition])) << 24)
 
2178         + (((long) (c1 = source[startPosition + 1])) << 18)
 
2179         + ((c2 = source[startPosition + 2]) << 12)
 
2180         + ((c3 = source[startPosition + 3]) << 6)
 
2181         + (c4 = source[startPosition + 4]))
 
2183     char[][] table = charArray_length[3][(int) hash];
 
2185     while (++i < InternalTableSize) {
 
2186       char[] charArray = table[i];
 
2187       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
 
2190     //---------other side---------
 
2192     int max = newEntry5;
 
2193     while (++i <= max) {
 
2194       char[] charArray = table[i];
 
2195       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
 
2198     //--------add the entry-------
 
2199     if (++max >= InternalTableSize)
 
2202     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
 
2208   final char[] optimizedCurrentTokenSource6() {
 
2209     //try to return the same char[], built only once
 
2211     char c0, c1, c2, c3, c4, c5;
 
2213       ((((long) (c0 = source[startPosition])) << 32)
 
2214         + (((long) (c1 = source[startPosition + 1])) << 24)
 
2215         + (((long) (c2 = source[startPosition + 2])) << 18)
 
2216         + ((c3 = source[startPosition + 3]) << 12)
 
2217         + ((c4 = source[startPosition + 4]) << 6)
 
2218         + (c5 = source[startPosition + 5]))
 
2220     char[][] table = charArray_length[4][(int) hash];
 
2222     while (++i < InternalTableSize) {
 
2223       char[] charArray = table[i];
 
2224       if ((c0 == charArray[0])
 
2225         && (c1 == charArray[1])
 
2226         && (c2 == charArray[2])
 
2227         && (c3 == charArray[3])
 
2228         && (c4 == charArray[4])
 
2229         && (c5 == charArray[5]))
 
2232     //---------other side---------
 
2234     int max = newEntry6;
 
2235     while (++i <= max) {
 
2236       char[] charArray = table[i];
 
2237       if ((c0 == charArray[0])
 
2238         && (c1 == charArray[1])
 
2239         && (c2 == charArray[2])
 
2240         && (c3 == charArray[3])
 
2241         && (c4 == charArray[4])
 
2242         && (c5 == charArray[5]))
 
2245     //--------add the entry-------
 
2246     if (++max >= InternalTableSize)
 
2249     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
 
2254   public final void pushLineSeparator() throws InvalidInputException {
 
2255     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
 
2256     final int INCREMENT = 250;
 
2258     if (this.checkNonExternalizedStringLiterals) {
 
2259       // reinitialize the current line for non-externalized string purposes
 
2262     //currentCharacter is at position currentPosition-1
 
2265     if (currentCharacter == '\r') {
 
2266       int separatorPos = currentPosition - 1;
 
2267       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
 
2269       //System.out.println("CR-" + separatorPos);
 
2271         lineEnds[++linePtr] = separatorPos;
 
2272       } catch (IndexOutOfBoundsException e) {
 
2273         //linePtr value is correct
 
2274         int oldLength = lineEnds.length;
 
2275         int[] old = lineEnds;
 
2276         lineEnds = new int[oldLength + INCREMENT];
 
2277         System.arraycopy(old, 0, lineEnds, 0, oldLength);
 
2278         lineEnds[linePtr] = separatorPos;
 
2280       // look-ahead for merged cr+lf
 
2282         if (source[currentPosition] == '\n') {
 
2283           //System.out.println("look-ahead LF-" + currentPosition);                     
 
2284           lineEnds[linePtr] = currentPosition;
 
2290       } catch (IndexOutOfBoundsException e) {
 
2295       if (currentCharacter == '\n') {
 
2296         //must merge a possible cr followed by lf
 
2297         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
 
2298           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
 
2299           lineEnds[linePtr] = currentPosition - 1;
 
2301           int separatorPos = currentPosition - 1;
 
2302           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
 
2304           // System.out.println("LF-" + separatorPos);                                                  
 
2306             lineEnds[++linePtr] = separatorPos;
 
2307           } catch (IndexOutOfBoundsException e) {
 
2308             //linePtr value is correct
 
2309             int oldLength = lineEnds.length;
 
2310             int[] old = lineEnds;
 
2311             lineEnds = new int[oldLength + INCREMENT];
 
2312             System.arraycopy(old, 0, lineEnds, 0, oldLength);
 
2313             lineEnds[linePtr] = separatorPos;
 
2320   public final void pushUnicodeLineSeparator() {
 
2321     // isUnicode means that the \r or \n has been read as a unicode character
 
2323     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
 
2325     final int INCREMENT = 250;
 
2326     //currentCharacter is at position currentPosition-1
 
2328     if (this.checkNonExternalizedStringLiterals) {
 
2329       // reinitialize the current line for non-externalized string purposes
 
2334     if (currentCharacter == '\r') {
 
2335       int separatorPos = currentPosition - 6;
 
2336       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
 
2338       //System.out.println("CR-" + separatorPos);
 
2340         lineEnds[++linePtr] = separatorPos;
 
2341       } catch (IndexOutOfBoundsException e) {
 
2342         //linePtr value is correct
 
2343         int oldLength = lineEnds.length;
 
2344         int[] old = lineEnds;
 
2345         lineEnds = new int[oldLength + INCREMENT];
 
2346         System.arraycopy(old, 0, lineEnds, 0, oldLength);
 
2347         lineEnds[linePtr] = separatorPos;
 
2349       // look-ahead for merged cr+lf
 
2350       if (source[currentPosition] == '\n') {
 
2351         //System.out.println("look-ahead LF-" + currentPosition);                       
 
2352         lineEnds[linePtr] = currentPosition;
 
2360       if (currentCharacter == '\n') {
 
2361         //must merge a possible cr followed by lf
 
2362         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
 
2363           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
 
2364           lineEnds[linePtr] = currentPosition - 6;
 
2366           int separatorPos = currentPosition - 6;
 
2367           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
 
2369           // System.out.println("LF-" + separatorPos);                                                  
 
2371             lineEnds[++linePtr] = separatorPos;
 
2372           } catch (IndexOutOfBoundsException e) {
 
2373             //linePtr value is correct
 
2374             int oldLength = lineEnds.length;
 
2375             int[] old = lineEnds;
 
2376             lineEnds = new int[oldLength + INCREMENT];
 
2377             System.arraycopy(old, 0, lineEnds, 0, oldLength);
 
2378             lineEnds[linePtr] = separatorPos;
 
2385   public final void recordComment(boolean isJavadoc) {
 
2387     // a new annotation comment is recorded
 
2389       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
 
2390     } catch (IndexOutOfBoundsException e) {
 
2391       int oldStackLength = commentStops.length;
 
2392       int[] oldStack = commentStops;
 
2393       commentStops = new int[oldStackLength + 30];
 
2394       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
 
2395       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
 
2396       //grows the positions buffers too
 
2397       int[] old = commentStarts;
 
2398       commentStarts = new int[oldStackLength + 30];
 
2399       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
 
2402     //the buffer is of a correct size here
 
2403     commentStarts[commentPtr] = startPosition;
 
2405   public void resetTo(int begin, int end) {
 
2406     //reset the scanner to a given position where it may rescan again
 
2409     initialPosition = startPosition = currentPosition = begin;
 
2410     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
 
2411     commentPtr = -1; // reset comment stack
 
2414   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
 
2415     // the string with "\\u" is a legal string of two chars \ and u
 
2416     //thus we use a direct access to the source (for regular cases).
 
2418     //    if (unicodeAsBackSlash) {
 
2419     //      // consume next character
 
2420     //      unicodeAsBackSlash = false;
 
2421     //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
2422     //        && (source[currentPosition] == 'u')) {
 
2423     //        getNextUnicodeChar();
 
2425     //        if (withoutUnicodePtr != 0) {
 
2426     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
2430     currentCharacter = source[currentPosition++];
 
2431     switch (currentCharacter) {
 
2433         currentCharacter = '\'';
 
2436         currentCharacter = '\\';
 
2439         currentCharacter = '\\';
 
2444   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
 
2445     // the string with "\\u" is a legal string of two chars \ and u
 
2446     //thus we use a direct access to the source (for regular cases).
 
2448     //    if (unicodeAsBackSlash) {
 
2449     //      // consume next character
 
2450     //      unicodeAsBackSlash = false;
 
2451     //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
2452     //        && (source[currentPosition] == 'u')) {
 
2453     //        getNextUnicodeChar();
 
2455     //        if (withoutUnicodePtr != 0) {
 
2456     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
2460     currentCharacter = source[currentPosition++];
 
2461     switch (currentCharacter) {
 
2463       //        currentCharacter = '\b';
 
2466         currentCharacter = '\t';
 
2469         currentCharacter = '\n';
 
2472         //        currentCharacter = '\f';
 
2475         currentCharacter = '\r';
 
2478         currentCharacter = '\"';
 
2481         currentCharacter = '\'';
 
2484         currentCharacter = '\\';
 
2487         currentCharacter = '$';
 
2490         // -----------octal escape--------------
 
2492         // OctalDigit OctalDigit
 
2493         // ZeroToThree OctalDigit OctalDigit
 
2495         int number = Character.getNumericValue(currentCharacter);
 
2496         if (number >= 0 && number <= 7) {
 
2497           boolean zeroToThreeNot = number > 3;
 
2498           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
 
2499             int digit = Character.getNumericValue(currentCharacter);
 
2500             if (digit >= 0 && digit <= 7) {
 
2501               number = (number * 8) + digit;
 
2502               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
 
2503                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
 
2506                   digit = Character.getNumericValue(currentCharacter);
 
2507                   if (digit >= 0 && digit <= 7) {
 
2508                     // has read \ZeroToThree OctalDigit OctalDigit
 
2509                     number = (number * 8) + digit;
 
2510                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
 
2514               } else { // has read \OctalDigit NonDigit--> ignore last character
 
2517             } else { // has read \OctalDigit NonOctalDigit--> ignore last character                                             
 
2520           } else { // has read \OctalDigit --> ignore last character
 
2524             throw new InvalidInputException(INVALID_ESCAPE);
 
2525           currentCharacter = (char) number;
 
2528         //     throw new InvalidInputException(INVALID_ESCAPE);
 
2532   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
 
2533   //    return scanIdentifierOrKeyword( false );
 
2536   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
 
2539     //first dispatch on the first char.
 
2540     //then the length. If there are several
 
2541     //keywords with the same length AND the same first char, then do another
 
2542     //dispatch on the second char :-)...cool....but fast !
 
2544     useAssertAsAnIndentifier = false;
 
2546     while (getNextCharAsJavaIdentifierPart()) {
 
2550       if (new String(getCurrentTokenSource()).equals("$this")) {
 
2551         return TokenNamethis;
 
2553       return TokenNameVariable;
 
2558     //    if (withoutUnicodePtr == 0)
 
2560     //quick test on length == 1 but not on length > 12 while most identifiers
 
2561     //have a length which is <= 12...but there are lots of identifiers with
 
2565     if ((length = currentPosition - startPosition) == 1)
 
2566       return TokenNameIdentifier;
 
2568     data = new char[length];
 
2569     index = startPosition;
 
2570     for (int i = 0; i < length; i++) {
 
2571       data[i] = Character.toLowerCase(source[index + i]);
 
2575     //      if ((length = withoutUnicodePtr) == 1)
 
2576     //        return TokenNameIdentifier;
 
2577     //      // data = withoutUnicodeBuffer;
 
2578     //      data = new char[withoutUnicodeBuffer.length];
 
2579     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
 
2580     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
 
2585     firstLetter = data[index];
 
2586     switch (firstLetter) {
 
2588       case 'a' : // as and array
 
2591             if ((data[++index] == 's')) {
 
2594               return TokenNameIdentifier;
 
2597             if ((data[++index] == 'n') && (data[++index] == 'd')) {
 
2598               return TokenNameAND;
 
2600               return TokenNameIdentifier;
 
2603             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
 
2604             //              return TokenNamearray;
 
2606             //              return TokenNameIdentifier;
 
2608             return TokenNameIdentifier;
 
2613             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
 
2614               return TokenNamebreak;
 
2616               return TokenNameIdentifier;
 
2618             return TokenNameIdentifier;
 
2621       case 'c' : //case class continue
 
2624             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
 
2625               return TokenNamecase;
 
2627               return TokenNameIdentifier;
 
2629             if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
 
2630               return TokenNameclass;
 
2632               return TokenNameIdentifier;
 
2634             if ((data[++index] == 'o')
 
2635               && (data[++index] == 'n')
 
2636               && (data[++index] == 't')
 
2637               && (data[++index] == 'i')
 
2638               && (data[++index] == 'n')
 
2639               && (data[++index] == 'u')
 
2640               && (data[++index] == 'e'))
 
2641               return TokenNamecontinue;
 
2643               return TokenNameIdentifier;
 
2645             return TokenNameIdentifier;
 
2648       case 'd' : //define default do 
 
2651             if ((data[++index] == 'o'))
 
2654               return TokenNameIdentifier;
 
2656             if ((data[++index] == 'e')
 
2657               && (data[++index] == 'f')
 
2658               && (data[++index] == 'i')
 
2659               && (data[++index] == 'n')
 
2660               && (data[++index] == 'e'))
 
2661               return TokenNamedefine;
 
2663               return TokenNameIdentifier;
 
2665             if ((data[++index] == 'e')
 
2666               && (data[++index] == 'f')
 
2667               && (data[++index] == 'a')
 
2668               && (data[++index] == 'u')
 
2669               && (data[++index] == 'l')
 
2670               && (data[++index] == 't'))
 
2671               return TokenNamedefault;
 
2673               return TokenNameIdentifier;
 
2675             return TokenNameIdentifier;
 
2677       case 'e' : //echo else elseif extends
 
2680             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
 
2681               return TokenNameecho;
 
2682             else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
 
2683               return TokenNameelse;
 
2685               return TokenNameIdentifier;
 
2687             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
 
2688               return TokenNameendif;
 
2690               return TokenNameIdentifier;
 
2692             if ((data[++index] == 'n')
 
2693               && (data[++index] == 'd')
 
2694               && (data[++index] == 'f')
 
2695               && (data[++index] == 'o')
 
2696               && (data[++index] == 'r'))
 
2697               return TokenNameendfor;
 
2699               (data[index] == 'l')
 
2700                 && (data[++index] == 's')
 
2701                 && (data[++index] == 'e')
 
2702                 && (data[++index] == 'i')
 
2703                 && (data[++index] == 'f'))
 
2704               return TokenNameelseif;
 
2706               return TokenNameIdentifier;
 
2708             if ((data[++index] == 'x')
 
2709               && (data[++index] == 't')
 
2710               && (data[++index] == 'e')
 
2711               && (data[++index] == 'n')
 
2712               && (data[++index] == 'd')
 
2713               && (data[++index] == 's'))
 
2714               return TokenNameextends;
 
2716               return TokenNameIdentifier;
 
2717           case 8 : // endwhile
 
2718             if ((data[++index] == 'n')
 
2719               && (data[++index] == 'd')
 
2720               && (data[++index] == 'w')
 
2721               && (data[++index] == 'h')
 
2722               && (data[++index] == 'i')
 
2723               && (data[++index] == 'l')
 
2724               && (data[++index] == 'e'))
 
2725               return TokenNameendwhile;
 
2727               return TokenNameIdentifier;
 
2728           case 9 : // endswitch
 
2729             if ((data[++index] == 'n')
 
2730               && (data[++index] == 'd')
 
2731               && (data[++index] == 's')
 
2732               && (data[++index] == 'w')
 
2733               && (data[++index] == 'i')
 
2734               && (data[++index] == 't')
 
2735               && (data[++index] == 'c')
 
2736               && (data[++index] == 'h'))
 
2737               return TokenNameendswitch;
 
2739               return TokenNameIdentifier;
 
2740           case 10 : // endforeach
 
2741             if ((data[++index] == 'n')
 
2742               && (data[++index] == 'd')
 
2743               && (data[++index] == 'f')
 
2744               && (data[++index] == 'o')
 
2745               && (data[++index] == 'r')
 
2746               && (data[++index] == 'e')
 
2747               && (data[++index] == 'a')
 
2748               && (data[++index] == 'c')
 
2749               && (data[++index] == 'h'))
 
2750               return TokenNameendforeach;
 
2752               return TokenNameIdentifier;
 
2755             return TokenNameIdentifier;
 
2758       case 'f' : //for false function
 
2761             if ((data[++index] == 'o') && (data[++index] == 'r'))
 
2762               return TokenNamefor;
 
2764               return TokenNameIdentifier;
 
2766             if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
 
2767               return TokenNamefalse;
 
2769               return TokenNameIdentifier;
 
2770           case 7 : // function
 
2771             if ((data[++index] == 'o')
 
2772               && (data[++index] == 'r')
 
2773               && (data[++index] == 'e')
 
2774               && (data[++index] == 'a')
 
2775               && (data[++index] == 'c')
 
2776               && (data[++index] == 'h'))
 
2777               return TokenNameforeach;
 
2779               return TokenNameIdentifier;
 
2780           case 8 : // function
 
2781             if ((data[++index] == 'u')
 
2782               && (data[++index] == 'n')
 
2783               && (data[++index] == 'c')
 
2784               && (data[++index] == 't')
 
2785               && (data[++index] == 'i')
 
2786               && (data[++index] == 'o')
 
2787               && (data[++index] == 'n'))
 
2788               return TokenNamefunction;
 
2790               return TokenNameIdentifier;
 
2792             return TokenNameIdentifier;
 
2796           if ((data[++index] == 'l')
 
2797             && (data[++index] == 'o')
 
2798             && (data[++index] == 'b')
 
2799             && (data[++index] == 'a')
 
2800             && (data[++index] == 'l')) {
 
2801             return TokenNameglobal;
 
2804         return TokenNameIdentifier;
 
2809             if (data[++index] == 'f')
 
2812               return TokenNameIdentifier;
 
2814             //            if ((data[++index] == 'n') && (data[++index] == 't'))
 
2815             //              return TokenNameint;
 
2817             //              return TokenNameIdentifier;
 
2819             if ((data[++index] == 'n')
 
2820               && (data[++index] == 'c')
 
2821               && (data[++index] == 'l')
 
2822               && (data[++index] == 'u')
 
2823               && (data[++index] == 'd')
 
2824               && (data[++index] == 'e'))
 
2825               return TokenNameinclude;
 
2827               return TokenNameIdentifier;
 
2829             if ((data[++index] == 'n')
 
2830               && (data[++index] == 'c')
 
2831               && (data[++index] == 'l')
 
2832               && (data[++index] == 'u')
 
2833               && (data[++index] == 'd')
 
2834               && (data[++index] == 'e')
 
2835               && (data[++index] == '_')
 
2836               && (data[++index] == 'o')
 
2837               && (data[++index] == 'n')
 
2838               && (data[++index] == 'c')
 
2839               && (data[++index] == 'e'))
 
2840               return TokenNameinclude_once;
 
2842               return TokenNameIdentifier;
 
2844             return TokenNameIdentifier;
 
2849           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
 
2850             return TokenNamelist;
 
2853         return TokenNameIdentifier;
 
2855       case 'n' : // new null
 
2858             if ((data[++index] == 'e') && (data[++index] == 'w'))
 
2859               return TokenNamenew;
 
2861               return TokenNameIdentifier;
 
2863             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
 
2864               return TokenNamenull;
 
2866               return TokenNameIdentifier;
 
2869             return TokenNameIdentifier;
 
2871       case 'o' : // or old_function
 
2873           if (data[++index] == 'r') {
 
2877         //        if (length == 12) {
 
2878         //          if ((data[++index] == 'l')
 
2879         //            && (data[++index] == 'd')
 
2880         //            && (data[++index] == '_')
 
2881         //            && (data[++index] == 'f')
 
2882         //            && (data[++index] == 'u')
 
2883         //            && (data[++index] == 'n')
 
2884         //            && (data[++index] == 'c')
 
2885         //            && (data[++index] == 't')
 
2886         //            && (data[++index] == 'i')
 
2887         //            && (data[++index] == 'o')
 
2888         //            && (data[++index] == 'n')) {
 
2889         //            return TokenNameold_function;
 
2892         return TokenNameIdentifier;
 
2896           if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
 
2897             return TokenNameprint;
 
2900         return TokenNameIdentifier;
 
2901       case 'r' : //return require require_once
 
2903           if ((data[++index] == 'e')
 
2904             && (data[++index] == 't')
 
2905             && (data[++index] == 'u')
 
2906             && (data[++index] == 'r')
 
2907             && (data[++index] == 'n')) {
 
2908             return TokenNamereturn;
 
2910         } else if (length == 7) {
 
2911           if ((data[++index] == 'e')
 
2912             && (data[++index] == 'q')
 
2913             && (data[++index] == 'u')
 
2914             && (data[++index] == 'i')
 
2915             && (data[++index] == 'r')
 
2916             && (data[++index] == 'e')) {
 
2917             return TokenNamerequire;
 
2919         } else if (length == 12) {
 
2920           if ((data[++index] == 'e')
 
2921             && (data[++index] == 'q')
 
2922             && (data[++index] == 'u')
 
2923             && (data[++index] == 'i')
 
2924             && (data[++index] == 'r')
 
2925             && (data[++index] == 'e')
 
2926             && (data[++index] == '_')
 
2927             && (data[++index] == 'o')
 
2928             && (data[++index] == 'n')
 
2929             && (data[++index] == 'c')
 
2930             && (data[++index] == 'e')) {
 
2931             return TokenNamerequire_once;
 
2934           return TokenNameIdentifier;
 
2936       case 's' : //static switch 
 
2939             if (data[++index] == 't')
 
2940               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
 
2941                 return TokenNamestatic;
 
2943                 return TokenNameIdentifier;
 
2945               (data[index] == 'w')
 
2946                 && (data[++index] == 'i')
 
2947                 && (data[++index] == 't')
 
2948                 && (data[++index] == 'c')
 
2949                 && (data[++index] == 'h'))
 
2950               return TokenNameswitch;
 
2952               return TokenNameIdentifier;
 
2954             return TokenNameIdentifier;
 
2961             if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
 
2962               return TokenNametrue;
 
2964               return TokenNameIdentifier;
 
2965             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
 
2966             //              return TokenNamethis;
 
2969             return TokenNameIdentifier;
 
2975             if ((data[++index] == 'a') && (data[++index] == 'r'))
 
2976               return TokenNamevar;
 
2978               return TokenNameIdentifier;
 
2981             return TokenNameIdentifier;
 
2987             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
 
2988               return TokenNamewhile;
 
2990               return TokenNameIdentifier;
 
2991             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
 
2992             //return TokenNamewidefp ;
 
2994             //return TokenNameIdentifier;
 
2996             return TokenNameIdentifier;
 
3002             if ((data[++index] == 'o') && (data[++index] == 'r'))
 
3003               return TokenNameXOR;
 
3005               return TokenNameIdentifier;
 
3008             return TokenNameIdentifier;
 
3011         return TokenNameIdentifier;
 
/**
 * Scans a numeric literal starting at currentCharacter and returns its
 * token kind.
 *
 * What the lines visible in this listing do:
 * - when the literal is not dot-prefixed and starts with '0':
 *   - "0x"/"0X": the next character must be a hexadecimal digit, otherwise
 *     InvalidInputException(INVALID_HEXA) is thrown; further hex digits are
 *     consumed and TokenNameIntegerLiteral is returned;
 *   - '0' followed by another digit ("potential octal"): digits are consumed;
 *     a d/D suffix yields TokenNameDoubleLiteral; a '.' starts a fraction with
 *     an optional exponent and also yields TokenNameDoubleLiteral; the
 *     remaining visible return in this branch is TokenNameIntegerLiteral;
 * - otherwise digits are consumed, then (only when not dot-prefixed) an
 *   optional '.' and fraction digits, then an optional e/E exponent with an
 *   optional sign; the exponent must contain at least one digit, otherwise
 *   InvalidInputException(INVALID_FLOAT) is thrown;
 * - a trailing d/D yields TokenNameDoubleLiteral; otherwise the result is
 *   TokenNameDoubleLiteral when the local flag floating is true and
 *   TokenNameIntegerLiteral when it is false.
 *
 * NOTE(review): every "consume next character" step reads
 * source[currentPosition++] with no bounds check visible in this listing; at
 * the end of the input this would presumably raise an
 * ArrayIndexOutOfBoundsException -- confirm how callers deal with that.
 *
 * @param dotPrefix true when the digits being scanned were preceded by a '.';
 *                  it seeds the floating flag and disables the fraction scan
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA or INVALID_FLOAT as
 *         described above
 */
3014   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
 
3016     //when entering this method the currentCharacter is the first
 
3017     //digit of the number , i.e. it may be preceded by a . when
 
3020     boolean floating = dotPrefix;
 
3021     if ((!dotPrefix) && (currentCharacter == '0')) {
 
3022       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
 
3023         //force the first char of the hexa number to exist...
 
3024         // consume next character
 
3025         unicodeAsBackSlash = false;
 
3026         currentCharacter = source[currentPosition++];
 
3027         //        if (((currentCharacter = source[currentPosition++]) == '\\')
 
3028         //          && (source[currentPosition] == 'u')) {
 
3029         //          getNextUnicodeChar();
 
3031         //          if (withoutUnicodePtr != 0) {
 
3032         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
3035         if (Character.digit(currentCharacter, 16) == -1)
 
3036           throw new InvalidInputException(INVALID_HEXA);
 
3038         while (getNextCharAsDigit(16)) {
 
3040         //        if (getNextChar('l', 'L') >= 0)
 
3041         //          return TokenNameLongLiteral;
 
3043         return TokenNameIntegerLiteral;
 
3046       //there is x or X in the number
 
3047       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
 
3048       if (getNextCharAsDigit()) {
 
3049         //-------------potential octal-----------------
 
3050         while (getNextCharAsDigit()) {
 
3053         //        if (getNextChar('l', 'L') >= 0) {
 
3054         //          return TokenNameLongLiteral;
 
3057         //        if (getNextChar('f', 'F') >= 0) {
 
3058         //          return TokenNameFloatingPointLiteral;
 
3061         if (getNextChar('d', 'D') >= 0) {
 
3062           return TokenNameDoubleLiteral;
 
3063         } else { //make the distinction between octal and float ....
 
3064           if (getNextChar('.')) { //bingo ! ....
 
3065             while (getNextCharAsDigit()) {
 
3067             if (getNextChar('e', 'E') >= 0) {
 
3068               // consume next character
 
3069               unicodeAsBackSlash = false;
 
3070               currentCharacter = source[currentPosition++];
 
3071               //              if (((currentCharacter = source[currentPosition++]) == '\\')
 
3072               //                && (source[currentPosition] == 'u')) {
 
3073               //                getNextUnicodeChar();
 
3075               //                if (withoutUnicodePtr != 0) {
 
3076               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
3080               if ((currentCharacter == '-') || (currentCharacter == '+')) {
 
3081                 // consume next character
 
3082                 unicodeAsBackSlash = false;
 
3083                 currentCharacter = source[currentPosition++];
 
3084                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
 
3085                 //                  && (source[currentPosition] == 'u')) {
 
3086                 //                  getNextUnicodeChar();
 
3088                 //                  if (withoutUnicodePtr != 0) {
 
3089                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 
3090                 //                      currentCharacter;
 
3094               if (!Character.isDigit(currentCharacter))
 
3095                 throw new InvalidInputException(INVALID_FLOAT);
 
3096               while (getNextCharAsDigit()) {
 
3099             //            if (getNextChar('f', 'F') >= 0)
 
3100             //              return TokenNameFloatingPointLiteral;
 
3101             getNextChar('d', 'D'); //jump over potential d or D
 
3102             return TokenNameDoubleLiteral;
 
3104             return TokenNameIntegerLiteral;
 
3112     while (getNextCharAsDigit()) {
 
3115     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
 
3116     //      return TokenNameLongLiteral;
 
3118     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
 
3119       while (getNextCharAsDigit()) {
 
3124     //if floating is true both exponent and suffix may be optional
 
3126     if (getNextChar('e', 'E') >= 0) {
 
3128       // consume next character
 
3129       unicodeAsBackSlash = false;
 
3130       currentCharacter = source[currentPosition++];
 
3131       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
3132       //        && (source[currentPosition] == 'u')) {
 
3133       //        getNextUnicodeChar();
 
3135       //        if (withoutUnicodePtr != 0) {
 
3136       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
3140       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
 
3141         unicodeAsBackSlash = false;
 
3142         currentCharacter = source[currentPosition++];
 
3143         //        if (((currentCharacter = source[currentPosition++]) == '\\')
 
3144         //          && (source[currentPosition] == 'u')) {
 
3145         //          getNextUnicodeChar();
 
3147         //          if (withoutUnicodePtr != 0) {
 
3148         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
3152       if (!Character.isDigit(currentCharacter))
 
3153         throw new InvalidInputException(INVALID_FLOAT);
 
3154       while (getNextCharAsDigit()) {
 
3158     if (getNextChar('d', 'D') >= 0)
 
3159       return TokenNameDoubleLiteral;
 
3160     //    if (getNextChar('f', 'F') >= 0)
 
3161     //      return TokenNameFloatingPointLiteral;
 
3163     //the long flag has been tested before
 
3165     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
 
3168    * Search the line number corresponding to a specific position
 
// Looks up the given source position in the lineEnds table.
// Visible in this listing: a guard for lineEnds == null, the number of
// recorded entries taken as linePtr + 1, search bounds g (low) and d (high),
// and comparisons of position against lineEnds[m].
// NOTE(review): this looks like a binary search over lineEnds, but the loop
// header, the computation of m and every return statement are missing from
// this listing -- confirm the exact result (including whether line numbers
// are 1-based) against the full source.
3171   public final int getLineNumber(int position) {
 
3173     if (lineEnds == null)
 
3175     int length = linePtr + 1;
 
3178     int g = 0, d = length - 1;
 
3182       if (position < lineEnds[m]) {
 
3184       } else if (position > lineEnds[m]) {
 
3190     if (position < lineEnds[m]) {
 
/**
 * Setter taking the PHP-mode flag.
 * NOTE(review): the method body is not part of this listing; presumably it
 * stores mode in the public phpMode field -- confirm.
 *
 * @param mode the new PHP-mode flag
 */
3196   public void setPHPMode(boolean mode) {
 
/**
 * Installs a new source buffer and resets the scanning state that depends
 * on it.
 *
 * A null argument is replaced by an empty char array, so this.source is
 * never null afterwards. The array is stored as given (no copy is made on
 * the visible lines). initialPosition and currentPosition are reset to 0,
 * containsAssertKeyword is cleared, and withoutUnicodeBuffer is reallocated
 * with the same length as the source.
 *
 * @param source the characters to scan; may be null
 */
3200   public final void setSource(char[] source) {
 
3201     //the source-buffer is set to sourceString
 
3203     if (source == null) {
 
3204       this.source = new char[0];
 
3206       this.source = source;
 
3209     initialPosition = currentPosition = 0;
 
3210     containsAssertKeyword = false;
 
3211     withoutUnicodeBuffer = new char[this.source.length];
 
/**
 * Builds a debugging dump of the whole source with the current token
 * delimited by "Starts here -->" and "<-- Ends here" markers.
 *
 * Special cases: when startPosition equals source.length the result is
 * "EOF" followed by the source; when currentPosition is beyond source.length
 * the result is a "behind the EOF" message followed by the source.
 *
 * @return the annotated source text
 */
3215   public String toString() {
 
3216     if (startPosition == source.length)
 
3217       return "EOF\n\n" + new String(source); //$NON-NLS-1$
 
3218     if (currentPosition > source.length)
 
3219       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
 
         // front: everything before the current token
3221     char front[] = new char[startPosition];
 
3222     System.arraycopy(source, 0, front, 0, startPosition);
 
         // middle: the current token, i.e. source[startPosition .. currentPosition - 1]
3224     int middleLength = (currentPosition - 1) - startPosition + 1;
 
3226     if (middleLength > -1) {
 
3227       middle = new char[middleLength];
 
3228       System.arraycopy(source, startPosition, middle, 0, middleLength);
 
3230       middle = new char[0];
 
         // end: everything from currentPosition onwards.
         // NOTE(review): the array is allocated one element larger than the number
         // of characters copied into it, so its last element keeps the default
         // value '\u0000'.
3233     char end[] = new char[source.length - (currentPosition - 1)];
 
3234     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
 
3236     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
 
3237     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
 
/**
 * Returns a human-readable description of a token code, for debugging.
 *
 * Keyword and operator tokens map to their literal spelling (for example
 * "break" or "=="). Tokens that carry text (identifiers, variables, literals,
 * whitespace, comments, HTML, ...) are rendered as a tag followed by the
 * current token source in parentheses, so the result for those depends on
 * the scanner's current token. The last visible return renders the code as
 * "not-a-token(<act>)" followed by the current token source.
 *
 * NOTE(review): several case labels and return lines are missing from this
 * listing, so a return shown directly under a case label does not always
 * belong to that label.
 *
 * @param act the token code to describe
 * @return the description of act
 */
3240   public final String toStringAction(int act) {
 
3243       case TokenNameERROR :
 
3244         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
 
3245       case TokenNameStopPHP :
 
3246         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
 
3247       case TokenNameIdentifier :
 
3248         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
 
3249       case TokenNameVariable :
 
3250         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
 
3252         return "as"; //$NON-NLS-1$
 
3253       case TokenNamebreak :
 
3254         return "break"; //$NON-NLS-1$
 
3255       case TokenNamecase :
 
3256         return "case"; //$NON-NLS-1$
 
3257       case TokenNameclass :
 
3258         return "class"; //$NON-NLS-1$
 
3259       case TokenNamecontinue :
 
3260         return "continue"; //$NON-NLS-1$
 
3261       case TokenNamedefault :
 
3262         return "default"; //$NON-NLS-1$
 
3263       case TokenNamedefine :
 
3264         return "define"; //$NON-NLS-1$
 
3266         return "do"; //$NON-NLS-1$
 
3267       case TokenNameecho :
 
3268         return "echo"; //$NON-NLS-1$
 
3269       case TokenNameelse :
 
3270         return "else"; //$NON-NLS-1$
 
3271       case TokenNameelseif :
 
3272         return "elseif"; //$NON-NLS-1$
 
3273       case TokenNameendfor :
 
3274         return "endfor"; //$NON-NLS-1$
 
3275       case TokenNameendforeach :
 
3276         return "endforeach"; //$NON-NLS-1$
 
3277       case TokenNameendif :
 
3278         return "endif"; //$NON-NLS-1$
 
3279       case TokenNameendswitch :
 
3280         return "endswitch"; //$NON-NLS-1$
 
3281       case TokenNameendwhile :
 
3282         return "endwhile"; //$NON-NLS-1$
 
3283       case TokenNameextends :
 
3284         return "extends"; //$NON-NLS-1$
 
3285       case TokenNamefalse :
 
3286         return "false"; //$NON-NLS-1$
 
3288         return "for"; //$NON-NLS-1$
 
3289       case TokenNameforeach :
 
3290         return "foreach"; //$NON-NLS-1$
 
3291       case TokenNamefunction :
 
3292         return "function"; //$NON-NLS-1$
 
3293       case TokenNameglobal :
 
3294         return "global"; //$NON-NLS-1$
 
3296         return "if"; //$NON-NLS-1$
 
3297       case TokenNameinclude :
 
3298         return "include"; //$NON-NLS-1$
 
3299       case TokenNameinclude_once :
 
3300         return "include_once"; //$NON-NLS-1$
 
3301       case TokenNamelist :
 
3302         return "list"; //$NON-NLS-1$
 
3304         return "new"; //$NON-NLS-1$
 
3305       case TokenNamenull :
 
3306         return "null"; //$NON-NLS-1$
 
3307       case TokenNameprint :
 
3308         return "print"; //$NON-NLS-1$
 
3309       case TokenNamerequire :
 
3310         return "require"; //$NON-NLS-1$
 
3311       case TokenNamerequire_once :
 
3312         return "require_once"; //$NON-NLS-1$
 
3313       case TokenNamereturn :
 
3314         return "return"; //$NON-NLS-1$
 
3315       case TokenNamestatic :
 
3316         return "static"; //$NON-NLS-1$
 
3317       case TokenNameswitch :
 
3318         return "switch"; //$NON-NLS-1$
 
3319       case TokenNametrue :
 
3320         return "true"; //$NON-NLS-1$
 
3322         return "var"; //$NON-NLS-1$
 
3323       case TokenNamewhile :
 
3324         return "while"; //$NON-NLS-1$
 
3325       case TokenNamethis :
 
3326         return "$this"; //$NON-NLS-1$
 
3327       case TokenNameIntegerLiteral :
 
3328         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
 
3329       case TokenNameDoubleLiteral :
 
3330         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
 
3331       case TokenNameStringLiteral :
 
3332         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
 
3333       case TokenNameStringConstant :
 
3334         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
 
3335       case TokenNameStringInterpolated :
 
3336         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
 
3337       case TokenNameHEREDOC :
 
3338         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
 
3340       case TokenNamePLUS_PLUS :
 
3341         return "++"; //$NON-NLS-1$
 
3342       case TokenNameMINUS_MINUS :
 
3343         return "--"; //$NON-NLS-1$
 
3344       case TokenNameEQUAL_EQUAL :
 
3345         return "=="; //$NON-NLS-1$
 
3346       case TokenNameEQUAL_EQUAL_EQUAL :
 
3347         return "==="; //$NON-NLS-1$
 
3348       case TokenNameEQUAL_GREATER :
 
3349         return "=>"; //$NON-NLS-1$
 
3350       case TokenNameLESS_EQUAL :
 
3351         return "<="; //$NON-NLS-1$
 
3352       case TokenNameGREATER_EQUAL :
 
3353         return ">="; //$NON-NLS-1$
 
3354       case TokenNameNOT_EQUAL :
 
3355         return "!="; //$NON-NLS-1$
 
3356       case TokenNameNOT_EQUAL_EQUAL :
 
3357         return "!=="; //$NON-NLS-1$
 
3358       case TokenNameLEFT_SHIFT :
 
3359         return "<<"; //$NON-NLS-1$
 
3360       case TokenNameRIGHT_SHIFT :
 
3361         return ">>"; //$NON-NLS-1$
 
3362       case TokenNamePLUS_EQUAL :
 
3363         return "+="; //$NON-NLS-1$
 
3364       case TokenNameMINUS_EQUAL :
 
3365         return "-="; //$NON-NLS-1$
 
3366       case TokenNameMULTIPLY_EQUAL :
 
3367         return "*="; //$NON-NLS-1$
 
3368       case TokenNameDIVIDE_EQUAL :
 
3369         return "/="; //$NON-NLS-1$
 
3370       case TokenNameAND_EQUAL :
 
3371         return "&="; //$NON-NLS-1$
 
3372       case TokenNameOR_EQUAL :
 
3373         return "|="; //$NON-NLS-1$
 
3374       case TokenNameXOR_EQUAL :
 
3375         return "^="; //$NON-NLS-1$
 
3376       case TokenNameREMAINDER_EQUAL :
 
3377         return "%="; //$NON-NLS-1$
 
3378       case TokenNameLEFT_SHIFT_EQUAL :
 
3379         return "<<="; //$NON-NLS-1$
 
3380       case TokenNameRIGHT_SHIFT_EQUAL :
 
3381         return ">>="; //$NON-NLS-1$
 
3382       case TokenNameOR_OR :
 
3383         return "||"; //$NON-NLS-1$
 
3384       case TokenNameAND_AND :
 
3385         return "&&"; //$NON-NLS-1$
 
3386       case TokenNamePLUS :
 
3387         return "+"; //$NON-NLS-1$
 
3388       case TokenNameMINUS :
 
3389         return "-"; //$NON-NLS-1$
 
3390       case TokenNameMINUS_GREATER :
 
3393         return "!"; //$NON-NLS-1$
 
3394       case TokenNameREMAINDER :
 
3395         return "%"; //$NON-NLS-1$
 
3397         return "^"; //$NON-NLS-1$
 
3399         return "&"; //$NON-NLS-1$
 
3400       case TokenNameMULTIPLY :
 
3401         return "*"; //$NON-NLS-1$
 
3403         return "|"; //$NON-NLS-1$
 
3404       case TokenNameTWIDDLE :
 
3405         return "~"; //$NON-NLS-1$
 
3406       case TokenNameTWIDDLE_EQUAL :
 
3407         return "~="; //$NON-NLS-1$
 
3408       case TokenNameDIVIDE :
 
3409         return "/"; //$NON-NLS-1$
 
3410       case TokenNameGREATER :
 
3411         return ">"; //$NON-NLS-1$
 
3412       case TokenNameLESS :
 
3413         return "<"; //$NON-NLS-1$
 
3414       case TokenNameLPAREN :
 
3415         return "("; //$NON-NLS-1$
 
3416       case TokenNameRPAREN :
 
3417         return ")"; //$NON-NLS-1$
 
3418       case TokenNameLBRACE :
 
3419         return "{"; //$NON-NLS-1$
 
3420       case TokenNameRBRACE :
 
3421         return "}"; //$NON-NLS-1$
 
3422       case TokenNameLBRACKET :
 
3423         return "["; //$NON-NLS-1$
 
3424       case TokenNameRBRACKET :
 
3425         return "]"; //$NON-NLS-1$
 
3426       case TokenNameSEMICOLON :
 
3427         return ";"; //$NON-NLS-1$
 
3428       case TokenNameQUESTION :
 
3429         return "?"; //$NON-NLS-1$
 
3430       case TokenNameCOLON :
 
3431         return ":"; //$NON-NLS-1$
 
3432       case TokenNameCOMMA :
 
3433         return ","; //$NON-NLS-1$
 
3435         return "."; //$NON-NLS-1$
 
3436       case TokenNameEQUAL :
 
3437         return "="; //$NON-NLS-1$
 
3440       case TokenNameDOLLAR_LBRACE :
 
3443         return "EOF"; //$NON-NLS-1$
 
3444       case TokenNameWHITESPACE :
 
3445         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
 
3446       case TokenNameCOMMENT_LINE :
 
3447         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
 
3448       case TokenNameCOMMENT_BLOCK :
 
3449         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
 
3450       case TokenNameCOMMENT_PHPDOC :
 
3451         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
 
3452       case TokenNameHTML :
 
3453         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
 
3455         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
 
/**
 * Creates a scanner with assert mode switched off; delegates to the
 * four-argument constructor, passing false as its last argument.
 *
 * @param tokenizeComments passed through unchanged
 * @param tokenizeWhiteSpace passed through unchanged
 * @param checkNonExternalizedStringLiterals passed through unchanged
 */
3459   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
 
3460     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
 
3464     boolean tokenizeComments,
 
3465     boolean tokenizeWhiteSpace,
 
3466     boolean checkNonExternalizedStringLiterals,
 
3467     boolean assertMode) {
 
         // Four-argument constructor: stores each flag in the field of the same
         // name and sets eofPosition to Integer.MAX_VALUE. No source buffer is
         // installed on the lines visible in this listing.
3468     this.eofPosition = Integer.MAX_VALUE;
 
3469     this.tokenizeComments = tokenizeComments;
 
3470     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
 
3471     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
 
3472     this.assertMode = assertMode;
 
/**
 * Runs parseTags on currentLine.
 * NOTE(review): the statement executed when currentLine is null is not
 * visible in this listing; presumably the method returns early -- confirm.
 *
 * @throws InvalidInputException declared by the signature, as it is on parseTags
 */
3475   private void checkNonExternalizeString() throws InvalidInputException {
 
3476     if (currentLine == null)
 
3478     parseTags(currentLine);
 
3481   private void parseTags(NLSLine line) throws InvalidInputException {
 
3482     String s = new String(getCurrentTokenSource());
 
3483     int pos = s.indexOf(TAG_PREFIX);
 
3484     int lineLength = line.size();
 
3486       int start = pos + TAG_PREFIX_LENGTH;
 
3487       int end = s.indexOf(TAG_POSTFIX, start);
 
3488       String index = s.substring(start, end);
 
3491         i = Integer.parseInt(index) - 1;
 
3492         // Tags are one based not zero based.
 
3493       } catch (NumberFormatException e) {
 
3494         i = -1; // we don't want to consider this as a valid NLS tag
 
3496       if (line.exists(i)) {
 
3499       pos = s.indexOf(TAG_PREFIX, start);
 
3502     this.nonNLSStrings = new StringLiteral[lineLength];
 
3503     int nonNLSCounter = 0;
 
3504     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
 
3505       StringLiteral literal = (StringLiteral) iterator.next();
 
3506       if (literal != null) {
 
3507         this.nonNLSStrings[nonNLSCounter++] = literal;
 
3510     if (nonNLSCounter == 0) {
 
3511       this.nonNLSStrings = null;
 
3515     this.wasNonExternalizedStringLiteral = true;
 
3516     if (nonNLSCounter != lineLength) {
 
3517       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);