net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.*;
  18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  19
  20 public class Scanner implements IScanner, ITerminalSymbols {
  21
   22   /* APIs are
  23    - getNextToken() which return the current type of the token
  24      (this value is not memorized by the scanner)
  25    - getCurrentTokenSource() which provides with the token "REAL" source
  26      (aka all unicode have been transformed into a correct char)
  27    - sourceStart gives the position into the stream
  28    - currentPosition-1 gives the sourceEnd position into the stream
  29   */
  30
  31   // 1.4 feature
  32   private boolean assertMode;
  33   public boolean useAssertAsAnIndentifier = false;
  34   //flag indicating if processed source contains occurrences of keyword assert
  35   public boolean containsAssertKeyword = false;
  36
  37   public boolean recordLineSeparator;
  38   public boolean phpMode = false;
  39
  40   public char currentCharacter;
  41   public int startPosition;
  42   public int currentPosition;
  43   public int initialPosition, eofPosition;
  44   // after this position eof are generated instead of real token from the source
  45
  46   public boolean tokenizeComments;
  47   public boolean tokenizeWhiteSpace;
  48
  49   //source should be viewed as a window (aka a part)
   50   //of an entire very large stream
  51   public char source[];
  52
  53   //unicode support
  54   public char[] withoutUnicodeBuffer;
  55   public int withoutUnicodePtr;
  56   //when == 0 ==> no unicode in the current token
  57   public boolean unicodeAsBackSlash = false;
  58
  59   public boolean scanningFloatLiteral = false;
  60
  61   //support for /** comments
  62   //public char[][] comments = new char[10][];
  63   public int[] commentStops = new int[10];
  64   public int[] commentStarts = new int[10];
  65   public int commentPtr = -1; // no comment test with commentPtr value -1
  66
  67   //diet parsing support - jump over some method body when requested
  68   public boolean diet = false;
  69
  70   //support for the  poor-line-debuggers ....
  71   //remember the position of the cr/lf
  72   public int[] lineEnds = new int[250];
  73   public int linePtr = -1;
  74   public boolean wasAcr = false;
  75
  76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  77
  78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  85
  86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  90
   91   //----------------optimized identifier management------------------
  92   static final char[] charArray_a = new char[] { 'a' },
  93     charArray_b = new char[] { 'b' },
  94     charArray_c = new char[] { 'c' },
  95     charArray_d = new char[] { 'd' },
  96     charArray_e = new char[] { 'e' },
  97     charArray_f = new char[] { 'f' },
  98     charArray_g = new char[] { 'g' },
  99     charArray_h = new char[] { 'h' },
 100     charArray_i = new char[] { 'i' },
 101     charArray_j = new char[] { 'j' },
 102     charArray_k = new char[] { 'k' },
 103     charArray_l = new char[] { 'l' },
 104     charArray_m = new char[] { 'm' },
 105     charArray_n = new char[] { 'n' },
 106     charArray_o = new char[] { 'o' },
 107     charArray_p = new char[] { 'p' },
 108     charArray_q = new char[] { 'q' },
 109     charArray_r = new char[] { 'r' },
 110     charArray_s = new char[] { 's' },
 111     charArray_t = new char[] { 't' },
 112     charArray_u = new char[] { 'u' },
 113     charArray_v = new char[] { 'v' },
 114     charArray_w = new char[] { 'w' },
 115     charArray_x = new char[] { 'x' },
 116     charArray_y = new char[] { 'y' },
 117     charArray_z = new char[] { 'z' };
 118
 119   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 120   static final int TableSize = 30, InternalTableSize = 6;
 121   //30*6 = 180 entries
 122   public static final int OptimizedLength = 6;
 123   public /*static*/
 124   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 125   // support for detecting non-externalized string literals
 126   int currentLineNr = -1;
 127   int previousLineNr = -1;
 128   NLSLine currentLine = null;
 129   List lines = new ArrayList();
 130   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 131   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 132   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 133   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 134   public StringLiteral[] nonNLSStrings = null;
 135   public boolean checkNonExternalizedStringLiterals = true;
 136   public boolean wasNonExternalizedStringLiteral = false;
 137
 138   /*static*/ {
 139     for (int i = 0; i < 6; i++) {
 140       for (int j = 0; j < TableSize; j++) {
 141         for (int k = 0; k < InternalTableSize; k++) {
 142           charArray_length[i][j][k] = initCharArray;
 143         }
 144       }
 145     }
 146   }
 147   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 148
 149   public static final int RoundBracket = 0;
 150   public static final int SquareBracket = 1;
 151   public static final int CurlyBracket = 2;
 152   public static final int BracketKinds = 3;
 153
 154   public static final boolean DEBUG = false;
 155
 156   public Scanner() {
 157     this(false, false);
 158   }
 159   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 160     this(tokenizeComments, tokenizeWhiteSpace, false);
 161   }
 162
 163   /**
 164    * Determines if the specified character is
 165    * permissible as the first character in a PHP identifier
 166    */
 167   public static boolean isPHPIdentifierStart(char ch) {
 168     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 169   }
 170
 171   /**
 172    * Determines if the specified character may be part of a PHP identifier as
 173    * other than the first character
 174    */
 175   public static boolean isPHPIdentifierPart(char ch) {
 176     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 177   }
 178
 179   public final boolean atEnd() {
 180     // This code is not relevant if source is
 181     // Only a part of the real stream input
 182
 183     return source.length == currentPosition;
 184   }
 185   public char[] getCurrentIdentifierSource() {
 186     //return the token REAL source (aka unicodes are precomputed)
 187
 188     char[] result;
 189     //    if (withoutUnicodePtr != 0)
 190     //      //0 is used as a fast test flag so the real first char is in position 1
 191     //      System.arraycopy(
 192     //        withoutUnicodeBuffer,
 193     //        1,
 194     //        result = new char[withoutUnicodePtr],
 195     //        0,
 196     //        withoutUnicodePtr);
 197     //    else {
 198     int length = currentPosition - startPosition;
 199     switch (length) { // see OptimizedLength
 200       case 1 :
 201         return optimizedCurrentTokenSource1();
 202       case 2 :
 203         return optimizedCurrentTokenSource2();
 204       case 3 :
 205         return optimizedCurrentTokenSource3();
 206       case 4 :
 207         return optimizedCurrentTokenSource4();
 208       case 5 :
 209         return optimizedCurrentTokenSource5();
 210       case 6 :
 211         return optimizedCurrentTokenSource6();
 212     }
 213     //no optimization
 214     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 215     //   }
 216     return result;
 217   }
 218   public int getCurrentTokenEndPosition() {
 219     return this.currentPosition - 1;
 220   }
 221   public final char[] getCurrentTokenSource() {
 222     // Return the token REAL source (aka unicodes are precomputed)
 223
 224     char[] result;
 225     //    if (withoutUnicodePtr != 0)
 226     //      // 0 is used as a fast test flag so the real first char is in position 1
 227     //      System.arraycopy(
 228     //        withoutUnicodeBuffer,
 229     //        1,
 230     //        result = new char[withoutUnicodePtr],
 231     //        0,
 232     //        withoutUnicodePtr);
 233     //    else {
 234     int length;
 235     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 236     //    }
 237     return result;
 238   }
 239
 240   public final char[] getCurrentTokenSource(int startPos) {
 241     // Return the token REAL source (aka unicodes are precomputed)
 242
 243     char[] result;
 244     //    if (withoutUnicodePtr != 0)
 245     //      // 0 is used as a fast test flag so the real first char is in position 1
 246     //      System.arraycopy(
 247     //        withoutUnicodeBuffer,
 248     //        1,
 249     //        result = new char[withoutUnicodePtr],
 250     //        0,
 251     //        withoutUnicodePtr);
 252     //    else {
 253     int length;
 254     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 255     //  }
 256     return result;
 257   }
 258
 259   public final char[] getCurrentTokenSourceString() {
 260     //return the token REAL source (aka unicodes are precomputed).
 261     //REMOVE the two " that are at the beginning and the end.
 262
 263     char[] result;
 264     if (withoutUnicodePtr != 0)
 265       //0 is used as a fast test flag so the real first char is in position 1
 266       System.arraycopy(withoutUnicodeBuffer, 2,
 267       //2 is 1 (real start) + 1 (to jump over the ")
 268       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 269     else {
 270       int length;
 271       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 272     }
 273     return result;
 274   }
 275   public int getCurrentTokenStartPosition() {
 276     return this.startPosition;
 277   }
 278   /*
 279    * Search the source position corresponding to the end of a given line number
 280    *
 281    * Line numbers are 1-based, and relative to the scanner initialPosition.
 282    * Character positions are 0-based.
 283    *
 284    * In case the given line number is inconsistent, answers -1.
 285    */
 286   public final int getLineEnd(int lineNumber) {
 287
 288     if (lineEnds == null)
 289       return -1;
 290     if (lineNumber >= lineEnds.length)
 291       return -1;
 292     if (lineNumber <= 0)
 293       return -1;
 294
 295     if (lineNumber == lineEnds.length - 1)
 296       return eofPosition;
 297     return lineEnds[lineNumber - 1];
 298     // next line start one character behind the lineEnd of the previous line
 299   }
 300   /**
 301    * Search the source position corresponding to the beginning of a given line number
 302    *
 303    * Line numbers are 1-based, and relative to the scanner initialPosition.
 304    * Character positions are 0-based.
 305    *
 306    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 307    *
 308    * In case the given line number is inconsistent, answers -1.
 309    */
 310   public final int getLineStart(int lineNumber) {
 311
 312     if (lineEnds == null)
 313       return -1;
 314     if (lineNumber >= lineEnds.length)
 315       return -1;
 316     if (lineNumber <= 0)
 317       return -1;
 318
 319     if (lineNumber == 1)
 320       return initialPosition;
 321     return lineEnds[lineNumber - 2] + 1;
 322     // next line start one character behind the lineEnd of the previous line
 323   }
 324   public final boolean getNextChar(char testedChar) {
 325     //BOOLEAN
 326     //handle the case of unicode.
 327     //when a unicode appears then we must use a buffer that holds char internal values
 328     //At the end of this method currentCharacter holds the new visited char
 329     //and currentPosition points right next after it
 330     //Both previous lines are true if the currentCharacter is == to the testedChar
 331     //On false, no side effect has occured.
 332
 333     //ALL getNextChar.... ARE OPTIMIZED COPIES
 334
 335     int temp = currentPosition;
 336     try {
 337       currentCharacter = source[currentPosition++];
 338       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 339       //        && (source[currentPosition] == 'u')) {
 340       //        //-------------unicode traitement ------------
 341       //        int c1, c2, c3, c4;
 342       //        int unicodeSize = 6;
 343       //        currentPosition++;
 344       //        while (source[currentPosition] == 'u') {
 345       //          currentPosition++;
 346       //          unicodeSize++;
 347       //        }
 348       //
 349       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 350       //          || c1 < 0)
 351       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 352       //            || c2 < 0)
 353       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 354       //            || c3 < 0)
 355       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 356       //            || c4 < 0)) {
 357       //          currentPosition = temp;
 358       //          return false;
 359       //        }
 360       //
 361       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 362       //        if (currentCharacter != testedChar) {
 363       //          currentPosition = temp;
 364       //          return false;
 365       //        }
 366       //        unicodeAsBackSlash = currentCharacter == '\\';
 367       //
 368       //        //need the unicode buffer
 369       //        if (withoutUnicodePtr == 0) {
 370       //          //buffer all the entries that have been left aside....
 371       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 372       //          System.arraycopy(
 373       //            source,
 374       //            startPosition,
 375       //            withoutUnicodeBuffer,
 376       //            1,
 377       //            withoutUnicodePtr);
 378       //        }
 379       //        //fill the buffer with the char
 380       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 381       //        return true;
 382       //
 383       //      } //-------------end unicode traitement--------------
 384       //      else {
 385       if (currentCharacter != testedChar) {
 386         currentPosition = temp;
 387         return false;
 388       }
 389       unicodeAsBackSlash = false;
 390       //        if (withoutUnicodePtr != 0)
 391       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 392       return true;
 393       //      }
 394     } catch (IndexOutOfBoundsException e) {
 395       unicodeAsBackSlash = false;
 396       currentPosition = temp;
 397       return false;
 398     }
 399   }
 400   public final int getNextChar(char testedChar1, char testedChar2) {
 401     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 402     //test can be done with (x==0) for the first and (x>0) for the second
 403     //handle the case of unicode.
 404     //when a unicode appears then we must use a buffer that holds char internal values
 405     //At the end of this method currentCharacter holds the new visited char
 406     //and currentPosition points right next after it
 407     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 408     //On false, no side effect has occured.
 409
 410     //ALL getNextChar.... ARE OPTIMIZED COPIES
 411
 412     int temp = currentPosition;
 413     try {
 414       int result;
 415       currentCharacter = source[currentPosition++];
 416       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 417       //        && (source[currentPosition] == 'u')) {
 418       //        //-------------unicode traitement ------------
 419       //        int c1, c2, c3, c4;
 420       //        int unicodeSize = 6;
 421       //        currentPosition++;
 422       //        while (source[currentPosition] == 'u') {
 423       //          currentPosition++;
 424       //          unicodeSize++;
 425       //        }
 426       //
 427       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 428       //          || c1 < 0)
 429       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 430       //            || c2 < 0)
 431       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 432       //            || c3 < 0)
 433       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 434       //            || c4 < 0)) {
 435       //          currentPosition = temp;
 436       //          return 2;
 437       //        }
 438       //
 439       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 440       //        if (currentCharacter == testedChar1)
 441       //          result = 0;
 442       //        else if (currentCharacter == testedChar2)
 443       //          result = 1;
 444       //        else {
 445       //          currentPosition = temp;
 446       //          return -1;
 447       //        }
 448       //
 449       //        //need the unicode buffer
 450       //        if (withoutUnicodePtr == 0) {
 451       //          //buffer all the entries that have been left aside....
 452       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 453       //          System.arraycopy(
 454       //            source,
 455       //            startPosition,
 456       //            withoutUnicodeBuffer,
 457       //            1,
 458       //            withoutUnicodePtr);
 459       //        }
 460       //        //fill the buffer with the char
 461       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 462       //        return result;
 463       //      } //-------------end unicode traitement--------------
 464       //      else {
 465       if (currentCharacter == testedChar1)
 466         result = 0;
 467       else if (currentCharacter == testedChar2)
 468         result = 1;
 469       else {
 470         currentPosition = temp;
 471         return -1;
 472       }
 473
 474       //        if (withoutUnicodePtr != 0)
 475       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 476       return result;
 477       //     }
 478     } catch (IndexOutOfBoundsException e) {
 479       currentPosition = temp;
 480       return -1;
 481     }
 482   }
 483   public final boolean getNextCharAsDigit() {
 484     //BOOLEAN
 485     //handle the case of unicode.
 486     //when a unicode appears then we must use a buffer that holds char internal values
 487     //At the end of this method currentCharacter holds the new visited char
 488     //and currentPosition points right next after it
 489     //Both previous lines are true if the currentCharacter is a digit
 490     //On false, no side effect has occured.
 491
 492     //ALL getNextChar.... ARE OPTIMIZED COPIES
 493
 494     int temp = currentPosition;
 495     try {
 496       currentCharacter = source[currentPosition++];
 497       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 498       //        && (source[currentPosition] == 'u')) {
 499       //        //-------------unicode traitement ------------
 500       //        int c1, c2, c3, c4;
 501       //        int unicodeSize = 6;
 502       //        currentPosition++;
 503       //        while (source[currentPosition] == 'u') {
 504       //          currentPosition++;
 505       //          unicodeSize++;
 506       //        }
 507       //
 508       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 509       //          || c1 < 0)
 510       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 511       //            || c2 < 0)
 512       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 513       //            || c3 < 0)
 514       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 515       //            || c4 < 0)) {
 516       //          currentPosition = temp;
 517       //          return false;
 518       //        }
 519       //
 520       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 521       //        if (!Character.isDigit(currentCharacter)) {
 522       //          currentPosition = temp;
 523       //          return false;
 524       //        }
 525       //
 526       //        //need the unicode buffer
 527       //        if (withoutUnicodePtr == 0) {
 528       //          //buffer all the entries that have been left aside....
 529       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 530       //          System.arraycopy(
 531       //            source,
 532       //            startPosition,
 533       //            withoutUnicodeBuffer,
 534       //            1,
 535       //            withoutUnicodePtr);
 536       //        }
 537       //        //fill the buffer with the char
 538       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 539       //        return true;
 540       //      } //-------------end unicode traitement--------------
 541       //      else {
 542       if (!Character.isDigit(currentCharacter)) {
 543         currentPosition = temp;
 544         return false;
 545       }
 546       //        if (withoutUnicodePtr != 0)
 547       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 548       return true;
 549       //      }
 550     } catch (IndexOutOfBoundsException e) {
 551       currentPosition = temp;
 552       return false;
 553     }
 554   }
 555   public final boolean getNextCharAsDigit(int radix) {
 556     //BOOLEAN
 557     //handle the case of unicode.
 558     //when a unicode appears then we must use a buffer that holds char internal values
 559     //At the end of this method currentCharacter holds the new visited char
 560     //and currentPosition points right next after it
 561     //Both previous lines are true if the currentCharacter is a digit base on radix
 562     //On false, no side effect has occured.
 563
 564     //ALL getNextChar.... ARE OPTIMIZED COPIES
 565
 566     int temp = currentPosition;
 567     try {
 568       currentCharacter = source[currentPosition++];
 569       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 570       //        && (source[currentPosition] == 'u')) {
 571       //        //-------------unicode traitement ------------
 572       //        int c1, c2, c3, c4;
 573       //        int unicodeSize = 6;
 574       //        currentPosition++;
 575       //        while (source[currentPosition] == 'u') {
 576       //          currentPosition++;
 577       //          unicodeSize++;
 578       //        }
 579       //
 580       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 581       //          || c1 < 0)
 582       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 583       //            || c2 < 0)
 584       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 585       //            || c3 < 0)
 586       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 587       //            || c4 < 0)) {
 588       //          currentPosition = temp;
 589       //          return false;
 590       //        }
 591       //
 592       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 593       //        if (Character.digit(currentCharacter, radix) == -1) {
 594       //          currentPosition = temp;
 595       //          return false;
 596       //        }
 597       //
 598       //        //need the unicode buffer
 599       //        if (withoutUnicodePtr == 0) {
 600       //          //buffer all the entries that have been left aside....
 601       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 602       //          System.arraycopy(
 603       //            source,
 604       //            startPosition,
 605       //            withoutUnicodeBuffer,
 606       //            1,
 607       //            withoutUnicodePtr);
 608       //        }
 609       //        //fill the buffer with the char
 610       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 611       //        return true;
 612       //      } //-------------end unicode traitement--------------
 613       //      else {
 614       if (Character.digit(currentCharacter, radix) == -1) {
 615         currentPosition = temp;
 616         return false;
 617       }
 618       //        if (withoutUnicodePtr != 0)
 619       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 620       return true;
 621       //      }
 622     } catch (IndexOutOfBoundsException e) {
 623       currentPosition = temp;
 624       return false;
 625     }
 626   }
 627   public boolean getNextCharAsJavaIdentifierPart() {
 628     //BOOLEAN
 629     //handle the case of unicode.
 630     //when a unicode appears then we must use a buffer that holds char internal values
 631     //At the end of this method currentCharacter holds the new visited char
 632     //and currentPosition points right next after it
 633     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
 634     //On false, no side effect has occured.
 635
 636     //ALL getNextChar.... ARE OPTIMIZED COPIES
 637
 638     int temp = currentPosition;
 639     try {
 640       currentCharacter = source[currentPosition++];
 641       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 642       //        && (source[currentPosition] == 'u')) {
 643       //        //-------------unicode traitement ------------
 644       //        int c1, c2, c3, c4;
 645       //        int unicodeSize = 6;
 646       //        currentPosition++;
 647       //        while (source[currentPosition] == 'u') {
 648       //          currentPosition++;
 649       //          unicodeSize++;
 650       //        }
 651       //
 652       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 653       //          || c1 < 0)
 654       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 655       //            || c2 < 0)
 656       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 657       //            || c3 < 0)
 658       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 659       //            || c4 < 0)) {
 660       //          currentPosition = temp;
 661       //          return false;
 662       //        }
 663       //
 664       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 665       //        if (!isPHPIdentifierPart(currentCharacter)) {
 666       //          currentPosition = temp;
 667       //          return false;
 668       //        }
 669       //
 670       //        //need the unicode buffer
 671       //        if (withoutUnicodePtr == 0) {
 672       //          //buffer all the entries that have been left aside....
 673       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 674       //          System.arraycopy(
 675       //            source,
 676       //            startPosition,
 677       //            withoutUnicodeBuffer,
 678       //            1,
 679       //            withoutUnicodePtr);
 680       //        }
 681       //        //fill the buffer with the char
 682       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 683       //        return true;
 684       //      } //-------------end unicode traitement--------------
 685       //      else {
 686       if (!isPHPIdentifierPart(currentCharacter)) {
 687         currentPosition = temp;
 688         return false;
 689       }
 690
 691       //        if (withoutUnicodePtr != 0)
 692       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 693       return true;
 694       //      }
 695     } catch (IndexOutOfBoundsException e) {
 696       currentPosition = temp;
 697       return false;
 698     }
 699   }
 700
 701   public int getNextToken() throws InvalidInputException {
 702     int htmlPosition = currentPosition;
 703     try {
 704       while (!phpMode) {
 705         currentCharacter = source[currentPosition++];
 706         if (currentCharacter == '<') {
 707           if (getNextChar('?')) {
 708             currentCharacter = source[currentPosition++];
 709             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
 710               // <?
 711               startPosition = currentPosition;
 712               phpMode = true;
 713               if (tokenizeWhiteSpace) {
 714                 // && (whiteStart != currentPosition - 1)) {
 715                 // reposition scanner in case we are interested by spaces as tokens
 716                 startPosition = htmlPosition;
 717                 return TokenNameHTML;
 718               }
 719             } else {
 720               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
 721               if (phpStart) {
 722                 int test = getNextChar('H', 'h');
 723                 if (test >= 0) {
 724                   test = getNextChar('P', 'p');
 725                   if (test >= 0) {
 726                     // <?PHP  <?php
 727                     startPosition = currentPosition;
 728                     phpMode = true;
 729
 730                     if (tokenizeWhiteSpace) {
 731                       // && (whiteStart != currentPosition - 1)) {
 732                       // reposition scanner in case we are interested in spaces as tokens
 733                       startPosition = htmlPosition;
 734                       return TokenNameHTML;
 735                     }
 736                   }
 737                 }
 738               }
 739             }
 740           }
 741         }
 742
 743         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 744           if (recordLineSeparator) {
 745             pushLineSeparator();
 746           } else {
 747             currentLine = null;
 748           }
 749         }
 750       }
 751     } //-----------------end switch while try--------------------
 752     catch (IndexOutOfBoundsException e) {
 753       if (tokenizeWhiteSpace) {
 754         // && (whiteStart != currentPosition - 1)) {
 755         // reposition scanner in case we are interested in spaces as tokens
 756         startPosition = htmlPosition;
 757       }
 758       return TokenNameEOF;
 759     }
 760
 761     if (phpMode) {
 762       this.wasAcr = false;
 763       if (diet) {
 764         jumpOverMethodBody();
 765         diet = false;
 766         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 767       }
 768       try {
 769         while (true) { //loop for jumping over comments
 770           withoutUnicodePtr = 0;
 771           //start with a new token (even comment written with unicode )
 772
 773           // ---------Consume white space and handles startPosition---------
 774           int whiteStart = currentPosition;
 775           boolean isWhiteSpace;
 776           do {
 777             startPosition = currentPosition;
 778             currentCharacter = source[currentPosition++];
 779             //            if (((currentCharacter = source[currentPosition++]) == '\\')
 780             //              && (source[currentPosition] == 'u')) {
 781             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 782             //            } else {
 783             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 784               checkNonExternalizeString();
 785               if (recordLineSeparator) {
 786                 pushLineSeparator();
 787               } else {
 788                 currentLine = null;
 789               }
 790             }
 791             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
 792             //            }
 793           } while (isWhiteSpace);
 794           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 795             // reposition scanner in case we are interested in spaces as tokens
 796             currentPosition--;
 797             startPosition = whiteStart;
 798             return TokenNameWHITESPACE;
 799           }
 800           //little trick to get out in the middle of a source computation
 801           if (currentPosition > eofPosition)
 802             return TokenNameEOF;
 803
 804           // ---------Identify the next token-------------
 805
 806           switch (currentCharacter) {
 807             case '(' :
 808               return TokenNameLPAREN;
 809             case ')' :
 810               return TokenNameRPAREN;
 811             case '{' :
 812               return TokenNameLBRACE;
 813             case '}' :
 814               return TokenNameRBRACE;
 815             case '[' :
 816               return TokenNameLBRACKET;
 817             case ']' :
 818               return TokenNameRBRACKET;
 819             case ';' :
 820               return TokenNameSEMICOLON;
 821             case ',' :
 822               return TokenNameCOMMA;
 823
 824             case '.' :
 825               if (getNextCharAsDigit())
 826                 return scanNumber(true);
 827               return TokenNameDOT;
 828             case '+' :
 829               {
 830                 int test;
 831                 if ((test = getNextChar('+', '=')) == 0)
 832                   return TokenNamePLUS_PLUS;
 833                 if (test > 0)
 834                   return TokenNamePLUS_EQUAL;
 835                 return TokenNamePLUS;
 836               }
 837             case '-' :
 838               {
 839                 int test;
 840                 if ((test = getNextChar('-', '=')) == 0)
 841                   return TokenNameMINUS_MINUS;
 842                 if (test > 0)
 843                   return TokenNameMINUS_EQUAL;
 844                 if (getNextChar('>'))
 845                   return TokenNameMINUS_GREATER;
 846
 847                 return TokenNameMINUS;
 848               }
 849             case '~' :
 850               if (getNextChar('='))
 851                 return TokenNameTWIDDLE_EQUAL;
 852               return TokenNameTWIDDLE;
 853             case '!' :
 854               if (getNextChar('='))
 855                 return TokenNameNOT_EQUAL;
 856               return TokenNameNOT;
 857             case '*' :
 858               if (getNextChar('='))
 859                 return TokenNameMULTIPLY_EQUAL;
 860               return TokenNameMULTIPLY;
 861             case '%' :
 862               if (getNextChar('='))
 863                 return TokenNameREMAINDER_EQUAL;
 864               return TokenNameREMAINDER;
 865             case '<' :
 866               {
 867                 int test;
 868                 if ((test = getNextChar('=', '<')) == 0)
 869                   return TokenNameLESS_EQUAL;
 870                 if (test > 0) {
 871                   if (getNextChar('='))
 872                     return TokenNameLEFT_SHIFT_EQUAL;
 873                   if (getNextChar('<')) {
 874                     int heredocStart = currentPosition;
 875                     int heredocLength = 0;
 876                     currentCharacter = source[currentPosition++];
 877                     if (isPHPIdentifierStart(currentCharacter)) {
 878                       currentCharacter = source[currentPosition++];
 879                     } else {
 880                       return TokenNameERROR;
 881                     }
 882                     while (isPHPIdentifierPart(currentCharacter)) {
 883                       currentCharacter = source[currentPosition++];
 884                     }
 885
 886                     heredocLength = currentPosition - heredocStart - 1;
 887
 888                     // heredoc end-tag determination
 889                     boolean endTag = true;
 890                     char ch;
 891                     do {
 892                       ch = source[currentPosition++];
 893                       if (ch == '\r' || ch == '\n') {
 894                         if (recordLineSeparator) {
 895                           pushLineSeparator();
 896                         } else {
 897                           currentLine = null;
 898                         }
 899                         for (int i = 0; i < heredocLength; i++) {
 900                           if (source[currentPosition + i] != source[heredocStart + i]) {
 901                             endTag = false;
 902                             break;
 903                           }
 904                         }
 905                         if (endTag) {
 906                           currentPosition += heredocLength - 1;
 907                           currentCharacter = source[currentPosition++];
 908                           break; // do...while loop
 909                         } else {
 910                           endTag = true;
 911                         }
 912                       }
 913
 914                     } while (true);
 915
 916                     return TokenNameHEREDOC;
 917                   }
 918                   return TokenNameLEFT_SHIFT;
 919                 }
 920                 return TokenNameLESS;
 921               }
 922             case '>' :
 923               {
 924                 int test;
 925                 if ((test = getNextChar('=', '>')) == 0)
 926                   return TokenNameGREATER_EQUAL;
 927                 if (test > 0) {
 928                   if ((test = getNextChar('=', '>')) == 0)
 929                     return TokenNameRIGHT_SHIFT_EQUAL;
 930                   return TokenNameRIGHT_SHIFT;
 931                 }
 932                 return TokenNameGREATER;
 933               }
 934             case '=' :
 935               if (getNextChar('='))
 936                 return TokenNameEQUAL_EQUAL;
 937               if (getNextChar('>'))
 938                 return TokenNameEQUAL_GREATER;
 939               return TokenNameEQUAL;
 940             case '&' :
 941               {
 942                 int test;
 943                 if ((test = getNextChar('&', '=')) == 0)
 944                   return TokenNameAND_AND;
 945                 if (test > 0)
 946                   return TokenNameAND_EQUAL;
 947                 return TokenNameAND;
 948               }
 949             case '|' :
 950               {
 951                 int test;
 952                 if ((test = getNextChar('|', '=')) == 0)
 953                   return TokenNameOR_OR;
 954                 if (test > 0)
 955                   return TokenNameOR_EQUAL;
 956                 return TokenNameOR;
 957               }
 958             case '^' :
 959               if (getNextChar('='))
 960                 return TokenNameXOR_EQUAL;
 961               return TokenNameXOR;
 962             case '?' :
 963               if (getNextChar('>')) {
 964                 phpMode = false;
 965                 return TokenNameStopPHP;
 966               }
 967               return TokenNameQUESTION;
 968             case ':' :
 969               if (getNextChar(':'))
 970                 return TokenNameCOLON_COLON;
 971               return TokenNameCOLON;
 972             case '@' :
 973               return TokenNameAT;
 974               //                                        case '\'' :
 975               //                                                {
 976               //                                                        int test;
 977               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
 978               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 979               //                                                        }
 980               //                                                        if (test > 0) {
 981               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 982               //                                                                for (int lookAhead = 0;
 983               //                                                                        lookAhead < 3;
 984               //                                                                        lookAhead++) {
 985               //                                                                        if (currentPosition + lookAhead
 986               //                                                                                == source.length)
 987               //                                                                                break;
 988               //                                                                        if (source[currentPosition + lookAhead]
 989               //                                                                                == '\n')
 990               //                                                                                break;
 991               //                                                                        if (source[currentPosition + lookAhead]
 992               //                                                                                == '\'') {
 993               //                                                                                currentPosition += lookAhead + 1;
 994               //                                                                                break;
 995               //                                                                        }
 996               //                                                                }
 997               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 998               //                                                        }
 999               //                                                }
1000               //                                                if (getNextChar('\'')) {
1001               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1002               //                                                        for (int lookAhead = 0;
1003               //                                                                lookAhead < 3;
1004               //                                                                lookAhead++) {
1005               //                                                                if (currentPosition + lookAhead
1006               //                                                                        == source.length)
1007               //                                                                        break;
1008               //                                                                if (source[currentPosition + lookAhead]
1009               //                                                                        == '\n')
1010               //                                                                        break;
1011               //                                                                if (source[currentPosition + lookAhead]
1012               //                                                                        == '\'') {
1013               //                                                                        currentPosition += lookAhead + 1;
1014               //                                                                        break;
1015               //                                                                }
1016               //                                                        }
1017               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1018               //                                                }
1019               //                                                if (getNextChar('\\'))
1020               //                                                        scanEscapeCharacter();
1021               //                                                else { // consume next character
1022               //                                                        unicodeAsBackSlash = false;
1023               //                                                        if (((currentCharacter = source[currentPosition++])
1024               //                                                                == '\\')
1025               //                                                                && (source[currentPosition] == 'u')) {
1026               //                                                                getNextUnicodeChar();
1027               //                                                        } else {
1028               //                                                                if (withoutUnicodePtr != 0) {
1029               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1030               //                                                                                currentCharacter;
1031               //                                                                }
1032               //                                                        }
1033               //                                                }
1034               //                                                //            if (getNextChar('\''))
1035               //                                                //              return TokenNameCharacterLiteral;
1036               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1037               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1038               //                                                        if (currentPosition + lookAhead == source.length)
1039               //                                                                break;
1040               //                                                        if (source[currentPosition + lookAhead] == '\n')
1041               //                                                                break;
1042               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1043               //                                                                currentPosition += lookAhead + 1;
1044               //                                                                break;
1045               //                                                        }
1046               //                                                }
1047               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1048             case '\'' :
1049               try {
1050                 // consume next character
1051                 unicodeAsBackSlash = false;
1052                 currentCharacter = source[currentPosition++];
1053                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1054                 //                  && (source[currentPosition] == 'u')) {
1055                 //                  getNextUnicodeChar();
1056                 //                } else {
1057                 //                  if (withoutUnicodePtr != 0) {
1058                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1059                 //                      currentCharacter;
1060                 //                  }
1061                 //                }
1062
1063                 while (currentCharacter != '\'') {
1064
1065                   /**** in PHP \r and \n are valid in string literals ****/
1066                   //                  if ((currentCharacter == '\n')
1067                   //                    || (currentCharacter == '\r')) {
1068                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1069                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1070                   //                      if (currentPosition + lookAhead == source.length)
1071                   //                        break;
1072                   //                      if (source[currentPosition + lookAhead] == '\n')
1073                   //                        break;
1074                   //                      if (source[currentPosition + lookAhead] == '\"') {
1075                   //                        currentPosition += lookAhead + 1;
1076                   //                        break;
1077                   //                      }
1078                   //                    }
1079                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1080                   //                  }
1081                   if (currentCharacter == '\\') {
1082                     int escapeSize = currentPosition;
1083                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1084                     //the escape-scanning call below may modify unicodeAsBackSlash; keep its previous value, which is needed a few lines further down
1085                     scanSingleQuotedEscapeCharacter();
1086                     escapeSize = currentPosition - escapeSize;
1087                     if (withoutUnicodePtr == 0) {
1088                       //buffer all the entries that have been left aside....
1089                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1090                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1091                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1092                     } else { //overwrite the / in the buffer
1093                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1094                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1095                         withoutUnicodePtr--;
1096                       }
1097                     }
1098                   }
1099                   // consume next character
1100                   unicodeAsBackSlash = false;
1101                   currentCharacter = source[currentPosition++];
1102                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1103                   //                    && (source[currentPosition] == 'u')) {
1104                   //                    getNextUnicodeChar();
1105                   //                  } else {
1106                   if (withoutUnicodePtr != 0) {
1107                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1108                   }
1109                   //                  }
1110
1111                 }
1112               } catch (IndexOutOfBoundsException e) {
1113                 throw new InvalidInputException(UNTERMINATED_STRING);
1114               } catch (InvalidInputException e) {
1115                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1116                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1117                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1118                     if (currentPosition + lookAhead == source.length)
1119                       break;
1120                     if (source[currentPosition + lookAhead] == '\n')
1121                       break;
1122                     if (source[currentPosition + lookAhead] == '\'') {
1123                       currentPosition += lookAhead + 1;
1124                       break;
1125                     }
1126                   }
1127
1128                 }
1129                 throw e; // rethrow
1130               }
1131               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1132                 if (currentLine == null) {
1133                   currentLine = new NLSLine();
1134                   lines.add(currentLine);
1135                 }
1136                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1137               }
1138               return TokenNameStringConstant;
1139             case '"' :
1140               try {
1141                 // consume next character
1142                 unicodeAsBackSlash = false;
1143                 currentCharacter = source[currentPosition++];
1144                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1145                 //                  && (source[currentPosition] == 'u')) {
1146                 //                  getNextUnicodeChar();
1147                 //                } else {
1148                 //                  if (withoutUnicodePtr != 0) {
1149                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1150                 //                      currentCharacter;
1151                 //                  }
1152                 //                }
1153
1154                 while (currentCharacter != '"') {
1155
1156                   /**** in PHP \r and \n are valid in string literals ****/
1157                   //                  if ((currentCharacter == '\n')
1158                   //                    || (currentCharacter == '\r')) {
1159                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1160                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1161                   //                      if (currentPosition + lookAhead == source.length)
1162                   //                        break;
1163                   //                      if (source[currentPosition + lookAhead] == '\n')
1164                   //                        break;
1165                   //                      if (source[currentPosition + lookAhead] == '\"') {
1166                   //                        currentPosition += lookAhead + 1;
1167                   //                        break;
1168                   //                      }
1169                   //                    }
1170                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1171                   //                  }
1172                   if (currentCharacter == '\\') {
1173                     int escapeSize = currentPosition;
1174                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1175                     //the escape-scanning call below may modify unicodeAsBackSlash; keep its previous value, which is needed a few lines further down
1176                     scanDoubleQuotedEscapeCharacter();
1177                     escapeSize = currentPosition - escapeSize;
1178                     if (withoutUnicodePtr == 0) {
1179                       //buffer all the entries that have been left aside....
1180                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1181                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1182                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1183                     } else { //overwrite the / in the buffer
1184                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1185                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1186                         withoutUnicodePtr--;
1187                       }
1188                     }
1189                   }
1190                   // consume next character
1191                   unicodeAsBackSlash = false;
1192                   currentCharacter = source[currentPosition++];
1193                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1194                   //                    && (source[currentPosition] == 'u')) {
1195                   //                    getNextUnicodeChar();
1196                   //                  } else {
1197                   if (withoutUnicodePtr != 0) {
1198                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1199                   }
1200                   //                  }
1201
1202                 }
1203               } catch (IndexOutOfBoundsException e) {
1204                 throw new InvalidInputException(UNTERMINATED_STRING);
1205               } catch (InvalidInputException e) {
1206                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1207                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1208                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1209                     if (currentPosition + lookAhead == source.length)
1210                       break;
1211                     if (source[currentPosition + lookAhead] == '\n')
1212                       break;
1213                     if (source[currentPosition + lookAhead] == '\"') {
1214                       currentPosition += lookAhead + 1;
1215                       break;
1216                     }
1217                   }
1218
1219                 }
1220                 throw e; // rethrow
1221               }
1222               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1223                 if (currentLine == null) {
1224                   currentLine = new NLSLine();
1225                   lines.add(currentLine);
1226                 }
1227                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1228               }
1229               return TokenNameStringLiteral;
1230             case '`' :
1231               try {
1232                 // consume next character
1233                 unicodeAsBackSlash = false;
1234                 currentCharacter = source[currentPosition++];
1235                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1236                 //                  && (source[currentPosition] == 'u')) {
1237                 //                  getNextUnicodeChar();
1238                 //                } else {
1239                 //                  if (withoutUnicodePtr != 0) {
1240                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1241                 //                      currentCharacter;
1242                 //                  }
1243                 //                }
1244
1245                 while (currentCharacter != '`') {
1246
1247                   /**** in PHP \r and \n are valid in string literals ****/
1248                   //                if ((currentCharacter == '\n')
1249                   //                  || (currentCharacter == '\r')) {
1250                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1251                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1252                   //                    if (currentPosition + lookAhead == source.length)
1253                   //                      break;
1254                   //                    if (source[currentPosition + lookAhead] == '\n')
1255                   //                      break;
1256                   //                    if (source[currentPosition + lookAhead] == '\"') {
1257                   //                      currentPosition += lookAhead + 1;
1258                   //                      break;
1259                   //                    }
1260                   //                  }
1261                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1262                   //                }
1263                   if (currentCharacter == '\\') {
1264                     int escapeSize = currentPosition;
1265                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1266                     //the escape-scanning call below may modify unicodeAsBackSlash; keep its previous value, which is needed a few lines further down
1267                     scanDoubleQuotedEscapeCharacter();
1268                     escapeSize = currentPosition - escapeSize;
1269                     if (withoutUnicodePtr == 0) {
1270                       //buffer all the entries that have been left aside....
1271                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1272                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1273                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1274                     } else { //overwrite the / in the buffer
1275                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1276                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1277                         withoutUnicodePtr--;
1278                       }
1279                     }
1280                   }
1281                   // consume next character
1282                   unicodeAsBackSlash = false;
1283                   currentCharacter = source[currentPosition++];
1284                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1285                   //                    && (source[currentPosition] == 'u')) {
1286                   //                    getNextUnicodeChar();
1287                   //                  } else {
1288                   if (withoutUnicodePtr != 0) {
1289                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1290                   }
1291                   //                  }
1292
1293                 }
1294               } catch (IndexOutOfBoundsException e) {
1295                 throw new InvalidInputException(UNTERMINATED_STRING);
1296               } catch (InvalidInputException e) {
1297                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1298                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1299                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1300                     if (currentPosition + lookAhead == source.length)
1301                       break;
1302                     if (source[currentPosition + lookAhead] == '\n')
1303                       break;
1304                     if (source[currentPosition + lookAhead] == '`') {
1305                       currentPosition += lookAhead + 1;
1306                       break;
1307                     }
1308                   }
1309
1310                 }
1311                 throw e; // rethrow
1312               }
1313               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1314                 if (currentLine == null) {
1315                   currentLine = new NLSLine();
1316                   lines.add(currentLine);
1317                 }
1318                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1319               }
1320               return TokenNameStringInterpolated;
1321             case '#' :
1322             case '/' :
1323               {
1324                 int test;
1325                 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1326                   //line comment
1327                   int endPositionForLineComment = 0;
1328                   try { //get the next char
1329                     currentCharacter = source[currentPosition++];
1330                     //                    if (((currentCharacter = source[currentPosition++])
1331                     //                      == '\\')
1332                     //                      && (source[currentPosition] == 'u')) {
1333                     //                      //-------------unicode traitement ------------
1334                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1335                     //                      currentPosition++;
1336                     //                      while (source[currentPosition] == 'u') {
1337                     //                        currentPosition++;
1338                     //                      }
1339                     //                      if ((c1 =
1340                     //                        Character.getNumericValue(source[currentPosition++]))
1341                     //                        > 15
1342                     //                        || c1 < 0
1343                     //                        || (c2 =
1344                     //                          Character.getNumericValue(source[currentPosition++]))
1345                     //                          > 15
1346                     //                        || c2 < 0
1347                     //                        || (c3 =
1348                     //                          Character.getNumericValue(source[currentPosition++]))
1349                     //                          > 15
1350                     //                        || c3 < 0
1351                     //                        || (c4 =
1352                     //                          Character.getNumericValue(source[currentPosition++]))
1353                     //                          > 15
1354                     //                        || c4 < 0) {
1355                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1356                     //                      } else {
1357                     //                        currentCharacter =
1358                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1359                     //                      }
1360                     //                    }
1361
1362                     //handle the \\u case manually into comment
1363                     //                    if (currentCharacter == '\\') {
1364                     //                      if (source[currentPosition] == '\\')
1365                     //                        currentPosition++;
1366                     //                    } //jump over the \\
1367                     boolean isUnicode = false;
1368                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1369                       if (currentCharacter == '?') {
1370                         if (getNextChar('>')) {
1371                           startPosition = currentPosition - 2;
1372                           phpMode = false;
1373                           return TokenNameStopPHP;
1374                         }
1375                       }
1376
1377                       //get the next char
1378                       isUnicode = false;
1379                       currentCharacter = source[currentPosition++];
1380                       //                      if (((currentCharacter = source[currentPosition++])
1381                       //                        == '\\')
1382                       //                        && (source[currentPosition] == 'u')) {
1383                       //                        isUnicode = true;
1384                       //                        //-------------unicode traitement ------------
1385                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1386                       //                        currentPosition++;
1387                       //                        while (source[currentPosition] == 'u') {
1388                       //                          currentPosition++;
1389                       //                        }
1390                       //                        if ((c1 =
1391                       //                          Character.getNumericValue(source[currentPosition++]))
1392                       //                          > 15
1393                       //                          || c1 < 0
1394                       //                          || (c2 =
1395                       //                            Character.getNumericValue(
1396                       //                              source[currentPosition++]))
1397                       //                            > 15
1398                       //                          || c2 < 0
1399                       //                          || (c3 =
1400                       //                            Character.getNumericValue(
1401                       //                              source[currentPosition++]))
1402                       //                            > 15
1403                       //                          || c3 < 0
1404                       //                          || (c4 =
1405                       //                            Character.getNumericValue(
1406                       //                              source[currentPosition++]))
1407                       //                            > 15
1408                       //                          || c4 < 0) {
1409                       //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1410                       //                        } else {
1411                       //                          currentCharacter =
1412                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1413                       //                        }
1414                       //                      }
1415                       //handle the \\u case manually into comment
1416                       //                      if (currentCharacter == '\\') {
1417                       //                        if (source[currentPosition] == '\\')
1418                       //                          currentPosition++;
1419                       //                      } //jump over the \\
1420                     }
1421                     if (isUnicode) {
1422                       endPositionForLineComment = currentPosition - 6;
1423                     } else {
1424                       endPositionForLineComment = currentPosition - 1;
1425                     }
1426                     recordComment(false);
1427                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1428                       checkNonExternalizeString();
1429                       if (recordLineSeparator) {
1430                         if (isUnicode) {
1431                           pushUnicodeLineSeparator();
1432                         } else {
1433                           pushLineSeparator();
1434                         }
1435                       } else {
1436                         currentLine = null;
1437                       }
1438                     }
1439                     if (tokenizeComments) {
1440                       if (!isUnicode) {
1441                         currentPosition = endPositionForLineComment;
1442                         // reset one character behind
1443                       }
1444                       return TokenNameCOMMENT_LINE;
1445                     }
1446                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1447                     if (tokenizeComments) {
1448                       currentPosition--;
1449                       // reset one character behind
1450                       return TokenNameCOMMENT_LINE;
1451                     }
1452                   }
1453                   break;
1454                 }
1455                 if (test > 0) {
1456                   //traditional and annotation comment
1457                   boolean isJavadoc = false, star = false;
1458                   // consume next character
1459                   unicodeAsBackSlash = false;
1460                   currentCharacter = source[currentPosition++];
1461                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1462                   //                    && (source[currentPosition] == 'u')) {
1463                   //                    getNextUnicodeChar();
1464                   //                  } else {
1465                   //                    if (withoutUnicodePtr != 0) {
1466                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1467                   //                        currentCharacter;
1468                   //                    }
1469                   //                  }
1470
1471                   if (currentCharacter == '*') {
1472                     isJavadoc = true;
1473                     star = true;
1474                   }
1475                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1476                     checkNonExternalizeString();
1477                     if (recordLineSeparator) {
1478                       pushLineSeparator();
1479                     } else {
1480                       currentLine = null;
1481                     }
1482                   }
1483                   try { //get the next char
1484                     currentCharacter = source[currentPosition++];
1485                     //                    if (((currentCharacter = source[currentPosition++])
1486                     //                      == '\\')
1487                     //                      && (source[currentPosition] == 'u')) {
1488                     //                      //-------------unicode traitement ------------
1489                     //                      getNextUnicodeChar();
1490                     //                    }
1491                     //handle the \\u case manually into comment
1492                     //                    if (currentCharacter == '\\') {
1493                     //                      if (source[currentPosition] == '\\')
1494                     //                        currentPosition++;
1495                     //                      //jump over the \\
1496                     //                    }
1497                     // empty comment is not a javadoc /**/
1498                     if (currentCharacter == '/') {
1499                       isJavadoc = false;
1500                     }
1501                     //loop until end of comment */
1502                     while ((currentCharacter != '/') || (!star)) {
1503                       if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1504                         checkNonExternalizeString();
1505                         if (recordLineSeparator) {
1506                           pushLineSeparator();
1507                         } else {
1508                           currentLine = null;
1509                         }
1510                       }
1511                       star = currentCharacter == '*';
1512                       //get next char
1513                       currentCharacter = source[currentPosition++];
1514                       //                      if (((currentCharacter = source[currentPosition++])
1515                       //                        == '\\')
1516                       //                        && (source[currentPosition] == 'u')) {
1517                       //                        //-------------unicode traitement ------------
1518                       //                        getNextUnicodeChar();
1519                       //                      }
1520                       //handle the \\u case manually into comment
1521                       //                      if (currentCharacter == '\\') {
1522                       //                        if (source[currentPosition] == '\\')
1523                       //                          currentPosition++;
1524                       //                      } //jump over the \\
1525                     }
1526                     recordComment(isJavadoc);
1527                     if (tokenizeComments) {
1528                       if (isJavadoc)
1529                         return TokenNameCOMMENT_PHPDOC;
1530                       return TokenNameCOMMENT_BLOCK;
1531                     }
1532                   } catch (IndexOutOfBoundsException e) {
1533                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1534                   }
1535                   break;
1536                 }
1537                 if (getNextChar('='))
1538                   return TokenNameDIVIDE_EQUAL;
1539                 return TokenNameDIVIDE;
1540               }
1541             case '\u001a' :
1542               if (atEnd())
1543                 return TokenNameEOF;
1544               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1545               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1546
1547             default :
1548               if (currentCharacter == '$') {
1549                 while ((currentCharacter = source[currentPosition++]) == '$') {
1550                 }
1551                 if (currentCharacter == '{')
1552                   return TokenNameDOLLAR_LBRACE;
1553                 if (isPHPIdentifierStart(currentCharacter))
1554                   return scanIdentifierOrKeyword(true);
1555                 return TokenNameERROR;
1556               }
1557               if (isPHPIdentifierStart(currentCharacter))
1558                 return scanIdentifierOrKeyword(false);
1559               if (Character.isDigit(currentCharacter))
1560                 return scanNumber(false);
1561               return TokenNameERROR;
1562           }
1563         }
1564       } //-----------------end switch while try--------------------
1565       catch (IndexOutOfBoundsException e) {
1566       }
1567     }
1568     return TokenNameEOF;
1569   }
1570
1571   //  public final void getNextUnicodeChar()
1572   //    throws IndexOutOfBoundsException, InvalidInputException {
1573   //    //VOID
1574   //    //handle the case of unicode.
1575   //    //when a unicode appears then we must use a buffer that holds char internal values
1576   //    //At the end of this method currentCharacter holds the new visited char
1577   //    //and currentPosition points right next after it
1578   //
1579   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1580   //
1581   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1582   //    currentPosition++;
1583   //    while (source[currentPosition] == 'u') {
1584   //      currentPosition++;
1585   //      unicodeSize++;
1586   //    }
1587   //
1588   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1589   //      || c1 < 0
1590   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1591   //      || c2 < 0
1592   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1593   //      || c3 < 0
1594   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1595   //      || c4 < 0) {
1596   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1597   //    } else {
1598   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1599   //      //need the unicode buffer
1600   //      if (withoutUnicodePtr == 0) {
1601   //        //buffer all the entries that have been left aside....
1602   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1603   //        System.arraycopy(
1604   //          source,
1605   //          startPosition,
1606   //          withoutUnicodeBuffer,
1607   //          1,
1608   //          withoutUnicodePtr);
1609   //      }
1610   //      //fill the buffer with the char
1611   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1612   //    }
1613   //    unicodeAsBackSlash = currentCharacter == '\\';
1614   //  }
1615   /* Tokenize a method body, assuming that curly brackets are properly balanced.
1616    */
1617   public final void jumpOverMethodBody() {
1618
1619     this.wasAcr = false;
1620     int found = 1;
1621     try {
1622       while (true) { //loop for jumping over comments
1623         // ---------Consume white space and handles startPosition---------
1624         boolean isWhiteSpace;
1625         do {
1626           startPosition = currentPosition;
1627           currentCharacter = source[currentPosition++];
1628           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1629           //            && (source[currentPosition] == 'u')) {
1630           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1631           //          } else {
1632           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1633             pushLineSeparator();
1634           isWhiteSpace = Character.isWhitespace(currentCharacter);
1635           //          }
1636         } while (isWhiteSpace);
1637
1638         // -------consume token until } is found---------
1639         switch (currentCharacter) {
1640           case '{' :
1641             found++;
1642             break;
1643           case '}' :
1644             found--;
1645             if (found == 0)
1646               return;
1647             break;
1648           case '\'' :
1649             {
1650               boolean test;
1651               test = getNextChar('\\');
1652               if (test) {
1653                 try {
1654                   scanDoubleQuotedEscapeCharacter();
1655                 } catch (InvalidInputException ex) {
1656                 };
1657               } else {
1658                 //                try { // consume next character
1659                 unicodeAsBackSlash = false;
1660                 currentCharacter = source[currentPosition++];
1661                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1662                 //                    && (source[currentPosition] == 'u')) {
1663                 //                    getNextUnicodeChar();
1664                 //                  } else {
1665                 if (withoutUnicodePtr != 0) {
1666                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1667                 }
1668                 //                  }
1669                 //                } catch (InvalidInputException ex) {
1670                 //                };
1671               }
1672               getNextChar('\'');
1673               break;
1674             }
1675           case '"' :
1676             try {
1677               //              try { // consume next character
1678               unicodeAsBackSlash = false;
1679               currentCharacter = source[currentPosition++];
1680               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1681               //                  && (source[currentPosition] == 'u')) {
1682               //                  getNextUnicodeChar();
1683               //                } else {
1684               if (withoutUnicodePtr != 0) {
1685                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1686               }
1687               //                }
1688               //              } catch (InvalidInputException ex) {
1689               //              };
1690               while (currentCharacter != '"') {
1691                 if (currentCharacter == '\r') {
1692                   if (source[currentPosition] == '\n')
1693                     currentPosition++;
1694                   break;
1695                   // the string cannot go further than the line
1696                 }
1697                 if (currentCharacter == '\n') {
1698                   break;
1699                   // the string cannot go further than the line
1700                 }
1701                 if (currentCharacter == '\\') {
1702                   try {
1703                     scanDoubleQuotedEscapeCharacter();
1704                   } catch (InvalidInputException ex) {
1705                   };
1706                 }
1707                 //                try { // consume next character
1708                 unicodeAsBackSlash = false;
1709                 currentCharacter = source[currentPosition++];
1710                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1711                 //                    && (source[currentPosition] == 'u')) {
1712                 //                    getNextUnicodeChar();
1713                 //                  } else {
1714                 if (withoutUnicodePtr != 0) {
1715                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1716                 }
1717                 //                  }
1718                 //                } catch (InvalidInputException ex) {
1719                 //                };
1720               }
1721             } catch (IndexOutOfBoundsException e) {
1722               return;
1723             }
1724             break;
1725           case '/' :
1726             {
1727               int test;
1728               if ((test = getNextChar('/', '*')) == 0) {
1729                 //line comment
1730                 try {
1731                   //get the next char
1732                   currentCharacter = source[currentPosition++];
1733                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1734                   //                    && (source[currentPosition] == 'u')) {
1735                   //                    //-------------unicode traitement ------------
1736                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1737                   //                    currentPosition++;
1738                   //                    while (source[currentPosition] == 'u') {
1739                   //                      currentPosition++;
1740                   //                    }
1741                   //                    if ((c1 =
1742                   //                      Character.getNumericValue(source[currentPosition++]))
1743                   //                      > 15
1744                   //                      || c1 < 0
1745                   //                      || (c2 =
1746                   //                        Character.getNumericValue(source[currentPosition++]))
1747                   //                        > 15
1748                   //                      || c2 < 0
1749                   //                      || (c3 =
1750                   //                        Character.getNumericValue(source[currentPosition++]))
1751                   //                        > 15
1752                   //                      || c3 < 0
1753                   //                      || (c4 =
1754                   //                        Character.getNumericValue(source[currentPosition++]))
1755                   //                        > 15
1756                   //                      || c4 < 0) {
1757                   //                      //error don't care of the value
1758                   //                      currentCharacter = 'A';
1759                   //                    } //something different from \n and \r
1760                   //                    else {
1761                   //                      currentCharacter =
1762                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1763                   //                    }
1764                   //                  }
1765
1766                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1767                     //get the next char
1768                     currentCharacter = source[currentPosition++];
1769                     //                    if (((currentCharacter = source[currentPosition++])
1770                     //                      == '\\')
1771                     //                      && (source[currentPosition] == 'u')) {
1772                     //                      //-------------unicode traitement ------------
1773                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1774                     //                      currentPosition++;
1775                     //                      while (source[currentPosition] == 'u') {
1776                     //                        currentPosition++;
1777                     //                      }
1778                     //                      if ((c1 =
1779                     //                        Character.getNumericValue(source[currentPosition++]))
1780                     //                        > 15
1781                     //                        || c1 < 0
1782                     //                        || (c2 =
1783                     //                          Character.getNumericValue(source[currentPosition++]))
1784                     //                          > 15
1785                     //                        || c2 < 0
1786                     //                        || (c3 =
1787                     //                          Character.getNumericValue(source[currentPosition++]))
1788                     //                          > 15
1789                     //                        || c3 < 0
1790                     //                        || (c4 =
1791                     //                          Character.getNumericValue(source[currentPosition++]))
1792                     //                          > 15
1793                     //                        || c4 < 0) {
1794                     //                        //error don't care of the value
1795                     //                        currentCharacter = 'A';
1796                     //                      } //something different from \n and \r
1797                     //                      else {
1798                     //                        currentCharacter =
1799                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1800                     //                      }
1801                     //                    }
1802                   }
1803                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1804                     pushLineSeparator();
1805                 } catch (IndexOutOfBoundsException e) {
1806                 } //an eof will them be generated
1807                 break;
1808               }
1809               if (test > 0) {
1810                 //traditional and annotation comment
1811                 boolean star = false;
1812                 //                try { // consume next character
1813                 unicodeAsBackSlash = false;
1814                 currentCharacter = source[currentPosition++];
1815                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1816                 //                    && (source[currentPosition] == 'u')) {
1817                 //                    getNextUnicodeChar();
1818                 //                  } else {
1819                 if (withoutUnicodePtr != 0) {
1820                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1821                 }
1822                 //                  };
1823                 //                } catch (InvalidInputException ex) {
1824                 //                };
1825                 if (currentCharacter == '*') {
1826                   star = true;
1827                 }
1828                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1829                   pushLineSeparator();
1830                 try { //get the next char
1831                   currentCharacter = source[currentPosition++];
1832                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1833                   //                    && (source[currentPosition] == 'u')) {
1834                   //                    //-------------unicode traitement ------------
1835                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1836                   //                    currentPosition++;
1837                   //                    while (source[currentPosition] == 'u') {
1838                   //                      currentPosition++;
1839                   //                    }
1840                   //                    if ((c1 =
1841                   //                      Character.getNumericValue(source[currentPosition++]))
1842                   //                      > 15
1843                   //                      || c1 < 0
1844                   //                      || (c2 =
1845                   //                        Character.getNumericValue(source[currentPosition++]))
1846                   //                        > 15
1847                   //                      || c2 < 0
1848                   //                      || (c3 =
1849                   //                        Character.getNumericValue(source[currentPosition++]))
1850                   //                        > 15
1851                   //                      || c3 < 0
1852                   //                      || (c4 =
1853                   //                        Character.getNumericValue(source[currentPosition++]))
1854                   //                        > 15
1855                   //                      || c4 < 0) {
1856                   //                      //error don't care of the value
1857                   //                      currentCharacter = 'A';
1858                   //                    } //something different from * and /
1859                   //                    else {
1860                   //                      currentCharacter =
1861                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1862                   //                    }
1863                   //                  }
1864                   //loop until end of comment */
1865                   while ((currentCharacter != '/') || (!star)) {
1866                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1867                       pushLineSeparator();
1868                     star = currentCharacter == '*';
1869                     //get next char
1870                     currentCharacter = source[currentPosition++];
1871                     //                    if (((currentCharacter = source[currentPosition++])
1872                     //                      == '\\')
1873                     //                      && (source[currentPosition] == 'u')) {
1874                     //                      //-------------unicode traitement ------------
1875                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1876                     //                      currentPosition++;
1877                     //                      while (source[currentPosition] == 'u') {
1878                     //                        currentPosition++;
1879                     //                      }
1880                     //                      if ((c1 =
1881                     //                        Character.getNumericValue(source[currentPosition++]))
1882                     //                        > 15
1883                     //                        || c1 < 0
1884                     //                        || (c2 =
1885                     //                          Character.getNumericValue(source[currentPosition++]))
1886                     //                          > 15
1887                     //                        || c2 < 0
1888                     //                        || (c3 =
1889                     //                          Character.getNumericValue(source[currentPosition++]))
1890                     //                          > 15
1891                     //                        || c3 < 0
1892                     //                        || (c4 =
1893                     //                          Character.getNumericValue(source[currentPosition++]))
1894                     //                          > 15
1895                     //                        || c4 < 0) {
1896                     //                        //error don't care of the value
1897                     //                        currentCharacter = 'A';
1898                     //                      } //something different from * and /
1899                     //                      else {
1900                     //                        currentCharacter =
1901                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1902                     //                      }
1903                     //                    }
1904                   }
1905                 } catch (IndexOutOfBoundsException e) {
1906                   return;
1907                 }
1908                 break;
1909               }
1910               break;
1911             }
1912
1913           default :
1914             if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1915               try {
1916                 scanIdentifierOrKeyword((currentCharacter == '$'));
1917               } catch (InvalidInputException ex) {
1918               };
1919               break;
1920             }
1921             if (Character.isDigit(currentCharacter)) {
1922               try {
1923                 scanNumber(false);
1924               } catch (InvalidInputException ex) {
1925               };
1926               break;
1927             }
1928         }
1929       }
1930       //-----------------end switch while try--------------------
1931     } catch (IndexOutOfBoundsException e) {
1932     } catch (InvalidInputException e) {
1933     }
1934     return;
1935   }
1936   //  public final boolean jumpOverUnicodeWhiteSpace()
1937   //    throws InvalidInputException {
1938   //    //BOOLEAN
1939   //    //handle the case of unicode. Jump over the next whiteSpace
1940   //    //making startPosition pointing on the next available char
1941   //    //On false, the currentCharacter is filled up with a potential
1942   //    //correct char
1943   //
1944   //    try {
1945   //      this.wasAcr = false;
1946   //      int c1, c2, c3, c4;
1947   //      int unicodeSize = 6;
1948   //      currentPosition++;
1949   //      while (source[currentPosition] == 'u') {
1950   //        currentPosition++;
1951   //        unicodeSize++;
1952   //      }
1953   //
1954   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1955   //        || c1 < 0)
1956   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1957   //          || c2 < 0)
1958   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1959   //          || c3 < 0)
1960   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1961   //          || c4 < 0)) {
1962   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1963   //      }
1964   //
1965   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1966   //      if (recordLineSeparator
1967   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1968   //        pushLineSeparator();
1969   //      if (Character.isWhitespace(currentCharacter))
1970   //        return true;
1971   //
1972   //      //buffer the new char which is not a white space
1973   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1974   //      //withoutUnicodePtr == 1 is true here
1975   //      return false;
1976   //    } catch (IndexOutOfBoundsException e) {
1977   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1978   //    }
1979   //  }
1980   public final int[] getLineEnds() {
1981     //return a bounded copy of this.lineEnds
1982
1983     int[] copy;
1984     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
1985     return copy;
1986   }
1987
1988   public char[] getSource() {
1989     return this.source;
1990   }
1991   final char[] optimizedCurrentTokenSource1() {
1992     //return always the same char[] build only once
1993
1994     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
1995     char charOne = source[startPosition];
1996     switch (charOne) {
1997       case 'a' :
1998         return charArray_a;
1999       case 'b' :
2000         return charArray_b;
2001       case 'c' :
2002         return charArray_c;
2003       case 'd' :
2004         return charArray_d;
2005       case 'e' :
2006         return charArray_e;
2007       case 'f' :
2008         return charArray_f;
2009       case 'g' :
2010         return charArray_g;
2011       case 'h' :
2012         return charArray_h;
2013       case 'i' :
2014         return charArray_i;
2015       case 'j' :
2016         return charArray_j;
2017       case 'k' :
2018         return charArray_k;
2019       case 'l' :
2020         return charArray_l;
2021       case 'm' :
2022         return charArray_m;
2023       case 'n' :
2024         return charArray_n;
2025       case 'o' :
2026         return charArray_o;
2027       case 'p' :
2028         return charArray_p;
2029       case 'q' :
2030         return charArray_q;
2031       case 'r' :
2032         return charArray_r;
2033       case 's' :
2034         return charArray_s;
2035       case 't' :
2036         return charArray_t;
2037       case 'u' :
2038         return charArray_u;
2039       case 'v' :
2040         return charArray_v;
2041       case 'w' :
2042         return charArray_w;
2043       case 'x' :
2044         return charArray_x;
2045       case 'y' :
2046         return charArray_y;
2047       case 'z' :
2048         return charArray_z;
2049       default :
2050         return new char[] { charOne };
2051     }
2052   }
2053
2054   final char[] optimizedCurrentTokenSource2() {
2055     //try to return the same char[] build only once
2056
2057     char c0, c1;
2058     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2059     char[][] table = charArray_length[0][hash];
2060     int i = newEntry2;
2061     while (++i < InternalTableSize) {
2062       char[] charArray = table[i];
2063       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2064         return charArray;
2065     }
2066     //---------other side---------
2067     i = -1;
2068     int max = newEntry2;
2069     while (++i <= max) {
2070       char[] charArray = table[i];
2071       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2072         return charArray;
2073     }
2074     //--------add the entry-------
2075     if (++max >= InternalTableSize)
2076       max = 0;
2077     char[] r;
2078     table[max] = (r = new char[] { c0, c1 });
2079     newEntry2 = max;
2080     return r;
2081   }
2082
2083   final char[] optimizedCurrentTokenSource3() {
2084     //try to return the same char[] build only once
2085
2086     char c0, c1, c2;
2087     int hash =
2088       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2089         % TableSize;
2090     char[][] table = charArray_length[1][hash];
2091     int i = newEntry3;
2092     while (++i < InternalTableSize) {
2093       char[] charArray = table[i];
2094       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2095         return charArray;
2096     }
2097     //---------other side---------
2098     i = -1;
2099     int max = newEntry3;
2100     while (++i <= max) {
2101       char[] charArray = table[i];
2102       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2103         return charArray;
2104     }
2105     //--------add the entry-------
2106     if (++max >= InternalTableSize)
2107       max = 0;
2108     char[] r;
2109     table[max] = (r = new char[] { c0, c1, c2 });
2110     newEntry3 = max;
2111     return r;
2112   }
2113
2114   final char[] optimizedCurrentTokenSource4() {
2115     //try to return the same char[] build only once
2116
2117     char c0, c1, c2, c3;
2118     long hash =
2119       ((((long) (c0 = source[startPosition])) << 18)
2120         + ((c1 = source[startPosition + 1]) << 12)
2121         + ((c2 = source[startPosition + 2]) << 6)
2122         + (c3 = source[startPosition + 3]))
2123         % TableSize;
2124     char[][] table = charArray_length[2][(int) hash];
2125     int i = newEntry4;
2126     while (++i < InternalTableSize) {
2127       char[] charArray = table[i];
2128       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2129         return charArray;
2130     }
2131     //---------other side---------
2132     i = -1;
2133     int max = newEntry4;
2134     while (++i <= max) {
2135       char[] charArray = table[i];
2136       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2137         return charArray;
2138     }
2139     //--------add the entry-------
2140     if (++max >= InternalTableSize)
2141       max = 0;
2142     char[] r;
2143     table[max] = (r = new char[] { c0, c1, c2, c3 });
2144     newEntry4 = max;
2145     return r;
2146
2147   }
2148
2149   final char[] optimizedCurrentTokenSource5() {
2150     //try to return the same char[] build only once
2151
2152     char c0, c1, c2, c3, c4;
2153     long hash =
2154       ((((long) (c0 = source[startPosition])) << 24)
2155         + (((long) (c1 = source[startPosition + 1])) << 18)
2156         + ((c2 = source[startPosition + 2]) << 12)
2157         + ((c3 = source[startPosition + 3]) << 6)
2158         + (c4 = source[startPosition + 4]))
2159         % TableSize;
2160     char[][] table = charArray_length[3][(int) hash];
2161     int i = newEntry5;
2162     while (++i < InternalTableSize) {
2163       char[] charArray = table[i];
2164       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2165         return charArray;
2166     }
2167     //---------other side---------
2168     i = -1;
2169     int max = newEntry5;
2170     while (++i <= max) {
2171       char[] charArray = table[i];
2172       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2173         return charArray;
2174     }
2175     //--------add the entry-------
2176     if (++max >= InternalTableSize)
2177       max = 0;
2178     char[] r;
2179     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2180     newEntry5 = max;
2181     return r;
2182
2183   }
2184
2185   final char[] optimizedCurrentTokenSource6() {
2186     //try to return the same char[] build only once
2187
2188     char c0, c1, c2, c3, c4, c5;
2189     long hash =
2190       ((((long) (c0 = source[startPosition])) << 32)
2191         + (((long) (c1 = source[startPosition + 1])) << 24)
2192         + (((long) (c2 = source[startPosition + 2])) << 18)
2193         + ((c3 = source[startPosition + 3]) << 12)
2194         + ((c4 = source[startPosition + 4]) << 6)
2195         + (c5 = source[startPosition + 5]))
2196         % TableSize;
2197     char[][] table = charArray_length[4][(int) hash];
2198     int i = newEntry6;
2199     while (++i < InternalTableSize) {
2200       char[] charArray = table[i];
2201       if ((c0 == charArray[0])
2202         && (c1 == charArray[1])
2203         && (c2 == charArray[2])
2204         && (c3 == charArray[3])
2205         && (c4 == charArray[4])
2206         && (c5 == charArray[5]))
2207         return charArray;
2208     }
2209     //---------other side---------
2210     i = -1;
2211     int max = newEntry6;
2212     while (++i <= max) {
2213       char[] charArray = table[i];
2214       if ((c0 == charArray[0])
2215         && (c1 == charArray[1])
2216         && (c2 == charArray[2])
2217         && (c3 == charArray[3])
2218         && (c4 == charArray[4])
2219         && (c5 == charArray[5]))
2220         return charArray;
2221     }
2222     //--------add the entry-------
2223     if (++max >= InternalTableSize)
2224       max = 0;
2225     char[] r;
2226     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2227     newEntry6 = max;
2228     return r;
2229   }
2230
2231   public final void pushLineSeparator() throws InvalidInputException {
2232     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2233     final int INCREMENT = 250;
2234
2235     if (this.checkNonExternalizedStringLiterals) {
2236       // reinitialize the current line for non externalize strings purpose
2237       currentLine = null;
2238     }
2239     //currentCharacter is at position currentPosition-1
2240
2241     // cr 000D
2242     if (currentCharacter == '\r') {
2243       int separatorPos = currentPosition - 1;
2244       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2245         return;
2246       //System.out.println("CR-" + separatorPos);
2247       try {
2248         lineEnds[++linePtr] = separatorPos;
2249       } catch (IndexOutOfBoundsException e) {
2250         //linePtr value is correct
2251         int oldLength = lineEnds.length;
2252         int[] old = lineEnds;
2253         lineEnds = new int[oldLength + INCREMENT];
2254         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2255         lineEnds[linePtr] = separatorPos;
2256       }
2257       // look-ahead for merged cr+lf
2258       try {
2259         if (source[currentPosition] == '\n') {
2260           //System.out.println("look-ahead LF-" + currentPosition);
2261           lineEnds[linePtr] = currentPosition;
2262           currentPosition++;
2263           wasAcr = false;
2264         } else {
2265           wasAcr = true;
2266         }
2267       } catch (IndexOutOfBoundsException e) {
2268         wasAcr = true;
2269       }
2270     } else {
2271       // lf 000A
2272       if (currentCharacter == '\n') {
2273         //must merge eventual cr followed by lf
2274         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2275           //System.out.println("merge LF-" + (currentPosition - 1));
2276           lineEnds[linePtr] = currentPosition - 1;
2277         } else {
2278           int separatorPos = currentPosition - 1;
2279           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2280             return;
2281           // System.out.println("LF-" + separatorPos);
2282           try {
2283             lineEnds[++linePtr] = separatorPos;
2284           } catch (IndexOutOfBoundsException e) {
2285             //linePtr value is correct
2286             int oldLength = lineEnds.length;
2287             int[] old = lineEnds;
2288             lineEnds = new int[oldLength + INCREMENT];
2289             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2290             lineEnds[linePtr] = separatorPos;
2291           }
2292         }
2293         wasAcr = false;
2294       }
2295     }
2296   }
2297   public final void pushUnicodeLineSeparator() {
2298     // isUnicode means that the \r or \n has been read as a unicode character
2299
2300     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2301
2302     final int INCREMENT = 250;
2303     //currentCharacter is at position currentPosition-1
2304
2305     if (this.checkNonExternalizedStringLiterals) {
2306       // reinitialize the current line for non externalize strings purpose
2307       currentLine = null;
2308     }
2309
2310     // cr 000D
2311     if (currentCharacter == '\r') {
2312       int separatorPos = currentPosition - 6;
2313       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2314         return;
2315       //System.out.println("CR-" + separatorPos);
2316       try {
2317         lineEnds[++linePtr] = separatorPos;
2318       } catch (IndexOutOfBoundsException e) {
2319         //linePtr value is correct
2320         int oldLength = lineEnds.length;
2321         int[] old = lineEnds;
2322         lineEnds = new int[oldLength + INCREMENT];
2323         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2324         lineEnds[linePtr] = separatorPos;
2325       }
2326       // look-ahead for merged cr+lf
2327       if (source[currentPosition] == '\n') {
2328         //System.out.println("look-ahead LF-" + currentPosition);
2329         lineEnds[linePtr] = currentPosition;
2330         currentPosition++;
2331         wasAcr = false;
2332       } else {
2333         wasAcr = true;
2334       }
2335     } else {
2336       // lf 000A
2337       if (currentCharacter == '\n') {
2338         //must merge eventual cr followed by lf
2339         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2340           //System.out.println("merge LF-" + (currentPosition - 1));
2341           lineEnds[linePtr] = currentPosition - 6;
2342         } else {
2343           int separatorPos = currentPosition - 6;
2344           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2345             return;
2346           // System.out.println("LF-" + separatorPos);
2347           try {
2348             lineEnds[++linePtr] = separatorPos;
2349           } catch (IndexOutOfBoundsException e) {
2350             //linePtr value is correct
2351             int oldLength = lineEnds.length;
2352             int[] old = lineEnds;
2353             lineEnds = new int[oldLength + INCREMENT];
2354             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2355             lineEnds[linePtr] = separatorPos;
2356           }
2357         }
2358         wasAcr = false;
2359       }
2360     }
2361   }
2362   public final void recordComment(boolean isJavadoc) {
2363
2364     // a new annotation comment is recorded
2365     try {
2366       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2367     } catch (IndexOutOfBoundsException e) {
2368       int oldStackLength = commentStops.length;
2369       int[] oldStack = commentStops;
2370       commentStops = new int[oldStackLength + 30];
2371       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2372       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2373       //grows the positions buffers too
2374       int[] old = commentStarts;
2375       commentStarts = new int[oldStackLength + 30];
2376       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2377     }
2378
2379     //the buffer is of a correct size here
2380     commentStarts[commentPtr] = startPosition;
2381   }
2382   public void resetTo(int begin, int end) {
2383     //reset the scanner to a given position where it may rescan again
2384
2385     diet = false;
2386     initialPosition = startPosition = currentPosition = begin;
2387     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2388     commentPtr = -1; // reset comment stack
2389   }
2390
2391   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2392     // the string with "\\u" is a legal string of two chars \ and u
2393     //thus we use a direct access to the source (for regular cases).
2394
2395     //    if (unicodeAsBackSlash) {
2396     //      // consume next character
2397     //      unicodeAsBackSlash = false;
2398     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2399     //        && (source[currentPosition] == 'u')) {
2400     //        getNextUnicodeChar();
2401     //      } else {
2402     //        if (withoutUnicodePtr != 0) {
2403     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2404     //        }
2405     //      }
2406     //    } else
2407     currentCharacter = source[currentPosition++];
2408     switch (currentCharacter) {
2409       case '\'' :
2410         currentCharacter = '\'';
2411         break;
2412       case '\\' :
2413         currentCharacter = '\\';
2414         break;
2415       default :
2416         currentCharacter = '\\';
2417         currentPosition--;
2418     }
2419   }
2420
2421   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2422     // the string with "\\u" is a legal string of two chars \ and u
2423     //thus we use a direct access to the source (for regular cases).
2424
2425     //    if (unicodeAsBackSlash) {
2426     //      // consume next character
2427     //      unicodeAsBackSlash = false;
2428     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2429     //        && (source[currentPosition] == 'u')) {
2430     //        getNextUnicodeChar();
2431     //      } else {
2432     //        if (withoutUnicodePtr != 0) {
2433     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2434     //        }
2435     //      }
2436     //    } else
2437     currentCharacter = source[currentPosition++];
2438     switch (currentCharacter) {
2439       //      case 'b' :
2440       //        currentCharacter = '\b';
2441       //        break;
2442       case 't' :
2443         currentCharacter = '\t';
2444         break;
2445       case 'n' :
2446         currentCharacter = '\n';
2447         break;
2448         //      case 'f' :
2449         //        currentCharacter = '\f';
2450         //        break;
2451       case 'r' :
2452         currentCharacter = '\r';
2453         break;
2454       case '\"' :
2455         currentCharacter = '\"';
2456         break;
2457       case '\'' :
2458         currentCharacter = '\'';
2459         break;
2460       case '\\' :
2461         currentCharacter = '\\';
2462         break;
2463       case '$' :
2464         currentCharacter = '$';
2465         break;
2466       default :
2467         // -----------octal escape--------------
2468         // OctalDigit
2469         // OctalDigit OctalDigit
2470         // ZeroToThree OctalDigit OctalDigit
2471
2472         int number = Character.getNumericValue(currentCharacter);
2473         if (number >= 0 && number <= 7) {
2474           boolean zeroToThreeNot = number > 3;
2475           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2476             int digit = Character.getNumericValue(currentCharacter);
2477             if (digit >= 0 && digit <= 7) {
2478               number = (number * 8) + digit;
2479               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2480                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2481                   currentPosition--;
2482                 } else {
2483                   digit = Character.getNumericValue(currentCharacter);
2484                   if (digit >= 0 && digit <= 7) {
2485                     // has read \ZeroToThree OctalDigit OctalDigit
2486                     number = (number * 8) + digit;
2487                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2488                     currentPosition--;
2489                   }
2490                 }
2491               } else { // has read \OctalDigit NonDigit--> ignore last character
2492                 currentPosition--;
2493               }
2494             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2495               currentPosition--;
2496             }
2497           } else { // has read \OctalDigit --> ignore last character
2498             currentPosition--;
2499           }
2500           if (number > 255)
2501             throw new InvalidInputException(INVALID_ESCAPE);
2502           currentCharacter = (char) number;
2503         }
2504         //else
2505         //     throw new InvalidInputException(INVALID_ESCAPE);
2506     }
2507   }
2508
2509   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2510   //    return scanIdentifierOrKeyword( false );
2511   //  }
2512
2513   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2514     //test keywords
2515
2516     //first dispatch on the first char.
2517     //then the length. If there are several
2518     //keywors with the same length AND the same first char, then do another
2519     //disptach on the second char :-)...cool....but fast !
2520
2521     useAssertAsAnIndentifier = false;
2522
2523     while (getNextCharAsJavaIdentifierPart()) {
2524     };
2525
2526     if (isVariable) {
2527       return TokenNameVariable;
2528     }
2529     int index, length;
2530     char[] data;
2531     char firstLetter;
2532     //    if (withoutUnicodePtr == 0)
2533
2534     //quick test on length == 1 but not on length > 12 while most identifier
2535     //have a length which is <= 12...but there are lots of identifier with
2536     //only one char....
2537
2538     //      {
2539     if ((length = currentPosition - startPosition) == 1)
2540       return TokenNameIdentifier;
2541     //  data = source;
2542     data = new char[length];
2543     index = startPosition;
2544     for (int i = 0; i < length; i++) {
2545       data[i] = Character.toLowerCase(source[index + i]);
2546     }
2547     index = 0;
2548     //    } else {
2549     //      if ((length = withoutUnicodePtr) == 1)
2550     //        return TokenNameIdentifier;
2551     //      // data = withoutUnicodeBuffer;
2552     //      data = new char[withoutUnicodeBuffer.length];
2553     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2554     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2555     //      }
2556     //      index = 1;
2557     //    }
2558
2559     firstLetter = data[index];
2560     switch (firstLetter) {
2561
2562       case 'a' : // as and array
2563         switch (length) {
2564           case 2 : //as
2565             if ((data[++index] == 's')) {
2566               return TokenNameas;
2567             } else {
2568               return TokenNameIdentifier;
2569             }
2570           case 3 : //and
2571             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2572               return TokenNameAND;
2573             } else {
2574               return TokenNameIdentifier;
2575             }
2576             //          case 5 :
2577             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2578             //              return TokenNamearray;
2579             //            else
2580             //              return TokenNameIdentifier;
2581           default :
2582             return TokenNameIdentifier;
2583         }
2584       case 'b' : //break
2585         switch (length) {
2586           case 5 :
2587             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2588               return TokenNamebreak;
2589             else
2590               return TokenNameIdentifier;
2591           default :
2592             return TokenNameIdentifier;
2593         }
2594
2595       case 'c' : //case class continue
2596         switch (length) {
2597           case 4 :
2598             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2599               return TokenNamecase;
2600             else
2601               return TokenNameIdentifier;
2602           case 5 :
2603             if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2604               return TokenNameclass;
2605             else
2606               return TokenNameIdentifier;
2607           case 8 :
2608             if ((data[++index] == 'o')
2609               && (data[++index] == 'n')
2610               && (data[++index] == 't')
2611               && (data[++index] == 'i')
2612               && (data[++index] == 'n')
2613               && (data[++index] == 'u')
2614               && (data[++index] == 'e'))
2615               return TokenNamecontinue;
2616             else
2617               return TokenNameIdentifier;
2618           default :
2619             return TokenNameIdentifier;
2620         }
2621
2622       case 'd' : //define default do
2623         switch (length) {
2624           case 2 :
2625             if ((data[++index] == 'o'))
2626               return TokenNamedo;
2627             else
2628               return TokenNameIdentifier;
2629           case 6 :
2630             if ((data[++index] == 'e')
2631               && (data[++index] == 'f')
2632               && (data[++index] == 'i')
2633               && (data[++index] == 'n')
2634               && (data[++index] == 'e'))
2635               return TokenNamedefine;
2636             else
2637               return TokenNameIdentifier;
2638           case 7 :
2639             if ((data[++index] == 'e')
2640               && (data[++index] == 'f')
2641               && (data[++index] == 'a')
2642               && (data[++index] == 'u')
2643               && (data[++index] == 'l')
2644               && (data[++index] == 't'))
2645               return TokenNamedefault;
2646             else
2647               return TokenNameIdentifier;
2648           default :
2649             return TokenNameIdentifier;
2650         }
2651       case 'e' : //echo else elseif extends
2652         switch (length) {
2653           case 4 :
2654             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2655               return TokenNameecho;
2656             else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2657               return TokenNameelse;
2658             else
2659               return TokenNameIdentifier;
2660           case 5 : // endif
2661             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2662               return TokenNameendif;
2663             else
2664               return TokenNameIdentifier;
2665           case 6 : // endfor
2666             if ((data[++index] == 'n')
2667               && (data[++index] == 'd')
2668               && (data[++index] == 'f')
2669               && (data[++index] == 'o')
2670               && (data[++index] == 'r'))
2671               return TokenNameendfor;
2672             else if (
2673               (data[index] == 'l')
2674                 && (data[++index] == 's')
2675                 && (data[++index] == 'e')
2676                 && (data[++index] == 'i')
2677                 && (data[++index] == 'f'))
2678               return TokenNameelseif;
2679             else
2680               return TokenNameIdentifier;
2681           case 7 :
2682             if ((data[++index] == 'x')
2683               && (data[++index] == 't')
2684               && (data[++index] == 'e')
2685               && (data[++index] == 'n')
2686               && (data[++index] == 'd')
2687               && (data[++index] == 's'))
2688               return TokenNameextends;
2689             else
2690               return TokenNameIdentifier;
2691           case 8 : // endwhile
2692             if ((data[++index] == 'n')
2693               && (data[++index] == 'd')
2694               && (data[++index] == 'w')
2695               && (data[++index] == 'h')
2696               && (data[++index] == 'i')
2697               && (data[++index] == 'l')
2698               && (data[++index] == 'e'))
2699               return TokenNameendwhile;
2700             else
2701               return TokenNameIdentifier;
2702           case 9 : // endswitch
2703             if ((data[++index] == 'n')
2704               && (data[++index] == 'd')
2705               && (data[++index] == 's')
2706               && (data[++index] == 'w')
2707               && (data[++index] == 'i')
2708               && (data[++index] == 't')
2709               && (data[++index] == 'c')
2710               && (data[++index] == 'h'))
2711               return TokenNameendswitch;
2712             else
2713               return TokenNameIdentifier;
2714           case 10 : // endforeach
2715             if ((data[++index] == 'n')
2716               && (data[++index] == 'd')
2717               && (data[++index] == 'f')
2718               && (data[++index] == 'o')
2719               && (data[++index] == 'r')
2720               && (data[++index] == 'e')
2721               && (data[++index] == 'a')
2722               && (data[++index] == 'c')
2723               && (data[++index] == 'h'))
2724               return TokenNameendforeach;
2725             else
2726               return TokenNameIdentifier;
2727
2728           default :
2729             return TokenNameIdentifier;
2730         }
2731
2732       case 'f' : //for false function
2733         switch (length) {
2734           case 3 :
2735             if ((data[++index] == 'o') && (data[++index] == 'r'))
2736               return TokenNamefor;
2737             else
2738               return TokenNameIdentifier;
2739           case 5 :
2740             if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2741               return TokenNamefalse;
2742             else
2743               return TokenNameIdentifier;
2744           case 7 : // function
2745             if ((data[++index] == 'o')
2746               && (data[++index] == 'r')
2747               && (data[++index] == 'e')
2748               && (data[++index] == 'a')
2749               && (data[++index] == 'c')
2750               && (data[++index] == 'h'))
2751               return TokenNameforeach;
2752             else
2753               return TokenNameIdentifier;
2754           case 8 : // function
2755             if ((data[++index] == 'u')
2756               && (data[++index] == 'n')
2757               && (data[++index] == 'c')
2758               && (data[++index] == 't')
2759               && (data[++index] == 'i')
2760               && (data[++index] == 'o')
2761               && (data[++index] == 'n'))
2762               return TokenNamefunction;
2763             else
2764               return TokenNameIdentifier;
2765           default :
2766             return TokenNameIdentifier;
2767         }
2768       case 'g' : //global
2769         if (length == 6) {
2770           if ((data[++index] == 'l')
2771             && (data[++index] == 'o')
2772             && (data[++index] == 'b')
2773             && (data[++index] == 'a')
2774             && (data[++index] == 'l')) {
2775             return TokenNameglobal;
2776           }
2777         }
2778         return TokenNameIdentifier;
2779
2780       case 'i' : //if int
2781         switch (length) {
2782           case 2 :
2783             if (data[++index] == 'f')
2784               return TokenNameif;
2785             else
2786               return TokenNameIdentifier;
2787             //          case 3 :
2788             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2789             //              return TokenNameint;
2790             //            else
2791             //              return TokenNameIdentifier;
2792           case 7 :
2793             if ((data[++index] == 'n')
2794               && (data[++index] == 'c')
2795               && (data[++index] == 'l')
2796               && (data[++index] == 'u')
2797               && (data[++index] == 'd')
2798               && (data[++index] == 'e'))
2799               return TokenNameinclude;
2800             else
2801               return TokenNameIdentifier;
2802           case 12 :
2803             if ((data[++index] == 'n')
2804               && (data[++index] == 'c')
2805               && (data[++index] == 'l')
2806               && (data[++index] == 'u')
2807               && (data[++index] == 'd')
2808               && (data[++index] == 'e')
2809               && (data[++index] == '_')
2810               && (data[++index] == 'o')
2811               && (data[++index] == 'n')
2812               && (data[++index] == 'c')
2813               && (data[++index] == 'e'))
2814               return TokenNameinclude_once;
2815             else
2816               return TokenNameIdentifier;
2817           default :
2818             return TokenNameIdentifier;
2819         }
2820
2821       case 'l' : //list
2822         if (length == 4) {
2823           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2824             return TokenNamelist;
2825           }
2826         }
2827         return TokenNameIdentifier;
2828
2829       case 'n' : // new null
2830         switch (length) {
2831           case 3 :
2832             if ((data[++index] == 'e') && (data[++index] == 'w'))
2833               return TokenNamenew;
2834             else
2835               return TokenNameIdentifier;
2836           case 4 :
2837             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2838               return TokenNamenull;
2839             else
2840               return TokenNameIdentifier;
2841
2842           default :
2843             return TokenNameIdentifier;
2844         }
2845       case 'o' : // or old_function
2846         if (length == 2) {
2847           if (data[++index] == 'r') {
2848             return TokenNameOR;
2849           }
2850         }
2851         //        if (length == 12) {
2852         //          if ((data[++index] == 'l')
2853         //            && (data[++index] == 'd')
2854         //            && (data[++index] == '_')
2855         //            && (data[++index] == 'f')
2856         //            && (data[++index] == 'u')
2857         //            && (data[++index] == 'n')
2858         //            && (data[++index] == 'c')
2859         //            && (data[++index] == 't')
2860         //            && (data[++index] == 'i')
2861         //            && (data[++index] == 'o')
2862         //            && (data[++index] == 'n')) {
2863         //            return TokenNameold_function;
2864         //          }
2865         //        }
2866         return TokenNameIdentifier;
2867
2868       case 'p' : // print
2869         if (length == 5) {
2870           if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2871             return TokenNameprint;
2872           }
2873         }
2874         return TokenNameIdentifier;
2875       case 'r' : //return require require_once
2876         if (length == 6) {
2877           if ((data[++index] == 'e')
2878             && (data[++index] == 't')
2879             && (data[++index] == 'u')
2880             && (data[++index] == 'r')
2881             && (data[++index] == 'n')) {
2882             return TokenNamereturn;
2883           }
2884         } else if (length == 7) {
2885           if ((data[++index] == 'e')
2886             && (data[++index] == 'q')
2887             && (data[++index] == 'u')
2888             && (data[++index] == 'i')
2889             && (data[++index] == 'r')
2890             && (data[++index] == 'e')) {
2891             return TokenNamerequire;
2892           }
2893         } else if (length == 12) {
2894           if ((data[++index] == 'e')
2895             && (data[++index] == 'q')
2896             && (data[++index] == 'u')
2897             && (data[++index] == 'i')
2898             && (data[++index] == 'r')
2899             && (data[++index] == 'e')
2900             && (data[++index] == '_')
2901             && (data[++index] == 'o')
2902             && (data[++index] == 'n')
2903             && (data[++index] == 'c')
2904             && (data[++index] == 'e')) {
2905             return TokenNamerequire_once;
2906           }
2907         } else
2908           return TokenNameIdentifier;
2909
2910       case 's' : //static switch
2911         switch (length) {
2912           case 6 :
2913             if (data[++index] == 't')
2914               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2915                 return TokenNamestatic;
2916               } else
2917                 return TokenNameIdentifier;
2918             else if (
2919               (data[index] == 'w')
2920                 && (data[++index] == 'i')
2921                 && (data[++index] == 't')
2922                 && (data[++index] == 'c')
2923                 && (data[++index] == 'h'))
2924               return TokenNameswitch;
2925             else
2926               return TokenNameIdentifier;
2927           default :
2928             return TokenNameIdentifier;
2929         }
2930
2931       case 't' : // true
2932         switch (length) {
2933
2934           case 4 :
2935             if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2936               return TokenNametrue;
2937             else
2938               return TokenNameIdentifier;
2939             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2940             //              return TokenNamethis;
2941
2942           default :
2943             return TokenNameIdentifier;
2944         }
2945
2946       case 'v' : //var
2947         switch (length) {
2948           case 3 :
2949             if ((data[++index] == 'a') && (data[++index] == 'r'))
2950               return TokenNamevar;
2951             else
2952               return TokenNameIdentifier;
2953
2954           default :
2955             return TokenNameIdentifier;
2956         }
2957
2958       case 'w' : //while
2959         switch (length) {
2960           case 5 :
2961             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2962               return TokenNamewhile;
2963             else
2964               return TokenNameIdentifier;
2965             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2966             //return TokenNamewidefp ;
2967             //else
2968             //return TokenNameIdentifier;
2969           default :
2970             return TokenNameIdentifier;
2971         }
2972
2973       case 'x' : //xor
2974         switch (length) {
2975           case 3 :
2976             if ((data[++index] == 'o') && (data[++index] == 'r'))
2977               return TokenNameXOR;
2978             else
2979               return TokenNameIdentifier;
2980
2981           default :
2982             return TokenNameIdentifier;
2983         }
2984       default :
2985         return TokenNameIdentifier;
2986     }
2987   }
2988   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
2989
2990     //when entering this method the currentCharacter is the firt
2991     //digit of the number , i.e. it may be preceeded by a . when
2992     //dotPrefix is true
2993
2994     boolean floating = dotPrefix;
2995     if ((!dotPrefix) && (currentCharacter == '0')) {
2996       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
2997         //force the first char of the hexa number do exist...
2998         // consume next character
2999         unicodeAsBackSlash = false;
3000         currentCharacter = source[currentPosition++];
3001         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3002         //          && (source[currentPosition] == 'u')) {
3003         //          getNextUnicodeChar();
3004         //        } else {
3005         //          if (withoutUnicodePtr != 0) {
3006         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3007         //          }
3008         //        }
3009         if (Character.digit(currentCharacter, 16) == -1)
3010           throw new InvalidInputException(INVALID_HEXA);
3011         //---end forcing--
3012         while (getNextCharAsDigit(16)) {
3013         };
3014         //        if (getNextChar('l', 'L') >= 0)
3015         //          return TokenNameLongLiteral;
3016         //        else
3017         return TokenNameIntegerLiteral;
3018       }
3019
3020       //there is x or X in the number
3021       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3022       if (getNextCharAsDigit()) {
3023         //-------------potential octal-----------------
3024         while (getNextCharAsDigit()) {
3025         };
3026
3027         //        if (getNextChar('l', 'L') >= 0) {
3028         //          return TokenNameLongLiteral;
3029         //        }
3030         //
3031         //        if (getNextChar('f', 'F') >= 0) {
3032         //          return TokenNameFloatingPointLiteral;
3033         //        }
3034
3035         if (getNextChar('d', 'D') >= 0) {
3036           return TokenNameDoubleLiteral;
3037         } else { //make the distinction between octal and float ....
3038           if (getNextChar('.')) { //bingo ! ....
3039             while (getNextCharAsDigit()) {
3040             };
3041             if (getNextChar('e', 'E') >= 0) {
3042               // consume next character
3043               unicodeAsBackSlash = false;
3044               currentCharacter = source[currentPosition++];
3045               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3046               //                && (source[currentPosition] == 'u')) {
3047               //                getNextUnicodeChar();
3048               //              } else {
3049               //                if (withoutUnicodePtr != 0) {
3050               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3051               //                }
3052               //              }
3053
3054               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3055                 // consume next character
3056                 unicodeAsBackSlash = false;
3057                 currentCharacter = source[currentPosition++];
3058                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3059                 //                  && (source[currentPosition] == 'u')) {
3060                 //                  getNextUnicodeChar();
3061                 //                } else {
3062                 //                  if (withoutUnicodePtr != 0) {
3063                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3064                 //                      currentCharacter;
3065                 //                  }
3066                 //                }
3067               }
3068               if (!Character.isDigit(currentCharacter))
3069                 throw new InvalidInputException(INVALID_FLOAT);
3070               while (getNextCharAsDigit()) {
3071               };
3072             }
3073             //            if (getNextChar('f', 'F') >= 0)
3074             //              return TokenNameFloatingPointLiteral;
3075             getNextChar('d', 'D'); //jump over potential d or D
3076             return TokenNameDoubleLiteral;
3077           } else {
3078             return TokenNameIntegerLiteral;
3079           }
3080         }
3081       } else {
3082         /* carry on */
3083       }
3084     }
3085
3086     while (getNextCharAsDigit()) {
3087     };
3088
3089     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3090     //      return TokenNameLongLiteral;
3091
3092     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3093       while (getNextCharAsDigit()) {
3094       };
3095       floating = true;
3096     }
3097
3098     //if floating is true both exponant and suffix may be optional
3099
3100     if (getNextChar('e', 'E') >= 0) {
3101       floating = true;
3102       // consume next character
3103       unicodeAsBackSlash = false;
3104       currentCharacter = source[currentPosition++];
3105       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3106       //        && (source[currentPosition] == 'u')) {
3107       //        getNextUnicodeChar();
3108       //      } else {
3109       //        if (withoutUnicodePtr != 0) {
3110       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3111       //        }
3112       //      }
3113
3114       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3115         unicodeAsBackSlash = false;
3116         currentCharacter = source[currentPosition++];
3117         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3118         //          && (source[currentPosition] == 'u')) {
3119         //          getNextUnicodeChar();
3120         //        } else {
3121         //          if (withoutUnicodePtr != 0) {
3122         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3123         //          }
3124         //        }
3125       }
3126       if (!Character.isDigit(currentCharacter))
3127         throw new InvalidInputException(INVALID_FLOAT);
3128       while (getNextCharAsDigit()) {
3129       };
3130     }
3131
3132     if (getNextChar('d', 'D') >= 0)
3133       return TokenNameDoubleLiteral;
3134     //    if (getNextChar('f', 'F') >= 0)
3135     //      return TokenNameFloatingPointLiteral;
3136
3137     //the long flag has been tested before
3138
3139     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3140   }
3141   /**
3142    * Search the line number corresponding to a specific position
3143    *
3144    */
3145   public final int getLineNumber(int position) {
3146
3147     if (lineEnds == null)
3148       return 1;
3149     int length = linePtr + 1;
3150     if (length == 0)
3151       return 1;
3152     int g = 0, d = length - 1;
3153     int m = 0;
3154     while (g <= d) {
3155       m = (g + d) / 2;
3156       if (position < lineEnds[m]) {
3157         d = m - 1;
3158       } else if (position > lineEnds[m]) {
3159         g = m + 1;
3160       } else {
3161         return m + 1;
3162       }
3163     }
3164     if (position < lineEnds[m]) {
3165       return m + 1;
3166     }
3167     return m + 2;
3168   }
3169
3170   public void setPHPMode(boolean mode) {
3171     phpMode = mode;
3172   }
3173
3174   public final void setSource(char[] source) {
3175     //the source-buffer is set to sourceString
3176
3177     if (source == null) {
3178       this.source = new char[0];
3179     } else {
3180       this.source = source;
3181     }
3182     startPosition = -1;
3183     initialPosition = currentPosition = 0;
3184     containsAssertKeyword = false;
3185     withoutUnicodeBuffer = new char[this.source.length];
3186
3187   }
3188
3189   public String toString() {
3190     if (startPosition == source.length)
3191       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3192     if (currentPosition > source.length)
3193       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3194
3195     char front[] = new char[startPosition];
3196     System.arraycopy(source, 0, front, 0, startPosition);
3197
3198     int middleLength = (currentPosition - 1) - startPosition + 1;
3199     char middle[];
3200     if (middleLength > -1) {
3201       middle = new char[middleLength];
3202       System.arraycopy(source, startPosition, middle, 0, middleLength);
3203     } else {
3204       middle = new char[0];
3205     }
3206
3207     char end[] = new char[source.length - (currentPosition - 1)];
3208     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3209
3210     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3211     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3212     + new String(end);
3213   }
3214   public final String toStringAction(int act) {
3215
3216     switch (act) {
3217       case TokenNameERROR :
3218         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3219       case TokenNameStopPHP :
3220         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3221       case TokenNameIdentifier :
3222         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3223       case TokenNameVariable :
3224         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3225       case TokenNameas :
3226         return "as"; //$NON-NLS-1$
3227       case TokenNamebreak :
3228         return "break"; //$NON-NLS-1$
3229       case TokenNamecase :
3230         return "case"; //$NON-NLS-1$
3231       case TokenNameclass :
3232         return "class"; //$NON-NLS-1$
3233       case TokenNamecontinue :
3234         return "continue"; //$NON-NLS-1$
3235       case TokenNamedefault :
3236         return "default"; //$NON-NLS-1$
3237       case TokenNamedefine :
3238         return "define"; //$NON-NLS-1$
3239       case TokenNamedo :
3240         return "do"; //$NON-NLS-1$
3241       case TokenNameecho :
3242         return "echo"; //$NON-NLS-1$
3243       case TokenNameelse :
3244         return "else"; //$NON-NLS-1$
3245       case TokenNameelseif :
3246         return "elseif"; //$NON-NLS-1$
3247       case TokenNameendfor :
3248         return "endfor"; //$NON-NLS-1$
3249       case TokenNameendforeach :
3250         return "endforeach"; //$NON-NLS-1$
3251       case TokenNameendif :
3252         return "endif"; //$NON-NLS-1$
3253       case TokenNameendswitch :
3254         return "endswitch"; //$NON-NLS-1$
3255       case TokenNameendwhile :
3256         return "endwhile"; //$NON-NLS-1$
3257       case TokenNameextends :
3258         return "extends"; //$NON-NLS-1$
3259       case TokenNamefalse :
3260         return "false"; //$NON-NLS-1$
3261       case TokenNamefor :
3262         return "for"; //$NON-NLS-1$
3263       case TokenNameforeach :
3264         return "foreach"; //$NON-NLS-1$
3265       case TokenNamefunction :
3266         return "function"; //$NON-NLS-1$
3267       case TokenNameglobal :
3268         return "global"; //$NON-NLS-1$
3269       case TokenNameif :
3270         return "if"; //$NON-NLS-1$
3271       case TokenNameinclude :
3272         return "include"; //$NON-NLS-1$
3273       case TokenNameinclude_once :
3274         return "include_once"; //$NON-NLS-1$
3275       case TokenNamelist :
3276         return "list"; //$NON-NLS-1$
3277       case TokenNamenew :
3278         return "new"; //$NON-NLS-1$
3279       case TokenNamenull :
3280         return "null"; //$NON-NLS-1$
3281       case TokenNameprint :
3282         return "print"; //$NON-NLS-1$
3283       case TokenNamerequire :
3284         return "require"; //$NON-NLS-1$
3285       case TokenNamerequire_once :
3286         return "require_once"; //$NON-NLS-1$
3287       case TokenNamereturn :
3288         return "return"; //$NON-NLS-1$
3289       case TokenNamestatic :
3290         return "static"; //$NON-NLS-1$
3291       case TokenNameswitch :
3292         return "switch"; //$NON-NLS-1$
3293       case TokenNametrue :
3294         return "true"; //$NON-NLS-1$
3295       case TokenNamevar :
3296         return "var"; //$NON-NLS-1$
3297       case TokenNamewhile :
3298         return "while"; //$NON-NLS-1$
3299       case TokenNameIntegerLiteral :
3300         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3301       case TokenNameDoubleLiteral :
3302         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3303       case TokenNameStringLiteral :
3304         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3305       case TokenNameStringConstant :
3306         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3307       case TokenNameStringInterpolated :
3308         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3309       case TokenNameHEREDOC :
3310         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3311
3312       case TokenNamePLUS_PLUS :
3313         return "++"; //$NON-NLS-1$
3314       case TokenNameMINUS_MINUS :
3315         return "--"; //$NON-NLS-1$
3316       case TokenNameEQUAL_EQUAL :
3317         return "=="; //$NON-NLS-1$
3318       case TokenNameEQUAL_GREATER :
3319         return "=>"; //$NON-NLS-1$
3320       case TokenNameLESS_EQUAL :
3321         return "<="; //$NON-NLS-1$
3322       case TokenNameGREATER_EQUAL :
3323         return ">="; //$NON-NLS-1$
3324       case TokenNameNOT_EQUAL :
3325         return "!="; //$NON-NLS-1$
3326       case TokenNameLEFT_SHIFT :
3327         return "<<"; //$NON-NLS-1$
3328       case TokenNameRIGHT_SHIFT :
3329         return ">>"; //$NON-NLS-1$
3330       case TokenNamePLUS_EQUAL :
3331         return "+="; //$NON-NLS-1$
3332       case TokenNameMINUS_EQUAL :
3333         return "-="; //$NON-NLS-1$
3334       case TokenNameMULTIPLY_EQUAL :
3335         return "*="; //$NON-NLS-1$
3336       case TokenNameDIVIDE_EQUAL :
3337         return "/="; //$NON-NLS-1$
3338       case TokenNameAND_EQUAL :
3339         return "&="; //$NON-NLS-1$
3340       case TokenNameOR_EQUAL :
3341         return "|="; //$NON-NLS-1$
3342       case TokenNameXOR_EQUAL :
3343         return "^="; //$NON-NLS-1$
3344       case TokenNameREMAINDER_EQUAL :
3345         return "%="; //$NON-NLS-1$
3346       case TokenNameLEFT_SHIFT_EQUAL :
3347         return "<<="; //$NON-NLS-1$
3348       case TokenNameRIGHT_SHIFT_EQUAL :
3349         return ">>="; //$NON-NLS-1$
3350       case TokenNameOR_OR :
3351         return "||"; //$NON-NLS-1$
3352       case TokenNameAND_AND :
3353         return "&&"; //$NON-NLS-1$
3354       case TokenNamePLUS :
3355         return "+"; //$NON-NLS-1$
3356       case TokenNameMINUS :
3357         return "-"; //$NON-NLS-1$
3358       case TokenNameMINUS_GREATER :
3359         return "->";
3360       case TokenNameNOT :
3361         return "!"; //$NON-NLS-1$
3362       case TokenNameREMAINDER :
3363         return "%"; //$NON-NLS-1$
3364       case TokenNameXOR :
3365         return "^"; //$NON-NLS-1$
3366       case TokenNameAND :
3367         return "&"; //$NON-NLS-1$
3368       case TokenNameMULTIPLY :
3369         return "*"; //$NON-NLS-1$
3370       case TokenNameOR :
3371         return "|"; //$NON-NLS-1$
3372       case TokenNameTWIDDLE :
3373         return "~"; //$NON-NLS-1$
3374       case TokenNameTWIDDLE_EQUAL :
3375         return "~="; //$NON-NLS-1$
3376       case TokenNameDIVIDE :
3377         return "/"; //$NON-NLS-1$
3378       case TokenNameGREATER :
3379         return ">"; //$NON-NLS-1$
3380       case TokenNameLESS :
3381         return "<"; //$NON-NLS-1$
3382       case TokenNameLPAREN :
3383         return "("; //$NON-NLS-1$
3384       case TokenNameRPAREN :
3385         return ")"; //$NON-NLS-1$
3386       case TokenNameLBRACE :
3387         return "{"; //$NON-NLS-1$
3388       case TokenNameRBRACE :
3389         return "}"; //$NON-NLS-1$
3390       case TokenNameLBRACKET :
3391         return "["; //$NON-NLS-1$
3392       case TokenNameRBRACKET :
3393         return "]"; //$NON-NLS-1$
3394       case TokenNameSEMICOLON :
3395         return ";"; //$NON-NLS-1$
3396       case TokenNameQUESTION :
3397         return "?"; //$NON-NLS-1$
3398       case TokenNameCOLON :
3399         return ":"; //$NON-NLS-1$
3400       case TokenNameCOMMA :
3401         return ","; //$NON-NLS-1$
3402       case TokenNameDOT :
3403         return "."; //$NON-NLS-1$
3404       case TokenNameEQUAL :
3405         return "="; //$NON-NLS-1$
3406       case TokenNameAT :
3407         return "@";
3408       case TokenNameDOLLAR_LBRACE :
3409         return "${";
3410       case TokenNameEOF :
3411         return "EOF"; //$NON-NLS-1$
3412       case TokenNameWHITESPACE :
3413         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3414       case TokenNameCOMMENT_LINE :
3415         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3416       case TokenNameCOMMENT_BLOCK :
3417         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3418       case TokenNameCOMMENT_PHPDOC :
3419         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3420       case TokenNameHTML :
3421         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3422       default :
3423         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3424     }
3425   }
3426
3427   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3428     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3429   }
3430
3431   public Scanner(
3432     boolean tokenizeComments,
3433     boolean tokenizeWhiteSpace,
3434     boolean checkNonExternalizedStringLiterals,
3435     boolean assertMode) {
3436     this.eofPosition = Integer.MAX_VALUE;
3437     this.tokenizeComments = tokenizeComments;
3438     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3439     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3440     this.assertMode = assertMode;
3441   }
3442
3443   private void checkNonExternalizeString() throws InvalidInputException {
3444     if (currentLine == null)
3445       return;
3446     parseTags(currentLine);
3447   }
3448
3449   private void parseTags(NLSLine line) throws InvalidInputException {
3450     String s = new String(getCurrentTokenSource());
3451     int pos = s.indexOf(TAG_PREFIX);
3452     int lineLength = line.size();
3453     while (pos != -1) {
3454       int start = pos + TAG_PREFIX_LENGTH;
3455       int end = s.indexOf(TAG_POSTFIX, start);
3456       String index = s.substring(start, end);
3457       int i = 0;
3458       try {
3459         i = Integer.parseInt(index) - 1;
3460         // Tags are one based not zero based.
3461       } catch (NumberFormatException e) {
3462         i = -1; // we don't want to consider this as a valid NLS tag
3463       }
3464       if (line.exists(i)) {
3465         line.set(i, null);
3466       }
3467       pos = s.indexOf(TAG_PREFIX, start);
3468     }
3469
3470     this.nonNLSStrings = new StringLiteral[lineLength];
3471     int nonNLSCounter = 0;
3472     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3473       StringLiteral literal = (StringLiteral) iterator.next();
3474       if (literal != null) {
3475         this.nonNLSStrings[nonNLSCounter++] = literal;
3476       }
3477     }
3478     if (nonNLSCounter == 0) {
3479       this.nonNLSStrings = null;
3480       currentLine = null;
3481       return;
3482     }
3483     this.wasNonExternalizedStringLiteral = true;
3484     if (nonNLSCounter != lineLength) {
3485       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
3486     }
3487     currentLine = null;
3488   }
3489 }