net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12 import java.util.ArrayList;
  13 import java.util.Iterator;
  14 import java.util.List;
  15 import net.sourceforge.phpdt.core.compiler.CharOperation;
  16 import net.sourceforge.phpdt.core.compiler.IScanner;
  17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  19 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  20 public class Scanner implements IScanner, ITerminalSymbols {
  /*
   * The scanner API consists of:
   * - getNextToken(), which returns the type of the current token (this value
   *   is not memorized by the scanner);
   * - getCurrentTokenSource(), which provides the token "REAL" source (i.e. all
   *   unicode escapes have been transformed into the corresponding char);
   * - startPosition, which gives the token start position in the stream;
   * - currentPosition - 1, which gives the token end position in the stream.
   */
  // 1.4 feature
  private boolean assertMode;
  public boolean useAssertAsAnIndentifier = false;
  // flag indicating whether the processed source contains occurrences of the
  // keyword assert
  public boolean containsAssertKeyword = false;
  public boolean recordLineSeparator;
  public boolean phpMode = false;
  // last character read from source by the getNextChar... methods
  public char currentCharacter;
  // index in source of the first character of the current token
  public int startPosition;
  // index in source of the next character to be read
  public int currentPosition;
  // eofPosition: after this position EOF tokens are generated instead of real
  // tokens from the source
  public int initialPosition, eofPosition;
  public boolean tokenizeComments;
  public boolean tokenizeWhiteSpace;
  // source should be viewed as a window (i.e. a part)
  // of an entire, very large stream
  public char source[];
  // unicode support
  public char[] withoutUnicodeBuffer;
  // when == 0 ==> no unicode in the current token
  public int withoutUnicodePtr;
  public boolean unicodeAsBackSlash = false;
  public boolean scanningFloatLiteral = false;
  // support for /** comments
  //public char[][] comments = new char[10][];
  public int[] commentStops = new int[10];
  public int[] commentStarts = new int[10];
  public int commentPtr = -1; // no comment test with commentPtr value -1
  // diet parsing support - jump over some method body when requested
  public boolean diet = false;
  // support for the poor-line-debuggers ....
  // remember the position of the cr/lf
  public int[] lineEnds = new int[250];
  public int linePtr = -1;
  // NOTE(review): presumably records that the previous character was a
  // carriage return - confirm against the line-recording code
  public boolean wasAcr = false;
  // keys of the error messages reported by the scanner
  public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  //----------------optimized identifier management------------------
  // shared one-character arrays, one per lower-case ASCII letter
  static final char[] charArray_a = new char[]{'a'},
      charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
      charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
      charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
      charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
      charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
      charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
      charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
      charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
      charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
      charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
      charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
      charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
      charArray_z = new char[]{'z'};
  // placeholder of six NUL characters stored in every slot of charArray_length
  // by the instance initializer
  static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
      '\u0000', '\u0000', '\u0000'};
  static final int TableSize = 30, InternalTableSize = 6;
  // 30*6 = 180 entries
  public static final int OptimizedLength = 6;
  // per-instance table indexed as [OptimizedLength][TableSize][InternalTableSize]
  // NOTE(review): presumably a cache of short identifier char arrays used by
  // the optimizedCurrentTokenSourceN() methods - confirm
  public/* static */
  final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
  // support for detecting non-externalized string literals
  int currentLineNr = -1;
  int previousLineNr = -1;
  NLSLine currentLine = null;
  List lines = new ArrayList();
  // a non-NLS tag has the form TAG_PREFIX + <text> + TAG_POSTFIX
  public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
  public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
  public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
  public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
  public StringLiteral[] nonNLSStrings = null;
  public boolean checkNonExternalizedStringLiterals = true;
  public boolean wasNonExternalizedStringLiteral = false;
 110   /* static */{
 111     for (int i = 0; i < 6; i++) {
 112       for (int j = 0; j < TableSize; j++) {
 113         for (int k = 0; k < InternalTableSize; k++) {
 114           charArray_length[i][j][k] = initCharArray;
 115         }
 116       }
 117     }
 118   }
  // NOTE(review): presumably insertion counters for the charArray_length
  // tables, one per identifier length 2..6 - confirm against the
  // optimizedCurrentTokenSourceN() methods
  static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
      newEntry6 = 0;
  // bracket kinds; BracketKinds equals the number of kinds declared here
  public static final int RoundBracket = 0;
  public static final int SquareBracket = 1;
  public static final int CurlyBracket = 2;
  public static final int BracketKinds = 3;
  // task tag support
  public char[][] foundTaskTags = null;
  public char[][] foundTaskMessages;
  public char[][] foundTaskPriorities = null;
  public int[][] foundTaskPositions;
  public int foundTaskCount = 0;
  public char[][] taskTags = null;
  public char[][] taskPriorities = null;
  // compile-time debugging switches, both disabled
  public static final boolean DEBUG = false;
  public static final boolean TRACE = false;
  /**
   * Creates a scanner by delegating to {@link #Scanner(boolean, boolean)} with
   * both arguments set to <code>false</code>.
   */
  public Scanner() {
    this(false, false);
  }
  /**
   * Creates a scanner by delegating to the three-argument constructor, passing
   * <code>false</code> as the third argument.
   *
   * @param tokenizeComments
   *          forwarded unchanged to the three-argument constructor
   * @param tokenizeWhiteSpace
   *          forwarded unchanged to the three-argument constructor
   */
  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
    this(tokenizeComments, tokenizeWhiteSpace, false);
  }
 141   /**
 142    * Determines if the specified character is permissible as the first
 143    * character in a PHP identifier
 144    */
 145   public static boolean isPHPIdentifierStart(char ch) {
 146     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 147   }
 148   /**
 149    * Determines if the specified character may be part of a PHP identifier as
 150    * other than the first character
 151    */
 152   public static boolean isPHPIdentifierPart(char ch) {
 153     return Character.isLetterOrDigit(ch) || (ch == '_')
 154         || (0x7F <= ch && ch <= 0xFF);
 155   }
 156   public final boolean atEnd() {
 157     // This code is not relevant if source is
 158     // Only a part of the real stream input
 159     return source.length == currentPosition;
 160   }
 161   public char[] getCurrentIdentifierSource() {
 162     //return the token REAL source (aka unicodes are precomputed)
 163     char[] result;
 164     //    if (withoutUnicodePtr != 0)
 165     //      //0 is used as a fast test flag so the real first char is in position 1
 166     //      System.arraycopy(
 167     //        withoutUnicodeBuffer,
 168     //        1,
 169     //        result = new char[withoutUnicodePtr],
 170     //        0,
 171     //        withoutUnicodePtr);
 172     //    else {
 173     int length = currentPosition - startPosition;
 174     switch (length) { // see OptimizedLength
 175       case 1 :
 176         return optimizedCurrentTokenSource1();
 177       case 2 :
 178         return optimizedCurrentTokenSource2();
 179       case 3 :
 180         return optimizedCurrentTokenSource3();
 181       case 4 :
 182         return optimizedCurrentTokenSource4();
 183       case 5 :
 184         return optimizedCurrentTokenSource5();
 185       case 6 :
 186         return optimizedCurrentTokenSource6();
 187     }
 188     //no optimization
 189     System.arraycopy(source, startPosition, result = new char[length], 0,
 190         length);
 191     //   }
 192     return result;
 193   }
 194   public int getCurrentTokenEndPosition() {
 195     return this.currentPosition - 1;
 196   }
 197   public final char[] getCurrentTokenSource() {
 198     // Return the token REAL source (aka unicodes are precomputed)
 199     char[] result;
 200     //    if (withoutUnicodePtr != 0)
 201     //      // 0 is used as a fast test flag so the real first char is in position 1
 202     //      System.arraycopy(
 203     //        withoutUnicodeBuffer,
 204     //        1,
 205     //        result = new char[withoutUnicodePtr],
 206     //        0,
 207     //        withoutUnicodePtr);
 208     //    else {
 209     int length;
 210     System.arraycopy(source, startPosition,
 211         result = new char[length = currentPosition - startPosition], 0, length);
 212     //    }
 213     return result;
 214   }
 215   public final char[] getCurrentTokenSource(int startPos) {
 216     // Return the token REAL source (aka unicodes are precomputed)
 217     char[] result;
 218     //    if (withoutUnicodePtr != 0)
 219     //      // 0 is used as a fast test flag so the real first char is in position 1
 220     //      System.arraycopy(
 221     //        withoutUnicodeBuffer,
 222     //        1,
 223     //        result = new char[withoutUnicodePtr],
 224     //        0,
 225     //        withoutUnicodePtr);
 226     //    else {
 227     int length;
 228     System.arraycopy(source, startPos,
 229         result = new char[length = currentPosition - startPos], 0, length);
 230     //  }
 231     return result;
 232   }
 233   public final char[] getCurrentTokenSourceString() {
 234     //return the token REAL source (aka unicodes are precomputed).
 235     //REMOVE the two " that are at the beginning and the end.
 236     char[] result;
 237     if (withoutUnicodePtr != 0)
 238       //0 is used as a fast test flag so the real first char is in position 1
 239       System.arraycopy(withoutUnicodeBuffer, 2,
 240       //2 is 1 (real start) + 1 (to jump over the ")
 241           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 242     else {
 243       int length;
 244       System.arraycopy(source, startPosition + 1,
 245           result = new char[length = currentPosition - startPosition - 2], 0,
 246           length);
 247     }
 248     return result;
 249   }
 250   public int getCurrentTokenStartPosition() {
 251     return this.startPosition;
 252   }
 253   public final char[] getCurrentStringLiteralSource() {
 254     // Return the token REAL source (aka unicodes are precomputed)
 255     char[] result;
 256     int length;
 257     System.arraycopy(source, startPosition + 1,
 258         result = new char[length = currentPosition - startPosition - 2], 0,
 259         length);
 260     //    }
 261     return result;
 262   }
 263   /*
 264    * Search the source position corresponding to the end of a given line number
 265    *
 266    * Line numbers are 1-based, and relative to the scanner initialPosition.
 267    * Character positions are 0-based.
 268    *
 269    * In case the given line number is inconsistent, answers -1.
 270    */
 271   public final int getLineEnd(int lineNumber) {
 272     if (lineEnds == null)
 273       return -1;
 274     if (lineNumber >= lineEnds.length)
 275       return -1;
 276     if (lineNumber <= 0)
 277       return -1;
 278     if (lineNumber == lineEnds.length - 1)
 279       return eofPosition;
 280     return lineEnds[lineNumber - 1];
 281     // next line start one character behind the lineEnd of the previous line
 282   }
 283   /**
 284    * Search the source position corresponding to the beginning of a given line
 285    * number
 286    *
 287    * Line numbers are 1-based, and relative to the scanner initialPosition.
 288    * Character positions are 0-based.
 289    *
 290    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 291    *
 292    * In case the given line number is inconsistent, answers -1.
 293    */
 294   public final int getLineStart(int lineNumber) {
 295     if (lineEnds == null)
 296       return -1;
 297     if (lineNumber >= lineEnds.length)
 298       return -1;
 299     if (lineNumber <= 0)
 300       return -1;
 301     if (lineNumber == 1)
 302       return initialPosition;
 303     return lineEnds[lineNumber - 2] + 1;
 304     // next line start one character behind the lineEnd of the previous line
 305   }
 306   public final boolean getNextChar(char testedChar) {
 307     //BOOLEAN
 308     //handle the case of unicode.
 309     //when a unicode appears then we must use a buffer that holds char
 310     // internal values
 311     //At the end of this method currentCharacter holds the new visited char
 312     //and currentPosition points right next after it
 313     //Both previous lines are true if the currentCharacter is == to the
 314     // testedChar
 315     //On false, no side effect has occured.
 316     //ALL getNextChar.... ARE OPTIMIZED COPIES
 317     int temp = currentPosition;
 318     try {
 319       currentCharacter = source[currentPosition++];
 320       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 321       //        && (source[currentPosition] == 'u')) {
 322       //        //-------------unicode traitement ------------
 323       //        int c1, c2, c3, c4;
 324       //        int unicodeSize = 6;
 325       //        currentPosition++;
 326       //        while (source[currentPosition] == 'u') {
 327       //          currentPosition++;
 328       //          unicodeSize++;
 329       //        }
 330       //
 331       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 332       //          || c1 < 0)
 333       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 334       //            || c2 < 0)
 335       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 336       //            || c3 < 0)
 337       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 338       //            || c4 < 0)) {
 339       //          currentPosition = temp;
 340       //          return false;
 341       //        }
 342       //
 343       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 344       //        if (currentCharacter != testedChar) {
 345       //          currentPosition = temp;
 346       //          return false;
 347       //        }
 348       //        unicodeAsBackSlash = currentCharacter == '\\';
 349       //
 350       //        //need the unicode buffer
 351       //        if (withoutUnicodePtr == 0) {
 352       //          //buffer all the entries that have been left aside....
 353       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 354       //          System.arraycopy(
 355       //            source,
 356       //            startPosition,
 357       //            withoutUnicodeBuffer,
 358       //            1,
 359       //            withoutUnicodePtr);
 360       //        }
 361       //        //fill the buffer with the char
 362       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 363       //        return true;
 364       //
 365       //      } //-------------end unicode traitement--------------
 366       //      else {
 367       if (currentCharacter != testedChar) {
 368         currentPosition = temp;
 369         return false;
 370       }
 371       unicodeAsBackSlash = false;
 372       //        if (withoutUnicodePtr != 0)
 373       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 374       return true;
 375       //      }
 376     } catch (IndexOutOfBoundsException e) {
 377       unicodeAsBackSlash = false;
 378       currentPosition = temp;
 379       return false;
 380     }
 381   }
 382   public final int getNextChar(char testedChar1, char testedChar2) {
 383     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 384     //test can be done with (x==0) for the first and (x>0) for the second
 385     //handle the case of unicode.
 386     //when a unicode appears then we must use a buffer that holds char
 387     // internal values
 388     //At the end of this method currentCharacter holds the new visited char
 389     //and currentPosition points right next after it
 390     //Both previous lines are true if the currentCharacter is == to the
 391     // testedChar1/2
 392     //On false, no side effect has occured.
 393     //ALL getNextChar.... ARE OPTIMIZED COPIES
 394     int temp = currentPosition;
 395     try {
 396       int result;
 397       currentCharacter = source[currentPosition++];
 398       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 399       //        && (source[currentPosition] == 'u')) {
 400       //        //-------------unicode traitement ------------
 401       //        int c1, c2, c3, c4;
 402       //        int unicodeSize = 6;
 403       //        currentPosition++;
 404       //        while (source[currentPosition] == 'u') {
 405       //          currentPosition++;
 406       //          unicodeSize++;
 407       //        }
 408       //
 409       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 410       //          || c1 < 0)
 411       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 412       //            || c2 < 0)
 413       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 414       //            || c3 < 0)
 415       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 416       //            || c4 < 0)) {
 417       //          currentPosition = temp;
 418       //          return 2;
 419       //        }
 420       //
 421       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 422       //        if (currentCharacter == testedChar1)
 423       //          result = 0;
 424       //        else if (currentCharacter == testedChar2)
 425       //          result = 1;
 426       //        else {
 427       //          currentPosition = temp;
 428       //          return -1;
 429       //        }
 430       //
 431       //        //need the unicode buffer
 432       //        if (withoutUnicodePtr == 0) {
 433       //          //buffer all the entries that have been left aside....
 434       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 435       //          System.arraycopy(
 436       //            source,
 437       //            startPosition,
 438       //            withoutUnicodeBuffer,
 439       //            1,
 440       //            withoutUnicodePtr);
 441       //        }
 442       //        //fill the buffer with the char
 443       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 444       //        return result;
 445       //      } //-------------end unicode traitement--------------
 446       //      else {
 447       if (currentCharacter == testedChar1)
 448         result = 0;
 449       else if (currentCharacter == testedChar2)
 450         result = 1;
 451       else {
 452         currentPosition = temp;
 453         return -1;
 454       }
 455       //        if (withoutUnicodePtr != 0)
 456       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 457       return result;
 458       //     }
 459     } catch (IndexOutOfBoundsException e) {
 460       currentPosition = temp;
 461       return -1;
 462     }
 463   }
 464   public final boolean getNextCharAsDigit() {
 465     //BOOLEAN
 466     //handle the case of unicode.
 467     //when a unicode appears then we must use a buffer that holds char
 468     // internal values
 469     //At the end of this method currentCharacter holds the new visited char
 470     //and currentPosition points right next after it
 471     //Both previous lines are true if the currentCharacter is a digit
 472     //On false, no side effect has occured.
 473     //ALL getNextChar.... ARE OPTIMIZED COPIES
 474     int temp = currentPosition;
 475     try {
 476       currentCharacter = source[currentPosition++];
 477       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 478       //        && (source[currentPosition] == 'u')) {
 479       //        //-------------unicode traitement ------------
 480       //        int c1, c2, c3, c4;
 481       //        int unicodeSize = 6;
 482       //        currentPosition++;
 483       //        while (source[currentPosition] == 'u') {
 484       //          currentPosition++;
 485       //          unicodeSize++;
 486       //        }
 487       //
 488       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 489       //          || c1 < 0)
 490       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 491       //            || c2 < 0)
 492       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 493       //            || c3 < 0)
 494       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 495       //            || c4 < 0)) {
 496       //          currentPosition = temp;
 497       //          return false;
 498       //        }
 499       //
 500       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 501       //        if (!Character.isDigit(currentCharacter)) {
 502       //          currentPosition = temp;
 503       //          return false;
 504       //        }
 505       //
 506       //        //need the unicode buffer
 507       //        if (withoutUnicodePtr == 0) {
 508       //          //buffer all the entries that have been left aside....
 509       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 510       //          System.arraycopy(
 511       //            source,
 512       //            startPosition,
 513       //            withoutUnicodeBuffer,
 514       //            1,
 515       //            withoutUnicodePtr);
 516       //        }
 517       //        //fill the buffer with the char
 518       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 519       //        return true;
 520       //      } //-------------end unicode traitement--------------
 521       //      else {
 522       if (!Character.isDigit(currentCharacter)) {
 523         currentPosition = temp;
 524         return false;
 525       }
 526       //        if (withoutUnicodePtr != 0)
 527       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 528       return true;
 529       //      }
 530     } catch (IndexOutOfBoundsException e) {
 531       currentPosition = temp;
 532       return false;
 533     }
 534   }
 535   public final boolean getNextCharAsDigit(int radix) {
 536     //BOOLEAN
 537     //handle the case of unicode.
 538     //when a unicode appears then we must use a buffer that holds char
 539     // internal values
 540     //At the end of this method currentCharacter holds the new visited char
 541     //and currentPosition points right next after it
 542     //Both previous lines are true if the currentCharacter is a digit base on
 543     // radix
 544     //On false, no side effect has occured.
 545     //ALL getNextChar.... ARE OPTIMIZED COPIES
 546     int temp = currentPosition;
 547     try {
 548       currentCharacter = source[currentPosition++];
 549       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 550       //        && (source[currentPosition] == 'u')) {
 551       //        //-------------unicode traitement ------------
 552       //        int c1, c2, c3, c4;
 553       //        int unicodeSize = 6;
 554       //        currentPosition++;
 555       //        while (source[currentPosition] == 'u') {
 556       //          currentPosition++;
 557       //          unicodeSize++;
 558       //        }
 559       //
 560       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 561       //          || c1 < 0)
 562       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 563       //            || c2 < 0)
 564       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 565       //            || c3 < 0)
 566       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 567       //            || c4 < 0)) {
 568       //          currentPosition = temp;
 569       //          return false;
 570       //        }
 571       //
 572       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 573       //        if (Character.digit(currentCharacter, radix) == -1) {
 574       //          currentPosition = temp;
 575       //          return false;
 576       //        }
 577       //
 578       //        //need the unicode buffer
 579       //        if (withoutUnicodePtr == 0) {
 580       //          //buffer all the entries that have been left aside....
 581       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 582       //          System.arraycopy(
 583       //            source,
 584       //            startPosition,
 585       //            withoutUnicodeBuffer,
 586       //            1,
 587       //            withoutUnicodePtr);
 588       //        }
 589       //        //fill the buffer with the char
 590       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 591       //        return true;
 592       //      } //-------------end unicode traitement--------------
 593       //      else {
 594       if (Character.digit(currentCharacter, radix) == -1) {
 595         currentPosition = temp;
 596         return false;
 597       }
 598       //        if (withoutUnicodePtr != 0)
 599       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 600       return true;
 601       //      }
 602     } catch (IndexOutOfBoundsException e) {
 603       currentPosition = temp;
 604       return false;
 605     }
 606   }
 607   public boolean getNextCharAsJavaIdentifierPart() {
 608     //BOOLEAN
 609     //handle the case of unicode.
 610     //when a unicode appears then we must use a buffer that holds char
 611     // internal values
 612     //At the end of this method currentCharacter holds the new visited char
 613     //and currentPosition points right next after it
 614     //Both previous lines are true if the currentCharacter is a
 615     // JavaIdentifierPart
 616     //On false, no side effect has occured.
 617     //ALL getNextChar.... ARE OPTIMIZED COPIES
 618     int temp = currentPosition;
 619     try {
 620       currentCharacter = source[currentPosition++];
 621       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 622       //        && (source[currentPosition] == 'u')) {
 623       //        //-------------unicode traitement ------------
 624       //        int c1, c2, c3, c4;
 625       //        int unicodeSize = 6;
 626       //        currentPosition++;
 627       //        while (source[currentPosition] == 'u') {
 628       //          currentPosition++;
 629       //          unicodeSize++;
 630       //        }
 631       //
 632       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 633       //          || c1 < 0)
 634       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 635       //            || c2 < 0)
 636       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 637       //            || c3 < 0)
 638       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 639       //            || c4 < 0)) {
 640       //          currentPosition = temp;
 641       //          return false;
 642       //        }
 643       //
 644       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 645       //        if (!isPHPIdentifierPart(currentCharacter)) {
 646       //          currentPosition = temp;
 647       //          return false;
 648       //        }
 649       //
 650       //        //need the unicode buffer
 651       //        if (withoutUnicodePtr == 0) {
 652       //          //buffer all the entries that have been left aside....
 653       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 654       //          System.arraycopy(
 655       //            source,
 656       //            startPosition,
 657       //            withoutUnicodeBuffer,
 658       //            1,
 659       //            withoutUnicodePtr);
 660       //        }
 661       //        //fill the buffer with the char
 662       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 663       //        return true;
 664       //      } //-------------end unicode traitement--------------
 665       //      else {
 666       if (!isPHPIdentifierPart(currentCharacter)) {
 667         currentPosition = temp;
 668         return false;
 669       }
 670       //        if (withoutUnicodePtr != 0)
 671       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 672       return true;
 673       //      }
 674     } catch (IndexOutOfBoundsException e) {
 675       currentPosition = temp;
 676       return false;
 677     }
 678   }
 679   public int getCastOrParen() {
 680     int tempPosition = currentPosition;
 681     char tempCharacter = currentCharacter;
 682     int tempToken = TokenNameLPAREN;
 683     boolean found = false;
 684     StringBuffer buf = new StringBuffer();
 685     try {
 686       do {
 687         currentCharacter = source[currentPosition++];
 688       } while (currentCharacter == ' ' || currentCharacter == '\t');
 689       while ((currentCharacter >= 'a' && currentCharacter <= 'z')
 690           || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 691         buf.append(currentCharacter);
 692         currentCharacter = source[currentPosition++];
 693       }
 694       if (buf.length() >= 3 && buf.length() <= 7) {
 695         char[] data = buf.toString().toCharArray();
 696         int index = 0;
 697         switch (data.length) {
 698           case 3 :
 699             // int
 700             if ((data[index] == 'i') && (data[++index] == 'n')
 701                 && (data[++index] == 't')) {
 702               found = true;
 703               tempToken = TokenNameintCAST;
 704             }
 705             break;
 706           case 4 :
 707             // bool real
 708             if ((data[index] == 'b') && (data[++index] == 'o')
 709                 && (data[++index] == 'o') && (data[++index] == 'l')) {
 710               found = true;
 711               tempToken = TokenNameboolCAST;
 712             } else {
 713               index = 0;
 714               if ((data[index] == 'r') && (data[++index] == 'e')
 715                   && (data[++index] == 'a') && (data[++index] == 'l')) {
 716                 found = true;
 717                 tempToken = TokenNamedoubleCAST;
 718               }
 719             }
 720             break;
 721           case 5 :
 722             // array unset float
 723             if ((data[index] == 'a') && (data[++index] == 'r')
 724                 && (data[++index] == 'r') && (data[++index] == 'a')
 725                 && (data[++index] == 'y')) {
 726               found = true;
 727               tempToken = TokenNamearrayCAST;
 728             } else {
 729               index = 0;
 730               if ((data[index] == 'u') && (data[++index] == 'n')
 731                   && (data[++index] == 's') && (data[++index] == 'e')
 732                   && (data[++index] == 't')) {
 733                 found = true;
 734                 tempToken = TokenNameunsetCAST;
 735               } else {
 736                 index = 0;
 737                 if ((data[index] == 'f') && (data[++index] == 'l')
 738                     && (data[++index] == 'o') && (data[++index] == 'a')
 739                     && (data[++index] == 't')) {
 740                   found = true;
 741                   tempToken = TokenNamedoubleCAST;
 742                 }
 743               }
 744             }
 745             break;
 746           case 6 :
 747             // object string double
 748             if ((data[index] == 'o') && (data[++index] == 'b')
 749                 && (data[++index] == 'j') && (data[++index] == 'e')
 750                 && (data[++index] == 'c') && (data[++index] == 't')) {
 751               found = true;
 752               tempToken = TokenNameobjectCAST;
 753             } else {
 754               index = 0;
 755               if ((data[index] == 's') && (data[++index] == 't')
 756                   && (data[++index] == 'r') && (data[++index] == 'i')
 757                   && (data[++index] == 'n') && (data[++index] == 'g')) {
 758                 found = true;
 759                 tempToken = TokenNamestringCAST;
 760               } else {
 761                 index = 0;
 762                 if ((data[index] == 'd') && (data[++index] == 'o')
 763                     && (data[++index] == 'u') && (data[++index] == 'b')
 764                     && (data[++index] == 'l') && (data[++index] == 'e')) {
 765                   found = true;
 766                   tempToken = TokenNamedoubleCAST;
 767                 }
 768               }
 769             }
 770             break;
 771           case 7 :
 772             // boolean integer
 773             if ((data[index] == 'b') && (data[++index] == 'o')
 774                 && (data[++index] == 'o') && (data[++index] == 'l')
 775                 && (data[++index] == 'e') && (data[++index] == 'a')
 776                 && (data[++index] == 'n')) {
 777               found = true;
 778               tempToken = TokenNameboolCAST;
 779             } else {
 780               index = 0;
 781               if ((data[index] == 'i') && (data[++index] == 'n')
 782                   && (data[++index] == 't') && (data[++index] == 'e')
 783                   && (data[++index] == 'g') && (data[++index] == 'e')
 784                   && (data[++index] == 'r')) {
 785                 found = true;
 786                 tempToken = TokenNameintCAST;
 787               }
 788             }
 789             break;
 790         }
 791         if (found) {
 792           while (currentCharacter == ' ' || currentCharacter == '\t') {
 793             currentCharacter = source[currentPosition++];
 794           }
 795           if (currentCharacter == ')') {
 796             return tempToken;
 797           }
 798         }
 799       }
 800     } catch (IndexOutOfBoundsException e) {
 801     }
 802     currentCharacter = tempCharacter;
 803     currentPosition = tempPosition;
 804     return TokenNameLPAREN;
 805   }
 806   public int getNextToken() throws InvalidInputException {
 807     int htmlPosition = currentPosition;
 808     try {
 809       while (!phpMode) {
 810         currentCharacter = source[currentPosition++];
 811         if (currentCharacter == '<') {
 812           if (getNextChar('?')) {
 813             currentCharacter = source[currentPosition++];
 814             if ((currentCharacter == ' ')
 815                 || Character.isWhitespace(currentCharacter)) {
 816               // <?
 817               startPosition = currentPosition;
 818               phpMode = true;
 819               if (tokenizeWhiteSpace) {
 820                 // && (whiteStart != currentPosition - 1)) {
 821                 // reposition scanner in case we are interested by spaces as
 822                 // tokens
 823                 startPosition = htmlPosition;
 824                 return TokenNameHTML;
 825               }
 826             } else {
 827               boolean phpStart = (currentCharacter == 'P')
 828                   || (currentCharacter == 'p');
 829               if (phpStart) {
 830                 int test = getNextChar('H', 'h');
 831                 if (test >= 0) {
 832                   test = getNextChar('P', 'p');
 833                   if (test >= 0) {
 834                     // <?PHP <?php
 835                     startPosition = currentPosition;
 836                     phpMode = true;
 837                     if (tokenizeWhiteSpace) {
 838                       // && (whiteStart != currentPosition - 1)) {
 839                       // reposition scanner in case we are interested by spaces
 840                       // as tokens
 841                       startPosition = htmlPosition;
 842                       return TokenNameHTML;
 843                     }
 844                   }
 845                 }
 846               }
 847             }
 848           }
 849         }
 850         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 851           if (recordLineSeparator) {
 852             pushLineSeparator();
 853           } else {
 854             currentLine = null;
 855           }
 856         }
 857       }
 858     } //-----------------end switch while try--------------------
 859     catch (IndexOutOfBoundsException e) {
 860       if (tokenizeWhiteSpace) {
 861         // && (whiteStart != currentPosition - 1)) {
 862         // reposition scanner in case we are interested by spaces as tokens
 863         startPosition = htmlPosition;
 864       }
 865       return TokenNameEOF;
 866     }
 867     if (phpMode) {
 868       this.wasAcr = false;
 869       if (diet) {
 870         jumpOverMethodBody();
 871         diet = false;
 872         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 873       }
 874       try {
 875         while (true) { //loop for jumping over comments
 876           withoutUnicodePtr = 0;
 877           //start with a new token (even comment written with unicode )
 878           // ---------Consume white space and handles startPosition---------
 879           int whiteStart = currentPosition;
 880           boolean isWhiteSpace;
 881           do {
 882             startPosition = currentPosition;
 883             currentCharacter = source[currentPosition++];
 884             //            if (((currentCharacter = source[currentPosition++]) == '\\')
 885             //              && (source[currentPosition] == 'u')) {
 886             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 887             //            } else {
 888             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 889               checkNonExternalizeString();
 890               if (recordLineSeparator) {
 891                 pushLineSeparator();
 892               } else {
 893                 currentLine = null;
 894               }
 895             }
 896             isWhiteSpace = (currentCharacter == ' ')
 897                 || Character.isWhitespace(currentCharacter);
 898             //            }
 899           } while (isWhiteSpace);
 900           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 901             // reposition scanner in case we are interested by spaces as tokens
 902             currentPosition--;
 903             startPosition = whiteStart;
 904             return TokenNameWHITESPACE;
 905           }
 906           //little trick to get out in the middle of a source compuation
 907           if (currentPosition > eofPosition)
 908             return TokenNameEOF;
 909           // ---------Identify the next token-------------
 910           switch (currentCharacter) {
 911             case '(' :
 912               return getCastOrParen();
 913             case ')' :
 914               return TokenNameRPAREN;
 915             case '{' :
 916               return TokenNameLBRACE;
 917             case '}' :
 918               return TokenNameRBRACE;
 919             case '[' :
 920               return TokenNameLBRACKET;
 921             case ']' :
 922               return TokenNameRBRACKET;
 923             case ';' :
 924               return TokenNameSEMICOLON;
 925             case ',' :
 926               return TokenNameCOMMA;
 927             case '.' :
 928               if (getNextChar('='))
 929                 return TokenNameDOT_EQUAL;
 930               if (getNextCharAsDigit())
 931                 return scanNumber(true);
 932               return TokenNameDOT;
 933             case '+' :
 934               {
 935                 int test;
 936                 if ((test = getNextChar('+', '=')) == 0)
 937                   return TokenNamePLUS_PLUS;
 938                 if (test > 0)
 939                   return TokenNamePLUS_EQUAL;
 940                 return TokenNamePLUS;
 941               }
 942             case '-' :
 943               {
 944                 int test;
 945                 if ((test = getNextChar('-', '=')) == 0)
 946                   return TokenNameMINUS_MINUS;
 947                 if (test > 0)
 948                   return TokenNameMINUS_EQUAL;
 949                 if (getNextChar('>'))
 950                   return TokenNameMINUS_GREATER;
 951                 return TokenNameMINUS;
 952               }
 953             case '~' :
 954               if (getNextChar('='))
 955                 return TokenNameTWIDDLE_EQUAL;
 956               return TokenNameTWIDDLE;
 957             case '!' :
 958               if (getNextChar('=')) {
 959                 if (getNextChar('=')) {
 960                   return TokenNameNOT_EQUAL_EQUAL;
 961                 }
 962                 return TokenNameNOT_EQUAL;
 963               }
 964               return TokenNameNOT;
 965             case '*' :
 966               if (getNextChar('='))
 967                 return TokenNameMULTIPLY_EQUAL;
 968               return TokenNameMULTIPLY;
 969             case '%' :
 970               if (getNextChar('='))
 971                 return TokenNameREMAINDER_EQUAL;
 972               return TokenNameREMAINDER;
 973             case '<' :
 974               {
 975                 int oldPosition = currentPosition;
 976                 try {
 977                   currentCharacter = source[currentPosition++];
 978                 } catch (IndexOutOfBoundsException e) {
 979                   currentPosition = oldPosition;
 980                   return TokenNameLESS;
 981                 }
 982                 switch (currentCharacter) {
 983                   case '=' :
 984                     return TokenNameLESS_EQUAL;
 985                   case '>' :
 986                     return TokenNameNOT_EQUAL;
 987                   case '<' :
 988                     if (getNextChar('='))
 989                       return TokenNameLEFT_SHIFT_EQUAL;
 990                     if (getNextChar('<')) {
 991                       int heredocStart = currentPosition;
 992                       int heredocLength = 0;
 993                       currentCharacter = source[currentPosition++];
 994                       if (isPHPIdentifierStart(currentCharacter)) {
 995                         currentCharacter = source[currentPosition++];
 996                       } else {
 997                         return TokenNameERROR;
 998                       }
 999                       while (isPHPIdentifierPart(currentCharacter)) {
1000                         currentCharacter = source[currentPosition++];
1001                       }
1002                       heredocLength = currentPosition - heredocStart - 1;
1003                       // heredoc end-tag determination
1004                       boolean endTag = true;
1005                       char ch;
1006                       do {
1007                         ch = source[currentPosition++];
1008                         if (ch == '\r' || ch == '\n') {
1009                           if (recordLineSeparator) {
1010                             pushLineSeparator();
1011                           } else {
1012                             currentLine = null;
1013                           }
1014                           for (int i = 0; i < heredocLength; i++) {
1015                             if (source[currentPosition + i] != source[heredocStart
1016                                 + i]) {
1017                               endTag = false;
1018                               break;
1019                             }
1020                           }
1021                           if (endTag) {
1022                             currentPosition += heredocLength - 1;
1023                             currentCharacter = source[currentPosition++];
1024                             break; // do...while loop
1025                           } else {
1026                             endTag = true;
1027                           }
1028                         }
1029                       } while (true);
1030                       return TokenNameHEREDOC;
1031                     }
1032                     return TokenNameLEFT_SHIFT;
1033                 }
1034                 currentPosition = oldPosition;
1035                 return TokenNameLESS;
1036               }
1037             case '>' :
1038               {
1039                 int test;
1040                 if ((test = getNextChar('=', '>')) == 0)
1041                   return TokenNameGREATER_EQUAL;
1042                 if (test > 0) {
1043                   if ((test = getNextChar('=', '>')) == 0)
1044                     return TokenNameRIGHT_SHIFT_EQUAL;
1045                   return TokenNameRIGHT_SHIFT;
1046                 }
1047                 return TokenNameGREATER;
1048               }
1049             case '=' :
1050               if (getNextChar('=')) {
1051                 if (getNextChar('=')) {
1052                   return TokenNameEQUAL_EQUAL_EQUAL;
1053                 }
1054                 return TokenNameEQUAL_EQUAL;
1055               }
1056               if (getNextChar('>'))
1057                 return TokenNameEQUAL_GREATER;
1058               return TokenNameEQUAL;
1059             case '&' :
1060               {
1061                 int test;
1062                 if ((test = getNextChar('&', '=')) == 0)
1063                   return TokenNameAND_AND;
1064                 if (test > 0)
1065                   return TokenNameAND_EQUAL;
1066                 return TokenNameAND;
1067               }
1068             case '|' :
1069               {
1070                 int test;
1071                 if ((test = getNextChar('|', '=')) == 0)
1072                   return TokenNameOR_OR;
1073                 if (test > 0)
1074                   return TokenNameOR_EQUAL;
1075                 return TokenNameOR;
1076               }
1077             case '^' :
1078               if (getNextChar('='))
1079                 return TokenNameXOR_EQUAL;
1080               return TokenNameXOR;
1081             case '?' :
1082               if (getNextChar('>')) {
1083                 phpMode = false;
1084                 return TokenNameINLINE_HTML;
1085               }
1086               return TokenNameQUESTION;
1087             case ':' :
1088               if (getNextChar(':'))
1089                 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1090               return TokenNameCOLON;
1091             case '@' :
1092               return TokenNameAT;
1093             //                                  case '\'' :
1094             //                                          {
1095             //                                                  int test;
1096             //                                                  if ((test = getNextChar('\n', '\r')) == 0) {
1097             //                                                          throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1098             //                                                  }
1099             //                                                  if (test > 0) {
1100             //                                                          // relocate if finding another quote fairly close: thus unicode
1101             // '/u000D' will be fully consumed
1102             //                                                          for (int lookAhead = 0;
1103             //                                                                  lookAhead < 3;
1104             //                                                                  lookAhead++) {
1105             //                                                                  if (currentPosition + lookAhead
1106             //                                                                          == source.length)
1107             //                                                                          break;
1108             //                                                                  if (source[currentPosition + lookAhead]
1109             //                                                                          == '\n')
1110             //                                                                          break;
1111             //                                                                  if (source[currentPosition + lookAhead]
1112             //                                                                          == '\'') {
1113             //                                                                          currentPosition += lookAhead + 1;
1114             //                                                                          break;
1115             //                                                                  }
1116             //                                                          }
1117             //                                                          throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1118             //                                                  }
1119             //                                          }
1120             //                                          if (getNextChar('\'')) {
1121             //                                                  // relocate if finding another quote fairly close: thus unicode
1122             // '/u000D' will be fully consumed
1123             //                                                  for (int lookAhead = 0;
1124             //                                                          lookAhead < 3;
1125             //                                                          lookAhead++) {
1126             //                                                          if (currentPosition + lookAhead
1127             //                                                                  == source.length)
1128             //                                                                  break;
1129             //                                                          if (source[currentPosition + lookAhead]
1130             //                                                                  == '\n')
1131             //                                                                  break;
1132             //                                                          if (source[currentPosition + lookAhead]
1133             //                                                                  == '\'') {
1134             //                                                                  currentPosition += lookAhead + 1;
1135             //                                                                  break;
1136             //                                                          }
1137             //                                                  }
1138             //                                                  throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1139             //                                          }
1140             //                                          if (getNextChar('\\'))
1141             //                                                  scanEscapeCharacter();
1142             //                                          else { // consume next character
1143             //                                                  unicodeAsBackSlash = false;
1144             //                                                  if (((currentCharacter = source[currentPosition++])
1145             //                                                          == '\\')
1146             //                                                          && (source[currentPosition] == 'u')) {
1147             //                                                          getNextUnicodeChar();
1148             //                                                  } else {
1149             //                                                          if (withoutUnicodePtr != 0) {
1150             //                                                                  withoutUnicodeBuffer[++withoutUnicodePtr] =
1151             //                                                                          currentCharacter;
1152             //                                                          }
1153             //                                                  }
1154             //                                          }
1155             //                                          // if (getNextChar('\''))
1156             //                                          // return TokenNameCharacterLiteral;
1157             //                                          // relocate if finding another quote fairly close: thus unicode
1158             // '/u000D' will be fully consumed
1159             //                                          for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1160             //                                                  if (currentPosition + lookAhead == source.length)
1161             //                                                          break;
1162             //                                                  if (source[currentPosition + lookAhead] == '\n')
1163             //                                                          break;
1164             //                                                  if (source[currentPosition + lookAhead] == '\'') {
1165             //                                                          currentPosition += lookAhead + 1;
1166             //                                                          break;
1167             //                                                  }
1168             //                                          }
1169             //                                          throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1170             case '\'' :
1171               try {
1172                 // consume next character
1173                 unicodeAsBackSlash = false;
1174                 currentCharacter = source[currentPosition++];
1175                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1176                 //                  && (source[currentPosition] == 'u')) {
1177                 //                  getNextUnicodeChar();
1178                 //                } else {
1179                 //                  if (withoutUnicodePtr != 0) {
1180                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1181                 //                      currentCharacter;
1182                 //                  }
1183                 //                }
1184                 while (currentCharacter != '\'') {
1185                   /** ** in PHP \r and \n are valid in string literals *** */
1186                   //                  if ((currentCharacter == '\n')
1187                   //                    || (currentCharacter == '\r')) {
1188                   //                    // relocate if finding another quote fairly close: thus
1189                   // unicode '/u000D' will be fully consumed
1190                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1191                   //                      if (currentPosition + lookAhead == source.length)
1192                   //                        break;
1193                   //                      if (source[currentPosition + lookAhead] == '\n')
1194                   //                        break;
1195                   //                      if (source[currentPosition + lookAhead] == '\"') {
1196                   //                        currentPosition += lookAhead + 1;
1197                   //                        break;
1198                   //                      }
1199                   //                    }
1200                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1201                   //                  }
1202                   if (currentCharacter == '\\') {
1203                     int escapeSize = currentPosition;
1204                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1205                     //scanEscapeCharacter make a side effect on this value and
1206                     // we need the previous value few lines down this one
1207                     scanSingleQuotedEscapeCharacter();
1208                     escapeSize = currentPosition - escapeSize;
1209                     if (withoutUnicodePtr == 0) {
1210                       //buffer all the entries that have been left aside....
1211                       withoutUnicodePtr = currentPosition - escapeSize - 1
1212                           - startPosition;
1213                       System.arraycopy(source, startPosition,
1214                           withoutUnicodeBuffer, 1, withoutUnicodePtr);
1215                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1216                     } else { //overwrite the / in the buffer
1217                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1218                       if (backSlashAsUnicodeInString) { //there are TWO \ in
1219                         // the stream where
1220                         // only one is correct
1221                         withoutUnicodePtr--;
1222                       }
1223                     }
1224                   }
1225                   // consume next character
1226                   unicodeAsBackSlash = false;
1227                   currentCharacter = source[currentPosition++];
1228                   //                  if (((currentCharacter = source[currentPosition++]) ==
1229                   // '\\')
1230                   //                    && (source[currentPosition] == 'u')) {
1231                   //                    getNextUnicodeChar();
1232                   //                  } else {
1233                   if (withoutUnicodePtr != 0) {
1234                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1235                   }
1236                   //                  }
1237                 }
1238               } catch (IndexOutOfBoundsException e) {
1239                 throw new InvalidInputException(UNTERMINATED_STRING);
1240               } catch (InvalidInputException e) {
1241                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1242                   // relocate if finding another quote fairly close: thus
1243                   // unicode '/u000D' will be fully consumed
1244                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1245                     if (currentPosition + lookAhead == source.length)
1246                       break;
1247                     if (source[currentPosition + lookAhead] == '\n')
1248                       break;
1249                     if (source[currentPosition + lookAhead] == '\'') {
1250                       currentPosition += lookAhead + 1;
1251                       break;
1252                     }
1253                   }
1254                 }
1255                 throw e; // rethrow
1256               }
1257               if (checkNonExternalizedStringLiterals) { // check for presence
1258                 // of NLS tags
1259                 // //$NON-NLS-?$ where
1260                 // ? is an int.
1261                 if (currentLine == null) {
1262                   currentLine = new NLSLine();
1263                   lines.add(currentLine);
1264                 }
1265                 currentLine.add(new StringLiteral(
1266                     getCurrentTokenSourceString(), startPosition,
1267                     currentPosition - 1));
1268               }
1269               return TokenNameStringConstant;
1270             case '"' :
1271               try {
1272                 // consume next character
1273                 unicodeAsBackSlash = false;
1274                 currentCharacter = source[currentPosition++];
1275                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1276                 //                  && (source[currentPosition] == 'u')) {
1277                 //                  getNextUnicodeChar();
1278                 //                } else {
1279                 //                  if (withoutUnicodePtr != 0) {
1280                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1281                 //                      currentCharacter;
1282                 //                  }
1283                 //                }
1284                 while (currentCharacter != '"') {
1285                   /** ** in PHP \r and \n are valid in string literals *** */
1286                   //                  if ((currentCharacter == '\n')
1287                   //                    || (currentCharacter == '\r')) {
1288                   //                    // relocate if finding another quote fairly close: thus
1289                   // unicode '/u000D' will be fully consumed
1290                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1291                   //                      if (currentPosition + lookAhead == source.length)
1292                   //                        break;
1293                   //                      if (source[currentPosition + lookAhead] == '\n')
1294                   //                        break;
1295                   //                      if (source[currentPosition + lookAhead] == '\"') {
1296                   //                        currentPosition += lookAhead + 1;
1297                   //                        break;
1298                   //                      }
1299                   //                    }
1300                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1301                   //                  }
1302                   if (currentCharacter == '\\') {
1303                     int escapeSize = currentPosition;
1304                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1305                     //scanDoubleQuotedEscapeCharacter has a side effect on this value
1306                     // and we need the previous value a few lines below
1307                     scanDoubleQuotedEscapeCharacter();
1308                     escapeSize = currentPosition - escapeSize;
1309                     if (withoutUnicodePtr == 0) {
1310                       //buffer all the entries that have been left aside....
1311                       withoutUnicodePtr = currentPosition - escapeSize - 1
1312                           - startPosition;
1313                       System.arraycopy(source, startPosition,
1314                           withoutUnicodeBuffer, 1, withoutUnicodePtr);
1315                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1316                     } else { //overwrite the / in the buffer
1317                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1318                       if (backSlashAsUnicodeInString) { //there are TWO \ in
1319                         // the stream where
1320                         // only one is correct
1321                         withoutUnicodePtr--;
1322                       }
1323                     }
1324                   }
1325                   // consume next character
1326                   unicodeAsBackSlash = false;
1327                   currentCharacter = source[currentPosition++];
1328                   //                  if (((currentCharacter = source[currentPosition++]) ==
1329                   // '\\')
1330                   //                    && (source[currentPosition] == 'u')) {
1331                   //                    getNextUnicodeChar();
1332                   //                  } else {
1333                   if (withoutUnicodePtr != 0) {
1334                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1335                   }
1336                   //                  }
1337                 }
1338               } catch (IndexOutOfBoundsException e) {
1339                 throw new InvalidInputException(UNTERMINATED_STRING);
1340               } catch (InvalidInputException e) {
1341                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1342                   // relocate if finding another quote fairly close: thus
1343                   // unicode '/u000D' will be fully consumed
1344                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1345                     if (currentPosition + lookAhead == source.length)
1346                       break;
1347                     if (source[currentPosition + lookAhead] == '\n')
1348                       break;
1349                     if (source[currentPosition + lookAhead] == '\"') {
1350                       currentPosition += lookAhead + 1;
1351                       break;
1352                     }
1353                   }
1354                 }
1355                 throw e; // rethrow
1356               }
1357               if (checkNonExternalizedStringLiterals) { // check for presence
1358                 // of NLS tags
1359                 // //$NON-NLS-?$ where
1360                 // ? is an int.
1361                 if (currentLine == null) {
1362                   currentLine = new NLSLine();
1363                   lines.add(currentLine);
1364                 }
1365                 currentLine.add(new StringLiteral(
1366                     getCurrentTokenSourceString(), startPosition,
1367                     currentPosition - 1));
1368               }
1369               return TokenNameStringLiteral;
1370             case '`' :
1371               try {
1372                 // consume next character
1373                 unicodeAsBackSlash = false;
1374                 currentCharacter = source[currentPosition++];
1375                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1376                 //                  && (source[currentPosition] == 'u')) {
1377                 //                  getNextUnicodeChar();
1378                 //                } else {
1379                 //                  if (withoutUnicodePtr != 0) {
1380                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1381                 //                      currentCharacter;
1382                 //                  }
1383                 //                }
1384                 while (currentCharacter != '`') {
1385                   /** ** in PHP \r and \n are valid in string literals *** */
1386                   //                if ((currentCharacter == '\n')
1387                   //                  || (currentCharacter == '\r')) {
1388                   //                  // relocate if finding another quote fairly close: thus
1389                   // unicode '/u000D' will be fully consumed
1390                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1391                   //                    if (currentPosition + lookAhead == source.length)
1392                   //                      break;
1393                   //                    if (source[currentPosition + lookAhead] == '\n')
1394                   //                      break;
1395                   //                    if (source[currentPosition + lookAhead] == '\"') {
1396                   //                      currentPosition += lookAhead + 1;
1397                   //                      break;
1398                   //                    }
1399                   //                  }
1400                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1401                   //                }
1402                   if (currentCharacter == '\\') {
1403                     int escapeSize = currentPosition;
1404                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1405                     //scanDoubleQuotedEscapeCharacter has a side effect on this value
1406                     // and we need the previous value a few lines below
1407                     scanDoubleQuotedEscapeCharacter();
1408                     escapeSize = currentPosition - escapeSize;
1409                     if (withoutUnicodePtr == 0) {
1410                       //buffer all the entries that have been left aside....
1411                       withoutUnicodePtr = currentPosition - escapeSize - 1
1412                           - startPosition;
1413                       System.arraycopy(source, startPosition,
1414                           withoutUnicodeBuffer, 1, withoutUnicodePtr);
1415                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1416                     } else { //overwrite the / in the buffer
1417                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1418                       if (backSlashAsUnicodeInString) { //there are TWO \ in
1419                         // the stream where
1420                         // only one is correct
1421                         withoutUnicodePtr--;
1422                       }
1423                     }
1424                   }
1425                   // consume next character
1426                   unicodeAsBackSlash = false;
1427                   currentCharacter = source[currentPosition++];
1428                   //                  if (((currentCharacter = source[currentPosition++]) ==
1429                   // '\\')
1430                   //                    && (source[currentPosition] == 'u')) {
1431                   //                    getNextUnicodeChar();
1432                   //                  } else {
1433                   if (withoutUnicodePtr != 0) {
1434                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1435                   }
1436                   //                  }
1437                 }
1438               } catch (IndexOutOfBoundsException e) {
1439                 throw new InvalidInputException(UNTERMINATED_STRING);
1440               } catch (InvalidInputException e) {
1441                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1442                   // relocate if finding another quote fairly close: thus
1443                   // unicode '/u000D' will be fully consumed
1444                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1445                     if (currentPosition + lookAhead == source.length)
1446                       break;
1447                     if (source[currentPosition + lookAhead] == '\n')
1448                       break;
1449                     if (source[currentPosition + lookAhead] == '`') {
1450                       currentPosition += lookAhead + 1;
1451                       break;
1452                     }
1453                   }
1454                 }
1455                 throw e; // rethrow
1456               }
1457               if (checkNonExternalizedStringLiterals) { // check for presence
1458                 // of NLS tags
1459                 // //$NON-NLS-?$ where
1460                 // ? is an int.
1461                 if (currentLine == null) {
1462                   currentLine = new NLSLine();
1463                   lines.add(currentLine);
1464                 }
1465                 currentLine.add(new StringLiteral(
1466                     getCurrentTokenSourceString(), startPosition,
1467                     currentPosition - 1));
1468               }
1469               return TokenNameStringInterpolated;
1470             case '#' :
1471             case '/' :
1472               {
1473                 char startChar = currentCharacter;
1474                 if (getNextChar('=')) {
1475                   return TokenNameDIVIDE_EQUAL;
1476                 }
1477                 int test;
1478                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1479                   //line comment
1480                   int endPositionForLineComment = 0;
1481                   try { //get the next char
1482                     currentCharacter = source[currentPosition++];
1483                     //                    if (((currentCharacter = source[currentPosition++])
1484                     //                      == '\\')
1485                     //                      && (source[currentPosition] == 'u')) {
1486                     //                      //-------------unicode traitement ------------
1487                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1488                     //                      currentPosition++;
1489                     //                      while (source[currentPosition] == 'u') {
1490                     //                        currentPosition++;
1491                     //                      }
1492                     //                      if ((c1 =
1493                     //                        Character.getNumericValue(source[currentPosition++]))
1494                     //                        > 15
1495                     //                        || c1 < 0
1496                     //                        || (c2 =
1497                     //                          Character.getNumericValue(source[currentPosition++]))
1498                     //                          > 15
1499                     //                        || c2 < 0
1500                     //                        || (c3 =
1501                     //                          Character.getNumericValue(source[currentPosition++]))
1502                     //                          > 15
1503                     //                        || c3 < 0
1504                     //                        || (c4 =
1505                     //                          Character.getNumericValue(source[currentPosition++]))
1506                     //                          > 15
1507                     //                        || c4 < 0) {
1508                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1509                     //                      } else {
1510                     //                        currentCharacter =
1511                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1512                     //                      }
1513                     //                    }
1514                     //handle the \\u case manually into comment
1515                     //                    if (currentCharacter == '\\') {
1516                     //                      if (source[currentPosition] == '\\')
1517                     //                        currentPosition++;
1518                     //                    } //jump over the \\
1519                     boolean isUnicode = false;
1520                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1521                       if (currentCharacter == '?') {
1522                         if (getNextChar('>')) {
1523                           startPosition = currentPosition - 2;
1524                           phpMode = false;
1525                           return TokenNameINLINE_HTML;
1526                         }
1527                       }
1528                       //get the next char
1529                       isUnicode = false;
1530                       currentCharacter = source[currentPosition++];
1531                       //                      if (((currentCharacter = source[currentPosition++])
1532                       //                        == '\\')
1533                       //                        && (source[currentPosition] == 'u')) {
1534                       //                        isUnicode = true;
1535                       //                        //-------------unicode traitement ------------
1536                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1537                       //                        currentPosition++;
1538                       //                        while (source[currentPosition] == 'u') {
1539                       //                          currentPosition++;
1540                       //                        }
1541                       //                        if ((c1 =
1542                       //                          Character.getNumericValue(source[currentPosition++]))
1543                       //                          > 15
1544                       //                          || c1 < 0
1545                       //                          || (c2 =
1546                       //                            Character.getNumericValue(
1547                       //                              source[currentPosition++]))
1548                       //                            > 15
1549                       //                          || c2 < 0
1550                       //                          || (c3 =
1551                       //                            Character.getNumericValue(
1552                       //                              source[currentPosition++]))
1553                       //                            > 15
1554                       //                          || c3 < 0
1555                       //                          || (c4 =
1556                       //                            Character.getNumericValue(
1557                       //                              source[currentPosition++]))
1558                       //                            > 15
1559                       //                          || c4 < 0) {
1560                       //                          throw new
1561                       // InvalidInputException(INVALID_UNICODE_ESCAPE);
1562                       //                        } else {
1563                       //                          currentCharacter =
1564                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1565                       //                        }
1566                       //                      }
1567                       //handle the \\u case manually into comment
1568                       //                      if (currentCharacter == '\\') {
1569                       //                        if (source[currentPosition] == '\\')
1570                       //                          currentPosition++;
1571                       //                      } //jump over the \\
1572                     }
1573                     if (isUnicode) {
1574                       endPositionForLineComment = currentPosition - 6;
1575                     } else {
1576                       endPositionForLineComment = currentPosition - 1;
1577                     }
1578                     recordComment(false);
1579                     if ((currentCharacter == '\r')
1580                         || (currentCharacter == '\n')) {
1581                       checkNonExternalizeString();
1582                       if (recordLineSeparator) {
1583                         if (isUnicode) {
1584                           pushUnicodeLineSeparator();
1585                         } else {
1586                           pushLineSeparator();
1587                         }
1588                       } else {
1589                         currentLine = null;
1590                       }
1591                     }
1592                     if (tokenizeComments) {
1593                       if (!isUnicode) {
1594                         currentPosition = endPositionForLineComment;
1595                         // reset one character behind
1596                       }
1597                       return TokenNameCOMMENT_LINE;
1598                     }
1599                   } catch (IndexOutOfBoundsException e) { //an eof will them
1600                     // be generated
1601                     if (tokenizeComments) {
1602                       currentPosition--;
1603                       // reset one character behind
1604                       return TokenNameCOMMENT_LINE;
1605                     }
1606                   }
1607                   break;
1608                 }
1609                 if (test > 0) {
1610                   //traditional and annotation comment
1611                   boolean isJavadoc = false, star = false;
1612                   // consume next character
1613                   unicodeAsBackSlash = false;
1614                   currentCharacter = source[currentPosition++];
1615                   //                  if (((currentCharacter = source[currentPosition++]) ==
1616                   // '\\')
1617                   //                    && (source[currentPosition] == 'u')) {
1618                   //                    getNextUnicodeChar();
1619                   //                  } else {
1620                   //                    if (withoutUnicodePtr != 0) {
1621                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1622                   //                        currentCharacter;
1623                   //                    }
1624                   //                  }
1625                   if (currentCharacter == '*') {
1626                     isJavadoc = true;
1627                     star = true;
1628                   }
1629                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1630                     checkNonExternalizeString();
1631                     if (recordLineSeparator) {
1632                       pushLineSeparator();
1633                     } else {
1634                       currentLine = null;
1635                     }
1636                   }
1637                   try { //get the next char
1638                     currentCharacter = source[currentPosition++];
1639                     //                    if (((currentCharacter = source[currentPosition++])
1640                     //                      == '\\')
1641                     //                      && (source[currentPosition] == 'u')) {
1642                     //                      //-------------unicode traitement ------------
1643                     //                      getNextUnicodeChar();
1644                     //                    }
1645                     //handle the \\u case manually into comment
1646                     //                    if (currentCharacter == '\\') {
1647                     //                      if (source[currentPosition] == '\\')
1648                     //                        currentPosition++;
1649                     //                      //jump over the \\
1650                     //                    }
1651                     // empty comment is not a javadoc /**/
1652                     if (currentCharacter == '/') {
1653                       isJavadoc = false;
1654                     }
1655                     //loop until end of comment */
1656                     while ((currentCharacter != '/') || (!star)) {
1657                       if ((currentCharacter == '\r')
1658                           || (currentCharacter == '\n')) {
1659                         checkNonExternalizeString();
1660                         if (recordLineSeparator) {
1661                           pushLineSeparator();
1662                         } else {
1663                           currentLine = null;
1664                         }
1665                       }
1666                       star = currentCharacter == '*';
1667                       //get next char
1668                       currentCharacter = source[currentPosition++];
1669                       //                      if (((currentCharacter = source[currentPosition++])
1670                       //                        == '\\')
1671                       //                        && (source[currentPosition] == 'u')) {
1672                       //                        //-------------unicode traitement ------------
1673                       //                        getNextUnicodeChar();
1674                       //                      }
1675                       //handle the \\u case manually into comment
1676                       //                      if (currentCharacter == '\\') {
1677                       //                        if (source[currentPosition] == '\\')
1678                       //                          currentPosition++;
1679                       //                      } //jump over the \\
1680                     }
1681                     recordComment(isJavadoc);
1682                     if (tokenizeComments) {
1683                       if (isJavadoc)
1684                         return TokenNameCOMMENT_PHPDOC;
1685                       return TokenNameCOMMENT_BLOCK;
1686                     }
1687                   } catch (IndexOutOfBoundsException e) {
1688                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1689                   }
1690                   break;
1691                 }
1692                 return TokenNameDIVIDE;
1693               }
1694             case '\u001a' :
1695               if (atEnd())
1696                 return TokenNameEOF;
1697               //the atEnd may not be <currentPosition == source.length> if
1698               // source is only some part of a real (external) stream
1699               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1700             default :
1701               if (currentCharacter == '$') {
1702                 int oldPosition = currentPosition;
1703                 try {
1704                   currentCharacter = source[currentPosition++];
1705
1706                   if (isPHPIdentifierStart(currentCharacter)) {
1707                     return scanIdentifierOrKeyword(true);
1708                   } else {
1709                     currentPosition = oldPosition;
1710                     return TokenNameDOLLAR;
1711                   }
1712                 } catch (IndexOutOfBoundsException e) {
1713                   currentPosition = oldPosition;
1714                   return TokenNameDOLLAR;
1715                 }
1716               }
1717               if (isPHPIdentifierStart(currentCharacter))
1718                 return scanIdentifierOrKeyword(false);
1719               if (Character.isDigit(currentCharacter))
1720                 return scanNumber(false);
1721               return TokenNameERROR;
1722           }
1723         }
1724       } //-----------------end switch while try--------------------
1725       catch (IndexOutOfBoundsException e) {
1726       }
1727     }
1728     return TokenNameEOF;
1729   }
1730   //  public final void getNextUnicodeChar()
1731   //    throws IndexOutOfBoundsException, InvalidInputException {
1732   //    //VOID
1733   //    //handle the case of unicode.
1734   //    //when a unicode appears then we must use a buffer that holds char
1735   // internal values
1736   //    //At the end of this method currentCharacter holds the new visited char
1737   //    //and currentPosition points right next after it
1738   //
1739   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1740   //
1741   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1742   //    currentPosition++;
1743   //    while (source[currentPosition] == 'u') {
1744   //      currentPosition++;
1745   //      unicodeSize++;
1746   //    }
1747   //
1748   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1749   //      || c1 < 0
1750   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1751   //      || c2 < 0
1752   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1753   //      || c3 < 0
1754   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1755   //      || c4 < 0) {
1756   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1757   //    } else {
1758   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1759   //      //need the unicode buffer
1760   //      if (withoutUnicodePtr == 0) {
1761   //        //buffer all the entries that have been left aside....
1762   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1763   //        System.arraycopy(
1764   //          source,
1765   //          startPosition,
1766   //          withoutUnicodeBuffer,
1767   //          1,
1768   //          withoutUnicodePtr);
1769   //      }
1770   //      //fill the buffer with the char
1771   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1772   //    }
1773   //    unicodeAsBackSlash = currentCharacter == '\\';
1774   //  }
1775   /*
1776    * Tokenize a method body, assuming that curly brackets are properly
1777    * balanced.
1778    */
1779   public final void jumpOverMethodBody() {
1780     this.wasAcr = false;
1781     int found = 1;
1782     try {
1783       while (true) { //loop for jumping over comments
1784         // ---------Consume white space and handles startPosition---------
1785         boolean isWhiteSpace;
1786         do {
1787           startPosition = currentPosition;
1788           currentCharacter = source[currentPosition++];
1789           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1790           //            && (source[currentPosition] == 'u')) {
1791           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1792           //          } else {
1793           if (recordLineSeparator
1794               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1795             pushLineSeparator();
1796           isWhiteSpace = Character.isWhitespace(currentCharacter);
1797           //          }
1798         } while (isWhiteSpace);
1799         // -------consume token until } is found---------
1800         switch (currentCharacter) {
1801           case '{' :
1802             found++;
1803             break;
1804           case '}' :
1805             found--;
1806             if (found == 0)
1807               return;
1808             break;
1809           case '\'' :
1810             {
1811               boolean test;
1812               test = getNextChar('\\');
1813               if (test) {
1814                 try {
1815                   scanDoubleQuotedEscapeCharacter();
1816                 } catch (InvalidInputException ex) {
1817                 };
1818               } else {
1819                 //                try { // consume next character
1820                 unicodeAsBackSlash = false;
1821                 currentCharacter = source[currentPosition++];
1822                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1823                 //                    && (source[currentPosition] == 'u')) {
1824                 //                    getNextUnicodeChar();
1825                 //                  } else {
1826                 if (withoutUnicodePtr != 0) {
1827                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1828                 }
1829                 //                  }
1830                 //                } catch (InvalidInputException ex) {
1831                 //                };
1832               }
1833               getNextChar('\'');
1834               break;
1835             }
1836           case '"' :
1837             try {
1838               //              try { // consume next character
1839               unicodeAsBackSlash = false;
1840               currentCharacter = source[currentPosition++];
1841               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1842               //                  && (source[currentPosition] == 'u')) {
1843               //                  getNextUnicodeChar();
1844               //                } else {
1845               if (withoutUnicodePtr != 0) {
1846                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1847               }
1848               //                }
1849               //              } catch (InvalidInputException ex) {
1850               //              };
1851               while (currentCharacter != '"') {
1852                 if (currentCharacter == '\r') {
1853                   if (source[currentPosition] == '\n')
1854                     currentPosition++;
1855                   break;
1856                   // the string cannot go further than the line
1857                 }
1858                 if (currentCharacter == '\n') {
1859                   break;
1860                   // the string cannot go further than the line
1861                 }
1862                 if (currentCharacter == '\\') {
1863                   try {
1864                     scanDoubleQuotedEscapeCharacter();
1865                   } catch (InvalidInputException ex) {
1866                   };
1867                 }
1868                 //                try { // consume next character
1869                 unicodeAsBackSlash = false;
1870                 currentCharacter = source[currentPosition++];
1871                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1872                 //                    && (source[currentPosition] == 'u')) {
1873                 //                    getNextUnicodeChar();
1874                 //                  } else {
1875                 if (withoutUnicodePtr != 0) {
1876                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1877                 }
1878                 //                  }
1879                 //                } catch (InvalidInputException ex) {
1880                 //                };
1881               }
1882             } catch (IndexOutOfBoundsException e) {
1883               return;
1884             }
1885             break;
1886           case '/' :
1887             {
1888               int test;
1889               if ((test = getNextChar('/', '*')) == 0) {
1890                 //line comment
1891                 try {
1892                   //get the next char
1893                   currentCharacter = source[currentPosition++];
1894                   //                  if (((currentCharacter = source[currentPosition++]) ==
1895                   // '\\')
1896                   //                    && (source[currentPosition] == 'u')) {
1897                   //                    //-------------unicode traitement ------------
1898                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1899                   //                    currentPosition++;
1900                   //                    while (source[currentPosition] == 'u') {
1901                   //                      currentPosition++;
1902                   //                    }
1903                   //                    if ((c1 =
1904                   //                      Character.getNumericValue(source[currentPosition++]))
1905                   //                      > 15
1906                   //                      || c1 < 0
1907                   //                      || (c2 =
1908                   //                        Character.getNumericValue(source[currentPosition++]))
1909                   //                        > 15
1910                   //                      || c2 < 0
1911                   //                      || (c3 =
1912                   //                        Character.getNumericValue(source[currentPosition++]))
1913                   //                        > 15
1914                   //                      || c3 < 0
1915                   //                      || (c4 =
1916                   //                        Character.getNumericValue(source[currentPosition++]))
1917                   //                        > 15
1918                   //                      || c4 < 0) {
1919                   //                      //error don't care of the value
1920                   //                      currentCharacter = 'A';
1921                   //                    } //something different from \n and \r
1922                   //                    else {
1923                   //                      currentCharacter =
1924                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1925                   //                    }
1926                   //                  }
1927                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1928                     //get the next char
1929                     currentCharacter = source[currentPosition++];
1930                     //                    if (((currentCharacter = source[currentPosition++])
1931                     //                      == '\\')
1932                     //                      && (source[currentPosition] == 'u')) {
1933                     //                      //-------------unicode traitement ------------
1934                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1935                     //                      currentPosition++;
1936                     //                      while (source[currentPosition] == 'u') {
1937                     //                        currentPosition++;
1938                     //                      }
1939                     //                      if ((c1 =
1940                     //                        Character.getNumericValue(source[currentPosition++]))
1941                     //                        > 15
1942                     //                        || c1 < 0
1943                     //                        || (c2 =
1944                     //                          Character.getNumericValue(source[currentPosition++]))
1945                     //                          > 15
1946                     //                        || c2 < 0
1947                     //                        || (c3 =
1948                     //                          Character.getNumericValue(source[currentPosition++]))
1949                     //                          > 15
1950                     //                        || c3 < 0
1951                     //                        || (c4 =
1952                     //                          Character.getNumericValue(source[currentPosition++]))
1953                     //                          > 15
1954                     //                        || c4 < 0) {
1955                     //                        //error don't care of the value
1956                     //                        currentCharacter = 'A';
1957                     //                      } //something different from \n and \r
1958                     //                      else {
1959                     //                        currentCharacter =
1960                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1961                     //                      }
1962                     //                    }
1963                   }
1964                   if (recordLineSeparator
1965                       && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1966                     pushLineSeparator();
1967                 } catch (IndexOutOfBoundsException e) {
1968                 } //an eof will them be generated
1969                 break;
1970               }
1971               if (test > 0) {
1972                 //traditional and annotation comment
1973                 boolean star = false;
1974                 //                try { // consume next character
1975                 unicodeAsBackSlash = false;
1976                 currentCharacter = source[currentPosition++];
1977                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1978                 //                    && (source[currentPosition] == 'u')) {
1979                 //                    getNextUnicodeChar();
1980                 //                  } else {
1981                 if (withoutUnicodePtr != 0) {
1982                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1983                 }
1984                 //                  };
1985                 //                } catch (InvalidInputException ex) {
1986                 //                };
1987                 if (currentCharacter == '*') {
1988                   star = true;
1989                 }
1990                 if (recordLineSeparator
1991                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1992                   pushLineSeparator();
1993                 try { //get the next char
1994                   currentCharacter = source[currentPosition++];
1995                   //                  if (((currentCharacter = source[currentPosition++]) ==
1996                   // '\\')
1997                   //                    && (source[currentPosition] == 'u')) {
1998                   //                    //-------------unicode traitement ------------
1999                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2000                   //                    currentPosition++;
2001                   //                    while (source[currentPosition] == 'u') {
2002                   //                      currentPosition++;
2003                   //                    }
2004                   //                    if ((c1 =
2005                   //                      Character.getNumericValue(source[currentPosition++]))
2006                   //                      > 15
2007                   //                      || c1 < 0
2008                   //                      || (c2 =
2009                   //                        Character.getNumericValue(source[currentPosition++]))
2010                   //                        > 15
2011                   //                      || c2 < 0
2012                   //                      || (c3 =
2013                   //                        Character.getNumericValue(source[currentPosition++]))
2014                   //                        > 15
2015                   //                      || c3 < 0
2016                   //                      || (c4 =
2017                   //                        Character.getNumericValue(source[currentPosition++]))
2018                   //                        > 15
2019                   //                      || c4 < 0) {
2020                   //                      //error don't care of the value
2021                   //                      currentCharacter = 'A';
2022                   //                    } //something different from * and /
2023                   //                    else {
2024                   //                      currentCharacter =
2025                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2026                   //                    }
2027                   //                  }
2028                   //loop until end of comment */
2029                   while ((currentCharacter != '/') || (!star)) {
2030                     if (recordLineSeparator
2031                         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2032                       pushLineSeparator();
2033                     star = currentCharacter == '*';
2034                     //get next char
2035                     currentCharacter = source[currentPosition++];
2036                     //                    if (((currentCharacter = source[currentPosition++])
2037                     //                      == '\\')
2038                     //                      && (source[currentPosition] == 'u')) {
2039                     //                      //-------------unicode traitement ------------
2040                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2041                     //                      currentPosition++;
2042                     //                      while (source[currentPosition] == 'u') {
2043                     //                        currentPosition++;
2044                     //                      }
2045                     //                      if ((c1 =
2046                     //                        Character.getNumericValue(source[currentPosition++]))
2047                     //                        > 15
2048                     //                        || c1 < 0
2049                     //                        || (c2 =
2050                     //                          Character.getNumericValue(source[currentPosition++]))
2051                     //                          > 15
2052                     //                        || c2 < 0
2053                     //                        || (c3 =
2054                     //                          Character.getNumericValue(source[currentPosition++]))
2055                     //                          > 15
2056                     //                        || c3 < 0
2057                     //                        || (c4 =
2058                     //                          Character.getNumericValue(source[currentPosition++]))
2059                     //                          > 15
2060                     //                        || c4 < 0) {
2061                     //                        //error don't care of the value
2062                     //                        currentCharacter = 'A';
2063                     //                      } //something different from * and /
2064                     //                      else {
2065                     //                        currentCharacter =
2066                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2067                     //                      }
2068                     //                    }
2069                   }
2070                 } catch (IndexOutOfBoundsException e) {
2071                   return;
2072                 }
2073                 break;
2074               }
2075               break;
2076             }
2077           default :
2078             if (isPHPIdentifierStart(currentCharacter)
2079                 || currentCharacter == '$') {
2080               try {
2081                 scanIdentifierOrKeyword((currentCharacter == '$'));
2082               } catch (InvalidInputException ex) {
2083               };
2084               break;
2085             }
2086             if (Character.isDigit(currentCharacter)) {
2087               try {
2088                 scanNumber(false);
2089               } catch (InvalidInputException ex) {
2090               };
2091               break;
2092             }
2093         }
2094       }
2095       //-----------------end switch while try--------------------
2096     } catch (IndexOutOfBoundsException e) {
2097     } catch (InvalidInputException e) {
2098     }
2099     return;
2100   }
2101   //  public final boolean jumpOverUnicodeWhiteSpace()
2102   //    throws InvalidInputException {
2103   //    //BOOLEAN
2104   //    //handle the case of unicode. Jump over the next whiteSpace
2105   //    //making startPosition pointing on the next available char
2106   //    //On false, the currentCharacter is filled up with a potential
2107   //    //correct char
2108   //
2109   //    try {
2110   //      this.wasAcr = false;
2111   //      int c1, c2, c3, c4;
2112   //      int unicodeSize = 6;
2113   //      currentPosition++;
2114   //      while (source[currentPosition] == 'u') {
2115   //        currentPosition++;
2116   //        unicodeSize++;
2117   //      }
2118   //
2119   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2120   //        || c1 < 0)
2121   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2122   //          || c2 < 0)
2123   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2124   //          || c3 < 0)
2125   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2126   //          || c4 < 0)) {
2127   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2128   //      }
2129   //
2130   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2131   //      if (recordLineSeparator
2132   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2133   //        pushLineSeparator();
2134   //      if (Character.isWhitespace(currentCharacter))
2135   //        return true;
2136   //
2137   //      //buffer the new char which is not a white space
2138   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2139   //      //withoutUnicodePtr == 1 is true here
2140   //      return false;
2141   //    } catch (IndexOutOfBoundsException e) {
2142   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2143   //    }
2144   //  }
2145   public final int[] getLineEnds() {
2146     //return a bounded copy of this.lineEnds
2147     int[] copy;
2148     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2149     return copy;
2150   }
2151   public char[] getSource() {
2152     return this.source;
2153   }
2154   final char[] optimizedCurrentTokenSource1() {
2155     //return always the same char[] build only once
2156     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2157     char charOne = source[startPosition];
2158     switch (charOne) {
2159       case 'a' :
2160         return charArray_a;
2161       case 'b' :
2162         return charArray_b;
2163       case 'c' :
2164         return charArray_c;
2165       case 'd' :
2166         return charArray_d;
2167       case 'e' :
2168         return charArray_e;
2169       case 'f' :
2170         return charArray_f;
2171       case 'g' :
2172         return charArray_g;
2173       case 'h' :
2174         return charArray_h;
2175       case 'i' :
2176         return charArray_i;
2177       case 'j' :
2178         return charArray_j;
2179       case 'k' :
2180         return charArray_k;
2181       case 'l' :
2182         return charArray_l;
2183       case 'm' :
2184         return charArray_m;
2185       case 'n' :
2186         return charArray_n;
2187       case 'o' :
2188         return charArray_o;
2189       case 'p' :
2190         return charArray_p;
2191       case 'q' :
2192         return charArray_q;
2193       case 'r' :
2194         return charArray_r;
2195       case 's' :
2196         return charArray_s;
2197       case 't' :
2198         return charArray_t;
2199       case 'u' :
2200         return charArray_u;
2201       case 'v' :
2202         return charArray_v;
2203       case 'w' :
2204         return charArray_w;
2205       case 'x' :
2206         return charArray_x;
2207       case 'y' :
2208         return charArray_y;
2209       case 'z' :
2210         return charArray_z;
2211       default :
2212         return new char[]{charOne};
2213     }
2214   }
2215   final char[] optimizedCurrentTokenSource2() {
2216     //try to return the same char[] build only once
2217     char c0, c1;
2218     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2219         % TableSize;
2220     char[][] table = charArray_length[0][hash];
2221     int i = newEntry2;
2222     while (++i < InternalTableSize) {
2223       char[] charArray = table[i];
2224       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2225         return charArray;
2226     }
2227     //---------other side---------
2228     i = -1;
2229     int max = newEntry2;
2230     while (++i <= max) {
2231       char[] charArray = table[i];
2232       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2233         return charArray;
2234     }
2235     //--------add the entry-------
2236     if (++max >= InternalTableSize)
2237       max = 0;
2238     char[] r;
2239     table[max] = (r = new char[]{c0, c1});
2240     newEntry2 = max;
2241     return r;
2242   }
2243   final char[] optimizedCurrentTokenSource3() {
2244     //try to return the same char[] build only once
2245     char c0, c1, c2;
2246     int hash = (((c0 = source[startPosition]) << 12)
2247         + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2248         % TableSize;
2249     char[][] table = charArray_length[1][hash];
2250     int i = newEntry3;
2251     while (++i < InternalTableSize) {
2252       char[] charArray = table[i];
2253       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2254         return charArray;
2255     }
2256     //---------other side---------
2257     i = -1;
2258     int max = newEntry3;
2259     while (++i <= max) {
2260       char[] charArray = table[i];
2261       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2262         return charArray;
2263     }
2264     //--------add the entry-------
2265     if (++max >= InternalTableSize)
2266       max = 0;
2267     char[] r;
2268     table[max] = (r = new char[]{c0, c1, c2});
2269     newEntry3 = max;
2270     return r;
2271   }
2272   final char[] optimizedCurrentTokenSource4() {
2273     //try to return the same char[] build only once
2274     char c0, c1, c2, c3;
2275     long hash = ((((long) (c0 = source[startPosition])) << 18)
2276         + ((c1 = source[startPosition + 1]) << 12)
2277         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2278         % TableSize;
2279     char[][] table = charArray_length[2][(int) hash];
2280     int i = newEntry4;
2281     while (++i < InternalTableSize) {
2282       char[] charArray = table[i];
2283       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2284           && (c3 == charArray[3]))
2285         return charArray;
2286     }
2287     //---------other side---------
2288     i = -1;
2289     int max = newEntry4;
2290     while (++i <= max) {
2291       char[] charArray = table[i];
2292       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2293           && (c3 == charArray[3]))
2294         return charArray;
2295     }
2296     //--------add the entry-------
2297     if (++max >= InternalTableSize)
2298       max = 0;
2299     char[] r;
2300     table[max] = (r = new char[]{c0, c1, c2, c3});
2301     newEntry4 = max;
2302     return r;
2303   }
2304   final char[] optimizedCurrentTokenSource5() {
2305     //try to return the same char[] build only once
2306     char c0, c1, c2, c3, c4;
2307     long hash = ((((long) (c0 = source[startPosition])) << 24)
2308         + (((long) (c1 = source[startPosition + 1])) << 18)
2309         + ((c2 = source[startPosition + 2]) << 12)
2310         + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2311         % TableSize;
2312     char[][] table = charArray_length[3][(int) hash];
2313     int i = newEntry5;
2314     while (++i < InternalTableSize) {
2315       char[] charArray = table[i];
2316       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2317           && (c3 == charArray[3]) && (c4 == charArray[4]))
2318         return charArray;
2319     }
2320     //---------other side---------
2321     i = -1;
2322     int max = newEntry5;
2323     while (++i <= max) {
2324       char[] charArray = table[i];
2325       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2326           && (c3 == charArray[3]) && (c4 == charArray[4]))
2327         return charArray;
2328     }
2329     //--------add the entry-------
2330     if (++max >= InternalTableSize)
2331       max = 0;
2332     char[] r;
2333     table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2334     newEntry5 = max;
2335     return r;
2336   }
2337   final char[] optimizedCurrentTokenSource6() {
2338     //try to return the same char[] build only once
2339     char c0, c1, c2, c3, c4, c5;
2340     long hash = ((((long) (c0 = source[startPosition])) << 32)
2341         + (((long) (c1 = source[startPosition + 1])) << 24)
2342         + (((long) (c2 = source[startPosition + 2])) << 18)
2343         + ((c3 = source[startPosition + 3]) << 12)
2344         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2345         % TableSize;
2346     char[][] table = charArray_length[4][(int) hash];
2347     int i = newEntry6;
2348     while (++i < InternalTableSize) {
2349       char[] charArray = table[i];
2350       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2351           && (c3 == charArray[3]) && (c4 == charArray[4])
2352           && (c5 == charArray[5]))
2353         return charArray;
2354     }
2355     //---------other side---------
2356     i = -1;
2357     int max = newEntry6;
2358     while (++i <= max) {
2359       char[] charArray = table[i];
2360       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2361           && (c3 == charArray[3]) && (c4 == charArray[4])
2362           && (c5 == charArray[5]))
2363         return charArray;
2364     }
2365     //--------add the entry-------
2366     if (++max >= InternalTableSize)
2367       max = 0;
2368     char[] r;
2369     table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2370     newEntry6 = max;
2371     return r;
2372   }
2373   public final void pushLineSeparator() throws InvalidInputException {
2374     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2375     final int INCREMENT = 250;
2376     if (this.checkNonExternalizedStringLiterals) {
2377       // reinitialize the current line for non externalize strings purpose
2378       currentLine = null;
2379     }
2380     //currentCharacter is at position currentPosition-1
2381     // cr 000D
2382     if (currentCharacter == '\r') {
2383       int separatorPos = currentPosition - 1;
2384       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2385         return;
2386       //System.out.println("CR-" + separatorPos);
2387       try {
2388         lineEnds[++linePtr] = separatorPos;
2389       } catch (IndexOutOfBoundsException e) {
2390         //linePtr value is correct
2391         int oldLength = lineEnds.length;
2392         int[] old = lineEnds;
2393         lineEnds = new int[oldLength + INCREMENT];
2394         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2395         lineEnds[linePtr] = separatorPos;
2396       }
2397       // look-ahead for merged cr+lf
2398       try {
2399         if (source[currentPosition] == '\n') {
2400           //System.out.println("look-ahead LF-" + currentPosition);
2401           lineEnds[linePtr] = currentPosition;
2402           currentPosition++;
2403           wasAcr = false;
2404         } else {
2405           wasAcr = true;
2406         }
2407       } catch (IndexOutOfBoundsException e) {
2408         wasAcr = true;
2409       }
2410     } else {
2411       // lf 000A
2412       if (currentCharacter == '\n') {
2413         //must merge eventual cr followed by lf
2414         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2415           //System.out.println("merge LF-" + (currentPosition - 1));
2416           lineEnds[linePtr] = currentPosition - 1;
2417         } else {
2418           int separatorPos = currentPosition - 1;
2419           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2420             return;
2421           // System.out.println("LF-" + separatorPos);
2422           try {
2423             lineEnds[++linePtr] = separatorPos;
2424           } catch (IndexOutOfBoundsException e) {
2425             //linePtr value is correct
2426             int oldLength = lineEnds.length;
2427             int[] old = lineEnds;
2428             lineEnds = new int[oldLength + INCREMENT];
2429             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2430             lineEnds[linePtr] = separatorPos;
2431           }
2432         }
2433         wasAcr = false;
2434       }
2435     }
2436   }
2437   public final void pushUnicodeLineSeparator() {
2438     // isUnicode means that the \r or \n has been read as a unicode character
2439     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2440     final int INCREMENT = 250;
2441     //currentCharacter is at position currentPosition-1
2442     if (this.checkNonExternalizedStringLiterals) {
2443       // reinitialize the current line for non externalize strings purpose
2444       currentLine = null;
2445     }
2446     // cr 000D
2447     if (currentCharacter == '\r') {
2448       int separatorPos = currentPosition - 6;
2449       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2450         return;
2451       //System.out.println("CR-" + separatorPos);
2452       try {
2453         lineEnds[++linePtr] = separatorPos;
2454       } catch (IndexOutOfBoundsException e) {
2455         //linePtr value is correct
2456         int oldLength = lineEnds.length;
2457         int[] old = lineEnds;
2458         lineEnds = new int[oldLength + INCREMENT];
2459         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2460         lineEnds[linePtr] = separatorPos;
2461       }
2462       // look-ahead for merged cr+lf
2463       if (source[currentPosition] == '\n') {
2464         //System.out.println("look-ahead LF-" + currentPosition);
2465         lineEnds[linePtr] = currentPosition;
2466         currentPosition++;
2467         wasAcr = false;
2468       } else {
2469         wasAcr = true;
2470       }
2471     } else {
2472       // lf 000A
2473       if (currentCharacter == '\n') {
2474         //must merge eventual cr followed by lf
2475         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2476           //System.out.println("merge LF-" + (currentPosition - 1));
2477           lineEnds[linePtr] = currentPosition - 6;
2478         } else {
2479           int separatorPos = currentPosition - 6;
2480           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2481             return;
2482           // System.out.println("LF-" + separatorPos);
2483           try {
2484             lineEnds[++linePtr] = separatorPos;
2485           } catch (IndexOutOfBoundsException e) {
2486             //linePtr value is correct
2487             int oldLength = lineEnds.length;
2488             int[] old = lineEnds;
2489             lineEnds = new int[oldLength + INCREMENT];
2490             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2491             lineEnds[linePtr] = separatorPos;
2492           }
2493         }
2494         wasAcr = false;
2495       }
2496     }
2497   }
2498   public final void recordComment(boolean isJavadoc) {
2499     // a new annotation comment is recorded
2500     try {
2501       commentStops[++commentPtr] = isJavadoc
2502           ? currentPosition
2503           : -currentPosition;
2504     } catch (IndexOutOfBoundsException e) {
2505       int oldStackLength = commentStops.length;
2506       int[] oldStack = commentStops;
2507       commentStops = new int[oldStackLength + 30];
2508       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2509       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2510       //grows the positions buffers too
2511       int[] old = commentStarts;
2512       commentStarts = new int[oldStackLength + 30];
2513       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2514     }
2515     //the buffer is of a correct size here
2516     commentStarts[commentPtr] = startPosition;
2517   }
2518   public void resetTo(int begin, int end) {
2519     //reset the scanner to a given position where it may rescan again
2520     diet = false;
2521     initialPosition = startPosition = currentPosition = begin;
2522     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2523     commentPtr = -1; // reset comment stack
2524   }
2525   public final void scanSingleQuotedEscapeCharacter()
2526       throws InvalidInputException {
2527     // the string with "\\u" is a legal string of two chars \ and u
2528     //thus we use a direct access to the source (for regular cases).
2529     //    if (unicodeAsBackSlash) {
2530     //      // consume next character
2531     //      unicodeAsBackSlash = false;
2532     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2533     //        && (source[currentPosition] == 'u')) {
2534     //        getNextUnicodeChar();
2535     //      } else {
2536     //        if (withoutUnicodePtr != 0) {
2537     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2538     //        }
2539     //      }
2540     //    } else
2541     currentCharacter = source[currentPosition++];
2542     switch (currentCharacter) {
2543       case '\'' :
2544         currentCharacter = '\'';
2545         break;
2546       case '\\' :
2547         currentCharacter = '\\';
2548         break;
2549       default :
2550         currentCharacter = '\\';
2551         currentPosition--;
2552     }
2553   }
2554   public final void scanDoubleQuotedEscapeCharacter()
2555       throws InvalidInputException {
2556     // the string with "\\u" is a legal string of two chars \ and u
2557     //thus we use a direct access to the source (for regular cases).
2558     //    if (unicodeAsBackSlash) {
2559     //      // consume next character
2560     //      unicodeAsBackSlash = false;
2561     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2562     //        && (source[currentPosition] == 'u')) {
2563     //        getNextUnicodeChar();
2564     //      } else {
2565     //        if (withoutUnicodePtr != 0) {
2566     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2567     //        }
2568     //      }
2569     //    } else
2570     currentCharacter = source[currentPosition++];
2571     switch (currentCharacter) {
2572       //      case 'b' :
2573       //        currentCharacter = '\b';
2574       //        break;
2575       case 't' :
2576         currentCharacter = '\t';
2577         break;
2578       case 'n' :
2579         currentCharacter = '\n';
2580         break;
2581       //      case 'f' :
2582       //        currentCharacter = '\f';
2583       //        break;
2584       case 'r' :
2585         currentCharacter = '\r';
2586         break;
2587       case '\"' :
2588         currentCharacter = '\"';
2589         break;
2590       case '\'' :
2591         currentCharacter = '\'';
2592         break;
2593       case '\\' :
2594         currentCharacter = '\\';
2595         break;
2596       case '$' :
2597         currentCharacter = '$';
2598         break;
2599       default :
2600         // -----------octal escape--------------
2601         // OctalDigit
2602         // OctalDigit OctalDigit
2603         // ZeroToThree OctalDigit OctalDigit
2604         int number = Character.getNumericValue(currentCharacter);
2605         if (number >= 0 && number <= 7) {
2606           boolean zeroToThreeNot = number > 3;
2607           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2608             int digit = Character.getNumericValue(currentCharacter);
2609             if (digit >= 0 && digit <= 7) {
2610               number = (number * 8) + digit;
2611               if (Character
2612                   .isDigit(currentCharacter = source[currentPosition++])) {
2613                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2614                   // Digit --> ignore last character
2615                   currentPosition--;
2616                 } else {
2617                   digit = Character.getNumericValue(currentCharacter);
2618                   if (digit >= 0 && digit <= 7) {
2619                     // has read \ZeroToThree OctalDigit OctalDigit
2620                     number = (number * 8) + digit;
2621                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2622                     // --> ignore last character
2623                     currentPosition--;
2624                   }
2625                 }
2626               } else { // has read \OctalDigit NonDigit--> ignore last
2627                 // character
2628                 currentPosition--;
2629               }
2630             } else { // has read \OctalDigit NonOctalDigit--> ignore last
2631               // character
2632               currentPosition--;
2633             }
2634           } else { // has read \OctalDigit --> ignore last character
2635             currentPosition--;
2636           }
2637           if (number > 255)
2638             throw new InvalidInputException(INVALID_ESCAPE);
2639           currentCharacter = (char) number;
2640         }
2641     //else
2642     //     throw new InvalidInputException(INVALID_ESCAPE);
2643     }
2644   }
2645   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2646   //    return scanIdentifierOrKeyword( false );
2647   //  }
2648   public int scanIdentifierOrKeyword(boolean isVariable)
2649       throws InvalidInputException {
2650     //test keywords
2651     //first dispatch on the first char.
2652     //then the length. If there are several
2653     //keywors with the same length AND the same first char, then do another
2654     //disptach on the second char :-)...cool....but fast !
2655     useAssertAsAnIndentifier = false;
2656     while (getNextCharAsJavaIdentifierPart()) {
2657     };
2658     if (isVariable) {
2659       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2660       //        return TokenNamethis;
2661       //      }
2662       return TokenNameVariable;
2663     }
2664     int index, length;
2665     char[] data;
2666     char firstLetter;
2667     //    if (withoutUnicodePtr == 0)
2668     //quick test on length == 1 but not on length > 12 while most identifier
2669     //have a length which is <= 12...but there are lots of identifier with
2670     //only one char....
2671     //      {
2672     if ((length = currentPosition - startPosition) == 1)
2673       return TokenNameIdentifier;
2674     //  data = source;
2675     data = new char[length];
2676     index = startPosition;
2677     for (int i = 0; i < length; i++) {
2678       data[i] = Character.toLowerCase(source[index + i]);
2679     }
2680     index = 0;
2681     //    } else {
2682     //      if ((length = withoutUnicodePtr) == 1)
2683     //        return TokenNameIdentifier;
2684     //      // data = withoutUnicodeBuffer;
2685     //      data = new char[withoutUnicodeBuffer.length];
2686     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2687     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2688     //      }
2689     //      index = 1;
2690     //    }
2691     firstLetter = data[index];
2692     switch (firstLetter) {
2693       case '_' :
2694         switch (length) {
2695           case 8 :
2696             //__FILE__
2697             if ((data[++index] == '_') && (data[++index] == 'f')
2698                 && (data[++index] == 'i') && (data[++index] == 'l')
2699                 && (data[++index] == 'e') && (data[++index] == '_')
2700                 && (data[++index] == '_'))
2701               return TokenNameFILE;
2702             index = 0; //__LINE__
2703             if ((data[++index] == '_') && (data[++index] == 'l')
2704                 && (data[++index] == 'i') && (data[++index] == 'n')
2705                 && (data[++index] == 'e') && (data[++index] == '_')
2706                 && (data[++index] == '_'))
2707               return TokenNameLINE;
2708             break;
2709           case 9 :
2710             //__CLASS__
2711             if ((data[++index] == '_') && (data[++index] == 'c')
2712                 && (data[++index] == 'l') && (data[++index] == 'a')
2713                 && (data[++index] == 's') && (data[++index] == 's')
2714                 && (data[++index] == '_') && (data[++index] == '_'))
2715               return TokenNameCLASS_C;
2716             break;
2717           case 11 :
2718             //__METHOD__
2719             if ((data[++index] == '_') && (data[++index] == 'm')
2720                 && (data[++index] == 'e') && (data[++index] == 't')
2721                 && (data[++index] == 'h') && (data[++index] == 'o')
2722                 && (data[++index] == 'd') && (data[++index] == '_')
2723                 && (data[++index] == '_'))
2724               return TokenNameMETHOD_C;
2725             break;
2726           case 12 :
2727             //__FUNCTION__
2728             if ((data[++index] == '_') && (data[++index] == 'f')
2729                 && (data[++index] == 'u') && (data[++index] == 'n')
2730                 && (data[++index] == 'c') && (data[++index] == 't')
2731                 && (data[++index] == 'i') && (data[++index] == 'o')
2732                 && (data[++index] == 'n') && (data[++index] == '_')
2733                 && (data[++index] == '_'))
2734               return TokenNameFUNC_C;
2735             break;
2736         }
2737         return TokenNameIdentifier;
2738       case 'a' :
2739         // as and array abstract
2740         switch (length) {
2741           case 2 :
2742             //as
2743             if ((data[++index] == 's')) {
2744               return TokenNameas;
2745             } else {
2746               return TokenNameIdentifier;
2747             }
2748           case 3 :
2749             //and
2750             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2751               return TokenNameand;
2752             } else {
2753               return TokenNameIdentifier;
2754             }
2755           case 5 :
2756             // array
2757             if ((data[++index] == 'r') && (data[++index] == 'r')
2758                 && (data[++index] == 'a') && (data[++index] == 'y'))
2759               return TokenNamearray;
2760             else
2761               return TokenNameIdentifier;
2762           case 8 :
2763             if ((data[++index] == 'b') && (data[++index] == 's')
2764                 && (data[++index] == 't') && (data[++index] == 'r')
2765                 && (data[++index] == 'a') && (data[++index] == 'c')
2766                 && (data[++index] == 't'))
2767               return TokenNameabstract;
2768             else
2769               return TokenNameIdentifier;
2770           default :
2771             return TokenNameIdentifier;
2772         }
2773       case 'b' :
2774         //break
2775         switch (length) {
2776           case 5 :
2777             if ((data[++index] == 'r') && (data[++index] == 'e')
2778                 && (data[++index] == 'a') && (data[++index] == 'k'))
2779               return TokenNamebreak;
2780             else
2781               return TokenNameIdentifier;
2782           default :
2783             return TokenNameIdentifier;
2784         }
2785       case 'c' :
2786         //case catch class clone const continue
2787         switch (length) {
2788           case 4 :
2789             if ((data[++index] == 'a') && (data[++index] == 's')
2790                 && (data[++index] == 'e'))
2791               return TokenNamecase;
2792             else
2793               return TokenNameIdentifier;
2794           case 5 :
2795             if ((data[++index] == 'a') && (data[++index] == 't')
2796                 && (data[++index] == 'c') && (data[++index] == 'h'))
2797               return TokenNamecatch;
2798             index = 0;
2799             if ((data[++index] == 'l') && (data[++index] == 'a')
2800                 && (data[++index] == 's') && (data[++index] == 's'))
2801               return TokenNameclass;
2802             index = 0;
2803             if ((data[++index] == 'l') && (data[++index] == 'o')
2804                 && (data[++index] == 'n') && (data[++index] == 'e'))
2805               return TokenNameclone;
2806             index = 0;
2807             if ((data[++index] == 'o') && (data[++index] == 'n')
2808                 && (data[++index] == 's') && (data[++index] == 't'))
2809               return TokenNameconst;
2810             else
2811               return TokenNameIdentifier;
2812           case 8 :
2813             if ((data[++index] == 'o') && (data[++index] == 'n')
2814                 && (data[++index] == 't') && (data[++index] == 'i')
2815                 && (data[++index] == 'n') && (data[++index] == 'u')
2816                 && (data[++index] == 'e'))
2817               return TokenNamecontinue;
2818             else
2819               return TokenNameIdentifier;
2820           default :
2821             return TokenNameIdentifier;
2822         }
2823       case 'd' :
2824         // declare default do die
2825         // TODO delete define ==> no keyword !
2826         switch (length) {
2827           case 2 :
2828             if ((data[++index] == 'o'))
2829               return TokenNamedo;
2830             else
2831               return TokenNameIdentifier;
2832           //          case 6 :
2833           //            if ((data[++index] == 'e')
2834           //              && (data[++index] == 'f')
2835           //              && (data[++index] == 'i')
2836           //              && (data[++index] == 'n')
2837           //              && (data[++index] == 'e'))
2838           //              return TokenNamedefine;
2839           //            else
2840           //              return TokenNameIdentifier;
2841           case 7 :
2842             if ((data[++index] == 'e') && (data[++index] == 'c')
2843                 && (data[++index] == 'l') && (data[++index] == 'a')
2844                 && (data[++index] == 'r') && (data[++index] == 'e'))
2845               return TokenNamedeclare;
2846             index = 0;
2847             if ((data[++index] == 'e') && (data[++index] == 'f')
2848                 && (data[++index] == 'a') && (data[++index] == 'u')
2849                 && (data[++index] == 'l') && (data[++index] == 't'))
2850               return TokenNamedefault;
2851             else
2852               return TokenNameIdentifier;
2853           default :
2854             return TokenNameIdentifier;
2855         }
2856       case 'e' :
2857         //echo else exit elseif extends eval
2858         switch (length) {
2859           case 4 :
2860             if ((data[++index] == 'c') && (data[++index] == 'h')
2861                 && (data[++index] == 'o'))
2862               return TokenNameecho;
2863             else if ((data[index] == 'l') && (data[++index] == 's')
2864                 && (data[++index] == 'e'))
2865               return TokenNameelse;
2866             else if ((data[index] == 'x') && (data[++index] == 'i')
2867                 && (data[++index] == 't'))
2868               return TokenNameexit;
2869             else if ((data[index] == 'v') && (data[++index] == 'a')
2870                 && (data[++index] == 'l'))
2871               return TokenNameeval;
2872             else
2873               return TokenNameIdentifier;
2874           case 5 :
2875             // endif empty
2876             if ((data[++index] == 'n') && (data[++index] == 'd')
2877                 && (data[++index] == 'i') && (data[++index] == 'f'))
2878               return TokenNameendif;
2879             if ((data[index] == 'm') && (data[++index] == 'p')
2880                 && (data[++index] == 't') && (data[++index] == 'y'))
2881               return TokenNameempty;
2882             else
2883               return TokenNameIdentifier;
2884           case 6 :
2885             // endfor
2886             if ((data[++index] == 'n') && (data[++index] == 'd')
2887                 && (data[++index] == 'f') && (data[++index] == 'o')
2888                 && (data[++index] == 'r'))
2889               return TokenNameendfor;
2890             else if ((data[index] == 'l') && (data[++index] == 's')
2891                 && (data[++index] == 'e') && (data[++index] == 'i')
2892                 && (data[++index] == 'f'))
2893               return TokenNameelseif;
2894             else
2895               return TokenNameIdentifier;
2896           case 7 :
2897             if ((data[++index] == 'x') && (data[++index] == 't')
2898                 && (data[++index] == 'e') && (data[++index] == 'n')
2899                 && (data[++index] == 'd') && (data[++index] == 's'))
2900               return TokenNameextends;
2901             else
2902               return TokenNameIdentifier;
2903           case 8 :
2904             // endwhile
2905             if ((data[++index] == 'n') && (data[++index] == 'd')
2906                 && (data[++index] == 'w') && (data[++index] == 'h')
2907                 && (data[++index] == 'i') && (data[++index] == 'l')
2908                 && (data[++index] == 'e'))
2909               return TokenNameendwhile;
2910             else
2911               return TokenNameIdentifier;
2912           case 9 :
2913             // endswitch
2914             if ((data[++index] == 'n') && (data[++index] == 'd')
2915                 && (data[++index] == 's') && (data[++index] == 'w')
2916                 && (data[++index] == 'i') && (data[++index] == 't')
2917                 && (data[++index] == 'c') && (data[++index] == 'h'))
2918               return TokenNameendswitch;
2919             else
2920               return TokenNameIdentifier;
2921           case 10 :
2922             // enddeclare
2923             if ((data[++index] == 'n') && (data[++index] == 'd')
2924                 && (data[++index] == 'd') && (data[++index] == 'e')
2925                 && (data[++index] == 'c') && (data[++index] == 'l')
2926                 && (data[++index] == 'a') && (data[++index] == 'r')
2927                 && (data[++index] == 'e'))
2928               return TokenNameendforeach;
2929             index = 0;
2930             if ((data[++index] == 'n') // endforeach
2931                 && (data[++index] == 'd') && (data[++index] == 'f')
2932                 && (data[++index] == 'o') && (data[++index] == 'r')
2933                 && (data[++index] == 'e') && (data[++index] == 'a')
2934                 && (data[++index] == 'c') && (data[++index] == 'h'))
2935               return TokenNameendforeach;
2936             else
2937               return TokenNameIdentifier;
2938           default :
2939             return TokenNameIdentifier;
2940         }
2941       case 'f' :
2942         //for false final function
2943         switch (length) {
2944           case 3 :
2945             if ((data[++index] == 'o') && (data[++index] == 'r'))
2946               return TokenNamefor;
2947             else
2948               return TokenNameIdentifier;
2949           case 5 :
2950             //            if ((data[++index] == 'a') && (data[++index] == 'l')
2951             //                && (data[++index] == 's') && (data[++index] == 'e'))
2952             //              return TokenNamefalse;
2953             if ((data[++index] == 'i') && (data[++index] == 'n')
2954                 && (data[++index] == 'a') && (data[++index] == 'l'))
2955               return TokenNamefinal;
2956             else
2957               return TokenNameIdentifier;
2958           case 7 :
2959             // foreach
2960             if ((data[++index] == 'o') && (data[++index] == 'r')
2961                 && (data[++index] == 'e') && (data[++index] == 'a')
2962                 && (data[++index] == 'c') && (data[++index] == 'h'))
2963               return TokenNameforeach;
2964             else
2965               return TokenNameIdentifier;
2966           case 8 :
2967             // function
2968             if ((data[++index] == 'u') && (data[++index] == 'n')
2969                 && (data[++index] == 'c') && (data[++index] == 't')
2970                 && (data[++index] == 'i') && (data[++index] == 'o')
2971                 && (data[++index] == 'n'))
2972               return TokenNamefunction;
2973             else
2974               return TokenNameIdentifier;
2975           default :
2976             return TokenNameIdentifier;
2977         }
2978       case 'g' :
2979         //global
2980         if (length == 6) {
2981           if ((data[++index] == 'l') && (data[++index] == 'o')
2982               && (data[++index] == 'b') && (data[++index] == 'a')
2983               && (data[++index] == 'l')) {
2984             return TokenNameglobal;
2985           }
2986         }
2987         return TokenNameIdentifier;
2988       case 'i' :
2989         //if int isset include include_once instanceof interface implements
2990         switch (length) {
2991           case 2 :
2992             if (data[++index] == 'f')
2993               return TokenNameif;
2994             else
2995               return TokenNameIdentifier;
2996           //          case 3 :
2997           //            if ((data[++index] == 'n') && (data[++index] == 't'))
2998           //              return TokenNameint;
2999           //            else
3000           //              return TokenNameIdentifier;
3001           case 5 :
3002             if ((data[++index] == 's') && (data[++index] == 's')
3003                 && (data[++index] == 'e') && (data[++index] == 't'))
3004               return TokenNameisset;
3005             else
3006               return TokenNameIdentifier;
3007           case 7 :
3008             if ((data[++index] == 'n') && (data[++index] == 'c')
3009                 && (data[++index] == 'l') && (data[++index] == 'u')
3010                 && (data[++index] == 'd') && (data[++index] == 'e'))
3011               return TokenNameinclude;
3012             else
3013               return TokenNameIdentifier;
3014           case 9 :
3015             // interface
3016             if ((data[++index] == 'n') && (data[++index] == 't')
3017                 && (data[++index] == 'e') && (data[++index] == 'r')
3018                 && (data[++index] == 'f') && (data[++index] == 'a')
3019                 && (data[++index] == 'c') && (data[++index] == 'e'))
3020               return TokenNameinterface;
3021             else
3022               return TokenNameIdentifier;
3023           case 10 :
3024             // instanceof
3025             if ((data[++index] == 'n') && (data[++index] == 's')
3026                 && (data[++index] == 't') && (data[++index] == 'a')
3027                 && (data[++index] == 'n') && (data[++index] == 'c')
3028                 && (data[++index] == 'e') && (data[++index] == 'o')
3029                 && (data[++index] == 'f'))
3030               return TokenNameinstanceof;
3031             if ((data[index] == 'm') && (data[++index] == 'p')
3032                 && (data[++index] == 'l') && (data[++index] == 'e')
3033                 && (data[++index] == 'm') && (data[++index] == 'e')
3034                 && (data[++index] == 'n') && (data[++index] == 't')
3035                 && (data[++index] == 's'))
3036               return TokenNameimplements;
3037             else
3038               return TokenNameIdentifier;
3039           case 12 :
3040             if ((data[++index] == 'n') && (data[++index] == 'c')
3041                 && (data[++index] == 'l') && (data[++index] == 'u')
3042                 && (data[++index] == 'd') && (data[++index] == 'e')
3043                 && (data[++index] == '_') && (data[++index] == 'o')
3044                 && (data[++index] == 'n') && (data[++index] == 'c')
3045                 && (data[++index] == 'e'))
3046               return TokenNameinclude_once;
3047             else
3048               return TokenNameIdentifier;
3049           default :
3050             return TokenNameIdentifier;
3051         }
3052       case 'l' :
3053         //list
3054         if (length == 4) {
3055           if ((data[++index] == 'i') && (data[++index] == 's')
3056               && (data[++index] == 't')) {
3057             return TokenNamelist;
3058           }
3059         }
3060         return TokenNameIdentifier;
3061       case 'n' :
3062         // new null
3063         switch (length) {
3064           case 3 :
3065             if ((data[++index] == 'e') && (data[++index] == 'w'))
3066               return TokenNamenew;
3067             else
3068               return TokenNameIdentifier;
3069           //          case 4 :
3070           //            if ((data[++index] == 'u') && (data[++index] == 'l')
3071           //                && (data[++index] == 'l'))
3072           //              return TokenNamenull;
3073           //            else
3074           //              return TokenNameIdentifier;
3075           default :
3076             return TokenNameIdentifier;
3077         }
3078       case 'o' :
3079         // or old_function
3080         if (length == 2) {
3081           if (data[++index] == 'r') {
3082             return TokenNameor;
3083           }
3084         }
3085         //        if (length == 12) {
3086         //          if ((data[++index] == 'l')
3087         //            && (data[++index] == 'd')
3088         //            && (data[++index] == '_')
3089         //            && (data[++index] == 'f')
3090         //            && (data[++index] == 'u')
3091         //            && (data[++index] == 'n')
3092         //            && (data[++index] == 'c')
3093         //            && (data[++index] == 't')
3094         //            && (data[++index] == 'i')
3095         //            && (data[++index] == 'o')
3096         //            && (data[++index] == 'n')) {
3097         //            return TokenNameold_function;
3098         //          }
3099         //        }
3100         return TokenNameIdentifier;
3101       case 'p' :
3102         // print public private protected
3103         switch (length) {
3104           case 5 :
3105             if ((data[++index] == 'r') && (data[++index] == 'i')
3106                 && (data[++index] == 'n') && (data[++index] == 't')) {
3107               return TokenNameprint;
3108             } else
3109               return TokenNameIdentifier;
3110           case 6 :
3111             if ((data[++index] == 'u') && (data[++index] == 'b')
3112                 && (data[++index] == 'l') && (data[++index] == 'i')
3113                 && (data[++index] == 'c')) {
3114               return TokenNamepublic;
3115             } else
3116               return TokenNameIdentifier;
3117           case 7 :
3118             if ((data[++index] == 'r') && (data[++index] == 'i')
3119                 && (data[++index] == 'v') && (data[++index] == 'a')
3120                 && (data[++index] == 't') && (data[++index] == 'e')) {
3121               return TokenNameprivate;
3122             } else
3123               return TokenNameIdentifier;
3124           case 9 :
3125             if ((data[++index] == 'r') && (data[++index] == 'o')
3126                 && (data[++index] == 't') && (data[++index] == 'e')
3127                 && (data[++index] == 'c') && (data[++index] == 't')
3128                 && (data[++index] == 'e') && (data[++index] == 'd')) {
3129               return TokenNameprotected;
3130             } else
3131               return TokenNameIdentifier;
3132         }
3133         return TokenNameIdentifier;
3134       case 'r' :
3135         //return require require_once
3136         if (length == 6) {
3137           if ((data[++index] == 'e') && (data[++index] == 't')
3138               && (data[++index] == 'u') && (data[++index] == 'r')
3139               && (data[++index] == 'n')) {
3140             return TokenNamereturn;
3141           }
3142         } else if (length == 7) {
3143           if ((data[++index] == 'e') && (data[++index] == 'q')
3144               && (data[++index] == 'u') && (data[++index] == 'i')
3145               && (data[++index] == 'r') && (data[++index] == 'e')) {
3146             return TokenNamerequire;
3147           }
3148         } else if (length == 12) {
3149           if ((data[++index] == 'e') && (data[++index] == 'q')
3150               && (data[++index] == 'u') && (data[++index] == 'i')
3151               && (data[++index] == 'r') && (data[++index] == 'e')
3152               && (data[++index] == '_') && (data[++index] == 'o')
3153               && (data[++index] == 'n') && (data[++index] == 'c')
3154               && (data[++index] == 'e')) {
3155             return TokenNamerequire_once;
3156           }
3157         } else
3158           return TokenNameIdentifier;
3159       case 's' :
3160         //static switch
3161         switch (length) {
3162           case 6 :
3163             if (data[++index] == 't')
3164               if ((data[++index] == 'a') && (data[++index] == 't')
3165                   && (data[++index] == 'i') && (data[++index] == 'c')) {
3166                 return TokenNamestatic;
3167               } else
3168                 return TokenNameIdentifier;
3169             else if ((data[index] == 'w') && (data[++index] == 'i')
3170                 && (data[++index] == 't') && (data[++index] == 'c')
3171                 && (data[++index] == 'h'))
3172               return TokenNameswitch;
3173             else
3174               return TokenNameIdentifier;
3175           default :
3176             return TokenNameIdentifier;
3177         }
3178       case 't' :
3179         // try true throw
3180         switch (length) {
3181           case 3 :
3182             if ((data[++index] == 'r') && (data[++index] == 'y'))
3183               return TokenNametry;
3184             else
3185               return TokenNameIdentifier;
3186           //          case 4 :
3187           //            if ((data[++index] == 'r') && (data[++index] == 'u')
3188           //                && (data[++index] == 'e'))
3189           //              return TokenNametrue;
3190           //            else
3191           //              return TokenNameIdentifier;
3192           case 5 :
3193             if ((data[++index] == 'h') && (data[++index] == 'r')
3194                 && (data[++index] == 'o') && (data[++index] == 'w'))
3195               return TokenNamethrow;
3196             else
3197               return TokenNameIdentifier;
3198           default :
3199             return TokenNameIdentifier;
3200         }
3201       case 'u' :
3202         //use unset
3203         switch (length) {
3204           case 3 :
3205             if ((data[++index] == 's') && (data[++index] == 'e'))
3206               return TokenNameuse;
3207             else
3208               return TokenNameIdentifier;
3209           case 5 :
3210             if ((data[++index] == 'n') && (data[++index] == 's')
3211                 && (data[++index] == 'e') && (data[++index] == 't'))
3212               return TokenNameunset;
3213             else
3214               return TokenNameIdentifier;
3215           default :
3216             return TokenNameIdentifier;
3217         }
3218       case 'v' :
3219         //var
3220         switch (length) {
3221           case 3 :
3222             if ((data[++index] == 'a') && (data[++index] == 'r'))
3223               return TokenNamevar;
3224             else
3225               return TokenNameIdentifier;
3226           default :
3227             return TokenNameIdentifier;
3228         }
3229       case 'w' :
3230         //while
3231         switch (length) {
3232           case 5 :
3233             if ((data[++index] == 'h') && (data[++index] == 'i')
3234                 && (data[++index] == 'l') && (data[++index] == 'e'))
3235               return TokenNamewhile;
3236             else
3237               return TokenNameIdentifier;
3238           //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3239           // (data[++index]=='e') && (data[++index]=='f')&&
3240           // (data[++index]=='p'))
3241           //return TokenNamewidefp ;
3242           //else
3243           //return TokenNameIdentifier;
3244           default :
3245             return TokenNameIdentifier;
3246         }
3247       case 'x' :
3248         //xor
3249         switch (length) {
3250           case 3 :
3251             if ((data[++index] == 'o') && (data[++index] == 'r'))
3252               return TokenNamexor;
3253             else
3254               return TokenNameIdentifier;
3255           default :
3256             return TokenNameIdentifier;
3257         }
3258       default :
3259         return TokenNameIdentifier;
3260     }
3261   }
3262   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3263     //when entering this method the currentCharacter is the firt
3264     //digit of the number , i.e. it may be preceeded by a . when
3265     //dotPrefix is true
3266     boolean floating = dotPrefix;
3267     if ((!dotPrefix) && (currentCharacter == '0')) {
3268       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3269         //force the first char of the hexa number do exist...
3270         // consume next character
3271         unicodeAsBackSlash = false;
3272         currentCharacter = source[currentPosition++];
3273         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3274         //          && (source[currentPosition] == 'u')) {
3275         //          getNextUnicodeChar();
3276         //        } else {
3277         //          if (withoutUnicodePtr != 0) {
3278         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3279         //          }
3280         //        }
3281         if (Character.digit(currentCharacter, 16) == -1)
3282           throw new InvalidInputException(INVALID_HEXA);
3283         //---end forcing--
3284         while (getNextCharAsDigit(16)) {
3285         };
3286         //        if (getNextChar('l', 'L') >= 0)
3287         //          return TokenNameLongLiteral;
3288         //        else
3289         return TokenNameIntegerLiteral;
3290       }
3291       //there is x or X in the number
3292       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3293       // 00078.0 is true !!!!! crazy language
3294       if (getNextCharAsDigit()) {
3295         //-------------potential octal-----------------
3296         while (getNextCharAsDigit()) {
3297         };
3298         //        if (getNextChar('l', 'L') >= 0) {
3299         //          return TokenNameLongLiteral;
3300         //        }
3301         //
3302         //        if (getNextChar('f', 'F') >= 0) {
3303         //          return TokenNameFloatingPointLiteral;
3304         //        }
3305         if (getNextChar('d', 'D') >= 0) {
3306           return TokenNameDoubleLiteral;
3307         } else { //make the distinction between octal and float ....
3308           if (getNextChar('.')) { //bingo ! ....
3309             while (getNextCharAsDigit()) {
3310             };
3311             if (getNextChar('e', 'E') >= 0) {
3312               // consume next character
3313               unicodeAsBackSlash = false;
3314               currentCharacter = source[currentPosition++];
3315               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3316               //                && (source[currentPosition] == 'u')) {
3317               //                getNextUnicodeChar();
3318               //              } else {
3319               //                if (withoutUnicodePtr != 0) {
3320               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3321               //                }
3322               //              }
3323               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3324                 // consume next character
3325                 unicodeAsBackSlash = false;
3326                 currentCharacter = source[currentPosition++];
3327                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3328                 //                  && (source[currentPosition] == 'u')) {
3329                 //                  getNextUnicodeChar();
3330                 //                } else {
3331                 //                  if (withoutUnicodePtr != 0) {
3332                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3333                 //                      currentCharacter;
3334                 //                  }
3335                 //                }
3336               }
3337               if (!Character.isDigit(currentCharacter))
3338                 throw new InvalidInputException(INVALID_FLOAT);
3339               while (getNextCharAsDigit()) {
3340               };
3341             }
3342             //            if (getNextChar('f', 'F') >= 0)
3343             //              return TokenNameFloatingPointLiteral;
3344             getNextChar('d', 'D'); //jump over potential d or D
3345             return TokenNameDoubleLiteral;
3346           } else {
3347             return TokenNameIntegerLiteral;
3348           }
3349         }
3350       } else {
3351         /* carry on */
3352       }
3353     }
3354     while (getNextCharAsDigit()) {
3355     };
3356     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3357     //      return TokenNameLongLiteral;
3358     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3359       while (getNextCharAsDigit()) {
3360       };
3361       floating = true;
3362     }
3363     //if floating is true both exponant and suffix may be optional
3364     if (getNextChar('e', 'E') >= 0) {
3365       floating = true;
3366       // consume next character
3367       unicodeAsBackSlash = false;
3368       currentCharacter = source[currentPosition++];
3369       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3370       //        && (source[currentPosition] == 'u')) {
3371       //        getNextUnicodeChar();
3372       //      } else {
3373       //        if (withoutUnicodePtr != 0) {
3374       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3375       //        }
3376       //      }
3377       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3378         // next
3379         // character
3380         unicodeAsBackSlash = false;
3381         currentCharacter = source[currentPosition++];
3382         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3383         //          && (source[currentPosition] == 'u')) {
3384         //          getNextUnicodeChar();
3385         //        } else {
3386         //          if (withoutUnicodePtr != 0) {
3387         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3388         //          }
3389         //        }
3390       }
3391       if (!Character.isDigit(currentCharacter))
3392         throw new InvalidInputException(INVALID_FLOAT);
3393       while (getNextCharAsDigit()) {
3394       };
3395     }
3396     if (getNextChar('d', 'D') >= 0)
3397       return TokenNameDoubleLiteral;
3398     //    if (getNextChar('f', 'F') >= 0)
3399     //      return TokenNameFloatingPointLiteral;
3400     //the long flag has been tested before
3401     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3402   }
3403   /**
3404    * Search the line number corresponding to a specific position
3405    *
3406    */
3407   public final int getLineNumber(int position) {
3408     if (lineEnds == null)
3409       return 1;
3410     int length = linePtr + 1;
3411     if (length == 0)
3412       return 1;
3413     int g = 0, d = length - 1;
3414     int m = 0;
3415     while (g <= d) {
3416       m = (g + d) / 2;
3417       if (position < lineEnds[m]) {
3418         d = m - 1;
3419       } else if (position > lineEnds[m]) {
3420         g = m + 1;
3421       } else {
3422         return m + 1;
3423       }
3424     }
3425     if (position < lineEnds[m]) {
3426       return m + 1;
3427     }
3428     return m + 2;
3429   }
3430   public void setPHPMode(boolean mode) {
3431     phpMode = mode;
3432   }
3433   public final void setSource(char[] source) {
3434     //the source-buffer is set to sourceString
3435     if (source == null) {
3436       this.source = new char[0];
3437     } else {
3438       this.source = source;
3439     }
3440     startPosition = -1;
3441     initialPosition = currentPosition = 0;
3442     containsAssertKeyword = false;
3443     withoutUnicodeBuffer = new char[this.source.length];
3444   }
3445   public String toString() {
3446     if (startPosition == source.length)
3447       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3448     if (currentPosition > source.length)
3449       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3450     char front[] = new char[startPosition];
3451     System.arraycopy(source, 0, front, 0, startPosition);
3452     int middleLength = (currentPosition - 1) - startPosition + 1;
3453     char middle[];
3454     if (middleLength > -1) {
3455       middle = new char[middleLength];
3456       System.arraycopy(source, startPosition, middle, 0, middleLength);
3457     } else {
3458       middle = new char[0];
3459     }
3460     char end[] = new char[source.length - (currentPosition - 1)];
3461     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3462         - (currentPosition - 1) - 1);
3463     return new String(front)
3464         + "\n===============================\nStarts here -->" //$NON-NLS-1$
3465         + new String(middle)
3466         + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3467         + new String(end);
3468   }
3469   public final String toStringAction(int act) {
3470     switch (act) {
3471       case TokenNameERROR :
3472         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3473       // //$NON-NLS-1$
3474       case TokenNameINLINE_HTML :
3475         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3476       case TokenNameIdentifier :
3477         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3478       case TokenNameVariable :
3479         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3480       case TokenNameabstract :
3481         return "abstract"; //$NON-NLS-1$
3482       case TokenNameand :
3483         return "AND"; //$NON-NLS-1$
3484       case TokenNamearray :
3485         return "array"; //$NON-NLS-1$
3486       case TokenNameas :
3487         return "as"; //$NON-NLS-1$
3488       case TokenNamebreak :
3489         return "break"; //$NON-NLS-1$
3490       case TokenNamecase :
3491         return "case"; //$NON-NLS-1$
3492       case TokenNameclass :
3493         return "class"; //$NON-NLS-1$
3494       case TokenNameclone :
3495         //$NON-NLS-1$
3496         return "clone";
3497       case TokenNameconst :
3498         //$NON-NLS-1$
3499         return "const";
3500       case TokenNamecontinue :
3501         return "continue"; //$NON-NLS-1$
3502       case TokenNamedefault :
3503         return "default"; //$NON-NLS-1$
3504       //      case TokenNamedefine :
3505       //        return "define"; //$NON-NLS-1$
3506       case TokenNamedo :
3507         return "do"; //$NON-NLS-1$
3508       case TokenNameecho :
3509         return "echo"; //$NON-NLS-1$
3510       case TokenNameelse :
3511         return "else"; //$NON-NLS-1$
3512       case TokenNameelseif :
3513         return "elseif"; //$NON-NLS-1$
3514       case TokenNameendfor :
3515         return "endfor"; //$NON-NLS-1$
3516       case TokenNameendforeach :
3517         return "endforeach"; //$NON-NLS-1$
3518       case TokenNameendif :
3519         return "endif"; //$NON-NLS-1$
3520       case TokenNameendswitch :
3521         return "endswitch"; //$NON-NLS-1$
3522       case TokenNameendwhile :
3523         return "endwhile"; //$NON-NLS-1$
3524       case TokenNameextends :
3525         return "extends"; //$NON-NLS-1$
3526       //      case TokenNamefalse :
3527       //        return "false"; //$NON-NLS-1$
3528       case TokenNamefinal :
3529         return "final"; //$NON-NLS-1$
3530       case TokenNamefor :
3531         return "for"; //$NON-NLS-1$
3532       case TokenNameforeach :
3533         return "foreach"; //$NON-NLS-1$
3534       case TokenNamefunction :
3535         return "function"; //$NON-NLS-1$
3536       case TokenNameglobal :
3537         return "global"; //$NON-NLS-1$
3538       case TokenNameif :
3539         return "if"; //$NON-NLS-1$
3540       case TokenNameimplements :
3541         return "implements"; //$NON-NLS-1$
3542       case TokenNameinclude :
3543         return "include"; //$NON-NLS-1$
3544       case TokenNameinclude_once :
3545         return "include_once"; //$NON-NLS-1$
3546       case TokenNameinterface :
3547         return "interface"; //$NON-NLS-1$
3548       case TokenNameisset :
3549         return "isset"; //$NON-NLS-1$
3550       case TokenNamelist :
3551         return "list"; //$NON-NLS-1$
3552       case TokenNamenew :
3553         return "new"; //$NON-NLS-1$
3554       //      case TokenNamenull :
3555       //        return "null"; //$NON-NLS-1$
3556       case TokenNameor :
3557         return "OR"; //$NON-NLS-1$
3558       case TokenNameprint :
3559         return "print"; //$NON-NLS-1$
3560       case TokenNameprivate :
3561         return "private"; //$NON-NLS-1$
3562       case TokenNameprotected :
3563         return "protected"; //$NON-NLS-1$
3564       case TokenNamepublic :
3565         return "public"; //$NON-NLS-1$
3566       case TokenNamerequire :
3567         return "require"; //$NON-NLS-1$
3568       case TokenNamerequire_once :
3569         return "require_once"; //$NON-NLS-1$
3570       case TokenNamereturn :
3571         return "return"; //$NON-NLS-1$
3572       case TokenNamestatic :
3573         return "static"; //$NON-NLS-1$
3574       case TokenNameswitch :
3575         return "switch"; //$NON-NLS-1$
3576       //      case TokenNametrue :
3577       //        return "true"; //$NON-NLS-1$
3578       case TokenNameunset :
3579         return "unset"; //$NON-NLS-1$
3580       case TokenNamevar :
3581         return "var"; //$NON-NLS-1$
3582       case TokenNamewhile :
3583         return "while"; //$NON-NLS-1$
3584       case TokenNamexor :
3585         return "XOR"; //$NON-NLS-1$
3586       //      case TokenNamethis :
3587       //        return "$this"; //$NON-NLS-1$
3588       case TokenNameIntegerLiteral :
3589         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3590       case TokenNameDoubleLiteral :
3591         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3592       case TokenNameStringLiteral :
3593         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3594       case TokenNameStringConstant :
3595         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3596       case TokenNameStringInterpolated :
3597         return "StringInterpolated(" + new String(getCurrentTokenSource())
3598             + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3599       case TokenNameHEREDOC :
3600         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3601       case TokenNamePLUS_PLUS :
3602         return "++"; //$NON-NLS-1$
3603       case TokenNameMINUS_MINUS :
3604         return "--"; //$NON-NLS-1$
3605       case TokenNameEQUAL_EQUAL :
3606         return "=="; //$NON-NLS-1$
3607       case TokenNameEQUAL_EQUAL_EQUAL :
3608         return "==="; //$NON-NLS-1$
3609       case TokenNameEQUAL_GREATER :
3610         return "=>"; //$NON-NLS-1$
3611       case TokenNameLESS_EQUAL :
3612         return "<="; //$NON-NLS-1$
3613       case TokenNameGREATER_EQUAL :
3614         return ">="; //$NON-NLS-1$
3615       case TokenNameNOT_EQUAL :
3616         return "!="; //$NON-NLS-1$
3617       case TokenNameNOT_EQUAL_EQUAL :
3618         return "!=="; //$NON-NLS-1$
3619       case TokenNameLEFT_SHIFT :
3620         return "<<"; //$NON-NLS-1$
3621       case TokenNameRIGHT_SHIFT :
3622         return ">>"; //$NON-NLS-1$
3623       case TokenNamePLUS_EQUAL :
3624         return "+="; //$NON-NLS-1$
3625       case TokenNameMINUS_EQUAL :
3626         return "-="; //$NON-NLS-1$
3627       case TokenNameMULTIPLY_EQUAL :
3628         return "*="; //$NON-NLS-1$
3629       case TokenNameDIVIDE_EQUAL :
3630         return "/="; //$NON-NLS-1$
3631       case TokenNameAND_EQUAL :
3632         return "&="; //$NON-NLS-1$
3633       case TokenNameOR_EQUAL :
3634         return "|="; //$NON-NLS-1$
3635       case TokenNameXOR_EQUAL :
3636         return "^="; //$NON-NLS-1$
3637       case TokenNameREMAINDER_EQUAL :
3638         return "%="; //$NON-NLS-1$
3639       case TokenNameDOT_EQUAL :
3640         return ".="; //$NON-NLS-1$
3641       case TokenNameLEFT_SHIFT_EQUAL :
3642         return "<<="; //$NON-NLS-1$
3643       case TokenNameRIGHT_SHIFT_EQUAL :
3644         return ">>="; //$NON-NLS-1$
3645       case TokenNameOR_OR :
3646         return "||"; //$NON-NLS-1$
3647       case TokenNameAND_AND :
3648         return "&&"; //$NON-NLS-1$
3649       case TokenNamePLUS :
3650         return "+"; //$NON-NLS-1$
3651       case TokenNameMINUS :
3652         return "-"; //$NON-NLS-1$
3653       case TokenNameMINUS_GREATER :
3654         return "->";
3655       case TokenNameNOT :
3656         return "!"; //$NON-NLS-1$
3657       case TokenNameREMAINDER :
3658         return "%"; //$NON-NLS-1$
3659       case TokenNameXOR :
3660         return "^"; //$NON-NLS-1$
3661       case TokenNameAND :
3662         return "&"; //$NON-NLS-1$
3663       case TokenNameMULTIPLY :
3664         return "*"; //$NON-NLS-1$
3665       case TokenNameOR :
3666         return "|"; //$NON-NLS-1$
3667       case TokenNameTWIDDLE :
3668         return "~"; //$NON-NLS-1$
3669       case TokenNameTWIDDLE_EQUAL :
3670         return "~="; //$NON-NLS-1$
3671       case TokenNameDIVIDE :
3672         return "/"; //$NON-NLS-1$
3673       case TokenNameGREATER :
3674         return ">"; //$NON-NLS-1$
3675       case TokenNameLESS :
3676         return "<"; //$NON-NLS-1$
3677       case TokenNameLPAREN :
3678         return "("; //$NON-NLS-1$
3679       case TokenNameRPAREN :
3680         return ")"; //$NON-NLS-1$
3681       case TokenNameLBRACE :
3682         return "{"; //$NON-NLS-1$
3683       case TokenNameRBRACE :
3684         return "}"; //$NON-NLS-1$
3685       case TokenNameLBRACKET :
3686         return "["; //$NON-NLS-1$
3687       case TokenNameRBRACKET :
3688         return "]"; //$NON-NLS-1$
3689       case TokenNameSEMICOLON :
3690         return ";"; //$NON-NLS-1$
3691       case TokenNameQUESTION :
3692         return "?"; //$NON-NLS-1$
3693       case TokenNameCOLON :
3694         return ":"; //$NON-NLS-1$
3695       case TokenNameCOMMA :
3696         return ","; //$NON-NLS-1$
3697       case TokenNameDOT :
3698         return "."; //$NON-NLS-1$
3699       case TokenNameEQUAL :
3700         return "="; //$NON-NLS-1$
3701       case TokenNameAT :
3702         return "@";
3703       case TokenNameDOLLAR :
3704         return "$";
3705       //      case TokenNameDOLLAR_LBRACE :
3706       //        return "${";
3707       case TokenNameEOF :
3708         return "EOF"; //$NON-NLS-1$
3709       case TokenNameWHITESPACE :
3710         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3711       case TokenNameCOMMENT_LINE :
3712         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3713       case TokenNameCOMMENT_BLOCK :
3714         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3715       case TokenNameCOMMENT_PHPDOC :
3716         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3717       case TokenNameHTML :
3718         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3719       case TokenNameFILE :
3720         return "__FILE__"; //$NON-NLS-1$
3721       case TokenNameLINE :
3722         return "__LINE__"; //$NON-NLS-1$
3723       case TokenNameCLASS_C :
3724         return "__CLASS__"; //$NON-NLS-1$
3725       case TokenNameMETHOD_C :
3726         return "__METHOD__"; //$NON-NLS-1$
3727       case TokenNameFUNC_C :
3728         return "__FUNCTION__"; //$NON-NLS-1
3729       case TokenNameboolCAST :
3730         return "( bool )"; //$NON-NLS-1$
3731       case TokenNameintCAST :
3732         return "( int )"; //$NON-NLS-1$
3733       case TokenNamedoubleCAST :
3734         return "( double )"; //$NON-NLS-1$
3735       case TokenNameobjectCAST :
3736         return "( object )"; //$NON-NLS-1$
3737       case TokenNamestringCAST :
3738         return "( string )"; //$NON-NLS-1$
3739       default :
3740         return "not-a-token(" + (new Integer(act)) + ") "
3741             + new String(getCurrentTokenSource()); //$NON-NLS-1$
3742     }
3743   }
3744   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3745       boolean checkNonExternalizedStringLiterals) {
3746     this(tokenizeComments, tokenizeWhiteSpace,
3747         checkNonExternalizedStringLiterals, false);
3748   }
3749   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3750       boolean checkNonExternalizedStringLiterals, boolean assertMode) {
3751     this.eofPosition = Integer.MAX_VALUE;
3752     this.tokenizeComments = tokenizeComments;
3753     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3754     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3755     this.assertMode = assertMode;
3756   }
3757   private void checkNonExternalizeString() throws InvalidInputException {
3758     if (currentLine == null)
3759       return;
3760     parseTags(currentLine);
3761   }
3762   private void parseTags(NLSLine line) throws InvalidInputException {
3763     String s = new String(getCurrentTokenSource());
3764     int pos = s.indexOf(TAG_PREFIX);
3765     int lineLength = line.size();
3766     while (pos != -1) {
3767       int start = pos + TAG_PREFIX_LENGTH;
3768       int end = s.indexOf(TAG_POSTFIX, start);
3769       String index = s.substring(start, end);
3770       int i = 0;
3771       try {
3772         i = Integer.parseInt(index) - 1;
3773         // Tags are one based not zero based.
3774       } catch (NumberFormatException e) {
3775         i = -1; // we don't want to consider this as a valid NLS tag
3776       }
3777       if (line.exists(i)) {
3778         line.set(i, null);
3779       }
3780       pos = s.indexOf(TAG_PREFIX, start);
3781     }
3782     this.nonNLSStrings = new StringLiteral[lineLength];
3783     int nonNLSCounter = 0;
3784     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3785       StringLiteral literal = (StringLiteral) iterator.next();
3786       if (literal != null) {
3787         this.nonNLSStrings[nonNLSCounter++] = literal;
3788       }
3789     }
3790     if (nonNLSCounter == 0) {
3791       this.nonNLSStrings = null;
3792       currentLine = null;
3793       return;
3794     }
3795     this.wasNonExternalizedStringLiteral = true;
3796     if (nonNLSCounter != lineLength) {
3797       System.arraycopy(this.nonNLSStrings, 0,
3798           (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
3799           nonNLSCounter);
3800     }
3801     currentLine = null;
3802   }
3803   public final void scanEscapeCharacter() throws InvalidInputException {
3804     // the string with "\\u" is a legal string of two chars \ and u
3805     //thus we use a direct access to the source (for regular cases).
3806     if (unicodeAsBackSlash) {
3807       // consume next character
3808       unicodeAsBackSlash = false;
3809       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
3810       // (source[currentPosition] == 'u')) {
3811       //                                getNextUnicodeChar();
3812       //                        } else {
3813       if (withoutUnicodePtr != 0) {
3814         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3815         //                              }
3816       }
3817     } else
3818       currentCharacter = source[currentPosition++];
3819     switch (currentCharacter) {
3820       case 'b' :
3821         currentCharacter = '\b';
3822         break;
3823       case 't' :
3824         currentCharacter = '\t';
3825         break;
3826       case 'n' :
3827         currentCharacter = '\n';
3828         break;
3829       case 'f' :
3830         currentCharacter = '\f';
3831         break;
3832       case 'r' :
3833         currentCharacter = '\r';
3834         break;
3835       case '\"' :
3836         currentCharacter = '\"';
3837         break;
3838       case '\'' :
3839         currentCharacter = '\'';
3840         break;
3841       case '\\' :
3842         currentCharacter = '\\';
3843         break;
3844       default :
3845         // -----------octal escape--------------
3846         // OctalDigit
3847         // OctalDigit OctalDigit
3848         // ZeroToThree OctalDigit OctalDigit
3849         int number = Character.getNumericValue(currentCharacter);
3850         if (number >= 0 && number <= 7) {
3851           boolean zeroToThreeNot = number > 3;
3852           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3853             int digit = Character.getNumericValue(currentCharacter);
3854             if (digit >= 0 && digit <= 7) {
3855               number = (number * 8) + digit;
3856               if (Character
3857                   .isDigit(currentCharacter = source[currentPosition++])) {
3858                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3859                   // Digit --> ignore last character
3860                   currentPosition--;
3861                 } else {
3862                   digit = Character.getNumericValue(currentCharacter);
3863                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3864                     // OctalDigit OctalDigit
3865                     number = (number * 8) + digit;
3866                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3867                     // --> ignore last character
3868                     currentPosition--;
3869                   }
3870                 }
3871               } else { // has read \OctalDigit NonDigit--> ignore last
3872                 // character
3873                 currentPosition--;
3874               }
3875             } else { // has read \OctalDigit NonOctalDigit--> ignore last
3876               // character
3877               currentPosition--;
3878             }
3879           } else { // has read \OctalDigit --> ignore last character
3880             currentPosition--;
3881           }
3882           if (number > 255)
3883             throw new InvalidInputException(INVALID_ESCAPE);
3884           currentCharacter = (char) number;
3885         } else
3886           throw new InvalidInputException(INVALID_ESCAPE);
3887     }
3888   }
3889   // chech presence of task: tags
3890   public void checkTaskTag(int commentStart, int commentEnd) {
3891     // only look for newer task: tags
3892     if (this.foundTaskCount > 0
3893         && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3894       return;
3895     }
3896     int foundTaskIndex = this.foundTaskCount;
3897     nextChar : for (int i = commentStart; i < commentEnd
3898         && i < this.eofPosition; i++) {
3899       char[] tag = null;
3900       char[] priority = null;
3901       // check for tag occurrence
3902       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3903         tag = this.taskTags[itag];
3904         priority = this.taskPriorities != null
3905             && itag < this.taskPriorities.length
3906             ? this.taskPriorities[itag]
3907             : null;
3908         int tagLength = tag.length;
3909         for (int t = 0; t < tagLength; t++) {
3910           if (this.source[i + t] != tag[t])
3911             continue nextTag;
3912         }
3913         if (this.foundTaskTags == null) {
3914           this.foundTaskTags = new char[5][];
3915           this.foundTaskMessages = new char[5][];
3916           this.foundTaskPriorities = new char[5][];
3917           this.foundTaskPositions = new int[5][];
3918         } else if (this.foundTaskCount == this.foundTaskTags.length) {
3919           System.arraycopy(this.foundTaskTags, 0,
3920               this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
3921               this.foundTaskCount);
3922           System.arraycopy(this.foundTaskMessages, 0,
3923               this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
3924               this.foundTaskCount);
3925           System.arraycopy(this.foundTaskPriorities, 0,
3926               this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3927               0, this.foundTaskCount);
3928           System.arraycopy(this.foundTaskPositions, 0,
3929               this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
3930               this.foundTaskCount);
3931         }
3932         this.foundTaskTags[this.foundTaskCount] = tag;
3933         this.foundTaskPriorities[this.foundTaskCount] = priority;
3934         this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
3935             i + tagLength - 1};
3936         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3937         this.foundTaskCount++;
3938         i += tagLength - 1; // will be incremented when looping
3939       }
3940     }
3941     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3942       // retrieve message start and end positions
3943       int msgStart = this.foundTaskPositions[i][0]
3944           + this.foundTaskTags[i].length;
3945       int max_value = i + 1 < this.foundTaskCount
3946           ? this.foundTaskPositions[i + 1][0] - 1
3947           : commentEnd - 1;
3948       // at most beginning of next task
3949       if (max_value < msgStart)
3950         max_value = msgStart; // would only occur if tag is before EOF.
3951       int end = -1;
3952       char c;
3953       for (int j = msgStart; j < max_value; j++) {
3954         if ((c = this.source[j]) == '\n' || c == '\r') {
3955           end = j - 1;
3956           break;
3957         }
3958       }
3959       if (end == -1) {
3960         for (int j = max_value; j > msgStart; j--) {
3961           if ((c = this.source[j]) == '*') {
3962             end = j - 1;
3963             break;
3964           }
3965         }
3966         if (end == -1)
3967           end = max_value;
3968       }
3969       if (msgStart == end)
3970         continue; // empty
3971       // trim the message
3972       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3973         end--;
3974       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3975         msgStart++;
3976       // update the end position of the task
3977       this.foundTaskPositions[i][1] = end;
3978       // get the message source
3979       final int messageLength = end - msgStart + 1;
3980       char[] message = new char[messageLength];
3981       System.arraycopy(source, msgStart, message, 0, messageLength);
3982       this.foundTaskMessages[i] = message;
3983     }
3984   }
3985 }