net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12 import java.util.ArrayList;
  13 import java.util.Iterator;
  14 import java.util.List;
  15 import java.util.Stack;
  16 import net.sourceforge.phpdt.core.compiler.CharOperation;
  17 import net.sourceforge.phpdt.core.compiler.IScanner;
  18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  21 public class Scanner implements IScanner, ITerminalSymbols {
  /*
   * The main APIs are:
   * - getNextToken(), which returns the type of the current token (this value
   *   is not memorized by the scanner);
   * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e.
   *   all unicode escapes have been transformed into the correct char).
   * sourceStart gives the start position in the stream, and currentPosition-1
   * gives the sourceEnd position in the stream.
   */
  // 1.4 feature
  private boolean assertMode;
  public boolean useAssertAsAnIndentifier = false;
  // flag indicating if the processed source contains occurrences of the
  // keyword assert
  public boolean containsAssertKeyword = false;
  public boolean recordLineSeparator;
  public boolean phpMode = false;
  public Stack encapsedStringStack = null;
  // last character read from source by the getNextChar...() methods
  public char currentCharacter;
  // index in source of the first character of the current token
  public int startPosition;
  // index in source of the next character to read; currentPosition - 1 is
  // the end position of the current token
  public int currentPosition;
  public int initialPosition, eofPosition;
  // after eofPosition, EOF tokens are generated instead of real tokens from
  // the source
  public boolean tokenizeComments;
  public boolean tokenizeWhiteSpace;
  public boolean tokenizeStrings;
  // source should be viewed as a window (i.e. a part)
  // of an entire, very large stream
  public char source[];
  // unicode support
  public char[] withoutUnicodeBuffer;
  public int withoutUnicodePtr;
  // when withoutUnicodePtr == 0 ==> no unicode in the current token
  public boolean unicodeAsBackSlash = false;
  public boolean scanningFloatLiteral = false;
  // support for /** comments
  //public char[][] comments = new char[10][];
  public int[] commentStops = new int[10];
  public int[] commentStarts = new int[10];
  public int commentPtr = -1; // no comment test with commentPtr value -1
  // diet parsing support - jump over some method bodies when requested
  public boolean diet = false;
  // support for the poor-line-debuggers ....
  // remember the positions of the cr/lf; read back by getLineStart() and
  // getLineEnd()
  public int[] lineEnds = new int[250];
  public int linePtr = -1;
  public boolean wasAcr = false;
  67   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  68   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  69   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  70   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  71   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  72   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  73   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  74   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  75   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  76   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  77   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  78   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  79   //----------------optimized identifier managment------------------
  80   static final char[] charArray_a = new char[]{'a'},
  81       charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
  82       charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
  83       charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
  84       charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
  85       charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
  86       charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
  87       charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
  88       charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
  89       charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
  90       charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
  91       charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
  92       charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
  93       charArray_z = new char[]{'z'};
  94   static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
  95       '\u0000', '\u0000', '\u0000'};
  96   static final int TableSize = 30, InternalTableSize = 6;
  97   //30*6 = 180 entries
  98   public static final int OptimizedLength = 6;
  99   public/* static */
 100   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 101   // support for detecting non-externalized string literals
 102   int currentLineNr = -1;
 103   int previousLineNr = -1;
 104   NLSLine currentLine = null;
 105   List lines = new ArrayList();
 106   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 107   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 108   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 109   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 110   public StringLiteral[] nonNLSStrings = null;
 111   public boolean checkNonExternalizedStringLiterals = true;
 112   public boolean wasNonExternalizedStringLiteral = false;
 113   /* static */{
 114     for (int i = 0; i < 6; i++) {
 115       for (int j = 0; j < TableSize; j++) {
 116         for (int k = 0; k < InternalTableSize; k++) {
 117           charArray_length[i][j][k] = initCharArray;
 118         }
 119       }
 120     }
 121   }
 122   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
 123       newEntry6 = 0;
 124   public static final int RoundBracket = 0;
 125   public static final int SquareBracket = 1;
 126   public static final int CurlyBracket = 2;
 127   public static final int BracketKinds = 3;
 128   // task tag support
 129   public char[][] foundTaskTags = null;
 130   public char[][] foundTaskMessages;
 131   public char[][] foundTaskPriorities = null;
 132   public int[][] foundTaskPositions;
 133   public int foundTaskCount = 0;
 134   public char[][] taskTags = null;
 135   public char[][] taskPriorities = null;
 136   public static final boolean DEBUG = false;
 137   public static final boolean TRACE = false;
 138
 139   /**
 140    * Determines if the specified character is permissible as the first
 141    * character in a PHP identifier
 142    */
 143   public static boolean isPHPIdentifierStart(char ch) {
 144     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 145   }
 146   /**
 147    * Determines if the specified character may be part of a PHP identifier as
 148    * other than the first character
 149    */
 150   public static boolean isPHPIdentifierPart(char ch) {
 151     return Character.isLetterOrDigit(ch) || (ch == '_')
 152         || (0x7F <= ch && ch <= 0xFF);
 153   }
 154   public final boolean atEnd() {
 155     // This code is not relevant if source is
 156     // Only a part of the real stream input
 157     return source.length == currentPosition;
 158   }
 159   public char[] getCurrentIdentifierSource() {
 160     //return the token REAL source (aka unicodes are precomputed)
 161     char[] result;
 162     //    if (withoutUnicodePtr != 0)
 163     //      //0 is used as a fast test flag so the real first char is in position 1
 164     //      System.arraycopy(
 165     //        withoutUnicodeBuffer,
 166     //        1,
 167     //        result = new char[withoutUnicodePtr],
 168     //        0,
 169     //        withoutUnicodePtr);
 170     //    else {
 171     int length = currentPosition - startPosition;
 172     switch (length) { // see OptimizedLength
 173       case 1 :
 174         return optimizedCurrentTokenSource1();
 175       case 2 :
 176         return optimizedCurrentTokenSource2();
 177       case 3 :
 178         return optimizedCurrentTokenSource3();
 179       case 4 :
 180         return optimizedCurrentTokenSource4();
 181       case 5 :
 182         return optimizedCurrentTokenSource5();
 183       case 6 :
 184         return optimizedCurrentTokenSource6();
 185     }
 186     //no optimization
 187     System.arraycopy(source, startPosition, result = new char[length], 0,
 188         length);
 189     //   }
 190     return result;
 191   }
 192   public int getCurrentTokenEndPosition() {
 193     return this.currentPosition - 1;
 194   }
 195   public final char[] getCurrentTokenSource() {
 196     // Return the token REAL source (aka unicodes are precomputed)
 197     char[] result;
 198     //    if (withoutUnicodePtr != 0)
 199     //      // 0 is used as a fast test flag so the real first char is in position 1
 200     //      System.arraycopy(
 201     //        withoutUnicodeBuffer,
 202     //        1,
 203     //        result = new char[withoutUnicodePtr],
 204     //        0,
 205     //        withoutUnicodePtr);
 206     //    else {
 207     int length;
 208     System.arraycopy(source, startPosition,
 209         result = new char[length = currentPosition - startPosition], 0, length);
 210     //    }
 211     return result;
 212   }
 213   public final char[] getCurrentTokenSource(int startPos) {
 214     // Return the token REAL source (aka unicodes are precomputed)
 215     char[] result;
 216     //    if (withoutUnicodePtr != 0)
 217     //      // 0 is used as a fast test flag so the real first char is in position 1
 218     //      System.arraycopy(
 219     //        withoutUnicodeBuffer,
 220     //        1,
 221     //        result = new char[withoutUnicodePtr],
 222     //        0,
 223     //        withoutUnicodePtr);
 224     //    else {
 225     int length;
 226     System.arraycopy(source, startPos,
 227         result = new char[length = currentPosition - startPos], 0, length);
 228     //  }
 229     return result;
 230   }
 231   public final char[] getCurrentTokenSourceString() {
 232     //return the token REAL source (aka unicodes are precomputed).
 233     //REMOVE the two " that are at the beginning and the end.
 234     char[] result;
 235     if (withoutUnicodePtr != 0)
 236       //0 is used as a fast test flag so the real first char is in position 1
 237       System.arraycopy(withoutUnicodeBuffer, 2,
 238       //2 is 1 (real start) + 1 (to jump over the ")
 239           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 240     else {
 241       int length;
 242       System.arraycopy(source, startPosition + 1,
 243           result = new char[length = currentPosition - startPosition - 2], 0,
 244           length);
 245     }
 246     return result;
 247   }
 248   public int getCurrentTokenStartPosition() {
 249     return this.startPosition;
 250   }
 251   public final char[] getCurrentStringLiteralSource() {
 252     // Return the token REAL source (aka unicodes are precomputed)
 253     char[] result;
 254     int length;
 255     System.arraycopy(source, startPosition + 1,
 256         result = new char[length = currentPosition - startPosition - 2], 0,
 257         length);
 258     //    }
 259     return result;
 260   }
 261   /*
 262    * Search the source position corresponding to the end of a given line number
 263    *
 264    * Line numbers are 1-based, and relative to the scanner initialPosition.
 265    * Character positions are 0-based.
 266    *
 267    * In case the given line number is inconsistent, answers -1.
 268    */
 269   public final int getLineEnd(int lineNumber) {
 270     if (lineEnds == null)
 271       return -1;
 272     if (lineNumber >= lineEnds.length)
 273       return -1;
 274     if (lineNumber <= 0)
 275       return -1;
 276     if (lineNumber == lineEnds.length - 1)
 277       return eofPosition;
 278     return lineEnds[lineNumber - 1];
 279     // next line start one character behind the lineEnd of the previous line
 280   }
 281   /**
 282    * Search the source position corresponding to the beginning of a given line
 283    * number
 284    *
 285    * Line numbers are 1-based, and relative to the scanner initialPosition.
 286    * Character positions are 0-based.
 287    *
 288    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 289    *
 290    * In case the given line number is inconsistent, answers -1.
 291    */
 292   public final int getLineStart(int lineNumber) {
 293     if (lineEnds == null)
 294       return -1;
 295     if (lineNumber >= lineEnds.length)
 296       return -1;
 297     if (lineNumber <= 0)
 298       return -1;
 299     if (lineNumber == 1)
 300       return initialPosition;
 301     return lineEnds[lineNumber - 2] + 1;
 302     // next line start one character behind the lineEnd of the previous line
 303   }
 304   public final boolean getNextChar(char testedChar) {
 305     //BOOLEAN
 306     //handle the case of unicode.
 307     //when a unicode appears then we must use a buffer that holds char
 308     // internal values
 309     //At the end of this method currentCharacter holds the new visited char
 310     //and currentPosition points right next after it
 311     //Both previous lines are true if the currentCharacter is == to the
 312     // testedChar
 313     //On false, no side effect has occured.
 314     //ALL getNextChar.... ARE OPTIMIZED COPIES
 315     int temp = currentPosition;
 316     try {
 317       currentCharacter = source[currentPosition++];
 318       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 319       //        && (source[currentPosition] == 'u')) {
 320       //        //-------------unicode traitement ------------
 321       //        int c1, c2, c3, c4;
 322       //        int unicodeSize = 6;
 323       //        currentPosition++;
 324       //        while (source[currentPosition] == 'u') {
 325       //          currentPosition++;
 326       //          unicodeSize++;
 327       //        }
 328       //
 329       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 330       //          || c1 < 0)
 331       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 332       //            || c2 < 0)
 333       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 334       //            || c3 < 0)
 335       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 336       //            || c4 < 0)) {
 337       //          currentPosition = temp;
 338       //          return false;
 339       //        }
 340       //
 341       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 342       //        if (currentCharacter != testedChar) {
 343       //          currentPosition = temp;
 344       //          return false;
 345       //        }
 346       //        unicodeAsBackSlash = currentCharacter == '\\';
 347       //
 348       //        //need the unicode buffer
 349       //        if (withoutUnicodePtr == 0) {
 350       //          //buffer all the entries that have been left aside....
 351       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 352       //          System.arraycopy(
 353       //            source,
 354       //            startPosition,
 355       //            withoutUnicodeBuffer,
 356       //            1,
 357       //            withoutUnicodePtr);
 358       //        }
 359       //        //fill the buffer with the char
 360       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 361       //        return true;
 362       //
 363       //      } //-------------end unicode traitement--------------
 364       //      else {
 365       if (currentCharacter != testedChar) {
 366         currentPosition = temp;
 367         return false;
 368       }
 369       unicodeAsBackSlash = false;
 370       //        if (withoutUnicodePtr != 0)
 371       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 372       return true;
 373       //      }
 374     } catch (IndexOutOfBoundsException e) {
 375       unicodeAsBackSlash = false;
 376       currentPosition = temp;
 377       return false;
 378     }
 379   }
 380   public final int getNextChar(char testedChar1, char testedChar2) {
 381     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 382     //test can be done with (x==0) for the first and (x>0) for the second
 383     //handle the case of unicode.
 384     //when a unicode appears then we must use a buffer that holds char
 385     // internal values
 386     //At the end of this method currentCharacter holds the new visited char
 387     //and currentPosition points right next after it
 388     //Both previous lines are true if the currentCharacter is == to the
 389     // testedChar1/2
 390     //On false, no side effect has occured.
 391     //ALL getNextChar.... ARE OPTIMIZED COPIES
 392     int temp = currentPosition;
 393     try {
 394       int result;
 395       currentCharacter = source[currentPosition++];
 396       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 397       //        && (source[currentPosition] == 'u')) {
 398       //        //-------------unicode traitement ------------
 399       //        int c1, c2, c3, c4;
 400       //        int unicodeSize = 6;
 401       //        currentPosition++;
 402       //        while (source[currentPosition] == 'u') {
 403       //          currentPosition++;
 404       //          unicodeSize++;
 405       //        }
 406       //
 407       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 408       //          || c1 < 0)
 409       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 410       //            || c2 < 0)
 411       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 412       //            || c3 < 0)
 413       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 414       //            || c4 < 0)) {
 415       //          currentPosition = temp;
 416       //          return 2;
 417       //        }
 418       //
 419       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 420       //        if (currentCharacter == testedChar1)
 421       //          result = 0;
 422       //        else if (currentCharacter == testedChar2)
 423       //          result = 1;
 424       //        else {
 425       //          currentPosition = temp;
 426       //          return -1;
 427       //        }
 428       //
 429       //        //need the unicode buffer
 430       //        if (withoutUnicodePtr == 0) {
 431       //          //buffer all the entries that have been left aside....
 432       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 433       //          System.arraycopy(
 434       //            source,
 435       //            startPosition,
 436       //            withoutUnicodeBuffer,
 437       //            1,
 438       //            withoutUnicodePtr);
 439       //        }
 440       //        //fill the buffer with the char
 441       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 442       //        return result;
 443       //      } //-------------end unicode traitement--------------
 444       //      else {
 445       if (currentCharacter == testedChar1)
 446         result = 0;
 447       else if (currentCharacter == testedChar2)
 448         result = 1;
 449       else {
 450         currentPosition = temp;
 451         return -1;
 452       }
 453       //        if (withoutUnicodePtr != 0)
 454       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 455       return result;
 456       //     }
 457     } catch (IndexOutOfBoundsException e) {
 458       currentPosition = temp;
 459       return -1;
 460     }
 461   }
 462   public final boolean getNextCharAsDigit() {
 463     //BOOLEAN
 464     //handle the case of unicode.
 465     //when a unicode appears then we must use a buffer that holds char
 466     // internal values
 467     //At the end of this method currentCharacter holds the new visited char
 468     //and currentPosition points right next after it
 469     //Both previous lines are true if the currentCharacter is a digit
 470     //On false, no side effect has occured.
 471     //ALL getNextChar.... ARE OPTIMIZED COPIES
 472     int temp = currentPosition;
 473     try {
 474       currentCharacter = source[currentPosition++];
 475       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 476       //        && (source[currentPosition] == 'u')) {
 477       //        //-------------unicode traitement ------------
 478       //        int c1, c2, c3, c4;
 479       //        int unicodeSize = 6;
 480       //        currentPosition++;
 481       //        while (source[currentPosition] == 'u') {
 482       //          currentPosition++;
 483       //          unicodeSize++;
 484       //        }
 485       //
 486       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 487       //          || c1 < 0)
 488       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 489       //            || c2 < 0)
 490       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 491       //            || c3 < 0)
 492       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 493       //            || c4 < 0)) {
 494       //          currentPosition = temp;
 495       //          return false;
 496       //        }
 497       //
 498       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 499       //        if (!Character.isDigit(currentCharacter)) {
 500       //          currentPosition = temp;
 501       //          return false;
 502       //        }
 503       //
 504       //        //need the unicode buffer
 505       //        if (withoutUnicodePtr == 0) {
 506       //          //buffer all the entries that have been left aside....
 507       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 508       //          System.arraycopy(
 509       //            source,
 510       //            startPosition,
 511       //            withoutUnicodeBuffer,
 512       //            1,
 513       //            withoutUnicodePtr);
 514       //        }
 515       //        //fill the buffer with the char
 516       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 517       //        return true;
 518       //      } //-------------end unicode traitement--------------
 519       //      else {
 520       if (!Character.isDigit(currentCharacter)) {
 521         currentPosition = temp;
 522         return false;
 523       }
 524       //        if (withoutUnicodePtr != 0)
 525       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 526       return true;
 527       //      }
 528     } catch (IndexOutOfBoundsException e) {
 529       currentPosition = temp;
 530       return false;
 531     }
 532   }
 533   public final boolean getNextCharAsDigit(int radix) {
 534     //BOOLEAN
 535     //handle the case of unicode.
 536     //when a unicode appears then we must use a buffer that holds char
 537     // internal values
 538     //At the end of this method currentCharacter holds the new visited char
 539     //and currentPosition points right next after it
 540     //Both previous lines are true if the currentCharacter is a digit base on
 541     // radix
 542     //On false, no side effect has occured.
 543     //ALL getNextChar.... ARE OPTIMIZED COPIES
 544     int temp = currentPosition;
 545     try {
 546       currentCharacter = source[currentPosition++];
 547       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 548       //        && (source[currentPosition] == 'u')) {
 549       //        //-------------unicode traitement ------------
 550       //        int c1, c2, c3, c4;
 551       //        int unicodeSize = 6;
 552       //        currentPosition++;
 553       //        while (source[currentPosition] == 'u') {
 554       //          currentPosition++;
 555       //          unicodeSize++;
 556       //        }
 557       //
 558       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 559       //          || c1 < 0)
 560       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 561       //            || c2 < 0)
 562       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 563       //            || c3 < 0)
 564       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 565       //            || c4 < 0)) {
 566       //          currentPosition = temp;
 567       //          return false;
 568       //        }
 569       //
 570       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 571       //        if (Character.digit(currentCharacter, radix) == -1) {
 572       //          currentPosition = temp;
 573       //          return false;
 574       //        }
 575       //
 576       //        //need the unicode buffer
 577       //        if (withoutUnicodePtr == 0) {
 578       //          //buffer all the entries that have been left aside....
 579       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 580       //          System.arraycopy(
 581       //            source,
 582       //            startPosition,
 583       //            withoutUnicodeBuffer,
 584       //            1,
 585       //            withoutUnicodePtr);
 586       //        }
 587       //        //fill the buffer with the char
 588       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 589       //        return true;
 590       //      } //-------------end unicode traitement--------------
 591       //      else {
 592       if (Character.digit(currentCharacter, radix) == -1) {
 593         currentPosition = temp;
 594         return false;
 595       }
 596       //        if (withoutUnicodePtr != 0)
 597       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 598       return true;
 599       //      }
 600     } catch (IndexOutOfBoundsException e) {
 601       currentPosition = temp;
 602       return false;
 603     }
 604   }
 605   public boolean getNextCharAsJavaIdentifierPart() {
 606     //BOOLEAN
 607     //handle the case of unicode.
 608     //when a unicode appears then we must use a buffer that holds char
 609     // internal values
 610     //At the end of this method currentCharacter holds the new visited char
 611     //and currentPosition points right next after it
 612     //Both previous lines are true if the currentCharacter is a
 613     // JavaIdentifierPart
 614     //On false, no side effect has occured.
 615     //ALL getNextChar.... ARE OPTIMIZED COPIES
 616     int temp = currentPosition;
 617     try {
 618       currentCharacter = source[currentPosition++];
 619       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 620       //        && (source[currentPosition] == 'u')) {
 621       //        //-------------unicode traitement ------------
 622       //        int c1, c2, c3, c4;
 623       //        int unicodeSize = 6;
 624       //        currentPosition++;
 625       //        while (source[currentPosition] == 'u') {
 626       //          currentPosition++;
 627       //          unicodeSize++;
 628       //        }
 629       //
 630       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 631       //          || c1 < 0)
 632       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 633       //            || c2 < 0)
 634       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 635       //            || c3 < 0)
 636       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 637       //            || c4 < 0)) {
 638       //          currentPosition = temp;
 639       //          return false;
 640       //        }
 641       //
 642       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 643       //        if (!isPHPIdentifierPart(currentCharacter)) {
 644       //          currentPosition = temp;
 645       //          return false;
 646       //        }
 647       //
 648       //        //need the unicode buffer
 649       //        if (withoutUnicodePtr == 0) {
 650       //          //buffer all the entries that have been left aside....
 651       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 652       //          System.arraycopy(
 653       //            source,
 654       //            startPosition,
 655       //            withoutUnicodeBuffer,
 656       //            1,
 657       //            withoutUnicodePtr);
 658       //        }
 659       //        //fill the buffer with the char
 660       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 661       //        return true;
 662       //      } //-------------end unicode traitement--------------
 663       //      else {
 664       if (!isPHPIdentifierPart(currentCharacter)) {
 665         currentPosition = temp;
 666         return false;
 667       }
 668       //        if (withoutUnicodePtr != 0)
 669       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 670       return true;
 671       //      }
 672     } catch (IndexOutOfBoundsException e) {
 673       currentPosition = temp;
 674       return false;
 675     }
 676   }
 677   public int getCastOrParen() {
 678     int tempPosition = currentPosition;
 679     char tempCharacter = currentCharacter;
 680     int tempToken = TokenNameLPAREN;
 681     boolean found = false;
 682     StringBuffer buf = new StringBuffer();
 683     try {
 684       do {
 685         currentCharacter = source[currentPosition++];
 686       } while (currentCharacter == ' ' || currentCharacter == '\t');
 687       while ((currentCharacter >= 'a' && currentCharacter <= 'z')
 688           || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 689         buf.append(currentCharacter);
 690         currentCharacter = source[currentPosition++];
 691       }
 692       if (buf.length() >= 3 && buf.length() <= 7) {
 693         char[] data = buf.toString().toCharArray();
 694         int index = 0;
 695         switch (data.length) {
 696           case 3 :
 697             // int
 698             if ((data[index] == 'i') && (data[++index] == 'n')
 699                 && (data[++index] == 't')) {
 700               found = true;
 701               tempToken = TokenNameintCAST;
 702             }
 703             break;
 704           case 4 :
 705             // bool real
 706             if ((data[index] == 'b') && (data[++index] == 'o')
 707                 && (data[++index] == 'o') && (data[++index] == 'l')) {
 708               found = true;
 709               tempToken = TokenNameboolCAST;
 710             } else {
 711               index = 0;
 712               if ((data[index] == 'r') && (data[++index] == 'e')
 713                   && (data[++index] == 'a') && (data[++index] == 'l')) {
 714                 found = true;
 715                 tempToken = TokenNamedoubleCAST;
 716               }
 717             }
 718             break;
 719           case 5 :
 720             // array unset float
 721             if ((data[index] == 'a') && (data[++index] == 'r')
 722                 && (data[++index] == 'r') && (data[++index] == 'a')
 723                 && (data[++index] == 'y')) {
 724               found = true;
 725               tempToken = TokenNamearrayCAST;
 726             } else {
 727               index = 0;
 728               if ((data[index] == 'u') && (data[++index] == 'n')
 729                   && (data[++index] == 's') && (data[++index] == 'e')
 730                   && (data[++index] == 't')) {
 731                 found = true;
 732                 tempToken = TokenNameunsetCAST;
 733               } else {
 734                 index = 0;
 735                 if ((data[index] == 'f') && (data[++index] == 'l')
 736                     && (data[++index] == 'o') && (data[++index] == 'a')
 737                     && (data[++index] == 't')) {
 738                   found = true;
 739                   tempToken = TokenNamedoubleCAST;
 740                 }
 741               }
 742             }
 743             break;
 744           case 6 :
 745             // object string double
 746             if ((data[index] == 'o') && (data[++index] == 'b')
 747                 && (data[++index] == 'j') && (data[++index] == 'e')
 748                 && (data[++index] == 'c') && (data[++index] == 't')) {
 749               found = true;
 750               tempToken = TokenNameobjectCAST;
 751             } else {
 752               index = 0;
 753               if ((data[index] == 's') && (data[++index] == 't')
 754                   && (data[++index] == 'r') && (data[++index] == 'i')
 755                   && (data[++index] == 'n') && (data[++index] == 'g')) {
 756                 found = true;
 757                 tempToken = TokenNamestringCAST;
 758               } else {
 759                 index = 0;
 760                 if ((data[index] == 'd') && (data[++index] == 'o')
 761                     && (data[++index] == 'u') && (data[++index] == 'b')
 762                     && (data[++index] == 'l') && (data[++index] == 'e')) {
 763                   found = true;
 764                   tempToken = TokenNamedoubleCAST;
 765                 }
 766               }
 767             }
 768             break;
 769           case 7 :
 770             // boolean integer
 771             if ((data[index] == 'b') && (data[++index] == 'o')
 772                 && (data[++index] == 'o') && (data[++index] == 'l')
 773                 && (data[++index] == 'e') && (data[++index] == 'a')
 774                 && (data[++index] == 'n')) {
 775               found = true;
 776               tempToken = TokenNameboolCAST;
 777             } else {
 778               index = 0;
 779               if ((data[index] == 'i') && (data[++index] == 'n')
 780                   && (data[++index] == 't') && (data[++index] == 'e')
 781                   && (data[++index] == 'g') && (data[++index] == 'e')
 782                   && (data[++index] == 'r')) {
 783                 found = true;
 784                 tempToken = TokenNameintCAST;
 785               }
 786             }
 787             break;
 788         }
 789         if (found) {
 790           while (currentCharacter == ' ' || currentCharacter == '\t') {
 791             currentCharacter = source[currentPosition++];
 792           }
 793           if (currentCharacter == ')') {
 794             return tempToken;
 795           }
 796         }
 797       }
 798     } catch (IndexOutOfBoundsException e) {
 799     }
 800     currentCharacter = tempCharacter;
 801     currentPosition = tempPosition;
 802     return TokenNameLPAREN;
 803   }
 804   public void consumeStringInterpolated() throws InvalidInputException {
 805     try {
 806       // consume next character
 807       unicodeAsBackSlash = false;
 808       currentCharacter = source[currentPosition++];
 809       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 810       //                  && (source[currentPosition] == 'u')) {
 811       //                  getNextUnicodeChar();
 812       //                } else {
 813       //                  if (withoutUnicodePtr != 0) {
 814       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 815       //                      currentCharacter;
 816       //                  }
 817       //                }
 818       while (currentCharacter != '`') {
 819         /** ** in PHP \r and \n are valid in string literals *** */
 820         //                if ((currentCharacter == '\n')
 821         //                  || (currentCharacter == '\r')) {
 822         //                  // relocate if finding another quote fairly close: thus unicode
 823         // '/u000D' will be fully consumed
 824         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 825         //                    if (currentPosition + lookAhead == source.length)
 826         //                      break;
 827         //                    if (source[currentPosition + lookAhead] == '\n')
 828         //                      break;
 829         //                    if (source[currentPosition + lookAhead] == '\"') {
 830         //                      currentPosition += lookAhead + 1;
 831         //                      break;
 832         //                    }
 833         //                  }
 834         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 835         //                }
 836         if (currentCharacter == '\\') {
 837           int escapeSize = currentPosition;
 838           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 839           //scanEscapeCharacter make a side effect on this value and we need
 840           // the previous value few lines down this one
 841           scanDoubleQuotedEscapeCharacter();
 842           escapeSize = currentPosition - escapeSize;
 843           if (withoutUnicodePtr == 0) {
 844             //buffer all the entries that have been left aside....
 845             withoutUnicodePtr = currentPosition - escapeSize - 1
 846                 - startPosition;
 847             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
 848                 withoutUnicodePtr);
 849             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 850           } else { //overwrite the / in the buffer
 851             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 852             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 853                                               // where only one is correct
 854               withoutUnicodePtr--;
 855             }
 856           }
 857         }
 858         // consume next character
 859         unicodeAsBackSlash = false;
 860         currentCharacter = source[currentPosition++];
 861         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 862         //                    && (source[currentPosition] == 'u')) {
 863         //                    getNextUnicodeChar();
 864         //                  } else {
 865         if (withoutUnicodePtr != 0) {
 866           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 867         }
 868         //                  }
 869       }
 870     } catch (IndexOutOfBoundsException e) {
 871       throw new InvalidInputException(UNTERMINATED_STRING);
 872     } catch (InvalidInputException e) {
 873       if (e.getMessage().equals(INVALID_ESCAPE)) {
 874         // relocate if finding another quote fairly close: thus unicode
 875         // '/u000D' will be fully consumed
 876         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 877           if (currentPosition + lookAhead == source.length)
 878             break;
 879           if (source[currentPosition + lookAhead] == '\n')
 880             break;
 881           if (source[currentPosition + lookAhead] == '`') {
 882             currentPosition += lookAhead + 1;
 883             break;
 884           }
 885         }
 886       }
 887       throw e; // rethrow
 888     }
 889     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
 890                                               // //$NON-NLS-?$ where ? is an
 891                                               // int.
 892       if (currentLine == null) {
 893         currentLine = new NLSLine();
 894         lines.add(currentLine);
 895       }
 896       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
 897           startPosition, currentPosition - 1));
 898     }
 899   }
 900   public void consumeStringConstant() throws InvalidInputException {
 901     try {
 902       // consume next character
 903       unicodeAsBackSlash = false;
 904       currentCharacter = source[currentPosition++];
 905       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 906       //                  && (source[currentPosition] == 'u')) {
 907       //                  getNextUnicodeChar();
 908       //                } else {
 909       //                  if (withoutUnicodePtr != 0) {
 910       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 911       //                      currentCharacter;
 912       //                  }
 913       //                }
 914       while (currentCharacter != '\'') {
 915         /** ** in PHP \r and \n are valid in string literals *** */
 916         //                  if ((currentCharacter == '\n')
 917         //                    || (currentCharacter == '\r')) {
 918         //                    // relocate if finding another quote fairly close: thus unicode
 919         // '/u000D' will be fully consumed
 920         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 921         //                      if (currentPosition + lookAhead == source.length)
 922         //                        break;
 923         //                      if (source[currentPosition + lookAhead] == '\n')
 924         //                        break;
 925         //                      if (source[currentPosition + lookAhead] == '\"') {
 926         //                        currentPosition += lookAhead + 1;
 927         //                        break;
 928         //                      }
 929         //                    }
 930         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 931         //                  }
 932         if (currentCharacter == '\\') {
 933           int escapeSize = currentPosition;
 934           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 935           //scanEscapeCharacter make a side effect on this value and we need
 936           // the previous value few lines down this one
 937           scanSingleQuotedEscapeCharacter();
 938           escapeSize = currentPosition - escapeSize;
 939           if (withoutUnicodePtr == 0) {
 940             //buffer all the entries that have been left aside....
 941             withoutUnicodePtr = currentPosition - escapeSize - 1
 942                 - startPosition;
 943             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
 944                 withoutUnicodePtr);
 945             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 946           } else { //overwrite the / in the buffer
 947             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 948             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 949                                               // where only one is correct
 950               withoutUnicodePtr--;
 951             }
 952           }
 953         }
 954         // consume next character
 955         unicodeAsBackSlash = false;
 956         currentCharacter = source[currentPosition++];
 957         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 958         //                    && (source[currentPosition] == 'u')) {
 959         //                    getNextUnicodeChar();
 960         //                  } else {
 961         if (withoutUnicodePtr != 0) {
 962           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 963         }
 964         //                  }
 965       }
 966     } catch (IndexOutOfBoundsException e) {
 967       throw new InvalidInputException(UNTERMINATED_STRING);
 968     } catch (InvalidInputException e) {
 969       if (e.getMessage().equals(INVALID_ESCAPE)) {
 970         // relocate if finding another quote fairly close: thus unicode
 971         // '/u000D' will be fully consumed
 972         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 973           if (currentPosition + lookAhead == source.length)
 974             break;
 975           if (source[currentPosition + lookAhead] == '\n')
 976             break;
 977           if (source[currentPosition + lookAhead] == '\'') {
 978             currentPosition += lookAhead + 1;
 979             break;
 980           }
 981         }
 982       }
 983       throw e; // rethrow
 984     }
 985     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
 986                                               // //$NON-NLS-?$ where ? is an
 987                                               // int.
 988       if (currentLine == null) {
 989         currentLine = new NLSLine();
 990         lines.add(currentLine);
 991       }
 992       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
 993           startPosition, currentPosition - 1));
 994     }
 995   }
 996   public void consumeStringLiteral() throws InvalidInputException {
 997     try {
 998       // consume next character
 999       unicodeAsBackSlash = false;
1000       currentCharacter = source[currentPosition++];
1001       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1002       //                  && (source[currentPosition] == 'u')) {
1003       //                  getNextUnicodeChar();
1004       //                } else {
1005       //                  if (withoutUnicodePtr != 0) {
1006       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1007       //                      currentCharacter;
1008       //                  }
1009       //                }
1010       while (currentCharacter != '"') {
1011         /** ** in PHP \r and \n are valid in string literals *** */
1012         //                  if ((currentCharacter == '\n')
1013         //                    || (currentCharacter == '\r')) {
1014         //                    // relocate if finding another quote fairly close: thus unicode
1015         // '/u000D' will be fully consumed
1016         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1017         //                      if (currentPosition + lookAhead == source.length)
1018         //                        break;
1019         //                      if (source[currentPosition + lookAhead] == '\n')
1020         //                        break;
1021         //                      if (source[currentPosition + lookAhead] == '\"') {
1022         //                        currentPosition += lookAhead + 1;
1023         //                        break;
1024         //                      }
1025         //                    }
1026         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1027         //                  }
1028         if (currentCharacter == '\\') {
1029           int escapeSize = currentPosition;
1030           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1031           //scanEscapeCharacter make a side effect on this value and we need
1032           // the previous value few lines down this one
1033           scanDoubleQuotedEscapeCharacter();
1034           escapeSize = currentPosition - escapeSize;
1035           if (withoutUnicodePtr == 0) {
1036             //buffer all the entries that have been left aside....
1037             withoutUnicodePtr = currentPosition - escapeSize - 1
1038                 - startPosition;
1039             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1040                 withoutUnicodePtr);
1041             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1042           } else { //overwrite the / in the buffer
1043             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1044             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1045                                               // where only one is correct
1046               withoutUnicodePtr--;
1047             }
1048           }
1049         }
1050         // consume next character
1051         unicodeAsBackSlash = false;
1052         currentCharacter = source[currentPosition++];
1053         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1054         //                    && (source[currentPosition] == 'u')) {
1055         //                    getNextUnicodeChar();
1056         //                  } else {
1057         if (withoutUnicodePtr != 0) {
1058           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1059         }
1060         //                  }
1061       }
1062     } catch (IndexOutOfBoundsException e) {
1063       throw new InvalidInputException(UNTERMINATED_STRING);
1064     } catch (InvalidInputException e) {
1065       if (e.getMessage().equals(INVALID_ESCAPE)) {
1066         // relocate if finding another quote fairly close: thus unicode
1067         // '/u000D' will be fully consumed
1068         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1069           if (currentPosition + lookAhead == source.length)
1070             break;
1071           if (source[currentPosition + lookAhead] == '\n')
1072             break;
1073           if (source[currentPosition + lookAhead] == '\"') {
1074             currentPosition += lookAhead + 1;
1075             break;
1076           }
1077         }
1078       }
1079       throw e; // rethrow
1080     }
1081     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1082                                               // //$NON-NLS-?$ where ? is an
1083                                               // int.
1084       if (currentLine == null) {
1085         currentLine = new NLSLine();
1086         lines.add(currentLine);
1087       }
1088       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1089           startPosition, currentPosition - 1));
1090     }
1091   }
1092   public int getNextToken() throws InvalidInputException {
1093     if (!phpMode) {
1094       return getInlinedHTML(currentPosition);
1095     }
1096     if (phpMode) {
1097       this.wasAcr = false;
1098       if (diet) {
1099         jumpOverMethodBody();
1100         diet = false;
1101         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1102       }
1103       try {
1104         while (true) {
1105           withoutUnicodePtr = 0;
1106           //start with a new token
1107           char encapsedChar = ' ';
1108           if (!encapsedStringStack.isEmpty()) {
1109             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1110           }
1111           if (encapsedChar != '$' && encapsedChar != ' ') {
1112             currentCharacter = source[currentPosition++];
1113             if (currentCharacter == encapsedChar) {
1114               switch (currentCharacter) {
1115                 case '`' :
1116                   return TokenNameEncapsedString0;
1117                 case '\'' :
1118                   return TokenNameEncapsedString1;
1119                 case '"' :
1120                   return TokenNameEncapsedString2;
1121               }
1122             }
1123             while (currentCharacter != encapsedChar) {
1124               /** ** in PHP \r and \n are valid in string literals *** */
1125               switch (currentCharacter) {
1126                 case '\\' :
1127                   int escapeSize = currentPosition;
1128                   boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1129                   //scanEscapeCharacter make a side effect on this value and
1130                   // we need the previous value few lines down this one
1131                   scanDoubleQuotedEscapeCharacter();
1132                   escapeSize = currentPosition - escapeSize;
1133                   if (withoutUnicodePtr == 0) {
1134                     //buffer all the entries that have been left aside....
1135                     withoutUnicodePtr = currentPosition - escapeSize - 1
1136                         - startPosition;
1137                     System.arraycopy(source, startPosition,
1138                         withoutUnicodeBuffer, 1, withoutUnicodePtr);
1139                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1140                   } else { //overwrite the / in the buffer
1141                     withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1142                     if (backSlashAsUnicodeInString) { //there are TWO \ in
1143                       withoutUnicodePtr--;
1144                     }
1145                   }
1146                   break;
1147                 case '$' :
1148                   if (isPHPIdentifierStart(source[currentPosition])
1149                       || source[currentPosition] == '{') {
1150                     currentPosition--;
1151                     encapsedStringStack.push(new Character('$'));
1152                     return TokenNameSTRING;
1153                   }
1154                   break;
1155                 case '{' :
1156                   if (source[currentPosition] == '$') { // CURLY_OPEN
1157                     currentPosition--;
1158                     encapsedStringStack.push(new Character('$'));
1159                     return TokenNameSTRING;
1160                   }
1161               }
1162               // consume next character
1163               unicodeAsBackSlash = false;
1164               currentCharacter = source[currentPosition++];
1165               if (withoutUnicodePtr != 0) {
1166                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1167               }
1168               //                  }
1169             } // end while
1170             currentPosition--;
1171             return TokenNameSTRING;
1172           }
1173           // ---------Consume white space and handles startPosition---------
1174           int whiteStart = currentPosition;
1175           startPosition = currentPosition;
1176           currentCharacter = source[currentPosition++];
1177           if (encapsedChar == '$') {
1178             switch (currentCharacter) {
1179               case '\\' :
1180                 currentCharacter = source[currentPosition++];
1181                 return TokenNameSTRING;
1182               case '{' :
1183                 if (encapsedChar == '$') {
1184                   if (getNextChar('$'))
1185                     return TokenNameCURLY_OPEN;
1186                 }
1187                 return TokenNameLBRACE;
1188               case '}' :
1189                 return TokenNameRBRACE;
1190               case '[' :
1191                 return TokenNameLBRACKET;
1192               case ']' :
1193                 return TokenNameRBRACKET;
1194               case '\'' :
1195                 if (tokenizeStrings) {
1196                   consumeStringConstant();
1197                   return TokenNameStringConstant;
1198                 }
1199                 return TokenNameEncapsedString1;
1200               case '"' :
1201                 return TokenNameEncapsedString2;
1202               case '`' :
1203                 if (tokenizeStrings) {
1204                   consumeStringInterpolated();
1205                   return TokenNameStringInterpolated;
1206                 }
1207                 return TokenNameEncapsedString0;
1208               case '-' :
1209                 if (getNextChar('>'))
1210                   return TokenNameMINUS_GREATER;
1211                 return TokenNameSTRING;
1212               default :
1213                 if (currentCharacter == '$') {
1214                   int oldPosition = currentPosition;
1215                   try {
1216                     currentCharacter = source[currentPosition++];
1217                     if (currentCharacter == '{') {
1218                       return TokenNameDOLLAR_LBRACE;
1219                     }
1220                     if (isPHPIdentifierStart(currentCharacter)) {
1221                       return scanIdentifierOrKeyword(true);
1222                     } else {
1223                       currentPosition = oldPosition;
1224                       return TokenNameSTRING;
1225                     }
1226                   } catch (IndexOutOfBoundsException e) {
1227                     currentPosition = oldPosition;
1228                     return TokenNameSTRING;
1229                   }
1230                 }
1231                 if (isPHPIdentifierStart(currentCharacter))
1232                   return scanIdentifierOrKeyword(false);
1233                 if (Character.isDigit(currentCharacter))
1234                   return scanNumber(false);
1235                 return TokenNameERROR;
1236             }
1237           }
1238           //          boolean isWhiteSpace;
1239
1240           while ((currentCharacter == ' ')
1241               || Character.isWhitespace(currentCharacter)) {
1242             startPosition = currentPosition;
1243             currentCharacter = source[currentPosition++];
1244             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1245             //              && (source[currentPosition] == 'u')) {
1246             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1247             //            } else {
1248             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1249               checkNonExternalizeString();
1250               if (recordLineSeparator) {
1251                 pushLineSeparator();
1252               } else {
1253                 currentLine = null;
1254               }
1255             }
1256             //            isWhiteSpace = (currentCharacter == ' ')
1257             //                || Character.isWhitespace(currentCharacter);
1258             //            }
1259           }
1260           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1261             // reposition scanner in case we are interested by spaces as tokens
1262             currentPosition--;
1263             startPosition = whiteStart;
1264             return TokenNameWHITESPACE;
1265           }
1266           //little trick to get out in the middle of a source compuation
1267           if (currentPosition > eofPosition)
1268             return TokenNameEOF;
1269           // ---------Identify the next token-------------
1270           switch (currentCharacter) {
1271             case '(' :
1272               return getCastOrParen();
1273             case ')' :
1274               return TokenNameRPAREN;
1275             case '{' :
1276               return TokenNameLBRACE;
1277             case '}' :
1278               return TokenNameRBRACE;
1279             case '[' :
1280               return TokenNameLBRACKET;
1281             case ']' :
1282               return TokenNameRBRACKET;
1283             case ';' :
1284               return TokenNameSEMICOLON;
1285             case ',' :
1286               return TokenNameCOMMA;
1287             case '.' :
1288               if (getNextChar('='))
1289                 return TokenNameDOT_EQUAL;
1290               if (getNextCharAsDigit())
1291                 return scanNumber(true);
1292               return TokenNameDOT;
1293             case '+' :
1294               {
1295                 int test;
1296                 if ((test = getNextChar('+', '=')) == 0)
1297                   return TokenNamePLUS_PLUS;
1298                 if (test > 0)
1299                   return TokenNamePLUS_EQUAL;
1300                 return TokenNamePLUS;
1301               }
1302             case '-' :
1303               {
1304                 int test;
1305                 if ((test = getNextChar('-', '=')) == 0)
1306                   return TokenNameMINUS_MINUS;
1307                 if (test > 0)
1308                   return TokenNameMINUS_EQUAL;
1309                 if (getNextChar('>'))
1310                   return TokenNameMINUS_GREATER;
1311                 return TokenNameMINUS;
1312               }
1313             case '~' :
1314               if (getNextChar('='))
1315                 return TokenNameTWIDDLE_EQUAL;
1316               return TokenNameTWIDDLE;
1317             case '!' :
1318               if (getNextChar('=')) {
1319                 if (getNextChar('=')) {
1320                   return TokenNameNOT_EQUAL_EQUAL;
1321                 }
1322                 return TokenNameNOT_EQUAL;
1323               }
1324               return TokenNameNOT;
1325             case '*' :
1326               if (getNextChar('='))
1327                 return TokenNameMULTIPLY_EQUAL;
1328               return TokenNameMULTIPLY;
1329             case '%' :
1330               if (getNextChar('='))
1331                 return TokenNameREMAINDER_EQUAL;
1332               return TokenNameREMAINDER;
1333             case '<' :
1334               {
1335                 int oldPosition = currentPosition;
1336                 try {
1337                   currentCharacter = source[currentPosition++];
1338                 } catch (IndexOutOfBoundsException e) {
1339                   currentPosition = oldPosition;
1340                   return TokenNameLESS;
1341                 }
1342                 switch (currentCharacter) {
1343                   case '=' :
1344                     return TokenNameLESS_EQUAL;
1345                   case '>' :
1346                     return TokenNameNOT_EQUAL;
1347                   case '<' :
1348                     if (getNextChar('='))
1349                       return TokenNameLEFT_SHIFT_EQUAL;
1350                     if (getNextChar('<')) {
1351                       currentCharacter = source[currentPosition++];
1352                       while (Character.isWhitespace(currentCharacter)) {
1353                         currentCharacter = source[currentPosition++];
1354                       }
1355                       int heredocStart = currentPosition - 1;
1356                       int heredocLength = 0;
1357                       if (isPHPIdentifierStart(currentCharacter)) {
1358                         currentCharacter = source[currentPosition++];
1359                       } else {
1360                         return TokenNameERROR;
1361                       }
1362                       while (isPHPIdentifierPart(currentCharacter)) {
1363                         currentCharacter = source[currentPosition++];
1364                       }
1365                       heredocLength = currentPosition - heredocStart - 1;
1366                       // heredoc end-tag determination
1367                       boolean endTag = true;
1368                       char ch;
1369                       do {
1370                         ch = source[currentPosition++];
1371                         if (ch == '\r' || ch == '\n') {
1372                           if (recordLineSeparator) {
1373                             pushLineSeparator();
1374                           } else {
1375                             currentLine = null;
1376                           }
1377                           for (int i = 0; i < heredocLength; i++) {
1378                             if (source[currentPosition + i] != source[heredocStart
1379                                 + i]) {
1380                               endTag = false;
1381                               break;
1382                             }
1383                           }
1384                           if (endTag) {
1385                             currentPosition += heredocLength - 1;
1386                             currentCharacter = source[currentPosition++];
1387                             break; // do...while loop
1388                           } else {
1389                             endTag = true;
1390                           }
1391                         }
1392                       } while (true);
1393                       return TokenNameHEREDOC;
1394                     }
1395                     return TokenNameLEFT_SHIFT;
1396                 }
1397                 currentPosition = oldPosition;
1398                 return TokenNameLESS;
1399               }
1400             case '>' :
1401               {
1402                 int test;
1403                 if ((test = getNextChar('=', '>')) == 0)
1404                   return TokenNameGREATER_EQUAL;
1405                 if (test > 0) {
1406                   if ((test = getNextChar('=', '>')) == 0)
1407                     return TokenNameRIGHT_SHIFT_EQUAL;
1408                   return TokenNameRIGHT_SHIFT;
1409                 }
1410                 return TokenNameGREATER;
1411               }
1412             case '=' :
1413               if (getNextChar('=')) {
1414                 if (getNextChar('=')) {
1415                   return TokenNameEQUAL_EQUAL_EQUAL;
1416                 }
1417                 return TokenNameEQUAL_EQUAL;
1418               }
1419               if (getNextChar('>'))
1420                 return TokenNameEQUAL_GREATER;
1421               return TokenNameEQUAL;
1422             case '&' :
1423               {
1424                 int test;
1425                 if ((test = getNextChar('&', '=')) == 0)
1426                   return TokenNameAND_AND;
1427                 if (test > 0)
1428                   return TokenNameAND_EQUAL;
1429                 return TokenNameAND;
1430               }
1431             case '|' :
1432               {
1433                 int test;
1434                 if ((test = getNextChar('|', '=')) == 0)
1435                   return TokenNameOR_OR;
1436                 if (test > 0)
1437                   return TokenNameOR_EQUAL;
1438                 return TokenNameOR;
1439               }
1440             case '^' :
1441               if (getNextChar('='))
1442                 return TokenNameXOR_EQUAL;
1443               return TokenNameXOR;
1444             case '?' :
1445               if (getNextChar('>')) {
1446                 phpMode = false;
1447                 if (currentPosition == source.length) {
1448                   phpMode = true;
1449                   return TokenNameINLINE_HTML;
1450                 }
1451                 return getInlinedHTML(currentPosition - 2);
1452               }
1453               return TokenNameQUESTION;
1454             case ':' :
1455               if (getNextChar(':'))
1456                 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1457               return TokenNameCOLON;
1458             case '@' :
1459               return TokenNameAT;
1460             case '\'' :
1461               consumeStringConstant();
1462               return TokenNameStringConstant;
1463             case '"' :
1464               if (tokenizeStrings) {
1465                 consumeStringLiteral();
1466                 return TokenNameStringLiteral;
1467               }
1468               return TokenNameEncapsedString2;
1469             case '`' :
1470               if (tokenizeStrings) {
1471                 consumeStringInterpolated();
1472                 return TokenNameStringInterpolated;
1473               }
1474               return TokenNameEncapsedString0;
1475             case '#' :
1476             case '/' :
1477               {
1478                 char startChar = currentCharacter;
1479                 if (getNextChar('=')) {
1480                   return TokenNameDIVIDE_EQUAL;
1481                 }
1482                 int test;
1483                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1484                   //line comment
1485                   int endPositionForLineComment = 0;
1486                   try { //get the next char
1487                     currentCharacter = source[currentPosition++];
1488                     //                    if (((currentCharacter = source[currentPosition++])
1489                     //                      == '\\')
1490                     //                      && (source[currentPosition] == 'u')) {
1491                     //                      //-------------unicode traitement ------------
1492                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1493                     //                      currentPosition++;
1494                     //                      while (source[currentPosition] == 'u') {
1495                     //                        currentPosition++;
1496                     //                      }
1497                     //                      if ((c1 =
1498                     //                        Character.getNumericValue(source[currentPosition++]))
1499                     //                        > 15
1500                     //                        || c1 < 0
1501                     //                        || (c2 =
1502                     //                          Character.getNumericValue(source[currentPosition++]))
1503                     //                          > 15
1504                     //                        || c2 < 0
1505                     //                        || (c3 =
1506                     //                          Character.getNumericValue(source[currentPosition++]))
1507                     //                          > 15
1508                     //                        || c3 < 0
1509                     //                        || (c4 =
1510                     //                          Character.getNumericValue(source[currentPosition++]))
1511                     //                          > 15
1512                     //                        || c4 < 0) {
1513                     //                        throw new
1514                     // InvalidInputException(INVALID_UNICODE_ESCAPE);
1515                     //                      } else {
1516                     //                        currentCharacter =
1517                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1518                     //                      }
1519                     //                    }
1520                     //handle the \\u case manually into comment
1521                     //                    if (currentCharacter == '\\') {
1522                     //                      if (source[currentPosition] == '\\')
1523                     //                        currentPosition++;
1524                     //                    } //jump over the \\
1525                     boolean isUnicode = false;
1526                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1527                       if (currentCharacter == '?') {
1528                         if (getNextChar('>')) {
1529                           startPosition = currentPosition - 2;
1530                           phpMode = false;
1531                           return TokenNameINLINE_HTML;
1532                         }
1533                       }
1534                       //get the next char
1535                       isUnicode = false;
1536                       currentCharacter = source[currentPosition++];
1537                       //                      if (((currentCharacter = source[currentPosition++])
1538                       //                        == '\\')
1539                       //                        && (source[currentPosition] == 'u')) {
1540                       //                        isUnicode = true;
1541                       //                        //-------------unicode traitement ------------
1542                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1543                       //                        currentPosition++;
1544                       //                        while (source[currentPosition] == 'u') {
1545                       //                          currentPosition++;
1546                       //                        }
1547                       //                        if ((c1 =
1548                       //                          Character.getNumericValue(source[currentPosition++]))
1549                       //                          > 15
1550                       //                          || c1 < 0
1551                       //                          || (c2 =
1552                       //                            Character.getNumericValue(
1553                       //                              source[currentPosition++]))
1554                       //                            > 15
1555                       //                          || c2 < 0
1556                       //                          || (c3 =
1557                       //                            Character.getNumericValue(
1558                       //                              source[currentPosition++]))
1559                       //                            > 15
1560                       //                          || c3 < 0
1561                       //                          || (c4 =
1562                       //                            Character.getNumericValue(
1563                       //                              source[currentPosition++]))
1564                       //                            > 15
1565                       //                          || c4 < 0) {
1566                       //                          throw new
1567                       // InvalidInputException(INVALID_UNICODE_ESCAPE);
1568                       //                        } else {
1569                       //                          currentCharacter =
1570                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1571                       //                        }
1572                       //                      }
1573                       //handle the \\u case manually into comment
1574                       //                      if (currentCharacter == '\\') {
1575                       //                        if (source[currentPosition] == '\\')
1576                       //                          currentPosition++;
1577                       //                      } //jump over the \\
1578                     }
1579                     if (isUnicode) {
1580                       endPositionForLineComment = currentPosition - 6;
1581                     } else {
1582                       endPositionForLineComment = currentPosition - 1;
1583                     }
1584                     recordComment(false);
1585                     if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1586                                         if ((currentCharacter == '\r')
1587                         || (currentCharacter == '\n')) {
1588                       checkNonExternalizeString();
1589                       if (recordLineSeparator) {
1590                         if (isUnicode) {
1591                           pushUnicodeLineSeparator();
1592                         } else {
1593                           pushLineSeparator();
1594                         }
1595                       } else {
1596                         currentLine = null;
1597                       }
1598                     }
1599                     if (tokenizeComments) {
1600                       if (!isUnicode) {
1601                         currentPosition = endPositionForLineComment;
1602                         // reset one character behind
1603                       }
1604                       return TokenNameCOMMENT_LINE;
1605                     }
1606                   } catch (IndexOutOfBoundsException e) { //an eof will them
1607                     // be generated
1608                     if (tokenizeComments) {
1609                       currentPosition--;
1610                       // reset one character behind
1611                       return TokenNameCOMMENT_LINE;
1612                     }
1613                   }
1614                   break;
1615                 }
1616                 if (test > 0) {
1617                   //traditional and annotation comment
1618                   boolean isJavadoc = false, star = false;
1619                   // consume next character
1620                   unicodeAsBackSlash = false;
1621                   currentCharacter = source[currentPosition++];
1622                   //                  if (((currentCharacter = source[currentPosition++]) ==
1623                   // '\\')
1624                   //                    && (source[currentPosition] == 'u')) {
1625                   //                    getNextUnicodeChar();
1626                   //                  } else {
1627                   //                    if (withoutUnicodePtr != 0) {
1628                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1629                   //                        currentCharacter;
1630                   //                    }
1631                   //                  }
1632                   if (currentCharacter == '*') {
1633                     isJavadoc = true;
1634                     star = true;
1635                   }
1636                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1637                     checkNonExternalizeString();
1638                     if (recordLineSeparator) {
1639                       pushLineSeparator();
1640                     } else {
1641                       currentLine = null;
1642                     }
1643                   }
1644                   try { //get the next char
1645                     currentCharacter = source[currentPosition++];
1646                     //                    if (((currentCharacter = source[currentPosition++])
1647                     //                      == '\\')
1648                     //                      && (source[currentPosition] == 'u')) {
1649                     //                      //-------------unicode traitement ------------
1650                     //                      getNextUnicodeChar();
1651                     //                    }
1652                     //handle the \\u case manually into comment
1653                     //                    if (currentCharacter == '\\') {
1654                     //                      if (source[currentPosition] == '\\')
1655                     //                        currentPosition++;
1656                     //                      //jump over the \\
1657                     //                    }
1658                     // empty comment is not a javadoc /**/
1659                     if (currentCharacter == '/') {
1660                       isJavadoc = false;
1661                     }
1662                     //loop until end of comment */
1663                     while ((currentCharacter != '/') || (!star)) {
1664                       if ((currentCharacter == '\r')
1665                           || (currentCharacter == '\n')) {
1666                         checkNonExternalizeString();
1667                         if (recordLineSeparator) {
1668                           pushLineSeparator();
1669                         } else {
1670                           currentLine = null;
1671                         }
1672                       }
1673                       star = currentCharacter == '*';
1674                       //get next char
1675                       currentCharacter = source[currentPosition++];
1676                       //                      if (((currentCharacter = source[currentPosition++])
1677                       //                        == '\\')
1678                       //                        && (source[currentPosition] == 'u')) {
1679                       //                        //-------------unicode traitement ------------
1680                       //                        getNextUnicodeChar();
1681                       //                      }
1682                       //handle the \\u case manually into comment
1683                       //                      if (currentCharacter == '\\') {
1684                       //                        if (source[currentPosition] == '\\')
1685                       //                          currentPosition++;
1686                       //                      } //jump over the \\
1687                     }
1688                     recordComment(isJavadoc);
1689                     if (tokenizeComments) {
1690                       if (isJavadoc)
1691                         return TokenNameCOMMENT_PHPDOC;
1692                       return TokenNameCOMMENT_BLOCK;
1693                     }
1694                   } catch (IndexOutOfBoundsException e) {
1695                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1696                   }
1697                   break;
1698                 }
1699                 return TokenNameDIVIDE;
1700               }
1701             case '\u001a' :
1702               if (atEnd())
1703                 return TokenNameEOF;
1704               //the atEnd may not be <currentPosition == source.length> if
1705               // source is only some part of a real (external) stream
1706               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1707             default :
1708               if (currentCharacter == '$') {
1709                 int oldPosition = currentPosition;
1710                 try {
1711                   currentCharacter = source[currentPosition++];
1712                   if (isPHPIdentifierStart(currentCharacter)) {
1713                     return scanIdentifierOrKeyword(true);
1714                   } else {
1715                     currentPosition = oldPosition;
1716                     return TokenNameDOLLAR;
1717                   }
1718                 } catch (IndexOutOfBoundsException e) {
1719                   currentPosition = oldPosition;
1720                   return TokenNameDOLLAR;
1721                 }
1722               }
1723               if (isPHPIdentifierStart(currentCharacter))
1724                 return scanIdentifierOrKeyword(false);
1725               if (Character.isDigit(currentCharacter))
1726                 return scanNumber(false);
1727               return TokenNameERROR;
1728           }
1729         }
1730       } //-----------------end switch while try--------------------
1731       catch (IndexOutOfBoundsException e) {
1732       }
1733     }
1734     return TokenNameEOF;
1735   }
1736   /**
1737    * @return @throws
1738    *         InvalidInputException
1739    */
1740   private int getInlinedHTML(int start) throws InvalidInputException {
1741     //    int htmlPosition = start;
1742     if (currentPosition > source.length) {
1743       currentPosition = source.length;
1744       return TokenNameEOF;
1745     }
1746     startPosition = start;
1747     try {
1748       while (!phpMode) {
1749         currentCharacter = source[currentPosition++];
1750         if (currentCharacter == '<') {
1751           if (getNextChar('?')) {
1752             currentCharacter = source[currentPosition++];
1753             if ((currentCharacter == ' ')
1754                 || Character.isWhitespace(currentCharacter)) {
1755               // <?
1756               phpMode = true;
1757               return TokenNameINLINE_HTML;
1758             } else {
1759               boolean phpStart = (currentCharacter == 'P')
1760                   || (currentCharacter == 'p');
1761               if (phpStart) {
1762                 int test = getNextChar('H', 'h');
1763                 if (test >= 0) {
1764                   test = getNextChar('P', 'p');
1765                   if (test >= 0) {
1766                     // <?PHP <?php
1767                     phpMode = true;
1768                     return TokenNameINLINE_HTML;
1769                   }
1770                 }
1771               }
1772             }
1773           }
1774         }
1775         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1776           if (recordLineSeparator) {
1777             pushLineSeparator();
1778           } else {
1779             currentLine = null;
1780           }
1781         }
1782       } //-----------------while--------------------
1783       phpMode = true;
1784       return TokenNameINLINE_HTML;
1785     } //-----------------try--------------------
1786     catch (IndexOutOfBoundsException e) {
1787       startPosition = start;
1788       currentPosition--;
1789     }
1790     phpMode = true;
1791     return TokenNameINLINE_HTML;
1792   }
1793   //  public final void getNextUnicodeChar()
1794   //    throws IndexOutOfBoundsException, InvalidInputException {
1795   //    //VOID
1796   //    //handle the case of unicode.
1797   //    //when a unicode appears then we must use a buffer that holds char
1798   // internal values
1799   //    //At the end of this method currentCharacter holds the new visited char
1800   //    //and currentPosition points right next after it
1801   //
1802   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1803   //
1804   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1805   //    currentPosition++;
1806   //    while (source[currentPosition] == 'u') {
1807   //      currentPosition++;
1808   //      unicodeSize++;
1809   //    }
1810   //
1811   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1812   //      || c1 < 0
1813   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1814   //      || c2 < 0
1815   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1816   //      || c3 < 0
1817   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1818   //      || c4 < 0) {
1819   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1820   //    } else {
1821   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1822   //      //need the unicode buffer
1823   //      if (withoutUnicodePtr == 0) {
1824   //        //buffer all the entries that have been left aside....
1825   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1826   //        System.arraycopy(
1827   //          source,
1828   //          startPosition,
1829   //          withoutUnicodeBuffer,
1830   //          1,
1831   //          withoutUnicodePtr);
1832   //      }
1833   //      //fill the buffer with the char
1834   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1835   //    }
1836   //    unicodeAsBackSlash = currentCharacter == '\\';
1837   //  }
1838   /*
1839    * Tokenize a method body, assuming that curly brackets are properly
1840    * balanced.
1841    */
1842   public final void jumpOverMethodBody() {
1843     this.wasAcr = false;
1844     int found = 1;
1845     try {
1846       while (true) { //loop for jumping over comments
1847         // ---------Consume white space and handles startPosition---------
1848         boolean isWhiteSpace;
1849         do {
1850           startPosition = currentPosition;
1851           currentCharacter = source[currentPosition++];
1852           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1853           //            && (source[currentPosition] == 'u')) {
1854           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1855           //          } else {
1856           if (recordLineSeparator
1857               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1858             pushLineSeparator();
1859           isWhiteSpace = Character.isWhitespace(currentCharacter);
1860           //          }
1861         } while (isWhiteSpace);
1862         // -------consume token until } is found---------
1863         switch (currentCharacter) {
1864           case '{' :
1865             found++;
1866             break;
1867           case '}' :
1868             found--;
1869             if (found == 0)
1870               return;
1871             break;
1872           case '\'' :
1873             {
1874               boolean test;
1875               test = getNextChar('\\');
1876               if (test) {
1877                 try {
1878                   scanDoubleQuotedEscapeCharacter();
1879                 } catch (InvalidInputException ex) {
1880                 };
1881               } else {
1882                 //                try { // consume next character
1883                 unicodeAsBackSlash = false;
1884                 currentCharacter = source[currentPosition++];
1885                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1886                 //                    && (source[currentPosition] == 'u')) {
1887                 //                    getNextUnicodeChar();
1888                 //                  } else {
1889                 if (withoutUnicodePtr != 0) {
1890                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1891                 }
1892                 //                  }
1893                 //                } catch (InvalidInputException ex) {
1894                 //                };
1895               }
1896               getNextChar('\'');
1897               break;
1898             }
1899           case '"' :
1900             try {
1901               //              try { // consume next character
1902               unicodeAsBackSlash = false;
1903               currentCharacter = source[currentPosition++];
1904               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1905               //                  && (source[currentPosition] == 'u')) {
1906               //                  getNextUnicodeChar();
1907               //                } else {
1908               if (withoutUnicodePtr != 0) {
1909                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1910               }
1911               //                }
1912               //              } catch (InvalidInputException ex) {
1913               //              };
1914               while (currentCharacter != '"') {
1915                 if (currentCharacter == '\r') {
1916                   if (source[currentPosition] == '\n')
1917                     currentPosition++;
1918                   break;
1919                   // the string cannot go further that the line
1920                 }
1921                 if (currentCharacter == '\n') {
1922                   break;
1923                   // the string cannot go further that the line
1924                 }
1925                 if (currentCharacter == '\\') {
1926                   try {
1927                     scanDoubleQuotedEscapeCharacter();
1928                   } catch (InvalidInputException ex) {
1929                   };
1930                 }
1931                 //                try { // consume next character
1932                 unicodeAsBackSlash = false;
1933                 currentCharacter = source[currentPosition++];
1934                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1935                 //                    && (source[currentPosition] == 'u')) {
1936                 //                    getNextUnicodeChar();
1937                 //                  } else {
1938                 if (withoutUnicodePtr != 0) {
1939                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1940                 }
1941                 //                  }
1942                 //                } catch (InvalidInputException ex) {
1943                 //                };
1944               }
1945             } catch (IndexOutOfBoundsException e) {
1946               return;
1947             }
1948             break;
1949           case '/' :
1950             {
1951               int test;
1952               if ((test = getNextChar('/', '*')) == 0) {
1953                 //line comment
1954                 try {
1955                   //get the next char
1956                   currentCharacter = source[currentPosition++];
1957                   //                  if (((currentCharacter = source[currentPosition++]) ==
1958                   // '\\')
1959                   //                    && (source[currentPosition] == 'u')) {
1960                   //                    //-------------unicode traitement ------------
1961                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1962                   //                    currentPosition++;
1963                   //                    while (source[currentPosition] == 'u') {
1964                   //                      currentPosition++;
1965                   //                    }
1966                   //                    if ((c1 =
1967                   //                      Character.getNumericValue(source[currentPosition++]))
1968                   //                      > 15
1969                   //                      || c1 < 0
1970                   //                      || (c2 =
1971                   //                        Character.getNumericValue(source[currentPosition++]))
1972                   //                        > 15
1973                   //                      || c2 < 0
1974                   //                      || (c3 =
1975                   //                        Character.getNumericValue(source[currentPosition++]))
1976                   //                        > 15
1977                   //                      || c3 < 0
1978                   //                      || (c4 =
1979                   //                        Character.getNumericValue(source[currentPosition++]))
1980                   //                        > 15
1981                   //                      || c4 < 0) {
1982                   //                      //error don't care of the value
1983                   //                      currentCharacter = 'A';
1984                   //                    } //something different from \n and \r
1985                   //                    else {
1986                   //                      currentCharacter =
1987                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1988                   //                    }
1989                   //                  }
1990                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1991                     //get the next char
1992                     currentCharacter = source[currentPosition++];
1993                     //                    if (((currentCharacter = source[currentPosition++])
1994                     //                      == '\\')
1995                     //                      && (source[currentPosition] == 'u')) {
1996                     //                      //-------------unicode traitement ------------
1997                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1998                     //                      currentPosition++;
1999                     //                      while (source[currentPosition] == 'u') {
2000                     //                        currentPosition++;
2001                     //                      }
2002                     //                      if ((c1 =
2003                     //                        Character.getNumericValue(source[currentPosition++]))
2004                     //                        > 15
2005                     //                        || c1 < 0
2006                     //                        || (c2 =
2007                     //                          Character.getNumericValue(source[currentPosition++]))
2008                     //                          > 15
2009                     //                        || c2 < 0
2010                     //                        || (c3 =
2011                     //                          Character.getNumericValue(source[currentPosition++]))
2012                     //                          > 15
2013                     //                        || c3 < 0
2014                     //                        || (c4 =
2015                     //                          Character.getNumericValue(source[currentPosition++]))
2016                     //                          > 15
2017                     //                        || c4 < 0) {
2018                     //                        //error don't care of the value
2019                     //                        currentCharacter = 'A';
2020                     //                      } //something different from \n and \r
2021                     //                      else {
2022                     //                        currentCharacter =
2023                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2024                     //                      }
2025                     //                    }
2026                   }
2027                   if (recordLineSeparator
2028                       && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2029                     pushLineSeparator();
2030                 } catch (IndexOutOfBoundsException e) {
2031                 } //an eof will them be generated
2032                 break;
2033               }
2034               if (test > 0) {
2035                 //traditional and annotation comment
2036                 boolean star = false;
2037                 //                try { // consume next character
2038                 unicodeAsBackSlash = false;
2039                 currentCharacter = source[currentPosition++];
2040                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2041                 //                    && (source[currentPosition] == 'u')) {
2042                 //                    getNextUnicodeChar();
2043                 //                  } else {
2044                 if (withoutUnicodePtr != 0) {
2045                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2046                 }
2047                 //                  };
2048                 //                } catch (InvalidInputException ex) {
2049                 //                };
2050                 if (currentCharacter == '*') {
2051                   star = true;
2052                 }
2053                 if (recordLineSeparator
2054                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2055                   pushLineSeparator();
2056                 try { //get the next char
2057                   currentCharacter = source[currentPosition++];
2058                   //                  if (((currentCharacter = source[currentPosition++]) ==
2059                   // '\\')
2060                   //                    && (source[currentPosition] == 'u')) {
2061                   //                    //-------------unicode traitement ------------
2062                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2063                   //                    currentPosition++;
2064                   //                    while (source[currentPosition] == 'u') {
2065                   //                      currentPosition++;
2066                   //                    }
2067                   //                    if ((c1 =
2068                   //                      Character.getNumericValue(source[currentPosition++]))
2069                   //                      > 15
2070                   //                      || c1 < 0
2071                   //                      || (c2 =
2072                   //                        Character.getNumericValue(source[currentPosition++]))
2073                   //                        > 15
2074                   //                      || c2 < 0
2075                   //                      || (c3 =
2076                   //                        Character.getNumericValue(source[currentPosition++]))
2077                   //                        > 15
2078                   //                      || c3 < 0
2079                   //                      || (c4 =
2080                   //                        Character.getNumericValue(source[currentPosition++]))
2081                   //                        > 15
2082                   //                      || c4 < 0) {
2083                   //                      //error don't care of the value
2084                   //                      currentCharacter = 'A';
2085                   //                    } //something different from * and /
2086                   //                    else {
2087                   //                      currentCharacter =
2088                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2089                   //                    }
2090                   //                  }
2091                   //loop until end of comment */
2092                   while ((currentCharacter != '/') || (!star)) {
2093                     if (recordLineSeparator
2094                         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2095                       pushLineSeparator();
2096                     star = currentCharacter == '*';
2097                     //get next char
2098                     currentCharacter = source[currentPosition++];
2099                     //                    if (((currentCharacter = source[currentPosition++])
2100                     //                      == '\\')
2101                     //                      && (source[currentPosition] == 'u')) {
2102                     //                      //-------------unicode traitement ------------
2103                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2104                     //                      currentPosition++;
2105                     //                      while (source[currentPosition] == 'u') {
2106                     //                        currentPosition++;
2107                     //                      }
2108                     //                      if ((c1 =
2109                     //                        Character.getNumericValue(source[currentPosition++]))
2110                     //                        > 15
2111                     //                        || c1 < 0
2112                     //                        || (c2 =
2113                     //                          Character.getNumericValue(source[currentPosition++]))
2114                     //                          > 15
2115                     //                        || c2 < 0
2116                     //                        || (c3 =
2117                     //                          Character.getNumericValue(source[currentPosition++]))
2118                     //                          > 15
2119                     //                        || c3 < 0
2120                     //                        || (c4 =
2121                     //                          Character.getNumericValue(source[currentPosition++]))
2122                     //                          > 15
2123                     //                        || c4 < 0) {
2124                     //                        //error don't care of the value
2125                     //                        currentCharacter = 'A';
2126                     //                      } //something different from * and /
2127                     //                      else {
2128                     //                        currentCharacter =
2129                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2130                     //                      }
2131                     //                    }
2132                   }
2133                 } catch (IndexOutOfBoundsException e) {
2134                   return;
2135                 }
2136                 break;
2137               }
2138               break;
2139             }
2140           default :
2141             if (isPHPIdentifierStart(currentCharacter)
2142                 || currentCharacter == '$') {
2143               try {
2144                 scanIdentifierOrKeyword((currentCharacter == '$'));
2145               } catch (InvalidInputException ex) {
2146               };
2147               break;
2148             }
2149             if (Character.isDigit(currentCharacter)) {
2150               try {
2151                 scanNumber(false);
2152               } catch (InvalidInputException ex) {
2153               };
2154               break;
2155             }
2156         }
2157       }
2158       //-----------------end switch while try--------------------
2159     } catch (IndexOutOfBoundsException e) {
2160     } catch (InvalidInputException e) {
2161     }
2162     return;
2163   }
2164   //  public final boolean jumpOverUnicodeWhiteSpace()
2165   //    throws InvalidInputException {
2166   //    //BOOLEAN
2167   //    //handle the case of unicode. Jump over the next whiteSpace
2168   //    //making startPosition pointing on the next available char
2169   //    //On false, the currentCharacter is filled up with a potential
2170   //    //correct char
2171   //
2172   //    try {
2173   //      this.wasAcr = false;
2174   //      int c1, c2, c3, c4;
2175   //      int unicodeSize = 6;
2176   //      currentPosition++;
2177   //      while (source[currentPosition] == 'u') {
2178   //        currentPosition++;
2179   //        unicodeSize++;
2180   //      }
2181   //
2182   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2183   //        || c1 < 0)
2184   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2185   //          || c2 < 0)
2186   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2187   //          || c3 < 0)
2188   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2189   //          || c4 < 0)) {
2190   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2191   //      }
2192   //
2193   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2194   //      if (recordLineSeparator
2195   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2196   //        pushLineSeparator();
2197   //      if (Character.isWhitespace(currentCharacter))
2198   //        return true;
2199   //
2200   //      //buffer the new char which is not a white space
2201   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2202   //      //withoutUnicodePtr == 1 is true here
2203   //      return false;
2204   //    } catch (IndexOutOfBoundsException e) {
2205   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2206   //    }
2207   //  }
2208   public final int[] getLineEnds() {
2209     //return a bounded copy of this.lineEnds
2210     int[] copy;
2211     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2212     return copy;
2213   }
2214   public char[] getSource() {
2215     return this.source;
2216   }
2217   final char[] optimizedCurrentTokenSource1() {
2218     //return always the same char[] build only once
2219     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2220     char charOne = source[startPosition];
2221     switch (charOne) {
2222       case 'a' :
2223         return charArray_a;
2224       case 'b' :
2225         return charArray_b;
2226       case 'c' :
2227         return charArray_c;
2228       case 'd' :
2229         return charArray_d;
2230       case 'e' :
2231         return charArray_e;
2232       case 'f' :
2233         return charArray_f;
2234       case 'g' :
2235         return charArray_g;
2236       case 'h' :
2237         return charArray_h;
2238       case 'i' :
2239         return charArray_i;
2240       case 'j' :
2241         return charArray_j;
2242       case 'k' :
2243         return charArray_k;
2244       case 'l' :
2245         return charArray_l;
2246       case 'm' :
2247         return charArray_m;
2248       case 'n' :
2249         return charArray_n;
2250       case 'o' :
2251         return charArray_o;
2252       case 'p' :
2253         return charArray_p;
2254       case 'q' :
2255         return charArray_q;
2256       case 'r' :
2257         return charArray_r;
2258       case 's' :
2259         return charArray_s;
2260       case 't' :
2261         return charArray_t;
2262       case 'u' :
2263         return charArray_u;
2264       case 'v' :
2265         return charArray_v;
2266       case 'w' :
2267         return charArray_w;
2268       case 'x' :
2269         return charArray_x;
2270       case 'y' :
2271         return charArray_y;
2272       case 'z' :
2273         return charArray_z;
2274       default :
2275         return new char[]{charOne};
2276     }
2277   }
2278   final char[] optimizedCurrentTokenSource2() {
2279     //try to return the same char[] build only once
2280     char c0, c1;
2281     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2282         % TableSize;
2283     char[][] table = charArray_length[0][hash];
2284     int i = newEntry2;
2285     while (++i < InternalTableSize) {
2286       char[] charArray = table[i];
2287       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2288         return charArray;
2289     }
2290     //---------other side---------
2291     i = -1;
2292     int max = newEntry2;
2293     while (++i <= max) {
2294       char[] charArray = table[i];
2295       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2296         return charArray;
2297     }
2298     //--------add the entry-------
2299     if (++max >= InternalTableSize)
2300       max = 0;
2301     char[] r;
2302     table[max] = (r = new char[]{c0, c1});
2303     newEntry2 = max;
2304     return r;
2305   }
2306   final char[] optimizedCurrentTokenSource3() {
2307     //try to return the same char[] build only once
2308     char c0, c1, c2;
2309     int hash = (((c0 = source[startPosition]) << 12)
2310         + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2311         % TableSize;
2312     char[][] table = charArray_length[1][hash];
2313     int i = newEntry3;
2314     while (++i < InternalTableSize) {
2315       char[] charArray = table[i];
2316       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2317         return charArray;
2318     }
2319     //---------other side---------
2320     i = -1;
2321     int max = newEntry3;
2322     while (++i <= max) {
2323       char[] charArray = table[i];
2324       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2325         return charArray;
2326     }
2327     //--------add the entry-------
2328     if (++max >= InternalTableSize)
2329       max = 0;
2330     char[] r;
2331     table[max] = (r = new char[]{c0, c1, c2});
2332     newEntry3 = max;
2333     return r;
2334   }
2335   final char[] optimizedCurrentTokenSource4() {
2336     //try to return the same char[] build only once
2337     char c0, c1, c2, c3;
2338     long hash = ((((long) (c0 = source[startPosition])) << 18)
2339         + ((c1 = source[startPosition + 1]) << 12)
2340         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2341         % TableSize;
2342     char[][] table = charArray_length[2][(int) hash];
2343     int i = newEntry4;
2344     while (++i < InternalTableSize) {
2345       char[] charArray = table[i];
2346       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2347           && (c3 == charArray[3]))
2348         return charArray;
2349     }
2350     //---------other side---------
2351     i = -1;
2352     int max = newEntry4;
2353     while (++i <= max) {
2354       char[] charArray = table[i];
2355       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2356           && (c3 == charArray[3]))
2357         return charArray;
2358     }
2359     //--------add the entry-------
2360     if (++max >= InternalTableSize)
2361       max = 0;
2362     char[] r;
2363     table[max] = (r = new char[]{c0, c1, c2, c3});
2364     newEntry4 = max;
2365     return r;
2366   }
2367   final char[] optimizedCurrentTokenSource5() {
2368     //try to return the same char[] build only once
2369     char c0, c1, c2, c3, c4;
2370     long hash = ((((long) (c0 = source[startPosition])) << 24)
2371         + (((long) (c1 = source[startPosition + 1])) << 18)
2372         + ((c2 = source[startPosition + 2]) << 12)
2373         + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2374         % TableSize;
2375     char[][] table = charArray_length[3][(int) hash];
2376     int i = newEntry5;
2377     while (++i < InternalTableSize) {
2378       char[] charArray = table[i];
2379       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2380           && (c3 == charArray[3]) && (c4 == charArray[4]))
2381         return charArray;
2382     }
2383     //---------other side---------
2384     i = -1;
2385     int max = newEntry5;
2386     while (++i <= max) {
2387       char[] charArray = table[i];
2388       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2389           && (c3 == charArray[3]) && (c4 == charArray[4]))
2390         return charArray;
2391     }
2392     //--------add the entry-------
2393     if (++max >= InternalTableSize)
2394       max = 0;
2395     char[] r;
2396     table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2397     newEntry5 = max;
2398     return r;
2399   }
2400   final char[] optimizedCurrentTokenSource6() {
2401     //try to return the same char[] build only once
2402     char c0, c1, c2, c3, c4, c5;
2403     long hash = ((((long) (c0 = source[startPosition])) << 32)
2404         + (((long) (c1 = source[startPosition + 1])) << 24)
2405         + (((long) (c2 = source[startPosition + 2])) << 18)
2406         + ((c3 = source[startPosition + 3]) << 12)
2407         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2408         % TableSize;
2409     char[][] table = charArray_length[4][(int) hash];
2410     int i = newEntry6;
2411     while (++i < InternalTableSize) {
2412       char[] charArray = table[i];
2413       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2414           && (c3 == charArray[3]) && (c4 == charArray[4])
2415           && (c5 == charArray[5]))
2416         return charArray;
2417     }
2418     //---------other side---------
2419     i = -1;
2420     int max = newEntry6;
2421     while (++i <= max) {
2422       char[] charArray = table[i];
2423       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2424           && (c3 == charArray[3]) && (c4 == charArray[4])
2425           && (c5 == charArray[5]))
2426         return charArray;
2427     }
2428     //--------add the entry-------
2429     if (++max >= InternalTableSize)
2430       max = 0;
2431     char[] r;
2432     table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2433     newEntry6 = max;
2434     return r;
2435   }
2436   public final void pushLineSeparator() throws InvalidInputException {
2437     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2438     final int INCREMENT = 250;
2439     if (this.checkNonExternalizedStringLiterals) {
2440       // reinitialize the current line for non externalize strings purpose
2441       currentLine = null;
2442     }
2443     //currentCharacter is at position currentPosition-1
2444     // cr 000D
2445     if (currentCharacter == '\r') {
2446       int separatorPos = currentPosition - 1;
2447       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2448         return;
2449       //System.out.println("CR-" + separatorPos);
2450       try {
2451         lineEnds[++linePtr] = separatorPos;
2452       } catch (IndexOutOfBoundsException e) {
2453         //linePtr value is correct
2454         int oldLength = lineEnds.length;
2455         int[] old = lineEnds;
2456         lineEnds = new int[oldLength + INCREMENT];
2457         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2458         lineEnds[linePtr] = separatorPos;
2459       }
2460       // look-ahead for merged cr+lf
2461       try {
2462         if (source[currentPosition] == '\n') {
2463           //System.out.println("look-ahead LF-" + currentPosition);
2464           lineEnds[linePtr] = currentPosition;
2465           currentPosition++;
2466           wasAcr = false;
2467         } else {
2468           wasAcr = true;
2469         }
2470       } catch (IndexOutOfBoundsException e) {
2471         wasAcr = true;
2472       }
2473     } else {
2474       // lf 000A
2475       if (currentCharacter == '\n') {
2476         //must merge eventual cr followed by lf
2477         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2478           //System.out.println("merge LF-" + (currentPosition - 1));
2479           lineEnds[linePtr] = currentPosition - 1;
2480         } else {
2481           int separatorPos = currentPosition - 1;
2482           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2483             return;
2484           // System.out.println("LF-" + separatorPos);
2485           try {
2486             lineEnds[++linePtr] = separatorPos;
2487           } catch (IndexOutOfBoundsException e) {
2488             //linePtr value is correct
2489             int oldLength = lineEnds.length;
2490             int[] old = lineEnds;
2491             lineEnds = new int[oldLength + INCREMENT];
2492             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2493             lineEnds[linePtr] = separatorPos;
2494           }
2495         }
2496         wasAcr = false;
2497       }
2498     }
2499   }
2500   public final void pushUnicodeLineSeparator() {
2501     // isUnicode means that the \r or \n has been read as a unicode character
2502     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2503     final int INCREMENT = 250;
2504     //currentCharacter is at position currentPosition-1
2505     if (this.checkNonExternalizedStringLiterals) {
2506       // reinitialize the current line for non externalize strings purpose
2507       currentLine = null;
2508     }
2509     // cr 000D
2510     if (currentCharacter == '\r') {
2511       int separatorPos = currentPosition - 6;
2512       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2513         return;
2514       //System.out.println("CR-" + separatorPos);
2515       try {
2516         lineEnds[++linePtr] = separatorPos;
2517       } catch (IndexOutOfBoundsException e) {
2518         //linePtr value is correct
2519         int oldLength = lineEnds.length;
2520         int[] old = lineEnds;
2521         lineEnds = new int[oldLength + INCREMENT];
2522         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2523         lineEnds[linePtr] = separatorPos;
2524       }
2525       // look-ahead for merged cr+lf
2526       if (source[currentPosition] == '\n') {
2527         //System.out.println("look-ahead LF-" + currentPosition);
2528         lineEnds[linePtr] = currentPosition;
2529         currentPosition++;
2530         wasAcr = false;
2531       } else {
2532         wasAcr = true;
2533       }
2534     } else {
2535       // lf 000A
2536       if (currentCharacter == '\n') {
2537         //must merge eventual cr followed by lf
2538         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2539           //System.out.println("merge LF-" + (currentPosition - 1));
2540           lineEnds[linePtr] = currentPosition - 6;
2541         } else {
2542           int separatorPos = currentPosition - 6;
2543           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2544             return;
2545           // System.out.println("LF-" + separatorPos);
2546           try {
2547             lineEnds[++linePtr] = separatorPos;
2548           } catch (IndexOutOfBoundsException e) {
2549             //linePtr value is correct
2550             int oldLength = lineEnds.length;
2551             int[] old = lineEnds;
2552             lineEnds = new int[oldLength + INCREMENT];
2553             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2554             lineEnds[linePtr] = separatorPos;
2555           }
2556         }
2557         wasAcr = false;
2558       }
2559     }
2560   }
2561   public final void recordComment(boolean isJavadoc) {
2562     // a new annotation comment is recorded
2563     try {
2564       commentStops[++commentPtr] = isJavadoc
2565           ? currentPosition
2566           : -currentPosition;
2567     } catch (IndexOutOfBoundsException e) {
2568       int oldStackLength = commentStops.length;
2569       int[] oldStack = commentStops;
2570       commentStops = new int[oldStackLength + 30];
2571       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2572       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2573       //grows the positions buffers too
2574       int[] old = commentStarts;
2575       commentStarts = new int[oldStackLength + 30];
2576       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2577     }
2578     //the buffer is of a correct size here
2579     commentStarts[commentPtr] = startPosition;
2580   }
2581   public void resetTo(int begin, int end) {
2582     //reset the scanner to a given position where it may rescan again
2583     diet = false;
2584     initialPosition = startPosition = currentPosition = begin;
2585     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2586     commentPtr = -1; // reset comment stack
2587   }
2588   public final void scanSingleQuotedEscapeCharacter()
2589       throws InvalidInputException {
2590     // the string with "\\u" is a legal string of two chars \ and u
2591     //thus we use a direct access to the source (for regular cases).
2592     //    if (unicodeAsBackSlash) {
2593     //      // consume next character
2594     //      unicodeAsBackSlash = false;
2595     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2596     //        && (source[currentPosition] == 'u')) {
2597     //        getNextUnicodeChar();
2598     //      } else {
2599     //        if (withoutUnicodePtr != 0) {
2600     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2601     //        }
2602     //      }
2603     //    } else
2604     currentCharacter = source[currentPosition++];
2605     switch (currentCharacter) {
2606       case '\'' :
2607         currentCharacter = '\'';
2608         break;
2609       case '\\' :
2610         currentCharacter = '\\';
2611         break;
2612       default :
2613         currentCharacter = '\\';
2614         currentPosition--;
2615     }
2616   }
2617   public final void scanDoubleQuotedEscapeCharacter()
2618       throws InvalidInputException {
2619     // the string with "\\u" is a legal string of two chars \ and u
2620     //thus we use a direct access to the source (for regular cases).
2621     //    if (unicodeAsBackSlash) {
2622     //      // consume next character
2623     //      unicodeAsBackSlash = false;
2624     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2625     //        && (source[currentPosition] == 'u')) {
2626     //        getNextUnicodeChar();
2627     //      } else {
2628     //        if (withoutUnicodePtr != 0) {
2629     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2630     //        }
2631     //      }
2632     //    } else
2633     currentCharacter = source[currentPosition++];
2634     switch (currentCharacter) {
2635       //      case 'b' :
2636       //        currentCharacter = '\b';
2637       //        break;
2638       case 't' :
2639         currentCharacter = '\t';
2640         break;
2641       case 'n' :
2642         currentCharacter = '\n';
2643         break;
2644       //      case 'f' :
2645       //        currentCharacter = '\f';
2646       //        break;
2647       case 'r' :
2648         currentCharacter = '\r';
2649         break;
2650       case '\"' :
2651         currentCharacter = '\"';
2652         break;
2653       case '\'' :
2654         currentCharacter = '\'';
2655         break;
2656       case '\\' :
2657         currentCharacter = '\\';
2658         break;
2659       case '$' :
2660         currentCharacter = '$';
2661         break;
2662       default :
2663         // -----------octal escape--------------
2664         // OctalDigit
2665         // OctalDigit OctalDigit
2666         // ZeroToThree OctalDigit OctalDigit
2667         int number = Character.getNumericValue(currentCharacter);
2668         if (number >= 0 && number <= 7) {
2669           boolean zeroToThreeNot = number > 3;
2670           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2671             int digit = Character.getNumericValue(currentCharacter);
2672             if (digit >= 0 && digit <= 7) {
2673               number = (number * 8) + digit;
2674               if (Character
2675                   .isDigit(currentCharacter = source[currentPosition++])) {
2676                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2677                   // Digit --> ignore last character
2678                   currentPosition--;
2679                 } else {
2680                   digit = Character.getNumericValue(currentCharacter);
2681                   if (digit >= 0 && digit <= 7) {
2682                     // has read \ZeroToThree OctalDigit OctalDigit
2683                     number = (number * 8) + digit;
2684                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2685                     // --> ignore last character
2686                     currentPosition--;
2687                   }
2688                 }
2689               } else { // has read \OctalDigit NonDigit--> ignore last
2690                 // character
2691                 currentPosition--;
2692               }
2693             } else { // has read \OctalDigit NonOctalDigit--> ignore last
2694               // character
2695               currentPosition--;
2696             }
2697           } else { // has read \OctalDigit --> ignore last character
2698             currentPosition--;
2699           }
2700           if (number > 255)
2701             throw new InvalidInputException(INVALID_ESCAPE);
2702           currentCharacter = (char) number;
2703         }
2704     //else
2705     //     throw new InvalidInputException(INVALID_ESCAPE);
2706     }
2707   }
2708   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2709   //    return scanIdentifierOrKeyword( false );
2710   //  }
2711   public int scanIdentifierOrKeyword(boolean isVariable)
2712       throws InvalidInputException {
2713     //test keywords
2714     //first dispatch on the first char.
2715     //then the length. If there are several
2716     //keywors with the same length AND the same first char, then do another
2717     //disptach on the second char :-)...cool....but fast !
2718     useAssertAsAnIndentifier = false;
2719     while (getNextCharAsJavaIdentifierPart()) {
2720     };
2721     if (isVariable) {
2722       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2723       //        return TokenNamethis;
2724       //      }
2725       return TokenNameVariable;
2726     }
2727     int index, length;
2728     char[] data;
2729     char firstLetter;
2730     //    if (withoutUnicodePtr == 0)
2731     //quick test on length == 1 but not on length > 12 while most identifier
2732     //have a length which is <= 12...but there are lots of identifier with
2733     //only one char....
2734     //      {
2735     if ((length = currentPosition - startPosition) == 1)
2736       return TokenNameIdentifier;
2737     //  data = source;
2738     data = new char[length];
2739     index = startPosition;
2740     for (int i = 0; i < length; i++) {
2741       data[i] = Character.toLowerCase(source[index + i]);
2742     }
2743     index = 0;
2744     //    } else {
2745     //      if ((length = withoutUnicodePtr) == 1)
2746     //        return TokenNameIdentifier;
2747     //      // data = withoutUnicodeBuffer;
2748     //      data = new char[withoutUnicodeBuffer.length];
2749     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2750     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2751     //      }
2752     //      index = 1;
2753     //    }
2754     firstLetter = data[index];
2755     switch (firstLetter) {
2756       case '_' :
2757         switch (length) {
2758           case 8 :
2759             //__FILE__
2760             if ((data[++index] == '_') && (data[++index] == 'f')
2761                 && (data[++index] == 'i') && (data[++index] == 'l')
2762                 && (data[++index] == 'e') && (data[++index] == '_')
2763                 && (data[++index] == '_'))
2764               return TokenNameFILE;
2765             index = 0; //__LINE__
2766             if ((data[++index] == '_') && (data[++index] == 'l')
2767                 && (data[++index] == 'i') && (data[++index] == 'n')
2768                 && (data[++index] == 'e') && (data[++index] == '_')
2769                 && (data[++index] == '_'))
2770               return TokenNameLINE;
2771             break;
2772           case 9 :
2773             //__CLASS__
2774             if ((data[++index] == '_') && (data[++index] == 'c')
2775                 && (data[++index] == 'l') && (data[++index] == 'a')
2776                 && (data[++index] == 's') && (data[++index] == 's')
2777                 && (data[++index] == '_') && (data[++index] == '_'))
2778               return TokenNameCLASS_C;
2779             break;
2780           case 11 :
2781             //__METHOD__
2782             if ((data[++index] == '_') && (data[++index] == 'm')
2783                 && (data[++index] == 'e') && (data[++index] == 't')
2784                 && (data[++index] == 'h') && (data[++index] == 'o')
2785                 && (data[++index] == 'd') && (data[++index] == '_')
2786                 && (data[++index] == '_'))
2787               return TokenNameMETHOD_C;
2788             break;
2789           case 12 :
2790             //__FUNCTION__
2791             if ((data[++index] == '_') && (data[++index] == 'f')
2792                 && (data[++index] == 'u') && (data[++index] == 'n')
2793                 && (data[++index] == 'c') && (data[++index] == 't')
2794                 && (data[++index] == 'i') && (data[++index] == 'o')
2795                 && (data[++index] == 'n') && (data[++index] == '_')
2796                 && (data[++index] == '_'))
2797               return TokenNameFUNC_C;
2798             break;
2799         }
2800         return TokenNameIdentifier;
2801       case 'a' :
2802         // as and array abstract
2803         switch (length) {
2804           case 2 :
2805             //as
2806             if ((data[++index] == 's')) {
2807               return TokenNameas;
2808             } else {
2809               return TokenNameIdentifier;
2810             }
2811           case 3 :
2812             //and
2813             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2814               return TokenNameand;
2815             } else {
2816               return TokenNameIdentifier;
2817             }
2818           case 5 :
2819             // array
2820             if ((data[++index] == 'r') && (data[++index] == 'r')
2821                 && (data[++index] == 'a') && (data[++index] == 'y'))
2822               return TokenNamearray;
2823             else
2824               return TokenNameIdentifier;
2825           case 8 :
2826             if ((data[++index] == 'b') && (data[++index] == 's')
2827                 && (data[++index] == 't') && (data[++index] == 'r')
2828                 && (data[++index] == 'a') && (data[++index] == 'c')
2829                 && (data[++index] == 't'))
2830               return TokenNameabstract;
2831             else
2832               return TokenNameIdentifier;
2833           default :
2834             return TokenNameIdentifier;
2835         }
2836       case 'b' :
2837         //break
2838         switch (length) {
2839           case 5 :
2840             if ((data[++index] == 'r') && (data[++index] == 'e')
2841                 && (data[++index] == 'a') && (data[++index] == 'k'))
2842               return TokenNamebreak;
2843             else
2844               return TokenNameIdentifier;
2845           default :
2846             return TokenNameIdentifier;
2847         }
2848       case 'c' :
2849         //case catch class clone const continue
2850         switch (length) {
2851           case 4 :
2852             if ((data[++index] == 'a') && (data[++index] == 's')
2853                 && (data[++index] == 'e'))
2854               return TokenNamecase;
2855             else
2856               return TokenNameIdentifier;
2857           case 5 :
2858             if ((data[++index] == 'a') && (data[++index] == 't')
2859                 && (data[++index] == 'c') && (data[++index] == 'h'))
2860               return TokenNamecatch;
2861             index = 0;
2862             if ((data[++index] == 'l') && (data[++index] == 'a')
2863                 && (data[++index] == 's') && (data[++index] == 's'))
2864               return TokenNameclass;
2865             index = 0;
2866             if ((data[++index] == 'l') && (data[++index] == 'o')
2867                 && (data[++index] == 'n') && (data[++index] == 'e'))
2868               return TokenNameclone;
2869             index = 0;
2870             if ((data[++index] == 'o') && (data[++index] == 'n')
2871                 && (data[++index] == 's') && (data[++index] == 't'))
2872               return TokenNameconst;
2873             else
2874               return TokenNameIdentifier;
2875           case 8 :
2876             if ((data[++index] == 'o') && (data[++index] == 'n')
2877                 && (data[++index] == 't') && (data[++index] == 'i')
2878                 && (data[++index] == 'n') && (data[++index] == 'u')
2879                 && (data[++index] == 'e'))
2880               return TokenNamecontinue;
2881             else
2882               return TokenNameIdentifier;
2883           default :
2884             return TokenNameIdentifier;
2885         }
2886       case 'd' :
2887         // declare default do die
2888         // TODO delete define ==> no keyword !
2889         switch (length) {
2890           case 2 :
2891             if ((data[++index] == 'o'))
2892               return TokenNamedo;
2893             else
2894               return TokenNameIdentifier;
2895           //          case 6 :
2896           //            if ((data[++index] == 'e')
2897           //              && (data[++index] == 'f')
2898           //              && (data[++index] == 'i')
2899           //              && (data[++index] == 'n')
2900           //              && (data[++index] == 'e'))
2901           //              return TokenNamedefine;
2902           //            else
2903           //              return TokenNameIdentifier;
2904           case 7 :
2905             if ((data[++index] == 'e') && (data[++index] == 'c')
2906                 && (data[++index] == 'l') && (data[++index] == 'a')
2907                 && (data[++index] == 'r') && (data[++index] == 'e'))
2908               return TokenNamedeclare;
2909             index = 0;
2910             if ((data[++index] == 'e') && (data[++index] == 'f')
2911                 && (data[++index] == 'a') && (data[++index] == 'u')
2912                 && (data[++index] == 'l') && (data[++index] == 't'))
2913               return TokenNamedefault;
2914             else
2915               return TokenNameIdentifier;
2916           default :
2917             return TokenNameIdentifier;
2918         }
2919       case 'e' :
2920         //echo else exit elseif extends eval
2921         switch (length) {
2922           case 4 :
2923             if ((data[++index] == 'c') && (data[++index] == 'h')
2924                 && (data[++index] == 'o'))
2925               return TokenNameecho;
2926             else if ((data[index] == 'l') && (data[++index] == 's')
2927                 && (data[++index] == 'e'))
2928               return TokenNameelse;
2929             else if ((data[index] == 'x') && (data[++index] == 'i')
2930                 && (data[++index] == 't'))
2931               return TokenNameexit;
2932             else if ((data[index] == 'v') && (data[++index] == 'a')
2933                 && (data[++index] == 'l'))
2934               return TokenNameeval;
2935             else
2936               return TokenNameIdentifier;
2937           case 5 :
2938             // endif empty
2939             if ((data[++index] == 'n') && (data[++index] == 'd')
2940                 && (data[++index] == 'i') && (data[++index] == 'f'))
2941               return TokenNameendif;
2942             if ((data[index] == 'm') && (data[++index] == 'p')
2943                 && (data[++index] == 't') && (data[++index] == 'y'))
2944               return TokenNameempty;
2945             else
2946               return TokenNameIdentifier;
2947           case 6 :
2948             // endfor
2949             if ((data[++index] == 'n') && (data[++index] == 'd')
2950                 && (data[++index] == 'f') && (data[++index] == 'o')
2951                 && (data[++index] == 'r'))
2952               return TokenNameendfor;
2953             else if ((data[index] == 'l') && (data[++index] == 's')
2954                 && (data[++index] == 'e') && (data[++index] == 'i')
2955                 && (data[++index] == 'f'))
2956               return TokenNameelseif;
2957             else
2958               return TokenNameIdentifier;
2959           case 7 :
2960             if ((data[++index] == 'x') && (data[++index] == 't')
2961                 && (data[++index] == 'e') && (data[++index] == 'n')
2962                 && (data[++index] == 'd') && (data[++index] == 's'))
2963               return TokenNameextends;
2964             else
2965               return TokenNameIdentifier;
2966           case 8 :
2967             // endwhile
2968             if ((data[++index] == 'n') && (data[++index] == 'd')
2969                 && (data[++index] == 'w') && (data[++index] == 'h')
2970                 && (data[++index] == 'i') && (data[++index] == 'l')
2971                 && (data[++index] == 'e'))
2972               return TokenNameendwhile;
2973             else
2974               return TokenNameIdentifier;
2975           case 9 :
2976             // endswitch
2977             if ((data[++index] == 'n') && (data[++index] == 'd')
2978                 && (data[++index] == 's') && (data[++index] == 'w')
2979                 && (data[++index] == 'i') && (data[++index] == 't')
2980                 && (data[++index] == 'c') && (data[++index] == 'h'))
2981               return TokenNameendswitch;
2982             else
2983               return TokenNameIdentifier;
2984           case 10 :
2985             // enddeclare
2986             if ((data[++index] == 'n') && (data[++index] == 'd')
2987                 && (data[++index] == 'd') && (data[++index] == 'e')
2988                 && (data[++index] == 'c') && (data[++index] == 'l')
2989                 && (data[++index] == 'a') && (data[++index] == 'r')
2990                 && (data[++index] == 'e'))
2991               return TokenNameendforeach;
2992             index = 0;
2993             if ((data[++index] == 'n') // endforeach
2994                 && (data[++index] == 'd') && (data[++index] == 'f')
2995                 && (data[++index] == 'o') && (data[++index] == 'r')
2996                 && (data[++index] == 'e') && (data[++index] == 'a')
2997                 && (data[++index] == 'c') && (data[++index] == 'h'))
2998               return TokenNameendforeach;
2999             else
3000               return TokenNameIdentifier;
3001           default :
3002             return TokenNameIdentifier;
3003         }
3004       case 'f' :
3005         //for false final function
3006         switch (length) {
3007           case 3 :
3008             if ((data[++index] == 'o') && (data[++index] == 'r'))
3009               return TokenNamefor;
3010             else
3011               return TokenNameIdentifier;
3012           case 5 :
3013             //            if ((data[++index] == 'a') && (data[++index] == 'l')
3014             //                && (data[++index] == 's') && (data[++index] == 'e'))
3015             //              return TokenNamefalse;
3016             if ((data[++index] == 'i') && (data[++index] == 'n')
3017                 && (data[++index] == 'a') && (data[++index] == 'l'))
3018               return TokenNamefinal;
3019             else
3020               return TokenNameIdentifier;
3021           case 7 :
3022             // foreach
3023             if ((data[++index] == 'o') && (data[++index] == 'r')
3024                 && (data[++index] == 'e') && (data[++index] == 'a')
3025                 && (data[++index] == 'c') && (data[++index] == 'h'))
3026               return TokenNameforeach;
3027             else
3028               return TokenNameIdentifier;
3029           case 8 :
3030             // function
3031             if ((data[++index] == 'u') && (data[++index] == 'n')
3032                 && (data[++index] == 'c') && (data[++index] == 't')
3033                 && (data[++index] == 'i') && (data[++index] == 'o')
3034                 && (data[++index] == 'n'))
3035               return TokenNamefunction;
3036             else
3037               return TokenNameIdentifier;
3038           default :
3039             return TokenNameIdentifier;
3040         }
3041       case 'g' :
3042         //global
3043         if (length == 6) {
3044           if ((data[++index] == 'l') && (data[++index] == 'o')
3045               && (data[++index] == 'b') && (data[++index] == 'a')
3046               && (data[++index] == 'l')) {
3047             return TokenNameglobal;
3048           }
3049         }
3050         return TokenNameIdentifier;
3051       case 'i' :
3052         //if int isset include include_once instanceof interface implements
3053         switch (length) {
3054           case 2 :
3055             if (data[++index] == 'f')
3056               return TokenNameif;
3057             else
3058               return TokenNameIdentifier;
3059           //          case 3 :
3060           //            if ((data[++index] == 'n') && (data[++index] == 't'))
3061           //              return TokenNameint;
3062           //            else
3063           //              return TokenNameIdentifier;
3064           case 5 :
3065             if ((data[++index] == 's') && (data[++index] == 's')
3066                 && (data[++index] == 'e') && (data[++index] == 't'))
3067               return TokenNameisset;
3068             else
3069               return TokenNameIdentifier;
3070           case 7 :
3071             if ((data[++index] == 'n') && (data[++index] == 'c')
3072                 && (data[++index] == 'l') && (data[++index] == 'u')
3073                 && (data[++index] == 'd') && (data[++index] == 'e'))
3074               return TokenNameinclude;
3075             else
3076               return TokenNameIdentifier;
3077           case 9 :
3078             // interface
3079             if ((data[++index] == 'n') && (data[++index] == 't')
3080                 && (data[++index] == 'e') && (data[++index] == 'r')
3081                 && (data[++index] == 'f') && (data[++index] == 'a')
3082                 && (data[++index] == 'c') && (data[++index] == 'e'))
3083               return TokenNameinterface;
3084             else
3085               return TokenNameIdentifier;
3086           case 10 :
3087             // instanceof
3088             if ((data[++index] == 'n') && (data[++index] == 's')
3089                 && (data[++index] == 't') && (data[++index] == 'a')
3090                 && (data[++index] == 'n') && (data[++index] == 'c')
3091                 && (data[++index] == 'e') && (data[++index] == 'o')
3092                 && (data[++index] == 'f'))
3093               return TokenNameinstanceof;
3094             if ((data[index] == 'm') && (data[++index] == 'p')
3095                 && (data[++index] == 'l') && (data[++index] == 'e')
3096                 && (data[++index] == 'm') && (data[++index] == 'e')
3097                 && (data[++index] == 'n') && (data[++index] == 't')
3098                 && (data[++index] == 's'))
3099               return TokenNameimplements;
3100             else
3101               return TokenNameIdentifier;
3102           case 12 :
3103             if ((data[++index] == 'n') && (data[++index] == 'c')
3104                 && (data[++index] == 'l') && (data[++index] == 'u')
3105                 && (data[++index] == 'd') && (data[++index] == 'e')
3106                 && (data[++index] == '_') && (data[++index] == 'o')
3107                 && (data[++index] == 'n') && (data[++index] == 'c')
3108                 && (data[++index] == 'e'))
3109               return TokenNameinclude_once;
3110             else
3111               return TokenNameIdentifier;
3112           default :
3113             return TokenNameIdentifier;
3114         }
3115       case 'l' :
3116         //list
3117         if (length == 4) {
3118           if ((data[++index] == 'i') && (data[++index] == 's')
3119               && (data[++index] == 't')) {
3120             return TokenNamelist;
3121           }
3122         }
3123         return TokenNameIdentifier;
3124       case 'n' :
3125         // new null
3126         switch (length) {
3127           case 3 :
3128             if ((data[++index] == 'e') && (data[++index] == 'w'))
3129               return TokenNamenew;
3130             else
3131               return TokenNameIdentifier;
3132           //          case 4 :
3133           //            if ((data[++index] == 'u') && (data[++index] == 'l')
3134           //                && (data[++index] == 'l'))
3135           //              return TokenNamenull;
3136           //            else
3137           //              return TokenNameIdentifier;
3138           default :
3139             return TokenNameIdentifier;
3140         }
3141       case 'o' :
3142         // or old_function
3143         if (length == 2) {
3144           if (data[++index] == 'r') {
3145             return TokenNameor;
3146           }
3147         }
3148         //        if (length == 12) {
3149         //          if ((data[++index] == 'l')
3150         //            && (data[++index] == 'd')
3151         //            && (data[++index] == '_')
3152         //            && (data[++index] == 'f')
3153         //            && (data[++index] == 'u')
3154         //            && (data[++index] == 'n')
3155         //            && (data[++index] == 'c')
3156         //            && (data[++index] == 't')
3157         //            && (data[++index] == 'i')
3158         //            && (data[++index] == 'o')
3159         //            && (data[++index] == 'n')) {
3160         //            return TokenNameold_function;
3161         //          }
3162         //        }
3163         return TokenNameIdentifier;
3164       case 'p' :
3165         // print public private protected
3166         switch (length) {
3167           case 5 :
3168             if ((data[++index] == 'r') && (data[++index] == 'i')
3169                 && (data[++index] == 'n') && (data[++index] == 't')) {
3170               return TokenNameprint;
3171             } else
3172               return TokenNameIdentifier;
3173           case 6 :
3174             if ((data[++index] == 'u') && (data[++index] == 'b')
3175                 && (data[++index] == 'l') && (data[++index] == 'i')
3176                 && (data[++index] == 'c')) {
3177               return TokenNamepublic;
3178             } else
3179               return TokenNameIdentifier;
3180           case 7 :
3181             if ((data[++index] == 'r') && (data[++index] == 'i')
3182                 && (data[++index] == 'v') && (data[++index] == 'a')
3183                 && (data[++index] == 't') && (data[++index] == 'e')) {
3184               return TokenNameprivate;
3185             } else
3186               return TokenNameIdentifier;
3187           case 9 :
3188             if ((data[++index] == 'r') && (data[++index] == 'o')
3189                 && (data[++index] == 't') && (data[++index] == 'e')
3190                 && (data[++index] == 'c') && (data[++index] == 't')
3191                 && (data[++index] == 'e') && (data[++index] == 'd')) {
3192               return TokenNameprotected;
3193             } else
3194               return TokenNameIdentifier;
3195         }
3196         return TokenNameIdentifier;
3197       case 'r' :
3198         //return require require_once
3199         if (length == 6) {
3200           if ((data[++index] == 'e') && (data[++index] == 't')
3201               && (data[++index] == 'u') && (data[++index] == 'r')
3202               && (data[++index] == 'n')) {
3203             return TokenNamereturn;
3204           }
3205         } else if (length == 7) {
3206           if ((data[++index] == 'e') && (data[++index] == 'q')
3207               && (data[++index] == 'u') && (data[++index] == 'i')
3208               && (data[++index] == 'r') && (data[++index] == 'e')) {
3209             return TokenNamerequire;
3210           }
3211         } else if (length == 12) {
3212           if ((data[++index] == 'e') && (data[++index] == 'q')
3213               && (data[++index] == 'u') && (data[++index] == 'i')
3214               && (data[++index] == 'r') && (data[++index] == 'e')
3215               && (data[++index] == '_') && (data[++index] == 'o')
3216               && (data[++index] == 'n') && (data[++index] == 'c')
3217               && (data[++index] == 'e')) {
3218             return TokenNamerequire_once;
3219           }
3220         } else
3221           return TokenNameIdentifier;
3222       case 's' :
3223         //static switch
3224         switch (length) {
3225           case 6 :
3226             if (data[++index] == 't')
3227               if ((data[++index] == 'a') && (data[++index] == 't')
3228                   && (data[++index] == 'i') && (data[++index] == 'c')) {
3229                 return TokenNamestatic;
3230               } else
3231                 return TokenNameIdentifier;
3232             else if ((data[index] == 'w') && (data[++index] == 'i')
3233                 && (data[++index] == 't') && (data[++index] == 'c')
3234                 && (data[++index] == 'h'))
3235               return TokenNameswitch;
3236             else
3237               return TokenNameIdentifier;
3238           default :
3239             return TokenNameIdentifier;
3240         }
3241       case 't' :
3242         // try true throw
3243         switch (length) {
3244           case 3 :
3245             if ((data[++index] == 'r') && (data[++index] == 'y'))
3246               return TokenNametry;
3247             else
3248               return TokenNameIdentifier;
3249           //          case 4 :
3250           //            if ((data[++index] == 'r') && (data[++index] == 'u')
3251           //                && (data[++index] == 'e'))
3252           //              return TokenNametrue;
3253           //            else
3254           //              return TokenNameIdentifier;
3255           case 5 :
3256             if ((data[++index] == 'h') && (data[++index] == 'r')
3257                 && (data[++index] == 'o') && (data[++index] == 'w'))
3258               return TokenNamethrow;
3259             else
3260               return TokenNameIdentifier;
3261           default :
3262             return TokenNameIdentifier;
3263         }
3264       case 'u' :
3265         //use unset
3266         switch (length) {
3267           case 3 :
3268             if ((data[++index] == 's') && (data[++index] == 'e'))
3269               return TokenNameuse;
3270             else
3271               return TokenNameIdentifier;
3272           case 5 :
3273             if ((data[++index] == 'n') && (data[++index] == 's')
3274                 && (data[++index] == 'e') && (data[++index] == 't'))
3275               return TokenNameunset;
3276             else
3277               return TokenNameIdentifier;
3278           default :
3279             return TokenNameIdentifier;
3280         }
3281       case 'v' :
3282         //var
3283         switch (length) {
3284           case 3 :
3285             if ((data[++index] == 'a') && (data[++index] == 'r'))
3286               return TokenNamevar;
3287             else
3288               return TokenNameIdentifier;
3289           default :
3290             return TokenNameIdentifier;
3291         }
3292       case 'w' :
3293         //while
3294         switch (length) {
3295           case 5 :
3296             if ((data[++index] == 'h') && (data[++index] == 'i')
3297                 && (data[++index] == 'l') && (data[++index] == 'e'))
3298               return TokenNamewhile;
3299             else
3300               return TokenNameIdentifier;
3301           //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3302           // (data[++index]=='e') && (data[++index]=='f')&&
3303           // (data[++index]=='p'))
3304           //return TokenNamewidefp ;
3305           //else
3306           //return TokenNameIdentifier;
3307           default :
3308             return TokenNameIdentifier;
3309         }
3310       case 'x' :
3311         //xor
3312         switch (length) {
3313           case 3 :
3314             if ((data[++index] == 'o') && (data[++index] == 'r'))
3315               return TokenNamexor;
3316             else
3317               return TokenNameIdentifier;
3318           default :
3319             return TokenNameIdentifier;
3320         }
3321       default :
3322         return TokenNameIdentifier;
3323     }
3324   }
3325   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3326     //when entering this method the currentCharacter is the firt
3327     //digit of the number , i.e. it may be preceeded by a . when
3328     //dotPrefix is true
3329     boolean floating = dotPrefix;
3330     if ((!dotPrefix) && (currentCharacter == '0')) {
3331       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3332         //force the first char of the hexa number do exist...
3333         // consume next character
3334         unicodeAsBackSlash = false;
3335         currentCharacter = source[currentPosition++];
3336         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3337         //          && (source[currentPosition] == 'u')) {
3338         //          getNextUnicodeChar();
3339         //        } else {
3340         //          if (withoutUnicodePtr != 0) {
3341         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3342         //          }
3343         //        }
3344         if (Character.digit(currentCharacter, 16) == -1)
3345           throw new InvalidInputException(INVALID_HEXA);
3346         //---end forcing--
3347         while (getNextCharAsDigit(16)) {
3348         };
3349         //        if (getNextChar('l', 'L') >= 0)
3350         //          return TokenNameLongLiteral;
3351         //        else
3352         return TokenNameIntegerLiteral;
3353       }
3354       //there is x or X in the number
3355       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3356       // 00078.0 is true !!!!! crazy language
3357       if (getNextCharAsDigit()) {
3358         //-------------potential octal-----------------
3359         while (getNextCharAsDigit()) {
3360         };
3361         //        if (getNextChar('l', 'L') >= 0) {
3362         //          return TokenNameLongLiteral;
3363         //        }
3364         //
3365         //        if (getNextChar('f', 'F') >= 0) {
3366         //          return TokenNameFloatingPointLiteral;
3367         //        }
3368         if (getNextChar('d', 'D') >= 0) {
3369           return TokenNameDoubleLiteral;
3370         } else { //make the distinction between octal and float ....
3371           if (getNextChar('.')) { //bingo ! ....
3372             while (getNextCharAsDigit()) {
3373             };
3374             if (getNextChar('e', 'E') >= 0) {
3375               // consume next character
3376               unicodeAsBackSlash = false;
3377               currentCharacter = source[currentPosition++];
3378               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3379               //                && (source[currentPosition] == 'u')) {
3380               //                getNextUnicodeChar();
3381               //              } else {
3382               //                if (withoutUnicodePtr != 0) {
3383               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3384               //                }
3385               //              }
3386               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3387                 // consume next character
3388                 unicodeAsBackSlash = false;
3389                 currentCharacter = source[currentPosition++];
3390                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3391                 //                  && (source[currentPosition] == 'u')) {
3392                 //                  getNextUnicodeChar();
3393                 //                } else {
3394                 //                  if (withoutUnicodePtr != 0) {
3395                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3396                 //                      currentCharacter;
3397                 //                  }
3398                 //                }
3399               }
3400               if (!Character.isDigit(currentCharacter))
3401                 throw new InvalidInputException(INVALID_FLOAT);
3402               while (getNextCharAsDigit()) {
3403               };
3404             }
3405             //            if (getNextChar('f', 'F') >= 0)
3406             //              return TokenNameFloatingPointLiteral;
3407             getNextChar('d', 'D'); //jump over potential d or D
3408             return TokenNameDoubleLiteral;
3409           } else {
3410             return TokenNameIntegerLiteral;
3411           }
3412         }
3413       } else {
3414         /* carry on */
3415       }
3416     }
3417     while (getNextCharAsDigit()) {
3418     };
3419     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3420     //      return TokenNameLongLiteral;
3421     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3422       while (getNextCharAsDigit()) {
3423       };
3424       floating = true;
3425     }
3426     //if floating is true both exponant and suffix may be optional
3427     if (getNextChar('e', 'E') >= 0) {
3428       floating = true;
3429       // consume next character
3430       unicodeAsBackSlash = false;
3431       currentCharacter = source[currentPosition++];
3432       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3433       //        && (source[currentPosition] == 'u')) {
3434       //        getNextUnicodeChar();
3435       //      } else {
3436       //        if (withoutUnicodePtr != 0) {
3437       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3438       //        }
3439       //      }
3440       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3441         // next
3442         // character
3443         unicodeAsBackSlash = false;
3444         currentCharacter = source[currentPosition++];
3445         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3446         //          && (source[currentPosition] == 'u')) {
3447         //          getNextUnicodeChar();
3448         //        } else {
3449         //          if (withoutUnicodePtr != 0) {
3450         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3451         //          }
3452         //        }
3453       }
3454       if (!Character.isDigit(currentCharacter))
3455         throw new InvalidInputException(INVALID_FLOAT);
3456       while (getNextCharAsDigit()) {
3457       };
3458     }
3459     if (getNextChar('d', 'D') >= 0)
3460       return TokenNameDoubleLiteral;
3461     //    if (getNextChar('f', 'F') >= 0)
3462     //      return TokenNameFloatingPointLiteral;
3463     //the long flag has been tested before
3464     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3465   }
3466   /**
3467    * Search the line number corresponding to a specific position
3468    *
3469    */
3470   public final int getLineNumber(int position) {
3471     if (lineEnds == null)
3472       return 1;
3473     int length = linePtr + 1;
3474     if (length == 0)
3475       return 1;
3476     int g = 0, d = length - 1;
3477     int m = 0;
3478     while (g <= d) {
3479       m = (g + d) / 2;
3480       if (position < lineEnds[m]) {
3481         d = m - 1;
3482       } else if (position > lineEnds[m]) {
3483         g = m + 1;
3484       } else {
3485         return m + 1;
3486       }
3487     }
3488     if (position < lineEnds[m]) {
3489       return m + 1;
3490     }
3491     return m + 2;
3492   }
3493   public void setPHPMode(boolean mode) {
3494     phpMode = mode;
3495   }
3496   public final void setSource(char[] source) {
3497     //the source-buffer is set to sourceString
3498     if (source == null) {
3499       this.source = new char[0];
3500     } else {
3501       this.source = source;
3502     }
3503     startPosition = -1;
3504     initialPosition = currentPosition = 0;
3505     containsAssertKeyword = false;
3506     withoutUnicodeBuffer = new char[this.source.length];
3507     encapsedStringStack = new Stack();
3508   }
3509   public String toString() {
3510     if (startPosition == source.length)
3511       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3512     if (currentPosition > source.length)
3513       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3514     char front[] = new char[startPosition];
3515     System.arraycopy(source, 0, front, 0, startPosition);
3516     int middleLength = (currentPosition - 1) - startPosition + 1;
3517     char middle[];
3518     if (middleLength > -1) {
3519       middle = new char[middleLength];
3520       System.arraycopy(source, startPosition, middle, 0, middleLength);
3521     } else {
3522       middle = new char[0];
3523     }
3524     char end[] = new char[source.length - (currentPosition - 1)];
3525     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3526         - (currentPosition - 1) - 1);
3527     return new String(front)
3528         + "\n===============================\nStarts here -->" //$NON-NLS-1$
3529         + new String(middle)
3530         + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3531         + new String(end);
3532   }
3533   public final String toStringAction(int act) {
3534     switch (act) {
3535       case TokenNameERROR :
3536         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3537       // //$NON-NLS-1$
3538       case TokenNameINLINE_HTML :
3539         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3540       case TokenNameIdentifier :
3541         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3542       case TokenNameVariable :
3543         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3544       case TokenNameabstract :
3545         return "abstract"; //$NON-NLS-1$
3546       case TokenNameand :
3547         return "AND"; //$NON-NLS-1$
3548       case TokenNamearray :
3549         return "array"; //$NON-NLS-1$
3550       case TokenNameas :
3551         return "as"; //$NON-NLS-1$
3552       case TokenNamebreak :
3553         return "break"; //$NON-NLS-1$
3554       case TokenNamecase :
3555         return "case"; //$NON-NLS-1$
3556       case TokenNameclass :
3557         return "class"; //$NON-NLS-1$
3558       case TokenNamecatch :
3559         return "catch"; //$NON-NLS-1$
3560       case TokenNameclone :
3561         //$NON-NLS-1$
3562         return "clone";
3563       case TokenNameconst :
3564         //$NON-NLS-1$
3565         return "const";
3566       case TokenNamecontinue :
3567         return "continue"; //$NON-NLS-1$
3568       case TokenNamedefault :
3569         return "default"; //$NON-NLS-1$
3570       //      case TokenNamedefine :
3571       //        return "define"; //$NON-NLS-1$
3572       case TokenNamedo :
3573         return "do"; //$NON-NLS-1$
3574       case TokenNameecho :
3575         return "echo"; //$NON-NLS-1$
3576       case TokenNameelse :
3577         return "else"; //$NON-NLS-1$
3578       case TokenNameelseif :
3579         return "elseif"; //$NON-NLS-1$
3580       case TokenNameendfor :
3581         return "endfor"; //$NON-NLS-1$
3582       case TokenNameendforeach :
3583         return "endforeach"; //$NON-NLS-1$
3584       case TokenNameendif :
3585         return "endif"; //$NON-NLS-1$
3586       case TokenNameendswitch :
3587         return "endswitch"; //$NON-NLS-1$
3588       case TokenNameendwhile :
3589         return "endwhile"; //$NON-NLS-1$
3590       case TokenNameexit:
3591         return "exit";
3592       case TokenNameextends :
3593         return "extends"; //$NON-NLS-1$
3594       //      case TokenNamefalse :
3595       //        return "false"; //$NON-NLS-1$
3596       case TokenNamefinal :
3597         return "final"; //$NON-NLS-1$
3598       case TokenNamefor :
3599         return "for"; //$NON-NLS-1$
3600       case TokenNameforeach :
3601         return "foreach"; //$NON-NLS-1$
3602       case TokenNamefunction :
3603         return "function"; //$NON-NLS-1$
3604       case TokenNameglobal :
3605         return "global"; //$NON-NLS-1$
3606       case TokenNameif :
3607         return "if"; //$NON-NLS-1$
3608       case TokenNameimplements :
3609         return "implements"; //$NON-NLS-1$
3610       case TokenNameinclude :
3611         return "include"; //$NON-NLS-1$
3612       case TokenNameinclude_once :
3613         return "include_once"; //$NON-NLS-1$
3614       case TokenNameinstanceof :
3615         return "instanceof"; //$NON-NLS-1$
3616       case TokenNameinterface :
3617         return "interface"; //$NON-NLS-1$
3618       case TokenNameisset :
3619         return "isset"; //$NON-NLS-1$
3620       case TokenNamelist :
3621         return "list"; //$NON-NLS-1$
3622       case TokenNamenew :
3623         return "new"; //$NON-NLS-1$
3624       //      case TokenNamenull :
3625       //        return "null"; //$NON-NLS-1$
3626       case TokenNameor :
3627         return "OR"; //$NON-NLS-1$
3628       case TokenNameprint :
3629         return "print"; //$NON-NLS-1$
3630       case TokenNameprivate :
3631         return "private"; //$NON-NLS-1$
3632       case TokenNameprotected :
3633         return "protected"; //$NON-NLS-1$
3634       case TokenNamepublic :
3635         return "public"; //$NON-NLS-1$
3636       case TokenNamerequire :
3637         return "require"; //$NON-NLS-1$
3638       case TokenNamerequire_once :
3639         return "require_once"; //$NON-NLS-1$
3640       case TokenNamereturn :
3641         return "return"; //$NON-NLS-1$
3642       case TokenNamestatic :
3643         return "static"; //$NON-NLS-1$
3644       case TokenNameswitch :
3645         return "switch"; //$NON-NLS-1$
3646       //      case TokenNametrue :
3647       //        return "true"; //$NON-NLS-1$
3648       case TokenNameunset :
3649         return "unset"; //$NON-NLS-1$
3650       case TokenNamevar :
3651         return "var"; //$NON-NLS-1$
3652       case TokenNamewhile :
3653         return "while"; //$NON-NLS-1$
3654       case TokenNamexor :
3655         return "XOR"; //$NON-NLS-1$
3656       //      case TokenNamethis :
3657       //        return "$this"; //$NON-NLS-1$
3658       case TokenNameIntegerLiteral :
3659         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3660       case TokenNameDoubleLiteral :
3661         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3662       case TokenNameStringLiteral :
3663         return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3664       case TokenNameStringConstant :
3665         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3666       case TokenNameStringInterpolated :
3667         return "StringInterpolated(" + new String(getCurrentTokenSource())
3668             + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3669       case TokenNameEncapsedString0 :
3670         return "`"; //$NON-NLS-1$
3671       case TokenNameEncapsedString1 :
3672         return "\'"; //$NON-NLS-1$
3673       case TokenNameEncapsedString2 :
3674         return "\""; //$NON-NLS-1$
3675       case TokenNameSTRING :
3676         return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3677       case TokenNameHEREDOC :
3678         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3679       case TokenNamePLUS_PLUS :
3680         return "++"; //$NON-NLS-1$
3681       case TokenNameMINUS_MINUS :
3682         return "--"; //$NON-NLS-1$
3683       case TokenNameEQUAL_EQUAL :
3684         return "=="; //$NON-NLS-1$
3685       case TokenNameEQUAL_EQUAL_EQUAL :
3686         return "==="; //$NON-NLS-1$
3687       case TokenNameEQUAL_GREATER :
3688         return "=>"; //$NON-NLS-1$
3689       case TokenNameLESS_EQUAL :
3690         return "<="; //$NON-NLS-1$
3691       case TokenNameGREATER_EQUAL :
3692         return ">="; //$NON-NLS-1$
3693       case TokenNameNOT_EQUAL :
3694         return "!="; //$NON-NLS-1$
3695       case TokenNameNOT_EQUAL_EQUAL :
3696         return "!=="; //$NON-NLS-1$
3697       case TokenNameLEFT_SHIFT :
3698         return "<<"; //$NON-NLS-1$
3699       case TokenNameRIGHT_SHIFT :
3700         return ">>"; //$NON-NLS-1$
3701       case TokenNamePLUS_EQUAL :
3702         return "+="; //$NON-NLS-1$
3703       case TokenNameMINUS_EQUAL :
3704         return "-="; //$NON-NLS-1$
3705       case TokenNameMULTIPLY_EQUAL :
3706         return "*="; //$NON-NLS-1$
3707       case TokenNameDIVIDE_EQUAL :
3708         return "/="; //$NON-NLS-1$
3709       case TokenNameAND_EQUAL :
3710         return "&="; //$NON-NLS-1$
3711       case TokenNameOR_EQUAL :
3712         return "|="; //$NON-NLS-1$
3713       case TokenNameXOR_EQUAL :
3714         return "^="; //$NON-NLS-1$
3715       case TokenNameREMAINDER_EQUAL :
3716         return "%="; //$NON-NLS-1$
3717       case TokenNameDOT_EQUAL :
3718         return ".="; //$NON-NLS-1$
3719       case TokenNameLEFT_SHIFT_EQUAL :
3720         return "<<="; //$NON-NLS-1$
3721       case TokenNameRIGHT_SHIFT_EQUAL :
3722         return ">>="; //$NON-NLS-1$
3723       case TokenNameOR_OR :
3724         return "||"; //$NON-NLS-1$
3725       case TokenNameAND_AND :
3726         return "&&"; //$NON-NLS-1$
3727       case TokenNamePLUS :
3728         return "+"; //$NON-NLS-1$
3729       case TokenNameMINUS :
3730         return "-"; //$NON-NLS-1$
3731       case TokenNameMINUS_GREATER :
3732         return "->";
3733       case TokenNameNOT :
3734         return "!"; //$NON-NLS-1$
3735       case TokenNameREMAINDER :
3736         return "%"; //$NON-NLS-1$
3737       case TokenNameXOR :
3738         return "^"; //$NON-NLS-1$
3739       case TokenNameAND :
3740         return "&"; //$NON-NLS-1$
3741       case TokenNameMULTIPLY :
3742         return "*"; //$NON-NLS-1$
3743       case TokenNameOR :
3744         return "|"; //$NON-NLS-1$
3745       case TokenNameTWIDDLE :
3746         return "~"; //$NON-NLS-1$
3747       case TokenNameTWIDDLE_EQUAL :
3748         return "~="; //$NON-NLS-1$
3749       case TokenNameDIVIDE :
3750         return "/"; //$NON-NLS-1$
3751       case TokenNameGREATER :
3752         return ">"; //$NON-NLS-1$
3753       case TokenNameLESS :
3754         return "<"; //$NON-NLS-1$
3755       case TokenNameLPAREN :
3756         return "("; //$NON-NLS-1$
3757       case TokenNameRPAREN :
3758         return ")"; //$NON-NLS-1$
3759       case TokenNameLBRACE :
3760         return "{"; //$NON-NLS-1$
3761       case TokenNameRBRACE :
3762         return "}"; //$NON-NLS-1$
3763       case TokenNameLBRACKET :
3764         return "["; //$NON-NLS-1$
3765       case TokenNameRBRACKET :
3766         return "]"; //$NON-NLS-1$
3767       case TokenNameSEMICOLON :
3768         return ";"; //$NON-NLS-1$
3769       case TokenNameQUESTION :
3770         return "?"; //$NON-NLS-1$
3771       case TokenNameCOLON :
3772         return ":"; //$NON-NLS-1$
3773       case TokenNameCOMMA :
3774         return ","; //$NON-NLS-1$
3775       case TokenNameDOT :
3776         return "."; //$NON-NLS-1$
3777       case TokenNameEQUAL :
3778         return "="; //$NON-NLS-1$
3779       case TokenNameAT :
3780         return "@";
3781       case TokenNameDOLLAR :
3782         return "$";
3783       case TokenNameDOLLAR_LBRACE :
3784         return "${";
3785       case TokenNameEOF :
3786         return "EOF"; //$NON-NLS-1$
3787       case TokenNameWHITESPACE :
3788         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3789       case TokenNameCOMMENT_LINE :
3790         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3791       case TokenNameCOMMENT_BLOCK :
3792         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3793       case TokenNameCOMMENT_PHPDOC :
3794         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3795       //      case TokenNameHTML :
3796       //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
3797       // //$NON-NLS-1$
3798       case TokenNameFILE :
3799         return "__FILE__"; //$NON-NLS-1$
3800       case TokenNameLINE :
3801         return "__LINE__"; //$NON-NLS-1$
3802       case TokenNameCLASS_C :
3803         return "__CLASS__"; //$NON-NLS-1$
3804       case TokenNameMETHOD_C :
3805         return "__METHOD__"; //$NON-NLS-1$
3806       case TokenNameFUNC_C :
3807         return "__FUNCTION__"; //$NON-NLS-1
3808       case TokenNameboolCAST :
3809         return "( bool )"; //$NON-NLS-1$
3810       case TokenNameintCAST :
3811         return "( int )"; //$NON-NLS-1$
3812       case TokenNamedoubleCAST :
3813         return "( double )"; //$NON-NLS-1$
3814       case TokenNameobjectCAST :
3815         return "( object )"; //$NON-NLS-1$
3816       case TokenNamestringCAST :
3817         return "( string )"; //$NON-NLS-1$
3818       default :
3819         return "not-a-token(" + (new Integer(act)) + ") "
3820             + new String(getCurrentTokenSource()); //$NON-NLS-1$
3821     }
3822   }
3823
3824   public Scanner() {
3825     this(false, false);
3826   }
3827   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
3828     this(tokenizeComments, tokenizeWhiteSpace, false);
3829   }
3830   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3831       boolean checkNonExternalizedStringLiterals) {
3832     this(tokenizeComments, tokenizeWhiteSpace,
3833         checkNonExternalizedStringLiterals, false);
3834   }
3835   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3836       boolean checkNonExternalizedStringLiterals, boolean assertMode) {
3837     this(tokenizeComments, tokenizeWhiteSpace,
3838         checkNonExternalizedStringLiterals, assertMode, false, null, null);
3839   }
3840   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3841       boolean checkNonExternalizedStringLiterals, boolean assertMode,
3842       boolean tokenizeStrings,
3843       char[][] taskTags,
3844           char[][] taskPriorities) {
3845     this.eofPosition = Integer.MAX_VALUE;
3846     this.tokenizeComments = tokenizeComments;
3847     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3848     this.tokenizeStrings = tokenizeStrings;
3849     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3850     this.assertMode = assertMode;
3851     this.encapsedStringStack = null;
3852     this.taskTags = taskTags;
3853         this.taskPriorities = taskPriorities;
3854   }
3855   private void checkNonExternalizeString() throws InvalidInputException {
3856     if (currentLine == null)
3857       return;
3858     parseTags(currentLine);
3859   }
3860   private void parseTags(NLSLine line) throws InvalidInputException {
3861     String s = new String(getCurrentTokenSource());
3862     int pos = s.indexOf(TAG_PREFIX);
3863     int lineLength = line.size();
3864     while (pos != -1) {
3865       int start = pos + TAG_PREFIX_LENGTH;
3866       int end = s.indexOf(TAG_POSTFIX, start);
3867       String index = s.substring(start, end);
3868       int i = 0;
3869       try {
3870         i = Integer.parseInt(index) - 1;
3871         // Tags are one based not zero based.
3872       } catch (NumberFormatException e) {
3873         i = -1; // we don't want to consider this as a valid NLS tag
3874       }
3875       if (line.exists(i)) {
3876         line.set(i, null);
3877       }
3878       pos = s.indexOf(TAG_PREFIX, start);
3879     }
3880     this.nonNLSStrings = new StringLiteral[lineLength];
3881     int nonNLSCounter = 0;
3882     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3883       StringLiteral literal = (StringLiteral) iterator.next();
3884       if (literal != null) {
3885         this.nonNLSStrings[nonNLSCounter++] = literal;
3886       }
3887     }
3888     if (nonNLSCounter == 0) {
3889       this.nonNLSStrings = null;
3890       currentLine = null;
3891       return;
3892     }
3893     this.wasNonExternalizedStringLiteral = true;
3894     if (nonNLSCounter != lineLength) {
3895       System.arraycopy(this.nonNLSStrings, 0,
3896           (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
3897           nonNLSCounter);
3898     }
3899     currentLine = null;
3900   }
3901   public final void scanEscapeCharacter() throws InvalidInputException {
3902     // the string with "\\u" is a legal string of two chars \ and u
3903     //thus we use a direct access to the source (for regular cases).
3904     if (unicodeAsBackSlash) {
3905       // consume next character
3906       unicodeAsBackSlash = false;
3907       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
3908       // (source[currentPosition] == 'u')) {
3909       //                                getNextUnicodeChar();
3910       //                        } else {
3911       if (withoutUnicodePtr != 0) {
3912         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3913         //                              }
3914       }
3915     } else
3916       currentCharacter = source[currentPosition++];
3917     switch (currentCharacter) {
3918       case 'b' :
3919         currentCharacter = '\b';
3920         break;
3921       case 't' :
3922         currentCharacter = '\t';
3923         break;
3924       case 'n' :
3925         currentCharacter = '\n';
3926         break;
3927       case 'f' :
3928         currentCharacter = '\f';
3929         break;
3930       case 'r' :
3931         currentCharacter = '\r';
3932         break;
3933       case '\"' :
3934         currentCharacter = '\"';
3935         break;
3936       case '\'' :
3937         currentCharacter = '\'';
3938         break;
3939       case '\\' :
3940         currentCharacter = '\\';
3941         break;
3942       default :
3943         // -----------octal escape--------------
3944         // OctalDigit
3945         // OctalDigit OctalDigit
3946         // ZeroToThree OctalDigit OctalDigit
3947         int number = Character.getNumericValue(currentCharacter);
3948         if (number >= 0 && number <= 7) {
3949           boolean zeroToThreeNot = number > 3;
3950           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3951             int digit = Character.getNumericValue(currentCharacter);
3952             if (digit >= 0 && digit <= 7) {
3953               number = (number * 8) + digit;
3954               if (Character
3955                   .isDigit(currentCharacter = source[currentPosition++])) {
3956                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3957                   // Digit --> ignore last character
3958                   currentPosition--;
3959                 } else {
3960                   digit = Character.getNumericValue(currentCharacter);
3961                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3962                     // OctalDigit OctalDigit
3963                     number = (number * 8) + digit;
3964                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3965                     // --> ignore last character
3966                     currentPosition--;
3967                   }
3968                 }
3969               } else { // has read \OctalDigit NonDigit--> ignore last
3970                 // character
3971                 currentPosition--;
3972               }
3973             } else { // has read \OctalDigit NonOctalDigit--> ignore last
3974               // character
3975               currentPosition--;
3976             }
3977           } else { // has read \OctalDigit --> ignore last character
3978             currentPosition--;
3979           }
3980           if (number > 255)
3981             throw new InvalidInputException(INVALID_ESCAPE);
3982           currentCharacter = (char) number;
3983         } else
3984           throw new InvalidInputException(INVALID_ESCAPE);
3985     }
3986   }
3987   // chech presence of task: tags
3988   public void checkTaskTag(int commentStart, int commentEnd) {
3989     // only look for newer task: tags
3990     if (this.foundTaskCount > 0
3991         && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3992       return;
3993     }
3994     int foundTaskIndex = this.foundTaskCount;
3995     nextChar : for (int i = commentStart; i < commentEnd
3996         && i < this.eofPosition; i++) {
3997       char[] tag = null;
3998       char[] priority = null;
3999       // check for tag occurrence
4000       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4001         tag = this.taskTags[itag];
4002         priority = this.taskPriorities != null
4003             && itag < this.taskPriorities.length
4004             ? this.taskPriorities[itag]
4005             : null;
4006         int tagLength = tag.length;
4007         for (int t = 0; t < tagLength; t++) {
4008           if (this.source[i + t] != tag[t])
4009             continue nextTag;
4010         }
4011         if (this.foundTaskTags == null) {
4012           this.foundTaskTags = new char[5][];
4013           this.foundTaskMessages = new char[5][];
4014           this.foundTaskPriorities = new char[5][];
4015           this.foundTaskPositions = new int[5][];
4016         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4017           System.arraycopy(this.foundTaskTags, 0,
4018               this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4019               this.foundTaskCount);
4020           System.arraycopy(this.foundTaskMessages, 0,
4021               this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4022               this.foundTaskCount);
4023           System.arraycopy(this.foundTaskPriorities, 0,
4024               this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4025               0, this.foundTaskCount);
4026           System.arraycopy(this.foundTaskPositions, 0,
4027               this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4028               this.foundTaskCount);
4029         }
4030         this.foundTaskTags[this.foundTaskCount] = tag;
4031         this.foundTaskPriorities[this.foundTaskCount] = priority;
4032         this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
4033             i + tagLength - 1};
4034         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4035         this.foundTaskCount++;
4036         i += tagLength - 1; // will be incremented when looping
4037       }
4038     }
4039     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4040       // retrieve message start and end positions
4041       int msgStart = this.foundTaskPositions[i][0]
4042           + this.foundTaskTags[i].length;
4043       int max_value = i + 1 < this.foundTaskCount
4044           ? this.foundTaskPositions[i + 1][0] - 1
4045           : commentEnd - 1;
4046       // at most beginning of next task
4047       if (max_value < msgStart)
4048         max_value = msgStart; // would only occur if tag is before EOF.
4049       int end = -1;
4050       char c;
4051       for (int j = msgStart; j < max_value; j++) {
4052         if ((c = this.source[j]) == '\n' || c == '\r') {
4053           end = j - 1;
4054           break;
4055         }
4056       }
4057       if (end == -1) {
4058         for (int j = max_value; j > msgStart; j--) {
4059           if ((c = this.source[j]) == '*') {
4060             end = j - 1;
4061             break;
4062           }
4063         }
4064         if (end == -1)
4065           end = max_value;
4066       }
4067       if (msgStart == end)
4068         continue; // empty
4069       // trim the message
4070       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4071         end--;
4072       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4073         msgStart++;
4074       // update the end position of the task
4075       this.foundTaskPositions[i][1] = end;
4076       // get the message source
4077       final int messageLength = end - msgStart + 1;
4078       char[] message = new char[messageLength];
4079       System.arraycopy(source, msgStart, message, 0, messageLength);
4080       this.foundTaskMessages[i] = message;
4081     }
4082   }
4083 }