net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /***********************************************************************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
   3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
   4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
   5  *
   6  * Contributors: IBM Corporation - initial API and implementation
   7  **********************************************************************************************************************************/
   8 package net.sourceforge.phpdt.internal.compiler.parser;
   9
  10 import java.util.ArrayList;
  11 import java.util.Iterator;
  12 import java.util.List;
  13 import java.util.Stack;
  14
  15 import net.sourceforge.phpdt.core.compiler.CharOperation;
  16 import net.sourceforge.phpdt.core.compiler.IScanner;
  17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
  20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
  21
  22 public class Scanner implements IScanner, ITerminalSymbols {
  23   /*
  24    * APIs ares - getNextToken() which return the current type of the token (this value is not memorized by the scanner) -
  25    * getCurrentTokenSource() which provides with the token "REAL" source (aka all unicode have been transformed into a correct char) -
  26    * sourceStart gives the position into the stream - currentPosition-1 gives the sourceEnd position into the stream
  27    */
  28   // 1.4 feature
  29   private boolean assertMode;
  30
  31   public boolean useAssertAsAnIndentifier = false;
  32
  33   //flag indicating if processed source contains occurrences of keyword assert
  34   public boolean containsAssertKeyword = false;
  35
  36   public boolean recordLineSeparator;
  37
  38   public boolean ignorePHPOneLiner = false;
  39
  40   public boolean phpMode = false;
  41
  42   public Stack encapsedStringStack = null;
  43
  44   public char currentCharacter;
  45
  46   public int startPosition;
  47
  48   public int currentPosition;
  49
  50   public int initialPosition, eofPosition;
  51
  52   // after this position eof are generated instead of real token from the
  53   // source
  54   public boolean tokenizeComments;
  55
  56   public boolean tokenizeWhiteSpace;
  57
  58   public boolean tokenizeStrings;
  59
  60   //source should be viewed as a window (aka a part)
  61   //of a entire very large stream
  62   public char source[];
  63
  64   //unicode support
  65   public char[] withoutUnicodeBuffer;
  66
  67   public int withoutUnicodePtr;
  68
  69   //when == 0 ==> no unicode in the current token
  70   public boolean unicodeAsBackSlash = false;
  71
  72   public boolean scanningFloatLiteral = false;
  73
  74   //support for /** comments
  75   public int[] commentStops = new int[10];
  76
  77   public int[] commentStarts = new int[10];
  78
  79   public int commentPtr = -1; // no comment test with commentPtr value -1
  80
  81   protected int lastCommentLinePosition = -1;
  82
  83   //diet parsing support - jump over some method body when requested
  84   public boolean diet = false;
  85
  86   //support for the poor-line-debuggers ....
  87   //remember the position of the cr/lf
  88   public int[] lineEnds = new int[250];
  89
  90   public int linePtr = -1;
  91
  92   public boolean wasAcr = false;
  93
  94   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  95
  96   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  97
  98   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  99
 100   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
 101
 102   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
 103
 104   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
 105
 106   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
 107
 108   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
 109
 110   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
 111
 112   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
 113
 114   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
 115
 116   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 117
 118   //----------------optimized identifier managment------------------
 119   static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
 120       charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
 121       charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
 122       charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
 123       charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
 124       charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
 125       charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
 126       charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
 127       charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
 128
 129   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 130
 131   static final int TableSize = 30, InternalTableSize = 6;
 132
 133   //30*6 = 180 entries
 134   public static final int OptimizedLength = 6;
 135
 136   public/* static */
 137   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 138
 139   // support for detecting non-externalized string literals
 140   int currentLineNr = -1;
 141
 142   int previousLineNr = -1;
 143
 144   NLSLine currentLine = null;
 145
 146   List lines = new ArrayList();
 147
 148   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 149
 150   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 151
 152   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 153
 154   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 155
 156   public StringLiteral[] nonNLSStrings = null;
 157
 158   public boolean checkNonExternalizedStringLiterals = true;
 159
 160   public boolean wasNonExternalizedStringLiteral = false;
 161   /* static */{
 162     for (int i = 0; i < 6; i++) {
 163       for (int j = 0; j < TableSize; j++) {
 164         for (int k = 0; k < InternalTableSize; k++) {
 165           charArray_length[i][j][k] = initCharArray;
 166         }
 167       }
 168     }
 169   }
 170
 171   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 172
 173   public static final int RoundBracket = 0;
 174
 175   public static final int SquareBracket = 1;
 176
 177   public static final int CurlyBracket = 2;
 178
 179   public static final int BracketKinds = 3;
 180
 181   // task tag support
 182   public char[][] foundTaskTags = null;
 183
 184   public char[][] foundTaskMessages;
 185
 186   public char[][] foundTaskPriorities = null;
 187
 188   public int[][] foundTaskPositions;
 189
 190   public int foundTaskCount = 0;
 191
 192   public char[][] taskTags = null;
 193
 194   public char[][] taskPriorities = null;
 195
 196   public static final boolean DEBUG = false;
 197
 198   public static final boolean TRACE = false;
 199
 200   public ICompilationUnit compilationUnit = null;
 201   /**
 202    * Determines if the specified character is permissible as the first character in a PHP identifier
 203    */
 204   public static boolean isPHPIdentifierStart(char ch) {
 205     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 206   }
 207
 208   /**
 209    * Determines if the specified character may be part of a PHP identifier as other than the first character
 210    */
 211   public static boolean isPHPIdentifierPart(char ch) {
 212     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 213   }
 214
 215   public final boolean atEnd() {
 216     // This code is not relevant if source is
 217     // Only a part of the real stream input
 218     return source.length == currentPosition;
 219   }
 220
 221   public char[] getCurrentIdentifierSource() {
 222     //return the token REAL source (aka unicodes are precomputed)
 223     char[] result;
 224     //    if (withoutUnicodePtr != 0)
 225     //      //0 is used as a fast test flag so the real first char is in position 1
 226     //      System.arraycopy(
 227     //        withoutUnicodeBuffer,
 228     //        1,
 229     //        result = new char[withoutUnicodePtr],
 230     //        0,
 231     //        withoutUnicodePtr);
 232     //    else {
 233     int length = currentPosition - startPosition;
 234     switch (length) { // see OptimizedLength
 235     case 1:
 236       return optimizedCurrentTokenSource1();
 237     case 2:
 238       return optimizedCurrentTokenSource2();
 239     case 3:
 240       return optimizedCurrentTokenSource3();
 241     case 4:
 242       return optimizedCurrentTokenSource4();
 243     case 5:
 244       return optimizedCurrentTokenSource5();
 245     case 6:
 246       return optimizedCurrentTokenSource6();
 247     }
 248     //no optimization
 249     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 250     //   }
 251     return result;
 252   }
 253
 254   public int getCurrentTokenEndPosition() {
 255     return this.currentPosition - 1;
 256   }
 257
 258   public final char[] getCurrentTokenSource() {
 259     // Return the token REAL source (aka unicodes are precomputed)
 260     char[] result;
 261     //    if (withoutUnicodePtr != 0)
 262     //      // 0 is used as a fast test flag so the real first char is in position 1
 263     //      System.arraycopy(
 264     //        withoutUnicodeBuffer,
 265     //        1,
 266     //        result = new char[withoutUnicodePtr],
 267     //        0,
 268     //        withoutUnicodePtr);
 269     //    else {
 270     int length;
 271     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 272     //    }
 273     return result;
 274   }
 275
 276   public final char[] getCurrentTokenSource(int startPos) {
 277     // Return the token REAL source (aka unicodes are precomputed)
 278     char[] result;
 279     //    if (withoutUnicodePtr != 0)
 280     //      // 0 is used as a fast test flag so the real first char is in position 1
 281     //      System.arraycopy(
 282     //        withoutUnicodeBuffer,
 283     //        1,
 284     //        result = new char[withoutUnicodePtr],
 285     //        0,
 286     //        withoutUnicodePtr);
 287     //    else {
 288     int length;
 289     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 290     //  }
 291     return result;
 292   }
 293
 294   public final char[] getCurrentTokenSourceString() {
 295     //return the token REAL source (aka unicodes are precomputed).
 296     //REMOVE the two " that are at the beginning and the end.
 297     char[] result;
 298     if (withoutUnicodePtr != 0)
 299       //0 is used as a fast test flag so the real first char is in position 1
 300       System.arraycopy(withoutUnicodeBuffer, 2,
 301       //2 is 1 (real start) + 1 (to jump over the ")
 302           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 303     else {
 304       int length;
 305       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 306     }
 307     return result;
 308   }
 309
 310   public int getCurrentTokenStartPosition() {
 311     return this.startPosition;
 312   }
 313
 314   public final char[] getCurrentStringLiteralSource() {
 315     // Return the token REAL source (aka unicodes are precomputed)
 316     if (startPosition + 1 >= currentPosition) {
 317       return new char[0];
 318     }
 319     char[] result;
 320     int length;
 321     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 322     //    }
 323     return result;
 324   }
 325
 326   public final char[] getCurrentStringLiteralSource(int startPos) {
 327     // Return the token REAL source (aka unicodes are precomputed)
 328     char[] result;
 329     int length;
 330     System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
 331     //    }
 332     return result;
 333   }
 334   /*
 335    * Search the source position corresponding to the end of a given line number
 336    *
 337    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 338    *
 339    * In case the given line number is inconsistent, answers -1.
 340    */
 341   public final int getLineEnd(int lineNumber) {
 342     if (lineEnds == null)
 343       return -1;
 344     if (lineNumber >= lineEnds.length)
 345       return -1;
 346     if (lineNumber <= 0)
 347       return -1;
 348     if (lineNumber == lineEnds.length - 1)
 349       return eofPosition;
 350     return lineEnds[lineNumber - 1];
 351     // next line start one character behind the lineEnd of the previous line
 352   }
 353
 354   /**
 355    * Search the source position corresponding to the beginning of a given line number
 356    *
 357    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 358    *
 359    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 360    *
 361    * In case the given line number is inconsistent, answers -1.
 362    */
 363   public final int getLineStart(int lineNumber) {
 364     if (lineEnds == null)
 365       return -1;
 366     if (lineNumber >= lineEnds.length)
 367       return -1;
 368     if (lineNumber <= 0)
 369       return -1;
 370     if (lineNumber == 1)
 371       return initialPosition;
 372     return lineEnds[lineNumber - 2] + 1;
 373     // next line start one character behind the lineEnd of the previous line
 374   }
 375
 376   public final boolean getNextChar(char testedChar) {
 377     //BOOLEAN
 378     //handle the case of unicode.
 379     //when a unicode appears then we must use a buffer that holds char
 380     // internal values
 381     //At the end of this method currentCharacter holds the new visited char
 382     //and currentPosition points right next after it
 383     //Both previous lines are true if the currentCharacter is == to the
 384     // testedChar
 385     //On false, no side effect has occured.
 386     //ALL getNextChar.... ARE OPTIMIZED COPIES
 387     int temp = currentPosition;
 388     try {
 389       currentCharacter = source[currentPosition++];
 390       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 391       //        && (source[currentPosition] == 'u')) {
 392       //        //-------------unicode traitement ------------
 393       //        int c1, c2, c3, c4;
 394       //        int unicodeSize = 6;
 395       //        currentPosition++;
 396       //        while (source[currentPosition] == 'u') {
 397       //          currentPosition++;
 398       //          unicodeSize++;
 399       //        }
 400       //
 401       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 402       //          || c1 < 0)
 403       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 404       //            || c2 < 0)
 405       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 406       //            || c3 < 0)
 407       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 408       //            || c4 < 0)) {
 409       //          currentPosition = temp;
 410       //          return false;
 411       //        }
 412       //
 413       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 414       //        if (currentCharacter != testedChar) {
 415       //          currentPosition = temp;
 416       //          return false;
 417       //        }
 418       //        unicodeAsBackSlash = currentCharacter == '\\';
 419       //
 420       //        //need the unicode buffer
 421       //        if (withoutUnicodePtr == 0) {
 422       //          //buffer all the entries that have been left aside....
 423       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 424       //          System.arraycopy(
 425       //            source,
 426       //            startPosition,
 427       //            withoutUnicodeBuffer,
 428       //            1,
 429       //            withoutUnicodePtr);
 430       //        }
 431       //        //fill the buffer with the char
 432       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 433       //        return true;
 434       //
 435       //      } //-------------end unicode traitement--------------
 436       //      else {
 437       if (currentCharacter != testedChar) {
 438         currentPosition = temp;
 439         return false;
 440       }
 441       unicodeAsBackSlash = false;
 442       //        if (withoutUnicodePtr != 0)
 443       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 444       return true;
 445       //      }
 446     } catch (IndexOutOfBoundsException e) {
 447       unicodeAsBackSlash = false;
 448       currentPosition = temp;
 449       return false;
 450     }
 451   }
 452
 453   public final int getNextChar(char testedChar1, char testedChar2) {
 454     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 455     //test can be done with (x==0) for the first and (x>0) for the second
 456     //handle the case of unicode.
 457     //when a unicode appears then we must use a buffer that holds char
 458     // internal values
 459     //At the end of this method currentCharacter holds the new visited char
 460     //and currentPosition points right next after it
 461     //Both previous lines are true if the currentCharacter is == to the
 462     // testedChar1/2
 463     //On false, no side effect has occured.
 464     //ALL getNextChar.... ARE OPTIMIZED COPIES
 465     int temp = currentPosition;
 466     try {
 467       int result;
 468       currentCharacter = source[currentPosition++];
 469       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 470       //        && (source[currentPosition] == 'u')) {
 471       //        //-------------unicode traitement ------------
 472       //        int c1, c2, c3, c4;
 473       //        int unicodeSize = 6;
 474       //        currentPosition++;
 475       //        while (source[currentPosition] == 'u') {
 476       //          currentPosition++;
 477       //          unicodeSize++;
 478       //        }
 479       //
 480       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 481       //          || c1 < 0)
 482       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 483       //            || c2 < 0)
 484       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 485       //            || c3 < 0)
 486       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 487       //            || c4 < 0)) {
 488       //          currentPosition = temp;
 489       //          return 2;
 490       //        }
 491       //
 492       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 493       //        if (currentCharacter == testedChar1)
 494       //          result = 0;
 495       //        else if (currentCharacter == testedChar2)
 496       //          result = 1;
 497       //        else {
 498       //          currentPosition = temp;
 499       //          return -1;
 500       //        }
 501       //
 502       //        //need the unicode buffer
 503       //        if (withoutUnicodePtr == 0) {
 504       //          //buffer all the entries that have been left aside....
 505       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 506       //          System.arraycopy(
 507       //            source,
 508       //            startPosition,
 509       //            withoutUnicodeBuffer,
 510       //            1,
 511       //            withoutUnicodePtr);
 512       //        }
 513       //        //fill the buffer with the char
 514       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 515       //        return result;
 516       //      } //-------------end unicode traitement--------------
 517       //      else {
 518       if (currentCharacter == testedChar1)
 519         result = 0;
 520       else if (currentCharacter == testedChar2)
 521         result = 1;
 522       else {
 523         currentPosition = temp;
 524         return -1;
 525       }
 526       //        if (withoutUnicodePtr != 0)
 527       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 528       return result;
 529       //     }
 530     } catch (IndexOutOfBoundsException e) {
 531       currentPosition = temp;
 532       return -1;
 533     }
 534   }
 535
 536   public final boolean getNextCharAsDigit() {
 537     //BOOLEAN
 538     //handle the case of unicode.
 539     //when a unicode appears then we must use a buffer that holds char
 540     // internal values
 541     //At the end of this method currentCharacter holds the new visited char
 542     //and currentPosition points right next after it
 543     //Both previous lines are true if the currentCharacter is a digit
 544     //On false, no side effect has occured.
 545     //ALL getNextChar.... ARE OPTIMIZED COPIES
 546     int temp = currentPosition;
 547     try {
 548       currentCharacter = source[currentPosition++];
 549       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 550       //        && (source[currentPosition] == 'u')) {
 551       //        //-------------unicode traitement ------------
 552       //        int c1, c2, c3, c4;
 553       //        int unicodeSize = 6;
 554       //        currentPosition++;
 555       //        while (source[currentPosition] == 'u') {
 556       //          currentPosition++;
 557       //          unicodeSize++;
 558       //        }
 559       //
 560       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 561       //          || c1 < 0)
 562       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 563       //            || c2 < 0)
 564       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 565       //            || c3 < 0)
 566       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 567       //            || c4 < 0)) {
 568       //          currentPosition = temp;
 569       //          return false;
 570       //        }
 571       //
 572       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 573       //        if (!Character.isDigit(currentCharacter)) {
 574       //          currentPosition = temp;
 575       //          return false;
 576       //        }
 577       //
 578       //        //need the unicode buffer
 579       //        if (withoutUnicodePtr == 0) {
 580       //          //buffer all the entries that have been left aside....
 581       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 582       //          System.arraycopy(
 583       //            source,
 584       //            startPosition,
 585       //            withoutUnicodeBuffer,
 586       //            1,
 587       //            withoutUnicodePtr);
 588       //        }
 589       //        //fill the buffer with the char
 590       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 591       //        return true;
 592       //      } //-------------end unicode traitement--------------
 593       //      else {
 594       if (!Character.isDigit(currentCharacter)) {
 595         currentPosition = temp;
 596         return false;
 597       }
 598       //        if (withoutUnicodePtr != 0)
 599       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 600       return true;
 601       //      }
 602     } catch (IndexOutOfBoundsException e) {
 603       currentPosition = temp;
 604       return false;
 605     }
 606   }
 607
 608   public final boolean getNextCharAsDigit(int radix) {
 609     //BOOLEAN
 610     //handle the case of unicode.
 611     //when a unicode appears then we must use a buffer that holds char
 612     // internal values
 613     //At the end of this method currentCharacter holds the new visited char
 614     //and currentPosition points right next after it
 615     //Both previous lines are true if the currentCharacter is a digit base on
 616     // radix
 617     //On false, no side effect has occured.
 618     //ALL getNextChar.... ARE OPTIMIZED COPIES
 619     int temp = currentPosition;
 620     try {
 621       currentCharacter = source[currentPosition++];
 622       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 623       //        && (source[currentPosition] == 'u')) {
 624       //        //-------------unicode traitement ------------
 625       //        int c1, c2, c3, c4;
 626       //        int unicodeSize = 6;
 627       //        currentPosition++;
 628       //        while (source[currentPosition] == 'u') {
 629       //          currentPosition++;
 630       //          unicodeSize++;
 631       //        }
 632       //
 633       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 634       //          || c1 < 0)
 635       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 636       //            || c2 < 0)
 637       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 638       //            || c3 < 0)
 639       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 640       //            || c4 < 0)) {
 641       //          currentPosition = temp;
 642       //          return false;
 643       //        }
 644       //
 645       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 646       //        if (Character.digit(currentCharacter, radix) == -1) {
 647       //          currentPosition = temp;
 648       //          return false;
 649       //        }
 650       //
 651       //        //need the unicode buffer
 652       //        if (withoutUnicodePtr == 0) {
 653       //          //buffer all the entries that have been left aside....
 654       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 655       //          System.arraycopy(
 656       //            source,
 657       //            startPosition,
 658       //            withoutUnicodeBuffer,
 659       //            1,
 660       //            withoutUnicodePtr);
 661       //        }
 662       //        //fill the buffer with the char
 663       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 664       //        return true;
 665       //      } //-------------end unicode traitement--------------
 666       //      else {
 667       if (Character.digit(currentCharacter, radix) == -1) {
 668         currentPosition = temp;
 669         return false;
 670       }
 671       //        if (withoutUnicodePtr != 0)
 672       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 673       return true;
 674       //      }
 675     } catch (IndexOutOfBoundsException e) {
 676       currentPosition = temp;
 677       return false;
 678     }
 679   }
 680
 681   public boolean getNextCharAsJavaIdentifierPart() {
 682     //BOOLEAN
 683     //handle the case of unicode.
 684     //when a unicode appears then we must use a buffer that holds char
 685     // internal values
 686     //At the end of this method currentCharacter holds the new visited char
 687     //and currentPosition points right next after it
 688     //Both previous lines are true if the currentCharacter is a
 689     // JavaIdentifierPart
 690     //On false, no side effect has occured.
 691     //ALL getNextChar.... ARE OPTIMIZED COPIES
 692     int temp = currentPosition;
 693     try {
 694       currentCharacter = source[currentPosition++];
 695       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 696       //        && (source[currentPosition] == 'u')) {
 697       //        //-------------unicode traitement ------------
 698       //        int c1, c2, c3, c4;
 699       //        int unicodeSize = 6;
 700       //        currentPosition++;
 701       //        while (source[currentPosition] == 'u') {
 702       //          currentPosition++;
 703       //          unicodeSize++;
 704       //        }
 705       //
 706       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 707       //          || c1 < 0)
 708       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 709       //            || c2 < 0)
 710       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 711       //            || c3 < 0)
 712       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 713       //            || c4 < 0)) {
 714       //          currentPosition = temp;
 715       //          return false;
 716       //        }
 717       //
 718       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 719       //        if (!isPHPIdentifierPart(currentCharacter)) {
 720       //          currentPosition = temp;
 721       //          return false;
 722       //        }
 723       //
 724       //        //need the unicode buffer
 725       //        if (withoutUnicodePtr == 0) {
 726       //          //buffer all the entries that have been left aside....
 727       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 728       //          System.arraycopy(
 729       //            source,
 730       //            startPosition,
 731       //            withoutUnicodeBuffer,
 732       //            1,
 733       //            withoutUnicodePtr);
 734       //        }
 735       //        //fill the buffer with the char
 736       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 737       //        return true;
 738       //      } //-------------end unicode traitement--------------
 739       //      else {
 740       if (!isPHPIdentifierPart(currentCharacter)) {
 741         currentPosition = temp;
 742         return false;
 743       }
 744       //        if (withoutUnicodePtr != 0)
 745       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 746       return true;
 747       //      }
 748     } catch (IndexOutOfBoundsException e) {
 749       currentPosition = temp;
 750       return false;
 751     }
 752   }
 753
 754   public int getCastOrParen() {
 755     int tempPosition = currentPosition;
 756     char tempCharacter = currentCharacter;
 757     int tempToken = TokenNameLPAREN;
 758     boolean found = false;
 759     StringBuffer buf = new StringBuffer();
 760     try {
 761       do {
 762         currentCharacter = source[currentPosition++];
 763       } while (currentCharacter == ' ' || currentCharacter == '\t');
 764       while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 765         buf.append(currentCharacter);
 766         currentCharacter = source[currentPosition++];
 767       }
 768       if (buf.length() >= 3 && buf.length() <= 7) {
 769         char[] data = buf.toString().toCharArray();
 770         int index = 0;
 771         switch (data.length) {
 772         case 3:
 773           // int
 774           if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
 775             found = true;
 776             tempToken = TokenNameintCAST;
 777           }
 778           break;
 779         case 4:
 780           // bool real
 781           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
 782             found = true;
 783             tempToken = TokenNameboolCAST;
 784           } else {
 785             index = 0;
 786             if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
 787               found = true;
 788               tempToken = TokenNamedoubleCAST;
 789             }
 790           }
 791           break;
 792         case 5:
 793           // array unset float
 794           if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
 795               && (data[++index] == 'y')) {
 796             found = true;
 797             tempToken = TokenNamearrayCAST;
 798           } else {
 799             index = 0;
 800             if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
 801                 && (data[++index] == 't')) {
 802               found = true;
 803               tempToken = TokenNameunsetCAST;
 804             } else {
 805               index = 0;
 806               if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
 807                   && (data[++index] == 't')) {
 808                 found = true;
 809                 tempToken = TokenNamedoubleCAST;
 810               }
 811             }
 812           }
 813           break;
 814         case 6:
 815           // object string double
 816           if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
 817               && (data[++index] == 'c') && (data[++index] == 't')) {
 818             found = true;
 819             tempToken = TokenNameobjectCAST;
 820           } else {
 821             index = 0;
 822             if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
 823                 && (data[++index] == 'n') && (data[++index] == 'g')) {
 824               found = true;
 825               tempToken = TokenNamestringCAST;
 826             } else {
 827               index = 0;
 828               if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
 829                   && (data[++index] == 'l') && (data[++index] == 'e')) {
 830                 found = true;
 831                 tempToken = TokenNamedoubleCAST;
 832               }
 833             }
 834           }
 835           break;
 836         case 7:
 837           // boolean integer
 838           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
 839               && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
 840             found = true;
 841             tempToken = TokenNameboolCAST;
 842           } else {
 843             index = 0;
 844             if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
 845                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
 846               found = true;
 847               tempToken = TokenNameintCAST;
 848             }
 849           }
 850           break;
 851         }
 852         if (found) {
 853           while (currentCharacter == ' ' || currentCharacter == '\t') {
 854             currentCharacter = source[currentPosition++];
 855           }
 856           if (currentCharacter == ')') {
 857             return tempToken;
 858           }
 859         }
 860       }
 861     } catch (IndexOutOfBoundsException e) {
 862     }
 863     currentCharacter = tempCharacter;
 864     currentPosition = tempPosition;
 865     return TokenNameLPAREN;
 866   }
 867
 868   public void consumeStringInterpolated() throws InvalidInputException {
 869     try {
 870       // consume next character
 871       unicodeAsBackSlash = false;
 872       currentCharacter = source[currentPosition++];
 873       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 874       //                  && (source[currentPosition] == 'u')) {
 875       //                  getNextUnicodeChar();
 876       //                } else {
 877       //                  if (withoutUnicodePtr != 0) {
 878       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 879       //                      currentCharacter;
 880       //                  }
 881       //                }
 882       while (currentCharacter != '`') {
 883         /** ** in PHP \r and \n are valid in string literals *** */
 884         //                if ((currentCharacter == '\n')
 885         //                  || (currentCharacter == '\r')) {
 886         //                  // relocate if finding another quote fairly close: thus unicode
 887         // '/u000D' will be fully consumed
 888         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 889         //                    if (currentPosition + lookAhead == source.length)
 890         //                      break;
 891         //                    if (source[currentPosition + lookAhead] == '\n')
 892         //                      break;
 893         //                    if (source[currentPosition + lookAhead] == '\"') {
 894         //                      currentPosition += lookAhead + 1;
 895         //                      break;
 896         //                    }
 897         //                  }
 898         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 899         //                }
 900         if (currentCharacter == '\\') {
 901           int escapeSize = currentPosition;
 902           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 903           //scanEscapeCharacter make a side effect on this value and we need
 904           // the previous value few lines down this one
 905           scanDoubleQuotedEscapeCharacter();
 906           escapeSize = currentPosition - escapeSize;
 907           if (withoutUnicodePtr == 0) {
 908             //buffer all the entries that have been left aside....
 909             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 910             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 911             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 912           } else { //overwrite the / in the buffer
 913             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 914             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 915               // where only one is correct
 916               withoutUnicodePtr--;
 917             }
 918           }
 919         }
 920         // consume next character
 921         unicodeAsBackSlash = false;
 922         currentCharacter = source[currentPosition++];
 923         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 924         //                    && (source[currentPosition] == 'u')) {
 925         //                    getNextUnicodeChar();
 926         //                  } else {
 927         if (withoutUnicodePtr != 0) {
 928           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 929         }
 930         //                  }
 931       }
 932     } catch (IndexOutOfBoundsException e) {
 933       //    reset end position for error reporting
 934       currentPosition -= 2;
 935       throw new InvalidInputException(UNTERMINATED_STRING);
 936     } catch (InvalidInputException e) {
 937       if (e.getMessage().equals(INVALID_ESCAPE)) {
 938         // relocate if finding another quote fairly close: thus unicode
 939         // '/u000D' will be fully consumed
 940         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 941           if (currentPosition + lookAhead == source.length)
 942             break;
 943           if (source[currentPosition + lookAhead] == '\n')
 944             break;
 945           if (source[currentPosition + lookAhead] == '`') {
 946             currentPosition += lookAhead + 1;
 947             break;
 948           }
 949         }
 950       }
 951       throw e; // rethrow
 952     }
 953     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
 954       // //$NON-NLS-?$ where ? is an
 955       // int.
 956       if (currentLine == null) {
 957         currentLine = new NLSLine();
 958         lines.add(currentLine);
 959       }
 960       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
 961     }
 962   }
 963
 964   public void consumeStringConstant() throws InvalidInputException {
 965     try {
 966       // consume next character
 967       unicodeAsBackSlash = false;
 968       currentCharacter = source[currentPosition++];
 969       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 970       //                  && (source[currentPosition] == 'u')) {
 971       //                  getNextUnicodeChar();
 972       //                } else {
 973       //                  if (withoutUnicodePtr != 0) {
 974       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 975       //                      currentCharacter;
 976       //                  }
 977       //                }
 978       while (currentCharacter != '\'') {
 979         /** ** in PHP \r and \n are valid in string literals *** */
 980         //                  if ((currentCharacter == '\n')
 981         //                    || (currentCharacter == '\r')) {
 982         //                    // relocate if finding another quote fairly close: thus unicode
 983         // '/u000D' will be fully consumed
 984         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 985         //                      if (currentPosition + lookAhead == source.length)
 986         //                        break;
 987         //                      if (source[currentPosition + lookAhead] == '\n')
 988         //                        break;
 989         //                      if (source[currentPosition + lookAhead] == '\"') {
 990         //                        currentPosition += lookAhead + 1;
 991         //                        break;
 992         //                      }
 993         //                    }
 994         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 995         //                  }
 996         if (currentCharacter == '\\') {
 997           int escapeSize = currentPosition;
 998           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 999           //scanEscapeCharacter make a side effect on this value and we need
1000           // the previous value few lines down this one
1001           scanSingleQuotedEscapeCharacter();
1002           escapeSize = currentPosition - escapeSize;
1003           if (withoutUnicodePtr == 0) {
1004             //buffer all the entries that have been left aside....
1005             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1006             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1007             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1008           } else { //overwrite the / in the buffer
1009             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1010             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1011               // where only one is correct
1012               withoutUnicodePtr--;
1013             }
1014           }
1015         }
1016         // consume next character
1017         unicodeAsBackSlash = false;
1018         currentCharacter = source[currentPosition++];
1019         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1020         //                    && (source[currentPosition] == 'u')) {
1021         //                    getNextUnicodeChar();
1022         //                  } else {
1023         if (withoutUnicodePtr != 0) {
1024           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1025         }
1026         //                  }
1027       }
1028     } catch (IndexOutOfBoundsException e) {
1029       // reset end position for error reporting
1030       currentPosition -= 2;
1031       throw new InvalidInputException(UNTERMINATED_STRING);
1032     } catch (InvalidInputException e) {
1033       if (e.getMessage().equals(INVALID_ESCAPE)) {
1034         // relocate if finding another quote fairly close: thus unicode
1035         // '/u000D' will be fully consumed
1036         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1037           if (currentPosition + lookAhead == source.length)
1038             break;
1039           if (source[currentPosition + lookAhead] == '\n')
1040             break;
1041           if (source[currentPosition + lookAhead] == '\'') {
1042             currentPosition += lookAhead + 1;
1043             break;
1044           }
1045         }
1046       }
1047       throw e; // rethrow
1048     }
1049     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1050       // //$NON-NLS-?$ where ? is an
1051       // int.
1052       if (currentLine == null) {
1053         currentLine = new NLSLine();
1054         lines.add(currentLine);
1055       }
1056       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1057     }
1058   }
1059
1060   public void consumeStringLiteral() throws InvalidInputException {
1061     try {
1062       // consume next character
1063       unicodeAsBackSlash = false;
1064       currentCharacter = source[currentPosition++];
1065       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1066       //                  && (source[currentPosition] == 'u')) {
1067       //                  getNextUnicodeChar();
1068       //                } else {
1069       //                  if (withoutUnicodePtr != 0) {
1070       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1071       //                      currentCharacter;
1072       //                  }
1073       //                }
1074       while (currentCharacter != '"') {
1075         /** ** in PHP \r and \n are valid in string literals *** */
1076         //                  if ((currentCharacter == '\n')
1077         //                    || (currentCharacter == '\r')) {
1078         //                    // relocate if finding another quote fairly close: thus unicode
1079         // '/u000D' will be fully consumed
1080         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1081         //                      if (currentPosition + lookAhead == source.length)
1082         //                        break;
1083         //                      if (source[currentPosition + lookAhead] == '\n')
1084         //                        break;
1085         //                      if (source[currentPosition + lookAhead] == '\"') {
1086         //                        currentPosition += lookAhead + 1;
1087         //                        break;
1088         //                      }
1089         //                    }
1090         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1091         //                  }
1092         if (currentCharacter == '\\') {
1093           int escapeSize = currentPosition;
1094           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1095           //scanEscapeCharacter make a side effect on this value and we need
1096           // the previous value few lines down this one
1097           scanDoubleQuotedEscapeCharacter();
1098           escapeSize = currentPosition - escapeSize;
1099           if (withoutUnicodePtr == 0) {
1100             //buffer all the entries that have been left aside....
1101             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1102             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1103             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1104           } else { //overwrite the / in the buffer
1105             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1106             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1107               // where only one is correct
1108               withoutUnicodePtr--;
1109             }
1110           }
1111         }
1112         // consume next character
1113         unicodeAsBackSlash = false;
1114         currentCharacter = source[currentPosition++];
1115         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1116         //                    && (source[currentPosition] == 'u')) {
1117         //                    getNextUnicodeChar();
1118         //                  } else {
1119         if (withoutUnicodePtr != 0) {
1120           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1121         }
1122         //                  }
1123       }
1124     } catch (IndexOutOfBoundsException e) {
1125       //    reset end position for error reporting
1126       currentPosition -= 2;
1127       throw new InvalidInputException(UNTERMINATED_STRING);
1128     } catch (InvalidInputException e) {
1129       if (e.getMessage().equals(INVALID_ESCAPE)) {
1130         // relocate if finding another quote fairly close: thus unicode
1131         // '/u000D' will be fully consumed
1132         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1133           if (currentPosition + lookAhead == source.length)
1134             break;
1135           if (source[currentPosition + lookAhead] == '\n')
1136             break;
1137           if (source[currentPosition + lookAhead] == '\"') {
1138             currentPosition += lookAhead + 1;
1139             break;
1140           }
1141         }
1142       }
1143       throw e; // rethrow
1144     }
1145     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1146       // //$NON-NLS-?$ where ? is an
1147       // int.
1148       if (currentLine == null) {
1149         currentLine = new NLSLine();
1150         lines.add(currentLine);
1151       }
1152       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1153     }
1154   }
1155
1156   public int getNextToken() throws InvalidInputException {
1157     if (!phpMode) {
1158       return getInlinedHTML(currentPosition);
1159     }
1160     if (phpMode) {
1161       this.wasAcr = false;
1162       if (diet) {
1163         jumpOverMethodBody();
1164         diet = false;
1165         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1166       }
1167       try {
1168         while (true) {
1169           withoutUnicodePtr = 0;
1170           //start with a new token
1171           char encapsedChar = ' ';
1172           if (!encapsedStringStack.isEmpty()) {
1173             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1174           }
1175           if (encapsedChar != '$' && encapsedChar != ' ') {
1176             currentCharacter = source[currentPosition++];
1177             if (currentCharacter == encapsedChar) {
1178               switch (currentCharacter) {
1179               case '`':
1180                 return TokenNameEncapsedString0;
1181               case '\'':
1182                 return TokenNameEncapsedString1;
1183               case '"':
1184                 return TokenNameEncapsedString2;
1185               }
1186             }
1187             while (currentCharacter != encapsedChar) {
1188               /** ** in PHP \r and \n are valid in string literals *** */
1189               switch (currentCharacter) {
1190               case '\\':
1191                 int escapeSize = currentPosition;
1192                 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1193                 //scanEscapeCharacter make a side effect on this value and
1194                 // we need the previous value few lines down this one
1195                 scanDoubleQuotedEscapeCharacter();
1196                 escapeSize = currentPosition - escapeSize;
1197                 if (withoutUnicodePtr == 0) {
1198                   //buffer all the entries that have been left aside....
1199                   withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1200                   System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1201                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1202                 } else { //overwrite the / in the buffer
1203                   withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1204                   if (backSlashAsUnicodeInString) { //there are TWO \ in
1205                     withoutUnicodePtr--;
1206                   }
1207                 }
1208                 break;
1209               case '$':
1210                 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1211                   currentPosition--;
1212                   encapsedStringStack.push(new Character('$'));
1213                   return TokenNameSTRING;
1214                 }
1215                 break;
1216               case '{':
1217                 if (source[currentPosition] == '$') { // CURLY_OPEN
1218                   currentPosition--;
1219                   encapsedStringStack.push(new Character('$'));
1220                   return TokenNameSTRING;
1221                 }
1222               }
1223               // consume next character
1224               unicodeAsBackSlash = false;
1225               currentCharacter = source[currentPosition++];
1226               if (withoutUnicodePtr != 0) {
1227                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1228               }
1229               //                  }
1230             } // end while
1231             currentPosition--;
1232             return TokenNameSTRING;
1233           }
1234           // ---------Consume white space and handles startPosition---------
1235           int whiteStart = currentPosition;
1236           startPosition = currentPosition;
1237           currentCharacter = source[currentPosition++];
1238           if (encapsedChar == '$') {
1239             switch (currentCharacter) {
1240             case '\\':
1241               currentCharacter = source[currentPosition++];
1242               return TokenNameSTRING;
1243             case '{':
1244               if (encapsedChar == '$') {
1245                 if (getNextChar('$'))
1246                   return TokenNameLBRACE_DOLLAR;
1247               }
1248               return TokenNameLBRACE;
1249             case '}':
1250               return TokenNameRBRACE;
1251             case '[':
1252               return TokenNameLBRACKET;
1253             case ']':
1254               return TokenNameRBRACKET;
1255             case '\'':
1256               if (tokenizeStrings) {
1257                 consumeStringConstant();
1258                 return TokenNameStringSingleQuote;
1259               }
1260               return TokenNameEncapsedString1;
1261             case '"':
1262               return TokenNameEncapsedString2;
1263             case '`':
1264               if (tokenizeStrings) {
1265                 consumeStringInterpolated();
1266                 return TokenNameStringInterpolated;
1267               }
1268               return TokenNameEncapsedString0;
1269             case '-':
1270               if (getNextChar('>'))
1271                 return TokenNameMINUS_GREATER;
1272               return TokenNameSTRING;
1273             default:
1274               if (currentCharacter == '$') {
1275                 int oldPosition = currentPosition;
1276                 try {
1277                   currentCharacter = source[currentPosition++];
1278                   if (currentCharacter == '{') {
1279                     return TokenNameDOLLAR_LBRACE;
1280                   }
1281                   if (isPHPIdentifierStart(currentCharacter)) {
1282                     return scanIdentifierOrKeyword(true);
1283                   } else {
1284                     currentPosition = oldPosition;
1285                     return TokenNameSTRING;
1286                   }
1287                 } catch (IndexOutOfBoundsException e) {
1288                   currentPosition = oldPosition;
1289                   return TokenNameSTRING;
1290                 }
1291               }
1292               if (isPHPIdentifierStart(currentCharacter))
1293                 return scanIdentifierOrKeyword(false);
1294               if (Character.isDigit(currentCharacter))
1295                 return scanNumber(false);
1296               return TokenNameERROR;
1297             }
1298           }
1299           //          boolean isWhiteSpace;
1300
1301           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1302             startPosition = currentPosition;
1303             currentCharacter = source[currentPosition++];
1304             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1305             //              && (source[currentPosition] == 'u')) {
1306             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1307             //            } else {
1308             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1309               checkNonExternalizeString();
1310               if (recordLineSeparator) {
1311                 pushLineSeparator();
1312               } else {
1313                 currentLine = null;
1314               }
1315             }
1316             //            isWhiteSpace = (currentCharacter == ' ')
1317             //                || Character.isWhitespace(currentCharacter);
1318             //            }
1319           }
1320           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1321             // reposition scanner in case we are interested by spaces as tokens
1322             currentPosition--;
1323             startPosition = whiteStart;
1324             return TokenNameWHITESPACE;
1325           }
1326           //little trick to get out in the middle of a source compuation
1327           if (currentPosition > eofPosition)
1328             return TokenNameEOF;
1329           // ---------Identify the next token-------------
1330           switch (currentCharacter) {
1331           case '(':
1332             return getCastOrParen();
1333           case ')':
1334             return TokenNameRPAREN;
1335           case '{':
1336             return TokenNameLBRACE;
1337           case '}':
1338             return TokenNameRBRACE;
1339           case '[':
1340             return TokenNameLBRACKET;
1341           case ']':
1342             return TokenNameRBRACKET;
1343           case ';':
1344             return TokenNameSEMICOLON;
1345           case ',':
1346             return TokenNameCOMMA;
1347           case '.':
1348             if (getNextChar('='))
1349               return TokenNameDOT_EQUAL;
1350             if (getNextCharAsDigit())
1351               return scanNumber(true);
1352             return TokenNameDOT;
1353           case '+': {
1354             int test;
1355             if ((test = getNextChar('+', '=')) == 0)
1356               return TokenNamePLUS_PLUS;
1357             if (test > 0)
1358               return TokenNamePLUS_EQUAL;
1359             return TokenNamePLUS;
1360           }
1361           case '-': {
1362             int test;
1363             if ((test = getNextChar('-', '=')) == 0)
1364               return TokenNameMINUS_MINUS;
1365             if (test > 0)
1366               return TokenNameMINUS_EQUAL;
1367             if (getNextChar('>'))
1368               return TokenNameMINUS_GREATER;
1369             return TokenNameMINUS;
1370           }
1371           case '~':
1372             if (getNextChar('='))
1373               return TokenNameTWIDDLE_EQUAL;
1374             return TokenNameTWIDDLE;
1375           case '!':
1376             if (getNextChar('=')) {
1377               if (getNextChar('=')) {
1378                 return TokenNameNOT_EQUAL_EQUAL;
1379               }
1380               return TokenNameNOT_EQUAL;
1381             }
1382             return TokenNameNOT;
1383           case '*':
1384             if (getNextChar('='))
1385               return TokenNameMULTIPLY_EQUAL;
1386             return TokenNameMULTIPLY;
1387           case '%':
1388             if (getNextChar('='))
1389               return TokenNameREMAINDER_EQUAL;
1390             return TokenNameREMAINDER;
1391           case '<': {
1392             int oldPosition = currentPosition;
1393             try {
1394               currentCharacter = source[currentPosition++];
1395             } catch (IndexOutOfBoundsException e) {
1396               currentPosition = oldPosition;
1397               return TokenNameLESS;
1398             }
1399             switch (currentCharacter) {
1400             case '=':
1401               return TokenNameLESS_EQUAL;
1402             case '>':
1403               return TokenNameNOT_EQUAL;
1404             case '<':
1405               if (getNextChar('='))
1406                 return TokenNameLEFT_SHIFT_EQUAL;
1407               if (getNextChar('<')) {
1408                 currentCharacter = source[currentPosition++];
1409                 while (Character.isWhitespace(currentCharacter)) {
1410                   currentCharacter = source[currentPosition++];
1411                 }
1412                 int heredocStart = currentPosition - 1;
1413                 int heredocLength = 0;
1414                 if (isPHPIdentifierStart(currentCharacter)) {
1415                   currentCharacter = source[currentPosition++];
1416                 } else {
1417                   return TokenNameERROR;
1418                 }
1419                 while (isPHPIdentifierPart(currentCharacter)) {
1420                   currentCharacter = source[currentPosition++];
1421                 }
1422                 heredocLength = currentPosition - heredocStart - 1;
1423                 // heredoc end-tag determination
1424                 boolean endTag = true;
1425                 char ch;
1426                 do {
1427                   ch = source[currentPosition++];
1428                   if (ch == '\r' || ch == '\n') {
1429                     if (recordLineSeparator) {
1430                       pushLineSeparator();
1431                     } else {
1432                       currentLine = null;
1433                     }
1434                     for (int i = 0; i < heredocLength; i++) {
1435                       if (source[currentPosition + i] != source[heredocStart + i]) {
1436                         endTag = false;
1437                         break;
1438                       }
1439                     }
1440                     if (endTag) {
1441                       currentPosition += heredocLength - 1;
1442                       currentCharacter = source[currentPosition++];
1443                       break; // do...while loop
1444                     } else {
1445                       endTag = true;
1446                     }
1447                   }
1448                 } while (true);
1449                 return TokenNameHEREDOC;
1450               }
1451               return TokenNameLEFT_SHIFT;
1452             }
1453             currentPosition = oldPosition;
1454             return TokenNameLESS;
1455           }
1456           case '>': {
1457             int test;
1458             if ((test = getNextChar('=', '>')) == 0)
1459               return TokenNameGREATER_EQUAL;
1460             if (test > 0) {
1461               if ((test = getNextChar('=', '>')) == 0)
1462                 return TokenNameRIGHT_SHIFT_EQUAL;
1463               return TokenNameRIGHT_SHIFT;
1464             }
1465             return TokenNameGREATER;
1466           }
1467           case '=':
1468             if (getNextChar('=')) {
1469               if (getNextChar('=')) {
1470                 return TokenNameEQUAL_EQUAL_EQUAL;
1471               }
1472               return TokenNameEQUAL_EQUAL;
1473             }
1474             if (getNextChar('>'))
1475               return TokenNameEQUAL_GREATER;
1476             return TokenNameEQUAL;
1477           case '&': {
1478             int test;
1479             if ((test = getNextChar('&', '=')) == 0)
1480               return TokenNameAND_AND;
1481             if (test > 0)
1482               return TokenNameAND_EQUAL;
1483             return TokenNameAND;
1484           }
1485           case '|': {
1486             int test;
1487             if ((test = getNextChar('|', '=')) == 0)
1488               return TokenNameOR_OR;
1489             if (test > 0)
1490               return TokenNameOR_EQUAL;
1491             return TokenNameOR;
1492           }
1493           case '^':
1494             if (getNextChar('='))
1495               return TokenNameXOR_EQUAL;
1496             return TokenNameXOR;
1497           case '?':
1498             if (getNextChar('>')) {
1499               phpMode = false;
1500               if (currentPosition == source.length) {
1501                 phpMode = true;
1502                 return TokenNameINLINE_HTML;
1503               }
1504               return getInlinedHTML(currentPosition - 2);
1505             }
1506             return TokenNameQUESTION;
1507           case ':':
1508             if (getNextChar(':'))
1509               return TokenNamePAAMAYIM_NEKUDOTAYIM;
1510             return TokenNameCOLON;
1511           case '@':
1512             return TokenNameAT;
1513           case '\'':
1514             consumeStringConstant();
1515             return TokenNameStringSingleQuote;
1516           case '"':
1517             if (tokenizeStrings) {
1518               consumeStringLiteral();
1519               return TokenNameStringDoubleQuote;
1520             }
1521             return TokenNameEncapsedString2;
1522           case '`':
1523             if (tokenizeStrings) {
1524               consumeStringInterpolated();
1525               return TokenNameStringInterpolated;
1526             }
1527             return TokenNameEncapsedString0;
1528           case '#':
1529           case '/': {
1530             char startChar = currentCharacter;
1531             if (getNextChar('=') && startChar=='/') {
1532               return TokenNameDIVIDE_EQUAL;
1533             }
1534             int test;
1535             if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1536               //line comment
1537               this.lastCommentLinePosition = this.currentPosition;
1538               int endPositionForLineComment = 0;
1539               try { //get the next char
1540                 currentCharacter = source[currentPosition++];
1541                 //                    if (((currentCharacter = source[currentPosition++])
1542                 //                      == '\\')
1543                 //                      && (source[currentPosition] == 'u')) {
1544                 //                      //-------------unicode traitement ------------
1545                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1546                 //                      currentPosition++;
1547                 //                      while (source[currentPosition] == 'u') {
1548                 //                        currentPosition++;
1549                 //                      }
1550                 //                      if ((c1 =
1551                 //                        Character.getNumericValue(source[currentPosition++]))
1552                 //                        > 15
1553                 //                        || c1 < 0
1554                 //                        || (c2 =
1555                 //                          Character.getNumericValue(source[currentPosition++]))
1556                 //                          > 15
1557                 //                        || c2 < 0
1558                 //                        || (c3 =
1559                 //                          Character.getNumericValue(source[currentPosition++]))
1560                 //                          > 15
1561                 //                        || c3 < 0
1562                 //                        || (c4 =
1563                 //                          Character.getNumericValue(source[currentPosition++]))
1564                 //                          > 15
1565                 //                        || c4 < 0) {
1566                 //                        throw new
1567                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1568                 //                      } else {
1569                 //                        currentCharacter =
1570                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1571                 //                      }
1572                 //                    }
1573                 //handle the \\u case manually into comment
1574                 //                    if (currentCharacter == '\\') {
1575                 //                      if (source[currentPosition] == '\\')
1576                 //                        currentPosition++;
1577                 //                    } //jump over the \\
1578                 boolean isUnicode = false;
1579                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1580                   this.lastCommentLinePosition = this.currentPosition;
1581                   if (currentCharacter == '?') {
1582                     if (getNextChar('>')) {
1583                       startPosition = currentPosition - 2;
1584                       phpMode = false;
1585                       return TokenNameINLINE_HTML;
1586                     }
1587                   }
1588                   //get the next char
1589                   isUnicode = false;
1590                   currentCharacter = source[currentPosition++];
1591                   //                      if (((currentCharacter = source[currentPosition++])
1592                   //                        == '\\')
1593                   //                        && (source[currentPosition] == 'u')) {
1594                   //                        isUnicode = true;
1595                   //                        //-------------unicode traitement ------------
1596                   //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1597                   //                        currentPosition++;
1598                   //                        while (source[currentPosition] == 'u') {
1599                   //                          currentPosition++;
1600                   //                        }
1601                   //                        if ((c1 =
1602                   //                          Character.getNumericValue(source[currentPosition++]))
1603                   //                          > 15
1604                   //                          || c1 < 0
1605                   //                          || (c2 =
1606                   //                            Character.getNumericValue(
1607                   //                              source[currentPosition++]))
1608                   //                            > 15
1609                   //                          || c2 < 0
1610                   //                          || (c3 =
1611                   //                            Character.getNumericValue(
1612                   //                              source[currentPosition++]))
1613                   //                            > 15
1614                   //                          || c3 < 0
1615                   //                          || (c4 =
1616                   //                            Character.getNumericValue(
1617                   //                              source[currentPosition++]))
1618                   //                            > 15
1619                   //                          || c4 < 0) {
1620                   //                          throw new
1621                   // InvalidInputException(INVALID_UNICODE_ESCAPE);
1622                   //                        } else {
1623                   //                          currentCharacter =
1624                   //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1625                   //                        }
1626                   //                      }
1627                   //handle the \\u case manually into comment
1628                   //                      if (currentCharacter == '\\') {
1629                   //                        if (source[currentPosition] == '\\')
1630                   //                          currentPosition++;
1631                   //                      } //jump over the \\
1632                 }
1633                 if (isUnicode) {
1634                   endPositionForLineComment = currentPosition - 6;
1635                 } else {
1636                   endPositionForLineComment = currentPosition - 1;
1637                 }
1638                 //                    recordComment(false);
1639                 recordComment(TokenNameCOMMENT_LINE);
1640                 if (this.taskTags != null)
1641                   checkTaskTag(this.startPosition, this.currentPosition);
1642                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1643                   checkNonExternalizeString();
1644                   if (recordLineSeparator) {
1645                     if (isUnicode) {
1646                       pushUnicodeLineSeparator();
1647                     } else {
1648                       pushLineSeparator();
1649                     }
1650                   } else {
1651                     currentLine = null;
1652                   }
1653                 }
1654                 if (tokenizeComments) {
1655                   if (!isUnicode) {
1656                     currentPosition = endPositionForLineComment;
1657                     // reset one character behind
1658                   }
1659                   return TokenNameCOMMENT_LINE;
1660                 }
1661               } catch (IndexOutOfBoundsException e) { //an eof will them
1662                 // be generated
1663                 if (tokenizeComments) {
1664                   currentPosition--;
1665                   // reset one character behind
1666                   return TokenNameCOMMENT_LINE;
1667                 }
1668               }
1669               break;
1670             }
1671             if (test > 0) {
1672               //traditional and annotation comment
1673               boolean isJavadoc = false, star = false;
1674               // consume next character
1675               unicodeAsBackSlash = false;
1676               currentCharacter = source[currentPosition++];
1677               //                  if (((currentCharacter = source[currentPosition++]) ==
1678               // '\\')
1679               //                    && (source[currentPosition] == 'u')) {
1680               //                    getNextUnicodeChar();
1681               //                  } else {
1682               //                    if (withoutUnicodePtr != 0) {
1683               //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1684               //                        currentCharacter;
1685               //                    }
1686               //                  }
1687               if (currentCharacter == '*') {
1688                 isJavadoc = true;
1689                 star = true;
1690               }
1691               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1692                 checkNonExternalizeString();
1693                 if (recordLineSeparator) {
1694                   pushLineSeparator();
1695                 } else {
1696                   currentLine = null;
1697                 }
1698               }
1699               try { //get the next char
1700                 currentCharacter = source[currentPosition++];
1701                 //                    if (((currentCharacter = source[currentPosition++])
1702                 //                      == '\\')
1703                 //                      && (source[currentPosition] == 'u')) {
1704                 //                      //-------------unicode traitement ------------
1705                 //                      getNextUnicodeChar();
1706                 //                    }
1707                 //handle the \\u case manually into comment
1708                 //                    if (currentCharacter == '\\') {
1709                 //                      if (source[currentPosition] == '\\')
1710                 //                        currentPosition++;
1711                 //                      //jump over the \\
1712                 //                    }
1713                 // empty comment is not a javadoc /**/
1714                 if (currentCharacter == '/') {
1715                   isJavadoc = false;
1716                 }
1717                 //loop until end of comment */
1718                 while ((currentCharacter != '/') || (!star)) {
1719                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1720                     checkNonExternalizeString();
1721                     if (recordLineSeparator) {
1722                       pushLineSeparator();
1723                     } else {
1724                       currentLine = null;
1725                     }
1726                   }
1727                   star = currentCharacter == '*';
1728                   //get next char
1729                   currentCharacter = source[currentPosition++];
1730                   //                      if (((currentCharacter = source[currentPosition++])
1731                   //                        == '\\')
1732                   //                        && (source[currentPosition] == 'u')) {
1733                   //                        //-------------unicode traitement ------------
1734                   //                        getNextUnicodeChar();
1735                   //                      }
1736                   //handle the \\u case manually into comment
1737                   //                      if (currentCharacter == '\\') {
1738                   //                        if (source[currentPosition] == '\\')
1739                   //                          currentPosition++;
1740                   //                      } //jump over the \\
1741                 }
1742                 //recordComment(isJavadoc);
1743                 if (isJavadoc) {
1744                   recordComment(TokenNameCOMMENT_PHPDOC);
1745                 } else {
1746                   recordComment(TokenNameCOMMENT_BLOCK);
1747                 }
1748
1749                 if (tokenizeComments) {
1750                   if (isJavadoc)
1751                     return TokenNameCOMMENT_PHPDOC;
1752                   return TokenNameCOMMENT_BLOCK;
1753                 }
1754               } catch (IndexOutOfBoundsException e) {
1755                 //                  reset end position for error reporting
1756                 currentPosition -= 2;
1757                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1758               }
1759               break;
1760             }
1761             return TokenNameDIVIDE;
1762           }
1763           case '\u001a':
1764             if (atEnd())
1765               return TokenNameEOF;
1766             //the atEnd may not be <currentPosition == source.length> if
1767             // source is only some part of a real (external) stream
1768             throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1769           default:
1770             if (currentCharacter == '$') {
1771               int oldPosition = currentPosition;
1772               try {
1773                 currentCharacter = source[currentPosition++];
1774                 if (isPHPIdentifierStart(currentCharacter)) {
1775                   return scanIdentifierOrKeyword(true);
1776                 } else {
1777                   currentPosition = oldPosition;
1778                   return TokenNameDOLLAR;
1779                 }
1780               } catch (IndexOutOfBoundsException e) {
1781                 currentPosition = oldPosition;
1782                 return TokenNameDOLLAR;
1783               }
1784             }
1785             if (isPHPIdentifierStart(currentCharacter))
1786               return scanIdentifierOrKeyword(false);
1787             if (Character.isDigit(currentCharacter))
1788               return scanNumber(false);
1789             return TokenNameERROR;
1790           }
1791         }
1792       } //-----------------end switch while try--------------------
1793       catch (IndexOutOfBoundsException e) {
1794       }
1795     }
1796     return TokenNameEOF;
1797   }
1798
1799   private int getInlinedHTML(int start) throws InvalidInputException {
1800     int token = getInlinedHTMLToken(start);
1801     if (token == TokenNameINLINE_HTML) {
1802       //                Stack stack = new Stack();
1803       //                // scan html for errors
1804       //                Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1805       //                int lastPHPEndPos=0;
1806       //                for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1807       //                    Tag tag=(Tag)i.next();
1808       //
1809       //                    if (tag instanceof StartTag) {
1810       //                        StartTag startTag=(StartTag)tag;
1811       //                      // System.out.println("startTag: "+tag);
1812       //                        if (startTag.isServerTag()) {
1813       //                          // TODO : what to do with a server tag ?
1814       //                        } else {
1815       //                            // do whatever with HTML start tag
1816       //                            // use startTag.getElement() to find the element corresponding
1817       //                            // to this start tag which may be useful if you implement code
1818       //                            // folding etc
1819       //                                stack.push(startTag);
1820       //                        }
1821       //                    } else {
1822       //                        EndTag endTag=(EndTag)tag;
1823       //                        StartTag stag = (StartTag) stack.peek();
1824       //// System.out.println("endTag: "+tag);
1825       //                        // do whatever with HTML end tag.
1826       //                    }
1827       //                }
1828     }
1829     return token;
1830   }
1831
1832   /**
1833    * @return
1834    * @throws InvalidInputException
1835    */
1836   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1837     //    int htmlPosition = start;
1838     if (currentPosition > source.length) {
1839       currentPosition = source.length;
1840       return TokenNameEOF;
1841     }
1842     startPosition = start;
1843     try {
1844       while (!phpMode) {
1845         currentCharacter = source[currentPosition++];
1846         if (currentCharacter == '<') {
1847           if (getNextChar('?')) {
1848             currentCharacter = source[currentPosition++];
1849             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1850               // <?
1851               if (ignorePHPOneLiner) {
1852                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1853                   phpMode = true;
1854                   return TokenNameINLINE_HTML;
1855                 }
1856               } else {
1857                 phpMode = true;
1858                 return TokenNameINLINE_HTML;
1859               }
1860             } else {
1861               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1862               if (phpStart) {
1863                 int test = getNextChar('H', 'h');
1864                 if (test >= 0) {
1865                   test = getNextChar('P', 'p');
1866                   if (test >= 0) {
1867                     // <?PHP <?php
1868                     if (ignorePHPOneLiner) {
1869                       if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1870                         phpMode = true;
1871                         return TokenNameINLINE_HTML;
1872                       }
1873                     } else {
1874                       phpMode = true;
1875                       return TokenNameINLINE_HTML;
1876                     }
1877                   }
1878                 }
1879               }
1880             }
1881           }
1882         }
1883         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1884           if (recordLineSeparator) {
1885             pushLineSeparator();
1886           } else {
1887             currentLine = null;
1888           }
1889         }
1890       } //-----------------while--------------------
1891       phpMode = true;
1892       return TokenNameINLINE_HTML;
1893     } //-----------------try--------------------
1894     catch (IndexOutOfBoundsException e) {
1895       startPosition = start;
1896       currentPosition--;
1897     }
1898     phpMode = true;
1899     return TokenNameINLINE_HTML;
1900   }
1901
1902   /**
1903    * @return
1904    */
1905   private int lookAheadLinePHPTag() {
1906     // check if the PHP is only in this line (for CodeFormatter)
1907     int currentPositionInLine = currentPosition;
1908     char previousCharInLine = ' ';
1909     char currentCharInLine = ' ';
1910     boolean singleQuotedStringActive = false;
1911     boolean doubleQuotedStringActive = false;
1912
1913     try {
1914       // look ahead in this line
1915       while (true) {
1916         previousCharInLine = currentCharInLine;
1917         currentCharInLine = source[currentPositionInLine++];
1918         switch (currentCharInLine) {
1919         case '>':
1920           if (previousCharInLine == '?') {
1921             // update the scanner's current Position in the source
1922             currentPosition = currentPositionInLine;
1923             // use as "dummy" token
1924             return TokenNameEOF;
1925           }
1926           break;
1927         case '\"':
1928           if (doubleQuotedStringActive) {
1929             if (previousCharInLine != '\\') {
1930               doubleQuotedStringActive = false;
1931             }
1932           } else {
1933             if (!singleQuotedStringActive) {
1934               doubleQuotedStringActive = true;
1935             }
1936           }
1937           break;
1938         case '\'':
1939           if (singleQuotedStringActive) {
1940             if (previousCharInLine != '\\') {
1941               singleQuotedStringActive = false;
1942             }
1943           } else {
1944             if (!doubleQuotedStringActive) {
1945               singleQuotedStringActive = true;
1946             }
1947           }
1948           break;
1949         case '\n':
1950           phpMode = true;
1951           return TokenNameINLINE_HTML;
1952         case '#':
1953           if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1954             phpMode = true;
1955             return TokenNameINLINE_HTML;
1956           }
1957           break;
1958         case '/':
1959           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1960             phpMode = true;
1961             return TokenNameINLINE_HTML;
1962           }
1963           break;
1964         case '*':
1965           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1966             phpMode = true;
1967             return TokenNameINLINE_HTML;
1968           }
1969           break;
1970         }
1971       }
1972     } catch (IndexOutOfBoundsException e) {
1973       phpMode = true;
1974       currentPosition = currentPositionInLine;
1975       return TokenNameINLINE_HTML;
1976     }
1977   }
1978
1979   //  public final void getNextUnicodeChar()
1980   //    throws IndexOutOfBoundsException, InvalidInputException {
1981   //    //VOID
1982   //    //handle the case of unicode.
1983   //    //when a unicode appears then we must use a buffer that holds char
1984   // internal values
1985   //    //At the end of this method currentCharacter holds the new visited char
1986   //    //and currentPosition points right next after it
1987   //
1988   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1989   //
1990   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1991   //    currentPosition++;
1992   //    while (source[currentPosition] == 'u') {
1993   //      currentPosition++;
1994   //      unicodeSize++;
1995   //    }
1996   //
1997   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1998   //      || c1 < 0
1999   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2000   //      || c2 < 0
2001   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2002   //      || c3 < 0
2003   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2004   //      || c4 < 0) {
2005   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2006   //    } else {
2007   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2008   //      //need the unicode buffer
2009   //      if (withoutUnicodePtr == 0) {
2010   //        //buffer all the entries that have been left aside....
2011   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2012   //        System.arraycopy(
2013   //          source,
2014   //          startPosition,
2015   //          withoutUnicodeBuffer,
2016   //          1,
2017   //          withoutUnicodePtr);
2018   //      }
2019   //      //fill the buffer with the char
2020   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2021   //    }
2022   //    unicodeAsBackSlash = currentCharacter == '\\';
2023   //  }
2024   /*
2025    * Tokenize a method body, assuming that curly brackets are properly balanced.
2026    */
2027   public final void jumpOverMethodBody() {
2028     this.wasAcr = false;
2029     int found = 1;
2030     try {
2031       while (true) { //loop for jumping over comments
2032         // ---------Consume white space and handles startPosition---------
2033         boolean isWhiteSpace;
2034         do {
2035           startPosition = currentPosition;
2036           currentCharacter = source[currentPosition++];
2037           //          if (((currentCharacter = source[currentPosition++]) == '\\')
2038           //            && (source[currentPosition] == 'u')) {
2039           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
2040           //          } else {
2041           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2042             pushLineSeparator();
2043           isWhiteSpace = Character.isWhitespace(currentCharacter);
2044           //          }
2045         } while (isWhiteSpace);
2046         // -------consume token until } is found---------
2047         switch (currentCharacter) {
2048         case '{':
2049           found++;
2050           break;
2051         case '}':
2052           found--;
2053           if (found == 0)
2054             return;
2055           break;
2056         case '\'': {
2057           boolean test;
2058           test = getNextChar('\\');
2059           if (test) {
2060             try {
2061               scanDoubleQuotedEscapeCharacter();
2062             } catch (InvalidInputException ex) {
2063             }
2064             ;
2065           } else {
2066             //                try { // consume next character
2067             unicodeAsBackSlash = false;
2068             currentCharacter = source[currentPosition++];
2069             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2070             //                    && (source[currentPosition] == 'u')) {
2071             //                    getNextUnicodeChar();
2072             //                  } else {
2073             if (withoutUnicodePtr != 0) {
2074               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2075             }
2076             //                  }
2077             //                } catch (InvalidInputException ex) {
2078             //                };
2079           }
2080           getNextChar('\'');
2081           break;
2082         }
2083         case '"':
2084           try {
2085             //              try { // consume next character
2086             unicodeAsBackSlash = false;
2087             currentCharacter = source[currentPosition++];
2088             //                if (((currentCharacter = source[currentPosition++]) == '\\')
2089             //                  && (source[currentPosition] == 'u')) {
2090             //                  getNextUnicodeChar();
2091             //                } else {
2092             if (withoutUnicodePtr != 0) {
2093               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2094             }
2095             //                }
2096             //              } catch (InvalidInputException ex) {
2097             //              };
2098             while (currentCharacter != '"') {
2099               if (currentCharacter == '\r') {
2100                 if (source[currentPosition] == '\n')
2101                   currentPosition++;
2102                 break;
2103                 // the string cannot go further that the line
2104               }
2105               if (currentCharacter == '\n') {
2106                 break;
2107                 // the string cannot go further that the line
2108               }
2109               if (currentCharacter == '\\') {
2110                 try {
2111                   scanDoubleQuotedEscapeCharacter();
2112                 } catch (InvalidInputException ex) {
2113                 }
2114                 ;
2115               }
2116               //                try { // consume next character
2117               unicodeAsBackSlash = false;
2118               currentCharacter = source[currentPosition++];
2119               //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2120               //                    && (source[currentPosition] == 'u')) {
2121               //                    getNextUnicodeChar();
2122               //                  } else {
2123               if (withoutUnicodePtr != 0) {
2124                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2125               }
2126               //                  }
2127               //                } catch (InvalidInputException ex) {
2128               //                };
2129             }
2130           } catch (IndexOutOfBoundsException e) {
2131             return;
2132           }
2133           break;
2134         case '/': {
2135           int test;
2136           if ((test = getNextChar('/', '*')) == 0) {
2137             //line comment
2138             try {
2139               //get the next char
2140               currentCharacter = source[currentPosition++];
2141               //                  if (((currentCharacter = source[currentPosition++]) ==
2142               // '\\')
2143               //                    && (source[currentPosition] == 'u')) {
2144               //                    //-------------unicode traitement ------------
2145               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2146               //                    currentPosition++;
2147               //                    while (source[currentPosition] == 'u') {
2148               //                      currentPosition++;
2149               //                    }
2150               //                    if ((c1 =
2151               //                      Character.getNumericValue(source[currentPosition++]))
2152               //                      > 15
2153               //                      || c1 < 0
2154               //                      || (c2 =
2155               //                        Character.getNumericValue(source[currentPosition++]))
2156               //                        > 15
2157               //                      || c2 < 0
2158               //                      || (c3 =
2159               //                        Character.getNumericValue(source[currentPosition++]))
2160               //                        > 15
2161               //                      || c3 < 0
2162               //                      || (c4 =
2163               //                        Character.getNumericValue(source[currentPosition++]))
2164               //                        > 15
2165               //                      || c4 < 0) {
2166               //                      //error don't care of the value
2167               //                      currentCharacter = 'A';
2168               //                    } //something different from \n and \r
2169               //                    else {
2170               //                      currentCharacter =
2171               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2172               //                    }
2173               //                  }
2174               while (currentCharacter != '\r' && currentCharacter != '\n') {
2175                 //get the next char
2176                 currentCharacter = source[currentPosition++];
2177                 //                    if (((currentCharacter = source[currentPosition++])
2178                 //                      == '\\')
2179                 //                      && (source[currentPosition] == 'u')) {
2180                 //                      //-------------unicode traitement ------------
2181                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2182                 //                      currentPosition++;
2183                 //                      while (source[currentPosition] == 'u') {
2184                 //                        currentPosition++;
2185                 //                      }
2186                 //                      if ((c1 =
2187                 //                        Character.getNumericValue(source[currentPosition++]))
2188                 //                        > 15
2189                 //                        || c1 < 0
2190                 //                        || (c2 =
2191                 //                          Character.getNumericValue(source[currentPosition++]))
2192                 //                          > 15
2193                 //                        || c2 < 0
2194                 //                        || (c3 =
2195                 //                          Character.getNumericValue(source[currentPosition++]))
2196                 //                          > 15
2197                 //                        || c3 < 0
2198                 //                        || (c4 =
2199                 //                          Character.getNumericValue(source[currentPosition++]))
2200                 //                          > 15
2201                 //                        || c4 < 0) {
2202                 //                        //error don't care of the value
2203                 //                        currentCharacter = 'A';
2204                 //                      } //something different from \n and \r
2205                 //                      else {
2206                 //                        currentCharacter =
2207                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2208                 //                      }
2209                 //                    }
2210               }
2211               if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2212                 pushLineSeparator();
2213             } catch (IndexOutOfBoundsException e) {
2214             } //an eof will them be generated
2215             break;
2216           }
2217           if (test > 0) {
2218             //traditional and annotation comment
2219             boolean star = false;
2220             //                try { // consume next character
2221             unicodeAsBackSlash = false;
2222             currentCharacter = source[currentPosition++];
2223             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2224             //                    && (source[currentPosition] == 'u')) {
2225             //                    getNextUnicodeChar();
2226             //                  } else {
2227             if (withoutUnicodePtr != 0) {
2228               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2229             }
2230             //                  };
2231             //                } catch (InvalidInputException ex) {
2232             //                };
2233             if (currentCharacter == '*') {
2234               star = true;
2235             }
2236             if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2237               pushLineSeparator();
2238             try { //get the next char
2239               currentCharacter = source[currentPosition++];
2240               //                  if (((currentCharacter = source[currentPosition++]) ==
2241               // '\\')
2242               //                    && (source[currentPosition] == 'u')) {
2243               //                    //-------------unicode traitement ------------
2244               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2245               //                    currentPosition++;
2246               //                    while (source[currentPosition] == 'u') {
2247               //                      currentPosition++;
2248               //                    }
2249               //                    if ((c1 =
2250               //                      Character.getNumericValue(source[currentPosition++]))
2251               //                      > 15
2252               //                      || c1 < 0
2253               //                      || (c2 =
2254               //                        Character.getNumericValue(source[currentPosition++]))
2255               //                        > 15
2256               //                      || c2 < 0
2257               //                      || (c3 =
2258               //                        Character.getNumericValue(source[currentPosition++]))
2259               //                        > 15
2260               //                      || c3 < 0
2261               //                      || (c4 =
2262               //                        Character.getNumericValue(source[currentPosition++]))
2263               //                        > 15
2264               //                      || c4 < 0) {
2265               //                      //error don't care of the value
2266               //                      currentCharacter = 'A';
2267               //                    } //something different from * and /
2268               //                    else {
2269               //                      currentCharacter =
2270               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2271               //                    }
2272               //                  }
2273               //loop until end of comment */
2274               while ((currentCharacter != '/') || (!star)) {
2275                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2276                   pushLineSeparator();
2277                 star = currentCharacter == '*';
2278                 //get next char
2279                 currentCharacter = source[currentPosition++];
2280                 //                    if (((currentCharacter = source[currentPosition++])
2281                 //                      == '\\')
2282                 //                      && (source[currentPosition] == 'u')) {
2283                 //                      //-------------unicode traitement ------------
2284                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2285                 //                      currentPosition++;
2286                 //                      while (source[currentPosition] == 'u') {
2287                 //                        currentPosition++;
2288                 //                      }
2289                 //                      if ((c1 =
2290                 //                        Character.getNumericValue(source[currentPosition++]))
2291                 //                        > 15
2292                 //                        || c1 < 0
2293                 //                        || (c2 =
2294                 //                          Character.getNumericValue(source[currentPosition++]))
2295                 //                          > 15
2296                 //                        || c2 < 0
2297                 //                        || (c3 =
2298                 //                          Character.getNumericValue(source[currentPosition++]))
2299                 //                          > 15
2300                 //                        || c3 < 0
2301                 //                        || (c4 =
2302                 //                          Character.getNumericValue(source[currentPosition++]))
2303                 //                          > 15
2304                 //                        || c4 < 0) {
2305                 //                        //error don't care of the value
2306                 //                        currentCharacter = 'A';
2307                 //                      } //something different from * and /
2308                 //                      else {
2309                 //                        currentCharacter =
2310                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2311                 //                      }
2312                 //                    }
2313               }
2314             } catch (IndexOutOfBoundsException e) {
2315               return;
2316             }
2317             break;
2318           }
2319           break;
2320         }
2321         default:
2322           if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2323             try {
2324               scanIdentifierOrKeyword((currentCharacter == '$'));
2325             } catch (InvalidInputException ex) {
2326             }
2327             ;
2328             break;
2329           }
2330           if (Character.isDigit(currentCharacter)) {
2331             try {
2332               scanNumber(false);
2333             } catch (InvalidInputException ex) {
2334             }
2335             ;
2336             break;
2337           }
2338         }
2339       }
2340       //-----------------end switch while try--------------------
2341     } catch (IndexOutOfBoundsException e) {
2342     } catch (InvalidInputException e) {
2343     }
2344     return;
2345   }
2346
2347   //  public final boolean jumpOverUnicodeWhiteSpace()
2348   //    throws InvalidInputException {
2349   //    //BOOLEAN
2350   //    //handle the case of unicode. Jump over the next whiteSpace
2351   //    //making startPosition pointing on the next available char
2352   //    //On false, the currentCharacter is filled up with a potential
2353   //    //correct char
2354   //
2355   //    try {
2356   //      this.wasAcr = false;
2357   //      int c1, c2, c3, c4;
2358   //      int unicodeSize = 6;
2359   //      currentPosition++;
2360   //      while (source[currentPosition] == 'u') {
2361   //        currentPosition++;
2362   //        unicodeSize++;
2363   //      }
2364   //
2365   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2366   //        || c1 < 0)
2367   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2368   //          || c2 < 0)
2369   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2370   //          || c3 < 0)
2371   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2372   //          || c4 < 0)) {
2373   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2374   //      }
2375   //
2376   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2377   //      if (recordLineSeparator
2378   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2379   //        pushLineSeparator();
2380   //      if (Character.isWhitespace(currentCharacter))
2381   //        return true;
2382   //
2383   //      //buffer the new char which is not a white space
2384   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2385   //      //withoutUnicodePtr == 1 is true here
2386   //      return false;
2387   //    } catch (IndexOutOfBoundsException e) {
2388   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2389   //    }
2390   //  }
2391   public final int[] getLineEnds() {
2392     //return a bounded copy of this.lineEnds
2393     int[] copy;
2394     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2395     return copy;
2396   }
2397
2398   public char[] getSource() {
2399     return this.source;
2400   }
2401
2402   public static boolean isIdentifierOrKeyword(int token) {
2403     return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2404   }
2405
2406   final char[] optimizedCurrentTokenSource1() {
2407     //return always the same char[] build only once
2408     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2409     char charOne = source[startPosition];
2410     switch (charOne) {
2411     case 'a':
2412       return charArray_a;
2413     case 'b':
2414       return charArray_b;
2415     case 'c':
2416       return charArray_c;
2417     case 'd':
2418       return charArray_d;
2419     case 'e':
2420       return charArray_e;
2421     case 'f':
2422       return charArray_f;
2423     case 'g':
2424       return charArray_g;
2425     case 'h':
2426       return charArray_h;
2427     case 'i':
2428       return charArray_i;
2429     case 'j':
2430       return charArray_j;
2431     case 'k':
2432       return charArray_k;
2433     case 'l':
2434       return charArray_l;
2435     case 'm':
2436       return charArray_m;
2437     case 'n':
2438       return charArray_n;
2439     case 'o':
2440       return charArray_o;
2441     case 'p':
2442       return charArray_p;
2443     case 'q':
2444       return charArray_q;
2445     case 'r':
2446       return charArray_r;
2447     case 's':
2448       return charArray_s;
2449     case 't':
2450       return charArray_t;
2451     case 'u':
2452       return charArray_u;
2453     case 'v':
2454       return charArray_v;
2455     case 'w':
2456       return charArray_w;
2457     case 'x':
2458       return charArray_x;
2459     case 'y':
2460       return charArray_y;
2461     case 'z':
2462       return charArray_z;
2463     default:
2464       return new char[] { charOne };
2465     }
2466   }
2467
2468   final char[] optimizedCurrentTokenSource2() {
2469     //try to return the same char[] build only once
2470     char c0, c1;
2471     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2472     char[][] table = charArray_length[0][hash];
2473     int i = newEntry2;
2474     while (++i < InternalTableSize) {
2475       char[] charArray = table[i];
2476       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2477         return charArray;
2478     }
2479     //---------other side---------
2480     i = -1;
2481     int max = newEntry2;
2482     while (++i <= max) {
2483       char[] charArray = table[i];
2484       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2485         return charArray;
2486     }
2487     //--------add the entry-------
2488     if (++max >= InternalTableSize)
2489       max = 0;
2490     char[] r;
2491     table[max] = (r = new char[] { c0, c1 });
2492     newEntry2 = max;
2493     return r;
2494   }
2495
2496   final char[] optimizedCurrentTokenSource3() {
2497     //try to return the same char[] build only once
2498     char c0, c1, c2;
2499     int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2500         % TableSize;
2501     char[][] table = charArray_length[1][hash];
2502     int i = newEntry3;
2503     while (++i < InternalTableSize) {
2504       char[] charArray = table[i];
2505       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2506         return charArray;
2507     }
2508     //---------other side---------
2509     i = -1;
2510     int max = newEntry3;
2511     while (++i <= max) {
2512       char[] charArray = table[i];
2513       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2514         return charArray;
2515     }
2516     //--------add the entry-------
2517     if (++max >= InternalTableSize)
2518       max = 0;
2519     char[] r;
2520     table[max] = (r = new char[] { c0, c1, c2 });
2521     newEntry3 = max;
2522     return r;
2523   }
2524
2525   final char[] optimizedCurrentTokenSource4() {
2526     //try to return the same char[] build only once
2527     char c0, c1, c2, c3;
2528     long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2529         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2530         % TableSize;
2531     char[][] table = charArray_length[2][(int) hash];
2532     int i = newEntry4;
2533     while (++i < InternalTableSize) {
2534       char[] charArray = table[i];
2535       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2536         return charArray;
2537     }
2538     //---------other side---------
2539     i = -1;
2540     int max = newEntry4;
2541     while (++i <= max) {
2542       char[] charArray = table[i];
2543       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2544         return charArray;
2545     }
2546     //--------add the entry-------
2547     if (++max >= InternalTableSize)
2548       max = 0;
2549     char[] r;
2550     table[max] = (r = new char[] { c0, c1, c2, c3 });
2551     newEntry4 = max;
2552     return r;
2553   }
2554
2555   final char[] optimizedCurrentTokenSource5() {
2556     //try to return the same char[] build only once
2557     char c0, c1, c2, c3, c4;
2558     long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2559         + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2560         % TableSize;
2561     char[][] table = charArray_length[3][(int) hash];
2562     int i = newEntry5;
2563     while (++i < InternalTableSize) {
2564       char[] charArray = table[i];
2565       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2566         return charArray;
2567     }
2568     //---------other side---------
2569     i = -1;
2570     int max = newEntry5;
2571     while (++i <= max) {
2572       char[] charArray = table[i];
2573       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2574         return charArray;
2575     }
2576     //--------add the entry-------
2577     if (++max >= InternalTableSize)
2578       max = 0;
2579     char[] r;
2580     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2581     newEntry5 = max;
2582     return r;
2583   }
2584
2585   final char[] optimizedCurrentTokenSource6() {
2586     //try to return the same char[] build only once
2587     char c0, c1, c2, c3, c4, c5;
2588     long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2589         + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2590         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2591         % TableSize;
2592     char[][] table = charArray_length[4][(int) hash];
2593     int i = newEntry6;
2594     while (++i < InternalTableSize) {
2595       char[] charArray = table[i];
2596       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2597           && (c5 == charArray[5]))
2598         return charArray;
2599     }
2600     //---------other side---------
2601     i = -1;
2602     int max = newEntry6;
2603     while (++i <= max) {
2604       char[] charArray = table[i];
2605       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2606           && (c5 == charArray[5]))
2607         return charArray;
2608     }
2609     //--------add the entry-------
2610     if (++max >= InternalTableSize)
2611       max = 0;
2612     char[] r;
2613     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2614     newEntry6 = max;
2615     return r;
2616   }
2617
2618   public final void pushLineSeparator() throws InvalidInputException {
2619     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2620     final int INCREMENT = 250;
2621     if (this.checkNonExternalizedStringLiterals) {
2622       // reinitialize the current line for non externalize strings purpose
2623       currentLine = null;
2624     }
2625     //currentCharacter is at position currentPosition-1
2626     // cr 000D
2627     if (currentCharacter == '\r') {
2628       int separatorPos = currentPosition - 1;
2629       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2630         return;
2631       //System.out.println("CR-" + separatorPos);
2632       try {
2633         lineEnds[++linePtr] = separatorPos;
2634       } catch (IndexOutOfBoundsException e) {
2635         //linePtr value is correct
2636         int oldLength = lineEnds.length;
2637         int[] old = lineEnds;
2638         lineEnds = new int[oldLength + INCREMENT];
2639         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2640         lineEnds[linePtr] = separatorPos;
2641       }
2642       // look-ahead for merged cr+lf
2643       try {
2644         if (source[currentPosition] == '\n') {
2645           //System.out.println("look-ahead LF-" + currentPosition);
2646           lineEnds[linePtr] = currentPosition;
2647           currentPosition++;
2648           wasAcr = false;
2649         } else {
2650           wasAcr = true;
2651         }
2652       } catch (IndexOutOfBoundsException e) {
2653         wasAcr = true;
2654       }
2655     } else {
2656       // lf 000A
2657       if (currentCharacter == '\n') {
2658         //must merge eventual cr followed by lf
2659         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2660           //System.out.println("merge LF-" + (currentPosition - 1));
2661           lineEnds[linePtr] = currentPosition - 1;
2662         } else {
2663           int separatorPos = currentPosition - 1;
2664           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2665             return;
2666           // System.out.println("LF-" + separatorPos);
2667           try {
2668             lineEnds[++linePtr] = separatorPos;
2669           } catch (IndexOutOfBoundsException e) {
2670             //linePtr value is correct
2671             int oldLength = lineEnds.length;
2672             int[] old = lineEnds;
2673             lineEnds = new int[oldLength + INCREMENT];
2674             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2675             lineEnds[linePtr] = separatorPos;
2676           }
2677         }
2678         wasAcr = false;
2679       }
2680     }
2681   }
2682
2683   public final void pushUnicodeLineSeparator() {
2684     // isUnicode means that the \r or \n has been read as a unicode character
2685     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2686     final int INCREMENT = 250;
2687     //currentCharacter is at position currentPosition-1
2688     if (this.checkNonExternalizedStringLiterals) {
2689       // reinitialize the current line for non externalize strings purpose
2690       currentLine = null;
2691     }
2692     // cr 000D
2693     if (currentCharacter == '\r') {
2694       int separatorPos = currentPosition - 6;
2695       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2696         return;
2697       //System.out.println("CR-" + separatorPos);
2698       try {
2699         lineEnds[++linePtr] = separatorPos;
2700       } catch (IndexOutOfBoundsException e) {
2701         //linePtr value is correct
2702         int oldLength = lineEnds.length;
2703         int[] old = lineEnds;
2704         lineEnds = new int[oldLength + INCREMENT];
2705         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2706         lineEnds[linePtr] = separatorPos;
2707       }
2708       // look-ahead for merged cr+lf
2709       if (source[currentPosition] == '\n') {
2710         //System.out.println("look-ahead LF-" + currentPosition);
2711         lineEnds[linePtr] = currentPosition;
2712         currentPosition++;
2713         wasAcr = false;
2714       } else {
2715         wasAcr = true;
2716       }
2717     } else {
2718       // lf 000A
2719       if (currentCharacter == '\n') {
2720         //must merge eventual cr followed by lf
2721         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2722           //System.out.println("merge LF-" + (currentPosition - 1));
2723           lineEnds[linePtr] = currentPosition - 6;
2724         } else {
2725           int separatorPos = currentPosition - 6;
2726           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2727             return;
2728           // System.out.println("LF-" + separatorPos);
2729           try {
2730             lineEnds[++linePtr] = separatorPos;
2731           } catch (IndexOutOfBoundsException e) {
2732             //linePtr value is correct
2733             int oldLength = lineEnds.length;
2734             int[] old = lineEnds;
2735             lineEnds = new int[oldLength + INCREMENT];
2736             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2737             lineEnds[linePtr] = separatorPos;
2738           }
2739         }
2740         wasAcr = false;
2741       }
2742     }
2743   }
2744
2745   public void recordComment(int token) {
2746     // compute position
2747     int stopPosition = this.currentPosition;
2748     switch (token) {
2749     case TokenNameCOMMENT_LINE:
2750       stopPosition = -this.lastCommentLinePosition;
2751       break;
2752     case TokenNameCOMMENT_BLOCK:
2753       stopPosition = -this.currentPosition;
2754       break;
2755     }
2756
2757     // a new comment is recorded
2758     int length = this.commentStops.length;
2759     if (++this.commentPtr >= length) {
2760       System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2761       //grows the positions buffers too
2762       System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2763     }
2764     this.commentStops[this.commentPtr] = stopPosition;
2765     this.commentStarts[this.commentPtr] = this.startPosition;
2766   }
2767
2768   //  public final void recordComment(boolean isJavadoc) {
2769   //    // a new annotation comment is recorded
2770   //    try {
2771   //      commentStops[++commentPtr] = isJavadoc
2772   //          ? currentPosition
2773   //          : -currentPosition;
2774   //    } catch (IndexOutOfBoundsException e) {
2775   //      int oldStackLength = commentStops.length;
2776   //      int[] oldStack = commentStops;
2777   //      commentStops = new int[oldStackLength + 30];
2778   //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2779   //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2780   //      //grows the positions buffers too
2781   //      int[] old = commentStarts;
2782   //      commentStarts = new int[oldStackLength + 30];
2783   //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2784   //    }
2785   //    //the buffer is of a correct size here
2786   //    commentStarts[commentPtr] = startPosition;
2787   //  }
2788   public void resetTo(int begin, int end) {
2789     //reset the scanner to a given position where it may rescan again
2790     diet = false;
2791     initialPosition = startPosition = currentPosition = begin;
2792     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2793     commentPtr = -1; // reset comment stack
2794   }
2795
2796   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2797     // the string with "\\u" is a legal string of two chars \ and u
2798     //thus we use a direct access to the source (for regular cases).
2799     //    if (unicodeAsBackSlash) {
2800     //      // consume next character
2801     //      unicodeAsBackSlash = false;
2802     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2803     //        && (source[currentPosition] == 'u')) {
2804     //        getNextUnicodeChar();
2805     //      } else {
2806     //        if (withoutUnicodePtr != 0) {
2807     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2808     //        }
2809     //      }
2810     //    } else
2811     currentCharacter = source[currentPosition++];
2812     switch (currentCharacter) {
2813     case '\'':
2814       currentCharacter = '\'';
2815       break;
2816     case '\\':
2817       currentCharacter = '\\';
2818       break;
2819     default:
2820       currentCharacter = '\\';
2821       currentPosition--;
2822     }
2823   }
2824
2825   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2826     // the string with "\\u" is a legal string of two chars \ and u
2827     //thus we use a direct access to the source (for regular cases).
2828     //    if (unicodeAsBackSlash) {
2829     //      // consume next character
2830     //      unicodeAsBackSlash = false;
2831     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2832     //        && (source[currentPosition] == 'u')) {
2833     //        getNextUnicodeChar();
2834     //      } else {
2835     //        if (withoutUnicodePtr != 0) {
2836     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2837     //        }
2838     //      }
2839     //    } else
2840     currentCharacter = source[currentPosition++];
2841     switch (currentCharacter) {
2842     //      case 'b' :
2843     //        currentCharacter = '\b';
2844     //        break;
2845     case 't':
2846       currentCharacter = '\t';
2847       break;
2848     case 'n':
2849       currentCharacter = '\n';
2850       break;
2851     //      case 'f' :
2852     //        currentCharacter = '\f';
2853     //        break;
2854     case 'r':
2855       currentCharacter = '\r';
2856       break;
2857     case '\"':
2858       currentCharacter = '\"';
2859       break;
2860     case '\'':
2861       currentCharacter = '\'';
2862       break;
2863     case '\\':
2864       currentCharacter = '\\';
2865       break;
2866     case '$':
2867       currentCharacter = '$';
2868       break;
2869     default:
2870       // -----------octal escape--------------
2871       // OctalDigit
2872       // OctalDigit OctalDigit
2873       // ZeroToThree OctalDigit OctalDigit
2874       int number = Character.getNumericValue(currentCharacter);
2875       if (number >= 0 && number <= 7) {
2876         boolean zeroToThreeNot = number > 3;
2877         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2878           int digit = Character.getNumericValue(currentCharacter);
2879           if (digit >= 0 && digit <= 7) {
2880             number = (number * 8) + digit;
2881             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2882               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2883                 // Digit --> ignore last character
2884                 currentPosition--;
2885               } else {
2886                 digit = Character.getNumericValue(currentCharacter);
2887                 if (digit >= 0 && digit <= 7) {
2888                   // has read \ZeroToThree OctalDigit OctalDigit
2889                   number = (number * 8) + digit;
2890                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2891                   // --> ignore last character
2892                   currentPosition--;
2893                 }
2894               }
2895             } else { // has read \OctalDigit NonDigit--> ignore last
2896               // character
2897               currentPosition--;
2898             }
2899           } else { // has read \OctalDigit NonOctalDigit--> ignore last
2900             // character
2901             currentPosition--;
2902           }
2903         } else { // has read \OctalDigit --> ignore last character
2904           currentPosition--;
2905         }
2906         if (number > 255)
2907           throw new InvalidInputException(INVALID_ESCAPE);
2908         currentCharacter = (char) number;
2909       }
2910     //else
2911     //     throw new InvalidInputException(INVALID_ESCAPE);
2912     }
2913   }
2914
2915   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2916   //    return scanIdentifierOrKeyword( false );
2917   //  }
2918   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2919     //test keywords
2920     //first dispatch on the first char.
2921     //then the length. If there are several
2922     //keywors with the same length AND the same first char, then do another
2923     //disptach on the second char :-)...cool....but fast !
2924     useAssertAsAnIndentifier = false;
2925     while (getNextCharAsJavaIdentifierPart()) {
2926     }
2927     ;
2928     if (isVariable) {
2929       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2930       //        return TokenNamethis;
2931       //      }
2932       return TokenNameVariable;
2933     }
2934     int index, length;
2935     char[] data;
2936     char firstLetter;
2937     //    if (withoutUnicodePtr == 0)
2938     //quick test on length == 1 but not on length > 12 while most identifier
2939     //have a length which is <= 12...but there are lots of identifier with
2940     //only one char....
2941     //      {
2942     if ((length = currentPosition - startPosition) == 1)
2943       return TokenNameIdentifier;
2944     //  data = source;
2945     data = new char[length];
2946     index = startPosition;
2947     for (int i = 0; i < length; i++) {
2948       data[i] = Character.toLowerCase(source[index + i]);
2949     }
2950     index = 0;
2951     //    } else {
2952     //      if ((length = withoutUnicodePtr) == 1)
2953     //        return TokenNameIdentifier;
2954     //      // data = withoutUnicodeBuffer;
2955     //      data = new char[withoutUnicodeBuffer.length];
2956     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2957     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2958     //      }
2959     //      index = 1;
2960     //    }
2961     firstLetter = data[index];
2962     switch (firstLetter) {
2963     case '_':
2964       switch (length) {
2965       case 8:
2966         //__FILE__
2967         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
2968             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2969           return TokenNameFILE;
2970         index = 0; //__LINE__
2971         if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
2972             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2973           return TokenNameLINE;
2974         break;
2975       case 9:
2976         //__CLASS__
2977         if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
2978             && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
2979           return TokenNameCLASS_C;
2980         break;
2981       case 11:
2982         //__METHOD__
2983         if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
2984             && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
2985             && (data[++index] == '_'))
2986           return TokenNameMETHOD_C;
2987         break;
2988       case 12:
2989         //__FUNCTION__
2990         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
2991             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
2992             && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
2993           return TokenNameFUNC_C;
2994         break;
2995       }
2996       return TokenNameIdentifier;
2997     case 'a':
2998       // as and array abstract
2999       switch (length) {
3000       case 2:
3001         //as
3002         if ((data[++index] == 's')) {
3003           return TokenNameas;
3004         } else {
3005           return TokenNameIdentifier;
3006         }
3007       case 3:
3008         //and
3009         if ((data[++index] == 'n') && (data[++index] == 'd')) {
3010           return TokenNameand;
3011         } else {
3012           return TokenNameIdentifier;
3013         }
3014       case 5:
3015         // array
3016         if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3017           return TokenNamearray;
3018         else
3019           return TokenNameIdentifier;
3020       case 8:
3021         if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3022             && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3023           return TokenNameabstract;
3024         else
3025           return TokenNameIdentifier;
3026       default:
3027         return TokenNameIdentifier;
3028       }
3029     case 'b':
3030       //break
3031       switch (length) {
3032       case 5:
3033         if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3034           return TokenNamebreak;
3035         else
3036           return TokenNameIdentifier;
3037       default:
3038         return TokenNameIdentifier;
3039       }
3040     case 'c':
3041       //case catch class clone const continue
3042       switch (length) {
3043       case 4:
3044         if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3045           return TokenNamecase;
3046         else
3047           return TokenNameIdentifier;
3048       case 5:
3049         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3050           return TokenNamecatch;
3051         index = 0;
3052         if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3053           return TokenNameclass;
3054         index = 0;
3055         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3056           return TokenNameclone;
3057         index = 0;
3058         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3059           return TokenNameconst;
3060         else
3061           return TokenNameIdentifier;
3062       case 8:
3063         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3064             && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3065           return TokenNamecontinue;
3066         else
3067           return TokenNameIdentifier;
3068       default:
3069         return TokenNameIdentifier;
3070       }
3071     case 'd':
3072       // declare default do die
3073       // TODO delete define ==> no keyword !
3074       switch (length) {
3075       case 2:
3076         if ((data[++index] == 'o'))
3077           return TokenNamedo;
3078         else
3079           return TokenNameIdentifier;
3080       //          case 6 :
3081       //            if ((data[++index] == 'e')
3082       //              && (data[++index] == 'f')
3083       //              && (data[++index] == 'i')
3084       //              && (data[++index] == 'n')
3085       //              && (data[++index] == 'e'))
3086       //              return TokenNamedefine;
3087       //            else
3088       //              return TokenNameIdentifier;
3089       case 7:
3090         if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3091             && (data[++index] == 'r') && (data[++index] == 'e'))
3092           return TokenNamedeclare;
3093         index = 0;
3094         if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3095             && (data[++index] == 'l') && (data[++index] == 't'))
3096           return TokenNamedefault;
3097         else
3098           return TokenNameIdentifier;
3099       default:
3100         return TokenNameIdentifier;
3101       }
3102     case 'e':
3103       //echo else exit elseif extends eval
3104       switch (length) {
3105       case 4:
3106         if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3107           return TokenNameecho;
3108         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3109           return TokenNameelse;
3110         else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3111           return TokenNameexit;
3112         else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3113           return TokenNameeval;
3114         else
3115           return TokenNameIdentifier;
3116       case 5:
3117         // endif empty
3118         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3119           return TokenNameendif;
3120         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3121           return TokenNameempty;
3122         else
3123           return TokenNameIdentifier;
3124       case 6:
3125         // endfor
3126         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3127             && (data[++index] == 'r'))
3128           return TokenNameendfor;
3129         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3130             && (data[++index] == 'f'))
3131           return TokenNameelseif;
3132         else
3133           return TokenNameIdentifier;
3134       case 7:
3135         if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3136             && (data[++index] == 'd') && (data[++index] == 's'))
3137           return TokenNameextends;
3138         else
3139           return TokenNameIdentifier;
3140       case 8:
3141         // endwhile
3142         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3143             && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3144           return TokenNameendwhile;
3145         else
3146           return TokenNameIdentifier;
3147       case 9:
3148         // endswitch
3149         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3150             && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3151           return TokenNameendswitch;
3152         else
3153           return TokenNameIdentifier;
3154       case 10:
3155         // enddeclare
3156         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3157             && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3158             && (data[++index] == 'e'))
3159           return TokenNameenddeclare;
3160         index = 0;
3161         if ((data[++index] == 'n') // endforeach
3162             && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3163             && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3164           return TokenNameendforeach;
3165         else
3166           return TokenNameIdentifier;
3167       default:
3168         return TokenNameIdentifier;
3169       }
3170     case 'f':
3171       //for false final function
3172       switch (length) {
3173       case 3:
3174         if ((data[++index] == 'o') && (data[++index] == 'r'))
3175           return TokenNamefor;
3176         else
3177           return TokenNameIdentifier;
3178       case 5:
3179         //            if ((data[++index] == 'a') && (data[++index] == 'l')
3180         //                && (data[++index] == 's') && (data[++index] == 'e'))
3181         //              return TokenNamefalse;
3182         if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3183           return TokenNamefinal;
3184         else
3185           return TokenNameIdentifier;
3186       case 7:
3187         // foreach
3188         if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3189             && (data[++index] == 'c') && (data[++index] == 'h'))
3190           return TokenNameforeach;
3191         else
3192           return TokenNameIdentifier;
3193       case 8:
3194         // function
3195         if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3196             && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3197           return TokenNamefunction;
3198         else
3199           return TokenNameIdentifier;
3200       default:
3201         return TokenNameIdentifier;
3202       }
3203     case 'g':
3204       //global
3205       if (length == 6) {
3206         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3207             && (data[++index] == 'l')) {
3208           return TokenNameglobal;
3209         }
3210       }
3211       return TokenNameIdentifier;
3212     case 'i':
3213       //if int isset include include_once instanceof interface implements
3214       switch (length) {
3215       case 2:
3216         if (data[++index] == 'f')
3217           return TokenNameif;
3218         else
3219           return TokenNameIdentifier;
3220       //          case 3 :
3221       //            if ((data[++index] == 'n') && (data[++index] == 't'))
3222       //              return TokenNameint;
3223       //            else
3224       //              return TokenNameIdentifier;
3225       case 5:
3226         if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3227           return TokenNameisset;
3228         else
3229           return TokenNameIdentifier;
3230       case 7:
3231         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3232             && (data[++index] == 'd') && (data[++index] == 'e'))
3233           return TokenNameinclude;
3234         else
3235           return TokenNameIdentifier;
3236       case 9:
3237         // interface
3238         if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3239             && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3240           return TokenNameinterface;
3241         else
3242           return TokenNameIdentifier;
3243       case 10:
3244         // instanceof
3245         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3246             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3247             && (data[++index] == 'f'))
3248           return TokenNameinstanceof;
3249         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3250             && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3251             && (data[++index] == 's'))
3252           return TokenNameimplements;
3253         else
3254           return TokenNameIdentifier;
3255       case 12:
3256         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3257             && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3258             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3259           return TokenNameinclude_once;
3260         else
3261           return TokenNameIdentifier;
3262       default:
3263         return TokenNameIdentifier;
3264       }
3265     case 'l':
3266       //list
3267       if (length == 4) {
3268         if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3269           return TokenNamelist;
3270         }
3271       }
3272       return TokenNameIdentifier;
3273     case 'n':
3274       // new null
3275       switch (length) {
3276       case 3:
3277         if ((data[++index] == 'e') && (data[++index] == 'w'))
3278           return TokenNamenew;
3279         else
3280           return TokenNameIdentifier;
3281       //          case 4 :
3282       //            if ((data[++index] == 'u') && (data[++index] == 'l')
3283       //                && (data[++index] == 'l'))
3284       //              return TokenNamenull;
3285       //            else
3286       //              return TokenNameIdentifier;
3287       default:
3288         return TokenNameIdentifier;
3289       }
3290     case 'o':
3291       // or old_function
3292       if (length == 2) {
3293         if (data[++index] == 'r') {
3294           return TokenNameor;
3295         }
3296       }
3297       //        if (length == 12) {
3298       //          if ((data[++index] == 'l')
3299       //            && (data[++index] == 'd')
3300       //            && (data[++index] == '_')
3301       //            && (data[++index] == 'f')
3302       //            && (data[++index] == 'u')
3303       //            && (data[++index] == 'n')
3304       //            && (data[++index] == 'c')
3305       //            && (data[++index] == 't')
3306       //            && (data[++index] == 'i')
3307       //            && (data[++index] == 'o')
3308       //            && (data[++index] == 'n')) {
3309       //            return TokenNameold_function;
3310       //          }
3311       //        }
3312       return TokenNameIdentifier;
3313     case 'p':
3314       // print public private protected
3315       switch (length) {
3316       case 5:
3317         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3318           return TokenNameprint;
3319         } else
3320           return TokenNameIdentifier;
3321       case 6:
3322         if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3323             && (data[++index] == 'c')) {
3324           return TokenNamepublic;
3325         } else
3326           return TokenNameIdentifier;
3327       case 7:
3328         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3329             && (data[++index] == 't') && (data[++index] == 'e')) {
3330           return TokenNameprivate;
3331         } else
3332           return TokenNameIdentifier;
3333       case 9:
3334         if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3335             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3336           return TokenNameprotected;
3337         } else
3338           return TokenNameIdentifier;
3339       }
3340       return TokenNameIdentifier;
3341     case 'r':
3342       //return require require_once
3343       if (length == 6) {
3344         if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3345             && (data[++index] == 'n')) {
3346           return TokenNamereturn;
3347         }
3348       } else if (length == 7) {
3349         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3350             && (data[++index] == 'r') && (data[++index] == 'e')) {
3351           return TokenNamerequire;
3352         }
3353       } else if (length == 12) {
3354         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3355             && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3356             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3357           return TokenNamerequire_once;
3358         }
3359       } else
3360         return TokenNameIdentifier;
3361     case 's':
3362       //static switch
3363       switch (length) {
3364       case 6:
3365         if (data[++index] == 't')
3366           if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3367             return TokenNamestatic;
3368           } else
3369             return TokenNameIdentifier;
3370         else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3371             && (data[++index] == 'h'))
3372           return TokenNameswitch;
3373         else
3374           return TokenNameIdentifier;
3375       default:
3376         return TokenNameIdentifier;
3377       }
3378     case 't':
3379       // try true throw
3380       switch (length) {
3381       case 3:
3382         if ((data[++index] == 'r') && (data[++index] == 'y'))
3383           return TokenNametry;
3384         else
3385           return TokenNameIdentifier;
3386       //          case 4 :
3387       //            if ((data[++index] == 'r') && (data[++index] == 'u')
3388       //                && (data[++index] == 'e'))
3389       //              return TokenNametrue;
3390       //            else
3391       //              return TokenNameIdentifier;
3392       case 5:
3393         if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3394           return TokenNamethrow;
3395         else
3396           return TokenNameIdentifier;
3397       default:
3398         return TokenNameIdentifier;
3399       }
3400     case 'u':
3401       //use unset
3402       switch (length) {
3403       case 3:
3404         if ((data[++index] == 's') && (data[++index] == 'e'))
3405           return TokenNameuse;
3406         else
3407           return TokenNameIdentifier;
3408       case 5:
3409         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3410           return TokenNameunset;
3411         else
3412           return TokenNameIdentifier;
3413       default:
3414         return TokenNameIdentifier;
3415       }
3416     case 'v':
3417       //var
3418       switch (length) {
3419       case 3:
3420         if ((data[++index] == 'a') && (data[++index] == 'r'))
3421           return TokenNamevar;
3422         else
3423           return TokenNameIdentifier;
3424       default:
3425         return TokenNameIdentifier;
3426       }
3427     case 'w':
3428       //while
3429       switch (length) {
3430       case 5:
3431         if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3432           return TokenNamewhile;
3433         else
3434           return TokenNameIdentifier;
3435       //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3436       // (data[++index]=='e') && (data[++index]=='f')&&
3437       // (data[++index]=='p'))
3438       //return TokenNamewidefp ;
3439       //else
3440       //return TokenNameIdentifier;
3441       default:
3442         return TokenNameIdentifier;
3443       }
3444     case 'x':
3445       //xor
3446       switch (length) {
3447       case 3:
3448         if ((data[++index] == 'o') && (data[++index] == 'r'))
3449           return TokenNamexor;
3450         else
3451           return TokenNameIdentifier;
3452       default:
3453         return TokenNameIdentifier;
3454       }
3455     default:
3456       return TokenNameIdentifier;
3457     }
3458   }
3459
3460   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3461     //when entering this method the currentCharacter is the firt
3462     //digit of the number , i.e. it may be preceeded by a . when
3463     //dotPrefix is true
3464     boolean floating = dotPrefix;
3465     if ((!dotPrefix) && (currentCharacter == '0')) {
3466       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3467         //force the first char of the hexa number do exist...
3468         // consume next character
3469         unicodeAsBackSlash = false;
3470         currentCharacter = source[currentPosition++];
3471         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3472         //          && (source[currentPosition] == 'u')) {
3473         //          getNextUnicodeChar();
3474         //        } else {
3475         //          if (withoutUnicodePtr != 0) {
3476         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3477         //          }
3478         //        }
3479         if (Character.digit(currentCharacter, 16) == -1)
3480           throw new InvalidInputException(INVALID_HEXA);
3481         //---end forcing--
3482         while (getNextCharAsDigit(16)) {
3483         }
3484         ;
3485         //        if (getNextChar('l', 'L') >= 0)
3486         //          return TokenNameLongLiteral;
3487         //        else
3488         return TokenNameIntegerLiteral;
3489       }
3490       //there is x or X in the number
3491       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3492       // 00078.0 is true !!!!! crazy language
3493       if (getNextCharAsDigit()) {
3494         //-------------potential octal-----------------
3495         while (getNextCharAsDigit()) {
3496         }
3497         ;
3498         //        if (getNextChar('l', 'L') >= 0) {
3499         //          return TokenNameLongLiteral;
3500         //        }
3501         //
3502         //        if (getNextChar('f', 'F') >= 0) {
3503         //          return TokenNameFloatingPointLiteral;
3504         //        }
3505         if (getNextChar('d', 'D') >= 0) {
3506           return TokenNameDoubleLiteral;
3507         } else { //make the distinction between octal and float ....
3508           if (getNextChar('.')) { //bingo ! ....
3509             while (getNextCharAsDigit()) {
3510             }
3511             ;
3512             if (getNextChar('e', 'E') >= 0) {
3513               // consume next character
3514               unicodeAsBackSlash = false;
3515               currentCharacter = source[currentPosition++];
3516               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3517               //                && (source[currentPosition] == 'u')) {
3518               //                getNextUnicodeChar();
3519               //              } else {
3520               //                if (withoutUnicodePtr != 0) {
3521               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3522               //                }
3523               //              }
3524               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3525                 // consume next character
3526                 unicodeAsBackSlash = false;
3527                 currentCharacter = source[currentPosition++];
3528                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3529                 //                  && (source[currentPosition] == 'u')) {
3530                 //                  getNextUnicodeChar();
3531                 //                } else {
3532                 //                  if (withoutUnicodePtr != 0) {
3533                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3534                 //                      currentCharacter;
3535                 //                  }
3536                 //                }
3537               }
3538               if (!Character.isDigit(currentCharacter))
3539                 throw new InvalidInputException(INVALID_FLOAT);
3540               while (getNextCharAsDigit()) {
3541               }
3542               ;
3543             }
3544             //            if (getNextChar('f', 'F') >= 0)
3545             //              return TokenNameFloatingPointLiteral;
3546             getNextChar('d', 'D'); //jump over potential d or D
3547             return TokenNameDoubleLiteral;
3548           } else {
3549             return TokenNameIntegerLiteral;
3550           }
3551         }
3552       } else {
3553         /* carry on */
3554       }
3555     }
3556     while (getNextCharAsDigit()) {
3557     }
3558     ;
3559     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3560     //      return TokenNameLongLiteral;
3561     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3562       while (getNextCharAsDigit()) {
3563       }
3564       ;
3565       floating = true;
3566     }
3567     //if floating is true both exponant and suffix may be optional
3568     if (getNextChar('e', 'E') >= 0) {
3569       floating = true;
3570       // consume next character
3571       unicodeAsBackSlash = false;
3572       currentCharacter = source[currentPosition++];
3573       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3574       //        && (source[currentPosition] == 'u')) {
3575       //        getNextUnicodeChar();
3576       //      } else {
3577       //        if (withoutUnicodePtr != 0) {
3578       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3579       //        }
3580       //      }
3581       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3582         // next
3583         // character
3584         unicodeAsBackSlash = false;
3585         currentCharacter = source[currentPosition++];
3586         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3587         //          && (source[currentPosition] == 'u')) {
3588         //          getNextUnicodeChar();
3589         //        } else {
3590         //          if (withoutUnicodePtr != 0) {
3591         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3592         //          }
3593         //        }
3594       }
3595       if (!Character.isDigit(currentCharacter))
3596         throw new InvalidInputException(INVALID_FLOAT);
3597       while (getNextCharAsDigit()) {
3598       }
3599       ;
3600     }
3601     if (getNextChar('d', 'D') >= 0)
3602       return TokenNameDoubleLiteral;
3603     //    if (getNextChar('f', 'F') >= 0)
3604     //      return TokenNameFloatingPointLiteral;
3605     //the long flag has been tested before
3606     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3607   }
3608
3609   /**
3610    * Search the line number corresponding to a specific position
3611    *
3612    */
3613   public final int getLineNumber(int position) {
3614     if (lineEnds == null)
3615       return 1;
3616     int length = linePtr + 1;
3617     if (length == 0)
3618       return 1;
3619     int g = 0, d = length - 1;
3620     int m = 0;
3621     while (g <= d) {
3622       m = (g + d) / 2;
3623       if (position < lineEnds[m]) {
3624         d = m - 1;
3625       } else if (position > lineEnds[m]) {
3626         g = m + 1;
3627       } else {
3628         return m + 1;
3629       }
3630     }
3631     if (position < lineEnds[m]) {
3632       return m + 1;
3633     }
3634     return m + 2;
3635   }
3636
3637   public void setPHPMode(boolean mode) {
3638     phpMode = mode;
3639   }
3640
3641   public final void setSource(char[] source) {
3642     setSource(null, source);
3643   }
3644
3645   public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3646     //the source-buffer is set to sourceString
3647     this.compilationUnit = compilationUnit;
3648     if (source == null) {
3649       this.source = new char[0];
3650     } else {
3651       this.source = source;
3652     }
3653     startPosition = -1;
3654     initialPosition = currentPosition = 0;
3655     containsAssertKeyword = false;
3656     withoutUnicodeBuffer = new char[this.source.length];
3657     encapsedStringStack = new Stack();
3658   }
3659
3660   public String toString() {
3661     if (startPosition == source.length)
3662       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3663     if (currentPosition > source.length)
3664       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3665     char front[] = new char[startPosition];
3666     System.arraycopy(source, 0, front, 0, startPosition);
3667     int middleLength = (currentPosition - 1) - startPosition + 1;
3668     char middle[];
3669     if (middleLength > -1) {
3670       middle = new char[middleLength];
3671       System.arraycopy(source, startPosition, middle, 0, middleLength);
3672     } else {
3673       middle = new char[0];
3674     }
3675     char end[] = new char[source.length - (currentPosition - 1)];
3676     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3677     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3678         + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3679         + new String(end);
3680   }
3681
3682   public final String toStringAction(int act) {
3683     switch (act) {
3684     case TokenNameERROR:
3685       return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3686     // //$NON-NLS-1$
3687     case TokenNameINLINE_HTML:
3688       return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3689     case TokenNameIdentifier:
3690       return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3691     case TokenNameVariable:
3692       return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3693     case TokenNameabstract:
3694       return "abstract"; //$NON-NLS-1$
3695     case TokenNameand:
3696       return "AND"; //$NON-NLS-1$
3697     case TokenNamearray:
3698       return "array"; //$NON-NLS-1$
3699     case TokenNameas:
3700       return "as"; //$NON-NLS-1$
3701     case TokenNamebreak:
3702       return "break"; //$NON-NLS-1$
3703     case TokenNamecase:
3704       return "case"; //$NON-NLS-1$
3705     case TokenNameclass:
3706       return "class"; //$NON-NLS-1$
3707     case TokenNamecatch:
3708       return "catch"; //$NON-NLS-1$
3709     case TokenNameclone:
3710       //$NON-NLS-1$
3711       return "clone";
3712     case TokenNameconst:
3713       //$NON-NLS-1$
3714       return "const";
3715     case TokenNamecontinue:
3716       return "continue"; //$NON-NLS-1$
3717     case TokenNamedefault:
3718       return "default"; //$NON-NLS-1$
3719     //      case TokenNamedefine :
3720     //        return "define"; //$NON-NLS-1$
3721     case TokenNamedo:
3722       return "do"; //$NON-NLS-1$
3723     case TokenNameecho:
3724       return "echo"; //$NON-NLS-1$
3725     case TokenNameelse:
3726       return "else"; //$NON-NLS-1$
3727     case TokenNameelseif:
3728       return "elseif"; //$NON-NLS-1$
3729     case TokenNameendfor:
3730       return "endfor"; //$NON-NLS-1$
3731     case TokenNameendforeach:
3732       return "endforeach"; //$NON-NLS-1$
3733     case TokenNameendif:
3734       return "endif"; //$NON-NLS-1$
3735     case TokenNameendswitch:
3736       return "endswitch"; //$NON-NLS-1$
3737     case TokenNameendwhile:
3738       return "endwhile"; //$NON-NLS-1$
3739     case TokenNameexit:
3740       return "exit";
3741     case TokenNameextends:
3742       return "extends"; //$NON-NLS-1$
3743     //      case TokenNamefalse :
3744     //        return "false"; //$NON-NLS-1$
3745     case TokenNamefinal:
3746       return "final"; //$NON-NLS-1$
3747     case TokenNamefor:
3748       return "for"; //$NON-NLS-1$
3749     case TokenNameforeach:
3750       return "foreach"; //$NON-NLS-1$
3751     case TokenNamefunction:
3752       return "function"; //$NON-NLS-1$
3753     case TokenNameglobal:
3754       return "global"; //$NON-NLS-1$
3755     case TokenNameif:
3756       return "if"; //$NON-NLS-1$
3757     case TokenNameimplements:
3758       return "implements"; //$NON-NLS-1$
3759     case TokenNameinclude:
3760       return "include"; //$NON-NLS-1$
3761     case TokenNameinclude_once:
3762       return "include_once"; //$NON-NLS-1$
3763     case TokenNameinstanceof:
3764       return "instanceof"; //$NON-NLS-1$
3765     case TokenNameinterface:
3766       return "interface"; //$NON-NLS-1$
3767     case TokenNameisset:
3768       return "isset"; //$NON-NLS-1$
3769     case TokenNamelist:
3770       return "list"; //$NON-NLS-1$
3771     case TokenNamenew:
3772       return "new"; //$NON-NLS-1$
3773     //      case TokenNamenull :
3774     //        return "null"; //$NON-NLS-1$
3775     case TokenNameor:
3776       return "OR"; //$NON-NLS-1$
3777     case TokenNameprint:
3778       return "print"; //$NON-NLS-1$
3779     case TokenNameprivate:
3780       return "private"; //$NON-NLS-1$
3781     case TokenNameprotected:
3782       return "protected"; //$NON-NLS-1$
3783     case TokenNamepublic:
3784       return "public"; //$NON-NLS-1$
3785     case TokenNamerequire:
3786       return "require"; //$NON-NLS-1$
3787     case TokenNamerequire_once:
3788       return "require_once"; //$NON-NLS-1$
3789     case TokenNamereturn:
3790       return "return"; //$NON-NLS-1$
3791     case TokenNamestatic:
3792       return "static"; //$NON-NLS-1$
3793     case TokenNameswitch:
3794       return "switch"; //$NON-NLS-1$
3795     //      case TokenNametrue :
3796     //        return "true"; //$NON-NLS-1$
3797     case TokenNameunset:
3798       return "unset"; //$NON-NLS-1$
3799     case TokenNamevar:
3800       return "var"; //$NON-NLS-1$
3801     case TokenNamewhile:
3802       return "while"; //$NON-NLS-1$
3803     case TokenNamexor:
3804       return "XOR"; //$NON-NLS-1$
3805     //      case TokenNamethis :
3806     //        return "$this"; //$NON-NLS-1$
3807     case TokenNameIntegerLiteral:
3808       return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3809     case TokenNameDoubleLiteral:
3810       return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3811     case TokenNameStringDoubleQuote:
3812       return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3813     case TokenNameStringSingleQuote:
3814       return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3815     case TokenNameStringInterpolated:
3816       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3817     case TokenNameEncapsedString0:
3818       return "`"; //$NON-NLS-1$
3819     case TokenNameEncapsedString1:
3820       return "\'"; //$NON-NLS-1$
3821     case TokenNameEncapsedString2:
3822       return "\""; //$NON-NLS-1$
3823     case TokenNameSTRING:
3824       return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3825     case TokenNameHEREDOC:
3826       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3827     case TokenNamePLUS_PLUS:
3828       return "++"; //$NON-NLS-1$
3829     case TokenNameMINUS_MINUS:
3830       return "--"; //$NON-NLS-1$
3831     case TokenNameEQUAL_EQUAL:
3832       return "=="; //$NON-NLS-1$
3833     case TokenNameEQUAL_EQUAL_EQUAL:
3834       return "==="; //$NON-NLS-1$
3835     case TokenNameEQUAL_GREATER:
3836       return "=>"; //$NON-NLS-1$
3837     case TokenNameLESS_EQUAL:
3838       return "<="; //$NON-NLS-1$
3839     case TokenNameGREATER_EQUAL:
3840       return ">="; //$NON-NLS-1$
3841     case TokenNameNOT_EQUAL:
3842       return "!="; //$NON-NLS-1$
3843     case TokenNameNOT_EQUAL_EQUAL:
3844       return "!=="; //$NON-NLS-1$
3845     case TokenNameLEFT_SHIFT:
3846       return "<<"; //$NON-NLS-1$
3847     case TokenNameRIGHT_SHIFT:
3848       return ">>"; //$NON-NLS-1$
3849     case TokenNamePLUS_EQUAL:
3850       return "+="; //$NON-NLS-1$
3851     case TokenNameMINUS_EQUAL:
3852       return "-="; //$NON-NLS-1$
3853     case TokenNameMULTIPLY_EQUAL:
3854       return "*="; //$NON-NLS-1$
3855     case TokenNameDIVIDE_EQUAL:
3856       return "/="; //$NON-NLS-1$
3857     case TokenNameAND_EQUAL:
3858       return "&="; //$NON-NLS-1$
3859     case TokenNameOR_EQUAL:
3860       return "|="; //$NON-NLS-1$
3861     case TokenNameXOR_EQUAL:
3862       return "^="; //$NON-NLS-1$
3863     case TokenNameREMAINDER_EQUAL:
3864       return "%="; //$NON-NLS-1$
3865     case TokenNameDOT_EQUAL:
3866       return ".="; //$NON-NLS-1$
3867     case TokenNameLEFT_SHIFT_EQUAL:
3868       return "<<="; //$NON-NLS-1$
3869     case TokenNameRIGHT_SHIFT_EQUAL:
3870       return ">>="; //$NON-NLS-1$
3871     case TokenNameOR_OR:
3872       return "||"; //$NON-NLS-1$
3873     case TokenNameAND_AND:
3874       return "&&"; //$NON-NLS-1$
3875     case TokenNamePLUS:
3876       return "+"; //$NON-NLS-1$
3877     case TokenNameMINUS:
3878       return "-"; //$NON-NLS-1$
3879     case TokenNameMINUS_GREATER:
3880       return "->";
3881     case TokenNameNOT:
3882       return "!"; //$NON-NLS-1$
3883     case TokenNameREMAINDER:
3884       return "%"; //$NON-NLS-1$
3885     case TokenNameXOR:
3886       return "^"; //$NON-NLS-1$
3887     case TokenNameAND:
3888       return "&"; //$NON-NLS-1$
3889     case TokenNameMULTIPLY:
3890       return "*"; //$NON-NLS-1$
3891     case TokenNameOR:
3892       return "|"; //$NON-NLS-1$
3893     case TokenNameTWIDDLE:
3894       return "~"; //$NON-NLS-1$
3895     case TokenNameTWIDDLE_EQUAL:
3896       return "~="; //$NON-NLS-1$
3897     case TokenNameDIVIDE:
3898       return "/"; //$NON-NLS-1$
3899     case TokenNameGREATER:
3900       return ">"; //$NON-NLS-1$
3901     case TokenNameLESS:
3902       return "<"; //$NON-NLS-1$
3903     case TokenNameLPAREN:
3904       return "("; //$NON-NLS-1$
3905     case TokenNameRPAREN:
3906       return ")"; //$NON-NLS-1$
3907     case TokenNameLBRACE:
3908       return "{"; //$NON-NLS-1$
3909     case TokenNameRBRACE:
3910       return "}"; //$NON-NLS-1$
3911     case TokenNameLBRACKET:
3912       return "["; //$NON-NLS-1$
3913     case TokenNameRBRACKET:
3914       return "]"; //$NON-NLS-1$
3915     case TokenNameSEMICOLON:
3916       return ";"; //$NON-NLS-1$
3917     case TokenNameQUESTION:
3918       return "?"; //$NON-NLS-1$
3919     case TokenNameCOLON:
3920       return ":"; //$NON-NLS-1$
3921     case TokenNameCOMMA:
3922       return ","; //$NON-NLS-1$
3923     case TokenNameDOT:
3924       return "."; //$NON-NLS-1$
3925     case TokenNameEQUAL:
3926       return "="; //$NON-NLS-1$
3927     case TokenNameAT:
3928       return "@";
3929     case TokenNameDOLLAR:
3930       return "$";
3931     case TokenNameDOLLAR_LBRACE:
3932       return "${";
3933     case TokenNameLBRACE_DOLLAR:
3934       return "{$";
3935     case TokenNameEOF:
3936       return "EOF"; //$NON-NLS-1$
3937     case TokenNameWHITESPACE:
3938       return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3939     case TokenNameCOMMENT_LINE:
3940       return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3941     case TokenNameCOMMENT_BLOCK:
3942       return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3943     case TokenNameCOMMENT_PHPDOC:
3944       return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3945     //      case TokenNameHTML :
3946     //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
3947     // //$NON-NLS-1$
3948     case TokenNameFILE:
3949       return "__FILE__"; //$NON-NLS-1$
3950     case TokenNameLINE:
3951       return "__LINE__"; //$NON-NLS-1$
3952     case TokenNameCLASS_C:
3953       return "__CLASS__"; //$NON-NLS-1$
3954     case TokenNameMETHOD_C:
3955       return "__METHOD__"; //$NON-NLS-1$
3956     case TokenNameFUNC_C:
3957       return "__FUNCTION__"; //$NON-NLS-1
3958     case TokenNameboolCAST:
3959       return "( bool )"; //$NON-NLS-1$
3960     case TokenNameintCAST:
3961       return "( int )"; //$NON-NLS-1$
3962     case TokenNamedoubleCAST:
3963       return "( double )"; //$NON-NLS-1$
3964     case TokenNameobjectCAST:
3965       return "( object )"; //$NON-NLS-1$
3966     case TokenNamestringCAST:
3967       return "( string )"; //$NON-NLS-1$
3968     default:
3969       return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3970     }
3971   }
3972
3973   public Scanner() {
3974     this(false, false);
3975   }
3976
3977   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
3978     this(tokenizeComments, tokenizeWhiteSpace, false);
3979   }
3980
3981   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3982     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3983   }
3984
3985   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
3986       boolean assertMode) {
3987     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null);
3988   }
3989
3990   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
3991       boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities) {
3992     this.eofPosition = Integer.MAX_VALUE;
3993     this.tokenizeComments = tokenizeComments;
3994     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3995     this.tokenizeStrings = tokenizeStrings;
3996     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3997     this.assertMode = assertMode;
3998     this.encapsedStringStack = null;
3999     this.taskTags = taskTags;
4000     this.taskPriorities = taskPriorities;
4001   }
4002
4003   private void checkNonExternalizeString() throws InvalidInputException {
4004     if (currentLine == null)
4005       return;
4006     parseTags(currentLine);
4007   }
4008
4009   private void parseTags(NLSLine line) throws InvalidInputException {
4010     String s = new String(getCurrentTokenSource());
4011     int pos = s.indexOf(TAG_PREFIX);
4012     int lineLength = line.size();
4013     while (pos != -1) {
4014       int start = pos + TAG_PREFIX_LENGTH;
4015       int end = s.indexOf(TAG_POSTFIX, start);
4016       String index = s.substring(start, end);
4017       int i = 0;
4018       try {
4019         i = Integer.parseInt(index) - 1;
4020         // Tags are one based not zero based.
4021       } catch (NumberFormatException e) {
4022         i = -1; // we don't want to consider this as a valid NLS tag
4023       }
4024       if (line.exists(i)) {
4025         line.set(i, null);
4026       }
4027       pos = s.indexOf(TAG_PREFIX, start);
4028     }
4029     this.nonNLSStrings = new StringLiteral[lineLength];
4030     int nonNLSCounter = 0;
4031     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4032       StringLiteral literal = (StringLiteral) iterator.next();
4033       if (literal != null) {
4034         this.nonNLSStrings[nonNLSCounter++] = literal;
4035       }
4036     }
4037     if (nonNLSCounter == 0) {
4038       this.nonNLSStrings = null;
4039       currentLine = null;
4040       return;
4041     }
4042     this.wasNonExternalizedStringLiteral = true;
4043     if (nonNLSCounter != lineLength) {
4044       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4045     }
4046     currentLine = null;
4047   }
4048
  /**
   * Translates the character following a backslash into the character it denotes and stores the result in
   * <code>currentCharacter</code>.
   * <p>
   * Recognized escapes are <code>b t n f r " ' \</code> and octal escapes of one to three digits. An octal escape whose first
   * digit is greater than 3 is limited to two digits. Characters read ahead that turn out not to belong to the escape are
   * pushed back by decrementing <code>currentPosition</code>.
   *
   * @throws InvalidInputException
   *           with <code>INVALID_ESCAPE</code> when the character does not start a recognized escape, or when the octal value
   *           exceeds 255
   */
  public final void scanEscapeCharacter() throws InvalidInputException {
    // the string with "\\u" is a legal string of two chars \ and u
    //thus we use a direct access to the source (for regular cases).
    if (unicodeAsBackSlash) {
      // the escaped character is already in currentCharacter: only clear the flag, nothing is read from the source
      unicodeAsBackSlash = false;
      //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
      // (source[currentPosition] == 'u')) {
      //                                getNextUnicodeChar();
      //                        } else {
      // mirror the character into the unicode-free buffer when that buffer is in use
      if (withoutUnicodePtr != 0) {
        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        //                              }
      }
    } else
      // regular case: read the character that follows the backslash
      currentCharacter = source[currentPosition++];
    switch (currentCharacter) {
    case 'b':
      currentCharacter = '\b';
      break;
    case 't':
      currentCharacter = '\t';
      break;
    case 'n':
      currentCharacter = '\n';
      break;
    case 'f':
      currentCharacter = '\f';
      break;
    case 'r':
      currentCharacter = '\r';
      break;
    case '\"':
      currentCharacter = '\"';
      break;
    case '\'':
      currentCharacter = '\'';
      break;
    case '\\':
      currentCharacter = '\\';
      break;
    default:
      // -----------octal escape--------------
      // OctalDigit
      // OctalDigit OctalDigit
      // ZeroToThree OctalDigit OctalDigit
      int number = Character.getNumericValue(currentCharacter);
      if (number >= 0 && number <= 7) {
        // true when the first digit is 4..7, in which case a third digit is never consumed
        boolean zeroToThreeNot = number > 3;
        if (Character.isDigit(currentCharacter = source[currentPosition++])) {
          int digit = Character.getNumericValue(currentCharacter);
          if (digit >= 0 && digit <= 7) {
            number = (number * 8) + digit;
            if (Character.isDigit(currentCharacter = source[currentPosition++])) {
              if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
                // Digit --> ignore last character
                currentPosition--;
              } else {
                digit = Character.getNumericValue(currentCharacter);
                if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
                  // OctalDigit OctalDigit
                  number = (number * 8) + digit;
                } else { // has read \ZeroToThree OctalDigit NonOctalDigit
                  // --> ignore last character
                  currentPosition--;
                }
              }
            } else { // has read \OctalDigit NonDigit--> ignore last
              // character
              currentPosition--;
            }
          } else { // has read \OctalDigit NonOctalDigit--> ignore last
            // character
            currentPosition--;
          }
        } else { // has read \OctalDigit --> ignore last character
          currentPosition--;
        }
        // reject values that exceed 255
        if (number > 255)
          throw new InvalidInputException(INVALID_ESCAPE);
        // replaces whatever look-ahead character was left in currentCharacter
        currentCharacter = (char) number;
      } else
        throw new InvalidInputException(INVALID_ESCAPE);
    }
  }
4134
4135   // chech presence of task: tags
4136   public void checkTaskTag(int commentStart, int commentEnd) {
4137     // only look for newer task: tags
4138     if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4139       return;
4140     }
4141     int foundTaskIndex = this.foundTaskCount;
4142     nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4143       char[] tag = null;
4144       char[] priority = null;
4145       // check for tag occurrence
4146       nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4147         tag = this.taskTags[itag];
4148         priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4149         int tagLength = tag.length;
4150         for (int t = 0; t < tagLength; t++) {
4151           if (this.source[i + t] != tag[t])
4152             continue nextTag;
4153         }
4154         if (this.foundTaskTags == null) {
4155           this.foundTaskTags = new char[5][];
4156           this.foundTaskMessages = new char[5][];
4157           this.foundTaskPriorities = new char[5][];
4158           this.foundTaskPositions = new int[5][];
4159         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4160           System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4161           System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4162               this.foundTaskCount);
4163           System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4164               this.foundTaskCount);
4165           System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4166               this.foundTaskCount);
4167         }
4168         this.foundTaskTags[this.foundTaskCount] = tag;
4169         this.foundTaskPriorities[this.foundTaskCount] = priority;
4170         this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4171         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4172         this.foundTaskCount++;
4173         i += tagLength - 1; // will be incremented when looping
4174       }
4175     }
4176     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4177       // retrieve message start and end positions
4178       int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4179       int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4180       // at most beginning of next task
4181       if (max_value < msgStart)
4182         max_value = msgStart; // would only occur if tag is before EOF.
4183       int end = -1;
4184       char c;
4185       for (int j = msgStart; j < max_value; j++) {
4186         if ((c = this.source[j]) == '\n' || c == '\r') {
4187           end = j - 1;
4188           break;
4189         }
4190       }
4191       if (end == -1) {
4192         for (int j = max_value; j > msgStart; j--) {
4193           if ((c = this.source[j]) == '*') {
4194             end = j - 1;
4195             break;
4196           }
4197         }
4198         if (end == -1)
4199           end = max_value;
4200       }
4201       if (msgStart == end)
4202         continue; // empty
4203       // trim the message
4204       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4205         end--;
4206       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4207         msgStart++;
4208       // update the end position of the task
4209       this.foundTaskPositions[i][1] = end;
4210       // get the message source
4211       final int messageLength = end - msgStart + 1;
4212       char[] message = new char[messageLength];
4213       System.arraycopy(source, msgStart, message, 0, messageLength);
4214       this.foundTaskMessages[i] = message;
4215     }
4216   }
4217 }