net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.*;
  18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  19
  20 public class Scanner implements IScanner, ITerminalSymbols {
  21
  /* The scanner APIs are:
   - getNextToken(), which returns the type of the current token
     (this value is not memorized by the scanner)
   - getCurrentTokenSource(), which provides the token's "REAL" source
     (i.e. all unicode escapes have been transformed into the correct char)
   - startPosition gives the start position of the token in the stream
   - currentPosition-1 gives the end position of the token in the stream
  */
  30
  // ---- Scanner state and error-message constants ----

  // 1.4 feature
  private boolean assertMode;
  public boolean useAssertAsAnIndentifier = false;
  //flag indicating if processed source contains occurrences of keyword assert
  public boolean containsAssertKeyword = false;

  //when true, getNextToken() calls pushLineSeparator() on each '\r' / '\n' it meets
  public boolean recordLineSeparator;
  //false while getNextToken() is still skipping text in front of a "<?" / "<?php" opening tag
  public boolean phpMode = false;

  //the character most recently read from source (already decoded if it was a unicode escape)
  public char currentCharacter;
  //index in source of the first character of the current token
  public int startPosition;
  //index in source of the next character to read (current token ends at currentPosition-1)
  public int currentPosition;
  public int initialPosition, eofPosition;
  // after this position eof are generated instead of real token from the source

  public boolean tokenizeComments;
  //when true, getNextToken() returns runs of white space as TokenNameWHITESPACE
  public boolean tokenizeWhiteSpace;

  //source should be viewed as a window (aka a part)
  //of a entire very large stream
  public char source[];

  //unicode support
  //decoded characters of the current token; index 0 is unused, the first real char is at index 1
  public char[] withoutUnicodeBuffer;
  public int withoutUnicodePtr;
  //when == 0 ==> no unicode in the current token
  //set by getNextChar(char): true when the accepted char was a unicode-escaped backslash
  public boolean unicodeAsBackSlash = false;

  public boolean scanningFloatLiteral = false;

  //support for /** comments
  //public char[][] comments = new char[10][];
  public int[] commentStops = new int[10];
  public int[] commentStarts = new int[10];
  public int commentPtr = -1; // no comment test with commentPtr value -1

  //diet parsing support - jump over some method body when requested
  //(getNextToken() calls jumpOverMethodBody() and then resets this flag)
  public boolean diet = false;

  //support for the  poor-line-debuggers ....
  //remember the position of the cr/lf
  public int[] lineEnds = new int[250];
  public int linePtr = -1;
  public boolean wasAcr = false;

  //message keys; presumably used as InvalidInputException messages (usage not visible here - confirm)
  public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$

  public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$

  public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  90
  //----------------optimized identifier management------------------
  //shared one-character arrays, one per lower-case ASCII letter
  static final char[] charArray_a = new char[] { 'a' },
    charArray_b = new char[] { 'b' },
    charArray_c = new char[] { 'c' },
    charArray_d = new char[] { 'd' },
    charArray_e = new char[] { 'e' },
    charArray_f = new char[] { 'f' },
    charArray_g = new char[] { 'g' },
    charArray_h = new char[] { 'h' },
    charArray_i = new char[] { 'i' },
    charArray_j = new char[] { 'j' },
    charArray_k = new char[] { 'k' },
    charArray_l = new char[] { 'l' },
    charArray_m = new char[] { 'm' },
    charArray_n = new char[] { 'n' },
    charArray_o = new char[] { 'o' },
    charArray_p = new char[] { 'p' },
    charArray_q = new char[] { 'q' },
    charArray_r = new char[] { 'r' },
    charArray_s = new char[] { 's' },
    charArray_t = new char[] { 't' },
    charArray_u = new char[] { 'u' },
    charArray_v = new char[] { 'v' },
    charArray_w = new char[] { 'w' },
    charArray_x = new char[] { 'x' },
    charArray_y = new char[] { 'y' },
    charArray_z = new char[] { 'z' };

  //placeholder stored in every slot of charArray_length by the instance initializer
  static final char[] initCharArray =
    new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
  static final int TableSize = 30, InternalTableSize = 6;
  //30*6 = 180 entries
  //getCurrentIdentifierSource() takes its optimized path for token lengths 1..OptimizedLength
  public static final int OptimizedLength = 6;
  //per-instance table indexed [OptimizedLength][TableSize][InternalTableSize]
  public /*static*/
  final char[][][][] charArray_length =
    new char[OptimizedLength][TableSize][InternalTableSize][];
  // support for detecting non-externalized string literals
  int currentLineNr = -1;
  int previousLineNr = -1;
  //reset to null by getNextToken() on a line break when recordLineSeparator is false
  NLSLine currentLine = null;
  List lines = new ArrayList();
  //a NON-NLS tag is TAG_PREFIX followed by TAG_POSTFIX (what goes in between is not visible here)
  public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
  public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
  public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
  public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
  public StringLiteral[] nonNLSStrings = null;
  public boolean checkNonExternalizedStringLiterals = true;
  public boolean wasNonExternalizedStringLiteral = false;
 139
 140   /*static*/ {
 141     for (int i = 0; i < 6; i++) {
 142       for (int j = 0; j < TableSize; j++) {
 143         for (int k = 0; k < InternalTableSize; k++) {
 144           charArray_length[i][j][k] = initCharArray;
 145         }
 146       }
 147     }
 148   }
  //static counters, one per token length 2..6; presumably insertion indexes for the
  //optimizedCurrentTokenSourceN() tables - their use is not visible here, confirm
  static int newEntry2 = 0,
    newEntry3 = 0,
    newEntry4 = 0,
    newEntry5 = 0,
    newEntry6 = 0;

  //bracket kind codes; BracketKinds is the number of kinds
  public static final int RoundBracket = 0;
  public static final int SquareBracket = 1;
  public static final int CurlyBracket = 2;
  public static final int BracketKinds = 3;

  public static final boolean DEBUG = false;
 161   public Scanner() {
 162     this(false, false);
 163   }
 164   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 165     this(tokenizeComments, tokenizeWhiteSpace, false);
 166   }
 167
 168   /**
 169    * Determines if the specified character is
 170    * permissible as the first character in a PHP identifier
 171    */
 172   public static boolean isPHPIdentifierStart(char ch) {
 173     return Character.isLetter(ch) || (ch == '_');
 174   }
 175
 176   /**
 177    * Determines if the specified character may be part of a PHP identifier as
 178    * other than the first character
 179    */
 180   public static boolean isPHPIdentifierPart(char ch) {
 181     return Character.isLetterOrDigit(ch) || (ch == '_');
 182   }
 183
 184   public final boolean atEnd() {
 185     // This code is not relevant if source is
 186     // Only a part of the real stream input
 187
 188     return source.length == currentPosition;
 189   }
 190   public char[] getCurrentIdentifierSource() {
 191     //return the token REAL source (aka unicodes are precomputed)
 192
 193     char[] result;
 194     if (withoutUnicodePtr != 0)
 195       //0 is used as a fast test flag so the real first char is in position 1
 196       System.arraycopy(
 197         withoutUnicodeBuffer,
 198         1,
 199         result = new char[withoutUnicodePtr],
 200         0,
 201         withoutUnicodePtr);
 202     else {
 203       int length = currentPosition - startPosition;
 204       switch (length) { // see OptimizedLength
 205         case 1 :
 206           return optimizedCurrentTokenSource1();
 207         case 2 :
 208           return optimizedCurrentTokenSource2();
 209         case 3 :
 210           return optimizedCurrentTokenSource3();
 211         case 4 :
 212           return optimizedCurrentTokenSource4();
 213         case 5 :
 214           return optimizedCurrentTokenSource5();
 215         case 6 :
 216           return optimizedCurrentTokenSource6();
 217       }
 218       //no optimization
 219       System.arraycopy(
 220         source,
 221         startPosition,
 222         result = new char[length],
 223         0,
 224         length);
 225     }
 226     return result;
 227   }
 228   public int getCurrentTokenEndPosition() {
 229     return this.currentPosition - 1;
 230   }
 231   public final char[] getCurrentTokenSource() {
 232     // Return the token REAL source (aka unicodes are precomputed)
 233
 234     char[] result;
 235     if (withoutUnicodePtr != 0)
 236       // 0 is used as a fast test flag so the real first char is in position 1
 237       System.arraycopy(
 238         withoutUnicodeBuffer,
 239         1,
 240         result = new char[withoutUnicodePtr],
 241         0,
 242         withoutUnicodePtr);
 243     else {
 244       int length;
 245       System.arraycopy(
 246         source,
 247         startPosition,
 248         result = new char[length = currentPosition - startPosition],
 249         0,
 250         length);
 251     }
 252     return result;
 253   }
 254
 255   public final char[] getCurrentTokenSource(int startPos) {
 256     // Return the token REAL source (aka unicodes are precomputed)
 257
 258     char[] result;
 259     if (withoutUnicodePtr != 0)
 260       // 0 is used as a fast test flag so the real first char is in position 1
 261       System.arraycopy(
 262         withoutUnicodeBuffer,
 263         1,
 264         result = new char[withoutUnicodePtr],
 265         0,
 266         withoutUnicodePtr);
 267     else {
 268       int length;
 269       System.arraycopy(
 270         source,
 271         startPos,
 272         result = new char[length = currentPosition - startPos],
 273         0,
 274         length);
 275     }
 276     return result;
 277   }
 278
 279   public final char[] getCurrentTokenSourceString() {
 280     //return the token REAL source (aka unicodes are precomputed).
 281     //REMOVE the two " that are at the beginning and the end.
 282
 283     char[] result;
 284     if (withoutUnicodePtr != 0)
 285       //0 is used as a fast test flag so the real first char is in position 1
 286       System.arraycopy(withoutUnicodeBuffer, 2,
 287       //2 is 1 (real start) + 1 (to jump over the ")
 288       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 289     else {
 290       int length;
 291       System.arraycopy(
 292         source,
 293         startPosition + 1,
 294         result = new char[length = currentPosition - startPosition - 2],
 295         0,
 296         length);
 297     }
 298     return result;
 299   }
 300   public int getCurrentTokenStartPosition() {
 301     return this.startPosition;
 302   }
 303   /*
 304    * Search the source position corresponding to the end of a given line number
 305    *
 306    * Line numbers are 1-based, and relative to the scanner initialPosition.
 307    * Character positions are 0-based.
 308    *
 309    * In case the given line number is inconsistent, answers -1.
 310    */
 311   public final int getLineEnd(int lineNumber) {
 312
 313     if (lineEnds == null)
 314       return -1;
 315     if (lineNumber >= lineEnds.length)
 316       return -1;
 317     if (lineNumber <= 0)
 318       return -1;
 319
 320     if (lineNumber == lineEnds.length - 1)
 321       return eofPosition;
 322     return lineEnds[lineNumber - 1];
 323     // next line start one character behind the lineEnd of the previous line
 324   }
 325   /**
 326    * Search the source position corresponding to the beginning of a given line number
 327    *
 328    * Line numbers are 1-based, and relative to the scanner initialPosition.
 329    * Character positions are 0-based.
 330    *
 331    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 332    *
 333    * In case the given line number is inconsistent, answers -1.
 334    */
 335   public final int getLineStart(int lineNumber) {
 336
 337     if (lineEnds == null)
 338       return -1;
 339     if (lineNumber >= lineEnds.length)
 340       return -1;
 341     if (lineNumber <= 0)
 342       return -1;
 343
 344     if (lineNumber == 1)
 345       return initialPosition;
 346     return lineEnds[lineNumber - 2] + 1;
 347     // next line start one character behind the lineEnd of the previous line
 348   }
  /**
   * Consumes the next character of the source only if it is equal to
   * testedChar. A unicode escape (backslash, one or more 'u', four hex digits)
   * is decoded before the comparison.
   *
   * On success currentCharacter holds the accepted char, currentPosition
   * points right after it, and unicodeAsBackSlash tells whether it was a
   * unicode-escaped backslash. On failure currentPosition is restored; note
   * that currentCharacter has still been overwritten by the attempted read.
   *
   * @param testedChar the character expected next
   * @return true if the next character was testedChar and has been consumed
   */
  public final boolean getNextChar(char testedChar) {
    //BOOLEAN
    //handle the case of unicode.
    //when a unicode appears then we must use a buffer that holds char internal values
    //At the end of this method currentCharacter holds the new visited char
    //and currentPosition points right next after it
    //Both previous lines are true if the currentCharacter is == to the testedChar
    //On false, currentPosition is restored.

    //ALL getNextChar.... ARE OPTIMIZED COPIES

    // remembered so that the position can be restored on any failure
    int temp = currentPosition;
    try {
      if (((currentCharacter = source[currentPosition++]) == '\\')
        && (source[currentPosition] == 'u')) {
        //-------------unicode handling ------------
        int c1, c2, c3, c4;
        // length of the escape in source: backslash + 'u' + 4 hex digits, plus any extra 'u'
        int unicodeSize = 6;
        currentPosition++;
        while (source[currentPosition] == 'u') {
          currentPosition++;
          unicodeSize++;
        }

        // each of the four following chars must have a numeric value in 0..15;
        // the reads short-circuit on the first invalid one
        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
          || c1 < 0)
          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
            || c2 < 0)
          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
            || c3 < 0)
          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
            || c4 < 0)) {
          currentPosition = temp;
          return false;
        }

        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
        if (currentCharacter != testedChar) {
          currentPosition = temp;
          return false;
        }
        unicodeAsBackSlash = currentCharacter == '\\';

        //need the unicode buffer
        if (withoutUnicodePtr == 0) {
          //buffer all the entries that have been left aside....
          //(the token's chars that precede this escape, stored from index 1)
          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
          System.arraycopy(
            source,
            startPosition,
            withoutUnicodeBuffer,
            1,
            withoutUnicodePtr);
        }
        //fill the buffer with the char
        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        return true;

      } //-------------end unicode handling--------------
      else {
        if (currentCharacter != testedChar) {
          currentPosition = temp;
          return false;
        }
        unicodeAsBackSlash = false;
        // only mirror the char into the buffer if the token already needed it
        if (withoutUnicodePtr != 0)
          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        return true;
      }
    } catch (IndexOutOfBoundsException e) {
      // ran off the end of source (or of the buffer): treat as "no match"
      unicodeAsBackSlash = false;
      currentPosition = temp;
      return false;
    }
  }
 424   public final int getNextChar(char testedChar1, char testedChar2) {
 425     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 426     //test can be done with (x==0) for the first and (x>0) for the second
 427     //handle the case of unicode.
 428     //when a unicode appears then we must use a buffer that holds char internal values
 429     //At the end of this method currentCharacter holds the new visited char
 430     //and currentPosition points right next after it
 431     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 432     //On false, no side effect has occured.
 433
 434     //ALL getNextChar.... ARE OPTIMIZED COPIES
 435
 436     int temp = currentPosition;
 437     try {
 438       int result;
 439       if (((currentCharacter = source[currentPosition++]) == '\\')
 440         && (source[currentPosition] == 'u')) {
 441         //-------------unicode traitement ------------
 442         int c1, c2, c3, c4;
 443         int unicodeSize = 6;
 444         currentPosition++;
 445         while (source[currentPosition] == 'u') {
 446           currentPosition++;
 447           unicodeSize++;
 448         }
 449
 450         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 451           || c1 < 0)
 452           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 453             || c2 < 0)
 454           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 455             || c3 < 0)
 456           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 457             || c4 < 0)) {
 458           currentPosition = temp;
 459           return 2;
 460         }
 461
 462         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 463         if (currentCharacter == testedChar1)
 464           result = 0;
 465         else if (currentCharacter == testedChar2)
 466           result = 1;
 467         else {
 468           currentPosition = temp;
 469           return -1;
 470         }
 471
 472         //need the unicode buffer
 473         if (withoutUnicodePtr == 0) {
 474           //buffer all the entries that have been left aside....
 475           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 476           System.arraycopy(
 477             source,
 478             startPosition,
 479             withoutUnicodeBuffer,
 480             1,
 481             withoutUnicodePtr);
 482         }
 483         //fill the buffer with the char
 484         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 485         return result;
 486       } //-------------end unicode traitement--------------
 487       else {
 488         if (currentCharacter == testedChar1)
 489           result = 0;
 490         else if (currentCharacter == testedChar2)
 491           result = 1;
 492         else {
 493           currentPosition = temp;
 494           return -1;
 495         }
 496
 497         if (withoutUnicodePtr != 0)
 498           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 499         return result;
 500       }
 501     } catch (IndexOutOfBoundsException e) {
 502       currentPosition = temp;
 503       return -1;
 504     }
 505   }
  /**
   * Consumes the next character of the source only if it is a digit according
   * to Character.isDigit(char). A unicode escape (backslash, one or more 'u',
   * four hex digits) is decoded before the test.
   *
   * On success currentCharacter holds the digit and currentPosition points
   * right after it. On failure currentPosition is restored; note that
   * currentCharacter has still been overwritten by the attempted read.
   *
   * @return true if the next character was a digit and has been consumed
   */
  public final boolean getNextCharAsDigit() {
    //BOOLEAN
    //handle the case of unicode.
    //when a unicode appears then we must use a buffer that holds char internal values
    //At the end of this method currentCharacter holds the new visited char
    //and currentPosition points right next after it
    //Both previous lines are true if the currentCharacter is a digit
    //On false, currentPosition is restored.

    //ALL getNextChar.... ARE OPTIMIZED COPIES

    // remembered so that the position can be restored on any failure
    int temp = currentPosition;
    try {
      if (((currentCharacter = source[currentPosition++]) == '\\')
        && (source[currentPosition] == 'u')) {
        //-------------unicode handling ------------
        int c1, c2, c3, c4;
        // length of the escape in source: backslash + 'u' + 4 hex digits, plus any extra 'u'
        int unicodeSize = 6;
        currentPosition++;
        while (source[currentPosition] == 'u') {
          currentPosition++;
          unicodeSize++;
        }

        // each of the four following chars must have a numeric value in 0..15;
        // the reads short-circuit on the first invalid one
        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
          || c1 < 0)
          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
            || c2 < 0)
          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
            || c3 < 0)
          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
            || c4 < 0)) {
          currentPosition = temp;
          return false;
        }

        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
        if (!Character.isDigit(currentCharacter)) {
          currentPosition = temp;
          return false;
        }

        //need the unicode buffer
        if (withoutUnicodePtr == 0) {
          //buffer all the entries that have been left aside....
          //(the token's chars that precede this escape, stored from index 1)
          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
          System.arraycopy(
            source,
            startPosition,
            withoutUnicodeBuffer,
            1,
            withoutUnicodePtr);
        }
        //fill the buffer with the char
        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        return true;
      } //-------------end unicode handling--------------
      else {
        if (!Character.isDigit(currentCharacter)) {
          currentPosition = temp;
          return false;
        }
        // only mirror the char into the buffer if the token already needed it
        if (withoutUnicodePtr != 0)
          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        return true;
      }
    } catch (IndexOutOfBoundsException e) {
      // ran off the end of source (or of the buffer): treat as "not a digit"
      currentPosition = temp;
      return false;
    }
  }
  /**
   * Consumes the next character of the source only if it is a digit in the
   * given radix, i.e. Character.digit(char, radix) does not answer -1. A
   * unicode escape (backslash, one or more 'u', four hex digits) is decoded
   * before the test.
   *
   * On success currentCharacter holds the digit and currentPosition points
   * right after it. On failure currentPosition is restored; note that
   * currentCharacter has still been overwritten by the attempted read.
   *
   * @param radix the radix the digit must be valid in
   * @return true if the next character was such a digit and has been consumed
   */
  public final boolean getNextCharAsDigit(int radix) {
    //BOOLEAN
    //handle the case of unicode.
    //when a unicode appears then we must use a buffer that holds char internal values
    //At the end of this method currentCharacter holds the new visited char
    //and currentPosition points right next after it
    //Both previous lines are true if the currentCharacter is a digit base on radix
    //On false, currentPosition is restored.

    //ALL getNextChar.... ARE OPTIMIZED COPIES

    // remembered so that the position can be restored on any failure
    int temp = currentPosition;
    try {
      if (((currentCharacter = source[currentPosition++]) == '\\')
        && (source[currentPosition] == 'u')) {
        //-------------unicode handling ------------
        int c1, c2, c3, c4;
        // length of the escape in source: backslash + 'u' + 4 hex digits, plus any extra 'u'
        int unicodeSize = 6;
        currentPosition++;
        while (source[currentPosition] == 'u') {
          currentPosition++;
          unicodeSize++;
        }

        // each of the four following chars must have a numeric value in 0..15;
        // the reads short-circuit on the first invalid one
        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
          || c1 < 0)
          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
            || c2 < 0)
          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
            || c3 < 0)
          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
            || c4 < 0)) {
          currentPosition = temp;
          return false;
        }

        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
        if (Character.digit(currentCharacter, radix) == -1) {
          currentPosition = temp;
          return false;
        }

        //need the unicode buffer
        if (withoutUnicodePtr == 0) {
          //buffer all the entries that have been left aside....
          //(the token's chars that precede this escape, stored from index 1)
          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
          System.arraycopy(
            source,
            startPosition,
            withoutUnicodeBuffer,
            1,
            withoutUnicodePtr);
        }
        //fill the buffer with the char
        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        return true;
      } //-------------end unicode handling--------------
      else {
        if (Character.digit(currentCharacter, radix) == -1) {
          currentPosition = temp;
          return false;
        }
        // only mirror the char into the buffer if the token already needed it
        if (withoutUnicodePtr != 0)
          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        return true;
      }
    } catch (IndexOutOfBoundsException e) {
      // ran off the end of source (or of the buffer): treat as "not a digit"
      currentPosition = temp;
      return false;
    }
  }
  /**
   * Consumes the next character of the source only if it may be part of a PHP
   * identifier. Despite the method name, the test used is
   * isPHPIdentifierPart(char). A unicode escape (backslash, one or more 'u',
   * four hex digits) is decoded before the test.
   *
   * On success currentCharacter holds the accepted char and currentPosition
   * points right after it. On failure currentPosition is restored; note that
   * currentCharacter has still been overwritten by the attempted read.
   *
   * @return true if the next character was an identifier part and has been consumed
   */
  public boolean getNextCharAsJavaIdentifierPart() {
    //BOOLEAN
    //handle the case of unicode.
    //when a unicode appears then we must use a buffer that holds char internal values
    //At the end of this method currentCharacter holds the new visited char
    //and currentPosition points right next after it
    //Both previous lines are true if the currentCharacter is a PHP identifier part
    //On false, currentPosition is restored.

    //ALL getNextChar.... ARE OPTIMIZED COPIES

    // remembered so that the position can be restored on any failure
    int temp = currentPosition;
    try {
      if (((currentCharacter = source[currentPosition++]) == '\\')
        && (source[currentPosition] == 'u')) {
        //-------------unicode handling ------------
        int c1, c2, c3, c4;
        // length of the escape in source: backslash + 'u' + 4 hex digits, plus any extra 'u'
        int unicodeSize = 6;
        currentPosition++;
        while (source[currentPosition] == 'u') {
          currentPosition++;
          unicodeSize++;
        }

        // each of the four following chars must have a numeric value in 0..15;
        // the reads short-circuit on the first invalid one
        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
          || c1 < 0)
          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
            || c2 < 0)
          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
            || c3 < 0)
          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
            || c4 < 0)) {
          currentPosition = temp;
          return false;
        }

        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
        if (!isPHPIdentifierPart(currentCharacter)) {
          currentPosition = temp;
          return false;
        }

        //need the unicode buffer
        if (withoutUnicodePtr == 0) {
          //buffer all the entries that have been left aside....
          //(the token's chars that precede this escape, stored from index 1)
          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
          System.arraycopy(
            source,
            startPosition,
            withoutUnicodeBuffer,
            1,
            withoutUnicodePtr);
        }
        //fill the buffer with the char
        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        return true;
      } //-------------end unicode handling--------------
      else {
        if (!isPHPIdentifierPart(currentCharacter)) {
          currentPosition = temp;
          return false;
        }

        // only mirror the char into the buffer if the token already needed it
        if (withoutUnicodePtr != 0)
          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        return true;
      }
    } catch (IndexOutOfBoundsException e) {
      // ran off the end of source (or of the buffer): treat as "not an identifier part"
      currentPosition = temp;
      return false;
    }
  }
 720
 721   public int getNextToken() throws InvalidInputException {
 722     try {
 723       while (!phpMode) {
 724         startPosition = currentPosition;
 725         currentCharacter = source[currentPosition++];
 726         if (currentCharacter == '<') {
 727           if (getNextChar('?')) {
 728             currentCharacter = source[currentPosition++];
 729             if ((currentCharacter == ' ')
 730               || Character.isWhitespace(currentCharacter)) {
 731               // <?
 732               startPosition = currentPosition;
 733               phpMode = true;
 734             } else {
 735               boolean phpStart =
 736                 (currentCharacter == 'P') || (currentCharacter == 'p');
 737               if (phpStart) {
 738                 int test = getNextChar('H', 'h');
 739                 if (test >= 0) {
 740                   test = getNextChar('P', 'p');
 741                   if (test >= 0) {
 742                     // <?PHP  <?php
 743                     startPosition = currentPosition;
 744                     phpMode = true;
 745                   }
 746                 }
 747               }
 748             }
 749           }
 750         }
 751
 752         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 753           if (recordLineSeparator) {
 754             pushLineSeparator();
 755           } else {
 756             currentLine = null;
 757           }
 758         }
 759       }
 760     } //-----------------end switch while try--------------------
 761     catch (IndexOutOfBoundsException e) {
 762       return TokenNameEOF;
 763     }
 764
 765     if (phpMode) {
 766       this.wasAcr = false;
 767       if (diet) {
 768         jumpOverMethodBody();
 769         diet = false;
 770         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 771       }
 772       try {
 773         while (true) { //loop for jumping over comments
 774           withoutUnicodePtr = 0;
 775           //start with a new token (even comment written with unicode )
 776
 777           // ---------Consume white space and handles startPosition---------
 778           int whiteStart = currentPosition;
 779           boolean isWhiteSpace;
 780           do {
 781             startPosition = currentPosition;
 782             if (((currentCharacter = source[currentPosition++]) == '\\')
 783               && (source[currentPosition] == 'u')) {
 784               isWhiteSpace = jumpOverUnicodeWhiteSpace();
 785             } else {
 786               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 787                 checkNonExternalizeString();
 788                 if (recordLineSeparator) {
 789                   pushLineSeparator();
 790                 } else {
 791                   currentLine = null;
 792                 }
 793               }
 794               isWhiteSpace =
 795                 (currentCharacter == ' ')
 796                   || Character.isWhitespace(currentCharacter);
 797             }
 798           } while (isWhiteSpace);
 799           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 800             // reposition scanner in case we are interested by spaces as tokens
 801             currentPosition--;
 802             startPosition = whiteStart;
 803             return TokenNameWHITESPACE;
 804           }
 805           //little trick to get out in the middle of a source compuation
 806           if (currentPosition > eofPosition)
 807             return TokenNameEOF;
 808
 809           // ---------Identify the next token-------------
 810
 811           switch (currentCharacter) {
 812             case '(' :
 813               return TokenNameLPAREN;
 814             case ')' :
 815               return TokenNameRPAREN;
 816             case '{' :
 817               return TokenNameLBRACE;
 818             case '}' :
 819               return TokenNameRBRACE;
 820             case '[' :
 821               return TokenNameLBRACKET;
 822             case ']' :
 823               return TokenNameRBRACKET;
 824             case ';' :
 825               return TokenNameSEMICOLON;
 826             case ',' :
 827               return TokenNameCOMMA;
 828
 829             case '.' :
 830               if (getNextCharAsDigit())
 831                 return scanNumber(true);
 832               return TokenNameDOT;
 833             case '+' :
 834               {
 835                 int test;
 836                 if ((test = getNextChar('+', '=')) == 0)
 837                   return TokenNamePLUS_PLUS;
 838                 if (test > 0)
 839                   return TokenNamePLUS_EQUAL;
 840                 return TokenNamePLUS;
 841               }
 842             case '-' :
 843               {
 844                 int test;
 845                 if ((test = getNextChar('-', '=')) == 0)
 846                   return TokenNameMINUS_MINUS;
 847                 if (test > 0)
 848                   return TokenNameMINUS_EQUAL;
 849                 if (getNextChar('>'))
 850                   return TokenNameMINUS_GREATER;
 851
 852                 return TokenNameMINUS;
 853               }
 854             case '~' :
 855               if (getNextChar('='))
 856                 return TokenNameTWIDDLE_EQUAL;
 857               return TokenNameTWIDDLE;
 858             case '!' :
 859               if (getNextChar('='))
 860                 return TokenNameNOT_EQUAL;
 861               return TokenNameNOT;
 862             case '*' :
 863               if (getNextChar('='))
 864                 return TokenNameMULTIPLY_EQUAL;
 865               return TokenNameMULTIPLY;
 866             case '%' :
 867               if (getNextChar('='))
 868                 return TokenNameREMAINDER_EQUAL;
 869               return TokenNameREMAINDER;
 870             case '<' :
 871               {
 872                 int test;
 873                 if ((test = getNextChar('=', '<')) == 0)
 874                   return TokenNameLESS_EQUAL;
 875                 if (test > 0) {
 876                   if (getNextChar('='))
 877                     return TokenNameLEFT_SHIFT_EQUAL;
 878                   if (getNextChar('<')) {
 879                     int heredocStart = currentPosition;
 880                     int heredocLength = 0;
 881                     currentCharacter = source[currentPosition++];
 882                     if (isPHPIdentifierStart(currentCharacter)) {
 883                       currentCharacter = source[currentPosition++];
 884                     } else {
 885                       return TokenNameERROR;
 886                     }
 887                     while (isPHPIdentifierPart(currentCharacter)) {
 888                       currentCharacter = source[currentPosition++];
 889                     }
 890
 891                     heredocLength = currentPosition - heredocStart - 1;
 892
 893                     // heredoc end-tag determination
 894                     boolean endTag = true;
 895                     char ch;
 896                     do {
 897                       ch = source[currentPosition++];
 898                       if (ch == '\r' || ch == '\n') {
 899                         if (recordLineSeparator) {
 900                           pushLineSeparator();
 901                         } else {
 902                           currentLine = null;
 903                         }
 904                         for (int i = 0; i < heredocLength; i++) {
 905                           if (source[currentPosition + i]
 906                             != source[heredocStart + i]) {
 907                             endTag = false;
 908                             break;
 909                           }
 910                         }
 911                         if (endTag) {
 912                           currentPosition += heredocLength - 1;
 913                           currentCharacter = source[currentPosition++];
 914                           break; // do...while loop
 915                         } else {
 916                           endTag = true;
 917                         }
 918                       }
 919
 920                     } while (true);
 921
 922                     return TokenNameHEREDOC;
 923                   }
 924                   return TokenNameLEFT_SHIFT;
 925                 }
 926                 return TokenNameLESS;
 927               }
 928             case '>' :
 929               {
 930                 int test;
 931                 if ((test = getNextChar('=', '>')) == 0)
 932                   return TokenNameGREATER_EQUAL;
 933                 if (test > 0) {
 934                   if ((test = getNextChar('=', '>')) == 0)
 935                     return TokenNameRIGHT_SHIFT_EQUAL;
 936                   return TokenNameRIGHT_SHIFT;
 937                 }
 938                 return TokenNameGREATER;
 939               }
 940             case '=' :
 941               if (getNextChar('='))
 942                 return TokenNameEQUAL_EQUAL;
 943               if (getNextChar('>'))
 944                 return TokenNameEQUAL_GREATER;
 945               return TokenNameEQUAL;
 946             case '&' :
 947               {
 948                 int test;
 949                 if ((test = getNextChar('&', '=')) == 0)
 950                   return TokenNameAND_AND;
 951                 if (test > 0)
 952                   return TokenNameAND_EQUAL;
 953                 return TokenNameAND;
 954               }
 955             case '|' :
 956               {
 957                 int test;
 958                 if ((test = getNextChar('|', '=')) == 0)
 959                   return TokenNameOR_OR;
 960                 if (test > 0)
 961                   return TokenNameOR_EQUAL;
 962                 return TokenNameOR;
 963               }
 964             case '^' :
 965               if (getNextChar('='))
 966                 return TokenNameXOR_EQUAL;
 967               return TokenNameXOR;
 968             case '?' :
 969               if (getNextChar('>')) {
 970                 phpMode = false;
 971                 return TokenNameStopPHP;
 972               }
 973               return TokenNameQUESTION;
 974             case ':' :
 975               if (getNextChar(':'))
 976                 return TokenNameCOLON_COLON;
 977               return TokenNameCOLON;
 978             case '@' :
 979               return TokenNameAT;
 980               //                                        case '\'' :
 981               //                                                {
 982               //                                                        int test;
 983               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
 984               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 985               //                                                        }
 986               //                                                        if (test > 0) {
 987               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 988               //                                                                for (int lookAhead = 0;
 989               //                                                                        lookAhead < 3;
 990               //                                                                        lookAhead++) {
 991               //                                                                        if (currentPosition + lookAhead
 992               //                                                                                == source.length)
 993               //                                                                                break;
 994               //                                                                        if (source[currentPosition + lookAhead]
 995               //                                                                                == '\n')
 996               //                                                                                break;
 997               //                                                                        if (source[currentPosition + lookAhead]
 998               //                                                                                == '\'') {
 999               //                                                                                currentPosition += lookAhead + 1;
1000               //                                                                                break;
1001               //                                                                        }
1002               //                                                                }
1003               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1004               //                                                        }
1005               //                                                }
1006               //                                                if (getNextChar('\'')) {
1007               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1008               //                                                        for (int lookAhead = 0;
1009               //                                                                lookAhead < 3;
1010               //                                                                lookAhead++) {
1011               //                                                                if (currentPosition + lookAhead
1012               //                                                                        == source.length)
1013               //                                                                        break;
1014               //                                                                if (source[currentPosition + lookAhead]
1015               //                                                                        == '\n')
1016               //                                                                        break;
1017               //                                                                if (source[currentPosition + lookAhead]
1018               //                                                                        == '\'') {
1019               //                                                                        currentPosition += lookAhead + 1;
1020               //                                                                        break;
1021               //                                                                }
1022               //                                                        }
1023               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1024               //                                                }
1025               //                                                if (getNextChar('\\'))
1026               //                                                        scanEscapeCharacter();
1027               //                                                else { // consume next character
1028               //                                                        unicodeAsBackSlash = false;
1029               //                                                        if (((currentCharacter = source[currentPosition++])
1030               //                                                                == '\\')
1031               //                                                                && (source[currentPosition] == 'u')) {
1032               //                                                                getNextUnicodeChar();
1033               //                                                        } else {
1034               //                                                                if (withoutUnicodePtr != 0) {
1035               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1036               //                                                                                currentCharacter;
1037               //                                                                }
1038               //                                                        }
1039               //                                                }
1040               //                                                //            if (getNextChar('\''))
1041               //                                                //              return TokenNameCharacterLiteral;
1042               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1043               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1044               //                                                        if (currentPosition + lookAhead == source.length)
1045               //                                                                break;
1046               //                                                        if (source[currentPosition + lookAhead] == '\n')
1047               //                                                                break;
1048               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1049               //                                                                currentPosition += lookAhead + 1;
1050               //                                                                break;
1051               //                                                        }
1052               //                                                }
1053               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1054             case '\'' :
1055               try {
1056                 // consume next character
1057                 unicodeAsBackSlash = false;
1058                 if (((currentCharacter = source[currentPosition++]) == '\\')
1059                   && (source[currentPosition] == 'u')) {
1060                   getNextUnicodeChar();
1061                 } else {
1062                   if (withoutUnicodePtr != 0) {
1063                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1064                       currentCharacter;
1065                   }
1066                 }
1067
1068                 while (currentCharacter != '\'') {
1069
1070                   /**** in PHP \r and \n are valid in string literals ****/
1071                   //                  if ((currentCharacter == '\n')
1072                   //                    || (currentCharacter == '\r')) {
1073                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1074                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1075                   //                      if (currentPosition + lookAhead == source.length)
1076                   //                        break;
1077                   //                      if (source[currentPosition + lookAhead] == '\n')
1078                   //                        break;
1079                   //                      if (source[currentPosition + lookAhead] == '\"') {
1080                   //                        currentPosition += lookAhead + 1;
1081                   //                        break;
1082                   //                      }
1083                   //                    }
1084                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1085                   //                  }
1086                   if (currentCharacter == '\\') {
1087                     int escapeSize = currentPosition;
1088                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1089                     //the escape-scanning call below has a side effect on this value, and we need the previous value a few lines further down
1090                     scanSingleQuotedEscapeCharacter();
1091                     escapeSize = currentPosition - escapeSize;
1092                     if (withoutUnicodePtr == 0) {
1093                       //buffer all the entries that have been left aside....
1094                       withoutUnicodePtr =
1095                         currentPosition - escapeSize - 1 - startPosition;
1096                       System.arraycopy(
1097                         source,
1098                         startPosition,
1099                         withoutUnicodeBuffer,
1100                         1,
1101                         withoutUnicodePtr);
1102                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1103                         currentCharacter;
1104                     } else { //overwrite the / in the buffer
1105                       withoutUnicodeBuffer[withoutUnicodePtr] =
1106                         currentCharacter;
1107                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1108                         withoutUnicodePtr--;
1109                       }
1110                     }
1111                   }
1112                   // consume next character
1113                   unicodeAsBackSlash = false;
1114                   if (((currentCharacter = source[currentPosition++]) == '\\')
1115                     && (source[currentPosition] == 'u')) {
1116                     getNextUnicodeChar();
1117                   } else {
1118                     if (withoutUnicodePtr != 0) {
1119                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1120                         currentCharacter;
1121                     }
1122                   }
1123
1124                 }
1125               } catch (IndexOutOfBoundsException e) {
1126                 throw new InvalidInputException(UNTERMINATED_STRING);
1127               } catch (InvalidInputException e) {
1128                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1129                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1130                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1131                     if (currentPosition + lookAhead == source.length)
1132                       break;
1133                     if (source[currentPosition + lookAhead] == '\n')
1134                       break;
1135                     if (source[currentPosition + lookAhead] == '\'') {
1136                       currentPosition += lookAhead + 1;
1137                       break;
1138                     }
1139                   }
1140
1141                 }
1142                 throw e; // rethrow
1143               }
1144               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1145                 if (currentLine == null) {
1146                   currentLine = new NLSLine();
1147                   lines.add(currentLine);
1148                 }
1149                 currentLine.add(
1150                   new StringLiteral(
1151                     getCurrentTokenSourceString(),
1152                     startPosition,
1153                     currentPosition - 1));
1154               }
1155               return TokenNameStringConstant;
1156             case '"' :
1157               try {
1158                 // consume next character
1159                 unicodeAsBackSlash = false;
1160                 if (((currentCharacter = source[currentPosition++]) == '\\')
1161                   && (source[currentPosition] == 'u')) {
1162                   getNextUnicodeChar();
1163                 } else {
1164                   if (withoutUnicodePtr != 0) {
1165                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1166                       currentCharacter;
1167                   }
1168                 }
1169
1170                 while (currentCharacter != '"') {
1171
1172                   /**** in PHP \r and \n are valid in string literals ****/
1173                   //                  if ((currentCharacter == '\n')
1174                   //                    || (currentCharacter == '\r')) {
1175                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1176                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1177                   //                      if (currentPosition + lookAhead == source.length)
1178                   //                        break;
1179                   //                      if (source[currentPosition + lookAhead] == '\n')
1180                   //                        break;
1181                   //                      if (source[currentPosition + lookAhead] == '\"') {
1182                   //                        currentPosition += lookAhead + 1;
1183                   //                        break;
1184                   //                      }
1185                   //                    }
1186                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1187                   //                  }
1188                   if (currentCharacter == '\\') {
1189                     int escapeSize = currentPosition;
1190                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1191                     //the escape-scanning call below has a side effect on this value, and we need the previous value a few lines further down
1192                     scanDoubleQuotedEscapeCharacter();
1193                     escapeSize = currentPosition - escapeSize;
1194                     if (withoutUnicodePtr == 0) {
1195                       //buffer all the entries that have been left aside....
1196                       withoutUnicodePtr =
1197                         currentPosition - escapeSize - 1 - startPosition;
1198                       System.arraycopy(
1199                         source,
1200                         startPosition,
1201                         withoutUnicodeBuffer,
1202                         1,
1203                         withoutUnicodePtr);
1204                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1205                         currentCharacter;
1206                     } else { //overwrite the / in the buffer
1207                       withoutUnicodeBuffer[withoutUnicodePtr] =
1208                         currentCharacter;
1209                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1210                         withoutUnicodePtr--;
1211                       }
1212                     }
1213                   }
1214                   // consume next character
1215                   unicodeAsBackSlash = false;
1216                   if (((currentCharacter = source[currentPosition++]) == '\\')
1217                     && (source[currentPosition] == 'u')) {
1218                     getNextUnicodeChar();
1219                   } else {
1220                     if (withoutUnicodePtr != 0) {
1221                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1222                         currentCharacter;
1223                     }
1224                   }
1225
1226                 }
1227               } catch (IndexOutOfBoundsException e) {
1228                 throw new InvalidInputException(UNTERMINATED_STRING);
1229               } catch (InvalidInputException e) {
1230                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1231                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1232                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1233                     if (currentPosition + lookAhead == source.length)
1234                       break;
1235                     if (source[currentPosition + lookAhead] == '\n')
1236                       break;
1237                     if (source[currentPosition + lookAhead] == '\"') {
1238                       currentPosition += lookAhead + 1;
1239                       break;
1240                     }
1241                   }
1242
1243                 }
1244                 throw e; // rethrow
1245               }
1246               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1247                 if (currentLine == null) {
1248                   currentLine = new NLSLine();
1249                   lines.add(currentLine);
1250                 }
1251                 currentLine.add(
1252                   new StringLiteral(
1253                     getCurrentTokenSourceString(),
1254                     startPosition,
1255                     currentPosition - 1));
1256               }
1257               return TokenNameStringLiteral;
1258             case '`' :
1259               try {
1260                 // consume next character
1261                 unicodeAsBackSlash = false;
1262                 if (((currentCharacter = source[currentPosition++]) == '\\')
1263                   && (source[currentPosition] == 'u')) {
1264                   getNextUnicodeChar();
1265                 } else {
1266                   if (withoutUnicodePtr != 0) {
1267                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1268                       currentCharacter;
1269                   }
1270                 }
1271
1272                 while (currentCharacter != '`') {
1273
1274                   /**** in PHP \r and \n are valid in string literals ****/
1275                   //                if ((currentCharacter == '\n')
1276                   //                  || (currentCharacter == '\r')) {
1277                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1278                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1279                   //                    if (currentPosition + lookAhead == source.length)
1280                   //                      break;
1281                   //                    if (source[currentPosition + lookAhead] == '\n')
1282                   //                      break;
1283                   //                    if (source[currentPosition + lookAhead] == '\"') {
1284                   //                      currentPosition += lookAhead + 1;
1285                   //                      break;
1286                   //                    }
1287                   //                  }
1288                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1289                   //                }
1290                   if (currentCharacter == '\\') {
1291                     int escapeSize = currentPosition;
1292                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1293                     //the escape-scanning call below has a side effect on this value, and we need the previous value a few lines further down
1294                     scanDoubleQuotedEscapeCharacter();
1295                     escapeSize = currentPosition - escapeSize;
1296                     if (withoutUnicodePtr == 0) {
1297                       //buffer all the entries that have been left aside....
1298                       withoutUnicodePtr =
1299                         currentPosition - escapeSize - 1 - startPosition;
1300                       System.arraycopy(
1301                         source,
1302                         startPosition,
1303                         withoutUnicodeBuffer,
1304                         1,
1305                         withoutUnicodePtr);
1306                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1307                         currentCharacter;
1308                     } else { //overwrite the / in the buffer
1309                       withoutUnicodeBuffer[withoutUnicodePtr] =
1310                         currentCharacter;
1311                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1312                         withoutUnicodePtr--;
1313                       }
1314                     }
1315                   }
1316                   // consume next character
1317                   unicodeAsBackSlash = false;
1318                   if (((currentCharacter = source[currentPosition++]) == '\\')
1319                     && (source[currentPosition] == 'u')) {
1320                     getNextUnicodeChar();
1321                   } else {
1322                     if (withoutUnicodePtr != 0) {
1323                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1324                         currentCharacter;
1325                     }
1326                   }
1327
1328                 }
1329               } catch (IndexOutOfBoundsException e) {
1330                 throw new InvalidInputException(UNTERMINATED_STRING);
1331               } catch (InvalidInputException e) {
1332                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1333                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1334                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1335                     if (currentPosition + lookAhead == source.length)
1336                       break;
1337                     if (source[currentPosition + lookAhead] == '\n')
1338                       break;
1339                     if (source[currentPosition + lookAhead] == '`') {
1340                       currentPosition += lookAhead + 1;
1341                       break;
1342                     }
1343                   }
1344
1345                 }
1346                 throw e; // rethrow
1347               }
1348               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1349                 if (currentLine == null) {
1350                   currentLine = new NLSLine();
1351                   lines.add(currentLine);
1352                 }
1353                 currentLine.add(
1354                   new StringLiteral(
1355                     getCurrentTokenSourceString(),
1356                     startPosition,
1357                     currentPosition - 1));
1358               }
1359               return TokenNameStringInterpolated;
1360             case '#' :
1361             case '/' :
1362               {
1363                 int test;
1364                 if ((currentCharacter == '#')
1365                   || (test = getNextChar('/', '*')) == 0) {
1366                   //line comment
1367                   int endPositionForLineComment = 0;
1368                   try { //get the next char
1369                     if (((currentCharacter = source[currentPosition++])
1370                       == '\\')
1371                       && (source[currentPosition] == 'u')) {
1372                       //-------------unicode traitement ------------
1373                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1374                       currentPosition++;
1375                       while (source[currentPosition] == 'u') {
1376                         currentPosition++;
1377                       }
1378                       if ((c1 =
1379                         Character.getNumericValue(source[currentPosition++]))
1380                         > 15
1381                         || c1 < 0
1382                         || (c2 =
1383                           Character.getNumericValue(source[currentPosition++]))
1384                           > 15
1385                         || c2 < 0
1386                         || (c3 =
1387                           Character.getNumericValue(source[currentPosition++]))
1388                           > 15
1389                         || c3 < 0
1390                         || (c4 =
1391                           Character.getNumericValue(source[currentPosition++]))
1392                           > 15
1393                         || c4 < 0) {
1394                         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1395                       } else {
1396                         currentCharacter =
1397                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1398                       }
1399                     }
1400
1401                     //handle the \\u case manually into comment
1402                     if (currentCharacter == '\\') {
1403                       if (source[currentPosition] == '\\')
1404                         currentPosition++;
1405                     } //jump over the \\
1406                     boolean isUnicode = false;
1407                     while (currentCharacter != '\r'
1408                       && currentCharacter != '\n') {
1409                       if (currentCharacter == '?') {
1410                         if (getNextChar('>')) {
1411                           startPosition = currentPosition - 2;
1412                           phpMode = false;
1413                           return TokenNameStopPHP;
1414                         }
1415                       }
1416
1417                       //get the next char
1418                       isUnicode = false;
1419                       if (((currentCharacter = source[currentPosition++])
1420                         == '\\')
1421                         && (source[currentPosition] == 'u')) {
1422                         isUnicode = true;
1423                         //-------------unicode traitement ------------
1424                         int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1425                         currentPosition++;
1426                         while (source[currentPosition] == 'u') {
1427                           currentPosition++;
1428                         }
1429                         if ((c1 =
1430                           Character.getNumericValue(source[currentPosition++]))
1431                           > 15
1432                           || c1 < 0
1433                           || (c2 =
1434                             Character.getNumericValue(
1435                               source[currentPosition++]))
1436                             > 15
1437                           || c2 < 0
1438                           || (c3 =
1439                             Character.getNumericValue(
1440                               source[currentPosition++]))
1441                             > 15
1442                           || c3 < 0
1443                           || (c4 =
1444                             Character.getNumericValue(
1445                               source[currentPosition++]))
1446                             > 15
1447                           || c4 < 0) {
1448                           throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1449                         } else {
1450                           currentCharacter =
1451                             (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1452                         }
1453                       }
1454                       //handle the \\u case manually into comment
1455                       if (currentCharacter == '\\') {
1456                         if (source[currentPosition] == '\\')
1457                           currentPosition++;
1458                       } //jump over the \\
1459                     }
1460                     if (isUnicode) {
1461                       endPositionForLineComment = currentPosition - 6;
1462                     } else {
1463                       endPositionForLineComment = currentPosition - 1;
1464                     }
1465                     recordComment(false);
1466                     if ((currentCharacter == '\r')
1467                       || (currentCharacter == '\n')) {
1468                       checkNonExternalizeString();
1469                       if (recordLineSeparator) {
1470                         if (isUnicode) {
1471                           pushUnicodeLineSeparator();
1472                         } else {
1473                           pushLineSeparator();
1474                         }
1475                       } else {
1476                         currentLine = null;
1477                       }
1478                     }
1479                     if (tokenizeComments) {
1480                       if (!isUnicode) {
1481                         currentPosition = endPositionForLineComment;
1482                         // reset one character behind
1483                       }
1484                       return TokenNameCOMMENT_LINE;
1485                     }
1486                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1487                     if (tokenizeComments) {
1488                       currentPosition--;
1489                       // reset one character behind
1490                       return TokenNameCOMMENT_LINE;
1491                     }
1492                   }
1493                   break;
1494                 }
1495                 if (test > 0) {
1496                   //traditional and annotation comment
1497                   boolean isJavadoc = false, star = false;
1498                   // consume next character
1499                   unicodeAsBackSlash = false;
1500                   if (((currentCharacter = source[currentPosition++]) == '\\')
1501                     && (source[currentPosition] == 'u')) {
1502                     getNextUnicodeChar();
1503                   } else {
1504                     if (withoutUnicodePtr != 0) {
1505                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1506                         currentCharacter;
1507                     }
1508                   }
1509
1510                   if (currentCharacter == '*') {
1511                     isJavadoc = true;
1512                     star = true;
1513                   }
1514                   if ((currentCharacter == '\r')
1515                     || (currentCharacter == '\n')) {
1516                     checkNonExternalizeString();
1517                     if (recordLineSeparator) {
1518                       pushLineSeparator();
1519                     } else {
1520                       currentLine = null;
1521                     }
1522                   }
1523                   try { //get the next char
1524                     if (((currentCharacter = source[currentPosition++])
1525                       == '\\')
1526                       && (source[currentPosition] == 'u')) {
1527                       //-------------unicode traitement ------------
1528                       getNextUnicodeChar();
1529                     }
1530                     //handle the \\u case manually into comment
1531                     if (currentCharacter == '\\') {
1532                       if (source[currentPosition] == '\\')
1533                         currentPosition++;
1534                       //jump over the \\
1535                     }
1536                     // empty comment is not a javadoc /**/
1537                     if (currentCharacter == '/') {
1538                       isJavadoc = false;
1539                     }
1540                     //loop until end of comment */
1541                     while ((currentCharacter != '/') || (!star)) {
1542                       if ((currentCharacter == '\r')
1543                         || (currentCharacter == '\n')) {
1544                         checkNonExternalizeString();
1545                         if (recordLineSeparator) {
1546                           pushLineSeparator();
1547                         } else {
1548                           currentLine = null;
1549                         }
1550                       }
1551                       star = currentCharacter == '*';
1552                       //get next char
1553                       if (((currentCharacter = source[currentPosition++])
1554                         == '\\')
1555                         && (source[currentPosition] == 'u')) {
1556                         //-------------unicode traitement ------------
1557                         getNextUnicodeChar();
1558                       }
1559                       //handle the \\u case manually into comment
1560                       if (currentCharacter == '\\') {
1561                         if (source[currentPosition] == '\\')
1562                           currentPosition++;
1563                       } //jump over the \\
1564                     }
1565                     recordComment(isJavadoc);
1566                     if (tokenizeComments) {
1567                       if (isJavadoc)
1568                         return TokenNameCOMMENT_PHPDOC;
1569                       return TokenNameCOMMENT_BLOCK;
1570                     }
1571                   } catch (IndexOutOfBoundsException e) {
1572                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1573                   }
1574                   break;
1575                 }
1576                 if (getNextChar('='))
1577                   return TokenNameDIVIDE_EQUAL;
1578                 return TokenNameDIVIDE;
1579               }
1580             case '\u001a' :
1581               if (atEnd())
1582                 return TokenNameEOF;
1583               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1584               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1585
1586             default :
1587               if (currentCharacter == '$') {
1588                 while ((currentCharacter = source[currentPosition++]) == '$') {
1589                 }
1590                 if (currentCharacter == '{')
1591                   return TokenNameDOLLAR_LBRACE;
1592                 if (isPHPIdentifierStart(currentCharacter))
1593                   return scanIdentifierOrKeyword(true);
1594                 return TokenNameERROR;
1595               }
1596               if (isPHPIdentifierStart(currentCharacter))
1597                 return scanIdentifierOrKeyword(false);
1598               if (Character.isDigit(currentCharacter))
1599                 return scanNumber(false);
1600               return TokenNameERROR;
1601           }
1602         }
1603       } //-----------------end switch while try--------------------
1604       catch (IndexOutOfBoundsException e) {
1605       }
1606     }
1607     return TokenNameEOF;
1608   }
1609
1610   public final void getNextUnicodeChar()
1611     throws IndexOutOfBoundsException, InvalidInputException {
1612     //VOID
1613     //handle the case of unicode.
1614     //when a unicode appears then we must use a buffer that holds char internal values
1615     //At the end of this method currentCharacter holds the new visited char
1616     //and currentPosition points right next after it
1617
1618     //ALL getNextChar.... ARE OPTIMIZED COPIES
1619
1620     int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1621     currentPosition++;
1622     while (source[currentPosition] == 'u') {
1623       currentPosition++;
1624       unicodeSize++;
1625     }
1626
1627     if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1628       || c1 < 0
1629       || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1630       || c2 < 0
1631       || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1632       || c3 < 0
1633       || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1634       || c4 < 0) {
1635       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1636     } else {
1637       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1638       //need the unicode buffer
1639       if (withoutUnicodePtr == 0) {
1640         //buffer all the entries that have been left aside....
1641         withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1642         System.arraycopy(
1643           source,
1644           startPosition,
1645           withoutUnicodeBuffer,
1646           1,
1647           withoutUnicodePtr);
1648       }
1649       //fill the buffer with the char
1650       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1651     }
1652     unicodeAsBackSlash = currentCharacter == '\\';
1653   }
1654   /* Tokenize a method body, assuming that curly brackets are properly balanced.
1655    */
1656   public final void jumpOverMethodBody() {
1657
1658     this.wasAcr = false;
1659     int found = 1;
1660     try {
1661       while (true) { //loop for jumping over comments
1662         // ---------Consume white space and handles startPosition---------
1663         boolean isWhiteSpace;
1664         do {
1665           startPosition = currentPosition;
1666           if (((currentCharacter = source[currentPosition++]) == '\\')
1667             && (source[currentPosition] == 'u')) {
1668             isWhiteSpace = jumpOverUnicodeWhiteSpace();
1669           } else {
1670             if (recordLineSeparator
1671               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1672               pushLineSeparator();
1673             isWhiteSpace = Character.isWhitespace(currentCharacter);
1674           }
1675         } while (isWhiteSpace);
1676
1677         // -------consume token until } is found---------
1678         switch (currentCharacter) {
1679           case '{' :
1680             found++;
1681             break;
1682           case '}' :
1683             found--;
1684             if (found == 0)
1685               return;
1686             break;
1687           case '\'' :
1688             {
1689               boolean test;
1690               test = getNextChar('\\');
1691               if (test) {
1692                 try {
1693                   scanDoubleQuotedEscapeCharacter();
1694                 } catch (InvalidInputException ex) {
1695                 };
1696               } else {
1697                 try { // consume next character
1698                   unicodeAsBackSlash = false;
1699                   if (((currentCharacter = source[currentPosition++]) == '\\')
1700                     && (source[currentPosition] == 'u')) {
1701                     getNextUnicodeChar();
1702                   } else {
1703                     if (withoutUnicodePtr != 0) {
1704                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1705                         currentCharacter;
1706                     }
1707                   }
1708                 } catch (InvalidInputException ex) {
1709                 };
1710               }
1711               getNextChar('\'');
1712               break;
1713             }
1714           case '"' :
1715             try {
1716               try { // consume next character
1717                 unicodeAsBackSlash = false;
1718                 if (((currentCharacter = source[currentPosition++]) == '\\')
1719                   && (source[currentPosition] == 'u')) {
1720                   getNextUnicodeChar();
1721                 } else {
1722                   if (withoutUnicodePtr != 0) {
1723                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1724                       currentCharacter;
1725                   }
1726                 }
1727               } catch (InvalidInputException ex) {
1728               };
1729               while (currentCharacter != '"') {
1730                 if (currentCharacter == '\r') {
1731                   if (source[currentPosition] == '\n')
1732                     currentPosition++;
1733                   break;
1734                   // the string cannot go further that the line
1735                 }
1736                 if (currentCharacter == '\n') {
1737                   break;
1738                   // the string cannot go further that the line
1739                 }
1740                 if (currentCharacter == '\\') {
1741                   try {
1742                     scanDoubleQuotedEscapeCharacter();
1743                   } catch (InvalidInputException ex) {
1744                   };
1745                 }
1746                 try { // consume next character
1747                   unicodeAsBackSlash = false;
1748                   if (((currentCharacter = source[currentPosition++]) == '\\')
1749                     && (source[currentPosition] == 'u')) {
1750                     getNextUnicodeChar();
1751                   } else {
1752                     if (withoutUnicodePtr != 0) {
1753                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1754                         currentCharacter;
1755                     }
1756                   }
1757                 } catch (InvalidInputException ex) {
1758                 };
1759               }
1760             } catch (IndexOutOfBoundsException e) {
1761               return;
1762             }
1763             break;
1764           case '/' :
1765             {
1766               int test;
1767               if ((test = getNextChar('/', '*')) == 0) {
1768                 //line comment
1769                 try {
1770                   //get the next char
1771                   if (((currentCharacter = source[currentPosition++]) == '\\')
1772                     && (source[currentPosition] == 'u')) {
1773                     //-------------unicode traitement ------------
1774                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1775                     currentPosition++;
1776                     while (source[currentPosition] == 'u') {
1777                       currentPosition++;
1778                     }
1779                     if ((c1 =
1780                       Character.getNumericValue(source[currentPosition++]))
1781                       > 15
1782                       || c1 < 0
1783                       || (c2 =
1784                         Character.getNumericValue(source[currentPosition++]))
1785                         > 15
1786                       || c2 < 0
1787                       || (c3 =
1788                         Character.getNumericValue(source[currentPosition++]))
1789                         > 15
1790                       || c3 < 0
1791                       || (c4 =
1792                         Character.getNumericValue(source[currentPosition++]))
1793                         > 15
1794                       || c4 < 0) {
1795                       //error don't care of the value
1796                       currentCharacter = 'A';
1797                     } //something different from \n and \r
1798                     else {
1799                       currentCharacter =
1800                         (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1801                     }
1802                   }
1803
1804                   while (currentCharacter != '\r'
1805                     && currentCharacter != '\n') {
1806                     //get the next char
1807                     if (((currentCharacter = source[currentPosition++])
1808                       == '\\')
1809                       && (source[currentPosition] == 'u')) {
1810                       //-------------unicode traitement ------------
1811                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1812                       currentPosition++;
1813                       while (source[currentPosition] == 'u') {
1814                         currentPosition++;
1815                       }
1816                       if ((c1 =
1817                         Character.getNumericValue(source[currentPosition++]))
1818                         > 15
1819                         || c1 < 0
1820                         || (c2 =
1821                           Character.getNumericValue(source[currentPosition++]))
1822                           > 15
1823                         || c2 < 0
1824                         || (c3 =
1825                           Character.getNumericValue(source[currentPosition++]))
1826                           > 15
1827                         || c3 < 0
1828                         || (c4 =
1829                           Character.getNumericValue(source[currentPosition++]))
1830                           > 15
1831                         || c4 < 0) {
1832                         //error don't care of the value
1833                         currentCharacter = 'A';
1834                       } //something different from \n and \r
1835                       else {
1836                         currentCharacter =
1837                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1838                       }
1839                     }
1840                   }
1841                   if (recordLineSeparator
1842                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1843                     pushLineSeparator();
1844                 } catch (IndexOutOfBoundsException e) {
1845                 } //an eof will them be generated
1846                 break;
1847               }
1848               if (test > 0) {
1849                 //traditional and annotation comment
1850                 boolean star = false;
1851                 try { // consume next character
1852                   unicodeAsBackSlash = false;
1853                   if (((currentCharacter = source[currentPosition++]) == '\\')
1854                     && (source[currentPosition] == 'u')) {
1855                     getNextUnicodeChar();
1856                   } else {
1857                     if (withoutUnicodePtr != 0) {
1858                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1859                         currentCharacter;
1860                     }
1861                   };
1862                 } catch (InvalidInputException ex) {
1863                 };
1864                 if (currentCharacter == '*') {
1865                   star = true;
1866                 }
1867                 if (recordLineSeparator
1868                   && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1869                   pushLineSeparator();
1870                 try { //get the next char
1871                   if (((currentCharacter = source[currentPosition++]) == '\\')
1872                     && (source[currentPosition] == 'u')) {
1873                     //-------------unicode traitement ------------
1874                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1875                     currentPosition++;
1876                     while (source[currentPosition] == 'u') {
1877                       currentPosition++;
1878                     }
1879                     if ((c1 =
1880                       Character.getNumericValue(source[currentPosition++]))
1881                       > 15
1882                       || c1 < 0
1883                       || (c2 =
1884                         Character.getNumericValue(source[currentPosition++]))
1885                         > 15
1886                       || c2 < 0
1887                       || (c3 =
1888                         Character.getNumericValue(source[currentPosition++]))
1889                         > 15
1890                       || c3 < 0
1891                       || (c4 =
1892                         Character.getNumericValue(source[currentPosition++]))
1893                         > 15
1894                       || c4 < 0) {
1895                       //error don't care of the value
1896                       currentCharacter = 'A';
1897                     } //something different from * and /
1898                     else {
1899                       currentCharacter =
1900                         (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1901                     }
1902                   }
1903                   //loop until end of comment */
1904                   while ((currentCharacter != '/') || (!star)) {
1905                     if (recordLineSeparator
1906                       && ((currentCharacter == '\r')
1907                         || (currentCharacter == '\n')))
1908                       pushLineSeparator();
1909                     star = currentCharacter == '*';
1910                     //get next char
1911                     if (((currentCharacter = source[currentPosition++])
1912                       == '\\')
1913                       && (source[currentPosition] == 'u')) {
1914                       //-------------unicode traitement ------------
1915                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1916                       currentPosition++;
1917                       while (source[currentPosition] == 'u') {
1918                         currentPosition++;
1919                       }
1920                       if ((c1 =
1921                         Character.getNumericValue(source[currentPosition++]))
1922                         > 15
1923                         || c1 < 0
1924                         || (c2 =
1925                           Character.getNumericValue(source[currentPosition++]))
1926                           > 15
1927                         || c2 < 0
1928                         || (c3 =
1929                           Character.getNumericValue(source[currentPosition++]))
1930                           > 15
1931                         || c3 < 0
1932                         || (c4 =
1933                           Character.getNumericValue(source[currentPosition++]))
1934                           > 15
1935                         || c4 < 0) {
1936                         //error don't care of the value
1937                         currentCharacter = 'A';
1938                       } //something different from * and /
1939                       else {
1940                         currentCharacter =
1941                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1942                       }
1943                     }
1944                   }
1945                 } catch (IndexOutOfBoundsException e) {
1946                   return;
1947                 }
1948                 break;
1949               }
1950               break;
1951             }
1952
1953           default :
1954             if (isPHPIdentifierStart(currentCharacter)
1955               || currentCharacter == '$') {
1956               try {
1957                 scanIdentifierOrKeyword((currentCharacter == '$'));
1958               } catch (InvalidInputException ex) {
1959               };
1960               break;
1961             }
1962             if (Character.isDigit(currentCharacter)) {
1963               try {
1964                 scanNumber(false);
1965               } catch (InvalidInputException ex) {
1966               };
1967               break;
1968             }
1969         }
1970       }
1971       //-----------------end switch while try--------------------
1972     } catch (IndexOutOfBoundsException e) {
1973     } catch (InvalidInputException e) {
1974     }
1975     return;
1976   }
1977   public final boolean jumpOverUnicodeWhiteSpace()
1978     throws InvalidInputException {
1979     //BOOLEAN
1980     //handle the case of unicode. Jump over the next whiteSpace
1981     //making startPosition pointing on the next available char
1982     //On false, the currentCharacter is filled up with a potential
1983     //correct char
1984
1985     try {
1986       this.wasAcr = false;
1987       int c1, c2, c3, c4;
1988       int unicodeSize = 6;
1989       currentPosition++;
1990       while (source[currentPosition] == 'u') {
1991         currentPosition++;
1992         unicodeSize++;
1993       }
1994
1995       if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1996         || c1 < 0)
1997         || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1998           || c2 < 0)
1999         || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2000           || c3 < 0)
2001         || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2002           || c4 < 0)) {
2003         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2004       }
2005
2006       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2007       if (recordLineSeparator
2008         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2009         pushLineSeparator();
2010       if (Character.isWhitespace(currentCharacter))
2011         return true;
2012
2013       //buffer the new char which is not a white space
2014       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2015       //withoutUnicodePtr == 1 is true here
2016       return false;
2017     } catch (IndexOutOfBoundsException e) {
2018       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2019     }
2020   }
2021   public final int[] getLineEnds() {
2022     //return a bounded copy of this.lineEnds
2023
2024     int[] copy;
2025     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2026     return copy;
2027   }
2028
2029   public char[] getSource() {
2030     return this.source;
2031   }
2032   final char[] optimizedCurrentTokenSource1() {
2033     //return always the same char[] build only once
2034
2035     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2036     char charOne = source[startPosition];
2037     switch (charOne) {
2038       case 'a' :
2039         return charArray_a;
2040       case 'b' :
2041         return charArray_b;
2042       case 'c' :
2043         return charArray_c;
2044       case 'd' :
2045         return charArray_d;
2046       case 'e' :
2047         return charArray_e;
2048       case 'f' :
2049         return charArray_f;
2050       case 'g' :
2051         return charArray_g;
2052       case 'h' :
2053         return charArray_h;
2054       case 'i' :
2055         return charArray_i;
2056       case 'j' :
2057         return charArray_j;
2058       case 'k' :
2059         return charArray_k;
2060       case 'l' :
2061         return charArray_l;
2062       case 'm' :
2063         return charArray_m;
2064       case 'n' :
2065         return charArray_n;
2066       case 'o' :
2067         return charArray_o;
2068       case 'p' :
2069         return charArray_p;
2070       case 'q' :
2071         return charArray_q;
2072       case 'r' :
2073         return charArray_r;
2074       case 's' :
2075         return charArray_s;
2076       case 't' :
2077         return charArray_t;
2078       case 'u' :
2079         return charArray_u;
2080       case 'v' :
2081         return charArray_v;
2082       case 'w' :
2083         return charArray_w;
2084       case 'x' :
2085         return charArray_x;
2086       case 'y' :
2087         return charArray_y;
2088       case 'z' :
2089         return charArray_z;
2090       default :
2091         return new char[] { charOne };
2092     }
2093   }
2094
2095   final char[] optimizedCurrentTokenSource2() {
2096     //try to return the same char[] build only once
2097
2098     char c0, c1;
2099     int hash =
2100       (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2101         % TableSize;
2102     char[][] table = charArray_length[0][hash];
2103     int i = newEntry2;
2104     while (++i < InternalTableSize) {
2105       char[] charArray = table[i];
2106       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2107         return charArray;
2108     }
2109     //---------other side---------
2110     i = -1;
2111     int max = newEntry2;
2112     while (++i <= max) {
2113       char[] charArray = table[i];
2114       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2115         return charArray;
2116     }
2117     //--------add the entry-------
2118     if (++max >= InternalTableSize)
2119       max = 0;
2120     char[] r;
2121     table[max] = (r = new char[] { c0, c1 });
2122     newEntry2 = max;
2123     return r;
2124   }
2125
2126   final char[] optimizedCurrentTokenSource3() {
2127     //try to return the same char[] build only once
2128
2129     char c0, c1, c2;
2130     int hash =
2131       (((c0 = source[startPosition]) << 12)
2132         + ((c1 = source[startPosition + 1]) << 6)
2133         + (c2 = source[startPosition + 2]))
2134         % TableSize;
2135     char[][] table = charArray_length[1][hash];
2136     int i = newEntry3;
2137     while (++i < InternalTableSize) {
2138       char[] charArray = table[i];
2139       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2140         return charArray;
2141     }
2142     //---------other side---------
2143     i = -1;
2144     int max = newEntry3;
2145     while (++i <= max) {
2146       char[] charArray = table[i];
2147       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2148         return charArray;
2149     }
2150     //--------add the entry-------
2151     if (++max >= InternalTableSize)
2152       max = 0;
2153     char[] r;
2154     table[max] = (r = new char[] { c0, c1, c2 });
2155     newEntry3 = max;
2156     return r;
2157   }
2158
2159   final char[] optimizedCurrentTokenSource4() {
2160     //try to return the same char[] build only once
2161
2162     char c0, c1, c2, c3;
2163     long hash =
2164       ((((long) (c0 = source[startPosition])) << 18)
2165         + ((c1 = source[startPosition + 1]) << 12)
2166         + ((c2 = source[startPosition + 2]) << 6)
2167         + (c3 = source[startPosition + 3]))
2168         % TableSize;
2169     char[][] table = charArray_length[2][(int) hash];
2170     int i = newEntry4;
2171     while (++i < InternalTableSize) {
2172       char[] charArray = table[i];
2173       if ((c0 == charArray[0])
2174         && (c1 == charArray[1])
2175         && (c2 == charArray[2])
2176         && (c3 == charArray[3]))
2177         return charArray;
2178     }
2179     //---------other side---------
2180     i = -1;
2181     int max = newEntry4;
2182     while (++i <= max) {
2183       char[] charArray = table[i];
2184       if ((c0 == charArray[0])
2185         && (c1 == charArray[1])
2186         && (c2 == charArray[2])
2187         && (c3 == charArray[3]))
2188         return charArray;
2189     }
2190     //--------add the entry-------
2191     if (++max >= InternalTableSize)
2192       max = 0;
2193     char[] r;
2194     table[max] = (r = new char[] { c0, c1, c2, c3 });
2195     newEntry4 = max;
2196     return r;
2197
2198   }
2199
2200   final char[] optimizedCurrentTokenSource5() {
2201     //try to return the same char[] build only once
2202
2203     char c0, c1, c2, c3, c4;
2204     long hash =
2205       ((((long) (c0 = source[startPosition])) << 24)
2206         + (((long) (c1 = source[startPosition + 1])) << 18)
2207         + ((c2 = source[startPosition + 2]) << 12)
2208         + ((c3 = source[startPosition + 3]) << 6)
2209         + (c4 = source[startPosition + 4]))
2210         % TableSize;
2211     char[][] table = charArray_length[3][(int) hash];
2212     int i = newEntry5;
2213     while (++i < InternalTableSize) {
2214       char[] charArray = table[i];
2215       if ((c0 == charArray[0])
2216         && (c1 == charArray[1])
2217         && (c2 == charArray[2])
2218         && (c3 == charArray[3])
2219         && (c4 == charArray[4]))
2220         return charArray;
2221     }
2222     //---------other side---------
2223     i = -1;
2224     int max = newEntry5;
2225     while (++i <= max) {
2226       char[] charArray = table[i];
2227       if ((c0 == charArray[0])
2228         && (c1 == charArray[1])
2229         && (c2 == charArray[2])
2230         && (c3 == charArray[3])
2231         && (c4 == charArray[4]))
2232         return charArray;
2233     }
2234     //--------add the entry-------
2235     if (++max >= InternalTableSize)
2236       max = 0;
2237     char[] r;
2238     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2239     newEntry5 = max;
2240     return r;
2241
2242   }
2243
2244   final char[] optimizedCurrentTokenSource6() {
2245     //try to return the same char[] build only once
2246
2247     char c0, c1, c2, c3, c4, c5;
2248     long hash =
2249       ((((long) (c0 = source[startPosition])) << 32)
2250         + (((long) (c1 = source[startPosition + 1])) << 24)
2251         + (((long) (c2 = source[startPosition + 2])) << 18)
2252         + ((c3 = source[startPosition + 3]) << 12)
2253         + ((c4 = source[startPosition + 4]) << 6)
2254         + (c5 = source[startPosition + 5]))
2255         % TableSize;
2256     char[][] table = charArray_length[4][(int) hash];
2257     int i = newEntry6;
2258     while (++i < InternalTableSize) {
2259       char[] charArray = table[i];
2260       if ((c0 == charArray[0])
2261         && (c1 == charArray[1])
2262         && (c2 == charArray[2])
2263         && (c3 == charArray[3])
2264         && (c4 == charArray[4])
2265         && (c5 == charArray[5]))
2266         return charArray;
2267     }
2268     //---------other side---------
2269     i = -1;
2270     int max = newEntry6;
2271     while (++i <= max) {
2272       char[] charArray = table[i];
2273       if ((c0 == charArray[0])
2274         && (c1 == charArray[1])
2275         && (c2 == charArray[2])
2276         && (c3 == charArray[3])
2277         && (c4 == charArray[4])
2278         && (c5 == charArray[5]))
2279         return charArray;
2280     }
2281     //--------add the entry-------
2282     if (++max >= InternalTableSize)
2283       max = 0;
2284     char[] r;
2285     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2286     newEntry6 = max;
2287     return r;
2288   }
2289
2290   public final void pushLineSeparator() throws InvalidInputException {
2291     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2292     final int INCREMENT = 250;
2293
2294     if (this.checkNonExternalizedStringLiterals) {
2295       // reinitialize the current line for non externalize strings purpose
2296       currentLine = null;
2297     }
2298     //currentCharacter is at position currentPosition-1
2299
2300     // cr 000D
2301     if (currentCharacter == '\r') {
2302       int separatorPos = currentPosition - 1;
2303       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2304         return;
2305       //System.out.println("CR-" + separatorPos);
2306       try {
2307         lineEnds[++linePtr] = separatorPos;
2308       } catch (IndexOutOfBoundsException e) {
2309         //linePtr value is correct
2310         int oldLength = lineEnds.length;
2311         int[] old = lineEnds;
2312         lineEnds = new int[oldLength + INCREMENT];
2313         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2314         lineEnds[linePtr] = separatorPos;
2315       }
2316       // look-ahead for merged cr+lf
2317       try {
2318         if (source[currentPosition] == '\n') {
2319           //System.out.println("look-ahead LF-" + currentPosition);
2320           lineEnds[linePtr] = currentPosition;
2321           currentPosition++;
2322           wasAcr = false;
2323         } else {
2324           wasAcr = true;
2325         }
2326       } catch (IndexOutOfBoundsException e) {
2327         wasAcr = true;
2328       }
2329     } else {
2330       // lf 000A
2331       if (currentCharacter == '\n') {
2332         //must merge eventual cr followed by lf
2333         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2334           //System.out.println("merge LF-" + (currentPosition - 1));
2335           lineEnds[linePtr] = currentPosition - 1;
2336         } else {
2337           int separatorPos = currentPosition - 1;
2338           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2339             return;
2340           // System.out.println("LF-" + separatorPos);
2341           try {
2342             lineEnds[++linePtr] = separatorPos;
2343           } catch (IndexOutOfBoundsException e) {
2344             //linePtr value is correct
2345             int oldLength = lineEnds.length;
2346             int[] old = lineEnds;
2347             lineEnds = new int[oldLength + INCREMENT];
2348             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2349             lineEnds[linePtr] = separatorPos;
2350           }
2351         }
2352         wasAcr = false;
2353       }
2354     }
2355   }
2356   public final void pushUnicodeLineSeparator() {
2357     // isUnicode means that the \r or \n has been read as a unicode character
2358
2359     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2360
2361     final int INCREMENT = 250;
2362     //currentCharacter is at position currentPosition-1
2363
2364     if (this.checkNonExternalizedStringLiterals) {
2365       // reinitialize the current line for non externalize strings purpose
2366       currentLine = null;
2367     }
2368
2369     // cr 000D
2370     if (currentCharacter == '\r') {
2371       int separatorPos = currentPosition - 6;
2372       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2373         return;
2374       //System.out.println("CR-" + separatorPos);
2375       try {
2376         lineEnds[++linePtr] = separatorPos;
2377       } catch (IndexOutOfBoundsException e) {
2378         //linePtr value is correct
2379         int oldLength = lineEnds.length;
2380         int[] old = lineEnds;
2381         lineEnds = new int[oldLength + INCREMENT];
2382         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2383         lineEnds[linePtr] = separatorPos;
2384       }
2385       // look-ahead for merged cr+lf
2386       if (source[currentPosition] == '\n') {
2387         //System.out.println("look-ahead LF-" + currentPosition);
2388         lineEnds[linePtr] = currentPosition;
2389         currentPosition++;
2390         wasAcr = false;
2391       } else {
2392         wasAcr = true;
2393       }
2394     } else {
2395       // lf 000A
2396       if (currentCharacter == '\n') {
2397         //must merge eventual cr followed by lf
2398         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2399           //System.out.println("merge LF-" + (currentPosition - 1));
2400           lineEnds[linePtr] = currentPosition - 6;
2401         } else {
2402           int separatorPos = currentPosition - 6;
2403           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2404             return;
2405           // System.out.println("LF-" + separatorPos);
2406           try {
2407             lineEnds[++linePtr] = separatorPos;
2408           } catch (IndexOutOfBoundsException e) {
2409             //linePtr value is correct
2410             int oldLength = lineEnds.length;
2411             int[] old = lineEnds;
2412             lineEnds = new int[oldLength + INCREMENT];
2413             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2414             lineEnds[linePtr] = separatorPos;
2415           }
2416         }
2417         wasAcr = false;
2418       }
2419     }
2420   }
2421   public final void recordComment(boolean isJavadoc) {
2422
2423     // a new annotation comment is recorded
2424     try {
2425       commentStops[++commentPtr] =
2426         isJavadoc ? currentPosition : -currentPosition;
2427     } catch (IndexOutOfBoundsException e) {
2428       int oldStackLength = commentStops.length;
2429       int[] oldStack = commentStops;
2430       commentStops = new int[oldStackLength + 30];
2431       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2432       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2433       //grows the positions buffers too
2434       int[] old = commentStarts;
2435       commentStarts = new int[oldStackLength + 30];
2436       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2437     }
2438
2439     //the buffer is of a correct size here
2440     commentStarts[commentPtr] = startPosition;
2441   }
2442   public void resetTo(int begin, int end) {
2443     //reset the scanner to a given position where it may rescan again
2444
2445     diet = false;
2446     initialPosition = startPosition = currentPosition = begin;
2447     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2448     commentPtr = -1; // reset comment stack
2449   }
2450
2451   public final void scanSingleQuotedEscapeCharacter()
2452     throws InvalidInputException {
2453     // the string with "\\u" is a legal string of two chars \ and u
2454     //thus we use a direct access to the source (for regular cases).
2455
2456     if (unicodeAsBackSlash) {
2457       // consume next character
2458       unicodeAsBackSlash = false;
2459       if (((currentCharacter = source[currentPosition++]) == '\\')
2460         && (source[currentPosition] == 'u')) {
2461         getNextUnicodeChar();
2462       } else {
2463         if (withoutUnicodePtr != 0) {
2464           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2465         }
2466       }
2467     } else
2468       currentCharacter = source[currentPosition++];
2469     switch (currentCharacter) {
2470       case '\'' :
2471         currentCharacter = '\'';
2472         break;
2473       case '\\' :
2474         currentCharacter = '\\';
2475         break;
2476       default :
2477         currentCharacter = '\\';
2478         currentPosition--;
2479     }
2480   }
2481
2482   public final void scanDoubleQuotedEscapeCharacter()
2483     throws InvalidInputException {
2484     // the string with "\\u" is a legal string of two chars \ and u
2485     //thus we use a direct access to the source (for regular cases).
2486
2487     if (unicodeAsBackSlash) {
2488       // consume next character
2489       unicodeAsBackSlash = false;
2490       if (((currentCharacter = source[currentPosition++]) == '\\')
2491         && (source[currentPosition] == 'u')) {
2492         getNextUnicodeChar();
2493       } else {
2494         if (withoutUnicodePtr != 0) {
2495           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2496         }
2497       }
2498     } else
2499       currentCharacter = source[currentPosition++];
2500     switch (currentCharacter) {
2501       //      case 'b' :
2502       //        currentCharacter = '\b';
2503       //        break;
2504       case 't' :
2505         currentCharacter = '\t';
2506         break;
2507       case 'n' :
2508         currentCharacter = '\n';
2509         break;
2510         //      case 'f' :
2511         //        currentCharacter = '\f';
2512         //        break;
2513       case 'r' :
2514         currentCharacter = '\r';
2515         break;
2516       case '\"' :
2517         currentCharacter = '\"';
2518         break;
2519       case '\'' :
2520         currentCharacter = '\'';
2521         break;
2522       case '\\' :
2523         currentCharacter = '\\';
2524         break;
2525       case '$' :
2526         currentCharacter = '$';
2527         break;
2528       default :
2529         // -----------octal escape--------------
2530         // OctalDigit
2531         // OctalDigit OctalDigit
2532         // ZeroToThree OctalDigit OctalDigit
2533
2534         int number = Character.getNumericValue(currentCharacter);
2535         if (number >= 0 && number <= 7) {
2536           boolean zeroToThreeNot = number > 3;
2537           if (Character
2538             .isDigit(currentCharacter = source[currentPosition++])) {
2539             int digit = Character.getNumericValue(currentCharacter);
2540             if (digit >= 0 && digit <= 7) {
2541               number = (number * 8) + digit;
2542               if (Character
2543                 .isDigit(currentCharacter = source[currentPosition++])) {
2544                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2545                   currentPosition--;
2546                 } else {
2547                   digit = Character.getNumericValue(currentCharacter);
2548                   if (digit >= 0 && digit <= 7) {
2549                     // has read \ZeroToThree OctalDigit OctalDigit
2550                     number = (number * 8) + digit;
2551                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2552                     currentPosition--;
2553                   }
2554                 }
2555               } else { // has read \OctalDigit NonDigit--> ignore last character
2556                 currentPosition--;
2557               }
2558             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2559               currentPosition--;
2560             }
2561           } else { // has read \OctalDigit --> ignore last character
2562             currentPosition--;
2563           }
2564           if (number > 255)
2565             throw new InvalidInputException(INVALID_ESCAPE);
2566           currentCharacter = (char) number;
2567         }
2568         //else
2569         //     throw new InvalidInputException(INVALID_ESCAPE);
2570     }
2571   }
2572
2573   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2574   //    return scanIdentifierOrKeyword( false );
2575   //  }
2576
2577   public int scanIdentifierOrKeyword(boolean isVariable)
2578     throws InvalidInputException {
2579     //test keywords
2580
2581     //first dispatch on the first char.
2582     //then the length. If there are several
2583     //keywors with the same length AND the same first char, then do another
2584     //disptach on the second char :-)...cool....but fast !
2585
2586     useAssertAsAnIndentifier = false;
2587
2588     while (getNextCharAsJavaIdentifierPart()) {
2589     };
2590
2591     if (isVariable) {
2592       return TokenNameVariable;
2593     }
2594     int index, length;
2595     char[] data;
2596     char firstLetter;
2597     if (withoutUnicodePtr == 0)
2598
2599       //quick test on length == 1 but not on length > 12 while most identifier
2600       //have a length which is <= 12...but there are lots of identifier with
2601       //only one char....
2602
2603       {
2604       if ((length = currentPosition - startPosition) == 1)
2605         return TokenNameIdentifier;
2606       //  data = source;
2607       data = new char[length];
2608       index = startPosition;
2609       for (int i = 0; i < length; i++) {
2610         data[i] = Character.toLowerCase(source[index + i]);
2611       }
2612       index = 0;
2613     } else {
2614       if ((length = withoutUnicodePtr) == 1)
2615         return TokenNameIdentifier;
2616       // data = withoutUnicodeBuffer;
2617       data = new char[withoutUnicodeBuffer.length];
2618       for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2619         data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2620       }
2621       index = 1;
2622     }
2623
2624     firstLetter = data[index];
2625     switch (firstLetter) {
2626
2627       case 'a' : // as and array
2628         switch (length) {
2629           case 2 : //as
2630             if ((data[++index] == 's')) {
2631               return TokenNameas;
2632             } else {
2633               return TokenNameIdentifier;
2634             }
2635           case 3 : //and
2636             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2637               return TokenNameAND;
2638             } else {
2639               return TokenNameIdentifier;
2640             }
2641             //          case 5 :
2642             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2643             //              return TokenNamearray;
2644             //            else
2645             //              return TokenNameIdentifier;
2646           default :
2647             return TokenNameIdentifier;
2648         }
2649       case 'b' : //break
2650         switch (length) {
2651           case 5 :
2652             if ((data[++index] == 'r')
2653               && (data[++index] == 'e')
2654               && (data[++index] == 'a')
2655               && (data[++index] == 'k'))
2656               return TokenNamebreak;
2657             else
2658               return TokenNameIdentifier;
2659           default :
2660             return TokenNameIdentifier;
2661         }
2662
2663       case 'c' : //case class continue
2664         switch (length) {
2665           case 4 :
2666             if ((data[++index] == 'a')
2667               && (data[++index] == 's')
2668               && (data[++index] == 'e'))
2669               return TokenNamecase;
2670             else
2671               return TokenNameIdentifier;
2672           case 5 :
2673             if ((data[++index] == 'l')
2674               && (data[++index] == 'a')
2675               && (data[++index] == 's')
2676               && (data[++index] == 's'))
2677               return TokenNameclass;
2678             else
2679               return TokenNameIdentifier;
2680           case 8 :
2681             if ((data[++index] == 'o')
2682               && (data[++index] == 'n')
2683               && (data[++index] == 't')
2684               && (data[++index] == 'i')
2685               && (data[++index] == 'n')
2686               && (data[++index] == 'u')
2687               && (data[++index] == 'e'))
2688               return TokenNamecontinue;
2689             else
2690               return TokenNameIdentifier;
2691           default :
2692             return TokenNameIdentifier;
2693         }
2694
2695       case 'd' : //define default do
2696         switch (length) {
2697           case 2 :
2698             if ((data[++index] == 'o'))
2699               return TokenNamedo;
2700             else
2701               return TokenNameIdentifier;
2702           case 6 :
2703             if ((data[++index] == 'e')
2704               && (data[++index] == 'f')
2705               && (data[++index] == 'i')
2706               && (data[++index] == 'n')
2707               && (data[++index] == 'e'))
2708               return TokenNamedefine;
2709             else
2710               return TokenNameIdentifier;
2711           case 7 :
2712             if ((data[++index] == 'e')
2713               && (data[++index] == 'f')
2714               && (data[++index] == 'a')
2715               && (data[++index] == 'u')
2716               && (data[++index] == 'l')
2717               && (data[++index] == 't'))
2718               return TokenNamedefault;
2719             else
2720               return TokenNameIdentifier;
2721           default :
2722             return TokenNameIdentifier;
2723         }
2724       case 'e' : //echo else elseif extends
2725         switch (length) {
2726           case 4 :
2727             if ((data[++index] == 'c')
2728               && (data[++index] == 'h')
2729               && (data[++index] == 'o'))
2730               return TokenNameecho;
2731             else if (
2732               (data[index] == 'l')
2733                 && (data[++index] == 's')
2734                 && (data[++index] == 'e'))
2735               return TokenNameelse;
2736             else
2737               return TokenNameIdentifier;
2738           case 5 : // endif
2739             if ((data[++index] == 'n')
2740               && (data[++index] == 'd')
2741               && (data[++index] == 'i')
2742               && (data[++index] == 'f'))
2743               return TokenNameendif;
2744             else
2745               return TokenNameIdentifier;
2746           case 6 : // endfor
2747             if ((data[++index] == 'n')
2748               && (data[++index] == 'd')
2749               && (data[++index] == 'f')
2750               && (data[++index] == 'o')
2751               && (data[++index] == 'r'))
2752               return TokenNameendfor;
2753             else if (
2754               (data[index] == 'l')
2755                 && (data[++index] == 's')
2756                 && (data[++index] == 'e')
2757                 && (data[++index] == 'i')
2758                 && (data[++index] == 'f'))
2759               return TokenNameelseif;
2760             else
2761               return TokenNameIdentifier;
2762           case 7 :
2763             if ((data[++index] == 'x')
2764               && (data[++index] == 't')
2765               && (data[++index] == 'e')
2766               && (data[++index] == 'n')
2767               && (data[++index] == 'd')
2768               && (data[++index] == 's'))
2769               return TokenNameextends;
2770             else
2771               return TokenNameIdentifier;
2772           case 8 : // endwhile
2773             if ((data[++index] == 'n')
2774               && (data[++index] == 'd')
2775               && (data[++index] == 'w')
2776               && (data[++index] == 'h')
2777               && (data[++index] == 'i')
2778               && (data[++index] == 'l')
2779               && (data[++index] == 'e'))
2780               return TokenNameendwhile;
2781             else
2782               return TokenNameIdentifier;
2783           case 9 : // endswitch
2784             if ((data[++index] == 'n')
2785               && (data[++index] == 'd')
2786               && (data[++index] == 's')
2787               && (data[++index] == 'w')
2788               && (data[++index] == 'i')
2789               && (data[++index] == 't')
2790               && (data[++index] == 'c')
2791               && (data[++index] == 'h'))
2792               return TokenNameendswitch;
2793             else
2794               return TokenNameIdentifier;
2795           case 10 : // endforeach
2796             if ((data[++index] == 'n')
2797               && (data[++index] == 'd')
2798               && (data[++index] == 'f')
2799               && (data[++index] == 'o')
2800               && (data[++index] == 'r')
2801               && (data[++index] == 'e')
2802               && (data[++index] == 'a')
2803               && (data[++index] == 'c')
2804               && (data[++index] == 'h'))
2805               return TokenNameendforeach;
2806             else
2807               return TokenNameIdentifier;
2808
2809           default :
2810             return TokenNameIdentifier;
2811         }
2812
2813       case 'f' : //for false function
2814         switch (length) {
2815           case 3 :
2816             if ((data[++index] == 'o') && (data[++index] == 'r'))
2817               return TokenNamefor;
2818             else
2819               return TokenNameIdentifier;
2820           case 5 :
2821             if ((data[++index] == 'a')
2822               && (data[++index] == 'l')
2823               && (data[++index] == 's')
2824               && (data[++index] == 'e'))
2825               return TokenNamefalse;
2826             else
2827               return TokenNameIdentifier;
2828           case 7 : // function
2829             if ((data[++index] == 'o')
2830               && (data[++index] == 'r')
2831               && (data[++index] == 'e')
2832               && (data[++index] == 'a')
2833               && (data[++index] == 'c')
2834               && (data[++index] == 'h'))
2835               return TokenNameforeach;
2836             else
2837               return TokenNameIdentifier;
2838           case 8 : // function
2839             if ((data[++index] == 'u')
2840               && (data[++index] == 'n')
2841               && (data[++index] == 'c')
2842               && (data[++index] == 't')
2843               && (data[++index] == 'i')
2844               && (data[++index] == 'o')
2845               && (data[++index] == 'n'))
2846               return TokenNamefunction;
2847             else
2848               return TokenNameIdentifier;
2849           default :
2850             return TokenNameIdentifier;
2851         }
2852       case 'g' : //global
2853         if (length == 6) {
2854           if ((data[++index] == 'l')
2855             && (data[++index] == 'o')
2856             && (data[++index] == 'b')
2857             && (data[++index] == 'a')
2858             && (data[++index] == 'l')) {
2859             return TokenNameglobal;
2860           }
2861         }
2862         return TokenNameIdentifier;
2863
2864       case 'i' : //if int
2865         switch (length) {
2866           case 2 :
2867             if (data[++index] == 'f')
2868               return TokenNameif;
2869             else
2870               return TokenNameIdentifier;
2871             //          case 3 :
2872             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2873             //              return TokenNameint;
2874             //            else
2875             //              return TokenNameIdentifier;
2876           case 7 :
2877             if ((data[++index] == 'n')
2878               && (data[++index] == 'c')
2879               && (data[++index] == 'l')
2880               && (data[++index] == 'u')
2881               && (data[++index] == 'd')
2882               && (data[++index] == 'e'))
2883               return TokenNameinclude;
2884             else
2885               return TokenNameIdentifier;
2886           case 12 :
2887             if ((data[++index] == 'n')
2888               && (data[++index] == 'c')
2889               && (data[++index] == 'l')
2890               && (data[++index] == 'u')
2891               && (data[++index] == 'd')
2892               && (data[++index] == 'e')
2893               && (data[++index] == '_')
2894               && (data[++index] == 'o')
2895               && (data[++index] == 'n')
2896               && (data[++index] == 'c')
2897               && (data[++index] == 'e'))
2898               return TokenNameinclude_once;
2899             else
2900               return TokenNameIdentifier;
2901           default :
2902             return TokenNameIdentifier;
2903         }
2904
2905       case 'l' : //list
2906         if (length == 4) {
2907           if ((data[++index] == 'i')
2908             && (data[++index] == 's')
2909             && (data[++index] == 't')) {
2910             return TokenNamelist;
2911           }
2912         }
2913         return TokenNameIdentifier;
2914
2915       case 'n' : // new null
2916         switch (length) {
2917           case 3 :
2918             if ((data[++index] == 'e') && (data[++index] == 'w'))
2919               return TokenNamenew;
2920             else
2921               return TokenNameIdentifier;
2922           case 4 :
2923             if ((data[++index] == 'u')
2924               && (data[++index] == 'l')
2925               && (data[++index] == 'l'))
2926               return TokenNamenull;
2927             else
2928               return TokenNameIdentifier;
2929
2930           default :
2931             return TokenNameIdentifier;
2932         }
2933       case 'o' : // or old_function
2934         if (length == 2) {
2935           if (data[++index] == 'r') {
2936             return TokenNameOR;
2937           }
2938         }
2939         //        if (length == 12) {
2940         //          if ((data[++index] == 'l')
2941         //            && (data[++index] == 'd')
2942         //            && (data[++index] == '_')
2943         //            && (data[++index] == 'f')
2944         //            && (data[++index] == 'u')
2945         //            && (data[++index] == 'n')
2946         //            && (data[++index] == 'c')
2947         //            && (data[++index] == 't')
2948         //            && (data[++index] == 'i')
2949         //            && (data[++index] == 'o')
2950         //            && (data[++index] == 'n')) {
2951         //            return TokenNameold_function;
2952         //          }
2953         //        }
2954         return TokenNameIdentifier;
2955
2956       case 'p' : // print
2957         if (length == 5) {
2958           if ((data[++index] == 'r')
2959             && (data[++index] == 'i')
2960             && (data[++index] == 'n')
2961             && (data[++index] == 't')) {
2962             return TokenNameprint;
2963           }
2964         }
2965         return TokenNameIdentifier;
2966       case 'r' : //return require require_once
2967         if (length == 6) {
2968           if ((data[++index] == 'e')
2969             && (data[++index] == 't')
2970             && (data[++index] == 'u')
2971             && (data[++index] == 'r')
2972             && (data[++index] == 'n')) {
2973             return TokenNamereturn;
2974           }
2975         } else if (length == 7) {
2976           if ((data[++index] == 'e')
2977             && (data[++index] == 'q')
2978             && (data[++index] == 'u')
2979             && (data[++index] == 'i')
2980             && (data[++index] == 'r')
2981             && (data[++index] == 'e')) {
2982             return TokenNamerequire;
2983           }
2984         } else if (length == 12) {
2985           if ((data[++index] == 'e')
2986             && (data[++index] == 'q')
2987             && (data[++index] == 'u')
2988             && (data[++index] == 'i')
2989             && (data[++index] == 'r')
2990             && (data[++index] == 'e')
2991             && (data[++index] == '_')
2992             && (data[++index] == 'o')
2993             && (data[++index] == 'n')
2994             && (data[++index] == 'c')
2995             && (data[++index] == 'e')) {
2996             return TokenNamerequire_once;
2997           }
2998         } else
2999           return TokenNameIdentifier;
3000
3001       case 's' : //static switch
3002         switch (length) {
3003           case 6 :
3004             if (data[++index] == 't')
3005               if ((data[++index] == 'a')
3006                 && (data[++index] == 't')
3007                 && (data[++index] == 'i')
3008                 && (data[++index] == 'c')) {
3009                 return TokenNamestatic;
3010               } else
3011                 return TokenNameIdentifier;
3012             else if (
3013               (data[index] == 'w')
3014                 && (data[++index] == 'i')
3015                 && (data[++index] == 't')
3016                 && (data[++index] == 'c')
3017                 && (data[++index] == 'h'))
3018               return TokenNameswitch;
3019             else
3020               return TokenNameIdentifier;
3021           default :
3022             return TokenNameIdentifier;
3023         }
3024
3025       case 't' : // true
3026         switch (length) {
3027
3028           case 4 :
3029             if ((data[++index] == 'r')
3030               && (data[++index] == 'u')
3031               && (data[++index] == 'e'))
3032               return TokenNametrue;
3033             else
3034               return TokenNameIdentifier;
3035             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3036             //              return TokenNamethis;
3037
3038           default :
3039             return TokenNameIdentifier;
3040         }
3041
3042       case 'v' : //var
3043         switch (length) {
3044           case 3 :
3045             if ((data[++index] == 'a') && (data[++index] == 'r'))
3046               return TokenNamevar;
3047             else
3048               return TokenNameIdentifier;
3049
3050           default :
3051             return TokenNameIdentifier;
3052         }
3053
3054       case 'w' : //while
3055         switch (length) {
3056           case 5 :
3057             if ((data[++index] == 'h')
3058               && (data[++index] == 'i')
3059               && (data[++index] == 'l')
3060               && (data[++index] == 'e'))
3061               return TokenNamewhile;
3062             else
3063               return TokenNameIdentifier;
3064             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3065             //return TokenNamewidefp ;
3066             //else
3067             //return TokenNameIdentifier;
3068           default :
3069             return TokenNameIdentifier;
3070         }
3071
3072       case 'x' : //xor
3073         switch (length) {
3074           case 3 :
3075             if ((data[++index] == 'o') && (data[++index] == 'r'))
3076               return TokenNameXOR;
3077             else
3078               return TokenNameIdentifier;
3079
3080           default :
3081             return TokenNameIdentifier;
3082         }
3083       default :
3084         return TokenNameIdentifier;
3085     }
3086   }
3087   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3088
3089     //when entering this method the currentCharacter is the firt
3090     //digit of the number , i.e. it may be preceeded by a . when
3091     //dotPrefix is true
3092
3093     boolean floating = dotPrefix;
3094     if ((!dotPrefix) && (currentCharacter == '0')) {
3095       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3096         //force the first char of the hexa number do exist...
3097         // consume next character
3098         unicodeAsBackSlash = false;
3099         if (((currentCharacter = source[currentPosition++]) == '\\')
3100           && (source[currentPosition] == 'u')) {
3101           getNextUnicodeChar();
3102         } else {
3103           if (withoutUnicodePtr != 0) {
3104             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3105           }
3106         }
3107         if (Character.digit(currentCharacter, 16) == -1)
3108           throw new InvalidInputException(INVALID_HEXA);
3109         //---end forcing--
3110         while (getNextCharAsDigit(16)) {
3111         };
3112         //        if (getNextChar('l', 'L') >= 0)
3113         //          return TokenNameLongLiteral;
3114         //        else
3115         return TokenNameIntegerLiteral;
3116       }
3117
3118       //there is x or X in the number
3119       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3120       if (getNextCharAsDigit()) {
3121         //-------------potential octal-----------------
3122         while (getNextCharAsDigit()) {
3123         };
3124
3125         //        if (getNextChar('l', 'L') >= 0) {
3126         //          return TokenNameLongLiteral;
3127         //        }
3128         //
3129         //        if (getNextChar('f', 'F') >= 0) {
3130         //          return TokenNameFloatingPointLiteral;
3131         //        }
3132
3133         if (getNextChar('d', 'D') >= 0) {
3134           return TokenNameDoubleLiteral;
3135         } else { //make the distinction between octal and float ....
3136           if (getNextChar('.')) { //bingo ! ....
3137             while (getNextCharAsDigit()) {
3138             };
3139             if (getNextChar('e', 'E') >= 0) {
3140               // consume next character
3141               unicodeAsBackSlash = false;
3142               if (((currentCharacter = source[currentPosition++]) == '\\')
3143                 && (source[currentPosition] == 'u')) {
3144                 getNextUnicodeChar();
3145               } else {
3146                 if (withoutUnicodePtr != 0) {
3147                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3148                 }
3149               }
3150
3151               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3152                 // consume next character
3153                 unicodeAsBackSlash = false;
3154                 if (((currentCharacter = source[currentPosition++]) == '\\')
3155                   && (source[currentPosition] == 'u')) {
3156                   getNextUnicodeChar();
3157                 } else {
3158                   if (withoutUnicodePtr != 0) {
3159                     withoutUnicodeBuffer[++withoutUnicodePtr] =
3160                       currentCharacter;
3161                   }
3162                 }
3163               }
3164               if (!Character.isDigit(currentCharacter))
3165                 throw new InvalidInputException(INVALID_FLOAT);
3166               while (getNextCharAsDigit()) {
3167               };
3168             }
3169             //            if (getNextChar('f', 'F') >= 0)
3170             //              return TokenNameFloatingPointLiteral;
3171             getNextChar('d', 'D'); //jump over potential d or D
3172             return TokenNameDoubleLiteral;
3173           } else {
3174             return TokenNameIntegerLiteral;
3175           }
3176         }
3177       } else {
3178         /* carry on */
3179       }
3180     }
3181
3182     while (getNextCharAsDigit()) {
3183     };
3184
3185     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3186     //      return TokenNameLongLiteral;
3187
3188     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3189       while (getNextCharAsDigit()) {
3190       };
3191       floating = true;
3192     }
3193
3194     //if floating is true both exponant and suffix may be optional
3195
3196     if (getNextChar('e', 'E') >= 0) {
3197       floating = true;
3198       // consume next character
3199       unicodeAsBackSlash = false;
3200       if (((currentCharacter = source[currentPosition++]) == '\\')
3201         && (source[currentPosition] == 'u')) {
3202         getNextUnicodeChar();
3203       } else {
3204         if (withoutUnicodePtr != 0) {
3205           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3206         }
3207       }
3208
3209       if ((currentCharacter == '-')
3210         || (currentCharacter == '+')) { // consume next character
3211         unicodeAsBackSlash = false;
3212         if (((currentCharacter = source[currentPosition++]) == '\\')
3213           && (source[currentPosition] == 'u')) {
3214           getNextUnicodeChar();
3215         } else {
3216           if (withoutUnicodePtr != 0) {
3217             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3218           }
3219         }
3220       }
3221       if (!Character.isDigit(currentCharacter))
3222         throw new InvalidInputException(INVALID_FLOAT);
3223       while (getNextCharAsDigit()) {
3224       };
3225     }
3226
3227     if (getNextChar('d', 'D') >= 0)
3228       return TokenNameDoubleLiteral;
3229     //    if (getNextChar('f', 'F') >= 0)
3230     //      return TokenNameFloatingPointLiteral;
3231
3232     //the long flag has been tested before
3233
3234     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3235   }
3236   /**
3237    * Search the line number corresponding to a specific position
3238    *
3239    */
3240   public final int getLineNumber(int position) {
3241
3242     if (lineEnds == null)
3243       return 1;
3244     int length = linePtr + 1;
3245     if (length == 0)
3246       return 1;
3247     int g = 0, d = length - 1;
3248     int m = 0;
3249     while (g <= d) {
3250       m = (g + d) / 2;
3251       if (position < lineEnds[m]) {
3252         d = m - 1;
3253       } else if (position > lineEnds[m]) {
3254         g = m + 1;
3255       } else {
3256         return m + 1;
3257       }
3258     }
3259     if (position < lineEnds[m]) {
3260       return m + 1;
3261     }
3262     return m + 2;
3263   }
3264
3265   public void setPHPMode(boolean mode) {
3266     phpMode = mode;
3267   }
3268
3269   public final void setSource(char[] source) {
3270     //the source-buffer is set to sourceString
3271
3272     if (source == null) {
3273       this.source = new char[0];
3274     } else {
3275       this.source = source;
3276     }
3277     startPosition = -1;
3278     initialPosition = currentPosition = 0;
3279     containsAssertKeyword = false;
3280     withoutUnicodeBuffer = new char[this.source.length];
3281
3282   }
3283
3284   public String toString() {
3285     if (startPosition == source.length)
3286       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3287     if (currentPosition > source.length)
3288       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3289
3290     char front[] = new char[startPosition];
3291     System.arraycopy(source, 0, front, 0, startPosition);
3292
3293     int middleLength = (currentPosition - 1) - startPosition + 1;
3294     char middle[];
3295     if (middleLength > -1) {
3296       middle = new char[middleLength];
3297       System.arraycopy(source, startPosition, middle, 0, middleLength);
3298     } else {
3299       middle = new char[0];
3300     }
3301
3302     char end[] = new char[source.length - (currentPosition - 1)];
3303     System.arraycopy(
3304       source,
3305       (currentPosition - 1) + 1,
3306       end,
3307       0,
3308       source.length - (currentPosition - 1) - 1);
3309
3310     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3311     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3312     + new String(end);
3313   }
3314   public final String toStringAction(int act) {
3315     switch (act) {
3316       case TokenNameERROR :
3317         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3318       case TokenNameStopPHP :
3319         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3320       case TokenNameIdentifier :
3321         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3322       case TokenNameVariable :
3323         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3324       case TokenNameas :
3325         return "as"; //$NON-NLS-1$
3326       case TokenNamebreak :
3327         return "break"; //$NON-NLS-1$
3328       case TokenNamecase :
3329         return "case"; //$NON-NLS-1$
3330       case TokenNameclass :
3331         return "class"; //$NON-NLS-1$
3332       case TokenNamecontinue :
3333         return "continue"; //$NON-NLS-1$
3334       case TokenNamedefault :
3335         return "default"; //$NON-NLS-1$
3336       case TokenNamedefine :
3337         return "define"; //$NON-NLS-1$
3338       case TokenNamedo :
3339         return "do"; //$NON-NLS-1$
3340       case TokenNameecho :
3341         return "echo"; //$NON-NLS-1$
3342       case TokenNameelse :
3343         return "else"; //$NON-NLS-1$
3344       case TokenNameelseif :
3345         return "elseif"; //$NON-NLS-1$
3346       case TokenNameendfor :
3347         return "endfor"; //$NON-NLS-1$
3348       case TokenNameendforeach :
3349         return "endforeach"; //$NON-NLS-1$
3350       case TokenNameendif :
3351         return "endif"; //$NON-NLS-1$
3352       case TokenNameendswitch :
3353         return "endswitch"; //$NON-NLS-1$
3354       case TokenNameendwhile :
3355         return "endwhile"; //$NON-NLS-1$
3356       case TokenNameextends :
3357         return "extends"; //$NON-NLS-1$
3358       case TokenNamefalse :
3359         return "false"; //$NON-NLS-1$
3360       case TokenNamefor :
3361         return "for"; //$NON-NLS-1$
3362       case TokenNameforeach :
3363         return "foreach"; //$NON-NLS-1$
3364       case TokenNamefunction :
3365         return "function"; //$NON-NLS-1$
3366       case TokenNameglobal :
3367         return "global"; //$NON-NLS-1$
3368       case TokenNameif :
3369         return "if"; //$NON-NLS-1$
3370       case TokenNameinclude :
3371         return "include"; //$NON-NLS-1$
3372       case TokenNameinclude_once :
3373         return "include_once"; //$NON-NLS-1$
3374       case TokenNamelist :
3375         return "list"; //$NON-NLS-1$
3376       case TokenNamenew :
3377         return "new"; //$NON-NLS-1$
3378       case TokenNamenull :
3379         return "null"; //$NON-NLS-1$
3380       case TokenNameprint :
3381         return "print"; //$NON-NLS-1$
3382       case TokenNamerequire :
3383         return "require"; //$NON-NLS-1$
3384       case TokenNamerequire_once :
3385         return "require_once"; //$NON-NLS-1$
3386       case TokenNamereturn :
3387         return "return"; //$NON-NLS-1$
3388       case TokenNamestatic :
3389         return "static"; //$NON-NLS-1$
3390       case TokenNameswitch :
3391         return "switch"; //$NON-NLS-1$
3392       case TokenNametrue :
3393         return "true"; //$NON-NLS-1$
3394       case TokenNamevar :
3395         return "var"; //$NON-NLS-1$
3396       case TokenNamewhile :
3397         return "while"; //$NON-NLS-1$
3398       case TokenNameIntegerLiteral :
3399         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3400       case TokenNameDoubleLiteral :
3401         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3402       case TokenNameStringLiteral :
3403         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3404       case TokenNameStringConstant :
3405         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3406       case TokenNameStringInterpolated :
3407         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3408       case TokenNameHEREDOC :
3409         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3410
3411       case TokenNamePLUS_PLUS :
3412         return "++"; //$NON-NLS-1$
3413       case TokenNameMINUS_MINUS :
3414         return "--"; //$NON-NLS-1$
3415       case TokenNameEQUAL_EQUAL :
3416         return "=="; //$NON-NLS-1$
3417       case TokenNameEQUAL_GREATER :
3418         return "=>"; //$NON-NLS-1$
3419       case TokenNameLESS_EQUAL :
3420         return "<="; //$NON-NLS-1$
3421       case TokenNameGREATER_EQUAL :
3422         return ">="; //$NON-NLS-1$
3423       case TokenNameNOT_EQUAL :
3424         return "!="; //$NON-NLS-1$
3425       case TokenNameLEFT_SHIFT :
3426         return "<<"; //$NON-NLS-1$
3427       case TokenNameRIGHT_SHIFT :
3428         return ">>"; //$NON-NLS-1$
3429       case TokenNamePLUS_EQUAL :
3430         return "+="; //$NON-NLS-1$
3431       case TokenNameMINUS_EQUAL :
3432         return "-="; //$NON-NLS-1$
3433       case TokenNameMULTIPLY_EQUAL :
3434         return "*="; //$NON-NLS-1$
3435       case TokenNameDIVIDE_EQUAL :
3436         return "/="; //$NON-NLS-1$
3437       case TokenNameAND_EQUAL :
3438         return "&="; //$NON-NLS-1$
3439       case TokenNameOR_EQUAL :
3440         return "|="; //$NON-NLS-1$
3441       case TokenNameXOR_EQUAL :
3442         return "^="; //$NON-NLS-1$
3443       case TokenNameREMAINDER_EQUAL :
3444         return "%="; //$NON-NLS-1$
3445       case TokenNameLEFT_SHIFT_EQUAL :
3446         return "<<="; //$NON-NLS-1$
3447       case TokenNameRIGHT_SHIFT_EQUAL :
3448         return ">>="; //$NON-NLS-1$
3449       case TokenNameOR_OR :
3450         return "||"; //$NON-NLS-1$
3451       case TokenNameAND_AND :
3452         return "&&"; //$NON-NLS-1$
3453       case TokenNamePLUS :
3454         return "+"; //$NON-NLS-1$
3455       case TokenNameMINUS :
3456         return "-"; //$NON-NLS-1$
3457       case TokenNameMINUS_GREATER :
3458         return "->";
3459       case TokenNameNOT :
3460         return "!"; //$NON-NLS-1$
3461       case TokenNameREMAINDER :
3462         return "%"; //$NON-NLS-1$
3463       case TokenNameXOR :
3464         return "^"; //$NON-NLS-1$
3465       case TokenNameAND :
3466         return "&"; //$NON-NLS-1$
3467       case TokenNameMULTIPLY :
3468         return "*"; //$NON-NLS-1$
3469       case TokenNameOR :
3470         return "|"; //$NON-NLS-1$
3471       case TokenNameTWIDDLE :
3472         return "~"; //$NON-NLS-1$
3473       case TokenNameTWIDDLE_EQUAL :
3474         return "~="; //$NON-NLS-1$
3475       case TokenNameDIVIDE :
3476         return "/"; //$NON-NLS-1$
3477       case TokenNameGREATER :
3478         return ">"; //$NON-NLS-1$
3479       case TokenNameLESS :
3480         return "<"; //$NON-NLS-1$
3481       case TokenNameLPAREN :
3482         return "("; //$NON-NLS-1$
3483       case TokenNameRPAREN :
3484         return ")"; //$NON-NLS-1$
3485       case TokenNameLBRACE :
3486         return "{"; //$NON-NLS-1$
3487       case TokenNameRBRACE :
3488         return "}"; //$NON-NLS-1$
3489       case TokenNameLBRACKET :
3490         return "["; //$NON-NLS-1$
3491       case TokenNameRBRACKET :
3492         return "]"; //$NON-NLS-1$
3493       case TokenNameSEMICOLON :
3494         return ";"; //$NON-NLS-1$
3495       case TokenNameQUESTION :
3496         return "?"; //$NON-NLS-1$
3497       case TokenNameCOLON :
3498         return ":"; //$NON-NLS-1$
3499       case TokenNameCOMMA :
3500         return ","; //$NON-NLS-1$
3501       case TokenNameDOT :
3502         return "."; //$NON-NLS-1$
3503       case TokenNameEQUAL :
3504         return "="; //$NON-NLS-1$
3505       case TokenNameAT :
3506         return "@";
3507       case TokenNameDOLLAR_LBRACE :
3508         return "${";
3509       case TokenNameEOF :
3510         return "EOF"; //$NON-NLS-1$
3511       default :
3512         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3513     }
3514   }
3515
3516   public Scanner(
3517     boolean tokenizeComments,
3518     boolean tokenizeWhiteSpace,
3519     boolean checkNonExternalizedStringLiterals) {
3520     this(
3521       tokenizeComments,
3522       tokenizeWhiteSpace,
3523       checkNonExternalizedStringLiterals,
3524       false);
3525   }
3526
3527   public Scanner(
3528     boolean tokenizeComments,
3529     boolean tokenizeWhiteSpace,
3530     boolean checkNonExternalizedStringLiterals,
3531     boolean assertMode) {
3532     this.eofPosition = Integer.MAX_VALUE;
3533     this.tokenizeComments = tokenizeComments;
3534     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3535     this.checkNonExternalizedStringLiterals =
3536       checkNonExternalizedStringLiterals;
3537     this.assertMode = assertMode;
3538   }
3539
3540   private void checkNonExternalizeString() throws InvalidInputException {
3541     if (currentLine == null)
3542       return;
3543     parseTags(currentLine);
3544   }
3545
3546   private void parseTags(NLSLine line) throws InvalidInputException {
3547     String s = new String(getCurrentTokenSource());
3548     int pos = s.indexOf(TAG_PREFIX);
3549     int lineLength = line.size();
3550     while (pos != -1) {
3551       int start = pos + TAG_PREFIX_LENGTH;
3552       int end = s.indexOf(TAG_POSTFIX, start);
3553       String index = s.substring(start, end);
3554       int i = 0;
3555       try {
3556         i = Integer.parseInt(index) - 1;
3557         // Tags are one based not zero based.
3558       } catch (NumberFormatException e) {
3559         i = -1; // we don't want to consider this as a valid NLS tag
3560       }
3561       if (line.exists(i)) {
3562         line.set(i, null);
3563       }
3564       pos = s.indexOf(TAG_PREFIX, start);
3565     }
3566
3567     this.nonNLSStrings = new StringLiteral[lineLength];
3568     int nonNLSCounter = 0;
3569     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3570       StringLiteral literal = (StringLiteral) iterator.next();
3571       if (literal != null) {
3572         this.nonNLSStrings[nonNLSCounter++] = literal;
3573       }
3574     }
3575     if (nonNLSCounter == 0) {
3576       this.nonNLSStrings = null;
3577       currentLine = null;
3578       return;
3579     }
3580     this.wasNonExternalizedStringLiteral = true;
3581     if (nonNLSCounter != lineLength) {
3582       System.arraycopy(
3583         this.nonNLSStrings,
3584         0,
3585         (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),
3586         0,
3587         nonNLSCounter);
3588     }
3589     currentLine = null;
3590   }
3591 }