net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /***********************************************************************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
   3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
   4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
   5  *
   6  * Contributors: IBM Corporation - initial API and implementation
   7  **********************************************************************************************************************************/
   8 package net.sourceforge.phpdt.internal.compiler.parser;
   9
  10 import java.util.ArrayList;
  11 import java.util.Iterator;
  12 import java.util.List;
  13 import java.util.Stack;
  14
  15 import net.sourceforge.phpdt.core.compiler.CharOperation;
  16 import net.sourceforge.phpdt.core.compiler.IScanner;
  17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
  20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
  21
  22 public class Scanner implements IScanner, ITerminalSymbols {
  /*
   * Scanner API overview:
   * - getNextToken() returns the type of the current token (this value is not memorized by the scanner)
   * - getCurrentTokenSource() provides the source of the current token (the unicode-buffer path of that method is
   *   currently commented out, so the characters are copied straight from the source array)
   * - startPosition gives the start position of the current token in the stream
   * - currentPosition - 1 gives the end position of the current token in the stream
   */
  // 1.4 feature
  private boolean assertMode;

  public boolean useAssertAsAnIndentifier = false;

  // flag indicating whether the processed source contains occurrences of the keyword assert
  public boolean containsAssertKeyword = false;

  public boolean recordLineSeparator;

  public boolean ignorePHPOneLiner = false;

  public boolean phpMode = false;

  public boolean phpExpressionTag = false;

//  public Stack encapsedStringStack = null;

  // character most recently read from the source by the getNextChar...() methods
  public char currentCharacter;

  // index in source at which the current token starts (see getCurrentTokenStartPosition())
  public int startPosition;

  // index in source of the next character to read; currentPosition - 1 is the end of the
  // current token (see getCurrentTokenEndPosition())
  public int currentPosition;

  // initialPosition: answered by getLineStart(1) as the start of the first line
  // eofPosition: after this position EOF tokens are generated instead of real tokens from the
  // source
  public int initialPosition, eofPosition;

  public boolean tokenizeComments;

  public boolean tokenizeWhiteSpace;

  public boolean tokenizeStrings;

  // source should be viewed as a window (i.e. a part)
  // of an entire, very large stream
  public char source[];

  // unicode support
  public char[] withoutUnicodeBuffer;

  // when == 0 ==> no unicode in the current token (0 is used as a fast test flag, so the real
  // first char of withoutUnicodeBuffer is at position 1)
  public int withoutUnicodePtr;

  public boolean unicodeAsBackSlash = false;

  public boolean scanningFloatLiteral = false;

  // support for /** comments
  public int[] commentStops = new int[10];

  public int[] commentStarts = new int[10];

  public int commentPtr = -1; // no comment test with commentPtr value -1

  protected int lastCommentLinePosition = -1;

  // diet parsing support - jump over some method bodies when requested
  public boolean diet = false;

  // support for the poor-line-debuggers ....
  // remember the positions of the cr/lf line ends
  public int[] lineEnds = new int[250];

  public int linePtr = -1;

  public boolean wasAcr = false;
  95
  // Keys naming the kinds of scanning problems.
  // NOTE(review): presumably used as the message of the InvalidInputException thrown while
  // scanning - the throwing code is not visible in this part of the file, confirm against it.
  public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$

  public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$

  public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$

  public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$

  public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$

  public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$

  public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$

  public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$

  public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$

  public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$

  public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$

  public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 119
  //----------------optimized identifier management------------------
  // One shared array per single lowercase letter 'a'..'z'.
  // NOTE(review): presumably handed out by optimizedCurrentTokenSource1() so that one-letter
  // identifiers need no allocation - that method is not visible here, confirm.
  static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
      charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
      charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
      charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
      charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
      charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
      charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
      charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
      charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };

  // One shared array per two-character sequence '$' + lowercase letter ("$a" .. "$z").
  // NOTE(review): presumably the PHP-variable counterpart of the table above - confirm against
  // optimizedCurrentTokenSource2().
  static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
      '$',
      'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' },
      charArray_vf = new char[] { '$', 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
      charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
      charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
      charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
      charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
      charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
      charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };

  // Placeholder of six NUL characters; the instance initializer stores it in every slot of charArray_length.
  static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };

  // 30*6 = 180 entries
  static final int TableSize = 30, InternalTableSize = 6;

  // Size of the first dimension of charArray_length; getCurrentIdentifierSource() takes its
  // optimized path for token lengths 1 to 6.
  public static final int OptimizedLength = 6;

  // Per-instance table dimensioned [OptimizedLength][TableSize][InternalTableSize], each slot holding a char[].
  // NOTE(review): presumably a cache of short identifiers indexed by length, hash bucket and slot - the
  // code filling it is not visible here, confirm.
  public/* static */
  final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 151
  // support for detecting non-externalized string literals
  int currentLineNr = -1;

  int previousLineNr = -1;

  NLSLine currentLine = null;

  List lines = new ArrayList();

  // Text that opens a non-NLS marker comment; the markers at the end of the constant
  // declarations in this class have the shape TAG_PREFIX + number + TAG_POSTFIX.
  public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$

  public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();

  // Text that closes a non-NLS marker comment.
  public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$

  public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();

  public StringLiteral[] nonNLSStrings = null;

  public boolean checkNonExternalizedStringLiterals = true;

  public boolean wasNonExternalizedStringLiteral = false;
 174   /* static */{
 175     for (int i = 0; i < 6; i++) {
 176       for (int j = 0; j < TableSize; j++) {
 177         for (int k = 0; k < InternalTableSize; k++) {
 178           charArray_length[i][j][k] = initCharArray;
 179         }
 180       }
 181     }
 182   }
 183
  // NOTE(review): presumably the next insertion slots used by optimizedCurrentTokenSource2..6() when
  // they store a new entry in charArray_length - those methods are not visible here, confirm.
  static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;

  // Bracket kinds: three kinds numbered 0 to 2, with BracketKinds holding their count.
  public static final int RoundBracket = 0;

  public static final int SquareBracket = 1;

  public static final int CurlyBracket = 2;

  public static final int BracketKinds = 3;

  // task tag support
  public char[][] foundTaskTags = null;

  public char[][] foundTaskMessages;

  public char[][] foundTaskPriorities = null;

  public int[][] foundTaskPositions;

  public int foundTaskCount = 0;

  public char[][] taskTags = null;

  public char[][] taskPriorities = null;

  public boolean isTaskCaseSensitive = true;

  // Compile-time switches, both switched off.
  public static final boolean DEBUG = false;

  public static final boolean TRACE = false;

  public ICompilationUnit compilationUnit = null;
 216
 217   /**
 218    * Determines if the specified character is permissible as the first character in a PHP identifier or variable
 219    *
 220    * The '$' character for PHP variables is regarded as a correct first character !
 221    *
 222    */
 223   public static boolean isPHPIdentOrVarStart(char ch) {
 224     return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 225   }
 226
 227   /**
 228    * Determines if the specified character is permissible as the first character in a PHP identifier.
 229    *
 230    * The '$' character for PHP variables isn't regarded as the first character !
 231    */
 232   public static boolean isPHPIdentifierStart(char ch) {
 233     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 234   }
 235
 236   /**
 237    * Determines if the specified character may be part of a PHP identifier as other than the first character
 238    */
 239   public static boolean isPHPIdentifierPart(char ch) {
 240     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 241   }
 242
 243   public final boolean atEnd() {
 244     // This code is not relevant if source is
 245     // Only a part of the real stream input
 246     return source.length == currentPosition;
 247   }
 248
 249   public char[] getCurrentIdentifierSource() {
 250     //return the token REAL source (aka unicodes are precomputed)
 251     char[] result;
 252     //    if (withoutUnicodePtr != 0)
 253     //      //0 is used as a fast test flag so the real first char is in position 1
 254     //      System.arraycopy(
 255     //        withoutUnicodeBuffer,
 256     //        1,
 257     //        result = new char[withoutUnicodePtr],
 258     //        0,
 259     //        withoutUnicodePtr);
 260     //    else {
 261     int length = currentPosition - startPosition;
 262     switch (length) { // see OptimizedLength
 263     case 1:
 264       return optimizedCurrentTokenSource1();
 265     case 2:
 266       return optimizedCurrentTokenSource2();
 267     case 3:
 268       return optimizedCurrentTokenSource3();
 269     case 4:
 270       return optimizedCurrentTokenSource4();
 271     case 5:
 272       return optimizedCurrentTokenSource5();
 273     case 6:
 274       return optimizedCurrentTokenSource6();
 275     }
 276     //no optimization
 277     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 278     //   }
 279     return result;
 280   }
 281
 282   public int getCurrentTokenEndPosition() {
 283     return this.currentPosition - 1;
 284   }
 285
 286   public final char[] getCurrentTokenSource() {
 287     // Return the token REAL source (aka unicodes are precomputed)
 288     char[] result;
 289     //    if (withoutUnicodePtr != 0)
 290     //      // 0 is used as a fast test flag so the real first char is in position 1
 291     //      System.arraycopy(
 292     //        withoutUnicodeBuffer,
 293     //        1,
 294     //        result = new char[withoutUnicodePtr],
 295     //        0,
 296     //        withoutUnicodePtr);
 297     //    else {
 298     int length;
 299     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 300     //    }
 301     return result;
 302   }
 303
 304   public final char[] getCurrentTokenSource(int startPos) {
 305     // Return the token REAL source (aka unicodes are precomputed)
 306     char[] result;
 307     //    if (withoutUnicodePtr != 0)
 308     //      // 0 is used as a fast test flag so the real first char is in position 1
 309     //      System.arraycopy(
 310     //        withoutUnicodeBuffer,
 311     //        1,
 312     //        result = new char[withoutUnicodePtr],
 313     //        0,
 314     //        withoutUnicodePtr);
 315     //    else {
 316     int length;
 317     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 318     //  }
 319     return result;
 320   }
 321
 322   public final char[] getCurrentTokenSourceString() {
 323     //return the token REAL source (aka unicodes are precomputed).
 324     //REMOVE the two " that are at the beginning and the end.
 325     char[] result;
 326     if (withoutUnicodePtr != 0)
 327       //0 is used as a fast test flag so the real first char is in position 1
 328       System.arraycopy(withoutUnicodeBuffer, 2,
 329       //2 is 1 (real start) + 1 (to jump over the ")
 330           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 331     else {
 332       int length;
 333       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 334     }
 335     return result;
 336   }
 337
 338   public final char[] getRawTokenSourceEnd() {
 339     int length = this.eofPosition - this.currentPosition - 1;
 340     char[] sourceEnd = new char[length];
 341     System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
 342     return sourceEnd;
 343   }
 344
 345   public int getCurrentTokenStartPosition() {
 346     return this.startPosition;
 347   }
 348
 349   public final char[] getCurrentStringLiteralSource() {
 350     // Return the token REAL source (aka unicodes are precomputed)
 351     if (startPosition + 1 >= currentPosition) {
 352       return new char[0];
 353     }
 354     char[] result;
 355     int length;
 356     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 357     //    }
 358     return result;
 359   }
 360
 361   public final char[] getCurrentStringLiteralSource(int startPos) {
 362     // Return the token REAL source (aka unicodes are precomputed)
 363     char[] result;
 364     int length;
 365     System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
 366     //    }
 367     return result;
 368   }
 369
 370   /*
 371    * Search the source position corresponding to the end of a given line number
 372    *
 373    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 374    *
 375    * In case the given line number is inconsistent, answers -1.
 376    */
 377   public final int getLineEnd(int lineNumber) {
 378     if (lineEnds == null)
 379       return -1;
 380     if (lineNumber >= lineEnds.length)
 381       return -1;
 382     if (lineNumber <= 0)
 383       return -1;
 384     if (lineNumber == lineEnds.length - 1)
 385       return eofPosition;
 386     return lineEnds[lineNumber - 1];
 387     // next line start one character behind the lineEnd of the previous line
 388   }
 389
 390   /**
 391    * Search the source position corresponding to the beginning of a given line number
 392    *
 393    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 394    *
 395    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 396    *
 397    * In case the given line number is inconsistent, answers -1.
 398    */
 399   public final int getLineStart(int lineNumber) {
 400     if (lineEnds == null)
 401       return -1;
 402     if (lineNumber >= lineEnds.length)
 403       return -1;
 404     if (lineNumber <= 0)
 405       return -1;
 406     if (lineNumber == 1)
 407       return initialPosition;
 408     return lineEnds[lineNumber - 2] + 1;
 409     // next line start one character behind the lineEnd of the previous line
 410   }
 411
 412   public final boolean getNextChar(char testedChar) {
 413     //BOOLEAN
 414     //handle the case of unicode.
 415     //when a unicode appears then we must use a buffer that holds char
 416     // internal values
 417     //At the end of this method currentCharacter holds the new visited char
 418     //and currentPosition points right next after it
 419     //Both previous lines are true if the currentCharacter is == to the
 420     // testedChar
 421     //On false, no side effect has occured.
 422     //ALL getNextChar.... ARE OPTIMIZED COPIES
 423     int temp = currentPosition;
 424     try {
 425       currentCharacter = source[currentPosition++];
 426       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 427       //        && (source[currentPosition] == 'u')) {
 428       //        //-------------unicode traitement ------------
 429       //        int c1, c2, c3, c4;
 430       //        int unicodeSize = 6;
 431       //        currentPosition++;
 432       //        while (source[currentPosition] == 'u') {
 433       //          currentPosition++;
 434       //          unicodeSize++;
 435       //        }
 436       //
 437       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 438       //          || c1 < 0)
 439       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 440       //            || c2 < 0)
 441       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 442       //            || c3 < 0)
 443       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 444       //            || c4 < 0)) {
 445       //          currentPosition = temp;
 446       //          return false;
 447       //        }
 448       //
 449       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 450       //        if (currentCharacter != testedChar) {
 451       //          currentPosition = temp;
 452       //          return false;
 453       //        }
 454       //        unicodeAsBackSlash = currentCharacter == '\\';
 455       //
 456       //        //need the unicode buffer
 457       //        if (withoutUnicodePtr == 0) {
 458       //          //buffer all the entries that have been left aside....
 459       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 460       //          System.arraycopy(
 461       //            source,
 462       //            startPosition,
 463       //            withoutUnicodeBuffer,
 464       //            1,
 465       //            withoutUnicodePtr);
 466       //        }
 467       //        //fill the buffer with the char
 468       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 469       //        return true;
 470       //
 471       //      } //-------------end unicode traitement--------------
 472       //      else {
 473       if (currentCharacter != testedChar) {
 474         currentPosition = temp;
 475         return false;
 476       }
 477       unicodeAsBackSlash = false;
 478       //        if (withoutUnicodePtr != 0)
 479       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 480       return true;
 481       //      }
 482     } catch (IndexOutOfBoundsException e) {
 483       unicodeAsBackSlash = false;
 484       currentPosition = temp;
 485       return false;
 486     }
 487   }
 488
 489   public final int getNextChar(char testedChar1, char testedChar2) {
 490     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 491     //test can be done with (x==0) for the first and (x>0) for the second
 492     //handle the case of unicode.
 493     //when a unicode appears then we must use a buffer that holds char
 494     // internal values
 495     //At the end of this method currentCharacter holds the new visited char
 496     //and currentPosition points right next after it
 497     //Both previous lines are true if the currentCharacter is == to the
 498     // testedChar1/2
 499     //On false, no side effect has occured.
 500     //ALL getNextChar.... ARE OPTIMIZED COPIES
 501     int temp = currentPosition;
 502     try {
 503       int result;
 504       currentCharacter = source[currentPosition++];
 505       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 506       //        && (source[currentPosition] == 'u')) {
 507       //        //-------------unicode traitement ------------
 508       //        int c1, c2, c3, c4;
 509       //        int unicodeSize = 6;
 510       //        currentPosition++;
 511       //        while (source[currentPosition] == 'u') {
 512       //          currentPosition++;
 513       //          unicodeSize++;
 514       //        }
 515       //
 516       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 517       //          || c1 < 0)
 518       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 519       //            || c2 < 0)
 520       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 521       //            || c3 < 0)
 522       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 523       //            || c4 < 0)) {
 524       //          currentPosition = temp;
 525       //          return 2;
 526       //        }
 527       //
 528       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 529       //        if (currentCharacter == testedChar1)
 530       //          result = 0;
 531       //        else if (currentCharacter == testedChar2)
 532       //          result = 1;
 533       //        else {
 534       //          currentPosition = temp;
 535       //          return -1;
 536       //        }
 537       //
 538       //        //need the unicode buffer
 539       //        if (withoutUnicodePtr == 0) {
 540       //          //buffer all the entries that have been left aside....
 541       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 542       //          System.arraycopy(
 543       //            source,
 544       //            startPosition,
 545       //            withoutUnicodeBuffer,
 546       //            1,
 547       //            withoutUnicodePtr);
 548       //        }
 549       //        //fill the buffer with the char
 550       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 551       //        return result;
 552       //      } //-------------end unicode traitement--------------
 553       //      else {
 554       if (currentCharacter == testedChar1)
 555         result = 0;
 556       else if (currentCharacter == testedChar2)
 557         result = 1;
 558       else {
 559         currentPosition = temp;
 560         return -1;
 561       }
 562       //        if (withoutUnicodePtr != 0)
 563       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 564       return result;
 565       //     }
 566     } catch (IndexOutOfBoundsException e) {
 567       currentPosition = temp;
 568       return -1;
 569     }
 570   }
 571
 572   public final boolean getNextCharAsDigit() {
 573     //BOOLEAN
 574     //handle the case of unicode.
 575     //when a unicode appears then we must use a buffer that holds char
 576     // internal values
 577     //At the end of this method currentCharacter holds the new visited char
 578     //and currentPosition points right next after it
 579     //Both previous lines are true if the currentCharacter is a digit
 580     //On false, no side effect has occured.
 581     //ALL getNextChar.... ARE OPTIMIZED COPIES
 582     int temp = currentPosition;
 583     try {
 584       currentCharacter = source[currentPosition++];
 585       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 586       //        && (source[currentPosition] == 'u')) {
 587       //        //-------------unicode traitement ------------
 588       //        int c1, c2, c3, c4;
 589       //        int unicodeSize = 6;
 590       //        currentPosition++;
 591       //        while (source[currentPosition] == 'u') {
 592       //          currentPosition++;
 593       //          unicodeSize++;
 594       //        }
 595       //
 596       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 597       //          || c1 < 0)
 598       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 599       //            || c2 < 0)
 600       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 601       //            || c3 < 0)
 602       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 603       //            || c4 < 0)) {
 604       //          currentPosition = temp;
 605       //          return false;
 606       //        }
 607       //
 608       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 609       //        if (!Character.isDigit(currentCharacter)) {
 610       //          currentPosition = temp;
 611       //          return false;
 612       //        }
 613       //
 614       //        //need the unicode buffer
 615       //        if (withoutUnicodePtr == 0) {
 616       //          //buffer all the entries that have been left aside....
 617       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 618       //          System.arraycopy(
 619       //            source,
 620       //            startPosition,
 621       //            withoutUnicodeBuffer,
 622       //            1,
 623       //            withoutUnicodePtr);
 624       //        }
 625       //        //fill the buffer with the char
 626       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 627       //        return true;
 628       //      } //-------------end unicode traitement--------------
 629       //      else {
 630       if (!Character.isDigit(currentCharacter)) {
 631         currentPosition = temp;
 632         return false;
 633       }
 634       //        if (withoutUnicodePtr != 0)
 635       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 636       return true;
 637       //      }
 638     } catch (IndexOutOfBoundsException e) {
 639       currentPosition = temp;
 640       return false;
 641     }
 642   }
 643
 644   public final boolean getNextCharAsDigit(int radix) {
 645     //BOOLEAN
 646     //handle the case of unicode.
 647     //when a unicode appears then we must use a buffer that holds char
 648     // internal values
 649     //At the end of this method currentCharacter holds the new visited char
 650     //and currentPosition points right next after it
 651     //Both previous lines are true if the currentCharacter is a digit base on
 652     // radix
 653     //On false, no side effect has occured.
 654     //ALL getNextChar.... ARE OPTIMIZED COPIES
 655     int temp = currentPosition;
 656     try {
 657       currentCharacter = source[currentPosition++];
 658       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 659       //        && (source[currentPosition] == 'u')) {
 660       //        //-------------unicode traitement ------------
 661       //        int c1, c2, c3, c4;
 662       //        int unicodeSize = 6;
 663       //        currentPosition++;
 664       //        while (source[currentPosition] == 'u') {
 665       //          currentPosition++;
 666       //          unicodeSize++;
 667       //        }
 668       //
 669       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 670       //          || c1 < 0)
 671       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 672       //            || c2 < 0)
 673       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 674       //            || c3 < 0)
 675       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 676       //            || c4 < 0)) {
 677       //          currentPosition = temp;
 678       //          return false;
 679       //        }
 680       //
 681       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 682       //        if (Character.digit(currentCharacter, radix) == -1) {
 683       //          currentPosition = temp;
 684       //          return false;
 685       //        }
 686       //
 687       //        //need the unicode buffer
 688       //        if (withoutUnicodePtr == 0) {
 689       //          //buffer all the entries that have been left aside....
 690       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 691       //          System.arraycopy(
 692       //            source,
 693       //            startPosition,
 694       //            withoutUnicodeBuffer,
 695       //            1,
 696       //            withoutUnicodePtr);
 697       //        }
 698       //        //fill the buffer with the char
 699       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 700       //        return true;
 701       //      } //-------------end unicode traitement--------------
 702       //      else {
 703       if (Character.digit(currentCharacter, radix) == -1) {
 704         currentPosition = temp;
 705         return false;
 706       }
 707       //        if (withoutUnicodePtr != 0)
 708       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 709       return true;
 710       //      }
 711     } catch (IndexOutOfBoundsException e) {
 712       currentPosition = temp;
 713       return false;
 714     }
 715   }
 716
 717   public boolean getNextCharAsJavaIdentifierPart() {
 718     //BOOLEAN
 719     //handle the case of unicode.
 720     //when a unicode appears then we must use a buffer that holds char
 721     // internal values
 722     //At the end of this method currentCharacter holds the new visited char
 723     //and currentPosition points right next after it
 724     //Both previous lines are true if the currentCharacter is a
 725     // JavaIdentifierPart
 726     //On false, no side effect has occured.
 727     //ALL getNextChar.... ARE OPTIMIZED COPIES
 728     int temp = currentPosition;
 729     try {
 730       currentCharacter = source[currentPosition++];
 731       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 732       //        && (source[currentPosition] == 'u')) {
 733       //        //-------------unicode traitement ------------
 734       //        int c1, c2, c3, c4;
 735       //        int unicodeSize = 6;
 736       //        currentPosition++;
 737       //        while (source[currentPosition] == 'u') {
 738       //          currentPosition++;
 739       //          unicodeSize++;
 740       //        }
 741       //
 742       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 743       //          || c1 < 0)
 744       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 745       //            || c2 < 0)
 746       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 747       //            || c3 < 0)
 748       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 749       //            || c4 < 0)) {
 750       //          currentPosition = temp;
 751       //          return false;
 752       //        }
 753       //
 754       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 755       //        if (!isPHPIdentifierPart(currentCharacter)) {
 756       //          currentPosition = temp;
 757       //          return false;
 758       //        }
 759       //
 760       //        //need the unicode buffer
 761       //        if (withoutUnicodePtr == 0) {
 762       //          //buffer all the entries that have been left aside....
 763       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 764       //          System.arraycopy(
 765       //            source,
 766       //            startPosition,
 767       //            withoutUnicodeBuffer,
 768       //            1,
 769       //            withoutUnicodePtr);
 770       //        }
 771       //        //fill the buffer with the char
 772       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 773       //        return true;
 774       //      } //-------------end unicode traitement--------------
 775       //      else {
 776       if (!isPHPIdentifierPart(currentCharacter)) {
 777         currentPosition = temp;
 778         return false;
 779       }
 780       //        if (withoutUnicodePtr != 0)
 781       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 782       return true;
 783       //      }
 784     } catch (IndexOutOfBoundsException e) {
 785       currentPosition = temp;
 786       return false;
 787     }
 788   }
 789
 790   public int getCastOrParen() {
 791     int tempPosition = currentPosition;
 792     char tempCharacter = currentCharacter;
 793     int tempToken = TokenNameLPAREN;
 794     boolean found = false;
 795     StringBuffer buf = new StringBuffer();
 796     try {
 797       do {
 798         currentCharacter = source[currentPosition++];
 799       } while (currentCharacter == ' ' || currentCharacter == '\t');
 800       while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 801         buf.append(currentCharacter);
 802         currentCharacter = source[currentPosition++];
 803       }
 804       if (buf.length() >= 3 && buf.length() <= 7) {
 805         char[] data = buf.toString().toCharArray();
 806         int index = 0;
 807         switch (data.length) {
 808         case 3:
 809           // int
 810           if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
 811             found = true;
 812             tempToken = TokenNameintCAST;
 813           }
 814           break;
 815         case 4:
 816           // bool real
 817           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
 818             found = true;
 819             tempToken = TokenNameboolCAST;
 820           } else {
 821             index = 0;
 822             if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
 823               found = true;
 824               tempToken = TokenNamedoubleCAST;
 825             }
 826           }
 827           break;
 828         case 5:
 829           // array unset float
 830           if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
 831               && (data[++index] == 'y')) {
 832             found = true;
 833             tempToken = TokenNamearrayCAST;
 834           } else {
 835             index = 0;
 836             if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
 837                 && (data[++index] == 't')) {
 838               found = true;
 839               tempToken = TokenNameunsetCAST;
 840             } else {
 841               index = 0;
 842               if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
 843                   && (data[++index] == 't')) {
 844                 found = true;
 845                 tempToken = TokenNamedoubleCAST;
 846               }
 847             }
 848           }
 849           break;
 850         case 6:
 851           // object string double
 852           if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
 853               && (data[++index] == 'c') && (data[++index] == 't')) {
 854             found = true;
 855             tempToken = TokenNameobjectCAST;
 856           } else {
 857             index = 0;
 858             if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
 859                 && (data[++index] == 'n') && (data[++index] == 'g')) {
 860               found = true;
 861               tempToken = TokenNamestringCAST;
 862             } else {
 863               index = 0;
 864               if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
 865                   && (data[++index] == 'l') && (data[++index] == 'e')) {
 866                 found = true;
 867                 tempToken = TokenNamedoubleCAST;
 868               }
 869             }
 870           }
 871           break;
 872         case 7:
 873           // boolean integer
 874           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
 875               && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
 876             found = true;
 877             tempToken = TokenNameboolCAST;
 878           } else {
 879             index = 0;
 880             if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
 881                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
 882               found = true;
 883               tempToken = TokenNameintCAST;
 884             }
 885           }
 886           break;
 887         }
 888         if (found) {
 889           while (currentCharacter == ' ' || currentCharacter == '\t') {
 890             currentCharacter = source[currentPosition++];
 891           }
 892           if (currentCharacter == ')') {
 893             return tempToken;
 894           }
 895         }
 896       }
 897     } catch (IndexOutOfBoundsException e) {
 898     }
 899     currentCharacter = tempCharacter;
 900     currentPosition = tempPosition;
 901     return TokenNameLPAREN;
 902   }
 903
 904   public void consumeStringInterpolated() throws InvalidInputException {
 905     try {
 906       // consume next character
 907       unicodeAsBackSlash = false;
 908       currentCharacter = source[currentPosition++];
 909       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 910       //                  && (source[currentPosition] == 'u')) {
 911       //                  getNextUnicodeChar();
 912       //                } else {
 913       //                  if (withoutUnicodePtr != 0) {
 914       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 915       //                      currentCharacter;
 916       //                  }
 917       //                }
 918       while (currentCharacter != '`') {
 919         /** ** in PHP \r and \n are valid in string literals *** */
 920         //                if ((currentCharacter == '\n')
 921         //                  || (currentCharacter == '\r')) {
 922         //                  // relocate if finding another quote fairly close: thus unicode
 923         // '/u000D' will be fully consumed
 924         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 925         //                    if (currentPosition + lookAhead == source.length)
 926         //                      break;
 927         //                    if (source[currentPosition + lookAhead] == '\n')
 928         //                      break;
 929         //                    if (source[currentPosition + lookAhead] == '\"') {
 930         //                      currentPosition += lookAhead + 1;
 931         //                      break;
 932         //                    }
 933         //                  }
 934         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 935         //                }
 936         if (currentCharacter == '\\') {
 937           int escapeSize = currentPosition;
 938           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 939           //scanEscapeCharacter make a side effect on this value and we need
 940           // the previous value few lines down this one
 941           scanDoubleQuotedEscapeCharacter();
 942           escapeSize = currentPosition - escapeSize;
 943           if (withoutUnicodePtr == 0) {
 944             //buffer all the entries that have been left aside....
 945             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 946             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 947             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 948           } else { //overwrite the / in the buffer
 949             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 950             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 951               // where only one is correct
 952               withoutUnicodePtr--;
 953             }
 954           }
 955         } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 956           if (recordLineSeparator) {
 957             pushLineSeparator();
 958           }
 959         }
 960         // consume next character
 961         unicodeAsBackSlash = false;
 962         currentCharacter = source[currentPosition++];
 963         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 964         //                    && (source[currentPosition] == 'u')) {
 965         //                    getNextUnicodeChar();
 966         //                  } else {
 967         if (withoutUnicodePtr != 0) {
 968           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 969         }
 970         //                  }
 971       }
 972     } catch (IndexOutOfBoundsException e) {
 973       //    reset end position for error reporting
 974       currentPosition -= 2;
 975       throw new InvalidInputException(UNTERMINATED_STRING);
 976     } catch (InvalidInputException e) {
 977       if (e.getMessage().equals(INVALID_ESCAPE)) {
 978         // relocate if finding another quote fairly close: thus unicode
 979         // '/u000D' will be fully consumed
 980         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 981           if (currentPosition + lookAhead == source.length)
 982             break;
 983           if (source[currentPosition + lookAhead] == '\n')
 984             break;
 985           if (source[currentPosition + lookAhead] == '`') {
 986             currentPosition += lookAhead + 1;
 987             break;
 988           }
 989         }
 990       }
 991       throw e; // rethrow
 992     }
 993     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
 994       // //$NON-NLS-?$ where ? is an
 995       // int.
 996       if (currentLine == null) {
 997         currentLine = new NLSLine();
 998         lines.add(currentLine);
 999       }
1000       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1001     }
1002   }
1003
1004   public void consumeStringConstant() throws InvalidInputException {
1005     try {
1006       // consume next character
1007       unicodeAsBackSlash = false;
1008       currentCharacter = source[currentPosition++];
1009       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1010       //                  && (source[currentPosition] == 'u')) {
1011       //                  getNextUnicodeChar();
1012       //                } else {
1013       //                  if (withoutUnicodePtr != 0) {
1014       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1015       //                      currentCharacter;
1016       //                  }
1017       //                }
1018       while (currentCharacter != '\'') {
1019         /** ** in PHP \r and \n are valid in string literals *** */
1020         //                  if ((currentCharacter == '\n')
1021         //                    || (currentCharacter == '\r')) {
1022         //                    // relocate if finding another quote fairly close: thus unicode
1023         // '/u000D' will be fully consumed
1024         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1025         //                      if (currentPosition + lookAhead == source.length)
1026         //                        break;
1027         //                      if (source[currentPosition + lookAhead] == '\n')
1028         //                        break;
1029         //                      if (source[currentPosition + lookAhead] == '\"') {
1030         //                        currentPosition += lookAhead + 1;
1031         //                        break;
1032         //                      }
1033         //                    }
1034         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1035         //                  }
1036         if (currentCharacter == '\\') {
1037           int escapeSize = currentPosition;
1038           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1039           //scanEscapeCharacter make a side effect on this value and we need
1040           // the previous value few lines down this one
1041           scanSingleQuotedEscapeCharacter();
1042           escapeSize = currentPosition - escapeSize;
1043           if (withoutUnicodePtr == 0) {
1044             //buffer all the entries that have been left aside....
1045             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1046             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1047             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1048           } else { //overwrite the / in the buffer
1049             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1050             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1051               // where only one is correct
1052               withoutUnicodePtr--;
1053             }
1054           }
1055         } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1056           if (recordLineSeparator) {
1057             pushLineSeparator();
1058           }
1059         }
1060         // consume next character
1061         unicodeAsBackSlash = false;
1062         currentCharacter = source[currentPosition++];
1063         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1064         //                    && (source[currentPosition] == 'u')) {
1065         //                    getNextUnicodeChar();
1066         //                  } else {
1067         if (withoutUnicodePtr != 0) {
1068           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1069         }
1070         //                  }
1071       }
1072     } catch (IndexOutOfBoundsException e) {
1073       // reset end position for error reporting
1074       currentPosition -= 2;
1075       throw new InvalidInputException(UNTERMINATED_STRING);
1076     } catch (InvalidInputException e) {
1077       if (e.getMessage().equals(INVALID_ESCAPE)) {
1078         // relocate if finding another quote fairly close: thus unicode
1079         // '/u000D' will be fully consumed
1080         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1081           if (currentPosition + lookAhead == source.length)
1082             break;
1083           if (source[currentPosition + lookAhead] == '\n')
1084             break;
1085           if (source[currentPosition + lookAhead] == '\'') {
1086             currentPosition += lookAhead + 1;
1087             break;
1088           }
1089         }
1090       }
1091       throw e; // rethrow
1092     }
1093     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1094       // //$NON-NLS-?$ where ? is an
1095       // int.
1096       if (currentLine == null) {
1097         currentLine = new NLSLine();
1098         lines.add(currentLine);
1099       }
1100       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1101     }
1102   }
1103
1104   public void consumeStringLiteral() throws InvalidInputException {
1105     try {
1106       boolean openDollarBrace = false;
1107       // consume next character
1108       unicodeAsBackSlash = false;
1109       currentCharacter = source[currentPosition++];
1110       while (currentCharacter != '"' || openDollarBrace) {
1111         /** ** in PHP \r and \n are valid in string literals *** */
1112         if (currentCharacter == '\\') {
1113           int escapeSize = currentPosition;
1114           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1115           //scanEscapeCharacter make a side effect on this value and we need
1116           // the previous value few lines down this one
1117           scanDoubleQuotedEscapeCharacter();
1118           escapeSize = currentPosition - escapeSize;
1119           if (withoutUnicodePtr == 0) {
1120             //buffer all the entries that have been left aside....
1121             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1122             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1123             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1124           } else { //overwrite the / in the buffer
1125             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1126             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1127               // where only one is correct
1128               withoutUnicodePtr--;
1129             }
1130           }
1131         } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1132           openDollarBrace = true;
1133         } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1134           openDollarBrace = true;
1135         } else if (currentCharacter == '}') {
1136           openDollarBrace = false;
1137         } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1138           if (recordLineSeparator) {
1139             pushLineSeparator();
1140           }
1141         }
1142         // consume next character
1143         unicodeAsBackSlash = false;
1144         currentCharacter = source[currentPosition++];
1145         if (withoutUnicodePtr != 0) {
1146           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1147         }
1148       }
1149     } catch (IndexOutOfBoundsException e) {
1150       //    reset end position for error reporting
1151       currentPosition -= 2;
1152       throw new InvalidInputException(UNTERMINATED_STRING);
1153     } catch (InvalidInputException e) {
1154       if (e.getMessage().equals(INVALID_ESCAPE)) {
1155         // relocate if finding another quote fairly close: thus unicode
1156         // '/u000D' will be fully consumed
1157         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1158           if (currentPosition + lookAhead == source.length)
1159             break;
1160           if (source[currentPosition + lookAhead] == '\n')
1161             break;
1162           if (source[currentPosition + lookAhead] == '\"') {
1163             currentPosition += lookAhead + 1;
1164             break;
1165           }
1166         }
1167       }
1168       throw e; // rethrow
1169     }
1170     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1171       // //$NON-NLS-?$ where ? is an
1172       // int.
1173       if (currentLine == null) {
1174         currentLine = new NLSLine();
1175         lines.add(currentLine);
1176       }
1177       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1178     }
1179   }
1180
1181   public int getNextToken() throws InvalidInputException {
1182     phpExpressionTag = false;
1183     if (!phpMode) {
1184       return getInlinedHTMLToken(currentPosition);
1185     }
1186     if (phpMode) {
1187       this.wasAcr = false;
1188       if (diet) {
1189         jumpOverMethodBody();
1190         diet = false;
1191         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1192       }
1193       try {
1194         while (true) {
1195           withoutUnicodePtr = 0;
1196           //start with a new token
1197           char encapsedChar = ' ';
1198           //          if (!encapsedStringStack.isEmpty()) {
1199           //            encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1200           //          }
1201           //          if (encapsedChar != '$' && encapsedChar != ' ') {
1202           //            currentCharacter = source[currentPosition++];
1203           //            if (currentCharacter == encapsedChar) {
1204           //              switch (currentCharacter) {
1205           //              case '`':
1206           //                return TokenNameEncapsedString0;
1207           //              case '\'':
1208           //                return TokenNameEncapsedString1;
1209           //              case '"':
1210           //                return TokenNameEncapsedString2;
1211           //              }
1212           //            }
1213           //            while (currentCharacter != encapsedChar) {
1214           //              /** ** in PHP \r and \n are valid in string literals *** */
1215           //              switch (currentCharacter) {
1216           //              case '\\':
1217           //                int escapeSize = currentPosition;
1218           //                boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1219           //                //scanEscapeCharacter make a side effect on this value and
1220           //                // we need the previous value few lines down this one
1221           //                scanDoubleQuotedEscapeCharacter();
1222           //                escapeSize = currentPosition - escapeSize;
1223           //                if (withoutUnicodePtr == 0) {
1224           //                  //buffer all the entries that have been left aside....
1225           //                  withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1226           //                  System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1227           //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1228           //                } else { //overwrite the / in the buffer
1229           //                  withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1230           //                  if (backSlashAsUnicodeInString) { //there are TWO \ in
1231           //                    withoutUnicodePtr--;
1232           //                  }
1233           //                }
1234           //                break;
1235           //              case '\r':
1236           //              case '\n':
1237           //                if (recordLineSeparator) {
1238           //                  pushLineSeparator();
1239           //                }
1240           //                break;
1241           //              case '$':
1242           //                if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1243           //                  currentPosition--;
1244           //                  encapsedStringStack.push(new Character('$'));
1245           //                  return TokenNameSTRING;
1246           //                }
1247           //                break;
1248           //              case '{':
1249           //                if (source[currentPosition] == '$') { // CURLY_OPEN
1250           //                  currentPosition--;
1251           //                  encapsedStringStack.push(new Character('$'));
1252           //                  return TokenNameSTRING;
1253           //                }
1254           //              }
1255           //              // consume next character
1256           //              unicodeAsBackSlash = false;
1257           //              currentCharacter = source[currentPosition++];
1258           //              if (withoutUnicodePtr != 0) {
1259           //                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1260           //              }
1261           //              // }
1262           //            } // end while
1263           //            currentPosition--;
1264           //            return TokenNameSTRING;
1265           //          }
1266           // ---------Consume white space and handles startPosition---------
1267           int whiteStart = currentPosition;
1268           startPosition = currentPosition;
1269           currentCharacter = source[currentPosition++];
1270           //          if (encapsedChar == '$') {
1271           //            switch (currentCharacter) {
1272           //            case '\\':
1273           //              currentCharacter = source[currentPosition++];
1274           //              return TokenNameSTRING;
1275           //            case '{':
1276           //              if (encapsedChar == '$') {
1277           //                if (getNextChar('$'))
1278           //                  return TokenNameLBRACE_DOLLAR;
1279           //              }
1280           //              return TokenNameLBRACE;
1281           //            case '}':
1282           //              return TokenNameRBRACE;
1283           //            case '[':
1284           //              return TokenNameLBRACKET;
1285           //            case ']':
1286           //              return TokenNameRBRACKET;
1287           //            case '\'':
1288           //              if (tokenizeStrings) {
1289           //                consumeStringConstant();
1290           //                return TokenNameStringSingleQuote;
1291           //              }
1292           //              return TokenNameEncapsedString1;
1293           //            case '"':
1294           //              return TokenNameEncapsedString2;
1295           //            case '`':
1296           //              if (tokenizeStrings) {
1297           //                consumeStringInterpolated();
1298           //                return TokenNameStringInterpolated;
1299           //              }
1300           //              return TokenNameEncapsedString0;
1301           //            case '-':
1302           //              if (getNextChar('>'))
1303           //                return TokenNameMINUS_GREATER;
1304           //              return TokenNameSTRING;
1305           //            default:
1306           //              if (currentCharacter == '$') {
1307           //                int oldPosition = currentPosition;
1308           //                try {
1309           //                  currentCharacter = source[currentPosition++];
1310           //                  if (currentCharacter == '{') {
1311           //                    return TokenNameDOLLAR_LBRACE;
1312           //                  }
1313           //                  if (isPHPIdentifierStart(currentCharacter)) {
1314           //                    return scanIdentifierOrKeyword(true);
1315           //                  } else {
1316           //                    currentPosition = oldPosition;
1317           //                    return TokenNameSTRING;
1318           //                  }
1319           //                } catch (IndexOutOfBoundsException e) {
1320           //                  currentPosition = oldPosition;
1321           //                  return TokenNameSTRING;
1322           //                }
1323           //              }
1324           //              if (isPHPIdentifierStart(currentCharacter))
1325           //                return scanIdentifierOrKeyword(false);
1326           //              if (Character.isDigit(currentCharacter))
1327           //                return scanNumber(false);
1328           //              return TokenNameERROR;
1329           //            }
1330           //          }
1331           //          boolean isWhiteSpace;
1332
1333           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1334             startPosition = currentPosition;
1335             currentCharacter = source[currentPosition++];
1336             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1337             //              && (source[currentPosition] == 'u')) {
1338             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1339             //            } else {
1340             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1341               checkNonExternalizeString();
1342               if (recordLineSeparator) {
1343                 pushLineSeparator();
1344               } else {
1345                 currentLine = null;
1346               }
1347             }
1348             //            isWhiteSpace = (currentCharacter == ' ')
1349             //                || Character.isWhitespace(currentCharacter);
1350             //            }
1351           }
1352           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1353             // reposition scanner in case we are interested by spaces as tokens
1354             currentPosition--;
1355             startPosition = whiteStart;
1356             return TokenNameWHITESPACE;
1357           }
1358           //little trick to get out in the middle of a source compuation
1359           if (currentPosition > eofPosition)
1360             return TokenNameEOF;
1361           // ---------Identify the next token-------------
1362           switch (currentCharacter) {
1363           case '(':
1364             return getCastOrParen();
1365           case ')':
1366             return TokenNameRPAREN;
1367           case '{':
1368             return TokenNameLBRACE;
1369           case '}':
1370             return TokenNameRBRACE;
1371           case '[':
1372             return TokenNameLBRACKET;
1373           case ']':
1374             return TokenNameRBRACKET;
1375           case ';':
1376             return TokenNameSEMICOLON;
1377           case ',':
1378             return TokenNameCOMMA;
1379           case '.':
1380             if (getNextChar('='))
1381               return TokenNameDOT_EQUAL;
1382             if (getNextCharAsDigit())
1383               return scanNumber(true);
1384             return TokenNameDOT;
1385           case '+': {
1386             int test;
1387             if ((test = getNextChar('+', '=')) == 0)
1388               return TokenNamePLUS_PLUS;
1389             if (test > 0)
1390               return TokenNamePLUS_EQUAL;
1391             return TokenNamePLUS;
1392           }
1393           case '-': {
1394             int test;
1395             if ((test = getNextChar('-', '=')) == 0)
1396               return TokenNameMINUS_MINUS;
1397             if (test > 0)
1398               return TokenNameMINUS_EQUAL;
1399             if (getNextChar('>'))
1400               return TokenNameMINUS_GREATER;
1401             return TokenNameMINUS;
1402           }
1403           case '~':
1404             if (getNextChar('='))
1405               return TokenNameTWIDDLE_EQUAL;
1406             return TokenNameTWIDDLE;
1407           case '!':
1408             if (getNextChar('=')) {
1409               if (getNextChar('=')) {
1410                 return TokenNameNOT_EQUAL_EQUAL;
1411               }
1412               return TokenNameNOT_EQUAL;
1413             }
1414             return TokenNameNOT;
1415           case '*':
1416             if (getNextChar('='))
1417               return TokenNameMULTIPLY_EQUAL;
1418             return TokenNameMULTIPLY;
1419           case '%':
1420             if (getNextChar('='))
1421               return TokenNameREMAINDER_EQUAL;
1422             return TokenNameREMAINDER;
1423           case '<': {
1424             int oldPosition = currentPosition;
1425             try {
1426               currentCharacter = source[currentPosition++];
1427             } catch (IndexOutOfBoundsException e) {
1428               currentPosition = oldPosition;
1429               return TokenNameLESS;
1430             }
1431             switch (currentCharacter) {
1432             case '=':
1433               return TokenNameLESS_EQUAL;
1434             case '>':
1435               return TokenNameNOT_EQUAL;
1436             case '<':
1437               if (getNextChar('='))
1438                 return TokenNameLEFT_SHIFT_EQUAL;
1439               if (getNextChar('<')) {
1440                 currentCharacter = source[currentPosition++];
1441                 while (Character.isWhitespace(currentCharacter)) {
1442                   currentCharacter = source[currentPosition++];
1443                 }
1444                 int heredocStart = currentPosition - 1;
1445                 int heredocLength = 0;
1446                 if (isPHPIdentifierStart(currentCharacter)) {
1447                   currentCharacter = source[currentPosition++];
1448                 } else {
1449                   return TokenNameERROR;
1450                 }
1451                 while (isPHPIdentifierPart(currentCharacter)) {
1452                   currentCharacter = source[currentPosition++];
1453                 }
1454                 heredocLength = currentPosition - heredocStart - 1;
1455                 // heredoc end-tag determination
1456                 boolean endTag = true;
1457                 char ch;
1458                 do {
1459                   ch = source[currentPosition++];
1460                   if (ch == '\r' || ch == '\n') {
1461                     if (recordLineSeparator) {
1462                       pushLineSeparator();
1463                     } else {
1464                       currentLine = null;
1465                     }
1466                     for (int i = 0; i < heredocLength; i++) {
1467                       if (source[currentPosition + i] != source[heredocStart + i]) {
1468                         endTag = false;
1469                         break;
1470                       }
1471                     }
1472                     if (endTag) {
1473                       currentPosition += heredocLength - 1;
1474                       currentCharacter = source[currentPosition++];
1475                       break; // do...while loop
1476                     } else {
1477                       endTag = true;
1478                     }
1479                   }
1480                 } while (true);
1481                 return TokenNameHEREDOC;
1482               }
1483               return TokenNameLEFT_SHIFT;
1484             }
1485             currentPosition = oldPosition;
1486             return TokenNameLESS;
1487           }
1488           case '>': {
1489             int test;
1490             if ((test = getNextChar('=', '>')) == 0)
1491               return TokenNameGREATER_EQUAL;
1492             if (test > 0) {
1493               if ((test = getNextChar('=', '>')) == 0)
1494                 return TokenNameRIGHT_SHIFT_EQUAL;
1495               return TokenNameRIGHT_SHIFT;
1496             }
1497             return TokenNameGREATER;
1498           }
1499           case '=':
1500             if (getNextChar('=')) {
1501               if (getNextChar('=')) {
1502                 return TokenNameEQUAL_EQUAL_EQUAL;
1503               }
1504               return TokenNameEQUAL_EQUAL;
1505             }
1506             if (getNextChar('>'))
1507               return TokenNameEQUAL_GREATER;
1508             return TokenNameEQUAL;
1509           case '&': {
1510             int test;
1511             if ((test = getNextChar('&', '=')) == 0)
1512               return TokenNameAND_AND;
1513             if (test > 0)
1514               return TokenNameAND_EQUAL;
1515             return TokenNameAND;
1516           }
1517           case '|': {
1518             int test;
1519             if ((test = getNextChar('|', '=')) == 0)
1520               return TokenNameOR_OR;
1521             if (test > 0)
1522               return TokenNameOR_EQUAL;
1523             return TokenNameOR;
1524           }
1525           case '^':
1526             if (getNextChar('='))
1527               return TokenNameXOR_EQUAL;
1528             return TokenNameXOR;
1529           case '?':
1530             if (getNextChar('>')) {
1531               phpMode = false;
1532               if (currentPosition == source.length) {
1533                 phpMode = true;
1534                 return TokenNameINLINE_HTML;
1535               }
1536               return getInlinedHTMLToken(currentPosition - 2);
1537             }
1538             return TokenNameQUESTION;
1539           case ':':
1540             if (getNextChar(':'))
1541               return TokenNamePAAMAYIM_NEKUDOTAYIM;
1542             return TokenNameCOLON;
1543           case '@':
1544             return TokenNameAT;
1545           case '\'':
1546             consumeStringConstant();
1547             return TokenNameStringSingleQuote;
1548           case '"':
1549 //            if (tokenizeStrings) {
1550               consumeStringLiteral();
1551               return TokenNameStringDoubleQuote;
1552 //            }
1553 //            return TokenNameEncapsedString2;
1554           case '`':
1555 //            if (tokenizeStrings) {
1556               consumeStringInterpolated();
1557               return TokenNameStringInterpolated;
1558 //            }
1559 //            return TokenNameEncapsedString0;
1560           case '#':
1561           case '/': {
1562             char startChar = currentCharacter;
1563             if (getNextChar('=') && startChar == '/') {
1564               return TokenNameDIVIDE_EQUAL;
1565             }
1566             int test;
1567             if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1568               //line comment
1569               this.lastCommentLinePosition = this.currentPosition;
1570               int endPositionForLineComment = 0;
1571               try { //get the next char
1572                 currentCharacter = source[currentPosition++];
1573                 //                    if (((currentCharacter = source[currentPosition++])
1574                 //                      == '\\')
1575                 //                      && (source[currentPosition] == 'u')) {
1576                 //                      //-------------unicode traitement ------------
1577                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1578                 //                      currentPosition++;
1579                 //                      while (source[currentPosition] == 'u') {
1580                 //                        currentPosition++;
1581                 //                      }
1582                 //                      if ((c1 =
1583                 //                        Character.getNumericValue(source[currentPosition++]))
1584                 //                        > 15
1585                 //                        || c1 < 0
1586                 //                        || (c2 =
1587                 //                          Character.getNumericValue(source[currentPosition++]))
1588                 //                          > 15
1589                 //                        || c2 < 0
1590                 //                        || (c3 =
1591                 //                          Character.getNumericValue(source[currentPosition++]))
1592                 //                          > 15
1593                 //                        || c3 < 0
1594                 //                        || (c4 =
1595                 //                          Character.getNumericValue(source[currentPosition++]))
1596                 //                          > 15
1597                 //                        || c4 < 0) {
1598                 //                        throw new
1599                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1600                 //                      } else {
1601                 //                        currentCharacter =
1602                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1603                 //                      }
1604                 //                    }
1605                 //handle the \\u case manually into comment
1606                 //                    if (currentCharacter == '\\') {
1607                 //                      if (source[currentPosition] == '\\')
1608                 //                        currentPosition++;
1609                 //                    } //jump over the \\
1610                 boolean isUnicode = false;
1611                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1612                   this.lastCommentLinePosition = this.currentPosition;
1613                   if (currentCharacter == '?') {
1614                     if (getNextChar('>')) {
1615                       startPosition = currentPosition - 2;
1616                       phpMode = false;
1617                       return TokenNameINLINE_HTML;
1618                     }
1619                   }
1620                   //get the next char
1621                   isUnicode = false;
1622                   currentCharacter = source[currentPosition++];
1623                   //                      if (((currentCharacter = source[currentPosition++])
1624                   //                        == '\\')
1625                   //                        && (source[currentPosition] == 'u')) {
1626                   //                        isUnicode = true;
1627                   //                        //-------------unicode traitement ------------
1628                   //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1629                   //                        currentPosition++;
1630                   //                        while (source[currentPosition] == 'u') {
1631                   //                          currentPosition++;
1632                   //                        }
1633                   //                        if ((c1 =
1634                   //                          Character.getNumericValue(source[currentPosition++]))
1635                   //                          > 15
1636                   //                          || c1 < 0
1637                   //                          || (c2 =
1638                   //                            Character.getNumericValue(
1639                   //                              source[currentPosition++]))
1640                   //                            > 15
1641                   //                          || c2 < 0
1642                   //                          || (c3 =
1643                   //                            Character.getNumericValue(
1644                   //                              source[currentPosition++]))
1645                   //                            > 15
1646                   //                          || c3 < 0
1647                   //                          || (c4 =
1648                   //                            Character.getNumericValue(
1649                   //                              source[currentPosition++]))
1650                   //                            > 15
1651                   //                          || c4 < 0) {
1652                   //                          throw new
1653                   // InvalidInputException(INVALID_UNICODE_ESCAPE);
1654                   //                        } else {
1655                   //                          currentCharacter =
1656                   //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1657                   //                        }
1658                   //                      }
1659                   //handle the \\u case manually into comment
1660                   //                      if (currentCharacter == '\\') {
1661                   //                        if (source[currentPosition] == '\\')
1662                   //                          currentPosition++;
1663                   //                      } //jump over the \\
1664                 }
1665                 if (isUnicode) {
1666                   endPositionForLineComment = currentPosition - 6;
1667                 } else {
1668                   endPositionForLineComment = currentPosition - 1;
1669                 }
1670                 //                    recordComment(false);
1671                 recordComment(TokenNameCOMMENT_LINE);
1672                 if (this.taskTags != null)
1673                   checkTaskTag(this.startPosition, this.currentPosition);
1674                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1675                   checkNonExternalizeString();
1676                   if (recordLineSeparator) {
1677                     if (isUnicode) {
1678                       pushUnicodeLineSeparator();
1679                     } else {
1680                       pushLineSeparator();
1681                     }
1682                   } else {
1683                     currentLine = null;
1684                   }
1685                 }
1686                 if (tokenizeComments) {
1687                   if (!isUnicode) {
1688                     currentPosition = endPositionForLineComment;
1689                     // reset one character behind
1690                   }
1691                   return TokenNameCOMMENT_LINE;
1692                 }
1693               } catch (IndexOutOfBoundsException e) { //an eof will them
1694                 // be generated
1695                 if (tokenizeComments) {
1696                   currentPosition--;
1697                   // reset one character behind
1698                   return TokenNameCOMMENT_LINE;
1699                 }
1700               }
1701               break;
1702             }
1703             if (test > 0) {
1704               //traditional and annotation comment
1705               boolean isJavadoc = false, star = false;
1706               // consume next character
1707               unicodeAsBackSlash = false;
1708               currentCharacter = source[currentPosition++];
1709               //                  if (((currentCharacter = source[currentPosition++]) ==
1710               // '\\')
1711               //                    && (source[currentPosition] == 'u')) {
1712               //                    getNextUnicodeChar();
1713               //                  } else {
1714               //                    if (withoutUnicodePtr != 0) {
1715               //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1716               //                        currentCharacter;
1717               //                    }
1718               //                  }
1719               if (currentCharacter == '*') {
1720                 isJavadoc = true;
1721                 star = true;
1722               }
1723               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1724                 checkNonExternalizeString();
1725                 if (recordLineSeparator) {
1726                   pushLineSeparator();
1727                 } else {
1728                   currentLine = null;
1729                 }
1730               }
1731               try { //get the next char
1732                 currentCharacter = source[currentPosition++];
1733                 //                    if (((currentCharacter = source[currentPosition++])
1734                 //                      == '\\')
1735                 //                      && (source[currentPosition] == 'u')) {
1736                 //                      //-------------unicode traitement ------------
1737                 //                      getNextUnicodeChar();
1738                 //                    }
1739                 //handle the \\u case manually into comment
1740                 //                    if (currentCharacter == '\\') {
1741                 //                      if (source[currentPosition] == '\\')
1742                 //                        currentPosition++;
1743                 //                      //jump over the \\
1744                 //                    }
1745                 // empty comment is not a javadoc /**/
1746                 if (currentCharacter == '/') {
1747                   isJavadoc = false;
1748                 }
1749                 //loop until end of comment */
1750                 while ((currentCharacter != '/') || (!star)) {
1751                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1752                     checkNonExternalizeString();
1753                     if (recordLineSeparator) {
1754                       pushLineSeparator();
1755                     } else {
1756                       currentLine = null;
1757                     }
1758                   }
1759                   star = currentCharacter == '*';
1760                   //get next char
1761                   currentCharacter = source[currentPosition++];
1762                   //                      if (((currentCharacter = source[currentPosition++])
1763                   //                        == '\\')
1764                   //                        && (source[currentPosition] == 'u')) {
1765                   //                        //-------------unicode traitement ------------
1766                   //                        getNextUnicodeChar();
1767                   //                      }
1768                   //handle the \\u case manually into comment
1769                   //                      if (currentCharacter == '\\') {
1770                   //                        if (source[currentPosition] == '\\')
1771                   //                          currentPosition++;
1772                   //                      } //jump over the \\
1773                 }
1774                 //recordComment(isJavadoc);
1775                 if (isJavadoc) {
1776                   recordComment(TokenNameCOMMENT_PHPDOC);
1777                 } else {
1778                   recordComment(TokenNameCOMMENT_BLOCK);
1779                 }
1780
1781                 if (tokenizeComments) {
1782                   if (isJavadoc)
1783                     return TokenNameCOMMENT_PHPDOC;
1784                   return TokenNameCOMMENT_BLOCK;
1785                 }
1786
1787                 if (this.taskTags != null) {
1788                   checkTaskTag(this.startPosition, this.currentPosition);
1789                 }
1790               } catch (IndexOutOfBoundsException e) {
1791                 //                  reset end position for error reporting
1792                 currentPosition -= 2;
1793                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1794               }
1795               break;
1796             }
1797             return TokenNameDIVIDE;
1798           }
1799           case '\u001a':
1800             if (atEnd())
1801               return TokenNameEOF;
1802             //the atEnd may not be <currentPosition == source.length> if
1803             // source is only some part of a real (external) stream
1804             throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1805           default:
1806             if (currentCharacter == '$') {
1807               int oldPosition = currentPosition;
1808               try {
1809                 currentCharacter = source[currentPosition++];
1810                 if (isPHPIdentifierStart(currentCharacter)) {
1811                   return scanIdentifierOrKeyword(true);
1812                 } else {
1813                   currentPosition = oldPosition;
1814                   return TokenNameDOLLAR;
1815                 }
1816               } catch (IndexOutOfBoundsException e) {
1817                 currentPosition = oldPosition;
1818                 return TokenNameDOLLAR;
1819               }
1820             }
1821             if (isPHPIdentifierStart(currentCharacter))
1822               return scanIdentifierOrKeyword(false);
1823             if (Character.isDigit(currentCharacter))
1824               return scanNumber(false);
1825             return TokenNameERROR;
1826           }
1827         }
1828       } //-----------------end switch while try--------------------
1829       catch (IndexOutOfBoundsException e) {
1830       }
1831     }
1832     return TokenNameEOF;
1833   }
1834
1835   /**
1836    * @return
1837    * @throws InvalidInputException
1838    */
1839   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1840     if (currentPosition > source.length) {
1841       currentPosition = source.length;
1842       return TokenNameEOF;
1843     }
1844     startPosition = start;
1845     try {
1846       while (!phpMode) {
1847         currentCharacter = source[currentPosition++];
1848         if (currentCharacter == '<') {
1849           if (getNextChar('?')) {
1850             currentCharacter = source[currentPosition++];
1851             if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1852               if (currentCharacter != '=') { // <?=
1853                 currentPosition--;
1854               } else {
1855                 phpExpressionTag = true;
1856               }
1857               // <?
1858               if (ignorePHPOneLiner) { // for CodeFormatter
1859                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1860                   phpMode = true;
1861                   return TokenNameINLINE_HTML;
1862                 }
1863               } else {
1864                 phpMode = true;
1865                 return TokenNameINLINE_HTML;
1866               }
1867             } else {
1868               //              boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1869               //              if (phpStart) {
1870               int test = getNextChar('H', 'h');
1871               if (test >= 0) {
1872                 test = getNextChar('P', 'p');
1873                 if (test >= 0) {
1874                   // <?PHP <?php
1875                   if (ignorePHPOneLiner) {
1876                     if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1877                       phpMode = true;
1878                       return TokenNameINLINE_HTML;
1879                     }
1880                   } else {
1881                     phpMode = true;
1882                     return TokenNameINLINE_HTML;
1883                   }
1884                 }
1885               }
1886               //              }
1887             }
1888           }
1889         }
1890         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1891           if (recordLineSeparator) {
1892             pushLineSeparator();
1893           } else {
1894             currentLine = null;
1895           }
1896         }
1897       } //-----------------while--------------------
1898       phpMode = true;
1899       return TokenNameINLINE_HTML;
1900     } //-----------------try--------------------
1901     catch (IndexOutOfBoundsException e) {
1902       startPosition = start;
1903       currentPosition--;
1904     }
1905     phpMode = true;
1906     return TokenNameINLINE_HTML;
1907   }
1908
1909   /**
1910    * @return
1911    */
1912   private int lookAheadLinePHPTag() {
1913     // check if the PHP is only in this line (for CodeFormatter)
1914     int currentPositionInLine = currentPosition;
1915     char previousCharInLine = ' ';
1916     char currentCharInLine = ' ';
1917     boolean singleQuotedStringActive = false;
1918     boolean doubleQuotedStringActive = false;
1919
1920     try {
1921       // look ahead in this line
1922       while (true) {
1923         previousCharInLine = currentCharInLine;
1924         currentCharInLine = source[currentPositionInLine++];
1925         switch (currentCharInLine) {
1926         case '>':
1927           if (previousCharInLine == '?') {
1928             // update the scanner's current Position in the source
1929             currentPosition = currentPositionInLine;
1930             // use as "dummy" token
1931             return TokenNameEOF;
1932           }
1933           break;
1934         case '\\':
1935           if (doubleQuotedStringActive) {
1936             // ignore escaped characters in double quoted strings
1937             previousCharInLine = currentCharInLine;
1938             currentCharInLine = source[currentPositionInLine++];
1939           }
1940         case '\"':
1941           if (doubleQuotedStringActive) {
1942             doubleQuotedStringActive = false;
1943           } else {
1944             if (!singleQuotedStringActive) {
1945               doubleQuotedStringActive = true;
1946             }
1947           }
1948           break;
1949         case '\'':
1950           if (singleQuotedStringActive) {
1951             if (previousCharInLine != '\\') {
1952               singleQuotedStringActive = false;
1953             }
1954           } else {
1955             if (!doubleQuotedStringActive) {
1956               singleQuotedStringActive = true;
1957             }
1958           }
1959           break;
1960         case '\n':
1961           phpMode = true;
1962           return TokenNameINLINE_HTML;
1963         case '#':
1964           if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1965             phpMode = true;
1966             return TokenNameINLINE_HTML;
1967           }
1968           break;
1969         case '/':
1970           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1971             phpMode = true;
1972             return TokenNameINLINE_HTML;
1973           }
1974           break;
1975         case '*':
1976           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1977             phpMode = true;
1978             return TokenNameINLINE_HTML;
1979           }
1980           break;
1981         }
1982       }
1983     } catch (IndexOutOfBoundsException e) {
1984       phpMode = true;
1985       currentPosition = currentPositionInLine;
1986       return TokenNameINLINE_HTML;
1987     }
1988   }
1989
1990   //  public final void getNextUnicodeChar()
1991   //    throws IndexOutOfBoundsException, InvalidInputException {
1992   //    //VOID
1993   //    //handle the case of unicode.
1994   //    //when a unicode appears then we must use a buffer that holds char
1995   // internal values
1996   //    //At the end of this method currentCharacter holds the new visited char
1997   //    //and currentPosition points right next after it
1998   //
1999   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
2000   //
2001   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2002   //    currentPosition++;
2003   //    while (source[currentPosition] == 'u') {
2004   //      currentPosition++;
2005   //      unicodeSize++;
2006   //    }
2007   //
2008   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2009   //      || c1 < 0
2010   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2011   //      || c2 < 0
2012   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2013   //      || c3 < 0
2014   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2015   //      || c4 < 0) {
2016   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2017   //    } else {
2018   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2019   //      //need the unicode buffer
2020   //      if (withoutUnicodePtr == 0) {
2021   //        //buffer all the entries that have been left aside....
2022   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2023   //        System.arraycopy(
2024   //          source,
2025   //          startPosition,
2026   //          withoutUnicodeBuffer,
2027   //          1,
2028   //          withoutUnicodePtr);
2029   //      }
2030   //      //fill the buffer with the char
2031   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2032   //    }
2033   //    unicodeAsBackSlash = currentCharacter == '\\';
2034   //  }
2035   /*
2036    * Tokenize a method body, assuming that curly brackets are properly balanced.
2037    */
2038   public final void jumpOverMethodBody() {
2039     this.wasAcr = false;
2040     int found = 1;
2041     try {
2042       while (true) { //loop for jumping over comments
2043         // ---------Consume white space and handles startPosition---------
2044         boolean isWhiteSpace;
2045         do {
2046           startPosition = currentPosition;
2047           currentCharacter = source[currentPosition++];
2048           //          if (((currentCharacter = source[currentPosition++]) == '\\')
2049           //            && (source[currentPosition] == 'u')) {
2050           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
2051           //          } else {
2052           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2053             pushLineSeparator();
2054           isWhiteSpace = Character.isWhitespace(currentCharacter);
2055           //          }
2056         } while (isWhiteSpace);
2057         // -------consume token until } is found---------
2058         switch (currentCharacter) {
2059         case '{':
2060           found++;
2061           break;
2062         case '}':
2063           found--;
2064           if (found == 0)
2065             return;
2066           break;
2067         case '\'': {
2068           boolean test;
2069           test = getNextChar('\\');
2070           if (test) {
2071             try {
2072               scanDoubleQuotedEscapeCharacter();
2073             } catch (InvalidInputException ex) {
2074             }
2075             ;
2076           } else {
2077             //                try { // consume next character
2078             unicodeAsBackSlash = false;
2079             currentCharacter = source[currentPosition++];
2080             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2081             //                    && (source[currentPosition] == 'u')) {
2082             //                    getNextUnicodeChar();
2083             //                  } else {
2084             if (withoutUnicodePtr != 0) {
2085               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2086             }
2087             //                  }
2088             //                } catch (InvalidInputException ex) {
2089             //                };
2090           }
2091           getNextChar('\'');
2092           break;
2093         }
2094         case '"':
2095           try {
2096             //              try { // consume next character
2097             unicodeAsBackSlash = false;
2098             currentCharacter = source[currentPosition++];
2099             //                if (((currentCharacter = source[currentPosition++]) == '\\')
2100             //                  && (source[currentPosition] == 'u')) {
2101             //                  getNextUnicodeChar();
2102             //                } else {
2103             if (withoutUnicodePtr != 0) {
2104               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2105             }
2106             //                }
2107             //              } catch (InvalidInputException ex) {
2108             //              };
2109             while (currentCharacter != '"') {
2110               if (currentCharacter == '\r') {
2111                 if (source[currentPosition] == '\n')
2112                   currentPosition++;
2113                 break;
2114                 // the string cannot go further that the line
2115               }
2116               if (currentCharacter == '\n') {
2117                 break;
2118                 // the string cannot go further that the line
2119               }
2120               if (currentCharacter == '\\') {
2121                 try {
2122                   scanDoubleQuotedEscapeCharacter();
2123                 } catch (InvalidInputException ex) {
2124                 }
2125                 ;
2126               }
2127               //                try { // consume next character
2128               unicodeAsBackSlash = false;
2129               currentCharacter = source[currentPosition++];
2130               //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2131               //                    && (source[currentPosition] == 'u')) {
2132               //                    getNextUnicodeChar();
2133               //                  } else {
2134               if (withoutUnicodePtr != 0) {
2135                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2136               }
2137               //                  }
2138               //                } catch (InvalidInputException ex) {
2139               //                };
2140             }
2141           } catch (IndexOutOfBoundsException e) {
2142             return;
2143           }
2144           break;
2145         case '/': {
2146           int test;
2147           if ((test = getNextChar('/', '*')) == 0) {
2148             //line comment
2149             try {
2150               //get the next char
2151               currentCharacter = source[currentPosition++];
2152               //                  if (((currentCharacter = source[currentPosition++]) ==
2153               // '\\')
2154               //                    && (source[currentPosition] == 'u')) {
2155               //                    //-------------unicode traitement ------------
2156               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2157               //                    currentPosition++;
2158               //                    while (source[currentPosition] == 'u') {
2159               //                      currentPosition++;
2160               //                    }
2161               //                    if ((c1 =
2162               //                      Character.getNumericValue(source[currentPosition++]))
2163               //                      > 15
2164               //                      || c1 < 0
2165               //                      || (c2 =
2166               //                        Character.getNumericValue(source[currentPosition++]))
2167               //                        > 15
2168               //                      || c2 < 0
2169               //                      || (c3 =
2170               //                        Character.getNumericValue(source[currentPosition++]))
2171               //                        > 15
2172               //                      || c3 < 0
2173               //                      || (c4 =
2174               //                        Character.getNumericValue(source[currentPosition++]))
2175               //                        > 15
2176               //                      || c4 < 0) {
2177               //                      //error don't care of the value
2178               //                      currentCharacter = 'A';
2179               //                    } //something different from \n and \r
2180               //                    else {
2181               //                      currentCharacter =
2182               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2183               //                    }
2184               //                  }
2185               while (currentCharacter != '\r' && currentCharacter != '\n') {
2186                 //get the next char
2187                 currentCharacter = source[currentPosition++];
2188                 //                    if (((currentCharacter = source[currentPosition++])
2189                 //                      == '\\')
2190                 //                      && (source[currentPosition] == 'u')) {
2191                 //                      //-------------unicode traitement ------------
2192                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2193                 //                      currentPosition++;
2194                 //                      while (source[currentPosition] == 'u') {
2195                 //                        currentPosition++;
2196                 //                      }
2197                 //                      if ((c1 =
2198                 //                        Character.getNumericValue(source[currentPosition++]))
2199                 //                        > 15
2200                 //                        || c1 < 0
2201                 //                        || (c2 =
2202                 //                          Character.getNumericValue(source[currentPosition++]))
2203                 //                          > 15
2204                 //                        || c2 < 0
2205                 //                        || (c3 =
2206                 //                          Character.getNumericValue(source[currentPosition++]))
2207                 //                          > 15
2208                 //                        || c3 < 0
2209                 //                        || (c4 =
2210                 //                          Character.getNumericValue(source[currentPosition++]))
2211                 //                          > 15
2212                 //                        || c4 < 0) {
2213                 //                        //error don't care of the value
2214                 //                        currentCharacter = 'A';
2215                 //                      } //something different from \n and \r
2216                 //                      else {
2217                 //                        currentCharacter =
2218                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2219                 //                      }
2220                 //                    }
2221               }
2222               if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2223                 pushLineSeparator();
2224             } catch (IndexOutOfBoundsException e) {
2225             } //an eof will them be generated
2226             break;
2227           }
2228           if (test > 0) {
2229             //traditional and annotation comment
2230             boolean star = false;
2231             //                try { // consume next character
2232             unicodeAsBackSlash = false;
2233             currentCharacter = source[currentPosition++];
2234             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2235             //                    && (source[currentPosition] == 'u')) {
2236             //                    getNextUnicodeChar();
2237             //                  } else {
2238             if (withoutUnicodePtr != 0) {
2239               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2240             }
2241             //                  };
2242             //                } catch (InvalidInputException ex) {
2243             //                };
2244             if (currentCharacter == '*') {
2245               star = true;
2246             }
2247             if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2248               pushLineSeparator();
2249             try { //get the next char
2250               currentCharacter = source[currentPosition++];
2251               //                  if (((currentCharacter = source[currentPosition++]) ==
2252               // '\\')
2253               //                    && (source[currentPosition] == 'u')) {
2254               //                    //-------------unicode traitement ------------
2255               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2256               //                    currentPosition++;
2257               //                    while (source[currentPosition] == 'u') {
2258               //                      currentPosition++;
2259               //                    }
2260               //                    if ((c1 =
2261               //                      Character.getNumericValue(source[currentPosition++]))
2262               //                      > 15
2263               //                      || c1 < 0
2264               //                      || (c2 =
2265               //                        Character.getNumericValue(source[currentPosition++]))
2266               //                        > 15
2267               //                      || c2 < 0
2268               //                      || (c3 =
2269               //                        Character.getNumericValue(source[currentPosition++]))
2270               //                        > 15
2271               //                      || c3 < 0
2272               //                      || (c4 =
2273               //                        Character.getNumericValue(source[currentPosition++]))
2274               //                        > 15
2275               //                      || c4 < 0) {
2276               //                      //error don't care of the value
2277               //                      currentCharacter = 'A';
2278               //                    } //something different from * and /
2279               //                    else {
2280               //                      currentCharacter =
2281               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2282               //                    }
2283               //                  }
2284               //loop until end of comment */
2285               while ((currentCharacter != '/') || (!star)) {
2286                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2287                   pushLineSeparator();
2288                 star = currentCharacter == '*';
2289                 //get next char
2290                 currentCharacter = source[currentPosition++];
2291                 //                    if (((currentCharacter = source[currentPosition++])
2292                 //                      == '\\')
2293                 //                      && (source[currentPosition] == 'u')) {
2294                 //                      //-------------unicode traitement ------------
2295                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2296                 //                      currentPosition++;
2297                 //                      while (source[currentPosition] == 'u') {
2298                 //                        currentPosition++;
2299                 //                      }
2300                 //                      if ((c1 =
2301                 //                        Character.getNumericValue(source[currentPosition++]))
2302                 //                        > 15
2303                 //                        || c1 < 0
2304                 //                        || (c2 =
2305                 //                          Character.getNumericValue(source[currentPosition++]))
2306                 //                          > 15
2307                 //                        || c2 < 0
2308                 //                        || (c3 =
2309                 //                          Character.getNumericValue(source[currentPosition++]))
2310                 //                          > 15
2311                 //                        || c3 < 0
2312                 //                        || (c4 =
2313                 //                          Character.getNumericValue(source[currentPosition++]))
2314                 //                          > 15
2315                 //                        || c4 < 0) {
2316                 //                        //error don't care of the value
2317                 //                        currentCharacter = 'A';
2318                 //                      } //something different from * and /
2319                 //                      else {
2320                 //                        currentCharacter =
2321                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2322                 //                      }
2323                 //                    }
2324               }
2325             } catch (IndexOutOfBoundsException e) {
2326               return;
2327             }
2328             break;
2329           }
2330           break;
2331         }
2332         default:
2333           if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2334             try {
2335               scanIdentifierOrKeyword((currentCharacter == '$'));
2336             } catch (InvalidInputException ex) {
2337             }
2338             ;
2339             break;
2340           }
2341           if (Character.isDigit(currentCharacter)) {
2342             try {
2343               scanNumber(false);
2344             } catch (InvalidInputException ex) {
2345             }
2346             ;
2347             break;
2348           }
2349         }
2350       }
2351       //-----------------end switch while try--------------------
2352     } catch (IndexOutOfBoundsException e) {
2353     } catch (InvalidInputException e) {
2354     }
2355     return;
2356   }
2357
2358   //  public final boolean jumpOverUnicodeWhiteSpace()
2359   //    throws InvalidInputException {
2360   //    //BOOLEAN
2361   //    //handle the case of unicode. Jump over the next whiteSpace
2362   //    //making startPosition pointing on the next available char
2363   //    //On false, the currentCharacter is filled up with a potential
2364   //    //correct char
2365   //
2366   //    try {
2367   //      this.wasAcr = false;
2368   //      int c1, c2, c3, c4;
2369   //      int unicodeSize = 6;
2370   //      currentPosition++;
2371   //      while (source[currentPosition] == 'u') {
2372   //        currentPosition++;
2373   //        unicodeSize++;
2374   //      }
2375   //
2376   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2377   //        || c1 < 0)
2378   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2379   //          || c2 < 0)
2380   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2381   //          || c3 < 0)
2382   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2383   //          || c4 < 0)) {
2384   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2385   //      }
2386   //
2387   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2388   //      if (recordLineSeparator
2389   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2390   //        pushLineSeparator();
2391   //      if (Character.isWhitespace(currentCharacter))
2392   //        return true;
2393   //
2394   //      //buffer the new char which is not a white space
2395   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2396   //      //withoutUnicodePtr == 1 is true here
2397   //      return false;
2398   //    } catch (IndexOutOfBoundsException e) {
2399   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2400   //    }
2401   //  }
2402   public final int[] getLineEnds() {
2403     //return a bounded copy of this.lineEnds
2404     int[] copy;
2405     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2406     return copy;
2407   }
2408
2409   public char[] getSource() {
2410     return this.source;
2411   }
2412
2413   public static boolean isIdentifierOrKeyword(int token) {
2414     return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2415   }
2416
2417   final char[] optimizedCurrentTokenSource1() {
2418     //return always the same char[] build only once
2419     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2420     char charOne = source[startPosition];
2421     switch (charOne) {
2422     case 'a':
2423       return charArray_a;
2424     case 'b':
2425       return charArray_b;
2426     case 'c':
2427       return charArray_c;
2428     case 'd':
2429       return charArray_d;
2430     case 'e':
2431       return charArray_e;
2432     case 'f':
2433       return charArray_f;
2434     case 'g':
2435       return charArray_g;
2436     case 'h':
2437       return charArray_h;
2438     case 'i':
2439       return charArray_i;
2440     case 'j':
2441       return charArray_j;
2442     case 'k':
2443       return charArray_k;
2444     case 'l':
2445       return charArray_l;
2446     case 'm':
2447       return charArray_m;
2448     case 'n':
2449       return charArray_n;
2450     case 'o':
2451       return charArray_o;
2452     case 'p':
2453       return charArray_p;
2454     case 'q':
2455       return charArray_q;
2456     case 'r':
2457       return charArray_r;
2458     case 's':
2459       return charArray_s;
2460     case 't':
2461       return charArray_t;
2462     case 'u':
2463       return charArray_u;
2464     case 'v':
2465       return charArray_v;
2466     case 'w':
2467       return charArray_w;
2468     case 'x':
2469       return charArray_x;
2470     case 'y':
2471       return charArray_y;
2472     case 'z':
2473       return charArray_z;
2474     default:
2475       return new char[] { charOne };
2476     }
2477   }
2478
2479   final char[] optimizedCurrentTokenSource2() {
2480     char c0, c1;
2481     c0 = source[startPosition];
2482     c1 = source[startPosition + 1];
2483     if (c0 == '$') {
2484       //return always the same char[] build only once
2485       //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2486       switch (c1) {
2487       case 'a':
2488         return charArray_va;
2489       case 'b':
2490         return charArray_vb;
2491       case 'c':
2492         return charArray_vc;
2493       case 'd':
2494         return charArray_vd;
2495       case 'e':
2496         return charArray_ve;
2497       case 'f':
2498         return charArray_vf;
2499       case 'g':
2500         return charArray_vg;
2501       case 'h':
2502         return charArray_vh;
2503       case 'i':
2504         return charArray_vi;
2505       case 'j':
2506         return charArray_vj;
2507       case 'k':
2508         return charArray_vk;
2509       case 'l':
2510         return charArray_vl;
2511       case 'm':
2512         return charArray_vm;
2513       case 'n':
2514         return charArray_vn;
2515       case 'o':
2516         return charArray_vo;
2517       case 'p':
2518         return charArray_vp;
2519       case 'q':
2520         return charArray_vq;
2521       case 'r':
2522         return charArray_vr;
2523       case 's':
2524         return charArray_vs;
2525       case 't':
2526         return charArray_vt;
2527       case 'u':
2528         return charArray_vu;
2529       case 'v':
2530         return charArray_vv;
2531       case 'w':
2532         return charArray_vw;
2533       case 'x':
2534         return charArray_vx;
2535       case 'y':
2536         return charArray_vy;
2537       case 'z':
2538         return charArray_vz;
2539       }
2540     }
2541     //try to return the same char[] build only once
2542     int hash = ((c0 << 6) + c1) % TableSize;
2543     char[][] table = charArray_length[0][hash];
2544     int i = newEntry2;
2545     while (++i < InternalTableSize) {
2546       char[] charArray = table[i];
2547       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2548         return charArray;
2549     }
2550     //---------other side---------
2551     i = -1;
2552     int max = newEntry2;
2553     while (++i <= max) {
2554       char[] charArray = table[i];
2555       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2556         return charArray;
2557     }
2558     //--------add the entry-------
2559     if (++max >= InternalTableSize)
2560       max = 0;
2561     char[] r;
2562     table[max] = (r = new char[] { c0, c1 });
2563     newEntry2 = max;
2564     return r;
2565   }
2566
2567   final char[] optimizedCurrentTokenSource3() {
2568     //try to return the same char[] build only once
2569     char c0, c1, c2;
2570     int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2571         % TableSize;
2572     char[][] table = charArray_length[1][hash];
2573     int i = newEntry3;
2574     while (++i < InternalTableSize) {
2575       char[] charArray = table[i];
2576       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2577         return charArray;
2578     }
2579     //---------other side---------
2580     i = -1;
2581     int max = newEntry3;
2582     while (++i <= max) {
2583       char[] charArray = table[i];
2584       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2585         return charArray;
2586     }
2587     //--------add the entry-------
2588     if (++max >= InternalTableSize)
2589       max = 0;
2590     char[] r;
2591     table[max] = (r = new char[] { c0, c1, c2 });
2592     newEntry3 = max;
2593     return r;
2594   }
2595
2596   final char[] optimizedCurrentTokenSource4() {
2597     //try to return the same char[] build only once
2598     char c0, c1, c2, c3;
2599     long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2600         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2601         % TableSize;
2602     char[][] table = charArray_length[2][(int) hash];
2603     int i = newEntry4;
2604     while (++i < InternalTableSize) {
2605       char[] charArray = table[i];
2606       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2607         return charArray;
2608     }
2609     //---------other side---------
2610     i = -1;
2611     int max = newEntry4;
2612     while (++i <= max) {
2613       char[] charArray = table[i];
2614       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2615         return charArray;
2616     }
2617     //--------add the entry-------
2618     if (++max >= InternalTableSize)
2619       max = 0;
2620     char[] r;
2621     table[max] = (r = new char[] { c0, c1, c2, c3 });
2622     newEntry4 = max;
2623     return r;
2624   }
2625
2626   final char[] optimizedCurrentTokenSource5() {
2627     //try to return the same char[] build only once
2628     char c0, c1, c2, c3, c4;
2629     long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2630         + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2631         % TableSize;
2632     char[][] table = charArray_length[3][(int) hash];
2633     int i = newEntry5;
2634     while (++i < InternalTableSize) {
2635       char[] charArray = table[i];
2636       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2637         return charArray;
2638     }
2639     //---------other side---------
2640     i = -1;
2641     int max = newEntry5;
2642     while (++i <= max) {
2643       char[] charArray = table[i];
2644       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2645         return charArray;
2646     }
2647     //--------add the entry-------
2648     if (++max >= InternalTableSize)
2649       max = 0;
2650     char[] r;
2651     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2652     newEntry5 = max;
2653     return r;
2654   }
2655
2656   final char[] optimizedCurrentTokenSource6() {
2657     //try to return the same char[] build only once
2658     char c0, c1, c2, c3, c4, c5;
2659     long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2660         + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2661         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2662         % TableSize;
2663     char[][] table = charArray_length[4][(int) hash];
2664     int i = newEntry6;
2665     while (++i < InternalTableSize) {
2666       char[] charArray = table[i];
2667       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2668           && (c5 == charArray[5]))
2669         return charArray;
2670     }
2671     //---------other side---------
2672     i = -1;
2673     int max = newEntry6;
2674     while (++i <= max) {
2675       char[] charArray = table[i];
2676       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2677           && (c5 == charArray[5]))
2678         return charArray;
2679     }
2680     //--------add the entry-------
2681     if (++max >= InternalTableSize)
2682       max = 0;
2683     char[] r;
2684     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2685     newEntry6 = max;
2686     return r;
2687   }
2688
2689   public final void pushLineSeparator() throws InvalidInputException {
2690     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2691     final int INCREMENT = 250;
2692     if (this.checkNonExternalizedStringLiterals) {
2693       // reinitialize the current line for non externalize strings purpose
2694       currentLine = null;
2695     }
2696     //currentCharacter is at position currentPosition-1
2697     // cr 000D
2698     if (currentCharacter == '\r') {
2699       int separatorPos = currentPosition - 1;
2700       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2701         return;
2702       //System.out.println("CR-" + separatorPos);
2703       try {
2704         lineEnds[++linePtr] = separatorPos;
2705       } catch (IndexOutOfBoundsException e) {
2706         //linePtr value is correct
2707         int oldLength = lineEnds.length;
2708         int[] old = lineEnds;
2709         lineEnds = new int[oldLength + INCREMENT];
2710         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2711         lineEnds[linePtr] = separatorPos;
2712       }
2713       // look-ahead for merged cr+lf
2714       try {
2715         if (source[currentPosition] == '\n') {
2716           //System.out.println("look-ahead LF-" + currentPosition);
2717           lineEnds[linePtr] = currentPosition;
2718           currentPosition++;
2719           wasAcr = false;
2720         } else {
2721           wasAcr = true;
2722         }
2723       } catch (IndexOutOfBoundsException e) {
2724         wasAcr = true;
2725       }
2726     } else {
2727       // lf 000A
2728       if (currentCharacter == '\n') {
2729         //must merge eventual cr followed by lf
2730         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2731           //System.out.println("merge LF-" + (currentPosition - 1));
2732           lineEnds[linePtr] = currentPosition - 1;
2733         } else {
2734           int separatorPos = currentPosition - 1;
2735           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2736             return;
2737           // System.out.println("LF-" + separatorPos);
2738           try {
2739             lineEnds[++linePtr] = separatorPos;
2740           } catch (IndexOutOfBoundsException e) {
2741             //linePtr value is correct
2742             int oldLength = lineEnds.length;
2743             int[] old = lineEnds;
2744             lineEnds = new int[oldLength + INCREMENT];
2745             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2746             lineEnds[linePtr] = separatorPos;
2747           }
2748         }
2749         wasAcr = false;
2750       }
2751     }
2752   }
2753
2754   public final void pushUnicodeLineSeparator() {
2755     // isUnicode means that the \r or \n has been read as a unicode character
2756     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2757     final int INCREMENT = 250;
2758     //currentCharacter is at position currentPosition-1
2759     if (this.checkNonExternalizedStringLiterals) {
2760       // reinitialize the current line for non externalize strings purpose
2761       currentLine = null;
2762     }
2763     // cr 000D
2764     if (currentCharacter == '\r') {
2765       int separatorPos = currentPosition - 6;
2766       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2767         return;
2768       //System.out.println("CR-" + separatorPos);
2769       try {
2770         lineEnds[++linePtr] = separatorPos;
2771       } catch (IndexOutOfBoundsException e) {
2772         //linePtr value is correct
2773         int oldLength = lineEnds.length;
2774         int[] old = lineEnds;
2775         lineEnds = new int[oldLength + INCREMENT];
2776         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2777         lineEnds[linePtr] = separatorPos;
2778       }
2779       // look-ahead for merged cr+lf
2780       if (source[currentPosition] == '\n') {
2781         //System.out.println("look-ahead LF-" + currentPosition);
2782         lineEnds[linePtr] = currentPosition;
2783         currentPosition++;
2784         wasAcr = false;
2785       } else {
2786         wasAcr = true;
2787       }
2788     } else {
2789       // lf 000A
2790       if (currentCharacter == '\n') {
2791         //must merge eventual cr followed by lf
2792         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2793           //System.out.println("merge LF-" + (currentPosition - 1));
2794           lineEnds[linePtr] = currentPosition - 6;
2795         } else {
2796           int separatorPos = currentPosition - 6;
2797           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2798             return;
2799           // System.out.println("LF-" + separatorPos);
2800           try {
2801             lineEnds[++linePtr] = separatorPos;
2802           } catch (IndexOutOfBoundsException e) {
2803             //linePtr value is correct
2804             int oldLength = lineEnds.length;
2805             int[] old = lineEnds;
2806             lineEnds = new int[oldLength + INCREMENT];
2807             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2808             lineEnds[linePtr] = separatorPos;
2809           }
2810         }
2811         wasAcr = false;
2812       }
2813     }
2814   }
2815
2816   public void recordComment(int token) {
2817     // compute position
2818     int stopPosition = this.currentPosition;
2819     switch (token) {
2820     case TokenNameCOMMENT_LINE:
2821       stopPosition = -this.lastCommentLinePosition;
2822       break;
2823     case TokenNameCOMMENT_BLOCK:
2824       stopPosition = -this.currentPosition;
2825       break;
2826     }
2827
2828     // a new comment is recorded
2829     int length = this.commentStops.length;
2830     if (++this.commentPtr >= length) {
2831       System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2832       //grows the positions buffers too
2833       System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2834     }
2835     this.commentStops[this.commentPtr] = stopPosition;
2836     this.commentStarts[this.commentPtr] = this.startPosition;
2837   }
2838
2839   //  public final void recordComment(boolean isJavadoc) {
2840   //    // a new annotation comment is recorded
2841   //    try {
2842   //      commentStops[++commentPtr] = isJavadoc
2843   //          ? currentPosition
2844   //          : -currentPosition;
2845   //    } catch (IndexOutOfBoundsException e) {
2846   //      int oldStackLength = commentStops.length;
2847   //      int[] oldStack = commentStops;
2848   //      commentStops = new int[oldStackLength + 30];
2849   //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2850   //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2851   //      //grows the positions buffers too
2852   //      int[] old = commentStarts;
2853   //      commentStarts = new int[oldStackLength + 30];
2854   //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2855   //    }
2856   //    //the buffer is of a correct size here
2857   //    commentStarts[commentPtr] = startPosition;
2858   //  }
2859   public void resetTo(int begin, int end) {
2860     //reset the scanner to a given position where it may rescan again
2861     diet = false;
2862     initialPosition = startPosition = currentPosition = begin;
2863     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2864     commentPtr = -1; // reset comment stack
2865   }
2866
2867   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2868     // the string with "\\u" is a legal string of two chars \ and u
2869     //thus we use a direct access to the source (for regular cases).
2870     //    if (unicodeAsBackSlash) {
2871     //      // consume next character
2872     //      unicodeAsBackSlash = false;
2873     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2874     //        && (source[currentPosition] == 'u')) {
2875     //        getNextUnicodeChar();
2876     //      } else {
2877     //        if (withoutUnicodePtr != 0) {
2878     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2879     //        }
2880     //      }
2881     //    } else
2882     currentCharacter = source[currentPosition++];
2883     switch (currentCharacter) {
2884     case '\'':
2885       currentCharacter = '\'';
2886       break;
2887     case '\\':
2888       currentCharacter = '\\';
2889       break;
2890     default:
2891       currentCharacter = '\\';
2892       currentPosition--;
2893     }
2894   }
2895
2896   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2897     currentCharacter = source[currentPosition++];
2898     switch (currentCharacter) {
2899     //      case 'b' :
2900     //        currentCharacter = '\b';
2901     //        break;
2902     case 't':
2903       currentCharacter = '\t';
2904       break;
2905     case 'n':
2906       currentCharacter = '\n';
2907       break;
2908     //      case 'f' :
2909     //        currentCharacter = '\f';
2910     //        break;
2911     case 'r':
2912       currentCharacter = '\r';
2913       break;
2914     case '\"':
2915       currentCharacter = '\"';
2916       break;
2917     case '\'':
2918       currentCharacter = '\'';
2919       break;
2920     case '\\':
2921       currentCharacter = '\\';
2922       break;
2923     case '$':
2924       currentCharacter = '$';
2925       break;
2926     default:
2927       // -----------octal escape--------------
2928       // OctalDigit
2929       // OctalDigit OctalDigit
2930       // ZeroToThree OctalDigit OctalDigit
2931       int number = Character.getNumericValue(currentCharacter);
2932       if (number >= 0 && number <= 7) {
2933         boolean zeroToThreeNot = number > 3;
2934         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2935           int digit = Character.getNumericValue(currentCharacter);
2936           if (digit >= 0 && digit <= 7) {
2937             number = (number * 8) + digit;
2938             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2939               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2940                 // Digit --> ignore last character
2941                 currentPosition--;
2942               } else {
2943                 digit = Character.getNumericValue(currentCharacter);
2944                 if (digit >= 0 && digit <= 7) {
2945                   // has read \ZeroToThree OctalDigit OctalDigit
2946                   number = (number * 8) + digit;
2947                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2948                   // --> ignore last character
2949                   currentPosition--;
2950                 }
2951               }
2952             } else { // has read \OctalDigit NonDigit--> ignore last
2953               // character
2954               currentPosition--;
2955             }
2956           } else { // has read \OctalDigit NonOctalDigit--> ignore last
2957             // character
2958             currentPosition--;
2959           }
2960         } else { // has read \OctalDigit --> ignore last character
2961           currentPosition--;
2962         }
2963         if (number > 255)
2964           throw new InvalidInputException(INVALID_ESCAPE);
2965         currentCharacter = (char) number;
2966       }
2967     //else
2968     //     throw new InvalidInputException(INVALID_ESCAPE);
2969     }
2970   }
2971
2972   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2973   //    return scanIdentifierOrKeyword( false );
2974   //  }
2975   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2976     //test keywords
2977     //first dispatch on the first char.
2978     //then the length. If there are several
2979     //keywors with the same length AND the same first char, then do another
2980     //disptach on the second char :-)...cool....but fast !
2981     useAssertAsAnIndentifier = false;
2982     while (getNextCharAsJavaIdentifierPart()) {
2983     }
2984     ;
2985     if (isVariable) {
2986       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2987       //        return TokenNamethis;
2988       //      }
2989       return TokenNameVariable;
2990     }
2991     int index, length;
2992     char[] data;
2993     char firstLetter;
2994     //    if (withoutUnicodePtr == 0)
2995     //quick test on length == 1 but not on length > 12 while most identifier
2996     //have a length which is <= 12...but there are lots of identifier with
2997     //only one char....
2998     //      {
2999     if ((length = currentPosition - startPosition) == 1)
3000       return TokenNameIdentifier;
3001     //  data = source;
3002     data = new char[length];
3003     index = startPosition;
3004     for (int i = 0; i < length; i++) {
3005       data[i] = Character.toLowerCase(source[index + i]);
3006     }
3007     index = 0;
3008     //    } else {
3009     //      if ((length = withoutUnicodePtr) == 1)
3010     //        return TokenNameIdentifier;
3011     //      // data = withoutUnicodeBuffer;
3012     //      data = new char[withoutUnicodeBuffer.length];
3013     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3014     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3015     //      }
3016     //      index = 1;
3017     //    }
3018     firstLetter = data[index];
3019     switch (firstLetter) {
3020     case '_':
3021       switch (length) {
3022       case 8:
3023         //__FILE__
3024         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3025             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3026           return TokenNameFILE;
3027         index = 0; //__LINE__
3028         if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3029             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3030           return TokenNameLINE;
3031         break;
3032       case 9:
3033         //__CLASS__
3034         if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3035             && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3036           return TokenNameCLASS_C;
3037         break;
3038       case 11:
3039         //__METHOD__
3040         if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3041             && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3042             && (data[++index] == '_'))
3043           return TokenNameMETHOD_C;
3044         break;
3045       case 12:
3046         //__FUNCTION__
3047         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3048             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3049             && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3050           return TokenNameFUNC_C;
3051         break;
3052       }
3053       return TokenNameIdentifier;
3054     case 'a':
3055       // as and array abstract
3056       switch (length) {
3057       case 2:
3058         //as
3059         if ((data[++index] == 's')) {
3060           return TokenNameas;
3061         } else {
3062           return TokenNameIdentifier;
3063         }
3064       case 3:
3065         //and
3066         if ((data[++index] == 'n') && (data[++index] == 'd')) {
3067           return TokenNameand;
3068         } else {
3069           return TokenNameIdentifier;
3070         }
3071       case 5:
3072         // array
3073         if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3074           return TokenNamearray;
3075         else
3076           return TokenNameIdentifier;
3077       case 8:
3078         if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3079             && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3080           return TokenNameabstract;
3081         else
3082           return TokenNameIdentifier;
3083       default:
3084         return TokenNameIdentifier;
3085       }
3086     case 'b':
3087       //break
3088       switch (length) {
3089       case 5:
3090         if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3091           return TokenNamebreak;
3092         else
3093           return TokenNameIdentifier;
3094       default:
3095         return TokenNameIdentifier;
3096       }
3097     case 'c':
3098       //case catch class clone const continue
3099       switch (length) {
3100       case 4:
3101         if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3102           return TokenNamecase;
3103         else
3104           return TokenNameIdentifier;
3105       case 5:
3106         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3107           return TokenNamecatch;
3108         index = 0;
3109         if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3110           return TokenNameclass;
3111         index = 0;
3112         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3113           return TokenNameclone;
3114         index = 0;
3115         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3116           return TokenNameconst;
3117         else
3118           return TokenNameIdentifier;
3119       case 8:
3120         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3121             && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3122           return TokenNamecontinue;
3123         else
3124           return TokenNameIdentifier;
3125       default:
3126         return TokenNameIdentifier;
3127       }
3128     case 'd':
3129       // declare default do die
3130       // TODO delete define ==> no keyword !
3131       switch (length) {
3132       case 2:
3133         if ((data[++index] == 'o'))
3134           return TokenNamedo;
3135         else
3136           return TokenNameIdentifier;
3137       //          case 6 :
3138       //            if ((data[++index] == 'e')
3139       //              && (data[++index] == 'f')
3140       //              && (data[++index] == 'i')
3141       //              && (data[++index] == 'n')
3142       //              && (data[++index] == 'e'))
3143       //              return TokenNamedefine;
3144       //            else
3145       //              return TokenNameIdentifier;
3146       case 7:
3147         if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3148             && (data[++index] == 'r') && (data[++index] == 'e'))
3149           return TokenNamedeclare;
3150         index = 0;
3151         if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3152             && (data[++index] == 'l') && (data[++index] == 't'))
3153           return TokenNamedefault;
3154         else
3155           return TokenNameIdentifier;
3156       default:
3157         return TokenNameIdentifier;
3158       }
3159     case 'e':
3160       //echo else exit elseif extends eval
3161       switch (length) {
3162       case 4:
3163         if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3164           return TokenNameecho;
3165         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3166           return TokenNameelse;
3167         else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3168           return TokenNameexit;
3169         else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3170           return TokenNameeval;
3171         else
3172           return TokenNameIdentifier;
3173       case 5:
3174         // endif empty
3175         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3176           return TokenNameendif;
3177         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3178           return TokenNameempty;
3179         else
3180           return TokenNameIdentifier;
3181       case 6:
3182         // endfor
3183         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3184             && (data[++index] == 'r'))
3185           return TokenNameendfor;
3186         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3187             && (data[++index] == 'f'))
3188           return TokenNameelseif;
3189         else
3190           return TokenNameIdentifier;
3191       case 7:
3192         if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3193             && (data[++index] == 'd') && (data[++index] == 's'))
3194           return TokenNameextends;
3195         else
3196           return TokenNameIdentifier;
3197       case 8:
3198         // endwhile
3199         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3200             && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3201           return TokenNameendwhile;
3202         else
3203           return TokenNameIdentifier;
3204       case 9:
3205         // endswitch
3206         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3207             && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3208           return TokenNameendswitch;
3209         else
3210           return TokenNameIdentifier;
3211       case 10:
3212         // enddeclare
3213         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3214             && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3215             && (data[++index] == 'e'))
3216           return TokenNameenddeclare;
3217         index = 0;
3218         if ((data[++index] == 'n') // endforeach
3219             && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3220             && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3221           return TokenNameendforeach;
3222         else
3223           return TokenNameIdentifier;
3224       default:
3225         return TokenNameIdentifier;
3226       }
3227     case 'f':
3228       //for false final function
3229       switch (length) {
3230       case 3:
3231         if ((data[++index] == 'o') && (data[++index] == 'r'))
3232           return TokenNamefor;
3233         else
3234           return TokenNameIdentifier;
3235       case 5:
3236         //            if ((data[++index] == 'a') && (data[++index] == 'l')
3237         //                && (data[++index] == 's') && (data[++index] == 'e'))
3238         //              return TokenNamefalse;
3239         if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3240           return TokenNamefinal;
3241         else
3242           return TokenNameIdentifier;
3243       case 7:
3244         // foreach
3245         if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3246             && (data[++index] == 'c') && (data[++index] == 'h'))
3247           return TokenNameforeach;
3248         else
3249           return TokenNameIdentifier;
3250       case 8:
3251         // function
3252         if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3253             && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3254           return TokenNamefunction;
3255         else
3256           return TokenNameIdentifier;
3257       default:
3258         return TokenNameIdentifier;
3259       }
3260     case 'g':
3261       //global
3262       if (length == 6) {
3263         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3264             && (data[++index] == 'l')) {
3265           return TokenNameglobal;
3266         }
3267       }
3268       return TokenNameIdentifier;
3269     case 'i':
3270       //if int isset include include_once instanceof interface implements
3271       switch (length) {
3272       case 2:
3273         if (data[++index] == 'f')
3274           return TokenNameif;
3275         else
3276           return TokenNameIdentifier;
3277       //          case 3 :
3278       //            if ((data[++index] == 'n') && (data[++index] == 't'))
3279       //              return TokenNameint;
3280       //            else
3281       //              return TokenNameIdentifier;
3282       case 5:
3283         if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3284           return TokenNameisset;
3285         else
3286           return TokenNameIdentifier;
3287       case 7:
3288         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3289             && (data[++index] == 'd') && (data[++index] == 'e'))
3290           return TokenNameinclude;
3291         else
3292           return TokenNameIdentifier;
3293       case 9:
3294         // interface
3295         if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3296             && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3297           return TokenNameinterface;
3298         else
3299           return TokenNameIdentifier;
3300       case 10:
3301         // instanceof
3302         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3303             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3304             && (data[++index] == 'f'))
3305           return TokenNameinstanceof;
3306         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3307             && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3308             && (data[++index] == 's'))
3309           return TokenNameimplements;
3310         else
3311           return TokenNameIdentifier;
3312       case 12:
3313         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3314             && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3315             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3316           return TokenNameinclude_once;
3317         else
3318           return TokenNameIdentifier;
3319       default:
3320         return TokenNameIdentifier;
3321       }
3322     case 'l':
3323       //list
3324       if (length == 4) {
3325         if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3326           return TokenNamelist;
3327         }
3328       }
3329       return TokenNameIdentifier;
3330     case 'n':
3331       // new null
3332       switch (length) {
3333       case 3:
3334         if ((data[++index] == 'e') && (data[++index] == 'w'))
3335           return TokenNamenew;
3336         else
3337           return TokenNameIdentifier;
3338       //          case 4 :
3339       //            if ((data[++index] == 'u') && (data[++index] == 'l')
3340       //                && (data[++index] == 'l'))
3341       //              return TokenNamenull;
3342       //            else
3343       //              return TokenNameIdentifier;
3344       default:
3345         return TokenNameIdentifier;
3346       }
3347     case 'o':
3348       // or old_function
3349       if (length == 2) {
3350         if (data[++index] == 'r') {
3351           return TokenNameor;
3352         }
3353       }
3354       //        if (length == 12) {
3355       //          if ((data[++index] == 'l')
3356       //            && (data[++index] == 'd')
3357       //            && (data[++index] == '_')
3358       //            && (data[++index] == 'f')
3359       //            && (data[++index] == 'u')
3360       //            && (data[++index] == 'n')
3361       //            && (data[++index] == 'c')
3362       //            && (data[++index] == 't')
3363       //            && (data[++index] == 'i')
3364       //            && (data[++index] == 'o')
3365       //            && (data[++index] == 'n')) {
3366       //            return TokenNameold_function;
3367       //          }
3368       //        }
3369       return TokenNameIdentifier;
3370     case 'p':
3371       // print public private protected
3372       switch (length) {
3373       case 5:
3374         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3375           return TokenNameprint;
3376         } else
3377           return TokenNameIdentifier;
3378       case 6:
3379         if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3380             && (data[++index] == 'c')) {
3381           return TokenNamepublic;
3382         } else
3383           return TokenNameIdentifier;
3384       case 7:
3385         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3386             && (data[++index] == 't') && (data[++index] == 'e')) {
3387           return TokenNameprivate;
3388         } else
3389           return TokenNameIdentifier;
3390       case 9:
3391         if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3392             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3393           return TokenNameprotected;
3394         } else
3395           return TokenNameIdentifier;
3396       }
3397       return TokenNameIdentifier;
3398     case 'r':
3399       //return require require_once
3400       if (length == 6) {
3401         if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3402             && (data[++index] == 'n')) {
3403           return TokenNamereturn;
3404         }
3405       } else if (length == 7) {
3406         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3407             && (data[++index] == 'r') && (data[++index] == 'e')) {
3408           return TokenNamerequire;
3409         }
3410       } else if (length == 12) {
3411         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3412             && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3413             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3414           return TokenNamerequire_once;
3415         }
3416       } else
3417         return TokenNameIdentifier;
3418     case 's':
3419       //static switch
3420       switch (length) {
3421       case 6:
3422         if (data[++index] == 't')
3423           if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3424             return TokenNamestatic;
3425           } else
3426             return TokenNameIdentifier;
3427         else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3428             && (data[++index] == 'h'))
3429           return TokenNameswitch;
3430         else
3431           return TokenNameIdentifier;
3432       default:
3433         return TokenNameIdentifier;
3434       }
3435     case 't':
3436       // try true throw
3437       switch (length) {
3438       case 3:
3439         if ((data[++index] == 'r') && (data[++index] == 'y'))
3440           return TokenNametry;
3441         else
3442           return TokenNameIdentifier;
3443       //          case 4 :
3444       //            if ((data[++index] == 'r') && (data[++index] == 'u')
3445       //                && (data[++index] == 'e'))
3446       //              return TokenNametrue;
3447       //            else
3448       //              return TokenNameIdentifier;
3449       case 5:
3450         if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3451           return TokenNamethrow;
3452         else
3453           return TokenNameIdentifier;
3454       default:
3455         return TokenNameIdentifier;
3456       }
3457     case 'u':
3458       //use unset
3459       switch (length) {
3460       case 3:
3461         if ((data[++index] == 's') && (data[++index] == 'e'))
3462           return TokenNameuse;
3463         else
3464           return TokenNameIdentifier;
3465       case 5:
3466         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3467           return TokenNameunset;
3468         else
3469           return TokenNameIdentifier;
3470       default:
3471         return TokenNameIdentifier;
3472       }
3473     case 'v':
3474       //var
3475       switch (length) {
3476       case 3:
3477         if ((data[++index] == 'a') && (data[++index] == 'r'))
3478           return TokenNamevar;
3479         else
3480           return TokenNameIdentifier;
3481       default:
3482         return TokenNameIdentifier;
3483       }
3484     case 'w':
3485       //while
3486       switch (length) {
3487       case 5:
3488         if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3489           return TokenNamewhile;
3490         else
3491           return TokenNameIdentifier;
3492       //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3493       // (data[++index]=='e') && (data[++index]=='f')&&
3494       // (data[++index]=='p'))
3495       //return TokenNamewidefp ;
3496       //else
3497       //return TokenNameIdentifier;
3498       default:
3499         return TokenNameIdentifier;
3500       }
3501     case 'x':
3502       //xor
3503       switch (length) {
3504       case 3:
3505         if ((data[++index] == 'o') && (data[++index] == 'r'))
3506           return TokenNamexor;
3507         else
3508           return TokenNameIdentifier;
3509       default:
3510         return TokenNameIdentifier;
3511       }
3512     default:
3513       return TokenNameIdentifier;
3514     }
3515   }
3516
3517   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3518     //when entering this method the currentCharacter is the firt
3519     //digit of the number , i.e. it may be preceeded by a . when
3520     //dotPrefix is true
3521     boolean floating = dotPrefix;
3522     if ((!dotPrefix) && (currentCharacter == '0')) {
3523       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3524         //force the first char of the hexa number do exist...
3525         // consume next character
3526         unicodeAsBackSlash = false;
3527         currentCharacter = source[currentPosition++];
3528         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3529         //          && (source[currentPosition] == 'u')) {
3530         //          getNextUnicodeChar();
3531         //        } else {
3532         //          if (withoutUnicodePtr != 0) {
3533         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3534         //          }
3535         //        }
3536         if (Character.digit(currentCharacter, 16) == -1)
3537           throw new InvalidInputException(INVALID_HEXA);
3538         //---end forcing--
3539         while (getNextCharAsDigit(16)) {
3540         }
3541         ;
3542         //        if (getNextChar('l', 'L') >= 0)
3543         //          return TokenNameLongLiteral;
3544         //        else
3545         return TokenNameIntegerLiteral;
3546       }
3547       //there is x or X in the number
3548       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3549       // 00078.0 is true !!!!! crazy language
3550       if (getNextCharAsDigit()) {
3551         //-------------potential octal-----------------
3552         while (getNextCharAsDigit()) {
3553         }
3554         ;
3555         //        if (getNextChar('l', 'L') >= 0) {
3556         //          return TokenNameLongLiteral;
3557         //        }
3558         //
3559         //        if (getNextChar('f', 'F') >= 0) {
3560         //          return TokenNameFloatingPointLiteral;
3561         //        }
3562         if (getNextChar('d', 'D') >= 0) {
3563           return TokenNameDoubleLiteral;
3564         } else { //make the distinction between octal and float ....
3565           if (getNextChar('.')) { //bingo ! ....
3566             while (getNextCharAsDigit()) {
3567             }
3568             ;
3569             if (getNextChar('e', 'E') >= 0) {
3570               // consume next character
3571               unicodeAsBackSlash = false;
3572               currentCharacter = source[currentPosition++];
3573               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3574               //                && (source[currentPosition] == 'u')) {
3575               //                getNextUnicodeChar();
3576               //              } else {
3577               //                if (withoutUnicodePtr != 0) {
3578               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3579               //                }
3580               //              }
3581               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3582                 // consume next character
3583                 unicodeAsBackSlash = false;
3584                 currentCharacter = source[currentPosition++];
3585                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3586                 //                  && (source[currentPosition] == 'u')) {
3587                 //                  getNextUnicodeChar();
3588                 //                } else {
3589                 //                  if (withoutUnicodePtr != 0) {
3590                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3591                 //                      currentCharacter;
3592                 //                  }
3593                 //                }
3594               }
3595               if (!Character.isDigit(currentCharacter))
3596                 throw new InvalidInputException(INVALID_FLOAT);
3597               while (getNextCharAsDigit()) {
3598               }
3599               ;
3600             }
3601             //            if (getNextChar('f', 'F') >= 0)
3602             //              return TokenNameFloatingPointLiteral;
3603             getNextChar('d', 'D'); //jump over potential d or D
3604             return TokenNameDoubleLiteral;
3605           } else {
3606             return TokenNameIntegerLiteral;
3607           }
3608         }
3609       } else {
3610         /* carry on */
3611       }
3612     }
3613     while (getNextCharAsDigit()) {
3614     }
3615     ;
3616     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3617     //      return TokenNameLongLiteral;
3618     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3619       while (getNextCharAsDigit()) {
3620       }
3621       ;
3622       floating = true;
3623     }
3624     //if floating is true both exponant and suffix may be optional
3625     if (getNextChar('e', 'E') >= 0) {
3626       floating = true;
3627       // consume next character
3628       unicodeAsBackSlash = false;
3629       currentCharacter = source[currentPosition++];
3630       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3631       //        && (source[currentPosition] == 'u')) {
3632       //        getNextUnicodeChar();
3633       //      } else {
3634       //        if (withoutUnicodePtr != 0) {
3635       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3636       //        }
3637       //      }
3638       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3639         // next
3640         // character
3641         unicodeAsBackSlash = false;
3642         currentCharacter = source[currentPosition++];
3643         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3644         //          && (source[currentPosition] == 'u')) {
3645         //          getNextUnicodeChar();
3646         //        } else {
3647         //          if (withoutUnicodePtr != 0) {
3648         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3649         //          }
3650         //        }
3651       }
3652       if (!Character.isDigit(currentCharacter))
3653         throw new InvalidInputException(INVALID_FLOAT);
3654       while (getNextCharAsDigit()) {
3655       }
3656       ;
3657     }
3658     if (getNextChar('d', 'D') >= 0)
3659       return TokenNameDoubleLiteral;
3660     //    if (getNextChar('f', 'F') >= 0)
3661     //      return TokenNameFloatingPointLiteral;
3662     //the long flag has been tested before
3663     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3664   }
3665
3666   /**
3667    * Search the line number corresponding to a specific position
3668    *
3669    */
3670   public final int getLineNumber(int position) {
3671     if (lineEnds == null)
3672       return 1;
3673     int length = linePtr + 1;
3674     if (length == 0)
3675       return 1;
3676     int g = 0, d = length - 1;
3677     int m = 0;
3678     while (g <= d) {
3679       m = (g + d) / 2;
3680       if (position < lineEnds[m]) {
3681         d = m - 1;
3682       } else if (position > lineEnds[m]) {
3683         g = m + 1;
3684       } else {
3685         return m + 1;
3686       }
3687     }
3688     if (position < lineEnds[m]) {
3689       return m + 1;
3690     }
3691     return m + 2;
3692   }
3693
3694   public void setPHPMode(boolean mode) {
3695     phpMode = mode;
3696   }
3697
3698   public final void setSource(char[] source) {
3699     setSource(null, source);
3700   }
3701
3702   public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3703     //the source-buffer is set to sourceString
3704     this.compilationUnit = compilationUnit;
3705     if (source == null) {
3706       this.source = new char[0];
3707     } else {
3708       this.source = source;
3709     }
3710     startPosition = -1;
3711     initialPosition = currentPosition = 0;
3712     containsAssertKeyword = false;
3713     withoutUnicodeBuffer = new char[this.source.length];
3714 //    encapsedStringStack = new Stack();
3715   }
3716
3717   public String toString() {
3718     if (startPosition == source.length)
3719       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3720     if (currentPosition > source.length)
3721       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3722     char front[] = new char[startPosition];
3723     System.arraycopy(source, 0, front, 0, startPosition);
3724     int middleLength = (currentPosition - 1) - startPosition + 1;
3725     char middle[];
3726     if (middleLength > -1) {
3727       middle = new char[middleLength];
3728       System.arraycopy(source, startPosition, middle, 0, middleLength);
3729     } else {
3730       middle = new char[0];
3731     }
3732     char end[] = new char[source.length - (currentPosition - 1)];
3733     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3734     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3735         + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3736         + new String(end);
3737   }
3738
3739   public final String toStringAction(int act) {
3740     switch (act) {
3741     case TokenNameERROR:
3742       return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3743     // //$NON-NLS-1$
3744     case TokenNameINLINE_HTML:
3745       return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3746     case TokenNameIdentifier:
3747       return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3748     case TokenNameVariable:
3749       return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3750     case TokenNameabstract:
3751       return "abstract"; //$NON-NLS-1$
3752     case TokenNameand:
3753       return "AND"; //$NON-NLS-1$
3754     case TokenNamearray:
3755       return "array"; //$NON-NLS-1$
3756     case TokenNameas:
3757       return "as"; //$NON-NLS-1$
3758     case TokenNamebreak:
3759       return "break"; //$NON-NLS-1$
3760     case TokenNamecase:
3761       return "case"; //$NON-NLS-1$
3762     case TokenNameclass:
3763       return "class"; //$NON-NLS-1$
3764     case TokenNamecatch:
3765       return "catch"; //$NON-NLS-1$
3766     case TokenNameclone:
3767       //$NON-NLS-1$
3768       return "clone";
3769     case TokenNameconst:
3770       //$NON-NLS-1$
3771       return "const";
3772     case TokenNamecontinue:
3773       return "continue"; //$NON-NLS-1$
3774     case TokenNamedefault:
3775       return "default"; //$NON-NLS-1$
3776     //      case TokenNamedefine :
3777     //        return "define"; //$NON-NLS-1$
3778     case TokenNamedo:
3779       return "do"; //$NON-NLS-1$
3780     case TokenNameecho:
3781       return "echo"; //$NON-NLS-1$
3782     case TokenNameelse:
3783       return "else"; //$NON-NLS-1$
3784     case TokenNameelseif:
3785       return "elseif"; //$NON-NLS-1$
3786     case TokenNameendfor:
3787       return "endfor"; //$NON-NLS-1$
3788     case TokenNameendforeach:
3789       return "endforeach"; //$NON-NLS-1$
3790     case TokenNameendif:
3791       return "endif"; //$NON-NLS-1$
3792     case TokenNameendswitch:
3793       return "endswitch"; //$NON-NLS-1$
3794     case TokenNameendwhile:
3795       return "endwhile"; //$NON-NLS-1$
3796     case TokenNameexit:
3797       return "exit";
3798     case TokenNameextends:
3799       return "extends"; //$NON-NLS-1$
3800     //      case TokenNamefalse :
3801     //        return "false"; //$NON-NLS-1$
3802     case TokenNamefinal:
3803       return "final"; //$NON-NLS-1$
3804     case TokenNamefor:
3805       return "for"; //$NON-NLS-1$
3806     case TokenNameforeach:
3807       return "foreach"; //$NON-NLS-1$
3808     case TokenNamefunction:
3809       return "function"; //$NON-NLS-1$
3810     case TokenNameglobal:
3811       return "global"; //$NON-NLS-1$
3812     case TokenNameif:
3813       return "if"; //$NON-NLS-1$
3814     case TokenNameimplements:
3815       return "implements"; //$NON-NLS-1$
3816     case TokenNameinclude:
3817       return "include"; //$NON-NLS-1$
3818     case TokenNameinclude_once:
3819       return "include_once"; //$NON-NLS-1$
3820     case TokenNameinstanceof:
3821       return "instanceof"; //$NON-NLS-1$
3822     case TokenNameinterface:
3823       return "interface"; //$NON-NLS-1$
3824     case TokenNameisset:
3825       return "isset"; //$NON-NLS-1$
3826     case TokenNamelist:
3827       return "list"; //$NON-NLS-1$
3828     case TokenNamenew:
3829       return "new"; //$NON-NLS-1$
3830     //      case TokenNamenull :
3831     //        return "null"; //$NON-NLS-1$
3832     case TokenNameor:
3833       return "OR"; //$NON-NLS-1$
3834     case TokenNameprint:
3835       return "print"; //$NON-NLS-1$
3836     case TokenNameprivate:
3837       return "private"; //$NON-NLS-1$
3838     case TokenNameprotected:
3839       return "protected"; //$NON-NLS-1$
3840     case TokenNamepublic:
3841       return "public"; //$NON-NLS-1$
3842     case TokenNamerequire:
3843       return "require"; //$NON-NLS-1$
3844     case TokenNamerequire_once:
3845       return "require_once"; //$NON-NLS-1$
3846     case TokenNamereturn:
3847       return "return"; //$NON-NLS-1$
3848     case TokenNamestatic:
3849       return "static"; //$NON-NLS-1$
3850     case TokenNameswitch:
3851       return "switch"; //$NON-NLS-1$
3852     //      case TokenNametrue :
3853     //        return "true"; //$NON-NLS-1$
3854     case TokenNameunset:
3855       return "unset"; //$NON-NLS-1$
3856     case TokenNamevar:
3857       return "var"; //$NON-NLS-1$
3858     case TokenNamewhile:
3859       return "while"; //$NON-NLS-1$
3860     case TokenNamexor:
3861       return "XOR"; //$NON-NLS-1$
3862     //      case TokenNamethis :
3863     //        return "$this"; //$NON-NLS-1$
3864     case TokenNameIntegerLiteral:
3865       return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3866     case TokenNameDoubleLiteral:
3867       return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3868     case TokenNameStringDoubleQuote:
3869       return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3870     case TokenNameStringSingleQuote:
3871       return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3872     case TokenNameStringInterpolated:
3873       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3874     case TokenNameEncapsedString0:
3875       return "`"; //$NON-NLS-1$
3876 //    case TokenNameEncapsedString1:
3877 //      return "\'"; //$NON-NLS-1$
3878 //    case TokenNameEncapsedString2:
3879 //      return "\""; //$NON-NLS-1$
3880     case TokenNameSTRING:
3881       return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3882     case TokenNameHEREDOC:
3883       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3884     case TokenNamePLUS_PLUS:
3885       return "++"; //$NON-NLS-1$
3886     case TokenNameMINUS_MINUS:
3887       return "--"; //$NON-NLS-1$
3888     case TokenNameEQUAL_EQUAL:
3889       return "=="; //$NON-NLS-1$
3890     case TokenNameEQUAL_EQUAL_EQUAL:
3891       return "==="; //$NON-NLS-1$
3892     case TokenNameEQUAL_GREATER:
3893       return "=>"; //$NON-NLS-1$
3894     case TokenNameLESS_EQUAL:
3895       return "<="; //$NON-NLS-1$
3896     case TokenNameGREATER_EQUAL:
3897       return ">="; //$NON-NLS-1$
3898     case TokenNameNOT_EQUAL:
3899       return "!="; //$NON-NLS-1$
3900     case TokenNameNOT_EQUAL_EQUAL:
3901       return "!=="; //$NON-NLS-1$
3902     case TokenNameLEFT_SHIFT:
3903       return "<<"; //$NON-NLS-1$
3904     case TokenNameRIGHT_SHIFT:
3905       return ">>"; //$NON-NLS-1$
3906     case TokenNamePLUS_EQUAL:
3907       return "+="; //$NON-NLS-1$
3908     case TokenNameMINUS_EQUAL:
3909       return "-="; //$NON-NLS-1$
3910     case TokenNameMULTIPLY_EQUAL:
3911       return "*="; //$NON-NLS-1$
3912     case TokenNameDIVIDE_EQUAL:
3913       return "/="; //$NON-NLS-1$
3914     case TokenNameAND_EQUAL:
3915       return "&="; //$NON-NLS-1$
3916     case TokenNameOR_EQUAL:
3917       return "|="; //$NON-NLS-1$
3918     case TokenNameXOR_EQUAL:
3919       return "^="; //$NON-NLS-1$
3920     case TokenNameREMAINDER_EQUAL:
3921       return "%="; //$NON-NLS-1$
3922     case TokenNameDOT_EQUAL:
3923       return ".="; //$NON-NLS-1$
3924     case TokenNameLEFT_SHIFT_EQUAL:
3925       return "<<="; //$NON-NLS-1$
3926     case TokenNameRIGHT_SHIFT_EQUAL:
3927       return ">>="; //$NON-NLS-1$
3928     case TokenNameOR_OR:
3929       return "||"; //$NON-NLS-1$
3930     case TokenNameAND_AND:
3931       return "&&"; //$NON-NLS-1$
3932     case TokenNamePLUS:
3933       return "+"; //$NON-NLS-1$
3934     case TokenNameMINUS:
3935       return "-"; //$NON-NLS-1$
3936     case TokenNameMINUS_GREATER:
3937       return "->";
3938     case TokenNameNOT:
3939       return "!"; //$NON-NLS-1$
3940     case TokenNameREMAINDER:
3941       return "%"; //$NON-NLS-1$
3942     case TokenNameXOR:
3943       return "^"; //$NON-NLS-1$
3944     case TokenNameAND:
3945       return "&"; //$NON-NLS-1$
3946     case TokenNameMULTIPLY:
3947       return "*"; //$NON-NLS-1$
3948     case TokenNameOR:
3949       return "|"; //$NON-NLS-1$
3950     case TokenNameTWIDDLE:
3951       return "~"; //$NON-NLS-1$
3952     case TokenNameTWIDDLE_EQUAL:
3953       return "~="; //$NON-NLS-1$
3954     case TokenNameDIVIDE:
3955       return "/"; //$NON-NLS-1$
3956     case TokenNameGREATER:
3957       return ">"; //$NON-NLS-1$
3958     case TokenNameLESS:
3959       return "<"; //$NON-NLS-1$
3960     case TokenNameLPAREN:
3961       return "("; //$NON-NLS-1$
3962     case TokenNameRPAREN:
3963       return ")"; //$NON-NLS-1$
3964     case TokenNameLBRACE:
3965       return "{"; //$NON-NLS-1$
3966     case TokenNameRBRACE:
3967       return "}"; //$NON-NLS-1$
3968     case TokenNameLBRACKET:
3969       return "["; //$NON-NLS-1$
3970     case TokenNameRBRACKET:
3971       return "]"; //$NON-NLS-1$
3972     case TokenNameSEMICOLON:
3973       return ";"; //$NON-NLS-1$
3974     case TokenNameQUESTION:
3975       return "?"; //$NON-NLS-1$
3976     case TokenNameCOLON:
3977       return ":"; //$NON-NLS-1$
3978     case TokenNameCOMMA:
3979       return ","; //$NON-NLS-1$
3980     case TokenNameDOT:
3981       return "."; //$NON-NLS-1$
3982     case TokenNameEQUAL:
3983       return "="; //$NON-NLS-1$
3984     case TokenNameAT:
3985       return "@";
3986     case TokenNameDOLLAR:
3987       return "$";
3988     case TokenNameDOLLAR_LBRACE:
3989       return "${";
3990     case TokenNameLBRACE_DOLLAR:
3991       return "{$";
3992     case TokenNameEOF:
3993       return "EOF"; //$NON-NLS-1$
3994     case TokenNameWHITESPACE:
3995       return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3996     case TokenNameCOMMENT_LINE:
3997       return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3998     case TokenNameCOMMENT_BLOCK:
3999       return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4000     case TokenNameCOMMENT_PHPDOC:
4001       return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4002     //      case TokenNameHTML :
4003     //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
4004     // //$NON-NLS-1$
4005     case TokenNameFILE:
4006       return "__FILE__"; //$NON-NLS-1$
4007     case TokenNameLINE:
4008       return "__LINE__"; //$NON-NLS-1$
4009     case TokenNameCLASS_C:
4010       return "__CLASS__"; //$NON-NLS-1$
4011     case TokenNameMETHOD_C:
4012       return "__METHOD__"; //$NON-NLS-1$
4013     case TokenNameFUNC_C:
4014       return "__FUNCTION__"; //$NON-NLS-1
4015     case TokenNameboolCAST:
4016       return "( bool )"; //$NON-NLS-1$
4017     case TokenNameintCAST:
4018       return "( int )"; //$NON-NLS-1$
4019     case TokenNamedoubleCAST:
4020       return "( double )"; //$NON-NLS-1$
4021     case TokenNameobjectCAST:
4022       return "( object )"; //$NON-NLS-1$
4023     case TokenNamestringCAST:
4024       return "( string )"; //$NON-NLS-1$
4025     default:
4026       return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
4027     }
4028   }
4029
4030   public Scanner() {
4031     this(false, false);
4032   }
4033
4034   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4035     this(tokenizeComments, tokenizeWhiteSpace, false);
4036   }
4037
4038   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4039     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
4040   }
4041
4042   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4043       boolean assertMode) {
4044     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
4045   }
4046
4047   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4048       boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
4049     this.eofPosition = Integer.MAX_VALUE;
4050     this.tokenizeComments = tokenizeComments;
4051     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
4052     this.tokenizeStrings = tokenizeStrings;
4053     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
4054     this.assertMode = assertMode;
4055 //    this.encapsedStringStack = null;
4056     this.taskTags = taskTags;
4057     this.taskPriorities = taskPriorities;
4058   }
4059
4060   private void checkNonExternalizeString() throws InvalidInputException {
4061     if (currentLine == null)
4062       return;
4063     parseTags(currentLine);
4064   }
4065
4066   private void parseTags(NLSLine line) throws InvalidInputException {
4067     String s = new String(getCurrentTokenSource());
4068     int pos = s.indexOf(TAG_PREFIX);
4069     int lineLength = line.size();
4070     while (pos != -1) {
4071       int start = pos + TAG_PREFIX_LENGTH;
4072       int end = s.indexOf(TAG_POSTFIX, start);
4073       String index = s.substring(start, end);
4074       int i = 0;
4075       try {
4076         i = Integer.parseInt(index) - 1;
4077         // Tags are one based not zero based.
4078       } catch (NumberFormatException e) {
4079         i = -1; // we don't want to consider this as a valid NLS tag
4080       }
4081       if (line.exists(i)) {
4082         line.set(i, null);
4083       }
4084       pos = s.indexOf(TAG_PREFIX, start);
4085     }
4086     this.nonNLSStrings = new StringLiteral[lineLength];
4087     int nonNLSCounter = 0;
4088     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4089       StringLiteral literal = (StringLiteral) iterator.next();
4090       if (literal != null) {
4091         this.nonNLSStrings[nonNLSCounter++] = literal;
4092       }
4093     }
4094     if (nonNLSCounter == 0) {
4095       this.nonNLSStrings = null;
4096       currentLine = null;
4097       return;
4098     }
4099     this.wasNonExternalizedStringLiteral = true;
4100     if (nonNLSCounter != lineLength) {
4101       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4102     }
4103     currentLine = null;
4104   }
4105
4106   public final void scanEscapeCharacter() throws InvalidInputException {
4107     // the string with "\\u" is a legal string of two chars \ and u
4108     //thus we use a direct access to the source (for regular cases).
4109     if (unicodeAsBackSlash) {
4110       // consume next character
4111       unicodeAsBackSlash = false;
4112       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
4113       // (source[currentPosition] == 'u')) {
4114       //                                getNextUnicodeChar();
4115       //                        } else {
4116       if (withoutUnicodePtr != 0) {
4117         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4118         //                              }
4119       }
4120     } else
4121       currentCharacter = source[currentPosition++];
4122     switch (currentCharacter) {
4123     case 'b':
4124       currentCharacter = '\b';
4125       break;
4126     case 't':
4127       currentCharacter = '\t';
4128       break;
4129     case 'n':
4130       currentCharacter = '\n';
4131       break;
4132     case 'f':
4133       currentCharacter = '\f';
4134       break;
4135     case 'r':
4136       currentCharacter = '\r';
4137       break;
4138     case '\"':
4139       currentCharacter = '\"';
4140       break;
4141     case '\'':
4142       currentCharacter = '\'';
4143       break;
4144     case '\\':
4145       currentCharacter = '\\';
4146       break;
4147     default:
4148       // -----------octal escape--------------
4149       // OctalDigit
4150       // OctalDigit OctalDigit
4151       // ZeroToThree OctalDigit OctalDigit
4152       int number = Character.getNumericValue(currentCharacter);
4153       if (number >= 0 && number <= 7) {
4154         boolean zeroToThreeNot = number > 3;
4155         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4156           int digit = Character.getNumericValue(currentCharacter);
4157           if (digit >= 0 && digit <= 7) {
4158             number = (number * 8) + digit;
4159             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4160               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4161                 // Digit --> ignore last character
4162                 currentPosition--;
4163               } else {
4164                 digit = Character.getNumericValue(currentCharacter);
4165                 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4166                   // OctalDigit OctalDigit
4167                   number = (number * 8) + digit;
4168                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4169                   // --> ignore last character
4170                   currentPosition--;
4171                 }
4172               }
4173             } else { // has read \OctalDigit NonDigit--> ignore last
4174               // character
4175               currentPosition--;
4176             }
4177           } else { // has read \OctalDigit NonOctalDigit--> ignore last
4178             // character
4179             currentPosition--;
4180           }
4181         } else { // has read \OctalDigit --> ignore last character
4182           currentPosition--;
4183         }
4184         if (number > 255)
4185           throw new InvalidInputException(INVALID_ESCAPE);
4186         currentCharacter = (char) number;
4187       } else
4188         throw new InvalidInputException(INVALID_ESCAPE);
4189     }
4190   }
4191
4192   //chech presence of task: tags
4193   //TODO (frederic) see if we need to take unicode characters into account...
4194   public void checkTaskTag(int commentStart, int commentEnd) {
4195     char[] src = this.source;
4196
4197     // only look for newer task: tags
4198     if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4199       return;
4200     }
4201     int foundTaskIndex = this.foundTaskCount;
4202     char previous = src[commentStart + 1]; // should be '*' or '/'
4203     nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4204       char[] tag = null;
4205       char[] priority = null;
4206       // check for tag occurrence only if not ambiguous with javadoc tag
4207       if (previous != '@') {
4208         nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4209           tag = this.taskTags[itag];
4210           int tagLength = tag.length;
4211           if (tagLength == 0)
4212             continue nextTag;
4213
4214           // ensure tag is not leaded with letter if tag starts with a letter
4215           if (Scanner.isPHPIdentifierStart(tag[0])) {
4216             if (Scanner.isPHPIdentifierPart(previous)) {
4217               continue nextTag;
4218             }
4219           }
4220
4221           for (int t = 0; t < tagLength; t++) {
4222             char sc, tc;
4223             int x = i + t;
4224             if (x >= this.eofPosition || x >= commentEnd)
4225               continue nextTag;
4226             if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4227               if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4228                 continue nextTag;
4229               }
4230             }
4231           }
4232           // ensure tag is not followed with letter if tag finishes with a letter
4233           if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4234             if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
4235               continue nextTag;
4236           }
4237           if (this.foundTaskTags == null) {
4238             this.foundTaskTags = new char[5][];
4239             this.foundTaskMessages = new char[5][];
4240             this.foundTaskPriorities = new char[5][];
4241             this.foundTaskPositions = new int[5][];
4242           } else if (this.foundTaskCount == this.foundTaskTags.length) {
4243             System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4244                 this.foundTaskCount);
4245             System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4246                 this.foundTaskCount);
4247             System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4248                 this.foundTaskCount);
4249             System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4250                 this.foundTaskCount);
4251           }
4252
4253           priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4254
4255           this.foundTaskTags[this.foundTaskCount] = tag;
4256           this.foundTaskPriorities[this.foundTaskCount] = priority;
4257           this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4258           this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4259           this.foundTaskCount++;
4260           i += tagLength - 1; // will be incremented when looping
4261           break nextTag;
4262         }
4263       }
4264       previous = src[i];
4265     }
4266     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4267       // retrieve message start and end positions
4268       int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4269       int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4270       // at most beginning of next task
4271       if (max_value < msgStart) {
4272         max_value = msgStart; // would only occur if tag is before EOF.
4273       }
4274       int end = -1;
4275       char c;
4276       for (int j = msgStart; j < max_value; j++) {
4277         if ((c = src[j]) == '\n' || c == '\r') {
4278           end = j - 1;
4279           break;
4280         }
4281       }
4282       if (end == -1) {
4283         for (int j = max_value; j > msgStart; j--) {
4284           if ((c = src[j]) == '*') {
4285             end = j - 1;
4286             break;
4287           }
4288         }
4289         if (end == -1)
4290           end = max_value;
4291       }
4292       if (msgStart == end)
4293         continue; // empty
4294       // trim the message
4295       while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4296         end--;
4297       while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4298         msgStart++;
4299       // update the end position of the task
4300       this.foundTaskPositions[i][1] = end;
4301       // get the message source
4302       final int messageLength = end - msgStart + 1;
4303       char[] message = new char[messageLength];
4304       System.arraycopy(src, msgStart, message, 0, messageLength);
4305       this.foundTaskMessages[i] = message;
4306     }
4307   }
4308
4309   // check presence of task: tags
4310   //  public void checkTaskTag(int commentStart, int commentEnd) {
4311   //    // only look for newer task: tags
4312   //    if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4313   //      return;
4314   //    }
4315   //    int foundTaskIndex = this.foundTaskCount;
4316   //    nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4317   //      char[] tag = null;
4318   //      char[] priority = null;
4319   //      // check for tag occurrence
4320   //      nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4321   //        tag = this.taskTags[itag];
4322   //        priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4323   //        int tagLength = tag.length;
4324   //        for (int t = 0; t < tagLength; t++) {
4325   //          if (this.source[i + t] != tag[t])
4326   //            continue nextTag;
4327   //        }
4328   //        if (this.foundTaskTags == null) {
4329   //          this.foundTaskTags = new char[5][];
4330   //          this.foundTaskMessages = new char[5][];
4331   //          this.foundTaskPriorities = new char[5][];
4332   //          this.foundTaskPositions = new int[5][];
4333   //        } else if (this.foundTaskCount == this.foundTaskTags.length) {
4334   //          System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4335   //          System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4336   //              this.foundTaskCount);
4337   //          System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4338   //              this.foundTaskCount);
4339   //          System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4340   //              this.foundTaskCount);
4341   //        }
4342   //        this.foundTaskTags[this.foundTaskCount] = tag;
4343   //        this.foundTaskPriorities[this.foundTaskCount] = priority;
4344   //        this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4345   //        this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4346   //        this.foundTaskCount++;
4347   //        i += tagLength - 1; // will be incremented when looping
4348   //      }
4349   //    }
4350   //    for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4351   //      // retrieve message start and end positions
4352   //      int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4353   //      int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4354   //      // at most beginning of next task
4355   //      if (max_value < msgStart)
4356   //        max_value = msgStart; // would only occur if tag is before EOF.
4357   //      int end = -1;
4358   //      char c;
4359   //      for (int j = msgStart; j < max_value; j++) {
4360   //        if ((c = this.source[j]) == '\n' || c == '\r') {
4361   //          end = j - 1;
4362   //          break;
4363   //        }
4364   //      }
4365   //      if (end == -1) {
4366   //        for (int j = max_value; j > msgStart; j--) {
4367   //          if ((c = this.source[j]) == '*') {
4368   //            end = j - 1;
4369   //            break;
4370   //          }
4371   //        }
4372   //        if (end == -1)
4373   //          end = max_value;
4374   //      }
4375   //      if (msgStart == end)
4376   //        continue; // empty
4377   //      // trim the message
4378   //      while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4379   //        end--;
4380   //      while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4381   //        msgStart++;
4382   //      // update the end position of the task
4383   //      this.foundTaskPositions[i][1] = end;
4384   //      // get the message source
4385   //      final int messageLength = end - msgStart + 1;
4386   //      char[] message = new char[messageLength];
4387   //      System.arraycopy(source, msgStart, message, 0, messageLength);
4388   //      this.foundTaskMessages[i] = message;
4389   //    }
4390   //  }
4391 }