Cleaned up PHP Prefs (Code Assist, typing)
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Stack;
16
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
22
23
24 public class Scanner implements IScanner, ITerminalSymbols {
25   /*
26    * APIs ares - getNextToken() which return the current type of the token
27    * (this value is not memorized by the scanner) - getCurrentTokenSource()
28    * which provides with the token "REAL" source (aka all unicode have been
29    * transformed into a correct char) - sourceStart gives the position into the
30    * stream - currentPosition-1 gives the sourceEnd position into the stream
31    */
32   // 1.4 feature
33   private boolean assertMode;
34   public boolean useAssertAsAnIndentifier = false;
35   //flag indicating if processed source contains occurrences of keyword assert
36   public boolean containsAssertKeyword = false;
37   public boolean recordLineSeparator;
38   public boolean phpMode = false;
39   public Stack encapsedStringStack = null;
40   public char currentCharacter;
41   public int startPosition;
42   public int currentPosition;
43   public int initialPosition, eofPosition;
44   // after this position eof are generated instead of real token from the
45   // source
46   public boolean tokenizeComments;
47   public boolean tokenizeWhiteSpace;
48   public boolean tokenizeStrings;
49   //source should be viewed as a window (aka a part)
50   //of a entire very large stream
51   public char source[];
52   //unicode support
53   public char[] withoutUnicodeBuffer;
54   public int withoutUnicodePtr;
55   //when == 0 ==> no unicode in the current token
56   public boolean unicodeAsBackSlash = false;
57   public boolean scanningFloatLiteral = false;
58 //support for /** comments
59         public int[] commentStops = new int[10];
60         public int[] commentStarts = new int[10];
61         public int commentPtr = -1; // no comment test with commentPtr value -1
62         protected int lastCommentLinePosition = -1;
63   //diet parsing support - jump over some method body when requested
64   public boolean diet = false;
65   //support for the poor-line-debuggers ....
66   //remember the position of the cr/lf
67   public int[] lineEnds = new int[250];
68   public int linePtr = -1;
69   public boolean wasAcr = false;
70   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
71   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
72   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
73   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
74   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
75   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
76   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
77   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
78   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
79   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
80   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
81   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
82   //----------------optimized identifier managment------------------
83   static final char[] charArray_a = new char[]{'a'},
84       charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
85       charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
86       charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
87       charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
88       charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
89       charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
90       charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
91       charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
92       charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
93       charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
94       charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
95       charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
96       charArray_z = new char[]{'z'};
97   static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
98       '\u0000', '\u0000', '\u0000'};
99   static final int TableSize = 30, InternalTableSize = 6;
100   //30*6 = 180 entries
101   public static final int OptimizedLength = 6;
102   public/* static */
103   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
104   // support for detecting non-externalized string literals
105   int currentLineNr = -1;
106   int previousLineNr = -1;
107   NLSLine currentLine = null;
108   List lines = new ArrayList();
109   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
110   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
111   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
112   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
113   public StringLiteral[] nonNLSStrings = null;
114   public boolean checkNonExternalizedStringLiterals = true;
115   public boolean wasNonExternalizedStringLiteral = false;
116   /* static */{
117     for (int i = 0; i < 6; i++) {
118       for (int j = 0; j < TableSize; j++) {
119         for (int k = 0; k < InternalTableSize; k++) {
120           charArray_length[i][j][k] = initCharArray;
121         }
122       }
123     }
124   }
125   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
126       newEntry6 = 0;
127   public static final int RoundBracket = 0;
128   public static final int SquareBracket = 1;
129   public static final int CurlyBracket = 2;
130   public static final int BracketKinds = 3;
131   // task tag support
132   public char[][] foundTaskTags = null;
133   public char[][] foundTaskMessages;
134   public char[][] foundTaskPriorities = null;
135   public int[][] foundTaskPositions;
136   public int foundTaskCount = 0;
137   public char[][] taskTags = null;
138   public char[][] taskPriorities = null;
139   public static final boolean DEBUG = false;
140   public static final boolean TRACE = false;
141
142   /**
143    * Determines if the specified character is permissible as the first
144    * character in a PHP identifier
145    */
146   public static boolean isPHPIdentifierStart(char ch) {
147     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
148   }
149   /**
150    * Determines if the specified character may be part of a PHP identifier as
151    * other than the first character
152    */
153   public static boolean isPHPIdentifierPart(char ch) {
154     return Character.isLetterOrDigit(ch) || (ch == '_')
155         || (0x7F <= ch && ch <= 0xFF);
156   }
157   public final boolean atEnd() {
158     // This code is not relevant if source is
159     // Only a part of the real stream input
160     return source.length == currentPosition;
161   }
162   public char[] getCurrentIdentifierSource() {
163     //return the token REAL source (aka unicodes are precomputed)
164     char[] result;
165     //    if (withoutUnicodePtr != 0)
166     //      //0 is used as a fast test flag so the real first char is in position 1
167     //      System.arraycopy(
168     //        withoutUnicodeBuffer,
169     //        1,
170     //        result = new char[withoutUnicodePtr],
171     //        0,
172     //        withoutUnicodePtr);
173     //    else {
174     int length = currentPosition - startPosition;
175     switch (length) { // see OptimizedLength
176       case 1 :
177         return optimizedCurrentTokenSource1();
178       case 2 :
179         return optimizedCurrentTokenSource2();
180       case 3 :
181         return optimizedCurrentTokenSource3();
182       case 4 :
183         return optimizedCurrentTokenSource4();
184       case 5 :
185         return optimizedCurrentTokenSource5();
186       case 6 :
187         return optimizedCurrentTokenSource6();
188     }
189     //no optimization
190     System.arraycopy(source, startPosition, result = new char[length], 0,
191         length);
192     //   }
193     return result;
194   }
195   public int getCurrentTokenEndPosition() {
196     return this.currentPosition - 1;
197   }
198   public final char[] getCurrentTokenSource() {
199     // Return the token REAL source (aka unicodes are precomputed)
200     char[] result;
201     //    if (withoutUnicodePtr != 0)
202     //      // 0 is used as a fast test flag so the real first char is in position 1
203     //      System.arraycopy(
204     //        withoutUnicodeBuffer,
205     //        1,
206     //        result = new char[withoutUnicodePtr],
207     //        0,
208     //        withoutUnicodePtr);
209     //    else {
210     int length;
211     System.arraycopy(source, startPosition,
212         result = new char[length = currentPosition - startPosition], 0, length);
213     //    }
214     return result;
215   }
216   public final char[] getCurrentTokenSource(int startPos) {
217     // Return the token REAL source (aka unicodes are precomputed)
218     char[] result;
219     //    if (withoutUnicodePtr != 0)
220     //      // 0 is used as a fast test flag so the real first char is in position 1
221     //      System.arraycopy(
222     //        withoutUnicodeBuffer,
223     //        1,
224     //        result = new char[withoutUnicodePtr],
225     //        0,
226     //        withoutUnicodePtr);
227     //    else {
228     int length;
229     System.arraycopy(source, startPos,
230         result = new char[length = currentPosition - startPos], 0, length);
231     //  }
232     return result;
233   }
234   public final char[] getCurrentTokenSourceString() {
235     //return the token REAL source (aka unicodes are precomputed).
236     //REMOVE the two " that are at the beginning and the end.
237     char[] result;
238     if (withoutUnicodePtr != 0)
239       //0 is used as a fast test flag so the real first char is in position 1
240       System.arraycopy(withoutUnicodeBuffer, 2, 
241       //2 is 1 (real start) + 1 (to jump over the ")
242           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
243     else {
244       int length;
245       System.arraycopy(source, startPosition + 1,
246           result = new char[length = currentPosition - startPosition - 2], 0,
247           length);
248     }
249     return result;
250   }
251   public int getCurrentTokenStartPosition() {
252     return this.startPosition;
253   }
254   public final char[] getCurrentStringLiteralSource() {
255     // Return the token REAL source (aka unicodes are precomputed)
256     char[] result;
257     int length;
258     System.arraycopy(source, startPosition + 1,
259         result = new char[length = currentPosition - startPosition - 2], 0,
260         length);
261     //    }
262     return result;
263   }
264   /*
265    * Search the source position corresponding to the end of a given line number
266    * 
267    * Line numbers are 1-based, and relative to the scanner initialPosition.
268    * Character positions are 0-based.
269    * 
270    * In case the given line number is inconsistent, answers -1.
271    */
272   public final int getLineEnd(int lineNumber) {
273     if (lineEnds == null)
274       return -1;
275     if (lineNumber >= lineEnds.length)
276       return -1;
277     if (lineNumber <= 0)
278       return -1;
279     if (lineNumber == lineEnds.length - 1)
280       return eofPosition;
281     return lineEnds[lineNumber - 1];
282     // next line start one character behind the lineEnd of the previous line
283   }
284   /**
285    * Search the source position corresponding to the beginning of a given line
286    * number
287    * 
288    * Line numbers are 1-based, and relative to the scanner initialPosition.
289    * Character positions are 0-based.
290    * 
291    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
292    * 
293    * In case the given line number is inconsistent, answers -1.
294    */
295   public final int getLineStart(int lineNumber) {
296     if (lineEnds == null)
297       return -1;
298     if (lineNumber >= lineEnds.length)
299       return -1;
300     if (lineNumber <= 0)
301       return -1;
302     if (lineNumber == 1)
303       return initialPosition;
304     return lineEnds[lineNumber - 2] + 1;
305     // next line start one character behind the lineEnd of the previous line
306   }
307   public final boolean getNextChar(char testedChar) {
308     //BOOLEAN
309     //handle the case of unicode.
310     //when a unicode appears then we must use a buffer that holds char
311     // internal values
312     //At the end of this method currentCharacter holds the new visited char
313     //and currentPosition points right next after it
314     //Both previous lines are true if the currentCharacter is == to the
315     // testedChar
316     //On false, no side effect has occured.
317     //ALL getNextChar.... ARE OPTIMIZED COPIES
318     int temp = currentPosition;
319     try {
320       currentCharacter = source[currentPosition++];
321       //      if (((currentCharacter = source[currentPosition++]) == '\\')
322       //        && (source[currentPosition] == 'u')) {
323       //        //-------------unicode traitement ------------
324       //        int c1, c2, c3, c4;
325       //        int unicodeSize = 6;
326       //        currentPosition++;
327       //        while (source[currentPosition] == 'u') {
328       //          currentPosition++;
329       //          unicodeSize++;
330       //        }
331       //
332       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
333       //          || c1 < 0)
334       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
335       //            || c2 < 0)
336       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
337       //            || c3 < 0)
338       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
339       //            || c4 < 0)) {
340       //          currentPosition = temp;
341       //          return false;
342       //        }
343       //
344       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
345       //        if (currentCharacter != testedChar) {
346       //          currentPosition = temp;
347       //          return false;
348       //        }
349       //        unicodeAsBackSlash = currentCharacter == '\\';
350       //
351       //        //need the unicode buffer
352       //        if (withoutUnicodePtr == 0) {
353       //          //buffer all the entries that have been left aside....
354       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
355       //          System.arraycopy(
356       //            source,
357       //            startPosition,
358       //            withoutUnicodeBuffer,
359       //            1,
360       //            withoutUnicodePtr);
361       //        }
362       //        //fill the buffer with the char
363       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
364       //        return true;
365       //
366       //      } //-------------end unicode traitement--------------
367       //      else {
368       if (currentCharacter != testedChar) {
369         currentPosition = temp;
370         return false;
371       }
372       unicodeAsBackSlash = false;
373       //        if (withoutUnicodePtr != 0)
374       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
375       return true;
376       //      }
377     } catch (IndexOutOfBoundsException e) {
378       unicodeAsBackSlash = false;
379       currentPosition = temp;
380       return false;
381     }
382   }
383   public final int getNextChar(char testedChar1, char testedChar2) {
384     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
385     //test can be done with (x==0) for the first and (x>0) for the second
386     //handle the case of unicode.
387     //when a unicode appears then we must use a buffer that holds char
388     // internal values
389     //At the end of this method currentCharacter holds the new visited char
390     //and currentPosition points right next after it
391     //Both previous lines are true if the currentCharacter is == to the
392     // testedChar1/2
393     //On false, no side effect has occured.
394     //ALL getNextChar.... ARE OPTIMIZED COPIES
395     int temp = currentPosition;
396     try {
397       int result;
398       currentCharacter = source[currentPosition++];
399       //      if (((currentCharacter = source[currentPosition++]) == '\\')
400       //        && (source[currentPosition] == 'u')) {
401       //        //-------------unicode traitement ------------
402       //        int c1, c2, c3, c4;
403       //        int unicodeSize = 6;
404       //        currentPosition++;
405       //        while (source[currentPosition] == 'u') {
406       //          currentPosition++;
407       //          unicodeSize++;
408       //        }
409       //
410       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
411       //          || c1 < 0)
412       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
413       //            || c2 < 0)
414       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
415       //            || c3 < 0)
416       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
417       //            || c4 < 0)) {
418       //          currentPosition = temp;
419       //          return 2;
420       //        }
421       //
422       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
423       //        if (currentCharacter == testedChar1)
424       //          result = 0;
425       //        else if (currentCharacter == testedChar2)
426       //          result = 1;
427       //        else {
428       //          currentPosition = temp;
429       //          return -1;
430       //        }
431       //
432       //        //need the unicode buffer
433       //        if (withoutUnicodePtr == 0) {
434       //          //buffer all the entries that have been left aside....
435       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
436       //          System.arraycopy(
437       //            source,
438       //            startPosition,
439       //            withoutUnicodeBuffer,
440       //            1,
441       //            withoutUnicodePtr);
442       //        }
443       //        //fill the buffer with the char
444       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
445       //        return result;
446       //      } //-------------end unicode traitement--------------
447       //      else {
448       if (currentCharacter == testedChar1)
449         result = 0;
450       else if (currentCharacter == testedChar2)
451         result = 1;
452       else {
453         currentPosition = temp;
454         return -1;
455       }
456       //        if (withoutUnicodePtr != 0)
457       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
458       return result;
459       //     }
460     } catch (IndexOutOfBoundsException e) {
461       currentPosition = temp;
462       return -1;
463     }
464   }
465   public final boolean getNextCharAsDigit() {
466     //BOOLEAN
467     //handle the case of unicode.
468     //when a unicode appears then we must use a buffer that holds char
469     // internal values
470     //At the end of this method currentCharacter holds the new visited char
471     //and currentPosition points right next after it
472     //Both previous lines are true if the currentCharacter is a digit
473     //On false, no side effect has occured.
474     //ALL getNextChar.... ARE OPTIMIZED COPIES
475     int temp = currentPosition;
476     try {
477       currentCharacter = source[currentPosition++];
478       //      if (((currentCharacter = source[currentPosition++]) == '\\')
479       //        && (source[currentPosition] == 'u')) {
480       //        //-------------unicode traitement ------------
481       //        int c1, c2, c3, c4;
482       //        int unicodeSize = 6;
483       //        currentPosition++;
484       //        while (source[currentPosition] == 'u') {
485       //          currentPosition++;
486       //          unicodeSize++;
487       //        }
488       //
489       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
490       //          || c1 < 0)
491       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
492       //            || c2 < 0)
493       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
494       //            || c3 < 0)
495       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
496       //            || c4 < 0)) {
497       //          currentPosition = temp;
498       //          return false;
499       //        }
500       //
501       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
502       //        if (!Character.isDigit(currentCharacter)) {
503       //          currentPosition = temp;
504       //          return false;
505       //        }
506       //
507       //        //need the unicode buffer
508       //        if (withoutUnicodePtr == 0) {
509       //          //buffer all the entries that have been left aside....
510       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
511       //          System.arraycopy(
512       //            source,
513       //            startPosition,
514       //            withoutUnicodeBuffer,
515       //            1,
516       //            withoutUnicodePtr);
517       //        }
518       //        //fill the buffer with the char
519       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
520       //        return true;
521       //      } //-------------end unicode traitement--------------
522       //      else {
523       if (!Character.isDigit(currentCharacter)) {
524         currentPosition = temp;
525         return false;
526       }
527       //        if (withoutUnicodePtr != 0)
528       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
529       return true;
530       //      }
531     } catch (IndexOutOfBoundsException e) {
532       currentPosition = temp;
533       return false;
534     }
535   }
536   public final boolean getNextCharAsDigit(int radix) {
537     //BOOLEAN
538     //handle the case of unicode.
539     //when a unicode appears then we must use a buffer that holds char
540     // internal values
541     //At the end of this method currentCharacter holds the new visited char
542     //and currentPosition points right next after it
543     //Both previous lines are true if the currentCharacter is a digit base on
544     // radix
545     //On false, no side effect has occured.
546     //ALL getNextChar.... ARE OPTIMIZED COPIES
547     int temp = currentPosition;
548     try {
549       currentCharacter = source[currentPosition++];
550       //      if (((currentCharacter = source[currentPosition++]) == '\\')
551       //        && (source[currentPosition] == 'u')) {
552       //        //-------------unicode traitement ------------
553       //        int c1, c2, c3, c4;
554       //        int unicodeSize = 6;
555       //        currentPosition++;
556       //        while (source[currentPosition] == 'u') {
557       //          currentPosition++;
558       //          unicodeSize++;
559       //        }
560       //
561       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
562       //          || c1 < 0)
563       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
564       //            || c2 < 0)
565       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
566       //            || c3 < 0)
567       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
568       //            || c4 < 0)) {
569       //          currentPosition = temp;
570       //          return false;
571       //        }
572       //
573       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
574       //        if (Character.digit(currentCharacter, radix) == -1) {
575       //          currentPosition = temp;
576       //          return false;
577       //        }
578       //
579       //        //need the unicode buffer
580       //        if (withoutUnicodePtr == 0) {
581       //          //buffer all the entries that have been left aside....
582       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
583       //          System.arraycopy(
584       //            source,
585       //            startPosition,
586       //            withoutUnicodeBuffer,
587       //            1,
588       //            withoutUnicodePtr);
589       //        }
590       //        //fill the buffer with the char
591       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
592       //        return true;
593       //      } //-------------end unicode traitement--------------
594       //      else {
595       if (Character.digit(currentCharacter, radix) == -1) {
596         currentPosition = temp;
597         return false;
598       }
599       //        if (withoutUnicodePtr != 0)
600       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
601       return true;
602       //      }
603     } catch (IndexOutOfBoundsException e) {
604       currentPosition = temp;
605       return false;
606     }
607   }
608   public boolean getNextCharAsJavaIdentifierPart() {
609     //BOOLEAN
610     //handle the case of unicode.
611     //when a unicode appears then we must use a buffer that holds char
612     // internal values
613     //At the end of this method currentCharacter holds the new visited char
614     //and currentPosition points right next after it
615     //Both previous lines are true if the currentCharacter is a
616     // JavaIdentifierPart
617     //On false, no side effect has occured.
618     //ALL getNextChar.... ARE OPTIMIZED COPIES
619     int temp = currentPosition;
620     try {
621       currentCharacter = source[currentPosition++];
622       //      if (((currentCharacter = source[currentPosition++]) == '\\')
623       //        && (source[currentPosition] == 'u')) {
624       //        //-------------unicode traitement ------------
625       //        int c1, c2, c3, c4;
626       //        int unicodeSize = 6;
627       //        currentPosition++;
628       //        while (source[currentPosition] == 'u') {
629       //          currentPosition++;
630       //          unicodeSize++;
631       //        }
632       //
633       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
634       //          || c1 < 0)
635       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
636       //            || c2 < 0)
637       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
638       //            || c3 < 0)
639       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
640       //            || c4 < 0)) {
641       //          currentPosition = temp;
642       //          return false;
643       //        }
644       //
645       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
646       //        if (!isPHPIdentifierPart(currentCharacter)) {
647       //          currentPosition = temp;
648       //          return false;
649       //        }
650       //
651       //        //need the unicode buffer
652       //        if (withoutUnicodePtr == 0) {
653       //          //buffer all the entries that have been left aside....
654       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
655       //          System.arraycopy(
656       //            source,
657       //            startPosition,
658       //            withoutUnicodeBuffer,
659       //            1,
660       //            withoutUnicodePtr);
661       //        }
662       //        //fill the buffer with the char
663       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
664       //        return true;
665       //      } //-------------end unicode traitement--------------
666       //      else {
667       if (!isPHPIdentifierPart(currentCharacter)) {
668         currentPosition = temp;
669         return false;
670       }
671       //        if (withoutUnicodePtr != 0)
672       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
673       return true;
674       //      }
675     } catch (IndexOutOfBoundsException e) {
676       currentPosition = temp;
677       return false;
678     }
679   }
680   public int getCastOrParen() {
681     int tempPosition = currentPosition;
682     char tempCharacter = currentCharacter;
683     int tempToken = TokenNameLPAREN;
684     boolean found = false;
685     StringBuffer buf = new StringBuffer();
686     try {
687       do {
688         currentCharacter = source[currentPosition++];
689       } while (currentCharacter == ' ' || currentCharacter == '\t');
690       while ((currentCharacter >= 'a' && currentCharacter <= 'z')
691           || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
692         buf.append(currentCharacter);
693         currentCharacter = source[currentPosition++];
694       }
695       if (buf.length() >= 3 && buf.length() <= 7) {
696         char[] data = buf.toString().toCharArray();
697         int index = 0;
698         switch (data.length) {
699           case 3 :
700             // int
701             if ((data[index] == 'i') && (data[++index] == 'n')
702                 && (data[++index] == 't')) {
703               found = true;
704               tempToken = TokenNameintCAST;
705             }
706             break;
707           case 4 :
708             // bool real
709             if ((data[index] == 'b') && (data[++index] == 'o')
710                 && (data[++index] == 'o') && (data[++index] == 'l')) {
711               found = true;
712               tempToken = TokenNameboolCAST;
713             } else {
714               index = 0;
715               if ((data[index] == 'r') && (data[++index] == 'e')
716                   && (data[++index] == 'a') && (data[++index] == 'l')) {
717                 found = true;
718                 tempToken = TokenNamedoubleCAST;
719               }
720             }
721             break;
722           case 5 :
723             // array unset float
724             if ((data[index] == 'a') && (data[++index] == 'r')
725                 && (data[++index] == 'r') && (data[++index] == 'a')
726                 && (data[++index] == 'y')) {
727               found = true;
728               tempToken = TokenNamearrayCAST;
729             } else {
730               index = 0;
731               if ((data[index] == 'u') && (data[++index] == 'n')
732                   && (data[++index] == 's') && (data[++index] == 'e')
733                   && (data[++index] == 't')) {
734                 found = true;
735                 tempToken = TokenNameunsetCAST;
736               } else {
737                 index = 0;
738                 if ((data[index] == 'f') && (data[++index] == 'l')
739                     && (data[++index] == 'o') && (data[++index] == 'a')
740                     && (data[++index] == 't')) {
741                   found = true;
742                   tempToken = TokenNamedoubleCAST;
743                 }
744               }
745             }
746             break;
747           case 6 :
748             // object string double
749             if ((data[index] == 'o') && (data[++index] == 'b')
750                 && (data[++index] == 'j') && (data[++index] == 'e')
751                 && (data[++index] == 'c') && (data[++index] == 't')) {
752               found = true;
753               tempToken = TokenNameobjectCAST;
754             } else {
755               index = 0;
756               if ((data[index] == 's') && (data[++index] == 't')
757                   && (data[++index] == 'r') && (data[++index] == 'i')
758                   && (data[++index] == 'n') && (data[++index] == 'g')) {
759                 found = true;
760                 tempToken = TokenNamestringCAST;
761               } else {
762                 index = 0;
763                 if ((data[index] == 'd') && (data[++index] == 'o')
764                     && (data[++index] == 'u') && (data[++index] == 'b')
765                     && (data[++index] == 'l') && (data[++index] == 'e')) {
766                   found = true;
767                   tempToken = TokenNamedoubleCAST;
768                 }
769               }
770             }
771             break;
772           case 7 :
773             // boolean integer
774             if ((data[index] == 'b') && (data[++index] == 'o')
775                 && (data[++index] == 'o') && (data[++index] == 'l')
776                 && (data[++index] == 'e') && (data[++index] == 'a')
777                 && (data[++index] == 'n')) {
778               found = true;
779               tempToken = TokenNameboolCAST;
780             } else {
781               index = 0;
782               if ((data[index] == 'i') && (data[++index] == 'n')
783                   && (data[++index] == 't') && (data[++index] == 'e')
784                   && (data[++index] == 'g') && (data[++index] == 'e')
785                   && (data[++index] == 'r')) {
786                 found = true;
787                 tempToken = TokenNameintCAST;
788               }
789             }
790             break;
791         }
792         if (found) {
793           while (currentCharacter == ' ' || currentCharacter == '\t') {
794             currentCharacter = source[currentPosition++];
795           }
796           if (currentCharacter == ')') {
797             return tempToken;
798           }
799         }
800       }
801     } catch (IndexOutOfBoundsException e) {
802     }
803     currentCharacter = tempCharacter;
804     currentPosition = tempPosition;
805     return TokenNameLPAREN;
806   }
807   public void consumeStringInterpolated() throws InvalidInputException {
808     try {
809       // consume next character
810       unicodeAsBackSlash = false;
811       currentCharacter = source[currentPosition++];
812       //                if (((currentCharacter = source[currentPosition++]) == '\\')
813       //                  && (source[currentPosition] == 'u')) {
814       //                  getNextUnicodeChar();
815       //                } else {
816       //                  if (withoutUnicodePtr != 0) {
817       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
818       //                      currentCharacter;
819       //                  }
820       //                }
821       while (currentCharacter != '`') {
822         /** ** in PHP \r and \n are valid in string literals *** */
823         //                if ((currentCharacter == '\n')
824         //                  || (currentCharacter == '\r')) {
825         //                  // relocate if finding another quote fairly close: thus unicode
826         // '/u000D' will be fully consumed
827         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
828         //                    if (currentPosition + lookAhead == source.length)
829         //                      break;
830         //                    if (source[currentPosition + lookAhead] == '\n')
831         //                      break;
832         //                    if (source[currentPosition + lookAhead] == '\"') {
833         //                      currentPosition += lookAhead + 1;
834         //                      break;
835         //                    }
836         //                  }
837         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
838         //                }
839         if (currentCharacter == '\\') {
840           int escapeSize = currentPosition;
841           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
842           //scanEscapeCharacter make a side effect on this value and we need
843           // the previous value few lines down this one
844           scanDoubleQuotedEscapeCharacter();
845           escapeSize = currentPosition - escapeSize;
846           if (withoutUnicodePtr == 0) {
847             //buffer all the entries that have been left aside....
848             withoutUnicodePtr = currentPosition - escapeSize - 1
849                 - startPosition;
850             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
851                 withoutUnicodePtr);
852             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
853           } else { //overwrite the / in the buffer
854             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
855             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
856                                               // where only one is correct
857               withoutUnicodePtr--;
858             }
859           }
860         }
861         // consume next character
862         unicodeAsBackSlash = false;
863         currentCharacter = source[currentPosition++];
864         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
865         //                    && (source[currentPosition] == 'u')) {
866         //                    getNextUnicodeChar();
867         //                  } else {
868         if (withoutUnicodePtr != 0) {
869           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
870         }
871         //                  }
872       }
873     } catch (IndexOutOfBoundsException e) {
874 //    reset end position for error reporting
875       currentPosition-=2; 
876       throw new InvalidInputException(UNTERMINATED_STRING);
877     } catch (InvalidInputException e) {
878       if (e.getMessage().equals(INVALID_ESCAPE)) {
879         // relocate if finding another quote fairly close: thus unicode
880         // '/u000D' will be fully consumed
881         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
882           if (currentPosition + lookAhead == source.length)
883             break;
884           if (source[currentPosition + lookAhead] == '\n')
885             break;
886           if (source[currentPosition + lookAhead] == '`') {
887             currentPosition += lookAhead + 1;
888             break;
889           }
890         }
891       }
892       throw e; // rethrow
893     }
894     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
895                                               // //$NON-NLS-?$ where ? is an
896                                               // int.
897       if (currentLine == null) {
898         currentLine = new NLSLine();
899         lines.add(currentLine);
900       }
901       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
902           startPosition, currentPosition - 1));
903     }
904   }
905   public void consumeStringConstant() throws InvalidInputException {
906     try {
907       // consume next character
908       unicodeAsBackSlash = false;
909       currentCharacter = source[currentPosition++];
910       //                if (((currentCharacter = source[currentPosition++]) == '\\')
911       //                  && (source[currentPosition] == 'u')) {
912       //                  getNextUnicodeChar();
913       //                } else {
914       //                  if (withoutUnicodePtr != 0) {
915       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
916       //                      currentCharacter;
917       //                  }
918       //                }
919       while (currentCharacter != '\'') {
920         /** ** in PHP \r and \n are valid in string literals *** */
921         //                  if ((currentCharacter == '\n')
922         //                    || (currentCharacter == '\r')) {
923         //                    // relocate if finding another quote fairly close: thus unicode
924         // '/u000D' will be fully consumed
925         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
926         //                      if (currentPosition + lookAhead == source.length)
927         //                        break;
928         //                      if (source[currentPosition + lookAhead] == '\n')
929         //                        break;
930         //                      if (source[currentPosition + lookAhead] == '\"') {
931         //                        currentPosition += lookAhead + 1;
932         //                        break;
933         //                      }
934         //                    }
935         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
936         //                  }
937         if (currentCharacter == '\\') {
938           int escapeSize = currentPosition;
939           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
940           //scanEscapeCharacter make a side effect on this value and we need
941           // the previous value few lines down this one
942           scanSingleQuotedEscapeCharacter();
943           escapeSize = currentPosition - escapeSize;
944           if (withoutUnicodePtr == 0) {
945             //buffer all the entries that have been left aside....
946             withoutUnicodePtr = currentPosition - escapeSize - 1
947                 - startPosition;
948             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
949                 withoutUnicodePtr);
950             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
951           } else { //overwrite the / in the buffer
952             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
953             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
954                                               // where only one is correct
955               withoutUnicodePtr--;
956             }
957           }
958         }
959         // consume next character
960         unicodeAsBackSlash = false;
961         currentCharacter = source[currentPosition++];
962         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
963         //                    && (source[currentPosition] == 'u')) {
964         //                    getNextUnicodeChar();
965         //                  } else {
966         if (withoutUnicodePtr != 0) {
967           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
968         }
969         //                  }
970       }
971     } catch (IndexOutOfBoundsException e) {
972       // reset end position for error reporting
973       currentPosition-=2;
974       throw new InvalidInputException(UNTERMINATED_STRING);
975     } catch (InvalidInputException e) {
976       if (e.getMessage().equals(INVALID_ESCAPE)) {
977         // relocate if finding another quote fairly close: thus unicode
978         // '/u000D' will be fully consumed
979         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
980           if (currentPosition + lookAhead == source.length)
981             break;
982           if (source[currentPosition + lookAhead] == '\n')
983             break;
984           if (source[currentPosition + lookAhead] == '\'') {
985             currentPosition += lookAhead + 1;
986             break;
987           }
988         }
989       }
990       throw e; // rethrow
991     }
992     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
993                                               // //$NON-NLS-?$ where ? is an
994                                               // int.
995       if (currentLine == null) {
996         currentLine = new NLSLine();
997         lines.add(currentLine);
998       }
999       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1000           startPosition, currentPosition - 1));
1001     }
1002   }
1003   public void consumeStringLiteral() throws InvalidInputException {
1004     try {
1005       // consume next character
1006       unicodeAsBackSlash = false;
1007       currentCharacter = source[currentPosition++];
1008       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1009       //                  && (source[currentPosition] == 'u')) {
1010       //                  getNextUnicodeChar();
1011       //                } else {
1012       //                  if (withoutUnicodePtr != 0) {
1013       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1014       //                      currentCharacter;
1015       //                  }
1016       //                }
1017       while (currentCharacter != '"') {
1018         /** ** in PHP \r and \n are valid in string literals *** */
1019         //                  if ((currentCharacter == '\n')
1020         //                    || (currentCharacter == '\r')) {
1021         //                    // relocate if finding another quote fairly close: thus unicode
1022         // '/u000D' will be fully consumed
1023         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1024         //                      if (currentPosition + lookAhead == source.length)
1025         //                        break;
1026         //                      if (source[currentPosition + lookAhead] == '\n')
1027         //                        break;
1028         //                      if (source[currentPosition + lookAhead] == '\"') {
1029         //                        currentPosition += lookAhead + 1;
1030         //                        break;
1031         //                      }
1032         //                    }
1033         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1034         //                  }
1035         if (currentCharacter == '\\') {
1036           int escapeSize = currentPosition;
1037           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1038           //scanEscapeCharacter make a side effect on this value and we need
1039           // the previous value few lines down this one
1040           scanDoubleQuotedEscapeCharacter();
1041           escapeSize = currentPosition - escapeSize;
1042           if (withoutUnicodePtr == 0) {
1043             //buffer all the entries that have been left aside....
1044             withoutUnicodePtr = currentPosition - escapeSize - 1
1045                 - startPosition;
1046             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1047                 withoutUnicodePtr);
1048             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1049           } else { //overwrite the / in the buffer
1050             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1051             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1052                                               // where only one is correct
1053               withoutUnicodePtr--;
1054             }
1055           }
1056         }
1057         // consume next character
1058         unicodeAsBackSlash = false;
1059         currentCharacter = source[currentPosition++];
1060         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1061         //                    && (source[currentPosition] == 'u')) {
1062         //                    getNextUnicodeChar();
1063         //                  } else {
1064         if (withoutUnicodePtr != 0) {
1065           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1066         }
1067         //                  }
1068       }
1069     } catch (IndexOutOfBoundsException e) {
1070 //    reset end position for error reporting
1071       currentPosition-=2;
1072       throw new InvalidInputException(UNTERMINATED_STRING);
1073     } catch (InvalidInputException e) {
1074       if (e.getMessage().equals(INVALID_ESCAPE)) {
1075         // relocate if finding another quote fairly close: thus unicode
1076         // '/u000D' will be fully consumed
1077         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1078           if (currentPosition + lookAhead == source.length)
1079             break;
1080           if (source[currentPosition + lookAhead] == '\n')
1081             break;
1082           if (source[currentPosition + lookAhead] == '\"') {
1083             currentPosition += lookAhead + 1;
1084             break;
1085           }
1086         }
1087       }
1088       throw e; // rethrow
1089     }
1090     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1091                                               // //$NON-NLS-?$ where ? is an
1092                                               // int.
1093       if (currentLine == null) {
1094         currentLine = new NLSLine();
1095         lines.add(currentLine);
1096       }
1097       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1098           startPosition, currentPosition - 1));
1099     }
1100   }
1101   public int getNextToken() throws InvalidInputException {
1102     if (!phpMode) {
1103       return getInlinedHTML(currentPosition);
1104     }
1105     if (phpMode) {
1106       this.wasAcr = false;
1107       if (diet) {
1108         jumpOverMethodBody();
1109         diet = false;
1110         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1111       }
1112       try {
1113         while (true) {
1114           withoutUnicodePtr = 0;
1115           //start with a new token
1116           char encapsedChar = ' ';
1117           if (!encapsedStringStack.isEmpty()) {
1118             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1119           }
1120           if (encapsedChar != '$' && encapsedChar != ' ') {
1121             currentCharacter = source[currentPosition++];
1122             if (currentCharacter == encapsedChar) {
1123               switch (currentCharacter) {
1124                 case '`' :
1125                   return TokenNameEncapsedString0;
1126                 case '\'' :
1127                   return TokenNameEncapsedString1;
1128                 case '"' :
1129                   return TokenNameEncapsedString2;
1130               }
1131             }
1132             while (currentCharacter != encapsedChar) {
1133               /** ** in PHP \r and \n are valid in string literals *** */
1134               switch (currentCharacter) {
1135                 case '\\' :
1136                   int escapeSize = currentPosition;
1137                   boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1138                   //scanEscapeCharacter make a side effect on this value and
1139                   // we need the previous value few lines down this one
1140                   scanDoubleQuotedEscapeCharacter();
1141                   escapeSize = currentPosition - escapeSize;
1142                   if (withoutUnicodePtr == 0) {
1143                     //buffer all the entries that have been left aside....
1144                     withoutUnicodePtr = currentPosition - escapeSize - 1
1145                         - startPosition;
1146                     System.arraycopy(source, startPosition,
1147                         withoutUnicodeBuffer, 1, withoutUnicodePtr);
1148                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1149                   } else { //overwrite the / in the buffer
1150                     withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1151                     if (backSlashAsUnicodeInString) { //there are TWO \ in
1152                       withoutUnicodePtr--;
1153                     }
1154                   }
1155                   break;
1156                 case '$' :
1157                   if (isPHPIdentifierStart(source[currentPosition])
1158                       || source[currentPosition] == '{') {
1159                     currentPosition--;
1160                     encapsedStringStack.push(new Character('$'));
1161                     return TokenNameSTRING;
1162                   }
1163                   break;
1164                 case '{' :
1165                   if (source[currentPosition] == '$') { // CURLY_OPEN
1166                     currentPosition--;
1167                     encapsedStringStack.push(new Character('$'));
1168                     return TokenNameSTRING;
1169                   }
1170               }
1171               // consume next character
1172               unicodeAsBackSlash = false;
1173               currentCharacter = source[currentPosition++];
1174               if (withoutUnicodePtr != 0) {
1175                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1176               }
1177               //                  }
1178             } // end while
1179             currentPosition--;
1180             return TokenNameSTRING;
1181           }
1182           // ---------Consume white space and handles startPosition---------
1183           int whiteStart = currentPosition;
1184           startPosition = currentPosition;
1185           currentCharacter = source[currentPosition++];
1186           if (encapsedChar == '$') {
1187             switch (currentCharacter) {
1188               case '\\' :
1189                 currentCharacter = source[currentPosition++];
1190                 return TokenNameSTRING;
1191               case '{' :
1192                 if (encapsedChar == '$') {
1193                   if (getNextChar('$'))
1194                     return TokenNameCURLY_OPEN;
1195                 }
1196                 return TokenNameLBRACE;
1197               case '}' :
1198                 return TokenNameRBRACE;
1199               case '[' :
1200                 return TokenNameLBRACKET;
1201               case ']' :
1202                 return TokenNameRBRACKET;
1203               case '\'' :
1204                 if (tokenizeStrings) {
1205                   consumeStringConstant();
1206                   return TokenNameStringSingleQuote;
1207                 }
1208                 return TokenNameEncapsedString1;
1209               case '"' :
1210                 return TokenNameEncapsedString2;
1211               case '`' :
1212                 if (tokenizeStrings) {
1213                   consumeStringInterpolated();
1214                   return TokenNameStringInterpolated;
1215                 }
1216                 return TokenNameEncapsedString0;
1217               case '-' :
1218                 if (getNextChar('>'))
1219                   return TokenNameMINUS_GREATER;
1220                 return TokenNameSTRING;
1221               default :
1222                 if (currentCharacter == '$') {
1223                   int oldPosition = currentPosition;
1224                   try {
1225                     currentCharacter = source[currentPosition++];
1226                     if (currentCharacter == '{') {
1227                       return TokenNameDOLLAR_LBRACE;
1228                     }
1229                     if (isPHPIdentifierStart(currentCharacter)) {
1230                       return scanIdentifierOrKeyword(true);
1231                     } else {
1232                       currentPosition = oldPosition;
1233                       return TokenNameSTRING;
1234                     }
1235                   } catch (IndexOutOfBoundsException e) {
1236                     currentPosition = oldPosition;
1237                     return TokenNameSTRING;
1238                   }
1239                 }
1240                 if (isPHPIdentifierStart(currentCharacter))
1241                   return scanIdentifierOrKeyword(false);
1242                 if (Character.isDigit(currentCharacter))
1243                   return scanNumber(false);
1244                 return TokenNameERROR;
1245             }
1246           }
1247           //          boolean isWhiteSpace;
1248           
1249           while ((currentCharacter == ' ')
1250               || Character.isWhitespace(currentCharacter)) {
1251             startPosition = currentPosition;
1252             currentCharacter = source[currentPosition++];
1253             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1254             //              && (source[currentPosition] == 'u')) {
1255             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1256             //            } else {
1257             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1258               checkNonExternalizeString();
1259               if (recordLineSeparator) {
1260                 pushLineSeparator();
1261               } else {
1262                 currentLine = null;
1263               }
1264             }
1265             //            isWhiteSpace = (currentCharacter == ' ')
1266             //                || Character.isWhitespace(currentCharacter);
1267             //            }
1268           }
1269           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1270             // reposition scanner in case we are interested by spaces as tokens
1271             currentPosition--;
1272             startPosition = whiteStart;
1273             return TokenNameWHITESPACE;
1274           }
1275           //little trick to get out in the middle of a source compuation
1276           if (currentPosition > eofPosition)
1277             return TokenNameEOF;
1278           // ---------Identify the next token-------------
1279           switch (currentCharacter) {
1280             case '(' :
1281               return getCastOrParen();
1282             case ')' :
1283               return TokenNameRPAREN;
1284             case '{' :
1285               return TokenNameLBRACE;
1286             case '}' :
1287               return TokenNameRBRACE;
1288             case '[' :
1289               return TokenNameLBRACKET;
1290             case ']' :
1291               return TokenNameRBRACKET;
1292             case ';' :
1293               return TokenNameSEMICOLON;
1294             case ',' :
1295               return TokenNameCOMMA;
1296             case '.' :
1297               if (getNextChar('='))
1298                 return TokenNameDOT_EQUAL;
1299               if (getNextCharAsDigit())
1300                 return scanNumber(true);
1301               return TokenNameDOT;
1302             case '+' :
1303               {
1304                 int test;
1305                 if ((test = getNextChar('+', '=')) == 0)
1306                   return TokenNamePLUS_PLUS;
1307                 if (test > 0)
1308                   return TokenNamePLUS_EQUAL;
1309                 return TokenNamePLUS;
1310               }
1311             case '-' :
1312               {
1313                 int test;
1314                 if ((test = getNextChar('-', '=')) == 0)
1315                   return TokenNameMINUS_MINUS;
1316                 if (test > 0)
1317                   return TokenNameMINUS_EQUAL;
1318                 if (getNextChar('>'))
1319                   return TokenNameMINUS_GREATER;
1320                 return TokenNameMINUS;
1321               }
1322             case '~' :
1323               if (getNextChar('='))
1324                 return TokenNameTWIDDLE_EQUAL;
1325               return TokenNameTWIDDLE;
1326             case '!' :
1327               if (getNextChar('=')) {
1328                 if (getNextChar('=')) {
1329                   return TokenNameNOT_EQUAL_EQUAL;
1330                 }
1331                 return TokenNameNOT_EQUAL;
1332               }
1333               return TokenNameNOT;
1334             case '*' :
1335               if (getNextChar('='))
1336                 return TokenNameMULTIPLY_EQUAL;
1337               return TokenNameMULTIPLY;
1338             case '%' :
1339               if (getNextChar('='))
1340                 return TokenNameREMAINDER_EQUAL;
1341               return TokenNameREMAINDER;
1342             case '<' :
1343               {
1344                 int oldPosition = currentPosition;
1345                 try {
1346                   currentCharacter = source[currentPosition++];
1347                 } catch (IndexOutOfBoundsException e) {
1348                   currentPosition = oldPosition;
1349                   return TokenNameLESS;
1350                 }
1351                 switch (currentCharacter) {
1352                   case '=' :
1353                     return TokenNameLESS_EQUAL;
1354                   case '>' :
1355                     return TokenNameNOT_EQUAL;
1356                   case '<' :
1357                     if (getNextChar('='))
1358                       return TokenNameLEFT_SHIFT_EQUAL;
1359                     if (getNextChar('<')) {
1360                       currentCharacter = source[currentPosition++];
1361                       while (Character.isWhitespace(currentCharacter)) {
1362                         currentCharacter = source[currentPosition++];
1363                       }
1364                       int heredocStart = currentPosition - 1;
1365                       int heredocLength = 0;
1366                       if (isPHPIdentifierStart(currentCharacter)) {
1367                         currentCharacter = source[currentPosition++];
1368                       } else {
1369                         return TokenNameERROR;
1370                       }
1371                       while (isPHPIdentifierPart(currentCharacter)) {
1372                         currentCharacter = source[currentPosition++];
1373                       }
1374                       heredocLength = currentPosition - heredocStart - 1;
1375                       // heredoc end-tag determination
1376                       boolean endTag = true;
1377                       char ch;
1378                       do {
1379                         ch = source[currentPosition++];
1380                         if (ch == '\r' || ch == '\n') {
1381                           if (recordLineSeparator) {
1382                             pushLineSeparator();
1383                           } else {
1384                             currentLine = null;
1385                           }
1386                           for (int i = 0; i < heredocLength; i++) {
1387                             if (source[currentPosition + i] != source[heredocStart
1388                                 + i]) {
1389                               endTag = false;
1390                               break;
1391                             }
1392                           }
1393                           if (endTag) {
1394                             currentPosition += heredocLength - 1;
1395                             currentCharacter = source[currentPosition++];
1396                             break; // do...while loop
1397                           } else {
1398                             endTag = true;
1399                           }
1400                         }
1401                       } while (true);
1402                       return TokenNameHEREDOC;
1403                     }
1404                     return TokenNameLEFT_SHIFT;
1405                 }
1406                 currentPosition = oldPosition;
1407                 return TokenNameLESS;
1408               }
1409             case '>' :
1410               {
1411                 int test;
1412                 if ((test = getNextChar('=', '>')) == 0)
1413                   return TokenNameGREATER_EQUAL;
1414                 if (test > 0) {
1415                   if ((test = getNextChar('=', '>')) == 0)
1416                     return TokenNameRIGHT_SHIFT_EQUAL;
1417                   return TokenNameRIGHT_SHIFT;
1418                 }
1419                 return TokenNameGREATER;
1420               }
1421             case '=' :
1422               if (getNextChar('=')) {
1423                 if (getNextChar('=')) {
1424                   return TokenNameEQUAL_EQUAL_EQUAL;
1425                 }
1426                 return TokenNameEQUAL_EQUAL;
1427               }
1428               if (getNextChar('>'))
1429                 return TokenNameEQUAL_GREATER;
1430               return TokenNameEQUAL;
1431             case '&' :
1432               {
1433                 int test;
1434                 if ((test = getNextChar('&', '=')) == 0)
1435                   return TokenNameAND_AND;
1436                 if (test > 0)
1437                   return TokenNameAND_EQUAL;
1438                 return TokenNameAND;
1439               }
1440             case '|' :
1441               {
1442                 int test;
1443                 if ((test = getNextChar('|', '=')) == 0)
1444                   return TokenNameOR_OR;
1445                 if (test > 0)
1446                   return TokenNameOR_EQUAL;
1447                 return TokenNameOR;
1448               }
1449             case '^' :
1450               if (getNextChar('='))
1451                 return TokenNameXOR_EQUAL;
1452               return TokenNameXOR;
1453             case '?' :
1454               if (getNextChar('>')) {
1455                 phpMode = false;
1456                 if (currentPosition == source.length) {
1457                   phpMode = true;
1458                   return TokenNameINLINE_HTML;
1459                 }
1460                 return getInlinedHTML(currentPosition - 2);
1461               }
1462               return TokenNameQUESTION;
1463             case ':' :
1464               if (getNextChar(':'))
1465                 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1466               return TokenNameCOLON;
1467             case '@' :
1468               return TokenNameAT;
1469             case '\'' :
1470               consumeStringConstant();
1471               return TokenNameStringSingleQuote;
1472             case '"' :
1473               if (tokenizeStrings) {
1474                 consumeStringLiteral();
1475                 return TokenNameStringDoubleQuote;
1476               }
1477               return TokenNameEncapsedString2;
1478             case '`' :
1479               if (tokenizeStrings) {
1480                 consumeStringInterpolated();
1481                 return TokenNameStringInterpolated;
1482               }
1483               return TokenNameEncapsedString0;
1484             case '#' :
1485             case '/' :
1486               {
1487                 char startChar = currentCharacter;
1488                 if (getNextChar('=')) {
1489                   return TokenNameDIVIDE_EQUAL;
1490                 }
1491                 int test;
1492                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1493                   //line comment
1494                         this.lastCommentLinePosition = this.currentPosition;
1495                   int endPositionForLineComment = 0;
1496                   try { //get the next char
1497                     currentCharacter = source[currentPosition++];
1498                     //                    if (((currentCharacter = source[currentPosition++])
1499                     //                      == '\\')
1500                     //                      && (source[currentPosition] == 'u')) {
1501                     //                      //-------------unicode traitement ------------
1502                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1503                     //                      currentPosition++;
1504                     //                      while (source[currentPosition] == 'u') {
1505                     //                        currentPosition++;
1506                     //                      }
1507                     //                      if ((c1 =
1508                     //                        Character.getNumericValue(source[currentPosition++]))
1509                     //                        > 15
1510                     //                        || c1 < 0
1511                     //                        || (c2 =
1512                     //                          Character.getNumericValue(source[currentPosition++]))
1513                     //                          > 15
1514                     //                        || c2 < 0
1515                     //                        || (c3 =
1516                     //                          Character.getNumericValue(source[currentPosition++]))
1517                     //                          > 15
1518                     //                        || c3 < 0
1519                     //                        || (c4 =
1520                     //                          Character.getNumericValue(source[currentPosition++]))
1521                     //                          > 15
1522                     //                        || c4 < 0) {
1523                     //                        throw new
1524                     // InvalidInputException(INVALID_UNICODE_ESCAPE);
1525                     //                      } else {
1526                     //                        currentCharacter =
1527                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1528                     //                      }
1529                     //                    }
1530                     //handle the \\u case manually into comment
1531                     //                    if (currentCharacter == '\\') {
1532                     //                      if (source[currentPosition] == '\\')
1533                     //                        currentPosition++;
1534                     //                    } //jump over the \\
1535                     boolean isUnicode = false;
1536                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1537                         this.lastCommentLinePosition = this.currentPosition;
1538                       if (currentCharacter == '?') {
1539                         if (getNextChar('>')) {
1540                           startPosition = currentPosition - 2;
1541                           phpMode = false;
1542                           return TokenNameINLINE_HTML;
1543                         }
1544                       }
1545                       //get the next char
1546                       isUnicode = false;
1547                       currentCharacter = source[currentPosition++];
1548                       //                      if (((currentCharacter = source[currentPosition++])
1549                       //                        == '\\')
1550                       //                        && (source[currentPosition] == 'u')) {
1551                       //                        isUnicode = true;
1552                       //                        //-------------unicode traitement ------------
1553                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1554                       //                        currentPosition++;
1555                       //                        while (source[currentPosition] == 'u') {
1556                       //                          currentPosition++;
1557                       //                        }
1558                       //                        if ((c1 =
1559                       //                          Character.getNumericValue(source[currentPosition++]))
1560                       //                          > 15
1561                       //                          || c1 < 0
1562                       //                          || (c2 =
1563                       //                            Character.getNumericValue(
1564                       //                              source[currentPosition++]))
1565                       //                            > 15
1566                       //                          || c2 < 0
1567                       //                          || (c3 =
1568                       //                            Character.getNumericValue(
1569                       //                              source[currentPosition++]))
1570                       //                            > 15
1571                       //                          || c3 < 0
1572                       //                          || (c4 =
1573                       //                            Character.getNumericValue(
1574                       //                              source[currentPosition++]))
1575                       //                            > 15
1576                       //                          || c4 < 0) {
1577                       //                          throw new
1578                       // InvalidInputException(INVALID_UNICODE_ESCAPE);
1579                       //                        } else {
1580                       //                          currentCharacter =
1581                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1582                       //                        }
1583                       //                      }
1584                       //handle the \\u case manually into comment
1585                       //                      if (currentCharacter == '\\') {
1586                       //                        if (source[currentPosition] == '\\')
1587                       //                          currentPosition++;
1588                       //                      } //jump over the \\
1589                     }
1590                     if (isUnicode) {
1591                       endPositionForLineComment = currentPosition - 6;
1592                     } else {
1593                       endPositionForLineComment = currentPosition - 1;
1594                     }
1595 //                    recordComment(false);
1596                     recordComment(TokenNameCOMMENT_LINE);
1597                     if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1598                                         if ((currentCharacter == '\r')
1599                         || (currentCharacter == '\n')) {
1600                       checkNonExternalizeString();
1601                       if (recordLineSeparator) {
1602                         if (isUnicode) {
1603                           pushUnicodeLineSeparator();
1604                         } else {
1605                           pushLineSeparator();
1606                         }
1607                       } else {
1608                         currentLine = null;
1609                       }
1610                     }
1611                     if (tokenizeComments) {
1612                       if (!isUnicode) {
1613                         currentPosition = endPositionForLineComment;
1614                         // reset one character behind
1615                       }
1616                       return TokenNameCOMMENT_LINE;
1617                     }
1618                   } catch (IndexOutOfBoundsException e) { //an eof will them
1619                     // be generated
1620                     if (tokenizeComments) {
1621                       currentPosition--;
1622                       // reset one character behind
1623                       return TokenNameCOMMENT_LINE;
1624                     }
1625                   }
1626                   break;
1627                 }
1628                 if (test > 0) {
1629                   //traditional and annotation comment
1630                   boolean isJavadoc = false, star = false;
1631                   // consume next character
1632                   unicodeAsBackSlash = false;
1633                   currentCharacter = source[currentPosition++];
1634                   //                  if (((currentCharacter = source[currentPosition++]) ==
1635                   // '\\')
1636                   //                    && (source[currentPosition] == 'u')) {
1637                   //                    getNextUnicodeChar();
1638                   //                  } else {
1639                   //                    if (withoutUnicodePtr != 0) {
1640                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1641                   //                        currentCharacter;
1642                   //                    }
1643                   //                  }
1644                   if (currentCharacter == '*') {
1645                     isJavadoc = true;
1646                     star = true;
1647                   }
1648                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1649                     checkNonExternalizeString();
1650                     if (recordLineSeparator) {
1651                       pushLineSeparator();
1652                     } else {
1653                       currentLine = null;
1654                     }
1655                   }
1656                   try { //get the next char
1657                     currentCharacter = source[currentPosition++];
1658                     //                    if (((currentCharacter = source[currentPosition++])
1659                     //                      == '\\')
1660                     //                      && (source[currentPosition] == 'u')) {
1661                     //                      //-------------unicode traitement ------------
1662                     //                      getNextUnicodeChar();
1663                     //                    }
1664                     //handle the \\u case manually into comment
1665                     //                    if (currentCharacter == '\\') {
1666                     //                      if (source[currentPosition] == '\\')
1667                     //                        currentPosition++;
1668                     //                      //jump over the \\
1669                     //                    }
1670                     // empty comment is not a javadoc /**/
1671                     if (currentCharacter == '/') {
1672                       isJavadoc = false;
1673                     }
1674                     //loop until end of comment */
1675                     while ((currentCharacter != '/') || (!star)) {
1676                       if ((currentCharacter == '\r')
1677                           || (currentCharacter == '\n')) {
1678                         checkNonExternalizeString();
1679                         if (recordLineSeparator) {
1680                           pushLineSeparator();
1681                         } else {
1682                           currentLine = null;
1683                         }
1684                       }
1685                       star = currentCharacter == '*';
1686                       //get next char
1687                       currentCharacter = source[currentPosition++];
1688                       //                      if (((currentCharacter = source[currentPosition++])
1689                       //                        == '\\')
1690                       //                        && (source[currentPosition] == 'u')) {
1691                       //                        //-------------unicode traitement ------------
1692                       //                        getNextUnicodeChar();
1693                       //                      }
1694                       //handle the \\u case manually into comment
1695                       //                      if (currentCharacter == '\\') {
1696                       //                        if (source[currentPosition] == '\\')
1697                       //                          currentPosition++;
1698                       //                      } //jump over the \\
1699                     }
1700                     //recordComment(isJavadoc);
1701                     if (isJavadoc) {
1702                         recordComment(TokenNameCOMMENT_PHPDOC);
1703                     } else {
1704                         recordComment(TokenNameCOMMENT_BLOCK);
1705                     }
1706                     
1707                     if (tokenizeComments) {
1708                       if (isJavadoc)
1709                         return TokenNameCOMMENT_PHPDOC;
1710                       return TokenNameCOMMENT_BLOCK;
1711                     }
1712                   } catch (IndexOutOfBoundsException e) {
1713 //                  reset end position for error reporting
1714                     currentPosition-=2;
1715                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1716                   }
1717                   break;
1718                 }
1719                 return TokenNameDIVIDE;
1720               }
1721             case '\u001a' :
1722               if (atEnd())
1723                 return TokenNameEOF;
1724               //the atEnd may not be <currentPosition == source.length> if
1725               // source is only some part of a real (external) stream
1726               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1727             default :
1728               if (currentCharacter == '$') {
1729                 int oldPosition = currentPosition;
1730                 try {
1731                   currentCharacter = source[currentPosition++];
1732                   if (isPHPIdentifierStart(currentCharacter)) {
1733                     return scanIdentifierOrKeyword(true);
1734                   } else {
1735                     currentPosition = oldPosition;
1736                     return TokenNameDOLLAR;
1737                   }
1738                 } catch (IndexOutOfBoundsException e) {
1739                   currentPosition = oldPosition;
1740                   return TokenNameDOLLAR;
1741                 }
1742               }
1743               if (isPHPIdentifierStart(currentCharacter))
1744                 return scanIdentifierOrKeyword(false);
1745               if (Character.isDigit(currentCharacter))
1746                 return scanNumber(false);
1747               return TokenNameERROR;
1748           }
1749         }
1750       } //-----------------end switch while try--------------------
1751       catch (IndexOutOfBoundsException e) {
1752       }
1753     }
1754     return TokenNameEOF;
1755   }
1756   
1757   private int getInlinedHTML(int start) throws InvalidInputException {
1758         int token = getInlinedHTMLToken(start);
1759         if (token == TokenNameINLINE_HTML) {
1760 //              Stack stack = new Stack();
1761 //              // scan html for errors
1762 //              Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1763 //              int lastPHPEndPos=0;
1764 //              for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1765 //                  Tag tag=(Tag)i.next();
1766 //                  
1767 //                  if (tag instanceof StartTag) {
1768 //                      StartTag startTag=(StartTag)tag;
1769 //                    //  System.out.println("startTag: "+tag);
1770 //                      if (startTag.isServerTag()) {
1771 //                        // TODO : what to do with a server tag ? 
1772 //                      } else {
1773 //                          // do whatever with HTML start tag
1774 //                          // use startTag.getElement() to find the element corresponding
1775 //                          // to this start tag which may be useful if you implement code
1776 //                          // folding etc
1777 //                              stack.push(startTag);
1778 //                      }
1779 //                  } else {
1780 //                      EndTag endTag=(EndTag)tag;
1781 //                      StartTag stag = (StartTag) stack.peek();
1782 ////                  System.out.println("endTag: "+tag);
1783 //                      // do whatever with HTML end tag.
1784 //                  }
1785 //              }
1786         }
1787         return token;
1788   }
1789   /**
1790    * @return @throws
1791    *         InvalidInputException
1792    */
1793   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1794     //    int htmlPosition = start;
1795     if (currentPosition > source.length) {
1796       currentPosition = source.length;
1797       return TokenNameEOF;
1798     }
1799     startPosition = start;
1800     try {
1801       while (!phpMode) {
1802         currentCharacter = source[currentPosition++];
1803         if (currentCharacter == '<') {
1804           if (getNextChar('?')) {
1805             currentCharacter = source[currentPosition++];
1806             if ((currentCharacter == ' ')
1807                 || Character.isWhitespace(currentCharacter)) {
1808               // <?
1809               phpMode = true;
1810               return TokenNameINLINE_HTML;
1811             } else {
1812               boolean phpStart = (currentCharacter == 'P')
1813                   || (currentCharacter == 'p');
1814               if (phpStart) {
1815                 int test = getNextChar('H', 'h');
1816                 if (test >= 0) {
1817                   test = getNextChar('P', 'p');
1818                   if (test >= 0) {
1819                     // <?PHP <?php
1820                     phpMode = true;
1821                     return TokenNameINLINE_HTML;
1822                   }
1823                 }
1824               }
1825             }
1826           }
1827         }
1828         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1829           if (recordLineSeparator) {
1830             pushLineSeparator();
1831           } else {
1832             currentLine = null;
1833           }
1834         }
1835       } //-----------------while--------------------
1836       phpMode = true;
1837       return TokenNameINLINE_HTML;
1838     } //-----------------try--------------------
1839     catch (IndexOutOfBoundsException e) {
1840       startPosition = start;
1841       currentPosition--;
1842     }
1843     phpMode = true;
1844     return TokenNameINLINE_HTML;
1845   }
1846   //  public final void getNextUnicodeChar()
1847   //    throws IndexOutOfBoundsException, InvalidInputException {
1848   //    //VOID
1849   //    //handle the case of unicode.
1850   //    //when a unicode appears then we must use a buffer that holds char
1851   // internal values
1852   //    //At the end of this method currentCharacter holds the new visited char
1853   //    //and currentPosition points right next after it
1854   //
1855   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1856   //
1857   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1858   //    currentPosition++;
1859   //    while (source[currentPosition] == 'u') {
1860   //      currentPosition++;
1861   //      unicodeSize++;
1862   //    }
1863   //
1864   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1865   //      || c1 < 0
1866   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1867   //      || c2 < 0
1868   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1869   //      || c3 < 0
1870   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1871   //      || c4 < 0) {
1872   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1873   //    } else {
1874   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1875   //      //need the unicode buffer
1876   //      if (withoutUnicodePtr == 0) {
1877   //        //buffer all the entries that have been left aside....
1878   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1879   //        System.arraycopy(
1880   //          source,
1881   //          startPosition,
1882   //          withoutUnicodeBuffer,
1883   //          1,
1884   //          withoutUnicodePtr);
1885   //      }
1886   //      //fill the buffer with the char
1887   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1888   //    }
1889   //    unicodeAsBackSlash = currentCharacter == '\\';
1890   //  }
1891   /*
1892    * Tokenize a method body, assuming that curly brackets are properly
1893    * balanced.
1894    */
1895   public final void jumpOverMethodBody() {
1896     this.wasAcr = false;
1897     int found = 1;
1898     try {
1899       while (true) { //loop for jumping over comments
1900         // ---------Consume white space and handles startPosition---------
1901         boolean isWhiteSpace;
1902         do {
1903           startPosition = currentPosition;
1904           currentCharacter = source[currentPosition++];
1905           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1906           //            && (source[currentPosition] == 'u')) {
1907           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1908           //          } else {
1909           if (recordLineSeparator
1910               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1911             pushLineSeparator();
1912           isWhiteSpace = Character.isWhitespace(currentCharacter);
1913           //          }
1914         } while (isWhiteSpace);
1915         // -------consume token until } is found---------
1916         switch (currentCharacter) {
1917           case '{' :
1918             found++;
1919             break;
1920           case '}' :
1921             found--;
1922             if (found == 0)
1923               return;
1924             break;
1925           case '\'' :
1926             {
1927               boolean test;
1928               test = getNextChar('\\');
1929               if (test) {
1930                 try {
1931                   scanDoubleQuotedEscapeCharacter();
1932                 } catch (InvalidInputException ex) {
1933                 };
1934               } else {
1935                 //                try { // consume next character
1936                 unicodeAsBackSlash = false;
1937                 currentCharacter = source[currentPosition++];
1938                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1939                 //                    && (source[currentPosition] == 'u')) {
1940                 //                    getNextUnicodeChar();
1941                 //                  } else {
1942                 if (withoutUnicodePtr != 0) {
1943                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1944                 }
1945                 //                  }
1946                 //                } catch (InvalidInputException ex) {
1947                 //                };
1948               }
1949               getNextChar('\'');
1950               break;
1951             }
1952           case '"' :
1953             try {
1954               //              try { // consume next character
1955               unicodeAsBackSlash = false;
1956               currentCharacter = source[currentPosition++];
1957               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1958               //                  && (source[currentPosition] == 'u')) {
1959               //                  getNextUnicodeChar();
1960               //                } else {
1961               if (withoutUnicodePtr != 0) {
1962                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1963               }
1964               //                }
1965               //              } catch (InvalidInputException ex) {
1966               //              };
1967               while (currentCharacter != '"') {
1968                 if (currentCharacter == '\r') {
1969                   if (source[currentPosition] == '\n')
1970                     currentPosition++;
1971                   break;
1972                   // the string cannot go further that the line
1973                 }
1974                 if (currentCharacter == '\n') {
1975                   break;
1976                   // the string cannot go further that the line
1977                 }
1978                 if (currentCharacter == '\\') {
1979                   try {
1980                     scanDoubleQuotedEscapeCharacter();
1981                   } catch (InvalidInputException ex) {
1982                   };
1983                 }
1984                 //                try { // consume next character
1985                 unicodeAsBackSlash = false;
1986                 currentCharacter = source[currentPosition++];
1987                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1988                 //                    && (source[currentPosition] == 'u')) {
1989                 //                    getNextUnicodeChar();
1990                 //                  } else {
1991                 if (withoutUnicodePtr != 0) {
1992                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1993                 }
1994                 //                  }
1995                 //                } catch (InvalidInputException ex) {
1996                 //                };
1997               }
1998             } catch (IndexOutOfBoundsException e) {
1999               return;
2000             }
2001             break;
2002           case '/' :
2003             {
2004               int test;
2005               if ((test = getNextChar('/', '*')) == 0) {
2006                 //line comment
2007                 try {
2008                   //get the next char
2009                   currentCharacter = source[currentPosition++];
2010                   //                  if (((currentCharacter = source[currentPosition++]) ==
2011                   // '\\')
2012                   //                    && (source[currentPosition] == 'u')) {
2013                   //                    //-------------unicode traitement ------------
2014                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2015                   //                    currentPosition++;
2016                   //                    while (source[currentPosition] == 'u') {
2017                   //                      currentPosition++;
2018                   //                    }
2019                   //                    if ((c1 =
2020                   //                      Character.getNumericValue(source[currentPosition++]))
2021                   //                      > 15
2022                   //                      || c1 < 0
2023                   //                      || (c2 =
2024                   //                        Character.getNumericValue(source[currentPosition++]))
2025                   //                        > 15
2026                   //                      || c2 < 0
2027                   //                      || (c3 =
2028                   //                        Character.getNumericValue(source[currentPosition++]))
2029                   //                        > 15
2030                   //                      || c3 < 0
2031                   //                      || (c4 =
2032                   //                        Character.getNumericValue(source[currentPosition++]))
2033                   //                        > 15
2034                   //                      || c4 < 0) {
2035                   //                      //error don't care of the value
2036                   //                      currentCharacter = 'A';
2037                   //                    } //something different from \n and \r
2038                   //                    else {
2039                   //                      currentCharacter =
2040                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2041                   //                    }
2042                   //                  }
2043                   while (currentCharacter != '\r' && currentCharacter != '\n') {
2044                     //get the next char
2045                     currentCharacter = source[currentPosition++];
2046                     //                    if (((currentCharacter = source[currentPosition++])
2047                     //                      == '\\')
2048                     //                      && (source[currentPosition] == 'u')) {
2049                     //                      //-------------unicode traitement ------------
2050                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2051                     //                      currentPosition++;
2052                     //                      while (source[currentPosition] == 'u') {
2053                     //                        currentPosition++;
2054                     //                      }
2055                     //                      if ((c1 =
2056                     //                        Character.getNumericValue(source[currentPosition++]))
2057                     //                        > 15
2058                     //                        || c1 < 0
2059                     //                        || (c2 =
2060                     //                          Character.getNumericValue(source[currentPosition++]))
2061                     //                          > 15
2062                     //                        || c2 < 0
2063                     //                        || (c3 =
2064                     //                          Character.getNumericValue(source[currentPosition++]))
2065                     //                          > 15
2066                     //                        || c3 < 0
2067                     //                        || (c4 =
2068                     //                          Character.getNumericValue(source[currentPosition++]))
2069                     //                          > 15
2070                     //                        || c4 < 0) {
2071                     //                        //error don't care of the value
2072                     //                        currentCharacter = 'A';
2073                     //                      } //something different from \n and \r
2074                     //                      else {
2075                     //                        currentCharacter =
2076                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2077                     //                      }
2078                     //                    }
2079                   }
2080                   if (recordLineSeparator
2081                       && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2082                     pushLineSeparator();
2083                 } catch (IndexOutOfBoundsException e) {
2084                 } //an eof will them be generated
2085                 break;
2086               }
2087               if (test > 0) {
2088                 //traditional and annotation comment
2089                 boolean star = false;
2090                 //                try { // consume next character
2091                 unicodeAsBackSlash = false;
2092                 currentCharacter = source[currentPosition++];
2093                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2094                 //                    && (source[currentPosition] == 'u')) {
2095                 //                    getNextUnicodeChar();
2096                 //                  } else {
2097                 if (withoutUnicodePtr != 0) {
2098                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2099                 }
2100                 //                  };
2101                 //                } catch (InvalidInputException ex) {
2102                 //                };
2103                 if (currentCharacter == '*') {
2104                   star = true;
2105                 }
2106                 if (recordLineSeparator
2107                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2108                   pushLineSeparator();
2109                 try { //get the next char
2110                   currentCharacter = source[currentPosition++];
2111                   //                  if (((currentCharacter = source[currentPosition++]) ==
2112                   // '\\')
2113                   //                    && (source[currentPosition] == 'u')) {
2114                   //                    //-------------unicode traitement ------------
2115                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2116                   //                    currentPosition++;
2117                   //                    while (source[currentPosition] == 'u') {
2118                   //                      currentPosition++;
2119                   //                    }
2120                   //                    if ((c1 =
2121                   //                      Character.getNumericValue(source[currentPosition++]))
2122                   //                      > 15
2123                   //                      || c1 < 0
2124                   //                      || (c2 =
2125                   //                        Character.getNumericValue(source[currentPosition++]))
2126                   //                        > 15
2127                   //                      || c2 < 0
2128                   //                      || (c3 =
2129                   //                        Character.getNumericValue(source[currentPosition++]))
2130                   //                        > 15
2131                   //                      || c3 < 0
2132                   //                      || (c4 =
2133                   //                        Character.getNumericValue(source[currentPosition++]))
2134                   //                        > 15
2135                   //                      || c4 < 0) {
2136                   //                      //error don't care of the value
2137                   //                      currentCharacter = 'A';
2138                   //                    } //something different from * and /
2139                   //                    else {
2140                   //                      currentCharacter =
2141                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2142                   //                    }
2143                   //                  }
2144                   //loop until end of comment */
2145                   while ((currentCharacter != '/') || (!star)) {
2146                     if (recordLineSeparator
2147                         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2148                       pushLineSeparator();
2149                     star = currentCharacter == '*';
2150                     //get next char
2151                     currentCharacter = source[currentPosition++];
2152                     //                    if (((currentCharacter = source[currentPosition++])
2153                     //                      == '\\')
2154                     //                      && (source[currentPosition] == 'u')) {
2155                     //                      //-------------unicode traitement ------------
2156                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2157                     //                      currentPosition++;
2158                     //                      while (source[currentPosition] == 'u') {
2159                     //                        currentPosition++;
2160                     //                      }
2161                     //                      if ((c1 =
2162                     //                        Character.getNumericValue(source[currentPosition++]))
2163                     //                        > 15
2164                     //                        || c1 < 0
2165                     //                        || (c2 =
2166                     //                          Character.getNumericValue(source[currentPosition++]))
2167                     //                          > 15
2168                     //                        || c2 < 0
2169                     //                        || (c3 =
2170                     //                          Character.getNumericValue(source[currentPosition++]))
2171                     //                          > 15
2172                     //                        || c3 < 0
2173                     //                        || (c4 =
2174                     //                          Character.getNumericValue(source[currentPosition++]))
2175                     //                          > 15
2176                     //                        || c4 < 0) {
2177                     //                        //error don't care of the value
2178                     //                        currentCharacter = 'A';
2179                     //                      } //something different from * and /
2180                     //                      else {
2181                     //                        currentCharacter =
2182                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2183                     //                      }
2184                     //                    }
2185                   }
2186                 } catch (IndexOutOfBoundsException e) {
2187                   return;
2188                 }
2189                 break;
2190               }
2191               break;
2192             }
2193           default :
2194             if (isPHPIdentifierStart(currentCharacter)
2195                 || currentCharacter == '$') {
2196               try {
2197                 scanIdentifierOrKeyword((currentCharacter == '$'));
2198               } catch (InvalidInputException ex) {
2199               };
2200               break;
2201             }
2202             if (Character.isDigit(currentCharacter)) {
2203               try {
2204                 scanNumber(false);
2205               } catch (InvalidInputException ex) {
2206               };
2207               break;
2208             }
2209         }
2210       }
2211       //-----------------end switch while try--------------------
2212     } catch (IndexOutOfBoundsException e) {
2213     } catch (InvalidInputException e) {
2214     }
2215     return;
2216   }
2217   //  public final boolean jumpOverUnicodeWhiteSpace()
2218   //    throws InvalidInputException {
2219   //    //BOOLEAN
2220   //    //handle the case of unicode. Jump over the next whiteSpace
2221   //    //making startPosition pointing on the next available char
2222   //    //On false, the currentCharacter is filled up with a potential
2223   //    //correct char
2224   //
2225   //    try {
2226   //      this.wasAcr = false;
2227   //      int c1, c2, c3, c4;
2228   //      int unicodeSize = 6;
2229   //      currentPosition++;
2230   //      while (source[currentPosition] == 'u') {
2231   //        currentPosition++;
2232   //        unicodeSize++;
2233   //      }
2234   //
2235   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2236   //        || c1 < 0)
2237   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2238   //          || c2 < 0)
2239   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2240   //          || c3 < 0)
2241   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2242   //          || c4 < 0)) {
2243   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2244   //      }
2245   //
2246   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2247   //      if (recordLineSeparator
2248   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2249   //        pushLineSeparator();
2250   //      if (Character.isWhitespace(currentCharacter))
2251   //        return true;
2252   //
2253   //      //buffer the new char which is not a white space
2254   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2255   //      //withoutUnicodePtr == 1 is true here
2256   //      return false;
2257   //    } catch (IndexOutOfBoundsException e) {
2258   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2259   //    }
2260   //  }
2261   public final int[] getLineEnds() {
2262     //return a bounded copy of this.lineEnds
2263     int[] copy;
2264     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2265     return copy;
2266   }
2267   public char[] getSource() {
2268     return this.source;
2269   }
2270   public static boolean isIdentifierOrKeyword(int token) {
2271     return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2272   }
2273   final char[] optimizedCurrentTokenSource1() {
2274     //return always the same char[] build only once
2275     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2276     char charOne = source[startPosition];
2277     switch (charOne) {
2278       case 'a' :
2279         return charArray_a;
2280       case 'b' :
2281         return charArray_b;
2282       case 'c' :
2283         return charArray_c;
2284       case 'd' :
2285         return charArray_d;
2286       case 'e' :
2287         return charArray_e;
2288       case 'f' :
2289         return charArray_f;
2290       case 'g' :
2291         return charArray_g;
2292       case 'h' :
2293         return charArray_h;
2294       case 'i' :
2295         return charArray_i;
2296       case 'j' :
2297         return charArray_j;
2298       case 'k' :
2299         return charArray_k;
2300       case 'l' :
2301         return charArray_l;
2302       case 'm' :
2303         return charArray_m;
2304       case 'n' :
2305         return charArray_n;
2306       case 'o' :
2307         return charArray_o;
2308       case 'p' :
2309         return charArray_p;
2310       case 'q' :
2311         return charArray_q;
2312       case 'r' :
2313         return charArray_r;
2314       case 's' :
2315         return charArray_s;
2316       case 't' :
2317         return charArray_t;
2318       case 'u' :
2319         return charArray_u;
2320       case 'v' :
2321         return charArray_v;
2322       case 'w' :
2323         return charArray_w;
2324       case 'x' :
2325         return charArray_x;
2326       case 'y' :
2327         return charArray_y;
2328       case 'z' :
2329         return charArray_z;
2330       default :
2331         return new char[]{charOne};
2332     }
2333   }
2334   final char[] optimizedCurrentTokenSource2() {
2335     //try to return the same char[] build only once
2336     char c0, c1;
2337     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2338         % TableSize;
2339     char[][] table = charArray_length[0][hash];
2340     int i = newEntry2;
2341     while (++i < InternalTableSize) {
2342       char[] charArray = table[i];
2343       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2344         return charArray;
2345     }
2346     //---------other side---------
2347     i = -1;
2348     int max = newEntry2;
2349     while (++i <= max) {
2350       char[] charArray = table[i];
2351       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2352         return charArray;
2353     }
2354     //--------add the entry-------
2355     if (++max >= InternalTableSize)
2356       max = 0;
2357     char[] r;
2358     table[max] = (r = new char[]{c0, c1});
2359     newEntry2 = max;
2360     return r;
2361   }
2362   final char[] optimizedCurrentTokenSource3() {
2363     //try to return the same char[] build only once
2364     char c0, c1, c2;
2365     int hash = (((c0 = source[startPosition]) << 12)
2366         + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2367         % TableSize;
2368     char[][] table = charArray_length[1][hash];
2369     int i = newEntry3;
2370     while (++i < InternalTableSize) {
2371       char[] charArray = table[i];
2372       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2373         return charArray;
2374     }
2375     //---------other side---------
2376     i = -1;
2377     int max = newEntry3;
2378     while (++i <= max) {
2379       char[] charArray = table[i];
2380       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2381         return charArray;
2382     }
2383     //--------add the entry-------
2384     if (++max >= InternalTableSize)
2385       max = 0;
2386     char[] r;
2387     table[max] = (r = new char[]{c0, c1, c2});
2388     newEntry3 = max;
2389     return r;
2390   }
2391   final char[] optimizedCurrentTokenSource4() {
2392     //try to return the same char[] build only once
2393     char c0, c1, c2, c3;
2394     long hash = ((((long) (c0 = source[startPosition])) << 18)
2395         + ((c1 = source[startPosition + 1]) << 12)
2396         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2397         % TableSize;
2398     char[][] table = charArray_length[2][(int) hash];
2399     int i = newEntry4;
2400     while (++i < InternalTableSize) {
2401       char[] charArray = table[i];
2402       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2403           && (c3 == charArray[3]))
2404         return charArray;
2405     }
2406     //---------other side---------
2407     i = -1;
2408     int max = newEntry4;
2409     while (++i <= max) {
2410       char[] charArray = table[i];
2411       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2412           && (c3 == charArray[3]))
2413         return charArray;
2414     }
2415     //--------add the entry-------
2416     if (++max >= InternalTableSize)
2417       max = 0;
2418     char[] r;
2419     table[max] = (r = new char[]{c0, c1, c2, c3});
2420     newEntry4 = max;
2421     return r;
2422   }
2423   final char[] optimizedCurrentTokenSource5() {
2424     //try to return the same char[] build only once
2425     char c0, c1, c2, c3, c4;
2426     long hash = ((((long) (c0 = source[startPosition])) << 24)
2427         + (((long) (c1 = source[startPosition + 1])) << 18)
2428         + ((c2 = source[startPosition + 2]) << 12)
2429         + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2430         % TableSize;
2431     char[][] table = charArray_length[3][(int) hash];
2432     int i = newEntry5;
2433     while (++i < InternalTableSize) {
2434       char[] charArray = table[i];
2435       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2436           && (c3 == charArray[3]) && (c4 == charArray[4]))
2437         return charArray;
2438     }
2439     //---------other side---------
2440     i = -1;
2441     int max = newEntry5;
2442     while (++i <= max) {
2443       char[] charArray = table[i];
2444       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2445           && (c3 == charArray[3]) && (c4 == charArray[4]))
2446         return charArray;
2447     }
2448     //--------add the entry-------
2449     if (++max >= InternalTableSize)
2450       max = 0;
2451     char[] r;
2452     table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2453     newEntry5 = max;
2454     return r;
2455   }
2456   final char[] optimizedCurrentTokenSource6() {
2457     //try to return the same char[] build only once
2458     char c0, c1, c2, c3, c4, c5;
2459     long hash = ((((long) (c0 = source[startPosition])) << 32)
2460         + (((long) (c1 = source[startPosition + 1])) << 24)
2461         + (((long) (c2 = source[startPosition + 2])) << 18)
2462         + ((c3 = source[startPosition + 3]) << 12)
2463         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2464         % TableSize;
2465     char[][] table = charArray_length[4][(int) hash];
2466     int i = newEntry6;
2467     while (++i < InternalTableSize) {
2468       char[] charArray = table[i];
2469       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2470           && (c3 == charArray[3]) && (c4 == charArray[4])
2471           && (c5 == charArray[5]))
2472         return charArray;
2473     }
2474     //---------other side---------
2475     i = -1;
2476     int max = newEntry6;
2477     while (++i <= max) {
2478       char[] charArray = table[i];
2479       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2480           && (c3 == charArray[3]) && (c4 == charArray[4])
2481           && (c5 == charArray[5]))
2482         return charArray;
2483     }
2484     //--------add the entry-------
2485     if (++max >= InternalTableSize)
2486       max = 0;
2487     char[] r;
2488     table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2489     newEntry6 = max;
2490     return r;
2491   }
2492   public final void pushLineSeparator() throws InvalidInputException {
2493     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2494     final int INCREMENT = 250;
2495     if (this.checkNonExternalizedStringLiterals) {
2496       // reinitialize the current line for non externalize strings purpose
2497       currentLine = null;
2498     }
2499     //currentCharacter is at position currentPosition-1
2500     // cr 000D
2501     if (currentCharacter == '\r') {
2502       int separatorPos = currentPosition - 1;
2503       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2504         return;
2505       //System.out.println("CR-" + separatorPos);
2506       try {
2507         lineEnds[++linePtr] = separatorPos;
2508       } catch (IndexOutOfBoundsException e) {
2509         //linePtr value is correct
2510         int oldLength = lineEnds.length;
2511         int[] old = lineEnds;
2512         lineEnds = new int[oldLength + INCREMENT];
2513         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2514         lineEnds[linePtr] = separatorPos;
2515       }
2516       // look-ahead for merged cr+lf
2517       try {
2518         if (source[currentPosition] == '\n') {
2519           //System.out.println("look-ahead LF-" + currentPosition);
2520           lineEnds[linePtr] = currentPosition;
2521           currentPosition++;
2522           wasAcr = false;
2523         } else {
2524           wasAcr = true;
2525         }
2526       } catch (IndexOutOfBoundsException e) {
2527         wasAcr = true;
2528       }
2529     } else {
2530       // lf 000A
2531       if (currentCharacter == '\n') {
2532         //must merge eventual cr followed by lf
2533         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2534           //System.out.println("merge LF-" + (currentPosition - 1));
2535           lineEnds[linePtr] = currentPosition - 1;
2536         } else {
2537           int separatorPos = currentPosition - 1;
2538           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2539             return;
2540           // System.out.println("LF-" + separatorPos);
2541           try {
2542             lineEnds[++linePtr] = separatorPos;
2543           } catch (IndexOutOfBoundsException e) {
2544             //linePtr value is correct
2545             int oldLength = lineEnds.length;
2546             int[] old = lineEnds;
2547             lineEnds = new int[oldLength + INCREMENT];
2548             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2549             lineEnds[linePtr] = separatorPos;
2550           }
2551         }
2552         wasAcr = false;
2553       }
2554     }
2555   }
2556   public final void pushUnicodeLineSeparator() {
2557     // isUnicode means that the \r or \n has been read as a unicode character
2558     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2559     final int INCREMENT = 250;
2560     //currentCharacter is at position currentPosition-1
2561     if (this.checkNonExternalizedStringLiterals) {
2562       // reinitialize the current line for non externalize strings purpose
2563       currentLine = null;
2564     }
2565     // cr 000D
2566     if (currentCharacter == '\r') {
2567       int separatorPos = currentPosition - 6;
2568       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2569         return;
2570       //System.out.println("CR-" + separatorPos);
2571       try {
2572         lineEnds[++linePtr] = separatorPos;
2573       } catch (IndexOutOfBoundsException e) {
2574         //linePtr value is correct
2575         int oldLength = lineEnds.length;
2576         int[] old = lineEnds;
2577         lineEnds = new int[oldLength + INCREMENT];
2578         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2579         lineEnds[linePtr] = separatorPos;
2580       }
2581       // look-ahead for merged cr+lf
2582       if (source[currentPosition] == '\n') {
2583         //System.out.println("look-ahead LF-" + currentPosition);
2584         lineEnds[linePtr] = currentPosition;
2585         currentPosition++;
2586         wasAcr = false;
2587       } else {
2588         wasAcr = true;
2589       }
2590     } else {
2591       // lf 000A
2592       if (currentCharacter == '\n') {
2593         //must merge eventual cr followed by lf
2594         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2595           //System.out.println("merge LF-" + (currentPosition - 1));
2596           lineEnds[linePtr] = currentPosition - 6;
2597         } else {
2598           int separatorPos = currentPosition - 6;
2599           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2600             return;
2601           // System.out.println("LF-" + separatorPos);
2602           try {
2603             lineEnds[++linePtr] = separatorPos;
2604           } catch (IndexOutOfBoundsException e) {
2605             //linePtr value is correct
2606             int oldLength = lineEnds.length;
2607             int[] old = lineEnds;
2608             lineEnds = new int[oldLength + INCREMENT];
2609             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2610             lineEnds[linePtr] = separatorPos;
2611           }
2612         }
2613         wasAcr = false;
2614       }
2615     }
2616   }
2617   public void recordComment(int token) {
2618         // compute position
2619         int stopPosition = this.currentPosition;
2620         switch (token) {
2621                 case TokenNameCOMMENT_LINE:
2622                         stopPosition = -this.lastCommentLinePosition;
2623                         break;
2624                 case TokenNameCOMMENT_BLOCK:
2625                         stopPosition = -this.currentPosition;
2626                         break;
2627         }
2628
2629         // a new comment is recorded
2630         int length = this.commentStops.length;
2631         if (++this.commentPtr >=  length) {
2632                 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2633                 //grows the positions buffers too
2634                 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2635         }
2636         this.commentStops[this.commentPtr] = stopPosition;
2637         this.commentStarts[this.commentPtr] = this.startPosition;
2638 }
2639 //  public final void recordComment(boolean isJavadoc) {
2640 //    // a new annotation comment is recorded
2641 //    try {
2642 //      commentStops[++commentPtr] = isJavadoc
2643 //          ? currentPosition
2644 //          : -currentPosition;
2645 //    } catch (IndexOutOfBoundsException e) {
2646 //      int oldStackLength = commentStops.length;
2647 //      int[] oldStack = commentStops;
2648 //      commentStops = new int[oldStackLength + 30];
2649 //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2650 //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2651 //      //grows the positions buffers too
2652 //      int[] old = commentStarts;
2653 //      commentStarts = new int[oldStackLength + 30];
2654 //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2655 //    }
2656 //    //the buffer is of a correct size here
2657 //    commentStarts[commentPtr] = startPosition;
2658 //  }
2659   public void resetTo(int begin, int end) {
2660     //reset the scanner to a given position where it may rescan again
2661     diet = false;
2662     initialPosition = startPosition = currentPosition = begin;
2663     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2664     commentPtr = -1; // reset comment stack
2665   }
2666   public final void scanSingleQuotedEscapeCharacter()
2667       throws InvalidInputException {
2668     // the string with "\\u" is a legal string of two chars \ and u
2669     //thus we use a direct access to the source (for regular cases).
2670     //    if (unicodeAsBackSlash) {
2671     //      // consume next character
2672     //      unicodeAsBackSlash = false;
2673     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2674     //        && (source[currentPosition] == 'u')) {
2675     //        getNextUnicodeChar();
2676     //      } else {
2677     //        if (withoutUnicodePtr != 0) {
2678     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2679     //        }
2680     //      }
2681     //    } else
2682     currentCharacter = source[currentPosition++];
2683     switch (currentCharacter) {
2684       case '\'' :
2685         currentCharacter = '\'';
2686         break;
2687       case '\\' :
2688         currentCharacter = '\\';
2689         break;
2690       default :
2691         currentCharacter = '\\';
2692         currentPosition--;
2693     }
2694   }
2695   public final void scanDoubleQuotedEscapeCharacter()
2696       throws InvalidInputException {
2697     // the string with "\\u" is a legal string of two chars \ and u
2698     //thus we use a direct access to the source (for regular cases).
2699     //    if (unicodeAsBackSlash) {
2700     //      // consume next character
2701     //      unicodeAsBackSlash = false;
2702     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2703     //        && (source[currentPosition] == 'u')) {
2704     //        getNextUnicodeChar();
2705     //      } else {
2706     //        if (withoutUnicodePtr != 0) {
2707     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2708     //        }
2709     //      }
2710     //    } else
2711     currentCharacter = source[currentPosition++];
2712     switch (currentCharacter) {
2713       //      case 'b' :
2714       //        currentCharacter = '\b';
2715       //        break;
2716       case 't' :
2717         currentCharacter = '\t';
2718         break;
2719       case 'n' :
2720         currentCharacter = '\n';
2721         break;
2722       //      case 'f' :
2723       //        currentCharacter = '\f';
2724       //        break;
2725       case 'r' :
2726         currentCharacter = '\r';
2727         break;
2728       case '\"' :
2729         currentCharacter = '\"';
2730         break;
2731       case '\'' :
2732         currentCharacter = '\'';
2733         break;
2734       case '\\' :
2735         currentCharacter = '\\';
2736         break;
2737       case '$' :
2738         currentCharacter = '$';
2739         break;
2740       default :
2741         // -----------octal escape--------------
2742         // OctalDigit
2743         // OctalDigit OctalDigit
2744         // ZeroToThree OctalDigit OctalDigit
2745         int number = Character.getNumericValue(currentCharacter);
2746         if (number >= 0 && number <= 7) {
2747           boolean zeroToThreeNot = number > 3;
2748           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2749             int digit = Character.getNumericValue(currentCharacter);
2750             if (digit >= 0 && digit <= 7) {
2751               number = (number * 8) + digit;
2752               if (Character
2753                   .isDigit(currentCharacter = source[currentPosition++])) {
2754                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2755                   // Digit --> ignore last character
2756                   currentPosition--;
2757                 } else {
2758                   digit = Character.getNumericValue(currentCharacter);
2759                   if (digit >= 0 && digit <= 7) {
2760                     // has read \ZeroToThree OctalDigit OctalDigit
2761                     number = (number * 8) + digit;
2762                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2763                     // --> ignore last character
2764                     currentPosition--;
2765                   }
2766                 }
2767               } else { // has read \OctalDigit NonDigit--> ignore last
2768                 // character
2769                 currentPosition--;
2770               }
2771             } else { // has read \OctalDigit NonOctalDigit--> ignore last
2772               // character
2773               currentPosition--;
2774             }
2775           } else { // has read \OctalDigit --> ignore last character
2776             currentPosition--;
2777           }
2778           if (number > 255)
2779             throw new InvalidInputException(INVALID_ESCAPE);
2780           currentCharacter = (char) number;
2781         }
2782     //else
2783     //     throw new InvalidInputException(INVALID_ESCAPE);
2784     }
2785   }
2786   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2787   //    return scanIdentifierOrKeyword( false );
2788   //  }
2789   public int scanIdentifierOrKeyword(boolean isVariable)
2790       throws InvalidInputException {
2791     //test keywords
2792     //first dispatch on the first char.
2793     //then the length. If there are several
2794     //keywors with the same length AND the same first char, then do another
2795     //disptach on the second char :-)...cool....but fast !
2796     useAssertAsAnIndentifier = false;
2797     while (getNextCharAsJavaIdentifierPart()) {
2798     };
2799     if (isVariable) {
2800       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2801       //        return TokenNamethis;
2802       //      }
2803       return TokenNameVariable;
2804     }
2805     int index, length;
2806     char[] data;
2807     char firstLetter;
2808     //    if (withoutUnicodePtr == 0)
2809     //quick test on length == 1 but not on length > 12 while most identifier
2810     //have a length which is <= 12...but there are lots of identifier with
2811     //only one char....
2812     //      {
2813     if ((length = currentPosition - startPosition) == 1)
2814       return TokenNameIdentifier;
2815     //  data = source;
2816     data = new char[length];
2817     index = startPosition;
2818     for (int i = 0; i < length; i++) {
2819       data[i] = Character.toLowerCase(source[index + i]);
2820     }
2821     index = 0;
2822     //    } else {
2823     //      if ((length = withoutUnicodePtr) == 1)
2824     //        return TokenNameIdentifier;
2825     //      // data = withoutUnicodeBuffer;
2826     //      data = new char[withoutUnicodeBuffer.length];
2827     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2828     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2829     //      }
2830     //      index = 1;
2831     //    }
2832     firstLetter = data[index];
2833     switch (firstLetter) {
2834       case '_' :
2835         switch (length) {
2836           case 8 :
2837             //__FILE__
2838             if ((data[++index] == '_') && (data[++index] == 'f')
2839                 && (data[++index] == 'i') && (data[++index] == 'l')
2840                 && (data[++index] == 'e') && (data[++index] == '_')
2841                 && (data[++index] == '_'))
2842               return TokenNameFILE;
2843             index = 0; //__LINE__
2844             if ((data[++index] == '_') && (data[++index] == 'l')
2845                 && (data[++index] == 'i') && (data[++index] == 'n')
2846                 && (data[++index] == 'e') && (data[++index] == '_')
2847                 && (data[++index] == '_'))
2848               return TokenNameLINE;
2849             break;
2850           case 9 :
2851             //__CLASS__
2852             if ((data[++index] == '_') && (data[++index] == 'c')
2853                 && (data[++index] == 'l') && (data[++index] == 'a')
2854                 && (data[++index] == 's') && (data[++index] == 's')
2855                 && (data[++index] == '_') && (data[++index] == '_'))
2856               return TokenNameCLASS_C;
2857             break;
2858           case 11 :
2859             //__METHOD__
2860             if ((data[++index] == '_') && (data[++index] == 'm')
2861                 && (data[++index] == 'e') && (data[++index] == 't')
2862                 && (data[++index] == 'h') && (data[++index] == 'o')
2863                 && (data[++index] == 'd') && (data[++index] == '_')
2864                 && (data[++index] == '_'))
2865               return TokenNameMETHOD_C;
2866             break;
2867           case 12 :
2868             //__FUNCTION__
2869             if ((data[++index] == '_') && (data[++index] == 'f')
2870                 && (data[++index] == 'u') && (data[++index] == 'n')
2871                 && (data[++index] == 'c') && (data[++index] == 't')
2872                 && (data[++index] == 'i') && (data[++index] == 'o')
2873                 && (data[++index] == 'n') && (data[++index] == '_')
2874                 && (data[++index] == '_'))
2875               return TokenNameFUNC_C;
2876             break;
2877         }
2878         return TokenNameIdentifier;
2879       case 'a' :
2880         // as and array abstract
2881         switch (length) {
2882           case 2 :
2883             //as
2884             if ((data[++index] == 's')) {
2885               return TokenNameas;
2886             } else {
2887               return TokenNameIdentifier;
2888             }
2889           case 3 :
2890             //and
2891             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2892               return TokenNameand;
2893             } else {
2894               return TokenNameIdentifier;
2895             }
2896           case 5 :
2897             // array
2898             if ((data[++index] == 'r') && (data[++index] == 'r')
2899                 && (data[++index] == 'a') && (data[++index] == 'y'))
2900               return TokenNamearray;
2901             else
2902               return TokenNameIdentifier;
2903           case 8 :
2904             if ((data[++index] == 'b') && (data[++index] == 's')
2905                 && (data[++index] == 't') && (data[++index] == 'r')
2906                 && (data[++index] == 'a') && (data[++index] == 'c')
2907                 && (data[++index] == 't'))
2908               return TokenNameabstract;
2909             else
2910               return TokenNameIdentifier;
2911           default :
2912             return TokenNameIdentifier;
2913         }
2914       case 'b' :
2915         //break
2916         switch (length) {
2917           case 5 :
2918             if ((data[++index] == 'r') && (data[++index] == 'e')
2919                 && (data[++index] == 'a') && (data[++index] == 'k'))
2920               return TokenNamebreak;
2921             else
2922               return TokenNameIdentifier;
2923           default :
2924             return TokenNameIdentifier;
2925         }
2926       case 'c' :
2927         //case catch class clone const continue
2928         switch (length) {
2929           case 4 :
2930             if ((data[++index] == 'a') && (data[++index] == 's')
2931                 && (data[++index] == 'e'))
2932               return TokenNamecase;
2933             else
2934               return TokenNameIdentifier;
2935           case 5 :
2936             if ((data[++index] == 'a') && (data[++index] == 't')
2937                 && (data[++index] == 'c') && (data[++index] == 'h'))
2938               return TokenNamecatch;
2939             index = 0;
2940             if ((data[++index] == 'l') && (data[++index] == 'a')
2941                 && (data[++index] == 's') && (data[++index] == 's'))
2942               return TokenNameclass;
2943             index = 0;
2944             if ((data[++index] == 'l') && (data[++index] == 'o')
2945                 && (data[++index] == 'n') && (data[++index] == 'e'))
2946               return TokenNameclone;
2947             index = 0;
2948             if ((data[++index] == 'o') && (data[++index] == 'n')
2949                 && (data[++index] == 's') && (data[++index] == 't'))
2950               return TokenNameconst;
2951             else
2952               return TokenNameIdentifier;
2953           case 8 :
2954             if ((data[++index] == 'o') && (data[++index] == 'n')
2955                 && (data[++index] == 't') && (data[++index] == 'i')
2956                 && (data[++index] == 'n') && (data[++index] == 'u')
2957                 && (data[++index] == 'e'))
2958               return TokenNamecontinue;
2959             else
2960               return TokenNameIdentifier;
2961           default :
2962             return TokenNameIdentifier;
2963         }
2964       case 'd' :
2965         // declare default do die
2966         // TODO delete define ==> no keyword !
2967         switch (length) {
2968           case 2 :
2969             if ((data[++index] == 'o'))
2970               return TokenNamedo;
2971             else
2972               return TokenNameIdentifier;
2973           //          case 6 :
2974           //            if ((data[++index] == 'e')
2975           //              && (data[++index] == 'f')
2976           //              && (data[++index] == 'i')
2977           //              && (data[++index] == 'n')
2978           //              && (data[++index] == 'e'))
2979           //              return TokenNamedefine;
2980           //            else
2981           //              return TokenNameIdentifier;
2982           case 7 :
2983             if ((data[++index] == 'e') && (data[++index] == 'c')
2984                 && (data[++index] == 'l') && (data[++index] == 'a')
2985                 && (data[++index] == 'r') && (data[++index] == 'e'))
2986               return TokenNamedeclare;
2987             index = 0;
2988             if ((data[++index] == 'e') && (data[++index] == 'f')
2989                 && (data[++index] == 'a') && (data[++index] == 'u')
2990                 && (data[++index] == 'l') && (data[++index] == 't'))
2991               return TokenNamedefault;
2992             else
2993               return TokenNameIdentifier;
2994           default :
2995             return TokenNameIdentifier;
2996         }
2997       case 'e' :
2998         //echo else exit elseif extends eval
2999         switch (length) {
3000           case 4 :
3001             if ((data[++index] == 'c') && (data[++index] == 'h')
3002                 && (data[++index] == 'o'))
3003               return TokenNameecho;
3004             else if ((data[index] == 'l') && (data[++index] == 's')
3005                 && (data[++index] == 'e'))
3006               return TokenNameelse;
3007             else if ((data[index] == 'x') && (data[++index] == 'i')
3008                 && (data[++index] == 't'))
3009               return TokenNameexit;
3010             else if ((data[index] == 'v') && (data[++index] == 'a')
3011                 && (data[++index] == 'l'))
3012               return TokenNameeval;
3013             else
3014               return TokenNameIdentifier;
3015           case 5 :
3016             // endif empty
3017             if ((data[++index] == 'n') && (data[++index] == 'd')
3018                 && (data[++index] == 'i') && (data[++index] == 'f'))
3019               return TokenNameendif;
3020             if ((data[index] == 'm') && (data[++index] == 'p')
3021                 && (data[++index] == 't') && (data[++index] == 'y'))
3022               return TokenNameempty;
3023             else
3024               return TokenNameIdentifier;
3025           case 6 :
3026             // endfor
3027             if ((data[++index] == 'n') && (data[++index] == 'd')
3028                 && (data[++index] == 'f') && (data[++index] == 'o')
3029                 && (data[++index] == 'r'))
3030               return TokenNameendfor;
3031             else if ((data[index] == 'l') && (data[++index] == 's')
3032                 && (data[++index] == 'e') && (data[++index] == 'i')
3033                 && (data[++index] == 'f'))
3034               return TokenNameelseif;
3035             else
3036               return TokenNameIdentifier;
3037           case 7 :
3038             if ((data[++index] == 'x') && (data[++index] == 't')
3039                 && (data[++index] == 'e') && (data[++index] == 'n')
3040                 && (data[++index] == 'd') && (data[++index] == 's'))
3041               return TokenNameextends;
3042             else
3043               return TokenNameIdentifier;
3044           case 8 :
3045             // endwhile
3046             if ((data[++index] == 'n') && (data[++index] == 'd')
3047                 && (data[++index] == 'w') && (data[++index] == 'h')
3048                 && (data[++index] == 'i') && (data[++index] == 'l')
3049                 && (data[++index] == 'e'))
3050               return TokenNameendwhile;
3051             else
3052               return TokenNameIdentifier;
3053           case 9 :
3054             // endswitch
3055             if ((data[++index] == 'n') && (data[++index] == 'd')
3056                 && (data[++index] == 's') && (data[++index] == 'w')
3057                 && (data[++index] == 'i') && (data[++index] == 't')
3058                 && (data[++index] == 'c') && (data[++index] == 'h'))
3059               return TokenNameendswitch;
3060             else
3061               return TokenNameIdentifier;
3062           case 10 :
3063             // enddeclare
3064             if ((data[++index] == 'n') && (data[++index] == 'd')
3065                 && (data[++index] == 'd') && (data[++index] == 'e')
3066                 && (data[++index] == 'c') && (data[++index] == 'l')
3067                 && (data[++index] == 'a') && (data[++index] == 'r')
3068                 && (data[++index] == 'e'))
3069               return TokenNameendforeach;
3070             index = 0;
3071             if ((data[++index] == 'n') // endforeach
3072                 && (data[++index] == 'd') && (data[++index] == 'f')
3073                 && (data[++index] == 'o') && (data[++index] == 'r')
3074                 && (data[++index] == 'e') && (data[++index] == 'a')
3075                 && (data[++index] == 'c') && (data[++index] == 'h'))
3076               return TokenNameendforeach;
3077             else
3078               return TokenNameIdentifier;
3079           default :
3080             return TokenNameIdentifier;
3081         }
3082       case 'f' :
3083         //for false final function
3084         switch (length) {
3085           case 3 :
3086             if ((data[++index] == 'o') && (data[++index] == 'r'))
3087               return TokenNamefor;
3088             else
3089               return TokenNameIdentifier;
3090           case 5 :
3091             //            if ((data[++index] == 'a') && (data[++index] == 'l')
3092             //                && (data[++index] == 's') && (data[++index] == 'e'))
3093             //              return TokenNamefalse;
3094             if ((data[++index] == 'i') && (data[++index] == 'n')
3095                 && (data[++index] == 'a') && (data[++index] == 'l'))
3096               return TokenNamefinal;
3097             else
3098               return TokenNameIdentifier;
3099           case 7 :
3100             // foreach
3101             if ((data[++index] == 'o') && (data[++index] == 'r')
3102                 && (data[++index] == 'e') && (data[++index] == 'a')
3103                 && (data[++index] == 'c') && (data[++index] == 'h'))
3104               return TokenNameforeach;
3105             else
3106               return TokenNameIdentifier;
3107           case 8 :
3108             // function
3109             if ((data[++index] == 'u') && (data[++index] == 'n')
3110                 && (data[++index] == 'c') && (data[++index] == 't')
3111                 && (data[++index] == 'i') && (data[++index] == 'o')
3112                 && (data[++index] == 'n'))
3113               return TokenNamefunction;
3114             else
3115               return TokenNameIdentifier;
3116           default :
3117             return TokenNameIdentifier;
3118         }
3119       case 'g' :
3120         //global
3121         if (length == 6) {
3122           if ((data[++index] == 'l') && (data[++index] == 'o')
3123               && (data[++index] == 'b') && (data[++index] == 'a')
3124               && (data[++index] == 'l')) {
3125             return TokenNameglobal;
3126           }
3127         }
3128         return TokenNameIdentifier;
3129       case 'i' :
3130         //if int isset include include_once instanceof interface implements
3131         switch (length) {
3132           case 2 :
3133             if (data[++index] == 'f')
3134               return TokenNameif;
3135             else
3136               return TokenNameIdentifier;
3137           //          case 3 :
3138           //            if ((data[++index] == 'n') && (data[++index] == 't'))
3139           //              return TokenNameint;
3140           //            else
3141           //              return TokenNameIdentifier;
3142           case 5 :
3143             if ((data[++index] == 's') && (data[++index] == 's')
3144                 && (data[++index] == 'e') && (data[++index] == 't'))
3145               return TokenNameisset;
3146             else
3147               return TokenNameIdentifier;
3148           case 7 :
3149             if ((data[++index] == 'n') && (data[++index] == 'c')
3150                 && (data[++index] == 'l') && (data[++index] == 'u')
3151                 && (data[++index] == 'd') && (data[++index] == 'e'))
3152               return TokenNameinclude;
3153             else
3154               return TokenNameIdentifier;
3155           case 9 :
3156             // interface
3157             if ((data[++index] == 'n') && (data[++index] == 't')
3158                 && (data[++index] == 'e') && (data[++index] == 'r')
3159                 && (data[++index] == 'f') && (data[++index] == 'a')
3160                 && (data[++index] == 'c') && (data[++index] == 'e'))
3161               return TokenNameinterface;
3162             else
3163               return TokenNameIdentifier;
3164           case 10 :
3165             // instanceof
3166             if ((data[++index] == 'n') && (data[++index] == 's')
3167                 && (data[++index] == 't') && (data[++index] == 'a')
3168                 && (data[++index] == 'n') && (data[++index] == 'c')
3169                 && (data[++index] == 'e') && (data[++index] == 'o')
3170                 && (data[++index] == 'f'))
3171               return TokenNameinstanceof;
3172             if ((data[index] == 'm') && (data[++index] == 'p')
3173                 && (data[++index] == 'l') && (data[++index] == 'e')
3174                 && (data[++index] == 'm') && (data[++index] == 'e')
3175                 && (data[++index] == 'n') && (data[++index] == 't')
3176                 && (data[++index] == 's'))
3177               return TokenNameimplements;
3178             else
3179               return TokenNameIdentifier;
3180           case 12 :
3181             if ((data[++index] == 'n') && (data[++index] == 'c')
3182                 && (data[++index] == 'l') && (data[++index] == 'u')
3183                 && (data[++index] == 'd') && (data[++index] == 'e')
3184                 && (data[++index] == '_') && (data[++index] == 'o')
3185                 && (data[++index] == 'n') && (data[++index] == 'c')
3186                 && (data[++index] == 'e'))
3187               return TokenNameinclude_once;
3188             else
3189               return TokenNameIdentifier;
3190           default :
3191             return TokenNameIdentifier;
3192         }
3193       case 'l' :
3194         //list
3195         if (length == 4) {
3196           if ((data[++index] == 'i') && (data[++index] == 's')
3197               && (data[++index] == 't')) {
3198             return TokenNamelist;
3199           }
3200         }
3201         return TokenNameIdentifier;
3202       case 'n' :
3203         // new null
3204         switch (length) {
3205           case 3 :
3206             if ((data[++index] == 'e') && (data[++index] == 'w'))
3207               return TokenNamenew;
3208             else
3209               return TokenNameIdentifier;
3210           //          case 4 :
3211           //            if ((data[++index] == 'u') && (data[++index] == 'l')
3212           //                && (data[++index] == 'l'))
3213           //              return TokenNamenull;
3214           //            else
3215           //              return TokenNameIdentifier;
3216           default :
3217             return TokenNameIdentifier;
3218         }
3219       case 'o' :
3220         // or old_function
3221         if (length == 2) {
3222           if (data[++index] == 'r') {
3223             return TokenNameor;
3224           }
3225         }
3226         //        if (length == 12) {
3227         //          if ((data[++index] == 'l')
3228         //            && (data[++index] == 'd')
3229         //            && (data[++index] == '_')
3230         //            && (data[++index] == 'f')
3231         //            && (data[++index] == 'u')
3232         //            && (data[++index] == 'n')
3233         //            && (data[++index] == 'c')
3234         //            && (data[++index] == 't')
3235         //            && (data[++index] == 'i')
3236         //            && (data[++index] == 'o')
3237         //            && (data[++index] == 'n')) {
3238         //            return TokenNameold_function;
3239         //          }
3240         //        }
3241         return TokenNameIdentifier;
3242       case 'p' :
3243         // print public private protected
3244         switch (length) {
3245           case 5 :
3246             if ((data[++index] == 'r') && (data[++index] == 'i')
3247                 && (data[++index] == 'n') && (data[++index] == 't')) {
3248               return TokenNameprint;
3249             } else
3250               return TokenNameIdentifier;
3251           case 6 :
3252             if ((data[++index] == 'u') && (data[++index] == 'b')
3253                 && (data[++index] == 'l') && (data[++index] == 'i')
3254                 && (data[++index] == 'c')) {
3255               return TokenNamepublic;
3256             } else
3257               return TokenNameIdentifier;
3258           case 7 :
3259             if ((data[++index] == 'r') && (data[++index] == 'i')
3260                 && (data[++index] == 'v') && (data[++index] == 'a')
3261                 && (data[++index] == 't') && (data[++index] == 'e')) {
3262               return TokenNameprivate;
3263             } else
3264               return TokenNameIdentifier;
3265           case 9 :
3266             if ((data[++index] == 'r') && (data[++index] == 'o')
3267                 && (data[++index] == 't') && (data[++index] == 'e')
3268                 && (data[++index] == 'c') && (data[++index] == 't')
3269                 && (data[++index] == 'e') && (data[++index] == 'd')) {
3270               return TokenNameprotected;
3271             } else
3272               return TokenNameIdentifier;
3273         }
3274         return TokenNameIdentifier;
3275       case 'r' :
3276         //return require require_once
3277         if (length == 6) {
3278           if ((data[++index] == 'e') && (data[++index] == 't')
3279               && (data[++index] == 'u') && (data[++index] == 'r')
3280               && (data[++index] == 'n')) {
3281             return TokenNamereturn;
3282           }
3283         } else if (length == 7) {
3284           if ((data[++index] == 'e') && (data[++index] == 'q')
3285               && (data[++index] == 'u') && (data[++index] == 'i')
3286               && (data[++index] == 'r') && (data[++index] == 'e')) {
3287             return TokenNamerequire;
3288           }
3289         } else if (length == 12) {
3290           if ((data[++index] == 'e') && (data[++index] == 'q')
3291               && (data[++index] == 'u') && (data[++index] == 'i')
3292               && (data[++index] == 'r') && (data[++index] == 'e')
3293               && (data[++index] == '_') && (data[++index] == 'o')
3294               && (data[++index] == 'n') && (data[++index] == 'c')
3295               && (data[++index] == 'e')) {
3296             return TokenNamerequire_once;
3297           }
3298         } else
3299           return TokenNameIdentifier;
3300       case 's' :
3301         //static switch
3302         switch (length) {
3303           case 6 :
3304             if (data[++index] == 't')
3305               if ((data[++index] == 'a') && (data[++index] == 't')
3306                   && (data[++index] == 'i') && (data[++index] == 'c')) {
3307                 return TokenNamestatic;
3308               } else
3309                 return TokenNameIdentifier;
3310             else if ((data[index] == 'w') && (data[++index] == 'i')
3311                 && (data[++index] == 't') && (data[++index] == 'c')
3312                 && (data[++index] == 'h'))
3313               return TokenNameswitch;
3314             else
3315               return TokenNameIdentifier;
3316           default :
3317             return TokenNameIdentifier;
3318         }
3319       case 't' :
3320         // try true throw
3321         switch (length) {
3322           case 3 :
3323             if ((data[++index] == 'r') && (data[++index] == 'y'))
3324               return TokenNametry;
3325             else
3326               return TokenNameIdentifier;
3327           //          case 4 :
3328           //            if ((data[++index] == 'r') && (data[++index] == 'u')
3329           //                && (data[++index] == 'e'))
3330           //              return TokenNametrue;
3331           //            else
3332           //              return TokenNameIdentifier;
3333           case 5 :
3334             if ((data[++index] == 'h') && (data[++index] == 'r')
3335                 && (data[++index] == 'o') && (data[++index] == 'w'))
3336               return TokenNamethrow;
3337             else
3338               return TokenNameIdentifier;
3339           default :
3340             return TokenNameIdentifier;
3341         }
3342       case 'u' :
3343         //use unset
3344         switch (length) {
3345           case 3 :
3346             if ((data[++index] == 's') && (data[++index] == 'e'))
3347               return TokenNameuse;
3348             else
3349               return TokenNameIdentifier;
3350           case 5 :
3351             if ((data[++index] == 'n') && (data[++index] == 's')
3352                 && (data[++index] == 'e') && (data[++index] == 't'))
3353               return TokenNameunset;
3354             else
3355               return TokenNameIdentifier;
3356           default :
3357             return TokenNameIdentifier;
3358         }
3359       case 'v' :
3360         //var
3361         switch (length) {
3362           case 3 :
3363             if ((data[++index] == 'a') && (data[++index] == 'r'))
3364               return TokenNamevar;
3365             else
3366               return TokenNameIdentifier;
3367           default :
3368             return TokenNameIdentifier;
3369         }
3370       case 'w' :
3371         //while
3372         switch (length) {
3373           case 5 :
3374             if ((data[++index] == 'h') && (data[++index] == 'i')
3375                 && (data[++index] == 'l') && (data[++index] == 'e'))
3376               return TokenNamewhile;
3377             else
3378               return TokenNameIdentifier;
3379           //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3380           // (data[++index]=='e') && (data[++index]=='f')&&
3381           // (data[++index]=='p'))
3382           //return TokenNamewidefp ;
3383           //else
3384           //return TokenNameIdentifier;
3385           default :
3386             return TokenNameIdentifier;
3387         }
3388       case 'x' :
3389         //xor
3390         switch (length) {
3391           case 3 :
3392             if ((data[++index] == 'o') && (data[++index] == 'r'))
3393               return TokenNamexor;
3394             else
3395               return TokenNameIdentifier;
3396           default :
3397             return TokenNameIdentifier;
3398         }
3399       default :
3400         return TokenNameIdentifier;
3401     }
3402   }
3403   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3404     //when entering this method the currentCharacter is the firt
3405     //digit of the number , i.e. it may be preceeded by a . when
3406     //dotPrefix is true
3407     boolean floating = dotPrefix;
3408     if ((!dotPrefix) && (currentCharacter == '0')) {
3409       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3410         //force the first char of the hexa number do exist...
3411         // consume next character
3412         unicodeAsBackSlash = false;
3413         currentCharacter = source[currentPosition++];
3414         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3415         //          && (source[currentPosition] == 'u')) {
3416         //          getNextUnicodeChar();
3417         //        } else {
3418         //          if (withoutUnicodePtr != 0) {
3419         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3420         //          }
3421         //        }
3422         if (Character.digit(currentCharacter, 16) == -1)
3423           throw new InvalidInputException(INVALID_HEXA);
3424         //---end forcing--
3425         while (getNextCharAsDigit(16)) {
3426         };
3427         //        if (getNextChar('l', 'L') >= 0)
3428         //          return TokenNameLongLiteral;
3429         //        else
3430         return TokenNameIntegerLiteral;
3431       }
3432       //there is x or X in the number
3433       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3434       // 00078.0 is true !!!!! crazy language
3435       if (getNextCharAsDigit()) {
3436         //-------------potential octal-----------------
3437         while (getNextCharAsDigit()) {
3438         };
3439         //        if (getNextChar('l', 'L') >= 0) {
3440         //          return TokenNameLongLiteral;
3441         //        }
3442         //
3443         //        if (getNextChar('f', 'F') >= 0) {
3444         //          return TokenNameFloatingPointLiteral;
3445         //        }
3446         if (getNextChar('d', 'D') >= 0) {
3447           return TokenNameDoubleLiteral;
3448         } else { //make the distinction between octal and float ....
3449           if (getNextChar('.')) { //bingo ! ....
3450             while (getNextCharAsDigit()) {
3451             };
3452             if (getNextChar('e', 'E') >= 0) {
3453               // consume next character
3454               unicodeAsBackSlash = false;
3455               currentCharacter = source[currentPosition++];
3456               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3457               //                && (source[currentPosition] == 'u')) {
3458               //                getNextUnicodeChar();
3459               //              } else {
3460               //                if (withoutUnicodePtr != 0) {
3461               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3462               //                }
3463               //              }
3464               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3465                 // consume next character
3466                 unicodeAsBackSlash = false;
3467                 currentCharacter = source[currentPosition++];
3468                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3469                 //                  && (source[currentPosition] == 'u')) {
3470                 //                  getNextUnicodeChar();
3471                 //                } else {
3472                 //                  if (withoutUnicodePtr != 0) {
3473                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3474                 //                      currentCharacter;
3475                 //                  }
3476                 //                }
3477               }
3478               if (!Character.isDigit(currentCharacter))
3479                 throw new InvalidInputException(INVALID_FLOAT);
3480               while (getNextCharAsDigit()) {
3481               };
3482             }
3483             //            if (getNextChar('f', 'F') >= 0)
3484             //              return TokenNameFloatingPointLiteral;
3485             getNextChar('d', 'D'); //jump over potential d or D
3486             return TokenNameDoubleLiteral;
3487           } else {
3488             return TokenNameIntegerLiteral;
3489           }
3490         }
3491       } else {
3492         /* carry on */
3493       }
3494     }
3495     while (getNextCharAsDigit()) {
3496     };
3497     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3498     //      return TokenNameLongLiteral;
3499     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3500       while (getNextCharAsDigit()) {
3501       };
3502       floating = true;
3503     }
3504     //if floating is true both exponant and suffix may be optional
3505     if (getNextChar('e', 'E') >= 0) {
3506       floating = true;
3507       // consume next character
3508       unicodeAsBackSlash = false;
3509       currentCharacter = source[currentPosition++];
3510       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3511       //        && (source[currentPosition] == 'u')) {
3512       //        getNextUnicodeChar();
3513       //      } else {
3514       //        if (withoutUnicodePtr != 0) {
3515       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3516       //        }
3517       //      }
3518       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3519         // next
3520         // character
3521         unicodeAsBackSlash = false;
3522         currentCharacter = source[currentPosition++];
3523         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3524         //          && (source[currentPosition] == 'u')) {
3525         //          getNextUnicodeChar();
3526         //        } else {
3527         //          if (withoutUnicodePtr != 0) {
3528         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3529         //          }
3530         //        }
3531       }
3532       if (!Character.isDigit(currentCharacter))
3533         throw new InvalidInputException(INVALID_FLOAT);
3534       while (getNextCharAsDigit()) {
3535       };
3536     }
3537     if (getNextChar('d', 'D') >= 0)
3538       return TokenNameDoubleLiteral;
3539     //    if (getNextChar('f', 'F') >= 0)
3540     //      return TokenNameFloatingPointLiteral;
3541     //the long flag has been tested before
3542     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3543   }
3544   /**
3545    * Search the line number corresponding to a specific position
3546    *  
3547    */
3548   public final int getLineNumber(int position) {
3549     if (lineEnds == null)
3550       return 1;
3551     int length = linePtr + 1;
3552     if (length == 0)
3553       return 1;
3554     int g = 0, d = length - 1;
3555     int m = 0;
3556     while (g <= d) {
3557       m = (g + d) / 2;
3558       if (position < lineEnds[m]) {
3559         d = m - 1;
3560       } else if (position > lineEnds[m]) {
3561         g = m + 1;
3562       } else {
3563         return m + 1;
3564       }
3565     }
3566     if (position < lineEnds[m]) {
3567       return m + 1;
3568     }
3569     return m + 2;
3570   }
3571   public void setPHPMode(boolean mode) {
3572     phpMode = mode;
3573   }
3574   public final void setSource(char[] source) {
3575     //the source-buffer is set to sourceString
3576     if (source == null) {
3577       this.source = new char[0];
3578     } else {
3579       this.source = source;
3580     }
3581     startPosition = -1;
3582     initialPosition = currentPosition = 0;
3583     containsAssertKeyword = false;
3584     withoutUnicodeBuffer = new char[this.source.length];
3585     encapsedStringStack = new Stack();
3586   }
3587   public String toString() {
3588     if (startPosition == source.length)
3589       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3590     if (currentPosition > source.length)
3591       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3592     char front[] = new char[startPosition];
3593     System.arraycopy(source, 0, front, 0, startPosition);
3594     int middleLength = (currentPosition - 1) - startPosition + 1;
3595     char middle[];
3596     if (middleLength > -1) {
3597       middle = new char[middleLength];
3598       System.arraycopy(source, startPosition, middle, 0, middleLength);
3599     } else {
3600       middle = new char[0];
3601     }
3602     char end[] = new char[source.length - (currentPosition - 1)];
3603     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3604         - (currentPosition - 1) - 1);
3605     return new String(front)
3606         + "\n===============================\nStarts here -->" //$NON-NLS-1$
3607         + new String(middle)
3608         + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3609         + new String(end);
3610   }
3611   public final String toStringAction(int act) {
3612     switch (act) {
3613       case TokenNameERROR :
3614         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3615       // //$NON-NLS-1$
3616       case TokenNameINLINE_HTML :
3617         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3618       case TokenNameIdentifier :
3619         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3620       case TokenNameVariable :
3621         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3622       case TokenNameabstract :
3623         return "abstract"; //$NON-NLS-1$
3624       case TokenNameand :
3625         return "AND"; //$NON-NLS-1$
3626       case TokenNamearray :
3627         return "array"; //$NON-NLS-1$
3628       case TokenNameas :
3629         return "as"; //$NON-NLS-1$
3630       case TokenNamebreak :
3631         return "break"; //$NON-NLS-1$
3632       case TokenNamecase :
3633         return "case"; //$NON-NLS-1$
3634       case TokenNameclass :
3635         return "class"; //$NON-NLS-1$
3636       case TokenNamecatch :
3637         return "catch"; //$NON-NLS-1$
3638       case TokenNameclone :
3639         //$NON-NLS-1$
3640         return "clone";
3641       case TokenNameconst :
3642         //$NON-NLS-1$
3643         return "const";
3644       case TokenNamecontinue :
3645         return "continue"; //$NON-NLS-1$
3646       case TokenNamedefault :
3647         return "default"; //$NON-NLS-1$
3648       //      case TokenNamedefine :
3649       //        return "define"; //$NON-NLS-1$
3650       case TokenNamedo :
3651         return "do"; //$NON-NLS-1$
3652       case TokenNameecho :
3653         return "echo"; //$NON-NLS-1$
3654       case TokenNameelse :
3655         return "else"; //$NON-NLS-1$
3656       case TokenNameelseif :
3657         return "elseif"; //$NON-NLS-1$
3658       case TokenNameendfor :
3659         return "endfor"; //$NON-NLS-1$
3660       case TokenNameendforeach :
3661         return "endforeach"; //$NON-NLS-1$
3662       case TokenNameendif :
3663         return "endif"; //$NON-NLS-1$
3664       case TokenNameendswitch :
3665         return "endswitch"; //$NON-NLS-1$
3666       case TokenNameendwhile :
3667         return "endwhile"; //$NON-NLS-1$
3668       case TokenNameexit:
3669         return "exit";
3670       case TokenNameextends :
3671         return "extends"; //$NON-NLS-1$
3672       //      case TokenNamefalse :
3673       //        return "false"; //$NON-NLS-1$
3674       case TokenNamefinal :
3675         return "final"; //$NON-NLS-1$
3676       case TokenNamefor :
3677         return "for"; //$NON-NLS-1$
3678       case TokenNameforeach :
3679         return "foreach"; //$NON-NLS-1$
3680       case TokenNamefunction :
3681         return "function"; //$NON-NLS-1$
3682       case TokenNameglobal :
3683         return "global"; //$NON-NLS-1$
3684       case TokenNameif :
3685         return "if"; //$NON-NLS-1$
3686       case TokenNameimplements :
3687         return "implements"; //$NON-NLS-1$
3688       case TokenNameinclude :
3689         return "include"; //$NON-NLS-1$
3690       case TokenNameinclude_once :
3691         return "include_once"; //$NON-NLS-1$
3692       case TokenNameinstanceof :
3693         return "instanceof"; //$NON-NLS-1$
3694       case TokenNameinterface :
3695         return "interface"; //$NON-NLS-1$
3696       case TokenNameisset :
3697         return "isset"; //$NON-NLS-1$
3698       case TokenNamelist :
3699         return "list"; //$NON-NLS-1$
3700       case TokenNamenew :
3701         return "new"; //$NON-NLS-1$
3702       //      case TokenNamenull :
3703       //        return "null"; //$NON-NLS-1$
3704       case TokenNameor :
3705         return "OR"; //$NON-NLS-1$
3706       case TokenNameprint :
3707         return "print"; //$NON-NLS-1$
3708       case TokenNameprivate :
3709         return "private"; //$NON-NLS-1$
3710       case TokenNameprotected :
3711         return "protected"; //$NON-NLS-1$
3712       case TokenNamepublic :
3713         return "public"; //$NON-NLS-1$
3714       case TokenNamerequire :
3715         return "require"; //$NON-NLS-1$
3716       case TokenNamerequire_once :
3717         return "require_once"; //$NON-NLS-1$
3718       case TokenNamereturn :
3719         return "return"; //$NON-NLS-1$
3720       case TokenNamestatic :
3721         return "static"; //$NON-NLS-1$
3722       case TokenNameswitch :
3723         return "switch"; //$NON-NLS-1$
3724       //      case TokenNametrue :
3725       //        return "true"; //$NON-NLS-1$
3726       case TokenNameunset :
3727         return "unset"; //$NON-NLS-1$
3728       case TokenNamevar :
3729         return "var"; //$NON-NLS-1$
3730       case TokenNamewhile :
3731         return "while"; //$NON-NLS-1$
3732       case TokenNamexor :
3733         return "XOR"; //$NON-NLS-1$
3734       //      case TokenNamethis :
3735       //        return "$this"; //$NON-NLS-1$
3736       case TokenNameIntegerLiteral :
3737         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3738       case TokenNameDoubleLiteral :
3739         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3740       case TokenNameStringDoubleQuote :
3741         return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3742       case TokenNameStringSingleQuote :
3743         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3744       case TokenNameStringInterpolated :
3745         return "StringInterpolated(" + new String(getCurrentTokenSource())
3746             + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3747       case TokenNameEncapsedString0 :
3748         return "`"; //$NON-NLS-1$  
3749       case TokenNameEncapsedString1 :
3750         return "\'"; //$NON-NLS-1$  
3751       case TokenNameEncapsedString2 :
3752         return "\""; //$NON-NLS-1$  
3753       case TokenNameSTRING :
3754         return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3755       case TokenNameHEREDOC :
3756         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3757       case TokenNamePLUS_PLUS :
3758         return "++"; //$NON-NLS-1$
3759       case TokenNameMINUS_MINUS :
3760         return "--"; //$NON-NLS-1$
3761       case TokenNameEQUAL_EQUAL :
3762         return "=="; //$NON-NLS-1$
3763       case TokenNameEQUAL_EQUAL_EQUAL :
3764         return "==="; //$NON-NLS-1$
3765       case TokenNameEQUAL_GREATER :
3766         return "=>"; //$NON-NLS-1$
3767       case TokenNameLESS_EQUAL :
3768         return "<="; //$NON-NLS-1$
3769       case TokenNameGREATER_EQUAL :
3770         return ">="; //$NON-NLS-1$
3771       case TokenNameNOT_EQUAL :
3772         return "!="; //$NON-NLS-1$
3773       case TokenNameNOT_EQUAL_EQUAL :
3774         return "!=="; //$NON-NLS-1$
3775       case TokenNameLEFT_SHIFT :
3776         return "<<"; //$NON-NLS-1$
3777       case TokenNameRIGHT_SHIFT :
3778         return ">>"; //$NON-NLS-1$
3779       case TokenNamePLUS_EQUAL :
3780         return "+="; //$NON-NLS-1$
3781       case TokenNameMINUS_EQUAL :
3782         return "-="; //$NON-NLS-1$
3783       case TokenNameMULTIPLY_EQUAL :
3784         return "*="; //$NON-NLS-1$
3785       case TokenNameDIVIDE_EQUAL :
3786         return "/="; //$NON-NLS-1$
3787       case TokenNameAND_EQUAL :
3788         return "&="; //$NON-NLS-1$
3789       case TokenNameOR_EQUAL :
3790         return "|="; //$NON-NLS-1$
3791       case TokenNameXOR_EQUAL :
3792         return "^="; //$NON-NLS-1$
3793       case TokenNameREMAINDER_EQUAL :
3794         return "%="; //$NON-NLS-1$
3795       case TokenNameDOT_EQUAL :
3796         return ".="; //$NON-NLS-1$
3797       case TokenNameLEFT_SHIFT_EQUAL :
3798         return "<<="; //$NON-NLS-1$
3799       case TokenNameRIGHT_SHIFT_EQUAL :
3800         return ">>="; //$NON-NLS-1$
3801       case TokenNameOR_OR :
3802         return "||"; //$NON-NLS-1$
3803       case TokenNameAND_AND :
3804         return "&&"; //$NON-NLS-1$
3805       case TokenNamePLUS :
3806         return "+"; //$NON-NLS-1$
3807       case TokenNameMINUS :
3808         return "-"; //$NON-NLS-1$
3809       case TokenNameMINUS_GREATER :
3810         return "->";
3811       case TokenNameNOT :
3812         return "!"; //$NON-NLS-1$
3813       case TokenNameREMAINDER :
3814         return "%"; //$NON-NLS-1$
3815       case TokenNameXOR :
3816         return "^"; //$NON-NLS-1$
3817       case TokenNameAND :
3818         return "&"; //$NON-NLS-1$
3819       case TokenNameMULTIPLY :
3820         return "*"; //$NON-NLS-1$
3821       case TokenNameOR :
3822         return "|"; //$NON-NLS-1$
3823       case TokenNameTWIDDLE :
3824         return "~"; //$NON-NLS-1$
3825       case TokenNameTWIDDLE_EQUAL :
3826         return "~="; //$NON-NLS-1$
3827       case TokenNameDIVIDE :
3828         return "/"; //$NON-NLS-1$
3829       case TokenNameGREATER :
3830         return ">"; //$NON-NLS-1$
3831       case TokenNameLESS :
3832         return "<"; //$NON-NLS-1$
3833       case TokenNameLPAREN :
3834         return "("; //$NON-NLS-1$
3835       case TokenNameRPAREN :
3836         return ")"; //$NON-NLS-1$
3837       case TokenNameLBRACE :
3838         return "{"; //$NON-NLS-1$
3839       case TokenNameRBRACE :
3840         return "}"; //$NON-NLS-1$
3841       case TokenNameLBRACKET :
3842         return "["; //$NON-NLS-1$
3843       case TokenNameRBRACKET :
3844         return "]"; //$NON-NLS-1$
3845       case TokenNameSEMICOLON :
3846         return ";"; //$NON-NLS-1$
3847       case TokenNameQUESTION :
3848         return "?"; //$NON-NLS-1$
3849       case TokenNameCOLON :
3850         return ":"; //$NON-NLS-1$
3851       case TokenNameCOMMA :
3852         return ","; //$NON-NLS-1$
3853       case TokenNameDOT :
3854         return "."; //$NON-NLS-1$
3855       case TokenNameEQUAL :
3856         return "="; //$NON-NLS-1$
3857       case TokenNameAT :
3858         return "@";
3859       case TokenNameDOLLAR :
3860         return "$";
3861       case TokenNameDOLLAR_LBRACE :
3862         return "${";
3863       case TokenNameEOF :
3864         return "EOF"; //$NON-NLS-1$
3865       case TokenNameWHITESPACE :
3866         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3867       case TokenNameCOMMENT_LINE :
3868         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3869       case TokenNameCOMMENT_BLOCK :
3870         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3871       case TokenNameCOMMENT_PHPDOC :
3872         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3873       //      case TokenNameHTML :
3874       //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
3875       // //$NON-NLS-1$
3876       case TokenNameFILE :
3877         return "__FILE__"; //$NON-NLS-1$
3878       case TokenNameLINE :
3879         return "__LINE__"; //$NON-NLS-1$
3880       case TokenNameCLASS_C :
3881         return "__CLASS__"; //$NON-NLS-1$
3882       case TokenNameMETHOD_C :
3883         return "__METHOD__"; //$NON-NLS-1$
3884       case TokenNameFUNC_C :
3885         return "__FUNCTION__"; //$NON-NLS-1
3886       case TokenNameboolCAST :
3887         return "( bool )"; //$NON-NLS-1$
3888       case TokenNameintCAST :
3889         return "( int )"; //$NON-NLS-1$
3890       case TokenNamedoubleCAST :
3891         return "( double )"; //$NON-NLS-1$
3892       case TokenNameobjectCAST :
3893         return "( object )"; //$NON-NLS-1$
3894       case TokenNamestringCAST :
3895         return "( string )"; //$NON-NLS-1$
3896       default :
3897         return "not-a-token(" + (new Integer(act)) + ") "
3898             + new String(getCurrentTokenSource()); //$NON-NLS-1$
3899     }
3900   }
3901   
/**
 * Creates a scanner with both <code>tokenizeComments</code> and
 * <code>tokenizeWhiteSpace</code> set to false.
 */
public Scanner() {
  this(false, false);
}
/**
 * Creates a scanner with the given comment and white space settings and
 * <code>checkNonExternalizedStringLiterals</code> set to false.
 *
 * @param tokenizeComments
 *          value for the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace
 *          value for the <code>tokenizeWhiteSpace</code> flag
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
/**
 * Creates a scanner with the given settings and <code>assertMode</code> set
 * to false.
 *
 * @param tokenizeComments
 *          value for the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace
 *          value for the <code>tokenizeWhiteSpace</code> flag
 * @param checkNonExternalizedStringLiterals
 *          value for the <code>checkNonExternalizedStringLiterals</code> flag
 */
public Scanner(
    boolean tokenizeComments,
    boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals, false);
}
/**
 * Creates a scanner with the given settings, <code>tokenizeStrings</code>
 * set to false and no task tags or task priorities.
 *
 * @param tokenizeComments
 *          value for the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace
 *          value for the <code>tokenizeWhiteSpace</code> flag
 * @param checkNonExternalizedStringLiterals
 *          value for the <code>checkNonExternalizedStringLiterals</code> flag
 * @param assertMode
 *          value for the <code>assertMode</code> flag
 */
public Scanner(
    boolean tokenizeComments,
    boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(tokenizeComments, tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals, assertMode, false, null, null);
}
/**
 * Creates a scanner from the complete set of options. The end-of-file
 * position starts at <code>Integer.MAX_VALUE</code> and the encapsed string
 * stack starts out as <code>null</code>.
 *
 * @param tokenizeComments
 *          value for the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace
 *          value for the <code>tokenizeWhiteSpace</code> flag
 * @param checkNonExternalizedStringLiterals
 *          value for the <code>checkNonExternalizedStringLiterals</code> flag
 * @param assertMode
 *          value for the <code>assertMode</code> flag
 * @param tokenizeStrings
 *          value for the <code>tokenizeStrings</code> flag
 * @param taskTags
 *          the task tags to look for, may be <code>null</code>
 * @param taskPriorities
 *          the priorities stored alongside <code>taskTags</code>, may be
 *          <code>null</code>
 */
public Scanner(
    boolean tokenizeComments,
    boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals,
    boolean assertMode,
    boolean tokenizeStrings,
    char[][] taskTags,
    char[][] taskPriorities) {
  // scanning flags
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  // task tag configuration
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // initial scanning state
  this.eofPosition = Integer.MAX_VALUE;
  this.encapsedStringStack = null;
}
3933   private void checkNonExternalizeString() throws InvalidInputException {
3934     if (currentLine == null)
3935       return;
3936     parseTags(currentLine);
3937   }
3938   private void parseTags(NLSLine line) throws InvalidInputException {
3939     String s = new String(getCurrentTokenSource());
3940     int pos = s.indexOf(TAG_PREFIX);
3941     int lineLength = line.size();
3942     while (pos != -1) {
3943       int start = pos + TAG_PREFIX_LENGTH;
3944       int end = s.indexOf(TAG_POSTFIX, start);
3945       String index = s.substring(start, end);
3946       int i = 0;
3947       try {
3948         i = Integer.parseInt(index) - 1;
3949         // Tags are one based not zero based.
3950       } catch (NumberFormatException e) {
3951         i = -1; // we don't want to consider this as a valid NLS tag
3952       }
3953       if (line.exists(i)) {
3954         line.set(i, null);
3955       }
3956       pos = s.indexOf(TAG_PREFIX, start);
3957     }
3958     this.nonNLSStrings = new StringLiteral[lineLength];
3959     int nonNLSCounter = 0;
3960     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3961       StringLiteral literal = (StringLiteral) iterator.next();
3962       if (literal != null) {
3963         this.nonNLSStrings[nonNLSCounter++] = literal;
3964       }
3965     }
3966     if (nonNLSCounter == 0) {
3967       this.nonNLSStrings = null;
3968       currentLine = null;
3969       return;
3970     }
3971     this.wasNonExternalizedStringLiteral = true;
3972     if (nonNLSCounter != lineLength) {
3973       System.arraycopy(this.nonNLSStrings, 0,
3974           (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
3975           nonNLSCounter);
3976     }
3977     currentLine = null;
3978   }
3979   public final void scanEscapeCharacter() throws InvalidInputException {
3980     // the string with "\\u" is a legal string of two chars \ and u
3981     //thus we use a direct access to the source (for regular cases).
3982     if (unicodeAsBackSlash) {
3983       // consume next character
3984       unicodeAsBackSlash = false;
3985       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
3986       // (source[currentPosition] == 'u')) {
3987       //                                getNextUnicodeChar();
3988       //                        } else {
3989       if (withoutUnicodePtr != 0) {
3990         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3991         //                              }
3992       }
3993     } else
3994       currentCharacter = source[currentPosition++];
3995     switch (currentCharacter) {
3996       case 'b' :
3997         currentCharacter = '\b';
3998         break;
3999       case 't' :
4000         currentCharacter = '\t';
4001         break;
4002       case 'n' :
4003         currentCharacter = '\n';
4004         break;
4005       case 'f' :
4006         currentCharacter = '\f';
4007         break;
4008       case 'r' :
4009         currentCharacter = '\r';
4010         break;
4011       case '\"' :
4012         currentCharacter = '\"';
4013         break;
4014       case '\'' :
4015         currentCharacter = '\'';
4016         break;
4017       case '\\' :
4018         currentCharacter = '\\';
4019         break;
4020       default :
4021         // -----------octal escape--------------
4022         // OctalDigit
4023         // OctalDigit OctalDigit
4024         // ZeroToThree OctalDigit OctalDigit
4025         int number = Character.getNumericValue(currentCharacter);
4026         if (number >= 0 && number <= 7) {
4027           boolean zeroToThreeNot = number > 3;
4028           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4029             int digit = Character.getNumericValue(currentCharacter);
4030             if (digit >= 0 && digit <= 7) {
4031               number = (number * 8) + digit;
4032               if (Character
4033                   .isDigit(currentCharacter = source[currentPosition++])) {
4034                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4035                   // Digit --> ignore last character
4036                   currentPosition--;
4037                 } else {
4038                   digit = Character.getNumericValue(currentCharacter);
4039                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4040                     // OctalDigit OctalDigit
4041                     number = (number * 8) + digit;
4042                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4043                     // --> ignore last character
4044                     currentPosition--;
4045                   }
4046                 }
4047               } else { // has read \OctalDigit NonDigit--> ignore last
4048                 // character
4049                 currentPosition--;
4050               }
4051             } else { // has read \OctalDigit NonOctalDigit--> ignore last
4052               // character
4053               currentPosition--;
4054             }
4055           } else { // has read \OctalDigit --> ignore last character
4056             currentPosition--;
4057           }
4058           if (number > 255)
4059             throw new InvalidInputException(INVALID_ESCAPE);
4060           currentCharacter = (char) number;
4061         } else
4062           throw new InvalidInputException(INVALID_ESCAPE);
4063     }
4064   }
4065   // chech presence of task: tags
4066   public void checkTaskTag(int commentStart, int commentEnd) {
4067     // only look for newer task: tags
4068     if (this.foundTaskCount > 0
4069         && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4070       return;
4071     }
4072     int foundTaskIndex = this.foundTaskCount;
4073     nextChar : for (int i = commentStart; i < commentEnd
4074         && i < this.eofPosition; i++) {
4075       char[] tag = null;
4076       char[] priority = null;
4077       // check for tag occurrence
4078       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4079         tag = this.taskTags[itag];
4080         priority = this.taskPriorities != null
4081             && itag < this.taskPriorities.length
4082             ? this.taskPriorities[itag]
4083             : null;
4084         int tagLength = tag.length;
4085         for (int t = 0; t < tagLength; t++) {
4086           if (this.source[i + t] != tag[t])
4087             continue nextTag;
4088         }
4089         if (this.foundTaskTags == null) {
4090           this.foundTaskTags = new char[5][];
4091           this.foundTaskMessages = new char[5][];
4092           this.foundTaskPriorities = new char[5][];
4093           this.foundTaskPositions = new int[5][];
4094         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4095           System.arraycopy(this.foundTaskTags, 0,
4096               this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4097               this.foundTaskCount);
4098           System.arraycopy(this.foundTaskMessages, 0,
4099               this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4100               this.foundTaskCount);
4101           System.arraycopy(this.foundTaskPriorities, 0,
4102               this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4103               0, this.foundTaskCount);
4104           System.arraycopy(this.foundTaskPositions, 0,
4105               this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4106               this.foundTaskCount);
4107         }
4108         this.foundTaskTags[this.foundTaskCount] = tag;
4109         this.foundTaskPriorities[this.foundTaskCount] = priority;
4110         this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
4111             i + tagLength - 1};
4112         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4113         this.foundTaskCount++;
4114         i += tagLength - 1; // will be incremented when looping
4115       }
4116     }
4117     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4118       // retrieve message start and end positions
4119       int msgStart = this.foundTaskPositions[i][0]
4120           + this.foundTaskTags[i].length;
4121       int max_value = i + 1 < this.foundTaskCount
4122           ? this.foundTaskPositions[i + 1][0] - 1
4123           : commentEnd - 1;
4124       // at most beginning of next task
4125       if (max_value < msgStart)
4126         max_value = msgStart; // would only occur if tag is before EOF.
4127       int end = -1;
4128       char c;
4129       for (int j = msgStart; j < max_value; j++) {
4130         if ((c = this.source[j]) == '\n' || c == '\r') {
4131           end = j - 1;
4132           break;
4133         }
4134       }
4135       if (end == -1) {
4136         for (int j = max_value; j > msgStart; j--) {
4137           if ((c = this.source[j]) == '*') {
4138             end = j - 1;
4139             break;
4140           }
4141         }
4142         if (end == -1)
4143           end = max_value;
4144       }
4145       if (msgStart == end)
4146         continue; // empty
4147       // trim the message
4148       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4149         end--;
4150       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4151         msgStart++;
4152       // update the end position of the task
4153       this.foundTaskPositions[i][1] = end;
4154       // get the message source
4155       final int messageLength = end - msgStart + 1;
4156       char[] message = new char[messageLength];
4157       System.arraycopy(source, msgStart, message, 0, messageLength);
4158       this.foundTaskMessages[i] = message;
4159     }
4160   }
4161 }