improved syntax highlighting scanners and preferences
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Stack;
16
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
22
23
24 public class Scanner implements IScanner, ITerminalSymbols {
25   /*
26    * APIs are: - getNextToken(), which returns the current type of the token
27    * (this value is not memorized by the scanner) - getCurrentTokenSource(),
28    * which provides the token "REAL" source (i.e. all unicode escapes have been
29    * transformed into the correct char) - sourceStart gives the position into the
30    * stream - currentPosition-1 gives the sourceEnd position into the stream
31    */
32   // 1.4 feature
33   private boolean assertMode;
34   public boolean useAssertAsAnIndentifier = false;
35   //flag indicating if processed source contains occurrences of keyword assert
36   public boolean containsAssertKeyword = false;
37   public boolean recordLineSeparator;
38   public boolean phpMode = false;
39   public Stack encapsedStringStack = null;
40   public char currentCharacter;
41   public int startPosition;
42   public int currentPosition;
43   public int initialPosition, eofPosition;
44   // after this position eof are generated instead of real token from the
45   // source
46   public boolean tokenizeComments;
47   public boolean tokenizeWhiteSpace;
48   public boolean tokenizeStrings;
49   //source should be viewed as a window (aka a part)
50   //of a entire very large stream
51   public char source[];
52   //unicode support
53   public char[] withoutUnicodeBuffer;
54   public int withoutUnicodePtr;
55   //when == 0 ==> no unicode in the current token
56   public boolean unicodeAsBackSlash = false;
57   public boolean scanningFloatLiteral = false;
58 //support for /** comments
59         public int[] commentStops = new int[10];
60         public int[] commentStarts = new int[10];
61         public int commentPtr = -1; // no comment test with commentPtr value -1
62         protected int lastCommentLinePosition = -1;
63   //diet parsing support - jump over some method body when requested
64   public boolean diet = false;
65   //support for the poor-line-debuggers ....
66   //remember the position of the cr/lf
67   public int[] lineEnds = new int[250];
68   public int linePtr = -1;
69   public boolean wasAcr = false;
70   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
71   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
72   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
73   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
74   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
75   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
76   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
77   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
78   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
79   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
80   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
81   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
82   //----------------optimized identifier management------------------
83   static final char[] charArray_a = new char[]{'a'},
84       charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
85       charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
86       charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
87       charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
88       charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
89       charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
90       charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
91       charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
92       charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
93       charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
94       charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
95       charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
96       charArray_z = new char[]{'z'};
97   static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
98       '\u0000', '\u0000', '\u0000'};
99   static final int TableSize = 30, InternalTableSize = 6;
100   //30*6 = 180 entries
101   public static final int OptimizedLength = 6;
102   public/* static */
103   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
104   // support for detecting non-externalized string literals
105   int currentLineNr = -1;
106   int previousLineNr = -1;
107   NLSLine currentLine = null;
108   List lines = new ArrayList();
109   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
110   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
111   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
112   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
113   public StringLiteral[] nonNLSStrings = null;
114   public boolean checkNonExternalizedStringLiterals = true;
115   public boolean wasNonExternalizedStringLiteral = false;
116   /* static */{
117     for (int i = 0; i < 6; i++) {
118       for (int j = 0; j < TableSize; j++) {
119         for (int k = 0; k < InternalTableSize; k++) {
120           charArray_length[i][j][k] = initCharArray;
121         }
122       }
123     }
124   }
125   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
126       newEntry6 = 0;
127   public static final int RoundBracket = 0;
128   public static final int SquareBracket = 1;
129   public static final int CurlyBracket = 2;
130   public static final int BracketKinds = 3;
131   // task tag support
132   public char[][] foundTaskTags = null;
133   public char[][] foundTaskMessages;
134   public char[][] foundTaskPriorities = null;
135   public int[][] foundTaskPositions;
136   public int foundTaskCount = 0;
137   public char[][] taskTags = null;
138   public char[][] taskPriorities = null;
139   public static final boolean DEBUG = false;
140   public static final boolean TRACE = false;
141
142   /**
143    * Determines if the specified character is permissible as the first
144    * character in a PHP identifier
145    */
146   public static boolean isPHPIdentifierStart(char ch) {
147     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
148   }
149   /**
150    * Determines if the specified character may be part of a PHP identifier as
151    * other than the first character
152    */
153   public static boolean isPHPIdentifierPart(char ch) {
154     return Character.isLetterOrDigit(ch) || (ch == '_')
155         || (0x7F <= ch && ch <= 0xFF);
156   }
157   public final boolean atEnd() {
158     // This code is not relevant if source is
159     // Only a part of the real stream input
160     return source.length == currentPosition;
161   }
162   public char[] getCurrentIdentifierSource() {
163     //return the token REAL source (aka unicodes are precomputed)
164     char[] result;
165     //    if (withoutUnicodePtr != 0)
166     //      //0 is used as a fast test flag so the real first char is in position 1
167     //      System.arraycopy(
168     //        withoutUnicodeBuffer,
169     //        1,
170     //        result = new char[withoutUnicodePtr],
171     //        0,
172     //        withoutUnicodePtr);
173     //    else {
174     int length = currentPosition - startPosition;
175     switch (length) { // see OptimizedLength
176       case 1 :
177         return optimizedCurrentTokenSource1();
178       case 2 :
179         return optimizedCurrentTokenSource2();
180       case 3 :
181         return optimizedCurrentTokenSource3();
182       case 4 :
183         return optimizedCurrentTokenSource4();
184       case 5 :
185         return optimizedCurrentTokenSource5();
186       case 6 :
187         return optimizedCurrentTokenSource6();
188     }
189     //no optimization
190     System.arraycopy(source, startPosition, result = new char[length], 0,
191         length);
192     //   }
193     return result;
194   }
195   public int getCurrentTokenEndPosition() {
196     return this.currentPosition - 1;
197   }
198   public final char[] getCurrentTokenSource() {
199     // Return the token REAL source (aka unicodes are precomputed)
200     char[] result;
201     //    if (withoutUnicodePtr != 0)
202     //      // 0 is used as a fast test flag so the real first char is in position 1
203     //      System.arraycopy(
204     //        withoutUnicodeBuffer,
205     //        1,
206     //        result = new char[withoutUnicodePtr],
207     //        0,
208     //        withoutUnicodePtr);
209     //    else {
210     int length;
211     System.arraycopy(source, startPosition,
212         result = new char[length = currentPosition - startPosition], 0, length);
213     //    }
214     return result;
215   }
216   public final char[] getCurrentTokenSource(int startPos) {
217     // Return the token REAL source (aka unicodes are precomputed)
218     char[] result;
219     //    if (withoutUnicodePtr != 0)
220     //      // 0 is used as a fast test flag so the real first char is in position 1
221     //      System.arraycopy(
222     //        withoutUnicodeBuffer,
223     //        1,
224     //        result = new char[withoutUnicodePtr],
225     //        0,
226     //        withoutUnicodePtr);
227     //    else {
228     int length;
229     System.arraycopy(source, startPos,
230         result = new char[length = currentPosition - startPos], 0, length);
231     //  }
232     return result;
233   }
234   public final char[] getCurrentTokenSourceString() {
235     //return the token REAL source (aka unicodes are precomputed).
236     //REMOVE the two " that are at the beginning and the end.
237     char[] result;
238     if (withoutUnicodePtr != 0)
239       //0 is used as a fast test flag so the real first char is in position 1
240       System.arraycopy(withoutUnicodeBuffer, 2, 
241       //2 is 1 (real start) + 1 (to jump over the ")
242           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
243     else {
244       int length;
245       System.arraycopy(source, startPosition + 1,
246           result = new char[length = currentPosition - startPosition - 2], 0,
247           length);
248     }
249     return result;
250   }
251   public int getCurrentTokenStartPosition() {
252     return this.startPosition;
253   }
254   public final char[] getCurrentStringLiteralSource() {
255     // Return the token REAL source (aka unicodes are precomputed)
256     char[] result;
257     int length;
258     System.arraycopy(source, startPosition + 1,
259         result = new char[length = currentPosition - startPosition - 2], 0,
260         length);
261     //    }
262     return result;
263   }
264   /*
265    * Search the source position corresponding to the end of a given line number
266    * 
267    * Line numbers are 1-based, and relative to the scanner initialPosition.
268    * Character positions are 0-based.
269    * 
270    * In case the given line number is inconsistent, answers -1.
271    */
272   public final int getLineEnd(int lineNumber) {
273     if (lineEnds == null)
274       return -1;
275     if (lineNumber >= lineEnds.length)
276       return -1;
277     if (lineNumber <= 0)
278       return -1;
279     if (lineNumber == lineEnds.length - 1)
280       return eofPosition;
281     return lineEnds[lineNumber - 1];
282     // next line start one character behind the lineEnd of the previous line
283   }
284   /**
285    * Search the source position corresponding to the beginning of a given line
286    * number
287    * 
288    * Line numbers are 1-based, and relative to the scanner initialPosition.
289    * Character positions are 0-based.
290    * 
291    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
292    * 
293    * In case the given line number is inconsistent, answers -1.
294    */
295   public final int getLineStart(int lineNumber) {
296     if (lineEnds == null)
297       return -1;
298     if (lineNumber >= lineEnds.length)
299       return -1;
300     if (lineNumber <= 0)
301       return -1;
302     if (lineNumber == 1)
303       return initialPosition;
304     return lineEnds[lineNumber - 2] + 1;
305     // next line start one character behind the lineEnd of the previous line
306   }
307   public final boolean getNextChar(char testedChar) {
308     //BOOLEAN
309     //handle the case of unicode.
310     //when a unicode appears then we must use a buffer that holds char
311     // internal values
312     //At the end of this method currentCharacter holds the new visited char
313     //and currentPosition points right next after it
314     //Both previous lines are true if the currentCharacter is == to the
315     // testedChar
316     //On false, no side effect has occured.
317     //ALL getNextChar.... ARE OPTIMIZED COPIES
318     int temp = currentPosition;
319     try {
320       currentCharacter = source[currentPosition++];
321       //      if (((currentCharacter = source[currentPosition++]) == '\\')
322       //        && (source[currentPosition] == 'u')) {
323       //        //-------------unicode traitement ------------
324       //        int c1, c2, c3, c4;
325       //        int unicodeSize = 6;
326       //        currentPosition++;
327       //        while (source[currentPosition] == 'u') {
328       //          currentPosition++;
329       //          unicodeSize++;
330       //        }
331       //
332       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
333       //          || c1 < 0)
334       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
335       //            || c2 < 0)
336       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
337       //            || c3 < 0)
338       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
339       //            || c4 < 0)) {
340       //          currentPosition = temp;
341       //          return false;
342       //        }
343       //
344       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
345       //        if (currentCharacter != testedChar) {
346       //          currentPosition = temp;
347       //          return false;
348       //        }
349       //        unicodeAsBackSlash = currentCharacter == '\\';
350       //
351       //        //need the unicode buffer
352       //        if (withoutUnicodePtr == 0) {
353       //          //buffer all the entries that have been left aside....
354       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
355       //          System.arraycopy(
356       //            source,
357       //            startPosition,
358       //            withoutUnicodeBuffer,
359       //            1,
360       //            withoutUnicodePtr);
361       //        }
362       //        //fill the buffer with the char
363       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
364       //        return true;
365       //
366       //      } //-------------end unicode traitement--------------
367       //      else {
368       if (currentCharacter != testedChar) {
369         currentPosition = temp;
370         return false;
371       }
372       unicodeAsBackSlash = false;
373       //        if (withoutUnicodePtr != 0)
374       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
375       return true;
376       //      }
377     } catch (IndexOutOfBoundsException e) {
378       unicodeAsBackSlash = false;
379       currentPosition = temp;
380       return false;
381     }
382   }
383   public final int getNextChar(char testedChar1, char testedChar2) {
384     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
385     //test can be done with (x==0) for the first and (x>0) for the second
386     //handle the case of unicode.
387     //when a unicode appears then we must use a buffer that holds char
388     // internal values
389     //At the end of this method currentCharacter holds the new visited char
390     //and currentPosition points right next after it
391     //Both previous lines are true if the currentCharacter is == to the
392     // testedChar1/2
393     //On false, no side effect has occured.
394     //ALL getNextChar.... ARE OPTIMIZED COPIES
395     int temp = currentPosition;
396     try {
397       int result;
398       currentCharacter = source[currentPosition++];
399       //      if (((currentCharacter = source[currentPosition++]) == '\\')
400       //        && (source[currentPosition] == 'u')) {
401       //        //-------------unicode traitement ------------
402       //        int c1, c2, c3, c4;
403       //        int unicodeSize = 6;
404       //        currentPosition++;
405       //        while (source[currentPosition] == 'u') {
406       //          currentPosition++;
407       //          unicodeSize++;
408       //        }
409       //
410       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
411       //          || c1 < 0)
412       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
413       //            || c2 < 0)
414       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
415       //            || c3 < 0)
416       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
417       //            || c4 < 0)) {
418       //          currentPosition = temp;
419       //          return 2;
420       //        }
421       //
422       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
423       //        if (currentCharacter == testedChar1)
424       //          result = 0;
425       //        else if (currentCharacter == testedChar2)
426       //          result = 1;
427       //        else {
428       //          currentPosition = temp;
429       //          return -1;
430       //        }
431       //
432       //        //need the unicode buffer
433       //        if (withoutUnicodePtr == 0) {
434       //          //buffer all the entries that have been left aside....
435       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
436       //          System.arraycopy(
437       //            source,
438       //            startPosition,
439       //            withoutUnicodeBuffer,
440       //            1,
441       //            withoutUnicodePtr);
442       //        }
443       //        //fill the buffer with the char
444       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
445       //        return result;
446       //      } //-------------end unicode traitement--------------
447       //      else {
448       if (currentCharacter == testedChar1)
449         result = 0;
450       else if (currentCharacter == testedChar2)
451         result = 1;
452       else {
453         currentPosition = temp;
454         return -1;
455       }
456       //        if (withoutUnicodePtr != 0)
457       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
458       return result;
459       //     }
460     } catch (IndexOutOfBoundsException e) {
461       currentPosition = temp;
462       return -1;
463     }
464   }
465   public final boolean getNextCharAsDigit() {
466     //BOOLEAN
467     //handle the case of unicode.
468     //when a unicode appears then we must use a buffer that holds char
469     // internal values
470     //At the end of this method currentCharacter holds the new visited char
471     //and currentPosition points right next after it
472     //Both previous lines are true if the currentCharacter is a digit
473     //On false, no side effect has occured.
474     //ALL getNextChar.... ARE OPTIMIZED COPIES
475     int temp = currentPosition;
476     try {
477       currentCharacter = source[currentPosition++];
478       //      if (((currentCharacter = source[currentPosition++]) == '\\')
479       //        && (source[currentPosition] == 'u')) {
480       //        //-------------unicode traitement ------------
481       //        int c1, c2, c3, c4;
482       //        int unicodeSize = 6;
483       //        currentPosition++;
484       //        while (source[currentPosition] == 'u') {
485       //          currentPosition++;
486       //          unicodeSize++;
487       //        }
488       //
489       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
490       //          || c1 < 0)
491       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
492       //            || c2 < 0)
493       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
494       //            || c3 < 0)
495       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
496       //            || c4 < 0)) {
497       //          currentPosition = temp;
498       //          return false;
499       //        }
500       //
501       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
502       //        if (!Character.isDigit(currentCharacter)) {
503       //          currentPosition = temp;
504       //          return false;
505       //        }
506       //
507       //        //need the unicode buffer
508       //        if (withoutUnicodePtr == 0) {
509       //          //buffer all the entries that have been left aside....
510       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
511       //          System.arraycopy(
512       //            source,
513       //            startPosition,
514       //            withoutUnicodeBuffer,
515       //            1,
516       //            withoutUnicodePtr);
517       //        }
518       //        //fill the buffer with the char
519       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
520       //        return true;
521       //      } //-------------end unicode traitement--------------
522       //      else {
523       if (!Character.isDigit(currentCharacter)) {
524         currentPosition = temp;
525         return false;
526       }
527       //        if (withoutUnicodePtr != 0)
528       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
529       return true;
530       //      }
531     } catch (IndexOutOfBoundsException e) {
532       currentPosition = temp;
533       return false;
534     }
535   }
536   public final boolean getNextCharAsDigit(int radix) {
537     //BOOLEAN
538     //handle the case of unicode.
539     //when a unicode appears then we must use a buffer that holds char
540     // internal values
541     //At the end of this method currentCharacter holds the new visited char
542     //and currentPosition points right next after it
543     //Both previous lines are true if the currentCharacter is a digit base on
544     // radix
545     //On false, no side effect has occured.
546     //ALL getNextChar.... ARE OPTIMIZED COPIES
547     int temp = currentPosition;
548     try {
549       currentCharacter = source[currentPosition++];
550       //      if (((currentCharacter = source[currentPosition++]) == '\\')
551       //        && (source[currentPosition] == 'u')) {
552       //        //-------------unicode traitement ------------
553       //        int c1, c2, c3, c4;
554       //        int unicodeSize = 6;
555       //        currentPosition++;
556       //        while (source[currentPosition] == 'u') {
557       //          currentPosition++;
558       //          unicodeSize++;
559       //        }
560       //
561       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
562       //          || c1 < 0)
563       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
564       //            || c2 < 0)
565       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
566       //            || c3 < 0)
567       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
568       //            || c4 < 0)) {
569       //          currentPosition = temp;
570       //          return false;
571       //        }
572       //
573       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
574       //        if (Character.digit(currentCharacter, radix) == -1) {
575       //          currentPosition = temp;
576       //          return false;
577       //        }
578       //
579       //        //need the unicode buffer
580       //        if (withoutUnicodePtr == 0) {
581       //          //buffer all the entries that have been left aside....
582       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
583       //          System.arraycopy(
584       //            source,
585       //            startPosition,
586       //            withoutUnicodeBuffer,
587       //            1,
588       //            withoutUnicodePtr);
589       //        }
590       //        //fill the buffer with the char
591       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
592       //        return true;
593       //      } //-------------end unicode traitement--------------
594       //      else {
595       if (Character.digit(currentCharacter, radix) == -1) {
596         currentPosition = temp;
597         return false;
598       }
599       //        if (withoutUnicodePtr != 0)
600       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
601       return true;
602       //      }
603     } catch (IndexOutOfBoundsException e) {
604       currentPosition = temp;
605       return false;
606     }
607   }
608   public boolean getNextCharAsJavaIdentifierPart() {
609     //BOOLEAN
610     //handle the case of unicode.
611     //when a unicode appears then we must use a buffer that holds char
612     // internal values
613     //At the end of this method currentCharacter holds the new visited char
614     //and currentPosition points right next after it
615     //Both previous lines are true if the currentCharacter is a
616     // JavaIdentifierPart
617     //On false, no side effect has occured.
618     //ALL getNextChar.... ARE OPTIMIZED COPIES
619     int temp = currentPosition;
620     try {
621       currentCharacter = source[currentPosition++];
622       //      if (((currentCharacter = source[currentPosition++]) == '\\')
623       //        && (source[currentPosition] == 'u')) {
624       //        //-------------unicode traitement ------------
625       //        int c1, c2, c3, c4;
626       //        int unicodeSize = 6;
627       //        currentPosition++;
628       //        while (source[currentPosition] == 'u') {
629       //          currentPosition++;
630       //          unicodeSize++;
631       //        }
632       //
633       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
634       //          || c1 < 0)
635       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
636       //            || c2 < 0)
637       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
638       //            || c3 < 0)
639       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
640       //            || c4 < 0)) {
641       //          currentPosition = temp;
642       //          return false;
643       //        }
644       //
645       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
646       //        if (!isPHPIdentifierPart(currentCharacter)) {
647       //          currentPosition = temp;
648       //          return false;
649       //        }
650       //
651       //        //need the unicode buffer
652       //        if (withoutUnicodePtr == 0) {
653       //          //buffer all the entries that have been left aside....
654       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
655       //          System.arraycopy(
656       //            source,
657       //            startPosition,
658       //            withoutUnicodeBuffer,
659       //            1,
660       //            withoutUnicodePtr);
661       //        }
662       //        //fill the buffer with the char
663       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
664       //        return true;
665       //      } //-------------end unicode traitement--------------
666       //      else {
667       if (!isPHPIdentifierPart(currentCharacter)) {
668         currentPosition = temp;
669         return false;
670       }
671       //        if (withoutUnicodePtr != 0)
672       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
673       return true;
674       //      }
675     } catch (IndexOutOfBoundsException e) {
676       currentPosition = temp;
677       return false;
678     }
679   }
680   public int getCastOrParen() {
681     int tempPosition = currentPosition;
682     char tempCharacter = currentCharacter;
683     int tempToken = TokenNameLPAREN;
684     boolean found = false;
685     StringBuffer buf = new StringBuffer();
686     try {
687       do {
688         currentCharacter = source[currentPosition++];
689       } while (currentCharacter == ' ' || currentCharacter == '\t');
690       while ((currentCharacter >= 'a' && currentCharacter <= 'z')
691           || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
692         buf.append(currentCharacter);
693         currentCharacter = source[currentPosition++];
694       }
695       if (buf.length() >= 3 && buf.length() <= 7) {
696         char[] data = buf.toString().toCharArray();
697         int index = 0;
698         switch (data.length) {
699           case 3 :
700             // int
701             if ((data[index] == 'i') && (data[++index] == 'n')
702                 && (data[++index] == 't')) {
703               found = true;
704               tempToken = TokenNameintCAST;
705             }
706             break;
707           case 4 :
708             // bool real
709             if ((data[index] == 'b') && (data[++index] == 'o')
710                 && (data[++index] == 'o') && (data[++index] == 'l')) {
711               found = true;
712               tempToken = TokenNameboolCAST;
713             } else {
714               index = 0;
715               if ((data[index] == 'r') && (data[++index] == 'e')
716                   && (data[++index] == 'a') && (data[++index] == 'l')) {
717                 found = true;
718                 tempToken = TokenNamedoubleCAST;
719               }
720             }
721             break;
722           case 5 :
723             // array unset float
724             if ((data[index] == 'a') && (data[++index] == 'r')
725                 && (data[++index] == 'r') && (data[++index] == 'a')
726                 && (data[++index] == 'y')) {
727               found = true;
728               tempToken = TokenNamearrayCAST;
729             } else {
730               index = 0;
731               if ((data[index] == 'u') && (data[++index] == 'n')
732                   && (data[++index] == 's') && (data[++index] == 'e')
733                   && (data[++index] == 't')) {
734                 found = true;
735                 tempToken = TokenNameunsetCAST;
736               } else {
737                 index = 0;
738                 if ((data[index] == 'f') && (data[++index] == 'l')
739                     && (data[++index] == 'o') && (data[++index] == 'a')
740                     && (data[++index] == 't')) {
741                   found = true;
742                   tempToken = TokenNamedoubleCAST;
743                 }
744               }
745             }
746             break;
747           case 6 :
748             // object string double
749             if ((data[index] == 'o') && (data[++index] == 'b')
750                 && (data[++index] == 'j') && (data[++index] == 'e')
751                 && (data[++index] == 'c') && (data[++index] == 't')) {
752               found = true;
753               tempToken = TokenNameobjectCAST;
754             } else {
755               index = 0;
756               if ((data[index] == 's') && (data[++index] == 't')
757                   && (data[++index] == 'r') && (data[++index] == 'i')
758                   && (data[++index] == 'n') && (data[++index] == 'g')) {
759                 found = true;
760                 tempToken = TokenNamestringCAST;
761               } else {
762                 index = 0;
763                 if ((data[index] == 'd') && (data[++index] == 'o')
764                     && (data[++index] == 'u') && (data[++index] == 'b')
765                     && (data[++index] == 'l') && (data[++index] == 'e')) {
766                   found = true;
767                   tempToken = TokenNamedoubleCAST;
768                 }
769               }
770             }
771             break;
772           case 7 :
773             // boolean integer
774             if ((data[index] == 'b') && (data[++index] == 'o')
775                 && (data[++index] == 'o') && (data[++index] == 'l')
776                 && (data[++index] == 'e') && (data[++index] == 'a')
777                 && (data[++index] == 'n')) {
778               found = true;
779               tempToken = TokenNameboolCAST;
780             } else {
781               index = 0;
782               if ((data[index] == 'i') && (data[++index] == 'n')
783                   && (data[++index] == 't') && (data[++index] == 'e')
784                   && (data[++index] == 'g') && (data[++index] == 'e')
785                   && (data[++index] == 'r')) {
786                 found = true;
787                 tempToken = TokenNameintCAST;
788               }
789             }
790             break;
791         }
792         if (found) {
793           while (currentCharacter == ' ' || currentCharacter == '\t') {
794             currentCharacter = source[currentPosition++];
795           }
796           if (currentCharacter == ')') {
797             return tempToken;
798           }
799         }
800       }
801     } catch (IndexOutOfBoundsException e) {
802     }
803     currentCharacter = tempCharacter;
804     currentPosition = tempPosition;
805     return TokenNameLPAREN;
806   }
807   public void consumeStringInterpolated() throws InvalidInputException {
808     try {
809       // consume next character
810       unicodeAsBackSlash = false;
811       currentCharacter = source[currentPosition++];
812       //                if (((currentCharacter = source[currentPosition++]) == '\\')
813       //                  && (source[currentPosition] == 'u')) {
814       //                  getNextUnicodeChar();
815       //                } else {
816       //                  if (withoutUnicodePtr != 0) {
817       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
818       //                      currentCharacter;
819       //                  }
820       //                }
821       while (currentCharacter != '`') {
822         /** ** in PHP \r and \n are valid in string literals *** */
823         //                if ((currentCharacter == '\n')
824         //                  || (currentCharacter == '\r')) {
825         //                  // relocate if finding another quote fairly close: thus unicode
826         // '/u000D' will be fully consumed
827         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
828         //                    if (currentPosition + lookAhead == source.length)
829         //                      break;
830         //                    if (source[currentPosition + lookAhead] == '\n')
831         //                      break;
832         //                    if (source[currentPosition + lookAhead] == '\"') {
833         //                      currentPosition += lookAhead + 1;
834         //                      break;
835         //                    }
836         //                  }
837         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
838         //                }
839         if (currentCharacter == '\\') {
840           int escapeSize = currentPosition;
841           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
842           //scanEscapeCharacter make a side effect on this value and we need
843           // the previous value few lines down this one
844           scanDoubleQuotedEscapeCharacter();
845           escapeSize = currentPosition - escapeSize;
846           if (withoutUnicodePtr == 0) {
847             //buffer all the entries that have been left aside....
848             withoutUnicodePtr = currentPosition - escapeSize - 1
849                 - startPosition;
850             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
851                 withoutUnicodePtr);
852             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
853           } else { //overwrite the / in the buffer
854             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
855             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
856                                               // where only one is correct
857               withoutUnicodePtr--;
858             }
859           }
860         }
861         // consume next character
862         unicodeAsBackSlash = false;
863         currentCharacter = source[currentPosition++];
864         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
865         //                    && (source[currentPosition] == 'u')) {
866         //                    getNextUnicodeChar();
867         //                  } else {
868         if (withoutUnicodePtr != 0) {
869           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
870         }
871         //                  }
872       }
873     } catch (IndexOutOfBoundsException e) {
874       throw new InvalidInputException(UNTERMINATED_STRING);
875     } catch (InvalidInputException e) {
876       if (e.getMessage().equals(INVALID_ESCAPE)) {
877         // relocate if finding another quote fairly close: thus unicode
878         // '/u000D' will be fully consumed
879         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
880           if (currentPosition + lookAhead == source.length)
881             break;
882           if (source[currentPosition + lookAhead] == '\n')
883             break;
884           if (source[currentPosition + lookAhead] == '`') {
885             currentPosition += lookAhead + 1;
886             break;
887           }
888         }
889       }
890       throw e; // rethrow
891     }
892     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
893                                               // //$NON-NLS-?$ where ? is an
894                                               // int.
895       if (currentLine == null) {
896         currentLine = new NLSLine();
897         lines.add(currentLine);
898       }
899       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
900           startPosition, currentPosition - 1));
901     }
902   }
903   public void consumeStringConstant() throws InvalidInputException {
904     try {
905       // consume next character
906       unicodeAsBackSlash = false;
907       currentCharacter = source[currentPosition++];
908       //                if (((currentCharacter = source[currentPosition++]) == '\\')
909       //                  && (source[currentPosition] == 'u')) {
910       //                  getNextUnicodeChar();
911       //                } else {
912       //                  if (withoutUnicodePtr != 0) {
913       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
914       //                      currentCharacter;
915       //                  }
916       //                }
917       while (currentCharacter != '\'') {
918         /** ** in PHP \r and \n are valid in string literals *** */
919         //                  if ((currentCharacter == '\n')
920         //                    || (currentCharacter == '\r')) {
921         //                    // relocate if finding another quote fairly close: thus unicode
922         // '/u000D' will be fully consumed
923         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
924         //                      if (currentPosition + lookAhead == source.length)
925         //                        break;
926         //                      if (source[currentPosition + lookAhead] == '\n')
927         //                        break;
928         //                      if (source[currentPosition + lookAhead] == '\"') {
929         //                        currentPosition += lookAhead + 1;
930         //                        break;
931         //                      }
932         //                    }
933         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
934         //                  }
935         if (currentCharacter == '\\') {
936           int escapeSize = currentPosition;
937           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
938           //scanEscapeCharacter make a side effect on this value and we need
939           // the previous value few lines down this one
940           scanSingleQuotedEscapeCharacter();
941           escapeSize = currentPosition - escapeSize;
942           if (withoutUnicodePtr == 0) {
943             //buffer all the entries that have been left aside....
944             withoutUnicodePtr = currentPosition - escapeSize - 1
945                 - startPosition;
946             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
947                 withoutUnicodePtr);
948             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
949           } else { //overwrite the / in the buffer
950             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
951             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
952                                               // where only one is correct
953               withoutUnicodePtr--;
954             }
955           }
956         }
957         // consume next character
958         unicodeAsBackSlash = false;
959         currentCharacter = source[currentPosition++];
960         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
961         //                    && (source[currentPosition] == 'u')) {
962         //                    getNextUnicodeChar();
963         //                  } else {
964         if (withoutUnicodePtr != 0) {
965           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
966         }
967         //                  }
968       }
969     } catch (IndexOutOfBoundsException e) {
970       throw new InvalidInputException(UNTERMINATED_STRING);
971     } catch (InvalidInputException e) {
972       if (e.getMessage().equals(INVALID_ESCAPE)) {
973         // relocate if finding another quote fairly close: thus unicode
974         // '/u000D' will be fully consumed
975         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
976           if (currentPosition + lookAhead == source.length)
977             break;
978           if (source[currentPosition + lookAhead] == '\n')
979             break;
980           if (source[currentPosition + lookAhead] == '\'') {
981             currentPosition += lookAhead + 1;
982             break;
983           }
984         }
985       }
986       throw e; // rethrow
987     }
988     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
989                                               // //$NON-NLS-?$ where ? is an
990                                               // int.
991       if (currentLine == null) {
992         currentLine = new NLSLine();
993         lines.add(currentLine);
994       }
995       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
996           startPosition, currentPosition - 1));
997     }
998   }
999   public void consumeStringLiteral() throws InvalidInputException {
1000     try {
1001       // consume next character
1002       unicodeAsBackSlash = false;
1003       currentCharacter = source[currentPosition++];
1004       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1005       //                  && (source[currentPosition] == 'u')) {
1006       //                  getNextUnicodeChar();
1007       //                } else {
1008       //                  if (withoutUnicodePtr != 0) {
1009       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1010       //                      currentCharacter;
1011       //                  }
1012       //                }
1013       while (currentCharacter != '"') {
1014         /** ** in PHP \r and \n are valid in string literals *** */
1015         //                  if ((currentCharacter == '\n')
1016         //                    || (currentCharacter == '\r')) {
1017         //                    // relocate if finding another quote fairly close: thus unicode
1018         // '/u000D' will be fully consumed
1019         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1020         //                      if (currentPosition + lookAhead == source.length)
1021         //                        break;
1022         //                      if (source[currentPosition + lookAhead] == '\n')
1023         //                        break;
1024         //                      if (source[currentPosition + lookAhead] == '\"') {
1025         //                        currentPosition += lookAhead + 1;
1026         //                        break;
1027         //                      }
1028         //                    }
1029         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1030         //                  }
1031         if (currentCharacter == '\\') {
1032           int escapeSize = currentPosition;
1033           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1034           //scanEscapeCharacter make a side effect on this value and we need
1035           // the previous value few lines down this one
1036           scanDoubleQuotedEscapeCharacter();
1037           escapeSize = currentPosition - escapeSize;
1038           if (withoutUnicodePtr == 0) {
1039             //buffer all the entries that have been left aside....
1040             withoutUnicodePtr = currentPosition - escapeSize - 1
1041                 - startPosition;
1042             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1043                 withoutUnicodePtr);
1044             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1045           } else { //overwrite the / in the buffer
1046             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1047             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1048                                               // where only one is correct
1049               withoutUnicodePtr--;
1050             }
1051           }
1052         }
1053         // consume next character
1054         unicodeAsBackSlash = false;
1055         currentCharacter = source[currentPosition++];
1056         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1057         //                    && (source[currentPosition] == 'u')) {
1058         //                    getNextUnicodeChar();
1059         //                  } else {
1060         if (withoutUnicodePtr != 0) {
1061           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1062         }
1063         //                  }
1064       }
1065     } catch (IndexOutOfBoundsException e) {
1066       throw new InvalidInputException(UNTERMINATED_STRING);
1067     } catch (InvalidInputException e) {
1068       if (e.getMessage().equals(INVALID_ESCAPE)) {
1069         // relocate if finding another quote fairly close: thus unicode
1070         // '/u000D' will be fully consumed
1071         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1072           if (currentPosition + lookAhead == source.length)
1073             break;
1074           if (source[currentPosition + lookAhead] == '\n')
1075             break;
1076           if (source[currentPosition + lookAhead] == '\"') {
1077             currentPosition += lookAhead + 1;
1078             break;
1079           }
1080         }
1081       }
1082       throw e; // rethrow
1083     }
1084     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1085                                               // //$NON-NLS-?$ where ? is an
1086                                               // int.
1087       if (currentLine == null) {
1088         currentLine = new NLSLine();
1089         lines.add(currentLine);
1090       }
1091       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1092           startPosition, currentPosition - 1));
1093     }
1094   }
1095   public int getNextToken() throws InvalidInputException {
1096     if (!phpMode) {
1097       return getInlinedHTML(currentPosition);
1098     }
1099     if (phpMode) {
1100       this.wasAcr = false;
1101       if (diet) {
1102         jumpOverMethodBody();
1103         diet = false;
1104         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1105       }
1106       try {
1107         while (true) {
1108           withoutUnicodePtr = 0;
1109           //start with a new token
1110           char encapsedChar = ' ';
1111           if (!encapsedStringStack.isEmpty()) {
1112             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1113           }
1114           if (encapsedChar != '$' && encapsedChar != ' ') {
1115             currentCharacter = source[currentPosition++];
1116             if (currentCharacter == encapsedChar) {
1117               switch (currentCharacter) {
1118                 case '`' :
1119                   return TokenNameEncapsedString0;
1120                 case '\'' :
1121                   return TokenNameEncapsedString1;
1122                 case '"' :
1123                   return TokenNameEncapsedString2;
1124               }
1125             }
1126             while (currentCharacter != encapsedChar) {
1127               /** ** in PHP \r and \n are valid in string literals *** */
1128               switch (currentCharacter) {
1129                 case '\\' :
1130                   int escapeSize = currentPosition;
1131                   boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1132                   //scanEscapeCharacter make a side effect on this value and
1133                   // we need the previous value few lines down this one
1134                   scanDoubleQuotedEscapeCharacter();
1135                   escapeSize = currentPosition - escapeSize;
1136                   if (withoutUnicodePtr == 0) {
1137                     //buffer all the entries that have been left aside....
1138                     withoutUnicodePtr = currentPosition - escapeSize - 1
1139                         - startPosition;
1140                     System.arraycopy(source, startPosition,
1141                         withoutUnicodeBuffer, 1, withoutUnicodePtr);
1142                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1143                   } else { //overwrite the / in the buffer
1144                     withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1145                     if (backSlashAsUnicodeInString) { //there are TWO \ in
1146                       withoutUnicodePtr--;
1147                     }
1148                   }
1149                   break;
1150                 case '$' :
1151                   if (isPHPIdentifierStart(source[currentPosition])
1152                       || source[currentPosition] == '{') {
1153                     currentPosition--;
1154                     encapsedStringStack.push(new Character('$'));
1155                     return TokenNameSTRING;
1156                   }
1157                   break;
1158                 case '{' :
1159                   if (source[currentPosition] == '$') { // CURLY_OPEN
1160                     currentPosition--;
1161                     encapsedStringStack.push(new Character('$'));
1162                     return TokenNameSTRING;
1163                   }
1164               }
1165               // consume next character
1166               unicodeAsBackSlash = false;
1167               currentCharacter = source[currentPosition++];
1168               if (withoutUnicodePtr != 0) {
1169                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1170               }
1171               //                  }
1172             } // end while
1173             currentPosition--;
1174             return TokenNameSTRING;
1175           }
1176           // ---------Consume white space and handles startPosition---------
1177           int whiteStart = currentPosition;
1178           startPosition = currentPosition;
1179           currentCharacter = source[currentPosition++];
1180           if (encapsedChar == '$') {
1181             switch (currentCharacter) {
1182               case '\\' :
1183                 currentCharacter = source[currentPosition++];
1184                 return TokenNameSTRING;
1185               case '{' :
1186                 if (encapsedChar == '$') {
1187                   if (getNextChar('$'))
1188                     return TokenNameCURLY_OPEN;
1189                 }
1190                 return TokenNameLBRACE;
1191               case '}' :
1192                 return TokenNameRBRACE;
1193               case '[' :
1194                 return TokenNameLBRACKET;
1195               case ']' :
1196                 return TokenNameRBRACKET;
1197               case '\'' :
1198                 if (tokenizeStrings) {
1199                   consumeStringConstant();
1200                   return TokenNameStringSingleQuote;
1201                 }
1202                 return TokenNameEncapsedString1;
1203               case '"' :
1204                 return TokenNameEncapsedString2;
1205               case '`' :
1206                 if (tokenizeStrings) {
1207                   consumeStringInterpolated();
1208                   return TokenNameStringInterpolated;
1209                 }
1210                 return TokenNameEncapsedString0;
1211               case '-' :
1212                 if (getNextChar('>'))
1213                   return TokenNameMINUS_GREATER;
1214                 return TokenNameSTRING;
1215               default :
1216                 if (currentCharacter == '$') {
1217                   int oldPosition = currentPosition;
1218                   try {
1219                     currentCharacter = source[currentPosition++];
1220                     if (currentCharacter == '{') {
1221                       return TokenNameDOLLAR_LBRACE;
1222                     }
1223                     if (isPHPIdentifierStart(currentCharacter)) {
1224                       return scanIdentifierOrKeyword(true);
1225                     } else {
1226                       currentPosition = oldPosition;
1227                       return TokenNameSTRING;
1228                     }
1229                   } catch (IndexOutOfBoundsException e) {
1230                     currentPosition = oldPosition;
1231                     return TokenNameSTRING;
1232                   }
1233                 }
1234                 if (isPHPIdentifierStart(currentCharacter))
1235                   return scanIdentifierOrKeyword(false);
1236                 if (Character.isDigit(currentCharacter))
1237                   return scanNumber(false);
1238                 return TokenNameERROR;
1239             }
1240           }
1241           //          boolean isWhiteSpace;
1242           
1243           while ((currentCharacter == ' ')
1244               || Character.isWhitespace(currentCharacter)) {
1245             startPosition = currentPosition;
1246             currentCharacter = source[currentPosition++];
1247             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1248             //              && (source[currentPosition] == 'u')) {
1249             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1250             //            } else {
1251             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1252               checkNonExternalizeString();
1253               if (recordLineSeparator) {
1254                 pushLineSeparator();
1255               } else {
1256                 currentLine = null;
1257               }
1258             }
1259             //            isWhiteSpace = (currentCharacter == ' ')
1260             //                || Character.isWhitespace(currentCharacter);
1261             //            }
1262           }
1263           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1264             // reposition scanner in case we are interested by spaces as tokens
1265             currentPosition--;
1266             startPosition = whiteStart;
1267             return TokenNameWHITESPACE;
1268           }
1269           //little trick to get out in the middle of a source compuation
1270           if (currentPosition > eofPosition)
1271             return TokenNameEOF;
1272           // ---------Identify the next token-------------
1273           switch (currentCharacter) {
1274             case '(' :
1275               return getCastOrParen();
1276             case ')' :
1277               return TokenNameRPAREN;
1278             case '{' :
1279               return TokenNameLBRACE;
1280             case '}' :
1281               return TokenNameRBRACE;
1282             case '[' :
1283               return TokenNameLBRACKET;
1284             case ']' :
1285               return TokenNameRBRACKET;
1286             case ';' :
1287               return TokenNameSEMICOLON;
1288             case ',' :
1289               return TokenNameCOMMA;
1290             case '.' :
1291               if (getNextChar('='))
1292                 return TokenNameDOT_EQUAL;
1293               if (getNextCharAsDigit())
1294                 return scanNumber(true);
1295               return TokenNameDOT;
1296             case '+' :
1297               {
1298                 int test;
1299                 if ((test = getNextChar('+', '=')) == 0)
1300                   return TokenNamePLUS_PLUS;
1301                 if (test > 0)
1302                   return TokenNamePLUS_EQUAL;
1303                 return TokenNamePLUS;
1304               }
1305             case '-' :
1306               {
1307                 int test;
1308                 if ((test = getNextChar('-', '=')) == 0)
1309                   return TokenNameMINUS_MINUS;
1310                 if (test > 0)
1311                   return TokenNameMINUS_EQUAL;
1312                 if (getNextChar('>'))
1313                   return TokenNameMINUS_GREATER;
1314                 return TokenNameMINUS;
1315               }
1316             case '~' :
1317               if (getNextChar('='))
1318                 return TokenNameTWIDDLE_EQUAL;
1319               return TokenNameTWIDDLE;
1320             case '!' :
1321               if (getNextChar('=')) {
1322                 if (getNextChar('=')) {
1323                   return TokenNameNOT_EQUAL_EQUAL;
1324                 }
1325                 return TokenNameNOT_EQUAL;
1326               }
1327               return TokenNameNOT;
1328             case '*' :
1329               if (getNextChar('='))
1330                 return TokenNameMULTIPLY_EQUAL;
1331               return TokenNameMULTIPLY;
1332             case '%' :
1333               if (getNextChar('='))
1334                 return TokenNameREMAINDER_EQUAL;
1335               return TokenNameREMAINDER;
1336             case '<' :
1337               {
1338                 int oldPosition = currentPosition;
1339                 try {
1340                   currentCharacter = source[currentPosition++];
1341                 } catch (IndexOutOfBoundsException e) {
1342                   currentPosition = oldPosition;
1343                   return TokenNameLESS;
1344                 }
1345                 switch (currentCharacter) {
1346                   case '=' :
1347                     return TokenNameLESS_EQUAL;
1348                   case '>' :
1349                     return TokenNameNOT_EQUAL;
1350                   case '<' :
1351                     if (getNextChar('='))
1352                       return TokenNameLEFT_SHIFT_EQUAL;
1353                     if (getNextChar('<')) {
1354                       currentCharacter = source[currentPosition++];
1355                       while (Character.isWhitespace(currentCharacter)) {
1356                         currentCharacter = source[currentPosition++];
1357                       }
1358                       int heredocStart = currentPosition - 1;
1359                       int heredocLength = 0;
1360                       if (isPHPIdentifierStart(currentCharacter)) {
1361                         currentCharacter = source[currentPosition++];
1362                       } else {
1363                         return TokenNameERROR;
1364                       }
1365                       while (isPHPIdentifierPart(currentCharacter)) {
1366                         currentCharacter = source[currentPosition++];
1367                       }
1368                       heredocLength = currentPosition - heredocStart - 1;
1369                       // heredoc end-tag determination
1370                       boolean endTag = true;
1371                       char ch;
1372                       do {
1373                         ch = source[currentPosition++];
1374                         if (ch == '\r' || ch == '\n') {
1375                           if (recordLineSeparator) {
1376                             pushLineSeparator();
1377                           } else {
1378                             currentLine = null;
1379                           }
1380                           for (int i = 0; i < heredocLength; i++) {
1381                             if (source[currentPosition + i] != source[heredocStart
1382                                 + i]) {
1383                               endTag = false;
1384                               break;
1385                             }
1386                           }
1387                           if (endTag) {
1388                             currentPosition += heredocLength - 1;
1389                             currentCharacter = source[currentPosition++];
1390                             break; // do...while loop
1391                           } else {
1392                             endTag = true;
1393                           }
1394                         }
1395                       } while (true);
1396                       return TokenNameHEREDOC;
1397                     }
1398                     return TokenNameLEFT_SHIFT;
1399                 }
1400                 currentPosition = oldPosition;
1401                 return TokenNameLESS;
1402               }
1403             case '>' :
1404               {
1405                 int test;
1406                 if ((test = getNextChar('=', '>')) == 0)
1407                   return TokenNameGREATER_EQUAL;
1408                 if (test > 0) {
1409                   if ((test = getNextChar('=', '>')) == 0)
1410                     return TokenNameRIGHT_SHIFT_EQUAL;
1411                   return TokenNameRIGHT_SHIFT;
1412                 }
1413                 return TokenNameGREATER;
1414               }
1415             case '=' :
1416               if (getNextChar('=')) {
1417                 if (getNextChar('=')) {
1418                   return TokenNameEQUAL_EQUAL_EQUAL;
1419                 }
1420                 return TokenNameEQUAL_EQUAL;
1421               }
1422               if (getNextChar('>'))
1423                 return TokenNameEQUAL_GREATER;
1424               return TokenNameEQUAL;
1425             case '&' :
1426               {
1427                 int test;
1428                 if ((test = getNextChar('&', '=')) == 0)
1429                   return TokenNameAND_AND;
1430                 if (test > 0)
1431                   return TokenNameAND_EQUAL;
1432                 return TokenNameAND;
1433               }
1434             case '|' :
1435               {
1436                 int test;
1437                 if ((test = getNextChar('|', '=')) == 0)
1438                   return TokenNameOR_OR;
1439                 if (test > 0)
1440                   return TokenNameOR_EQUAL;
1441                 return TokenNameOR;
1442               }
1443             case '^' :
1444               if (getNextChar('='))
1445                 return TokenNameXOR_EQUAL;
1446               return TokenNameXOR;
1447             case '?' :
1448               if (getNextChar('>')) {
1449                 phpMode = false;
1450                 if (currentPosition == source.length) {
1451                   phpMode = true;
1452                   return TokenNameINLINE_HTML;
1453                 }
1454                 return getInlinedHTML(currentPosition - 2);
1455               }
1456               return TokenNameQUESTION;
1457             case ':' :
1458               if (getNextChar(':'))
1459                 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1460               return TokenNameCOLON;
1461             case '@' :
1462               return TokenNameAT;
1463             case '\'' :
1464               consumeStringConstant();
1465               return TokenNameStringSingleQuote;
1466             case '"' :
1467               if (tokenizeStrings) {
1468                 consumeStringLiteral();
1469                 return TokenNameStringDoubleQuote;
1470               }
1471               return TokenNameEncapsedString2;
1472             case '`' :
1473               if (tokenizeStrings) {
1474                 consumeStringInterpolated();
1475                 return TokenNameStringInterpolated;
1476               }
1477               return TokenNameEncapsedString0;
1478             case '#' :
1479             case '/' :
1480               {
1481                 char startChar = currentCharacter;
1482                 if (getNextChar('=')) {
1483                   return TokenNameDIVIDE_EQUAL;
1484                 }
1485                 int test;
1486                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1487                   //line comment
1488                         this.lastCommentLinePosition = this.currentPosition;
1489                   int endPositionForLineComment = 0;
1490                   try { //get the next char
1491                     currentCharacter = source[currentPosition++];
1492                     //                    if (((currentCharacter = source[currentPosition++])
1493                     //                      == '\\')
1494                     //                      && (source[currentPosition] == 'u')) {
1495                     //                      //-------------unicode traitement ------------
1496                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1497                     //                      currentPosition++;
1498                     //                      while (source[currentPosition] == 'u') {
1499                     //                        currentPosition++;
1500                     //                      }
1501                     //                      if ((c1 =
1502                     //                        Character.getNumericValue(source[currentPosition++]))
1503                     //                        > 15
1504                     //                        || c1 < 0
1505                     //                        || (c2 =
1506                     //                          Character.getNumericValue(source[currentPosition++]))
1507                     //                          > 15
1508                     //                        || c2 < 0
1509                     //                        || (c3 =
1510                     //                          Character.getNumericValue(source[currentPosition++]))
1511                     //                          > 15
1512                     //                        || c3 < 0
1513                     //                        || (c4 =
1514                     //                          Character.getNumericValue(source[currentPosition++]))
1515                     //                          > 15
1516                     //                        || c4 < 0) {
1517                     //                        throw new
1518                     // InvalidInputException(INVALID_UNICODE_ESCAPE);
1519                     //                      } else {
1520                     //                        currentCharacter =
1521                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1522                     //                      }
1523                     //                    }
1524                     //handle the \\u case manually into comment
1525                     //                    if (currentCharacter == '\\') {
1526                     //                      if (source[currentPosition] == '\\')
1527                     //                        currentPosition++;
1528                     //                    } //jump over the \\
1529                     boolean isUnicode = false;
1530                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1531                         this.lastCommentLinePosition = this.currentPosition;
1532                       if (currentCharacter == '?') {
1533                         if (getNextChar('>')) {
1534                           startPosition = currentPosition - 2;
1535                           phpMode = false;
1536                           return TokenNameINLINE_HTML;
1537                         }
1538                       }
1539                       //get the next char
1540                       isUnicode = false;
1541                       currentCharacter = source[currentPosition++];
1542                       //                      if (((currentCharacter = source[currentPosition++])
1543                       //                        == '\\')
1544                       //                        && (source[currentPosition] == 'u')) {
1545                       //                        isUnicode = true;
1546                       //                        //-------------unicode traitement ------------
1547                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1548                       //                        currentPosition++;
1549                       //                        while (source[currentPosition] == 'u') {
1550                       //                          currentPosition++;
1551                       //                        }
1552                       //                        if ((c1 =
1553                       //                          Character.getNumericValue(source[currentPosition++]))
1554                       //                          > 15
1555                       //                          || c1 < 0
1556                       //                          || (c2 =
1557                       //                            Character.getNumericValue(
1558                       //                              source[currentPosition++]))
1559                       //                            > 15
1560                       //                          || c2 < 0
1561                       //                          || (c3 =
1562                       //                            Character.getNumericValue(
1563                       //                              source[currentPosition++]))
1564                       //                            > 15
1565                       //                          || c3 < 0
1566                       //                          || (c4 =
1567                       //                            Character.getNumericValue(
1568                       //                              source[currentPosition++]))
1569                       //                            > 15
1570                       //                          || c4 < 0) {
1571                       //                          throw new
1572                       // InvalidInputException(INVALID_UNICODE_ESCAPE);
1573                       //                        } else {
1574                       //                          currentCharacter =
1575                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1576                       //                        }
1577                       //                      }
1578                       //handle the \\u case manually into comment
1579                       //                      if (currentCharacter == '\\') {
1580                       //                        if (source[currentPosition] == '\\')
1581                       //                          currentPosition++;
1582                       //                      } //jump over the \\
1583                     }
1584                     if (isUnicode) {
1585                       endPositionForLineComment = currentPosition - 6;
1586                     } else {
1587                       endPositionForLineComment = currentPosition - 1;
1588                     }
1589 //                    recordComment(false);
1590                     recordComment(TokenNameCOMMENT_LINE);
1591                     if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1592                                         if ((currentCharacter == '\r')
1593                         || (currentCharacter == '\n')) {
1594                       checkNonExternalizeString();
1595                       if (recordLineSeparator) {
1596                         if (isUnicode) {
1597                           pushUnicodeLineSeparator();
1598                         } else {
1599                           pushLineSeparator();
1600                         }
1601                       } else {
1602                         currentLine = null;
1603                       }
1604                     }
1605                     if (tokenizeComments) {
1606                       if (!isUnicode) {
1607                         currentPosition = endPositionForLineComment;
1608                         // reset one character behind
1609                       }
1610                       return TokenNameCOMMENT_LINE;
1611                     }
1612                   } catch (IndexOutOfBoundsException e) { //an eof will them
1613                     // be generated
1614                     if (tokenizeComments) {
1615                       currentPosition--;
1616                       // reset one character behind
1617                       return TokenNameCOMMENT_LINE;
1618                     }
1619                   }
1620                   break;
1621                 }
1622                 if (test > 0) {
1623                   //traditional and annotation comment
1624                   boolean isJavadoc = false, star = false;
1625                   // consume next character
1626                   unicodeAsBackSlash = false;
1627                   currentCharacter = source[currentPosition++];
1628                   //                  if (((currentCharacter = source[currentPosition++]) ==
1629                   // '\\')
1630                   //                    && (source[currentPosition] == 'u')) {
1631                   //                    getNextUnicodeChar();
1632                   //                  } else {
1633                   //                    if (withoutUnicodePtr != 0) {
1634                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1635                   //                        currentCharacter;
1636                   //                    }
1637                   //                  }
1638                   if (currentCharacter == '*') {
1639                     isJavadoc = true;
1640                     star = true;
1641                   }
1642                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1643                     checkNonExternalizeString();
1644                     if (recordLineSeparator) {
1645                       pushLineSeparator();
1646                     } else {
1647                       currentLine = null;
1648                     }
1649                   }
1650                   try { //get the next char
1651                     currentCharacter = source[currentPosition++];
1652                     //                    if (((currentCharacter = source[currentPosition++])
1653                     //                      == '\\')
1654                     //                      && (source[currentPosition] == 'u')) {
1655                     //                      //-------------unicode traitement ------------
1656                     //                      getNextUnicodeChar();
1657                     //                    }
1658                     //handle the \\u case manually into comment
1659                     //                    if (currentCharacter == '\\') {
1660                     //                      if (source[currentPosition] == '\\')
1661                     //                        currentPosition++;
1662                     //                      //jump over the \\
1663                     //                    }
1664                     // empty comment is not a javadoc /**/
1665                     if (currentCharacter == '/') {
1666                       isJavadoc = false;
1667                     }
1668                     //loop until end of comment */
1669                     while ((currentCharacter != '/') || (!star)) {
1670                       if ((currentCharacter == '\r')
1671                           || (currentCharacter == '\n')) {
1672                         checkNonExternalizeString();
1673                         if (recordLineSeparator) {
1674                           pushLineSeparator();
1675                         } else {
1676                           currentLine = null;
1677                         }
1678                       }
1679                       star = currentCharacter == '*';
1680                       //get next char
1681                       currentCharacter = source[currentPosition++];
1682                       //                      if (((currentCharacter = source[currentPosition++])
1683                       //                        == '\\')
1684                       //                        && (source[currentPosition] == 'u')) {
1685                       //                        //-------------unicode traitement ------------
1686                       //                        getNextUnicodeChar();
1687                       //                      }
1688                       //handle the \\u case manually into comment
1689                       //                      if (currentCharacter == '\\') {
1690                       //                        if (source[currentPosition] == '\\')
1691                       //                          currentPosition++;
1692                       //                      } //jump over the \\
1693                     }
1694                     //recordComment(isJavadoc);
1695                     if (isJavadoc) {
1696                         recordComment(TokenNameCOMMENT_PHPDOC);
1697                     } else {
1698                         recordComment(TokenNameCOMMENT_BLOCK);
1699                     }
1700                     
1701                     if (tokenizeComments) {
1702                       if (isJavadoc)
1703                         return TokenNameCOMMENT_PHPDOC;
1704                       return TokenNameCOMMENT_BLOCK;
1705                     }
1706                   } catch (IndexOutOfBoundsException e) {
1707                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1708                   }
1709                   break;
1710                 }
1711                 return TokenNameDIVIDE;
1712               }
1713             case '\u001a' :
1714               if (atEnd())
1715                 return TokenNameEOF;
1716               //the atEnd may not be <currentPosition == source.length> if
1717               // source is only some part of a real (external) stream
1718               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1719             default :
1720               if (currentCharacter == '$') {
1721                 int oldPosition = currentPosition;
1722                 try {
1723                   currentCharacter = source[currentPosition++];
1724                   if (isPHPIdentifierStart(currentCharacter)) {
1725                     return scanIdentifierOrKeyword(true);
1726                   } else {
1727                     currentPosition = oldPosition;
1728                     return TokenNameDOLLAR;
1729                   }
1730                 } catch (IndexOutOfBoundsException e) {
1731                   currentPosition = oldPosition;
1732                   return TokenNameDOLLAR;
1733                 }
1734               }
1735               if (isPHPIdentifierStart(currentCharacter))
1736                 return scanIdentifierOrKeyword(false);
1737               if (Character.isDigit(currentCharacter))
1738                 return scanNumber(false);
1739               return TokenNameERROR;
1740           }
1741         }
1742       } //-----------------end switch while try--------------------
1743       catch (IndexOutOfBoundsException e) {
1744       }
1745     }
1746     return TokenNameEOF;
1747   }
1748   
1749   private int getInlinedHTML(int start) throws InvalidInputException {
1750         int token = getInlinedHTMLToken(start);
1751         if (token == TokenNameINLINE_HTML) {
1752 //              Stack stack = new Stack();
1753 //              // scan html for errors
1754 //              Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1755 //              int lastPHPEndPos=0;
1756 //              for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1757 //                  Tag tag=(Tag)i.next();
1758 //                  
1759 //                  if (tag instanceof StartTag) {
1760 //                      StartTag startTag=(StartTag)tag;
1761 //                    //  System.out.println("startTag: "+tag);
1762 //                      if (startTag.isServerTag()) {
1763 //                        // TODO : what to do with a server tag ? 
1764 //                      } else {
1765 //                          // do whatever with HTML start tag
1766 //                          // use startTag.getElement() to find the element corresponding
1767 //                          // to this start tag which may be useful if you implement code
1768 //                          // folding etc
1769 //                              stack.push(startTag);
1770 //                      }
1771 //                  } else {
1772 //                      EndTag endTag=(EndTag)tag;
1773 //                      StartTag stag = (StartTag) stack.peek();
1774 ////                  System.out.println("endTag: "+tag);
1775 //                      // do whatever with HTML end tag.
1776 //                  }
1777 //              }
1778         }
1779         return token;
1780   }
1781   /**
1782    * @return @throws
1783    *         InvalidInputException
1784    */
1785   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1786     //    int htmlPosition = start;
1787     if (currentPosition > source.length) {
1788       currentPosition = source.length;
1789       return TokenNameEOF;
1790     }
1791     startPosition = start;
1792     try {
1793       while (!phpMode) {
1794         currentCharacter = source[currentPosition++];
1795         if (currentCharacter == '<') {
1796           if (getNextChar('?')) {
1797             currentCharacter = source[currentPosition++];
1798             if ((currentCharacter == ' ')
1799                 || Character.isWhitespace(currentCharacter)) {
1800               // <?
1801               phpMode = true;
1802               return TokenNameINLINE_HTML;
1803             } else {
1804               boolean phpStart = (currentCharacter == 'P')
1805                   || (currentCharacter == 'p');
1806               if (phpStart) {
1807                 int test = getNextChar('H', 'h');
1808                 if (test >= 0) {
1809                   test = getNextChar('P', 'p');
1810                   if (test >= 0) {
1811                     // <?PHP <?php
1812                     phpMode = true;
1813                     return TokenNameINLINE_HTML;
1814                   }
1815                 }
1816               }
1817             }
1818           }
1819         }
1820         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1821           if (recordLineSeparator) {
1822             pushLineSeparator();
1823           } else {
1824             currentLine = null;
1825           }
1826         }
1827       } //-----------------while--------------------
1828       phpMode = true;
1829       return TokenNameINLINE_HTML;
1830     } //-----------------try--------------------
1831     catch (IndexOutOfBoundsException e) {
1832       startPosition = start;
1833       currentPosition--;
1834     }
1835     phpMode = true;
1836     return TokenNameINLINE_HTML;
1837   }
1838   //  public final void getNextUnicodeChar()
1839   //    throws IndexOutOfBoundsException, InvalidInputException {
1840   //    //VOID
1841   //    //handle the case of unicode.
1842   //    //when a unicode appears then we must use a buffer that holds char
1843   // internal values
1844   //    //At the end of this method currentCharacter holds the new visited char
1845   //    //and currentPosition points right next after it
1846   //
1847   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1848   //
1849   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1850   //    currentPosition++;
1851   //    while (source[currentPosition] == 'u') {
1852   //      currentPosition++;
1853   //      unicodeSize++;
1854   //    }
1855   //
1856   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1857   //      || c1 < 0
1858   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1859   //      || c2 < 0
1860   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1861   //      || c3 < 0
1862   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1863   //      || c4 < 0) {
1864   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1865   //    } else {
1866   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1867   //      //need the unicode buffer
1868   //      if (withoutUnicodePtr == 0) {
1869   //        //buffer all the entries that have been left aside....
1870   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1871   //        System.arraycopy(
1872   //          source,
1873   //          startPosition,
1874   //          withoutUnicodeBuffer,
1875   //          1,
1876   //          withoutUnicodePtr);
1877   //      }
1878   //      //fill the buffer with the char
1879   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1880   //    }
1881   //    unicodeAsBackSlash = currentCharacter == '\\';
1882   //  }
1883   /*
1884    * Tokenize a method body, assuming that curly brackets are properly
1885    * balanced.
1886    */
1887   public final void jumpOverMethodBody() {
1888     this.wasAcr = false;
1889     int found = 1;
1890     try {
1891       while (true) { //loop for jumping over comments
1892         // ---------Consume white space and handles startPosition---------
1893         boolean isWhiteSpace;
1894         do {
1895           startPosition = currentPosition;
1896           currentCharacter = source[currentPosition++];
1897           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1898           //            && (source[currentPosition] == 'u')) {
1899           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1900           //          } else {
1901           if (recordLineSeparator
1902               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1903             pushLineSeparator();
1904           isWhiteSpace = Character.isWhitespace(currentCharacter);
1905           //          }
1906         } while (isWhiteSpace);
1907         // -------consume token until } is found---------
1908         switch (currentCharacter) {
1909           case '{' :
1910             found++;
1911             break;
1912           case '}' :
1913             found--;
1914             if (found == 0)
1915               return;
1916             break;
1917           case '\'' :
1918             {
1919               boolean test;
1920               test = getNextChar('\\');
1921               if (test) {
1922                 try {
1923                   scanDoubleQuotedEscapeCharacter();
1924                 } catch (InvalidInputException ex) {
1925                 };
1926               } else {
1927                 //                try { // consume next character
1928                 unicodeAsBackSlash = false;
1929                 currentCharacter = source[currentPosition++];
1930                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1931                 //                    && (source[currentPosition] == 'u')) {
1932                 //                    getNextUnicodeChar();
1933                 //                  } else {
1934                 if (withoutUnicodePtr != 0) {
1935                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1936                 }
1937                 //                  }
1938                 //                } catch (InvalidInputException ex) {
1939                 //                };
1940               }
1941               getNextChar('\'');
1942               break;
1943             }
1944           case '"' :
1945             try {
1946               //              try { // consume next character
1947               unicodeAsBackSlash = false;
1948               currentCharacter = source[currentPosition++];
1949               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1950               //                  && (source[currentPosition] == 'u')) {
1951               //                  getNextUnicodeChar();
1952               //                } else {
1953               if (withoutUnicodePtr != 0) {
1954                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1955               }
1956               //                }
1957               //              } catch (InvalidInputException ex) {
1958               //              };
1959               while (currentCharacter != '"') {
1960                 if (currentCharacter == '\r') {
1961                   if (source[currentPosition] == '\n')
1962                     currentPosition++;
1963                   break;
1964                   // the string cannot go further that the line
1965                 }
1966                 if (currentCharacter == '\n') {
1967                   break;
1968                   // the string cannot go further that the line
1969                 }
1970                 if (currentCharacter == '\\') {
1971                   try {
1972                     scanDoubleQuotedEscapeCharacter();
1973                   } catch (InvalidInputException ex) {
1974                   };
1975                 }
1976                 //                try { // consume next character
1977                 unicodeAsBackSlash = false;
1978                 currentCharacter = source[currentPosition++];
1979                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1980                 //                    && (source[currentPosition] == 'u')) {
1981                 //                    getNextUnicodeChar();
1982                 //                  } else {
1983                 if (withoutUnicodePtr != 0) {
1984                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1985                 }
1986                 //                  }
1987                 //                } catch (InvalidInputException ex) {
1988                 //                };
1989               }
1990             } catch (IndexOutOfBoundsException e) {
1991               return;
1992             }
1993             break;
1994           case '/' :
1995             {
1996               int test;
1997               if ((test = getNextChar('/', '*')) == 0) {
1998                 //line comment
1999                 try {
2000                   //get the next char
2001                   currentCharacter = source[currentPosition++];
2002                   //                  if (((currentCharacter = source[currentPosition++]) ==
2003                   // '\\')
2004                   //                    && (source[currentPosition] == 'u')) {
2005                   //                    //-------------unicode traitement ------------
2006                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2007                   //                    currentPosition++;
2008                   //                    while (source[currentPosition] == 'u') {
2009                   //                      currentPosition++;
2010                   //                    }
2011                   //                    if ((c1 =
2012                   //                      Character.getNumericValue(source[currentPosition++]))
2013                   //                      > 15
2014                   //                      || c1 < 0
2015                   //                      || (c2 =
2016                   //                        Character.getNumericValue(source[currentPosition++]))
2017                   //                        > 15
2018                   //                      || c2 < 0
2019                   //                      || (c3 =
2020                   //                        Character.getNumericValue(source[currentPosition++]))
2021                   //                        > 15
2022                   //                      || c3 < 0
2023                   //                      || (c4 =
2024                   //                        Character.getNumericValue(source[currentPosition++]))
2025                   //                        > 15
2026                   //                      || c4 < 0) {
2027                   //                      //error don't care of the value
2028                   //                      currentCharacter = 'A';
2029                   //                    } //something different from \n and \r
2030                   //                    else {
2031                   //                      currentCharacter =
2032                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2033                   //                    }
2034                   //                  }
2035                   while (currentCharacter != '\r' && currentCharacter != '\n') {
2036                     //get the next char
2037                     currentCharacter = source[currentPosition++];
2038                     //                    if (((currentCharacter = source[currentPosition++])
2039                     //                      == '\\')
2040                     //                      && (source[currentPosition] == 'u')) {
2041                     //                      //-------------unicode traitement ------------
2042                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2043                     //                      currentPosition++;
2044                     //                      while (source[currentPosition] == 'u') {
2045                     //                        currentPosition++;
2046                     //                      }
2047                     //                      if ((c1 =
2048                     //                        Character.getNumericValue(source[currentPosition++]))
2049                     //                        > 15
2050                     //                        || c1 < 0
2051                     //                        || (c2 =
2052                     //                          Character.getNumericValue(source[currentPosition++]))
2053                     //                          > 15
2054                     //                        || c2 < 0
2055                     //                        || (c3 =
2056                     //                          Character.getNumericValue(source[currentPosition++]))
2057                     //                          > 15
2058                     //                        || c3 < 0
2059                     //                        || (c4 =
2060                     //                          Character.getNumericValue(source[currentPosition++]))
2061                     //                          > 15
2062                     //                        || c4 < 0) {
2063                     //                        //error don't care of the value
2064                     //                        currentCharacter = 'A';
2065                     //                      } //something different from \n and \r
2066                     //                      else {
2067                     //                        currentCharacter =
2068                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2069                     //                      }
2070                     //                    }
2071                   }
2072                   if (recordLineSeparator
2073                       && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2074                     pushLineSeparator();
2075                 } catch (IndexOutOfBoundsException e) {
2076                 } //an eof will them be generated
2077                 break;
2078               }
2079               if (test > 0) {
2080                 //traditional and annotation comment
2081                 boolean star = false;
2082                 //                try { // consume next character
2083                 unicodeAsBackSlash = false;
2084                 currentCharacter = source[currentPosition++];
2085                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2086                 //                    && (source[currentPosition] == 'u')) {
2087                 //                    getNextUnicodeChar();
2088                 //                  } else {
2089                 if (withoutUnicodePtr != 0) {
2090                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2091                 }
2092                 //                  };
2093                 //                } catch (InvalidInputException ex) {
2094                 //                };
2095                 if (currentCharacter == '*') {
2096                   star = true;
2097                 }
2098                 if (recordLineSeparator
2099                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2100                   pushLineSeparator();
2101                 try { //get the next char
2102                   currentCharacter = source[currentPosition++];
2103                   //                  if (((currentCharacter = source[currentPosition++]) ==
2104                   // '\\')
2105                   //                    && (source[currentPosition] == 'u')) {
2106                   //                    //-------------unicode traitement ------------
2107                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2108                   //                    currentPosition++;
2109                   //                    while (source[currentPosition] == 'u') {
2110                   //                      currentPosition++;
2111                   //                    }
2112                   //                    if ((c1 =
2113                   //                      Character.getNumericValue(source[currentPosition++]))
2114                   //                      > 15
2115                   //                      || c1 < 0
2116                   //                      || (c2 =
2117                   //                        Character.getNumericValue(source[currentPosition++]))
2118                   //                        > 15
2119                   //                      || c2 < 0
2120                   //                      || (c3 =
2121                   //                        Character.getNumericValue(source[currentPosition++]))
2122                   //                        > 15
2123                   //                      || c3 < 0
2124                   //                      || (c4 =
2125                   //                        Character.getNumericValue(source[currentPosition++]))
2126                   //                        > 15
2127                   //                      || c4 < 0) {
2128                   //                      //error don't care of the value
2129                   //                      currentCharacter = 'A';
2130                   //                    } //something different from * and /
2131                   //                    else {
2132                   //                      currentCharacter =
2133                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2134                   //                    }
2135                   //                  }
2136                   //loop until end of comment */
2137                   while ((currentCharacter != '/') || (!star)) {
2138                     if (recordLineSeparator
2139                         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2140                       pushLineSeparator();
2141                     star = currentCharacter == '*';
2142                     //get next char
2143                     currentCharacter = source[currentPosition++];
2144                     //                    if (((currentCharacter = source[currentPosition++])
2145                     //                      == '\\')
2146                     //                      && (source[currentPosition] == 'u')) {
2147                     //                      //-------------unicode traitement ------------
2148                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2149                     //                      currentPosition++;
2150                     //                      while (source[currentPosition] == 'u') {
2151                     //                        currentPosition++;
2152                     //                      }
2153                     //                      if ((c1 =
2154                     //                        Character.getNumericValue(source[currentPosition++]))
2155                     //                        > 15
2156                     //                        || c1 < 0
2157                     //                        || (c2 =
2158                     //                          Character.getNumericValue(source[currentPosition++]))
2159                     //                          > 15
2160                     //                        || c2 < 0
2161                     //                        || (c3 =
2162                     //                          Character.getNumericValue(source[currentPosition++]))
2163                     //                          > 15
2164                     //                        || c3 < 0
2165                     //                        || (c4 =
2166                     //                          Character.getNumericValue(source[currentPosition++]))
2167                     //                          > 15
2168                     //                        || c4 < 0) {
2169                     //                        //error don't care of the value
2170                     //                        currentCharacter = 'A';
2171                     //                      } //something different from * and /
2172                     //                      else {
2173                     //                        currentCharacter =
2174                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2175                     //                      }
2176                     //                    }
2177                   }
2178                 } catch (IndexOutOfBoundsException e) {
2179                   return;
2180                 }
2181                 break;
2182               }
2183               break;
2184             }
2185           default :
2186             if (isPHPIdentifierStart(currentCharacter)
2187                 || currentCharacter == '$') {
2188               try {
2189                 scanIdentifierOrKeyword((currentCharacter == '$'));
2190               } catch (InvalidInputException ex) {
2191               };
2192               break;
2193             }
2194             if (Character.isDigit(currentCharacter)) {
2195               try {
2196                 scanNumber(false);
2197               } catch (InvalidInputException ex) {
2198               };
2199               break;
2200             }
2201         }
2202       }
2203       //-----------------end switch while try--------------------
2204     } catch (IndexOutOfBoundsException e) {
2205     } catch (InvalidInputException e) {
2206     }
2207     return;
2208   }
2209   //  public final boolean jumpOverUnicodeWhiteSpace()
2210   //    throws InvalidInputException {
2211   //    //BOOLEAN
2212   //    //handle the case of unicode. Jump over the next whiteSpace
2213   //    //making startPosition pointing on the next available char
2214   //    //On false, the currentCharacter is filled up with a potential
2215   //    //correct char
2216   //
2217   //    try {
2218   //      this.wasAcr = false;
2219   //      int c1, c2, c3, c4;
2220   //      int unicodeSize = 6;
2221   //      currentPosition++;
2222   //      while (source[currentPosition] == 'u') {
2223   //        currentPosition++;
2224   //        unicodeSize++;
2225   //      }
2226   //
2227   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2228   //        || c1 < 0)
2229   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2230   //          || c2 < 0)
2231   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2232   //          || c3 < 0)
2233   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2234   //          || c4 < 0)) {
2235   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2236   //      }
2237   //
2238   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2239   //      if (recordLineSeparator
2240   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2241   //        pushLineSeparator();
2242   //      if (Character.isWhitespace(currentCharacter))
2243   //        return true;
2244   //
2245   //      //buffer the new char which is not a white space
2246   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2247   //      //withoutUnicodePtr == 1 is true here
2248   //      return false;
2249   //    } catch (IndexOutOfBoundsException e) {
2250   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2251   //    }
2252   //  }
2253   public final int[] getLineEnds() {
2254     //return a bounded copy of this.lineEnds
2255     int[] copy;
2256     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2257     return copy;
2258   }
2259   public char[] getSource() {
2260     return this.source;
2261   }
2262   public static boolean isIdentifierOrKeyword(int token) {
2263     return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2264   }
2265   final char[] optimizedCurrentTokenSource1() {
2266     //return always the same char[] build only once
2267     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2268     char charOne = source[startPosition];
2269     switch (charOne) {
2270       case 'a' :
2271         return charArray_a;
2272       case 'b' :
2273         return charArray_b;
2274       case 'c' :
2275         return charArray_c;
2276       case 'd' :
2277         return charArray_d;
2278       case 'e' :
2279         return charArray_e;
2280       case 'f' :
2281         return charArray_f;
2282       case 'g' :
2283         return charArray_g;
2284       case 'h' :
2285         return charArray_h;
2286       case 'i' :
2287         return charArray_i;
2288       case 'j' :
2289         return charArray_j;
2290       case 'k' :
2291         return charArray_k;
2292       case 'l' :
2293         return charArray_l;
2294       case 'm' :
2295         return charArray_m;
2296       case 'n' :
2297         return charArray_n;
2298       case 'o' :
2299         return charArray_o;
2300       case 'p' :
2301         return charArray_p;
2302       case 'q' :
2303         return charArray_q;
2304       case 'r' :
2305         return charArray_r;
2306       case 's' :
2307         return charArray_s;
2308       case 't' :
2309         return charArray_t;
2310       case 'u' :
2311         return charArray_u;
2312       case 'v' :
2313         return charArray_v;
2314       case 'w' :
2315         return charArray_w;
2316       case 'x' :
2317         return charArray_x;
2318       case 'y' :
2319         return charArray_y;
2320       case 'z' :
2321         return charArray_z;
2322       default :
2323         return new char[]{charOne};
2324     }
2325   }
2326   final char[] optimizedCurrentTokenSource2() {
2327     //try to return the same char[] build only once
2328     char c0, c1;
2329     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2330         % TableSize;
2331     char[][] table = charArray_length[0][hash];
2332     int i = newEntry2;
2333     while (++i < InternalTableSize) {
2334       char[] charArray = table[i];
2335       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2336         return charArray;
2337     }
2338     //---------other side---------
2339     i = -1;
2340     int max = newEntry2;
2341     while (++i <= max) {
2342       char[] charArray = table[i];
2343       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2344         return charArray;
2345     }
2346     //--------add the entry-------
2347     if (++max >= InternalTableSize)
2348       max = 0;
2349     char[] r;
2350     table[max] = (r = new char[]{c0, c1});
2351     newEntry2 = max;
2352     return r;
2353   }
2354   final char[] optimizedCurrentTokenSource3() {
2355     //try to return the same char[] build only once
2356     char c0, c1, c2;
2357     int hash = (((c0 = source[startPosition]) << 12)
2358         + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2359         % TableSize;
2360     char[][] table = charArray_length[1][hash];
2361     int i = newEntry3;
2362     while (++i < InternalTableSize) {
2363       char[] charArray = table[i];
2364       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2365         return charArray;
2366     }
2367     //---------other side---------
2368     i = -1;
2369     int max = newEntry3;
2370     while (++i <= max) {
2371       char[] charArray = table[i];
2372       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2373         return charArray;
2374     }
2375     //--------add the entry-------
2376     if (++max >= InternalTableSize)
2377       max = 0;
2378     char[] r;
2379     table[max] = (r = new char[]{c0, c1, c2});
2380     newEntry3 = max;
2381     return r;
2382   }
2383   final char[] optimizedCurrentTokenSource4() {
2384     //try to return the same char[] build only once
2385     char c0, c1, c2, c3;
2386     long hash = ((((long) (c0 = source[startPosition])) << 18)
2387         + ((c1 = source[startPosition + 1]) << 12)
2388         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2389         % TableSize;
2390     char[][] table = charArray_length[2][(int) hash];
2391     int i = newEntry4;
2392     while (++i < InternalTableSize) {
2393       char[] charArray = table[i];
2394       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2395           && (c3 == charArray[3]))
2396         return charArray;
2397     }
2398     //---------other side---------
2399     i = -1;
2400     int max = newEntry4;
2401     while (++i <= max) {
2402       char[] charArray = table[i];
2403       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2404           && (c3 == charArray[3]))
2405         return charArray;
2406     }
2407     //--------add the entry-------
2408     if (++max >= InternalTableSize)
2409       max = 0;
2410     char[] r;
2411     table[max] = (r = new char[]{c0, c1, c2, c3});
2412     newEntry4 = max;
2413     return r;
2414   }
2415   final char[] optimizedCurrentTokenSource5() {
2416     //try to return the same char[] build only once
2417     char c0, c1, c2, c3, c4;
2418     long hash = ((((long) (c0 = source[startPosition])) << 24)
2419         + (((long) (c1 = source[startPosition + 1])) << 18)
2420         + ((c2 = source[startPosition + 2]) << 12)
2421         + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2422         % TableSize;
2423     char[][] table = charArray_length[3][(int) hash];
2424     int i = newEntry5;
2425     while (++i < InternalTableSize) {
2426       char[] charArray = table[i];
2427       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2428           && (c3 == charArray[3]) && (c4 == charArray[4]))
2429         return charArray;
2430     }
2431     //---------other side---------
2432     i = -1;
2433     int max = newEntry5;
2434     while (++i <= max) {
2435       char[] charArray = table[i];
2436       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2437           && (c3 == charArray[3]) && (c4 == charArray[4]))
2438         return charArray;
2439     }
2440     //--------add the entry-------
2441     if (++max >= InternalTableSize)
2442       max = 0;
2443     char[] r;
2444     table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2445     newEntry5 = max;
2446     return r;
2447   }
2448   final char[] optimizedCurrentTokenSource6() {
2449     //try to return the same char[] build only once
2450     char c0, c1, c2, c3, c4, c5;
2451     long hash = ((((long) (c0 = source[startPosition])) << 32)
2452         + (((long) (c1 = source[startPosition + 1])) << 24)
2453         + (((long) (c2 = source[startPosition + 2])) << 18)
2454         + ((c3 = source[startPosition + 3]) << 12)
2455         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2456         % TableSize;
2457     char[][] table = charArray_length[4][(int) hash];
2458     int i = newEntry6;
2459     while (++i < InternalTableSize) {
2460       char[] charArray = table[i];
2461       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2462           && (c3 == charArray[3]) && (c4 == charArray[4])
2463           && (c5 == charArray[5]))
2464         return charArray;
2465     }
2466     //---------other side---------
2467     i = -1;
2468     int max = newEntry6;
2469     while (++i <= max) {
2470       char[] charArray = table[i];
2471       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2472           && (c3 == charArray[3]) && (c4 == charArray[4])
2473           && (c5 == charArray[5]))
2474         return charArray;
2475     }
2476     //--------add the entry-------
2477     if (++max >= InternalTableSize)
2478       max = 0;
2479     char[] r;
2480     table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2481     newEntry6 = max;
2482     return r;
2483   }
2484   public final void pushLineSeparator() throws InvalidInputException {
2485     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2486     final int INCREMENT = 250;
2487     if (this.checkNonExternalizedStringLiterals) {
2488       // reinitialize the current line for non externalize strings purpose
2489       currentLine = null;
2490     }
2491     //currentCharacter is at position currentPosition-1
2492     // cr 000D
2493     if (currentCharacter == '\r') {
2494       int separatorPos = currentPosition - 1;
2495       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2496         return;
2497       //System.out.println("CR-" + separatorPos);
2498       try {
2499         lineEnds[++linePtr] = separatorPos;
2500       } catch (IndexOutOfBoundsException e) {
2501         //linePtr value is correct
2502         int oldLength = lineEnds.length;
2503         int[] old = lineEnds;
2504         lineEnds = new int[oldLength + INCREMENT];
2505         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2506         lineEnds[linePtr] = separatorPos;
2507       }
2508       // look-ahead for merged cr+lf
2509       try {
2510         if (source[currentPosition] == '\n') {
2511           //System.out.println("look-ahead LF-" + currentPosition);
2512           lineEnds[linePtr] = currentPosition;
2513           currentPosition++;
2514           wasAcr = false;
2515         } else {
2516           wasAcr = true;
2517         }
2518       } catch (IndexOutOfBoundsException e) {
2519         wasAcr = true;
2520       }
2521     } else {
2522       // lf 000A
2523       if (currentCharacter == '\n') {
2524         //must merge eventual cr followed by lf
2525         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2526           //System.out.println("merge LF-" + (currentPosition - 1));
2527           lineEnds[linePtr] = currentPosition - 1;
2528         } else {
2529           int separatorPos = currentPosition - 1;
2530           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2531             return;
2532           // System.out.println("LF-" + separatorPos);
2533           try {
2534             lineEnds[++linePtr] = separatorPos;
2535           } catch (IndexOutOfBoundsException e) {
2536             //linePtr value is correct
2537             int oldLength = lineEnds.length;
2538             int[] old = lineEnds;
2539             lineEnds = new int[oldLength + INCREMENT];
2540             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2541             lineEnds[linePtr] = separatorPos;
2542           }
2543         }
2544         wasAcr = false;
2545       }
2546     }
2547   }
2548   public final void pushUnicodeLineSeparator() {
2549     // isUnicode means that the \r or \n has been read as a unicode character
2550     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2551     final int INCREMENT = 250;
2552     //currentCharacter is at position currentPosition-1
2553     if (this.checkNonExternalizedStringLiterals) {
2554       // reinitialize the current line for non externalize strings purpose
2555       currentLine = null;
2556     }
2557     // cr 000D
2558     if (currentCharacter == '\r') {
2559       int separatorPos = currentPosition - 6;
2560       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2561         return;
2562       //System.out.println("CR-" + separatorPos);
2563       try {
2564         lineEnds[++linePtr] = separatorPos;
2565       } catch (IndexOutOfBoundsException e) {
2566         //linePtr value is correct
2567         int oldLength = lineEnds.length;
2568         int[] old = lineEnds;
2569         lineEnds = new int[oldLength + INCREMENT];
2570         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2571         lineEnds[linePtr] = separatorPos;
2572       }
2573       // look-ahead for merged cr+lf
2574       if (source[currentPosition] == '\n') {
2575         //System.out.println("look-ahead LF-" + currentPosition);
2576         lineEnds[linePtr] = currentPosition;
2577         currentPosition++;
2578         wasAcr = false;
2579       } else {
2580         wasAcr = true;
2581       }
2582     } else {
2583       // lf 000A
2584       if (currentCharacter == '\n') {
2585         //must merge eventual cr followed by lf
2586         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2587           //System.out.println("merge LF-" + (currentPosition - 1));
2588           lineEnds[linePtr] = currentPosition - 6;
2589         } else {
2590           int separatorPos = currentPosition - 6;
2591           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2592             return;
2593           // System.out.println("LF-" + separatorPos);
2594           try {
2595             lineEnds[++linePtr] = separatorPos;
2596           } catch (IndexOutOfBoundsException e) {
2597             //linePtr value is correct
2598             int oldLength = lineEnds.length;
2599             int[] old = lineEnds;
2600             lineEnds = new int[oldLength + INCREMENT];
2601             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2602             lineEnds[linePtr] = separatorPos;
2603           }
2604         }
2605         wasAcr = false;
2606       }
2607     }
2608   }
2609   public void recordComment(int token) {
2610         // compute position
2611         int stopPosition = this.currentPosition;
2612         switch (token) {
2613                 case TokenNameCOMMENT_LINE:
2614                         stopPosition = -this.lastCommentLinePosition;
2615                         break;
2616                 case TokenNameCOMMENT_BLOCK:
2617                         stopPosition = -this.currentPosition;
2618                         break;
2619         }
2620
2621         // a new comment is recorded
2622         int length = this.commentStops.length;
2623         if (++this.commentPtr >=  length) {
2624                 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2625                 //grows the positions buffers too
2626                 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2627         }
2628         this.commentStops[this.commentPtr] = stopPosition;
2629         this.commentStarts[this.commentPtr] = this.startPosition;
2630 }
2631 //  public final void recordComment(boolean isJavadoc) {
2632 //    // a new annotation comment is recorded
2633 //    try {
2634 //      commentStops[++commentPtr] = isJavadoc
2635 //          ? currentPosition
2636 //          : -currentPosition;
2637 //    } catch (IndexOutOfBoundsException e) {
2638 //      int oldStackLength = commentStops.length;
2639 //      int[] oldStack = commentStops;
2640 //      commentStops = new int[oldStackLength + 30];
2641 //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2642 //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2643 //      //grows the positions buffers too
2644 //      int[] old = commentStarts;
2645 //      commentStarts = new int[oldStackLength + 30];
2646 //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2647 //    }
2648 //    //the buffer is of a correct size here
2649 //    commentStarts[commentPtr] = startPosition;
2650 //  }
2651   public void resetTo(int begin, int end) {
2652     //reset the scanner to a given position where it may rescan again
2653     diet = false;
2654     initialPosition = startPosition = currentPosition = begin;
2655     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2656     commentPtr = -1; // reset comment stack
2657   }
2658   public final void scanSingleQuotedEscapeCharacter()
2659       throws InvalidInputException {
2660     // the string with "\\u" is a legal string of two chars \ and u
2661     //thus we use a direct access to the source (for regular cases).
2662     //    if (unicodeAsBackSlash) {
2663     //      // consume next character
2664     //      unicodeAsBackSlash = false;
2665     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2666     //        && (source[currentPosition] == 'u')) {
2667     //        getNextUnicodeChar();
2668     //      } else {
2669     //        if (withoutUnicodePtr != 0) {
2670     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2671     //        }
2672     //      }
2673     //    } else
2674     currentCharacter = source[currentPosition++];
2675     switch (currentCharacter) {
2676       case '\'' :
2677         currentCharacter = '\'';
2678         break;
2679       case '\\' :
2680         currentCharacter = '\\';
2681         break;
2682       default :
2683         currentCharacter = '\\';
2684         currentPosition--;
2685     }
2686   }
2687   public final void scanDoubleQuotedEscapeCharacter()
2688       throws InvalidInputException {
2689     // the string with "\\u" is a legal string of two chars \ and u
2690     //thus we use a direct access to the source (for regular cases).
2691     //    if (unicodeAsBackSlash) {
2692     //      // consume next character
2693     //      unicodeAsBackSlash = false;
2694     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2695     //        && (source[currentPosition] == 'u')) {
2696     //        getNextUnicodeChar();
2697     //      } else {
2698     //        if (withoutUnicodePtr != 0) {
2699     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2700     //        }
2701     //      }
2702     //    } else
2703     currentCharacter = source[currentPosition++];
2704     switch (currentCharacter) {
2705       //      case 'b' :
2706       //        currentCharacter = '\b';
2707       //        break;
2708       case 't' :
2709         currentCharacter = '\t';
2710         break;
2711       case 'n' :
2712         currentCharacter = '\n';
2713         break;
2714       //      case 'f' :
2715       //        currentCharacter = '\f';
2716       //        break;
2717       case 'r' :
2718         currentCharacter = '\r';
2719         break;
2720       case '\"' :
2721         currentCharacter = '\"';
2722         break;
2723       case '\'' :
2724         currentCharacter = '\'';
2725         break;
2726       case '\\' :
2727         currentCharacter = '\\';
2728         break;
2729       case '$' :
2730         currentCharacter = '$';
2731         break;
2732       default :
2733         // -----------octal escape--------------
2734         // OctalDigit
2735         // OctalDigit OctalDigit
2736         // ZeroToThree OctalDigit OctalDigit
2737         int number = Character.getNumericValue(currentCharacter);
2738         if (number >= 0 && number <= 7) {
2739           boolean zeroToThreeNot = number > 3;
2740           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2741             int digit = Character.getNumericValue(currentCharacter);
2742             if (digit >= 0 && digit <= 7) {
2743               number = (number * 8) + digit;
2744               if (Character
2745                   .isDigit(currentCharacter = source[currentPosition++])) {
2746                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2747                   // Digit --> ignore last character
2748                   currentPosition--;
2749                 } else {
2750                   digit = Character.getNumericValue(currentCharacter);
2751                   if (digit >= 0 && digit <= 7) {
2752                     // has read \ZeroToThree OctalDigit OctalDigit
2753                     number = (number * 8) + digit;
2754                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2755                     // --> ignore last character
2756                     currentPosition--;
2757                   }
2758                 }
2759               } else { // has read \OctalDigit NonDigit--> ignore last
2760                 // character
2761                 currentPosition--;
2762               }
2763             } else { // has read \OctalDigit NonOctalDigit--> ignore last
2764               // character
2765               currentPosition--;
2766             }
2767           } else { // has read \OctalDigit --> ignore last character
2768             currentPosition--;
2769           }
2770           if (number > 255)
2771             throw new InvalidInputException(INVALID_ESCAPE);
2772           currentCharacter = (char) number;
2773         }
2774     //else
2775     //     throw new InvalidInputException(INVALID_ESCAPE);
2776     }
2777   }
2778   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2779   //    return scanIdentifierOrKeyword( false );
2780   //  }
2781   public int scanIdentifierOrKeyword(boolean isVariable)
2782       throws InvalidInputException {
2783     //test keywords
2784     //first dispatch on the first char.
2785     //then the length. If there are several
2786     //keywors with the same length AND the same first char, then do another
2787     //disptach on the second char :-)...cool....but fast !
2788     useAssertAsAnIndentifier = false;
2789     while (getNextCharAsJavaIdentifierPart()) {
2790     };
2791     if (isVariable) {
2792       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2793       //        return TokenNamethis;
2794       //      }
2795       return TokenNameVariable;
2796     }
2797     int index, length;
2798     char[] data;
2799     char firstLetter;
2800     //    if (withoutUnicodePtr == 0)
2801     //quick test on length == 1 but not on length > 12 while most identifier
2802     //have a length which is <= 12...but there are lots of identifier with
2803     //only one char....
2804     //      {
2805     if ((length = currentPosition - startPosition) == 1)
2806       return TokenNameIdentifier;
2807     //  data = source;
2808     data = new char[length];
2809     index = startPosition;
2810     for (int i = 0; i < length; i++) {
2811       data[i] = Character.toLowerCase(source[index + i]);
2812     }
2813     index = 0;
2814     //    } else {
2815     //      if ((length = withoutUnicodePtr) == 1)
2816     //        return TokenNameIdentifier;
2817     //      // data = withoutUnicodeBuffer;
2818     //      data = new char[withoutUnicodeBuffer.length];
2819     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2820     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2821     //      }
2822     //      index = 1;
2823     //    }
2824     firstLetter = data[index];
2825     switch (firstLetter) {
2826       case '_' :
2827         switch (length) {
2828           case 8 :
2829             //__FILE__
2830             if ((data[++index] == '_') && (data[++index] == 'f')
2831                 && (data[++index] == 'i') && (data[++index] == 'l')
2832                 && (data[++index] == 'e') && (data[++index] == '_')
2833                 && (data[++index] == '_'))
2834               return TokenNameFILE;
2835             index = 0; //__LINE__
2836             if ((data[++index] == '_') && (data[++index] == 'l')
2837                 && (data[++index] == 'i') && (data[++index] == 'n')
2838                 && (data[++index] == 'e') && (data[++index] == '_')
2839                 && (data[++index] == '_'))
2840               return TokenNameLINE;
2841             break;
2842           case 9 :
2843             //__CLASS__
2844             if ((data[++index] == '_') && (data[++index] == 'c')
2845                 && (data[++index] == 'l') && (data[++index] == 'a')
2846                 && (data[++index] == 's') && (data[++index] == 's')
2847                 && (data[++index] == '_') && (data[++index] == '_'))
2848               return TokenNameCLASS_C;
2849             break;
2850           case 11 :
2851             //__METHOD__
2852             if ((data[++index] == '_') && (data[++index] == 'm')
2853                 && (data[++index] == 'e') && (data[++index] == 't')
2854                 && (data[++index] == 'h') && (data[++index] == 'o')
2855                 && (data[++index] == 'd') && (data[++index] == '_')
2856                 && (data[++index] == '_'))
2857               return TokenNameMETHOD_C;
2858             break;
2859           case 12 :
2860             //__FUNCTION__
2861             if ((data[++index] == '_') && (data[++index] == 'f')
2862                 && (data[++index] == 'u') && (data[++index] == 'n')
2863                 && (data[++index] == 'c') && (data[++index] == 't')
2864                 && (data[++index] == 'i') && (data[++index] == 'o')
2865                 && (data[++index] == 'n') && (data[++index] == '_')
2866                 && (data[++index] == '_'))
2867               return TokenNameFUNC_C;
2868             break;
2869         }
2870         return TokenNameIdentifier;
2871       case 'a' :
2872         // as and array abstract
2873         switch (length) {
2874           case 2 :
2875             //as
2876             if ((data[++index] == 's')) {
2877               return TokenNameas;
2878             } else {
2879               return TokenNameIdentifier;
2880             }
2881           case 3 :
2882             //and
2883             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2884               return TokenNameand;
2885             } else {
2886               return TokenNameIdentifier;
2887             }
2888           case 5 :
2889             // array
2890             if ((data[++index] == 'r') && (data[++index] == 'r')
2891                 && (data[++index] == 'a') && (data[++index] == 'y'))
2892               return TokenNamearray;
2893             else
2894               return TokenNameIdentifier;
2895           case 8 :
2896             if ((data[++index] == 'b') && (data[++index] == 's')
2897                 && (data[++index] == 't') && (data[++index] == 'r')
2898                 && (data[++index] == 'a') && (data[++index] == 'c')
2899                 && (data[++index] == 't'))
2900               return TokenNameabstract;
2901             else
2902               return TokenNameIdentifier;
2903           default :
2904             return TokenNameIdentifier;
2905         }
2906       case 'b' :
2907         //break
2908         switch (length) {
2909           case 5 :
2910             if ((data[++index] == 'r') && (data[++index] == 'e')
2911                 && (data[++index] == 'a') && (data[++index] == 'k'))
2912               return TokenNamebreak;
2913             else
2914               return TokenNameIdentifier;
2915           default :
2916             return TokenNameIdentifier;
2917         }
2918       case 'c' :
2919         //case catch class clone const continue
2920         switch (length) {
2921           case 4 :
2922             if ((data[++index] == 'a') && (data[++index] == 's')
2923                 && (data[++index] == 'e'))
2924               return TokenNamecase;
2925             else
2926               return TokenNameIdentifier;
2927           case 5 :
2928             if ((data[++index] == 'a') && (data[++index] == 't')
2929                 && (data[++index] == 'c') && (data[++index] == 'h'))
2930               return TokenNamecatch;
2931             index = 0;
2932             if ((data[++index] == 'l') && (data[++index] == 'a')
2933                 && (data[++index] == 's') && (data[++index] == 's'))
2934               return TokenNameclass;
2935             index = 0;
2936             if ((data[++index] == 'l') && (data[++index] == 'o')
2937                 && (data[++index] == 'n') && (data[++index] == 'e'))
2938               return TokenNameclone;
2939             index = 0;
2940             if ((data[++index] == 'o') && (data[++index] == 'n')
2941                 && (data[++index] == 's') && (data[++index] == 't'))
2942               return TokenNameconst;
2943             else
2944               return TokenNameIdentifier;
2945           case 8 :
2946             if ((data[++index] == 'o') && (data[++index] == 'n')
2947                 && (data[++index] == 't') && (data[++index] == 'i')
2948                 && (data[++index] == 'n') && (data[++index] == 'u')
2949                 && (data[++index] == 'e'))
2950               return TokenNamecontinue;
2951             else
2952               return TokenNameIdentifier;
2953           default :
2954             return TokenNameIdentifier;
2955         }
2956       case 'd' :
2957         // declare default do die
2958         // TODO delete define ==> no keyword !
2959         switch (length) {
2960           case 2 :
2961             if ((data[++index] == 'o'))
2962               return TokenNamedo;
2963             else
2964               return TokenNameIdentifier;
2965           //          case 6 :
2966           //            if ((data[++index] == 'e')
2967           //              && (data[++index] == 'f')
2968           //              && (data[++index] == 'i')
2969           //              && (data[++index] == 'n')
2970           //              && (data[++index] == 'e'))
2971           //              return TokenNamedefine;
2972           //            else
2973           //              return TokenNameIdentifier;
2974           case 7 :
2975             if ((data[++index] == 'e') && (data[++index] == 'c')
2976                 && (data[++index] == 'l') && (data[++index] == 'a')
2977                 && (data[++index] == 'r') && (data[++index] == 'e'))
2978               return TokenNamedeclare;
2979             index = 0;
2980             if ((data[++index] == 'e') && (data[++index] == 'f')
2981                 && (data[++index] == 'a') && (data[++index] == 'u')
2982                 && (data[++index] == 'l') && (data[++index] == 't'))
2983               return TokenNamedefault;
2984             else
2985               return TokenNameIdentifier;
2986           default :
2987             return TokenNameIdentifier;
2988         }
2989       case 'e' :
2990         //echo else exit elseif extends eval
2991         switch (length) {
2992           case 4 :
2993             if ((data[++index] == 'c') && (data[++index] == 'h')
2994                 && (data[++index] == 'o'))
2995               return TokenNameecho;
2996             else if ((data[index] == 'l') && (data[++index] == 's')
2997                 && (data[++index] == 'e'))
2998               return TokenNameelse;
2999             else if ((data[index] == 'x') && (data[++index] == 'i')
3000                 && (data[++index] == 't'))
3001               return TokenNameexit;
3002             else if ((data[index] == 'v') && (data[++index] == 'a')
3003                 && (data[++index] == 'l'))
3004               return TokenNameeval;
3005             else
3006               return TokenNameIdentifier;
3007           case 5 :
3008             // endif empty
3009             if ((data[++index] == 'n') && (data[++index] == 'd')
3010                 && (data[++index] == 'i') && (data[++index] == 'f'))
3011               return TokenNameendif;
3012             if ((data[index] == 'm') && (data[++index] == 'p')
3013                 && (data[++index] == 't') && (data[++index] == 'y'))
3014               return TokenNameempty;
3015             else
3016               return TokenNameIdentifier;
3017           case 6 :
3018             // endfor
3019             if ((data[++index] == 'n') && (data[++index] == 'd')
3020                 && (data[++index] == 'f') && (data[++index] == 'o')
3021                 && (data[++index] == 'r'))
3022               return TokenNameendfor;
3023             else if ((data[index] == 'l') && (data[++index] == 's')
3024                 && (data[++index] == 'e') && (data[++index] == 'i')
3025                 && (data[++index] == 'f'))
3026               return TokenNameelseif;
3027             else
3028               return TokenNameIdentifier;
3029           case 7 :
3030             if ((data[++index] == 'x') && (data[++index] == 't')
3031                 && (data[++index] == 'e') && (data[++index] == 'n')
3032                 && (data[++index] == 'd') && (data[++index] == 's'))
3033               return TokenNameextends;
3034             else
3035               return TokenNameIdentifier;
3036           case 8 :
3037             // endwhile
3038             if ((data[++index] == 'n') && (data[++index] == 'd')
3039                 && (data[++index] == 'w') && (data[++index] == 'h')
3040                 && (data[++index] == 'i') && (data[++index] == 'l')
3041                 && (data[++index] == 'e'))
3042               return TokenNameendwhile;
3043             else
3044               return TokenNameIdentifier;
3045           case 9 :
3046             // endswitch
3047             if ((data[++index] == 'n') && (data[++index] == 'd')
3048                 && (data[++index] == 's') && (data[++index] == 'w')
3049                 && (data[++index] == 'i') && (data[++index] == 't')
3050                 && (data[++index] == 'c') && (data[++index] == 'h'))
3051               return TokenNameendswitch;
3052             else
3053               return TokenNameIdentifier;
3054           case 10 :
3055             // enddeclare
3056             if ((data[++index] == 'n') && (data[++index] == 'd')
3057                 && (data[++index] == 'd') && (data[++index] == 'e')
3058                 && (data[++index] == 'c') && (data[++index] == 'l')
3059                 && (data[++index] == 'a') && (data[++index] == 'r')
3060                 && (data[++index] == 'e'))
3061               return TokenNameendforeach;
3062             index = 0;
3063             if ((data[++index] == 'n') // endforeach
3064                 && (data[++index] == 'd') && (data[++index] == 'f')
3065                 && (data[++index] == 'o') && (data[++index] == 'r')
3066                 && (data[++index] == 'e') && (data[++index] == 'a')
3067                 && (data[++index] == 'c') && (data[++index] == 'h'))
3068               return TokenNameendforeach;
3069             else
3070               return TokenNameIdentifier;
3071           default :
3072             return TokenNameIdentifier;
3073         }
3074       case 'f' :
3075         //for false final function
3076         switch (length) {
3077           case 3 :
3078             if ((data[++index] == 'o') && (data[++index] == 'r'))
3079               return TokenNamefor;
3080             else
3081               return TokenNameIdentifier;
3082           case 5 :
3083             //            if ((data[++index] == 'a') && (data[++index] == 'l')
3084             //                && (data[++index] == 's') && (data[++index] == 'e'))
3085             //              return TokenNamefalse;
3086             if ((data[++index] == 'i') && (data[++index] == 'n')
3087                 && (data[++index] == 'a') && (data[++index] == 'l'))
3088               return TokenNamefinal;
3089             else
3090               return TokenNameIdentifier;
3091           case 7 :
3092             // foreach
3093             if ((data[++index] == 'o') && (data[++index] == 'r')
3094                 && (data[++index] == 'e') && (data[++index] == 'a')
3095                 && (data[++index] == 'c') && (data[++index] == 'h'))
3096               return TokenNameforeach;
3097             else
3098               return TokenNameIdentifier;
3099           case 8 :
3100             // function
3101             if ((data[++index] == 'u') && (data[++index] == 'n')
3102                 && (data[++index] == 'c') && (data[++index] == 't')
3103                 && (data[++index] == 'i') && (data[++index] == 'o')
3104                 && (data[++index] == 'n'))
3105               return TokenNamefunction;
3106             else
3107               return TokenNameIdentifier;
3108           default :
3109             return TokenNameIdentifier;
3110         }
3111       case 'g' :
3112         //global
3113         if (length == 6) {
3114           if ((data[++index] == 'l') && (data[++index] == 'o')
3115               && (data[++index] == 'b') && (data[++index] == 'a')
3116               && (data[++index] == 'l')) {
3117             return TokenNameglobal;
3118           }
3119         }
3120         return TokenNameIdentifier;
3121       case 'i' :
3122         //if int isset include include_once instanceof interface implements
3123         switch (length) {
3124           case 2 :
3125             if (data[++index] == 'f')
3126               return TokenNameif;
3127             else
3128               return TokenNameIdentifier;
3129           //          case 3 :
3130           //            if ((data[++index] == 'n') && (data[++index] == 't'))
3131           //              return TokenNameint;
3132           //            else
3133           //              return TokenNameIdentifier;
3134           case 5 :
3135             if ((data[++index] == 's') && (data[++index] == 's')
3136                 && (data[++index] == 'e') && (data[++index] == 't'))
3137               return TokenNameisset;
3138             else
3139               return TokenNameIdentifier;
3140           case 7 :
3141             if ((data[++index] == 'n') && (data[++index] == 'c')
3142                 && (data[++index] == 'l') && (data[++index] == 'u')
3143                 && (data[++index] == 'd') && (data[++index] == 'e'))
3144               return TokenNameinclude;
3145             else
3146               return TokenNameIdentifier;
3147           case 9 :
3148             // interface
3149             if ((data[++index] == 'n') && (data[++index] == 't')
3150                 && (data[++index] == 'e') && (data[++index] == 'r')
3151                 && (data[++index] == 'f') && (data[++index] == 'a')
3152                 && (data[++index] == 'c') && (data[++index] == 'e'))
3153               return TokenNameinterface;
3154             else
3155               return TokenNameIdentifier;
3156           case 10 :
3157             // instanceof
3158             if ((data[++index] == 'n') && (data[++index] == 's')
3159                 && (data[++index] == 't') && (data[++index] == 'a')
3160                 && (data[++index] == 'n') && (data[++index] == 'c')
3161                 && (data[++index] == 'e') && (data[++index] == 'o')
3162                 && (data[++index] == 'f'))
3163               return TokenNameinstanceof;
3164             if ((data[index] == 'm') && (data[++index] == 'p')
3165                 && (data[++index] == 'l') && (data[++index] == 'e')
3166                 && (data[++index] == 'm') && (data[++index] == 'e')
3167                 && (data[++index] == 'n') && (data[++index] == 't')
3168                 && (data[++index] == 's'))
3169               return TokenNameimplements;
3170             else
3171               return TokenNameIdentifier;
3172           case 12 :
3173             if ((data[++index] == 'n') && (data[++index] == 'c')
3174                 && (data[++index] == 'l') && (data[++index] == 'u')
3175                 && (data[++index] == 'd') && (data[++index] == 'e')
3176                 && (data[++index] == '_') && (data[++index] == 'o')
3177                 && (data[++index] == 'n') && (data[++index] == 'c')
3178                 && (data[++index] == 'e'))
3179               return TokenNameinclude_once;
3180             else
3181               return TokenNameIdentifier;
3182           default :
3183             return TokenNameIdentifier;
3184         }
3185       case 'l' :
3186         //list
3187         if (length == 4) {
3188           if ((data[++index] == 'i') && (data[++index] == 's')
3189               && (data[++index] == 't')) {
3190             return TokenNamelist;
3191           }
3192         }
3193         return TokenNameIdentifier;
3194       case 'n' :
3195         // new null
3196         switch (length) {
3197           case 3 :
3198             if ((data[++index] == 'e') && (data[++index] == 'w'))
3199               return TokenNamenew;
3200             else
3201               return TokenNameIdentifier;
3202           //          case 4 :
3203           //            if ((data[++index] == 'u') && (data[++index] == 'l')
3204           //                && (data[++index] == 'l'))
3205           //              return TokenNamenull;
3206           //            else
3207           //              return TokenNameIdentifier;
3208           default :
3209             return TokenNameIdentifier;
3210         }
3211       case 'o' :
3212         // or old_function
3213         if (length == 2) {
3214           if (data[++index] == 'r') {
3215             return TokenNameor;
3216           }
3217         }
3218         //        if (length == 12) {
3219         //          if ((data[++index] == 'l')
3220         //            && (data[++index] == 'd')
3221         //            && (data[++index] == '_')
3222         //            && (data[++index] == 'f')
3223         //            && (data[++index] == 'u')
3224         //            && (data[++index] == 'n')
3225         //            && (data[++index] == 'c')
3226         //            && (data[++index] == 't')
3227         //            && (data[++index] == 'i')
3228         //            && (data[++index] == 'o')
3229         //            && (data[++index] == 'n')) {
3230         //            return TokenNameold_function;
3231         //          }
3232         //        }
3233         return TokenNameIdentifier;
3234       case 'p' :
3235         // print public private protected
3236         switch (length) {
3237           case 5 :
3238             if ((data[++index] == 'r') && (data[++index] == 'i')
3239                 && (data[++index] == 'n') && (data[++index] == 't')) {
3240               return TokenNameprint;
3241             } else
3242               return TokenNameIdentifier;
3243           case 6 :
3244             if ((data[++index] == 'u') && (data[++index] == 'b')
3245                 && (data[++index] == 'l') && (data[++index] == 'i')
3246                 && (data[++index] == 'c')) {
3247               return TokenNamepublic;
3248             } else
3249               return TokenNameIdentifier;
3250           case 7 :
3251             if ((data[++index] == 'r') && (data[++index] == 'i')
3252                 && (data[++index] == 'v') && (data[++index] == 'a')
3253                 && (data[++index] == 't') && (data[++index] == 'e')) {
3254               return TokenNameprivate;
3255             } else
3256               return TokenNameIdentifier;
3257           case 9 :
3258             if ((data[++index] == 'r') && (data[++index] == 'o')
3259                 && (data[++index] == 't') && (data[++index] == 'e')
3260                 && (data[++index] == 'c') && (data[++index] == 't')
3261                 && (data[++index] == 'e') && (data[++index] == 'd')) {
3262               return TokenNameprotected;
3263             } else
3264               return TokenNameIdentifier;
3265         }
3266         return TokenNameIdentifier;
3267       case 'r' :
3268         //return require require_once
3269         if (length == 6) {
3270           if ((data[++index] == 'e') && (data[++index] == 't')
3271               && (data[++index] == 'u') && (data[++index] == 'r')
3272               && (data[++index] == 'n')) {
3273             return TokenNamereturn;
3274           }
3275         } else if (length == 7) {
3276           if ((data[++index] == 'e') && (data[++index] == 'q')
3277               && (data[++index] == 'u') && (data[++index] == 'i')
3278               && (data[++index] == 'r') && (data[++index] == 'e')) {
3279             return TokenNamerequire;
3280           }
3281         } else if (length == 12) {
3282           if ((data[++index] == 'e') && (data[++index] == 'q')
3283               && (data[++index] == 'u') && (data[++index] == 'i')
3284               && (data[++index] == 'r') && (data[++index] == 'e')
3285               && (data[++index] == '_') && (data[++index] == 'o')
3286               && (data[++index] == 'n') && (data[++index] == 'c')
3287               && (data[++index] == 'e')) {
3288             return TokenNamerequire_once;
3289           }
3290         } else
3291           return TokenNameIdentifier;
3292       case 's' :
3293         //static switch
3294         switch (length) {
3295           case 6 :
3296             if (data[++index] == 't')
3297               if ((data[++index] == 'a') && (data[++index] == 't')
3298                   && (data[++index] == 'i') && (data[++index] == 'c')) {
3299                 return TokenNamestatic;
3300               } else
3301                 return TokenNameIdentifier;
3302             else if ((data[index] == 'w') && (data[++index] == 'i')
3303                 && (data[++index] == 't') && (data[++index] == 'c')
3304                 && (data[++index] == 'h'))
3305               return TokenNameswitch;
3306             else
3307               return TokenNameIdentifier;
3308           default :
3309             return TokenNameIdentifier;
3310         }
3311       case 't' :
3312         // try true throw
3313         switch (length) {
3314           case 3 :
3315             if ((data[++index] == 'r') && (data[++index] == 'y'))
3316               return TokenNametry;
3317             else
3318               return TokenNameIdentifier;
3319           //          case 4 :
3320           //            if ((data[++index] == 'r') && (data[++index] == 'u')
3321           //                && (data[++index] == 'e'))
3322           //              return TokenNametrue;
3323           //            else
3324           //              return TokenNameIdentifier;
3325           case 5 :
3326             if ((data[++index] == 'h') && (data[++index] == 'r')
3327                 && (data[++index] == 'o') && (data[++index] == 'w'))
3328               return TokenNamethrow;
3329             else
3330               return TokenNameIdentifier;
3331           default :
3332             return TokenNameIdentifier;
3333         }
3334       case 'u' :
3335         //use unset
3336         switch (length) {
3337           case 3 :
3338             if ((data[++index] == 's') && (data[++index] == 'e'))
3339               return TokenNameuse;
3340             else
3341               return TokenNameIdentifier;
3342           case 5 :
3343             if ((data[++index] == 'n') && (data[++index] == 's')
3344                 && (data[++index] == 'e') && (data[++index] == 't'))
3345               return TokenNameunset;
3346             else
3347               return TokenNameIdentifier;
3348           default :
3349             return TokenNameIdentifier;
3350         }
3351       case 'v' :
3352         //var
3353         switch (length) {
3354           case 3 :
3355             if ((data[++index] == 'a') && (data[++index] == 'r'))
3356               return TokenNamevar;
3357             else
3358               return TokenNameIdentifier;
3359           default :
3360             return TokenNameIdentifier;
3361         }
3362       case 'w' :
3363         //while
3364         switch (length) {
3365           case 5 :
3366             if ((data[++index] == 'h') && (data[++index] == 'i')
3367                 && (data[++index] == 'l') && (data[++index] == 'e'))
3368               return TokenNamewhile;
3369             else
3370               return TokenNameIdentifier;
3371           //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3372           // (data[++index]=='e') && (data[++index]=='f')&&
3373           // (data[++index]=='p'))
3374           //return TokenNamewidefp ;
3375           //else
3376           //return TokenNameIdentifier;
3377           default :
3378             return TokenNameIdentifier;
3379         }
3380       case 'x' :
3381         //xor
3382         switch (length) {
3383           case 3 :
3384             if ((data[++index] == 'o') && (data[++index] == 'r'))
3385               return TokenNamexor;
3386             else
3387               return TokenNameIdentifier;
3388           default :
3389             return TokenNameIdentifier;
3390         }
3391       default :
3392         return TokenNameIdentifier;
3393     }
3394   }
3395   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3396     //when entering this method the currentCharacter is the firt
3397     //digit of the number , i.e. it may be preceeded by a . when
3398     //dotPrefix is true
3399     boolean floating = dotPrefix;
3400     if ((!dotPrefix) && (currentCharacter == '0')) {
3401       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3402         //force the first char of the hexa number do exist...
3403         // consume next character
3404         unicodeAsBackSlash = false;
3405         currentCharacter = source[currentPosition++];
3406         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3407         //          && (source[currentPosition] == 'u')) {
3408         //          getNextUnicodeChar();
3409         //        } else {
3410         //          if (withoutUnicodePtr != 0) {
3411         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3412         //          }
3413         //        }
3414         if (Character.digit(currentCharacter, 16) == -1)
3415           throw new InvalidInputException(INVALID_HEXA);
3416         //---end forcing--
3417         while (getNextCharAsDigit(16)) {
3418         };
3419         //        if (getNextChar('l', 'L') >= 0)
3420         //          return TokenNameLongLiteral;
3421         //        else
3422         return TokenNameIntegerLiteral;
3423       }
3424       //there is x or X in the number
3425       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3426       // 00078.0 is true !!!!! crazy language
3427       if (getNextCharAsDigit()) {
3428         //-------------potential octal-----------------
3429         while (getNextCharAsDigit()) {
3430         };
3431         //        if (getNextChar('l', 'L') >= 0) {
3432         //          return TokenNameLongLiteral;
3433         //        }
3434         //
3435         //        if (getNextChar('f', 'F') >= 0) {
3436         //          return TokenNameFloatingPointLiteral;
3437         //        }
3438         if (getNextChar('d', 'D') >= 0) {
3439           return TokenNameDoubleLiteral;
3440         } else { //make the distinction between octal and float ....
3441           if (getNextChar('.')) { //bingo ! ....
3442             while (getNextCharAsDigit()) {
3443             };
3444             if (getNextChar('e', 'E') >= 0) {
3445               // consume next character
3446               unicodeAsBackSlash = false;
3447               currentCharacter = source[currentPosition++];
3448               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3449               //                && (source[currentPosition] == 'u')) {
3450               //                getNextUnicodeChar();
3451               //              } else {
3452               //                if (withoutUnicodePtr != 0) {
3453               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3454               //                }
3455               //              }
3456               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3457                 // consume next character
3458                 unicodeAsBackSlash = false;
3459                 currentCharacter = source[currentPosition++];
3460                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3461                 //                  && (source[currentPosition] == 'u')) {
3462                 //                  getNextUnicodeChar();
3463                 //                } else {
3464                 //                  if (withoutUnicodePtr != 0) {
3465                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3466                 //                      currentCharacter;
3467                 //                  }
3468                 //                }
3469               }
3470               if (!Character.isDigit(currentCharacter))
3471                 throw new InvalidInputException(INVALID_FLOAT);
3472               while (getNextCharAsDigit()) {
3473               };
3474             }
3475             //            if (getNextChar('f', 'F') >= 0)
3476             //              return TokenNameFloatingPointLiteral;
3477             getNextChar('d', 'D'); //jump over potential d or D
3478             return TokenNameDoubleLiteral;
3479           } else {
3480             return TokenNameIntegerLiteral;
3481           }
3482         }
3483       } else {
3484         /* carry on */
3485       }
3486     }
3487     while (getNextCharAsDigit()) {
3488     };
3489     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3490     //      return TokenNameLongLiteral;
3491     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3492       while (getNextCharAsDigit()) {
3493       };
3494       floating = true;
3495     }
3496     //if floating is true both exponant and suffix may be optional
3497     if (getNextChar('e', 'E') >= 0) {
3498       floating = true;
3499       // consume next character
3500       unicodeAsBackSlash = false;
3501       currentCharacter = source[currentPosition++];
3502       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3503       //        && (source[currentPosition] == 'u')) {
3504       //        getNextUnicodeChar();
3505       //      } else {
3506       //        if (withoutUnicodePtr != 0) {
3507       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3508       //        }
3509       //      }
3510       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3511         // next
3512         // character
3513         unicodeAsBackSlash = false;
3514         currentCharacter = source[currentPosition++];
3515         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3516         //          && (source[currentPosition] == 'u')) {
3517         //          getNextUnicodeChar();
3518         //        } else {
3519         //          if (withoutUnicodePtr != 0) {
3520         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3521         //          }
3522         //        }
3523       }
3524       if (!Character.isDigit(currentCharacter))
3525         throw new InvalidInputException(INVALID_FLOAT);
3526       while (getNextCharAsDigit()) {
3527       };
3528     }
3529     if (getNextChar('d', 'D') >= 0)
3530       return TokenNameDoubleLiteral;
3531     //    if (getNextChar('f', 'F') >= 0)
3532     //      return TokenNameFloatingPointLiteral;
3533     //the long flag has been tested before
3534     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3535   }
3536   /**
3537    * Search the line number corresponding to a specific position
3538    *  
3539    */
3540   public final int getLineNumber(int position) {
3541     if (lineEnds == null)
3542       return 1;
3543     int length = linePtr + 1;
3544     if (length == 0)
3545       return 1;
3546     int g = 0, d = length - 1;
3547     int m = 0;
3548     while (g <= d) {
3549       m = (g + d) / 2;
3550       if (position < lineEnds[m]) {
3551         d = m - 1;
3552       } else if (position > lineEnds[m]) {
3553         g = m + 1;
3554       } else {
3555         return m + 1;
3556       }
3557     }
3558     if (position < lineEnds[m]) {
3559       return m + 1;
3560     }
3561     return m + 2;
3562   }
3563   public void setPHPMode(boolean mode) {
3564     phpMode = mode;
3565   }
3566   public final void setSource(char[] source) {
3567     //the source-buffer is set to sourceString
3568     if (source == null) {
3569       this.source = new char[0];
3570     } else {
3571       this.source = source;
3572     }
3573     startPosition = -1;
3574     initialPosition = currentPosition = 0;
3575     containsAssertKeyword = false;
3576     withoutUnicodeBuffer = new char[this.source.length];
3577     encapsedStringStack = new Stack();
3578   }
3579   public String toString() {
3580     if (startPosition == source.length)
3581       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3582     if (currentPosition > source.length)
3583       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3584     char front[] = new char[startPosition];
3585     System.arraycopy(source, 0, front, 0, startPosition);
3586     int middleLength = (currentPosition - 1) - startPosition + 1;
3587     char middle[];
3588     if (middleLength > -1) {
3589       middle = new char[middleLength];
3590       System.arraycopy(source, startPosition, middle, 0, middleLength);
3591     } else {
3592       middle = new char[0];
3593     }
3594     char end[] = new char[source.length - (currentPosition - 1)];
3595     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3596         - (currentPosition - 1) - 1);
3597     return new String(front)
3598         + "\n===============================\nStarts here -->" //$NON-NLS-1$
3599         + new String(middle)
3600         + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3601         + new String(end);
3602   }
3603   public final String toStringAction(int act) {
3604     switch (act) {
3605       case TokenNameERROR :
3606         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3607       // //$NON-NLS-1$
3608       case TokenNameINLINE_HTML :
3609         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3610       case TokenNameIdentifier :
3611         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3612       case TokenNameVariable :
3613         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3614       case TokenNameabstract :
3615         return "abstract"; //$NON-NLS-1$
3616       case TokenNameand :
3617         return "AND"; //$NON-NLS-1$
3618       case TokenNamearray :
3619         return "array"; //$NON-NLS-1$
3620       case TokenNameas :
3621         return "as"; //$NON-NLS-1$
3622       case TokenNamebreak :
3623         return "break"; //$NON-NLS-1$
3624       case TokenNamecase :
3625         return "case"; //$NON-NLS-1$
3626       case TokenNameclass :
3627         return "class"; //$NON-NLS-1$
3628       case TokenNamecatch :
3629         return "catch"; //$NON-NLS-1$
3630       case TokenNameclone :
3631         //$NON-NLS-1$
3632         return "clone";
3633       case TokenNameconst :
3634         //$NON-NLS-1$
3635         return "const";
3636       case TokenNamecontinue :
3637         return "continue"; //$NON-NLS-1$
3638       case TokenNamedefault :
3639         return "default"; //$NON-NLS-1$
3640       //      case TokenNamedefine :
3641       //        return "define"; //$NON-NLS-1$
3642       case TokenNamedo :
3643         return "do"; //$NON-NLS-1$
3644       case TokenNameecho :
3645         return "echo"; //$NON-NLS-1$
3646       case TokenNameelse :
3647         return "else"; //$NON-NLS-1$
3648       case TokenNameelseif :
3649         return "elseif"; //$NON-NLS-1$
3650       case TokenNameendfor :
3651         return "endfor"; //$NON-NLS-1$
3652       case TokenNameendforeach :
3653         return "endforeach"; //$NON-NLS-1$
3654       case TokenNameendif :
3655         return "endif"; //$NON-NLS-1$
3656       case TokenNameendswitch :
3657         return "endswitch"; //$NON-NLS-1$
3658       case TokenNameendwhile :
3659         return "endwhile"; //$NON-NLS-1$
3660       case TokenNameexit:
3661         return "exit";
3662       case TokenNameextends :
3663         return "extends"; //$NON-NLS-1$
3664       //      case TokenNamefalse :
3665       //        return "false"; //$NON-NLS-1$
3666       case TokenNamefinal :
3667         return "final"; //$NON-NLS-1$
3668       case TokenNamefor :
3669         return "for"; //$NON-NLS-1$
3670       case TokenNameforeach :
3671         return "foreach"; //$NON-NLS-1$
3672       case TokenNamefunction :
3673         return "function"; //$NON-NLS-1$
3674       case TokenNameglobal :
3675         return "global"; //$NON-NLS-1$
3676       case TokenNameif :
3677         return "if"; //$NON-NLS-1$
3678       case TokenNameimplements :
3679         return "implements"; //$NON-NLS-1$
3680       case TokenNameinclude :
3681         return "include"; //$NON-NLS-1$
3682       case TokenNameinclude_once :
3683         return "include_once"; //$NON-NLS-1$
3684       case TokenNameinstanceof :
3685         return "instanceof"; //$NON-NLS-1$
3686       case TokenNameinterface :
3687         return "interface"; //$NON-NLS-1$
3688       case TokenNameisset :
3689         return "isset"; //$NON-NLS-1$
3690       case TokenNamelist :
3691         return "list"; //$NON-NLS-1$
3692       case TokenNamenew :
3693         return "new"; //$NON-NLS-1$
3694       //      case TokenNamenull :
3695       //        return "null"; //$NON-NLS-1$
3696       case TokenNameor :
3697         return "OR"; //$NON-NLS-1$
3698       case TokenNameprint :
3699         return "print"; //$NON-NLS-1$
3700       case TokenNameprivate :
3701         return "private"; //$NON-NLS-1$
3702       case TokenNameprotected :
3703         return "protected"; //$NON-NLS-1$
3704       case TokenNamepublic :
3705         return "public"; //$NON-NLS-1$
3706       case TokenNamerequire :
3707         return "require"; //$NON-NLS-1$
3708       case TokenNamerequire_once :
3709         return "require_once"; //$NON-NLS-1$
3710       case TokenNamereturn :
3711         return "return"; //$NON-NLS-1$
3712       case TokenNamestatic :
3713         return "static"; //$NON-NLS-1$
3714       case TokenNameswitch :
3715         return "switch"; //$NON-NLS-1$
3716       //      case TokenNametrue :
3717       //        return "true"; //$NON-NLS-1$
3718       case TokenNameunset :
3719         return "unset"; //$NON-NLS-1$
3720       case TokenNamevar :
3721         return "var"; //$NON-NLS-1$
3722       case TokenNamewhile :
3723         return "while"; //$NON-NLS-1$
3724       case TokenNamexor :
3725         return "XOR"; //$NON-NLS-1$
3726       //      case TokenNamethis :
3727       //        return "$this"; //$NON-NLS-1$
3728       case TokenNameIntegerLiteral :
3729         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3730       case TokenNameDoubleLiteral :
3731         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3732       case TokenNameStringDoubleQuote :
3733         return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3734       case TokenNameStringSingleQuote :
3735         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3736       case TokenNameStringInterpolated :
3737         return "StringInterpolated(" + new String(getCurrentTokenSource())
3738             + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3739       case TokenNameEncapsedString0 :
3740         return "`"; //$NON-NLS-1$  
3741       case TokenNameEncapsedString1 :
3742         return "\'"; //$NON-NLS-1$  
3743       case TokenNameEncapsedString2 :
3744         return "\""; //$NON-NLS-1$  
3745       case TokenNameSTRING :
3746         return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3747       case TokenNameHEREDOC :
3748         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3749       case TokenNamePLUS_PLUS :
3750         return "++"; //$NON-NLS-1$
3751       case TokenNameMINUS_MINUS :
3752         return "--"; //$NON-NLS-1$
3753       case TokenNameEQUAL_EQUAL :
3754         return "=="; //$NON-NLS-1$
3755       case TokenNameEQUAL_EQUAL_EQUAL :
3756         return "==="; //$NON-NLS-1$
3757       case TokenNameEQUAL_GREATER :
3758         return "=>"; //$NON-NLS-1$
3759       case TokenNameLESS_EQUAL :
3760         return "<="; //$NON-NLS-1$
3761       case TokenNameGREATER_EQUAL :
3762         return ">="; //$NON-NLS-1$
3763       case TokenNameNOT_EQUAL :
3764         return "!="; //$NON-NLS-1$
3765       case TokenNameNOT_EQUAL_EQUAL :
3766         return "!=="; //$NON-NLS-1$
3767       case TokenNameLEFT_SHIFT :
3768         return "<<"; //$NON-NLS-1$
3769       case TokenNameRIGHT_SHIFT :
3770         return ">>"; //$NON-NLS-1$
3771       case TokenNamePLUS_EQUAL :
3772         return "+="; //$NON-NLS-1$
3773       case TokenNameMINUS_EQUAL :
3774         return "-="; //$NON-NLS-1$
3775       case TokenNameMULTIPLY_EQUAL :
3776         return "*="; //$NON-NLS-1$
3777       case TokenNameDIVIDE_EQUAL :
3778         return "/="; //$NON-NLS-1$
3779       case TokenNameAND_EQUAL :
3780         return "&="; //$NON-NLS-1$
3781       case TokenNameOR_EQUAL :
3782         return "|="; //$NON-NLS-1$
3783       case TokenNameXOR_EQUAL :
3784         return "^="; //$NON-NLS-1$
3785       case TokenNameREMAINDER_EQUAL :
3786         return "%="; //$NON-NLS-1$
3787       case TokenNameDOT_EQUAL :
3788         return ".="; //$NON-NLS-1$
3789       case TokenNameLEFT_SHIFT_EQUAL :
3790         return "<<="; //$NON-NLS-1$
3791       case TokenNameRIGHT_SHIFT_EQUAL :
3792         return ">>="; //$NON-NLS-1$
3793       case TokenNameOR_OR :
3794         return "||"; //$NON-NLS-1$
3795       case TokenNameAND_AND :
3796         return "&&"; //$NON-NLS-1$
3797       case TokenNamePLUS :
3798         return "+"; //$NON-NLS-1$
3799       case TokenNameMINUS :
3800         return "-"; //$NON-NLS-1$
3801       case TokenNameMINUS_GREATER :
3802         return "->";
3803       case TokenNameNOT :
3804         return "!"; //$NON-NLS-1$
3805       case TokenNameREMAINDER :
3806         return "%"; //$NON-NLS-1$
3807       case TokenNameXOR :
3808         return "^"; //$NON-NLS-1$
3809       case TokenNameAND :
3810         return "&"; //$NON-NLS-1$
3811       case TokenNameMULTIPLY :
3812         return "*"; //$NON-NLS-1$
3813       case TokenNameOR :
3814         return "|"; //$NON-NLS-1$
3815       case TokenNameTWIDDLE :
3816         return "~"; //$NON-NLS-1$
3817       case TokenNameTWIDDLE_EQUAL :
3818         return "~="; //$NON-NLS-1$
3819       case TokenNameDIVIDE :
3820         return "/"; //$NON-NLS-1$
3821       case TokenNameGREATER :
3822         return ">"; //$NON-NLS-1$
3823       case TokenNameLESS :
3824         return "<"; //$NON-NLS-1$
3825       case TokenNameLPAREN :
3826         return "("; //$NON-NLS-1$
3827       case TokenNameRPAREN :
3828         return ")"; //$NON-NLS-1$
3829       case TokenNameLBRACE :
3830         return "{"; //$NON-NLS-1$
3831       case TokenNameRBRACE :
3832         return "}"; //$NON-NLS-1$
3833       case TokenNameLBRACKET :
3834         return "["; //$NON-NLS-1$
3835       case TokenNameRBRACKET :
3836         return "]"; //$NON-NLS-1$
3837       case TokenNameSEMICOLON :
3838         return ";"; //$NON-NLS-1$
3839       case TokenNameQUESTION :
3840         return "?"; //$NON-NLS-1$
3841       case TokenNameCOLON :
3842         return ":"; //$NON-NLS-1$
3843       case TokenNameCOMMA :
3844         return ","; //$NON-NLS-1$
3845       case TokenNameDOT :
3846         return "."; //$NON-NLS-1$
3847       case TokenNameEQUAL :
3848         return "="; //$NON-NLS-1$
3849       case TokenNameAT :
3850         return "@";
3851       case TokenNameDOLLAR :
3852         return "$";
3853       case TokenNameDOLLAR_LBRACE :
3854         return "${";
3855       case TokenNameEOF :
3856         return "EOF"; //$NON-NLS-1$
3857       case TokenNameWHITESPACE :
3858         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3859       case TokenNameCOMMENT_LINE :
3860         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3861       case TokenNameCOMMENT_BLOCK :
3862         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3863       case TokenNameCOMMENT_PHPDOC :
3864         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3865       //      case TokenNameHTML :
3866       //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
3867       // //$NON-NLS-1$
3868       case TokenNameFILE :
3869         return "__FILE__"; //$NON-NLS-1$
3870       case TokenNameLINE :
3871         return "__LINE__"; //$NON-NLS-1$
3872       case TokenNameCLASS_C :
3873         return "__CLASS__"; //$NON-NLS-1$
3874       case TokenNameMETHOD_C :
3875         return "__METHOD__"; //$NON-NLS-1$
3876       case TokenNameFUNC_C :
3877         return "__FUNCTION__"; //$NON-NLS-1
3878       case TokenNameboolCAST :
3879         return "( bool )"; //$NON-NLS-1$
3880       case TokenNameintCAST :
3881         return "( int )"; //$NON-NLS-1$
3882       case TokenNamedoubleCAST :
3883         return "( double )"; //$NON-NLS-1$
3884       case TokenNameobjectCAST :
3885         return "( object )"; //$NON-NLS-1$
3886       case TokenNamestringCAST :
3887         return "( string )"; //$NON-NLS-1$
3888       default :
3889         return "not-a-token(" + (new Integer(act)) + ") "
3890             + new String(getCurrentTokenSource()); //$NON-NLS-1$
3891     }
3892   }
3893   
/**
 * Creates a scanner with every option switched off.
 */
public Scanner() {
  this(false, false);
}

/**
 * Creates a scanner without checking of non-externalized string literals.
 *
 * @param tokenizeComments value for the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace value for the <code>tokenizeWhiteSpace</code> flag
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}

/**
 * Creates a scanner with assert mode switched off.
 *
 * @param tokenizeComments value for the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace value for the <code>tokenizeWhiteSpace</code> flag
 * @param checkNonExternalizedStringLiterals value for the flag of the same name
 */
public Scanner(
    boolean tokenizeComments,
    boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals, false);
}

/**
 * Creates a scanner that does not tokenize strings and has no task tags or
 * task priorities (both are passed on as <code>null</code>).
 *
 * @param tokenizeComments value for the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace value for the <code>tokenizeWhiteSpace</code> flag
 * @param checkNonExternalizedStringLiterals value for the flag of the same name
 * @param assertMode value for the <code>assertMode</code> flag
 */
public Scanner(
    boolean tokenizeComments,
    boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(tokenizeComments, tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals, assertMode, false, null, null);
}

/**
 * Creates a fully configured scanner; all shorter constructors end up here.
 * The end-of-file position starts at <code>Integer.MAX_VALUE</code> and the
 * encapsed-string stack at <code>null</code>.
 *
 * @param tokenizeComments value for the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace value for the <code>tokenizeWhiteSpace</code> flag
 * @param checkNonExternalizedStringLiterals value for the flag of the same name
 * @param assertMode value for the <code>assertMode</code> flag
 * @param tokenizeStrings value for the <code>tokenizeStrings</code> flag
 * @param taskTags the task tags, stored as given
 * @param taskPriorities the task priorities, stored as given
 */
public Scanner(
    boolean tokenizeComments,
    boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals,
    boolean assertMode,
    boolean tokenizeStrings,
    char[][] taskTags,
    char[][] taskPriorities) {
  // options supplied by the caller
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // fixed initial state
  this.encapsedStringStack = null;
  this.eofPosition = Integer.MAX_VALUE;
}
3925   private void checkNonExternalizeString() throws InvalidInputException {
3926     if (currentLine == null)
3927       return;
3928     parseTags(currentLine);
3929   }
3930   private void parseTags(NLSLine line) throws InvalidInputException {
3931     String s = new String(getCurrentTokenSource());
3932     int pos = s.indexOf(TAG_PREFIX);
3933     int lineLength = line.size();
3934     while (pos != -1) {
3935       int start = pos + TAG_PREFIX_LENGTH;
3936       int end = s.indexOf(TAG_POSTFIX, start);
3937       String index = s.substring(start, end);
3938       int i = 0;
3939       try {
3940         i = Integer.parseInt(index) - 1;
3941         // Tags are one based not zero based.
3942       } catch (NumberFormatException e) {
3943         i = -1; // we don't want to consider this as a valid NLS tag
3944       }
3945       if (line.exists(i)) {
3946         line.set(i, null);
3947       }
3948       pos = s.indexOf(TAG_PREFIX, start);
3949     }
3950     this.nonNLSStrings = new StringLiteral[lineLength];
3951     int nonNLSCounter = 0;
3952     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3953       StringLiteral literal = (StringLiteral) iterator.next();
3954       if (literal != null) {
3955         this.nonNLSStrings[nonNLSCounter++] = literal;
3956       }
3957     }
3958     if (nonNLSCounter == 0) {
3959       this.nonNLSStrings = null;
3960       currentLine = null;
3961       return;
3962     }
3963     this.wasNonExternalizedStringLiteral = true;
3964     if (nonNLSCounter != lineLength) {
3965       System.arraycopy(this.nonNLSStrings, 0,
3966           (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
3967           nonNLSCounter);
3968     }
3969     currentLine = null;
3970   }
3971   public final void scanEscapeCharacter() throws InvalidInputException {
3972     // the string with "\\u" is a legal string of two chars \ and u
3973     //thus we use a direct access to the source (for regular cases).
3974     if (unicodeAsBackSlash) {
3975       // consume next character
3976       unicodeAsBackSlash = false;
3977       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
3978       // (source[currentPosition] == 'u')) {
3979       //                                getNextUnicodeChar();
3980       //                        } else {
3981       if (withoutUnicodePtr != 0) {
3982         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3983         //                              }
3984       }
3985     } else
3986       currentCharacter = source[currentPosition++];
3987     switch (currentCharacter) {
3988       case 'b' :
3989         currentCharacter = '\b';
3990         break;
3991       case 't' :
3992         currentCharacter = '\t';
3993         break;
3994       case 'n' :
3995         currentCharacter = '\n';
3996         break;
3997       case 'f' :
3998         currentCharacter = '\f';
3999         break;
4000       case 'r' :
4001         currentCharacter = '\r';
4002         break;
4003       case '\"' :
4004         currentCharacter = '\"';
4005         break;
4006       case '\'' :
4007         currentCharacter = '\'';
4008         break;
4009       case '\\' :
4010         currentCharacter = '\\';
4011         break;
4012       default :
4013         // -----------octal escape--------------
4014         // OctalDigit
4015         // OctalDigit OctalDigit
4016         // ZeroToThree OctalDigit OctalDigit
4017         int number = Character.getNumericValue(currentCharacter);
4018         if (number >= 0 && number <= 7) {
4019           boolean zeroToThreeNot = number > 3;
4020           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4021             int digit = Character.getNumericValue(currentCharacter);
4022             if (digit >= 0 && digit <= 7) {
4023               number = (number * 8) + digit;
4024               if (Character
4025                   .isDigit(currentCharacter = source[currentPosition++])) {
4026                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4027                   // Digit --> ignore last character
4028                   currentPosition--;
4029                 } else {
4030                   digit = Character.getNumericValue(currentCharacter);
4031                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4032                     // OctalDigit OctalDigit
4033                     number = (number * 8) + digit;
4034                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4035                     // --> ignore last character
4036                     currentPosition--;
4037                   }
4038                 }
4039               } else { // has read \OctalDigit NonDigit--> ignore last
4040                 // character
4041                 currentPosition--;
4042               }
4043             } else { // has read \OctalDigit NonOctalDigit--> ignore last
4044               // character
4045               currentPosition--;
4046             }
4047           } else { // has read \OctalDigit --> ignore last character
4048             currentPosition--;
4049           }
4050           if (number > 255)
4051             throw new InvalidInputException(INVALID_ESCAPE);
4052           currentCharacter = (char) number;
4053         } else
4054           throw new InvalidInputException(INVALID_ESCAPE);
4055     }
4056   }
4057   // chech presence of task: tags
4058   public void checkTaskTag(int commentStart, int commentEnd) {
4059     // only look for newer task: tags
4060     if (this.foundTaskCount > 0
4061         && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4062       return;
4063     }
4064     int foundTaskIndex = this.foundTaskCount;
4065     nextChar : for (int i = commentStart; i < commentEnd
4066         && i < this.eofPosition; i++) {
4067       char[] tag = null;
4068       char[] priority = null;
4069       // check for tag occurrence
4070       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4071         tag = this.taskTags[itag];
4072         priority = this.taskPriorities != null
4073             && itag < this.taskPriorities.length
4074             ? this.taskPriorities[itag]
4075             : null;
4076         int tagLength = tag.length;
4077         for (int t = 0; t < tagLength; t++) {
4078           if (this.source[i + t] != tag[t])
4079             continue nextTag;
4080         }
4081         if (this.foundTaskTags == null) {
4082           this.foundTaskTags = new char[5][];
4083           this.foundTaskMessages = new char[5][];
4084           this.foundTaskPriorities = new char[5][];
4085           this.foundTaskPositions = new int[5][];
4086         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4087           System.arraycopy(this.foundTaskTags, 0,
4088               this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4089               this.foundTaskCount);
4090           System.arraycopy(this.foundTaskMessages, 0,
4091               this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4092               this.foundTaskCount);
4093           System.arraycopy(this.foundTaskPriorities, 0,
4094               this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4095               0, this.foundTaskCount);
4096           System.arraycopy(this.foundTaskPositions, 0,
4097               this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4098               this.foundTaskCount);
4099         }
4100         this.foundTaskTags[this.foundTaskCount] = tag;
4101         this.foundTaskPriorities[this.foundTaskCount] = priority;
4102         this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
4103             i + tagLength - 1};
4104         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4105         this.foundTaskCount++;
4106         i += tagLength - 1; // will be incremented when looping
4107       }
4108     }
4109     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4110       // retrieve message start and end positions
4111       int msgStart = this.foundTaskPositions[i][0]
4112           + this.foundTaskTags[i].length;
4113       int max_value = i + 1 < this.foundTaskCount
4114           ? this.foundTaskPositions[i + 1][0] - 1
4115           : commentEnd - 1;
4116       // at most beginning of next task
4117       if (max_value < msgStart)
4118         max_value = msgStart; // would only occur if tag is before EOF.
4119       int end = -1;
4120       char c;
4121       for (int j = msgStart; j < max_value; j++) {
4122         if ((c = this.source[j]) == '\n' || c == '\r') {
4123           end = j - 1;
4124           break;
4125         }
4126       }
4127       if (end == -1) {
4128         for (int j = max_value; j > msgStart; j--) {
4129           if ((c = this.source[j]) == '*') {
4130             end = j - 1;
4131             break;
4132           }
4133         }
4134         if (end == -1)
4135           end = max_value;
4136       }
4137       if (msgStart == end)
4138         continue; // empty
4139       // trim the message
4140       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4141         end--;
4142       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4143         msgStart++;
4144       // update the end position of the task
4145       this.foundTaskPositions[i][1] = end;
4146       // get the message source
4147       final int messageLength = end - msgStart + 1;
4148       char[] message = new char[messageLength];
4149       System.arraycopy(source, msgStart, message, 0, messageLength);
4150       this.foundTaskMessages[i] = message;
4151     }
4152   }
4153 }