Fixed a bug in the String regex
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
16
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19
20 public class Scanner implements IScanner, ITerminalSymbols {
21
22   /* The scanner APIs are:
23    - getNextToken(), which returns the type of the current token
24      (this value is not memorized by the scanner)
25    - getCurrentTokenSource(), which provides the token's "REAL" source
26      (i.e. all unicode escapes have been transformed into a correct char)
27    - startPosition gives the start position of the token in the stream
28    - currentPosition-1 gives the end position of the token in the stream
29   */
30
31   // 1.4 feature (support for the assert keyword)
32   private boolean assertMode;
33   public boolean useAssertAsAnIndentifier = false;
34   // flag indicating whether the processed source contains occurrences of the keyword assert
35   public boolean containsAssertKeyword = false;
36
37   public boolean recordLineSeparator; // when true, getNextToken() calls pushLineSeparator() on each CR or LF
38   public boolean phpMode = false; // false while skipping HTML; getNextToken() sets it after an opening <? or <?php tag
39
40   public char currentCharacter;
41   public int startPosition;
42   public int currentPosition;
43   public int initialPosition, eofPosition;
44   // after this position (eofPosition), EOF is generated instead of real tokens from the source
45
46   public boolean tokenizeComments;
47   public boolean tokenizeWhiteSpace;
48
49   // source should be viewed as a window (i.e. a part)
50   // of an entire, very large stream
51   public char source[];
52
53   // unicode support
54   public char[] withoutUnicodeBuffer;
55   public int withoutUnicodePtr;
56   // withoutUnicodePtr == 0 ==> no unicode in the current token
57   public boolean unicodeAsBackSlash = false;
58
59   public boolean scanningFloatLiteral = false;
60
61   // support for /** comments
62   //public char[][] comments = new char[10][];
63   public int[] commentStops = new int[10];
64   public int[] commentStarts = new int[10];
65   public int commentPtr = -1; // no comment test with commentPtr value -1
66
67   // diet parsing support - jump over some method bodies when requested
68   public boolean diet = false;
69
70   // support for line-based debuggers:
71   // remembers the positions of the cr/lf line separators
72   public int[] lineEnds = new int[250];
73   public int linePtr = -1;
74   public boolean wasAcr = false;
75
76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
77
78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
85
86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
90
91   //----------------optimized identifier management------------------
92   static final char[] charArray_a = new char[] { 'a' },
93     charArray_b = new char[] { 'b' },
94     charArray_c = new char[] { 'c' },
95     charArray_d = new char[] { 'd' },
96     charArray_e = new char[] { 'e' },
97     charArray_f = new char[] { 'f' },
98     charArray_g = new char[] { 'g' },
99     charArray_h = new char[] { 'h' },
100     charArray_i = new char[] { 'i' },
101     charArray_j = new char[] { 'j' },
102     charArray_k = new char[] { 'k' },
103     charArray_l = new char[] { 'l' },
104     charArray_m = new char[] { 'm' },
105     charArray_n = new char[] { 'n' },
106     charArray_o = new char[] { 'o' },
107     charArray_p = new char[] { 'p' },
108     charArray_q = new char[] { 'q' },
109     charArray_r = new char[] { 'r' },
110     charArray_s = new char[] { 's' },
111     charArray_t = new char[] { 't' },
112     charArray_u = new char[] { 'u' },
113     charArray_v = new char[] { 'v' },
114     charArray_w = new char[] { 'w' },
115     charArray_x = new char[] { 'x' },
116     charArray_y = new char[] { 'y' },
117     charArray_z = new char[] { 'z' };
118
119   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
120   static final int TableSize = 30, InternalTableSize = 6;
121   // TableSize * InternalTableSize = 30*6 = 180 entries
122   public static final int OptimizedLength = 6;
123   public /*static*/
124   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][]; // every slot is set to initCharArray by the instance initializer
125   // support for detecting non-externalized string literals
126   int currentLineNr = -1;
127   int previousLineNr = -1;
128   NLSLine currentLine = null;
129   List lines = new ArrayList();
130   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
131   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
132   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
133   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
134   public StringLiteral[] nonNLSStrings = null;
135   public boolean checkNonExternalizedStringLiterals = true;
136   public boolean wasNonExternalizedStringLiteral = false;
137
138   /*static*/ {
139     for (int i = 0; i < 6; i++) {
140       for (int j = 0; j < TableSize; j++) {
141         for (int k = 0; k < InternalTableSize; k++) {
142           charArray_length[i][j][k] = initCharArray;
143         }
144       }
145     }
146   }
147   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0; // NOTE(review): presumably per-length cursors into charArray_length - confirm against optimizedCurrentTokenSourceN()
148
149   public static final int RoundBracket = 0; // bracket kind: ( )
150   public static final int SquareBracket = 1; // bracket kind: [ ]
151   public static final int CurlyBracket = 2; // bracket kind: { }
152   public static final int BracketKinds = 3; // number of bracket kinds declared above
153
154   public static final boolean DEBUG = false;
155
/**
 * Creates a scanner that tokenizes neither comments nor white space.
 */
public Scanner() {
  this(false, false);
}
/**
 * Creates a scanner with the given tokenizing options. Delegates to the
 * three-argument constructor, passing false as its third argument.
 *
 * @param tokenizeComments forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
162
163   /**
164    * Determines if the specified character is
165    * permissible as the first character in a PHP identifier
166    */
167   public static boolean isPHPIdentifierStart(char ch) {
168     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
169   }
170
171   /**
172    * Determines if the specified character may be part of a PHP identifier as
173    * other than the first character
174    */
175   public static boolean isPHPIdentifierPart(char ch) {
176     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
177   }
178
179   public final boolean atEnd() {
180     // This code is not relevant if source is 
181     // Only a part of the real stream input
182
183     return source.length == currentPosition;
184   }
185   public char[] getCurrentIdentifierSource() {
186     //return the token REAL source (aka unicodes are precomputed)
187
188     char[] result;
189     //    if (withoutUnicodePtr != 0)
190     //      //0 is used as a fast test flag so the real first char is in position 1
191     //      System.arraycopy(
192     //        withoutUnicodeBuffer,
193     //        1,
194     //        result = new char[withoutUnicodePtr],
195     //        0,
196     //        withoutUnicodePtr);
197     //    else {
198     int length = currentPosition - startPosition;
199     switch (length) { // see OptimizedLength
200       case 1 :
201         return optimizedCurrentTokenSource1();
202       case 2 :
203         return optimizedCurrentTokenSource2();
204       case 3 :
205         return optimizedCurrentTokenSource3();
206       case 4 :
207         return optimizedCurrentTokenSource4();
208       case 5 :
209         return optimizedCurrentTokenSource5();
210       case 6 :
211         return optimizedCurrentTokenSource6();
212     }
213     //no optimization
214     System.arraycopy(source, startPosition, result = new char[length], 0, length);
215     //   }
216     return result;
217   }
218   public int getCurrentTokenEndPosition() {
219     return this.currentPosition - 1;
220   }
221   public final char[] getCurrentTokenSource() {
222     // Return the token REAL source (aka unicodes are precomputed)
223
224     char[] result;
225     //    if (withoutUnicodePtr != 0)
226     //      // 0 is used as a fast test flag so the real first char is in position 1
227     //      System.arraycopy(
228     //        withoutUnicodeBuffer,
229     //        1,
230     //        result = new char[withoutUnicodePtr],
231     //        0,
232     //        withoutUnicodePtr);
233     //    else {
234     int length;
235     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
236     //    }
237     return result;
238   }
239
240   public final char[] getCurrentTokenSource(int startPos) {
241     // Return the token REAL source (aka unicodes are precomputed)
242
243     char[] result;
244     //    if (withoutUnicodePtr != 0)
245     //      // 0 is used as a fast test flag so the real first char is in position 1
246     //      System.arraycopy(
247     //        withoutUnicodeBuffer,
248     //        1,
249     //        result = new char[withoutUnicodePtr],
250     //        0,
251     //        withoutUnicodePtr);
252     //    else {
253     int length;
254     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
255     //  }
256     return result;
257   }
258
259   public final char[] getCurrentTokenSourceString() {
260     //return the token REAL source (aka unicodes are precomputed).
261     //REMOVE the two " that are at the beginning and the end.
262
263     char[] result;
264     if (withoutUnicodePtr != 0)
265       //0 is used as a fast test flag so the real first char is in position 1
266       System.arraycopy(withoutUnicodeBuffer, 2,
267       //2 is 1 (real start) + 1 (to jump over the ")
268       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
269     else {
270       int length;
271       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
272     }
273     return result;
274   }
275   public int getCurrentTokenStartPosition() {
276     return this.startPosition;
277   }
278   /*
279    * Search the source position corresponding to the end of a given line number
280    *
281    * Line numbers are 1-based, and relative to the scanner initialPosition. 
282    * Character positions are 0-based.
283    *
284    * In case the given line number is inconsistent, answers -1.
285    */
286   public final int getLineEnd(int lineNumber) {
287
288     if (lineEnds == null)
289       return -1;
290     if (lineNumber >= lineEnds.length)
291       return -1;
292     if (lineNumber <= 0)
293       return -1;
294
295     if (lineNumber == lineEnds.length - 1)
296       return eofPosition;
297     return lineEnds[lineNumber - 1];
298     // next line start one character behind the lineEnd of the previous line
299   }
300   /**
301    * Search the source position corresponding to the beginning of a given line number
302    *
303    * Line numbers are 1-based, and relative to the scanner initialPosition. 
304    * Character positions are 0-based.
305    *
306    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
307    *
308    * In case the given line number is inconsistent, answers -1.
309    */
310   public final int getLineStart(int lineNumber) {
311
312     if (lineEnds == null)
313       return -1;
314     if (lineNumber >= lineEnds.length)
315       return -1;
316     if (lineNumber <= 0)
317       return -1;
318
319     if (lineNumber == 1)
320       return initialPosition;
321     return lineEnds[lineNumber - 2] + 1;
322     // next line start one character behind the lineEnd of the previous line
323   }
324   public final boolean getNextChar(char testedChar) {
325     //BOOLEAN
326     //handle the case of unicode.
327     //when a unicode appears then we must use a buffer that holds char internal values
328     //At the end of this method currentCharacter holds the new visited char
329     //and currentPosition points right next after it
330     //Both previous lines are true if the currentCharacter is == to the testedChar
331     //On false, no side effect has occured.
332
333     //ALL getNextChar.... ARE OPTIMIZED COPIES 
334
335     int temp = currentPosition;
336     try {
337       currentCharacter = source[currentPosition++];
338       //      if (((currentCharacter = source[currentPosition++]) == '\\')
339       //        && (source[currentPosition] == 'u')) {
340       //        //-------------unicode traitement ------------
341       //        int c1, c2, c3, c4;
342       //        int unicodeSize = 6;
343       //        currentPosition++;
344       //        while (source[currentPosition] == 'u') {
345       //          currentPosition++;
346       //          unicodeSize++;
347       //        }
348       //
349       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
350       //          || c1 < 0)
351       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
352       //            || c2 < 0)
353       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
354       //            || c3 < 0)
355       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
356       //            || c4 < 0)) {
357       //          currentPosition = temp;
358       //          return false;
359       //        }
360       //
361       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
362       //        if (currentCharacter != testedChar) {
363       //          currentPosition = temp;
364       //          return false;
365       //        }
366       //        unicodeAsBackSlash = currentCharacter == '\\';
367       //
368       //        //need the unicode buffer
369       //        if (withoutUnicodePtr == 0) {
370       //          //buffer all the entries that have been left aside....
371       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
372       //          System.arraycopy(
373       //            source,
374       //            startPosition,
375       //            withoutUnicodeBuffer,
376       //            1,
377       //            withoutUnicodePtr);
378       //        }
379       //        //fill the buffer with the char
380       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
381       //        return true;
382       //
383       //      } //-------------end unicode traitement--------------
384       //      else {
385       if (currentCharacter != testedChar) {
386         currentPosition = temp;
387         return false;
388       }
389       unicodeAsBackSlash = false;
390       //        if (withoutUnicodePtr != 0)
391       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
392       return true;
393       //      }
394     } catch (IndexOutOfBoundsException e) {
395       unicodeAsBackSlash = false;
396       currentPosition = temp;
397       return false;
398     }
399   }
400   public final int getNextChar(char testedChar1, char testedChar2) {
401     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
402     //test can be done with (x==0) for the first and (x>0) for the second
403     //handle the case of unicode.
404     //when a unicode appears then we must use a buffer that holds char internal values
405     //At the end of this method currentCharacter holds the new visited char
406     //and currentPosition points right next after it
407     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
408     //On false, no side effect has occured.
409
410     //ALL getNextChar.... ARE OPTIMIZED COPIES 
411
412     int temp = currentPosition;
413     try {
414       int result;
415       currentCharacter = source[currentPosition++];
416       //      if (((currentCharacter = source[currentPosition++]) == '\\')
417       //        && (source[currentPosition] == 'u')) {
418       //        //-------------unicode traitement ------------
419       //        int c1, c2, c3, c4;
420       //        int unicodeSize = 6;
421       //        currentPosition++;
422       //        while (source[currentPosition] == 'u') {
423       //          currentPosition++;
424       //          unicodeSize++;
425       //        }
426       //
427       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
428       //          || c1 < 0)
429       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
430       //            || c2 < 0)
431       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
432       //            || c3 < 0)
433       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
434       //            || c4 < 0)) {
435       //          currentPosition = temp;
436       //          return 2;
437       //        }
438       //
439       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
440       //        if (currentCharacter == testedChar1)
441       //          result = 0;
442       //        else if (currentCharacter == testedChar2)
443       //          result = 1;
444       //        else {
445       //          currentPosition = temp;
446       //          return -1;
447       //        }
448       //
449       //        //need the unicode buffer
450       //        if (withoutUnicodePtr == 0) {
451       //          //buffer all the entries that have been left aside....
452       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
453       //          System.arraycopy(
454       //            source,
455       //            startPosition,
456       //            withoutUnicodeBuffer,
457       //            1,
458       //            withoutUnicodePtr);
459       //        }
460       //        //fill the buffer with the char
461       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
462       //        return result;
463       //      } //-------------end unicode traitement--------------
464       //      else {
465       if (currentCharacter == testedChar1)
466         result = 0;
467       else if (currentCharacter == testedChar2)
468         result = 1;
469       else {
470         currentPosition = temp;
471         return -1;
472       }
473
474       //        if (withoutUnicodePtr != 0)
475       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
476       return result;
477       //     }
478     } catch (IndexOutOfBoundsException e) {
479       currentPosition = temp;
480       return -1;
481     }
482   }
483   public final boolean getNextCharAsDigit() {
484     //BOOLEAN
485     //handle the case of unicode.
486     //when a unicode appears then we must use a buffer that holds char internal values
487     //At the end of this method currentCharacter holds the new visited char
488     //and currentPosition points right next after it
489     //Both previous lines are true if the currentCharacter is a digit
490     //On false, no side effect has occured.
491
492     //ALL getNextChar.... ARE OPTIMIZED COPIES 
493
494     int temp = currentPosition;
495     try {
496       currentCharacter = source[currentPosition++];
497       //      if (((currentCharacter = source[currentPosition++]) == '\\')
498       //        && (source[currentPosition] == 'u')) {
499       //        //-------------unicode traitement ------------
500       //        int c1, c2, c3, c4;
501       //        int unicodeSize = 6;
502       //        currentPosition++;
503       //        while (source[currentPosition] == 'u') {
504       //          currentPosition++;
505       //          unicodeSize++;
506       //        }
507       //
508       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
509       //          || c1 < 0)
510       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
511       //            || c2 < 0)
512       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
513       //            || c3 < 0)
514       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
515       //            || c4 < 0)) {
516       //          currentPosition = temp;
517       //          return false;
518       //        }
519       //
520       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
521       //        if (!Character.isDigit(currentCharacter)) {
522       //          currentPosition = temp;
523       //          return false;
524       //        }
525       //
526       //        //need the unicode buffer
527       //        if (withoutUnicodePtr == 0) {
528       //          //buffer all the entries that have been left aside....
529       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
530       //          System.arraycopy(
531       //            source,
532       //            startPosition,
533       //            withoutUnicodeBuffer,
534       //            1,
535       //            withoutUnicodePtr);
536       //        }
537       //        //fill the buffer with the char
538       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
539       //        return true;
540       //      } //-------------end unicode traitement--------------
541       //      else {
542       if (!Character.isDigit(currentCharacter)) {
543         currentPosition = temp;
544         return false;
545       }
546       //        if (withoutUnicodePtr != 0)
547       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
548       return true;
549       //      }
550     } catch (IndexOutOfBoundsException e) {
551       currentPosition = temp;
552       return false;
553     }
554   }
555   public final boolean getNextCharAsDigit(int radix) {
556     //BOOLEAN
557     //handle the case of unicode.
558     //when a unicode appears then we must use a buffer that holds char internal values
559     //At the end of this method currentCharacter holds the new visited char
560     //and currentPosition points right next after it
561     //Both previous lines are true if the currentCharacter is a digit base on radix
562     //On false, no side effect has occured.
563
564     //ALL getNextChar.... ARE OPTIMIZED COPIES 
565
566     int temp = currentPosition;
567     try {
568       currentCharacter = source[currentPosition++];
569       //      if (((currentCharacter = source[currentPosition++]) == '\\')
570       //        && (source[currentPosition] == 'u')) {
571       //        //-------------unicode traitement ------------
572       //        int c1, c2, c3, c4;
573       //        int unicodeSize = 6;
574       //        currentPosition++;
575       //        while (source[currentPosition] == 'u') {
576       //          currentPosition++;
577       //          unicodeSize++;
578       //        }
579       //
580       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
581       //          || c1 < 0)
582       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
583       //            || c2 < 0)
584       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
585       //            || c3 < 0)
586       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
587       //            || c4 < 0)) {
588       //          currentPosition = temp;
589       //          return false;
590       //        }
591       //
592       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
593       //        if (Character.digit(currentCharacter, radix) == -1) {
594       //          currentPosition = temp;
595       //          return false;
596       //        }
597       //
598       //        //need the unicode buffer
599       //        if (withoutUnicodePtr == 0) {
600       //          //buffer all the entries that have been left aside....
601       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
602       //          System.arraycopy(
603       //            source,
604       //            startPosition,
605       //            withoutUnicodeBuffer,
606       //            1,
607       //            withoutUnicodePtr);
608       //        }
609       //        //fill the buffer with the char
610       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
611       //        return true;
612       //      } //-------------end unicode traitement--------------
613       //      else {
614       if (Character.digit(currentCharacter, radix) == -1) {
615         currentPosition = temp;
616         return false;
617       }
618       //        if (withoutUnicodePtr != 0)
619       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
620       return true;
621       //      }
622     } catch (IndexOutOfBoundsException e) {
623       currentPosition = temp;
624       return false;
625     }
626   }
627   public boolean getNextCharAsJavaIdentifierPart() {
628     //BOOLEAN
629     //handle the case of unicode.
630     //when a unicode appears then we must use a buffer that holds char internal values
631     //At the end of this method currentCharacter holds the new visited char
632     //and currentPosition points right next after it
633     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
634     //On false, no side effect has occured.
635
636     //ALL getNextChar.... ARE OPTIMIZED COPIES 
637
638     int temp = currentPosition;
639     try {
640       currentCharacter = source[currentPosition++];
641       //      if (((currentCharacter = source[currentPosition++]) == '\\')
642       //        && (source[currentPosition] == 'u')) {
643       //        //-------------unicode traitement ------------
644       //        int c1, c2, c3, c4;
645       //        int unicodeSize = 6;
646       //        currentPosition++;
647       //        while (source[currentPosition] == 'u') {
648       //          currentPosition++;
649       //          unicodeSize++;
650       //        }
651       //
652       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
653       //          || c1 < 0)
654       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
655       //            || c2 < 0)
656       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
657       //            || c3 < 0)
658       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
659       //            || c4 < 0)) {
660       //          currentPosition = temp;
661       //          return false;
662       //        }
663       //
664       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
665       //        if (!isPHPIdentifierPart(currentCharacter)) {
666       //          currentPosition = temp;
667       //          return false;
668       //        }
669       //
670       //        //need the unicode buffer
671       //        if (withoutUnicodePtr == 0) {
672       //          //buffer all the entries that have been left aside....
673       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
674       //          System.arraycopy(
675       //            source,
676       //            startPosition,
677       //            withoutUnicodeBuffer,
678       //            1,
679       //            withoutUnicodePtr);
680       //        }
681       //        //fill the buffer with the char
682       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
683       //        return true;
684       //      } //-------------end unicode traitement--------------
685       //      else {
686       if (!isPHPIdentifierPart(currentCharacter)) {
687         currentPosition = temp;
688         return false;
689       }
690
691       //        if (withoutUnicodePtr != 0)
692       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
693       return true;
694       //      }
695     } catch (IndexOutOfBoundsException e) {
696       currentPosition = temp;
697       return false;
698     }
699   }
700
701   public int getNextToken() throws InvalidInputException {
702     int htmlPosition = currentPosition;
703     try {
704       while (!phpMode) {
705         currentCharacter = source[currentPosition++];
706         if (currentCharacter == '<') {
707           if (getNextChar('?')) {
708             currentCharacter = source[currentPosition++];
709             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
710               // <?
711               startPosition = currentPosition;
712               phpMode = true;
713               if (tokenizeWhiteSpace) {
714                 // && (whiteStart != currentPosition - 1)) {
715                 // reposition scanner in case we are interested by spaces as tokens
716                 startPosition = htmlPosition;
717                 return TokenNameHTML;
718               }
719             } else {
720               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
721               if (phpStart) {
722                 int test = getNextChar('H', 'h');
723                 if (test >= 0) {
724                   test = getNextChar('P', 'p');
725                   if (test >= 0) {
726                     // <?PHP  <?php
727                     startPosition = currentPosition;
728                     phpMode = true;
729
730                     if (tokenizeWhiteSpace) {
731                       // && (whiteStart != currentPosition - 1)) {
732                       // reposition scanner in case we are interested by spaces as tokens
733                       startPosition = htmlPosition;
734                       return TokenNameHTML;
735                     }
736                   }
737                 }
738               }
739             }
740           }
741         }
742
743         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
744           if (recordLineSeparator) {
745             pushLineSeparator();
746           } else {
747             currentLine = null;
748           }
749         }
750       }
751     } //-----------------end switch while try--------------------
752     catch (IndexOutOfBoundsException e) {
753       if (tokenizeWhiteSpace) {
754         // && (whiteStart != currentPosition - 1)) {
755         // reposition scanner in case we are interested by spaces as tokens
756         startPosition = htmlPosition;
757       }
758       return TokenNameEOF;
759     }
760
761     if (phpMode) {
762       this.wasAcr = false;
763       if (diet) {
764         jumpOverMethodBody();
765         diet = false;
766         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
767       }
768       try {
769         while (true) { //loop for jumping over comments
770           withoutUnicodePtr = 0;
771           //start with a new token (even comment written with unicode )
772
773           // ---------Consume white space and handles startPosition---------
774           int whiteStart = currentPosition;
775           boolean isWhiteSpace;
776           do {
777             startPosition = currentPosition;
778             currentCharacter = source[currentPosition++];
779             //            if (((currentCharacter = source[currentPosition++]) == '\\')
780             //              && (source[currentPosition] == 'u')) {
781             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
782             //            } else {
783             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
784               checkNonExternalizeString();
785               if (recordLineSeparator) {
786                 pushLineSeparator();
787               } else {
788                 currentLine = null;
789               }
790             }
791             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
792             //            }
793           } while (isWhiteSpace);
794           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
795             // reposition scanner in case we are interested by spaces as tokens
796             currentPosition--;
797             startPosition = whiteStart;
798             return TokenNameWHITESPACE;
799           }
800           //little trick to get out in the middle of a source computation
801           if (currentPosition > eofPosition)
802             return TokenNameEOF;
803
804           // ---------Identify the next token-------------
805
806           switch (currentCharacter) {
807             case '(' :
808               return TokenNameLPAREN;
809             case ')' :
810               return TokenNameRPAREN;
811             case '{' :
812               return TokenNameLBRACE;
813             case '}' :
814               return TokenNameRBRACE;
815             case '[' :
816               return TokenNameLBRACKET;
817             case ']' :
818               return TokenNameRBRACKET;
819             case ';' :
820               return TokenNameSEMICOLON;
821             case ',' :
822               return TokenNameCOMMA;
823
824             case '.' :
825               if (getNextCharAsDigit())
826                 return scanNumber(true);
827               return TokenNameDOT;
828             case '+' :
829               {
830                 int test;
831                 if ((test = getNextChar('+', '=')) == 0)
832                   return TokenNamePLUS_PLUS;
833                 if (test > 0)
834                   return TokenNamePLUS_EQUAL;
835                 return TokenNamePLUS;
836               }
837             case '-' :
838               {
839                 int test;
840                 if ((test = getNextChar('-', '=')) == 0)
841                   return TokenNameMINUS_MINUS;
842                 if (test > 0)
843                   return TokenNameMINUS_EQUAL;
844                 if (getNextChar('>'))
845                   return TokenNameMINUS_GREATER;
846
847                 return TokenNameMINUS;
848               }
849             case '~' :
850               if (getNextChar('='))
851                 return TokenNameTWIDDLE_EQUAL;
852               return TokenNameTWIDDLE;
853             case '!' :
854               if (getNextChar('='))
855                 return TokenNameNOT_EQUAL;
856               return TokenNameNOT;
857             case '*' :
858               if (getNextChar('='))
859                 return TokenNameMULTIPLY_EQUAL;
860               return TokenNameMULTIPLY;
861             case '%' :
862               if (getNextChar('='))
863                 return TokenNameREMAINDER_EQUAL;
864               return TokenNameREMAINDER;
865             case '<' :
866               {
867                 int test;
868                 if ((test = getNextChar('=', '<')) == 0)
869                   return TokenNameLESS_EQUAL;
870                 if (test > 0) {
871                   if (getNextChar('='))
872                     return TokenNameLEFT_SHIFT_EQUAL;
873                   if (getNextChar('<')) {
874                     int heredocStart = currentPosition;
875                     int heredocLength = 0;
876                     currentCharacter = source[currentPosition++];
877                     if (isPHPIdentifierStart(currentCharacter)) {
878                       currentCharacter = source[currentPosition++];
879                     } else {
880                       return TokenNameERROR;
881                     }
882                     while (isPHPIdentifierPart(currentCharacter)) {
883                       currentCharacter = source[currentPosition++];
884                     }
885
886                     heredocLength = currentPosition - heredocStart - 1;
887
888                     // heredoc end-tag determination
889                     boolean endTag = true;
890                     char ch;
891                     do {
892                       ch = source[currentPosition++];
893                       if (ch == '\r' || ch == '\n') {
894                         if (recordLineSeparator) {
895                           pushLineSeparator();
896                         } else {
897                           currentLine = null;
898                         }
899                         for (int i = 0; i < heredocLength; i++) {
900                           if (source[currentPosition + i] != source[heredocStart + i]) {
901                             endTag = false;
902                             break;
903                           }
904                         }
905                         if (endTag) {
906                           currentPosition += heredocLength - 1;
907                           currentCharacter = source[currentPosition++];
908                           break; // do...while loop
909                         } else {
910                           endTag = true;
911                         }
912                       }
913
914                     } while (true);
915
916                     return TokenNameHEREDOC;
917                   }
918                   return TokenNameLEFT_SHIFT;
919                 }
920                 return TokenNameLESS;
921               }
922             case '>' :
923               {
924                 int test;
925                 if ((test = getNextChar('=', '>')) == 0)
926                   return TokenNameGREATER_EQUAL;
927                 if (test > 0) {
928                   if ((test = getNextChar('=', '>')) == 0)
929                     return TokenNameRIGHT_SHIFT_EQUAL;
930                   return TokenNameRIGHT_SHIFT;
931                 }
932                 return TokenNameGREATER;
933               }
934             case '=' :
935               if (getNextChar('='))
936                 return TokenNameEQUAL_EQUAL;
937               if (getNextChar('>'))
938                 return TokenNameEQUAL_GREATER;
939               return TokenNameEQUAL;
940             case '&' :
941               {
942                 int test;
943                 if ((test = getNextChar('&', '=')) == 0)
944                   return TokenNameAND_AND;
945                 if (test > 0)
946                   return TokenNameAND_EQUAL;
947                 return TokenNameAND;
948               }
949             case '|' :
950               {
951                 int test;
952                 if ((test = getNextChar('|', '=')) == 0)
953                   return TokenNameOR_OR;
954                 if (test > 0)
955                   return TokenNameOR_EQUAL;
956                 return TokenNameOR;
957               }
958             case '^' :
959               if (getNextChar('='))
960                 return TokenNameXOR_EQUAL;
961               return TokenNameXOR;
962             case '?' :
963               if (getNextChar('>')) {
964                 phpMode = false;
965                 return TokenNameStopPHP;
966               }
967               return TokenNameQUESTION;
968             case ':' :
969               if (getNextChar(':'))
970                 return TokenNameCOLON_COLON;
971               return TokenNameCOLON;
972             case '@' :
973               return TokenNameAT;
974               //                                        case '\'' :
975               //                                                {
976               //                                                        int test;
977               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
978               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
979               //                                                        }
980               //                                                        if (test > 0) {
981               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
982               //                                                                for (int lookAhead = 0;
983               //                                                                        lookAhead < 3;
984               //                                                                        lookAhead++) {
985               //                                                                        if (currentPosition + lookAhead
986               //                                                                                == source.length)
987               //                                                                                break;
988               //                                                                        if (source[currentPosition + lookAhead]
989               //                                                                                == '\n')
990               //                                                                                break;
991               //                                                                        if (source[currentPosition + lookAhead]
992               //                                                                                == '\'') {
993               //                                                                                currentPosition += lookAhead + 1;
994               //                                                                                break;
995               //                                                                        }
996               //                                                                }
997               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
998               //                                                        }
999               //                                                }
1000               //                                                if (getNextChar('\'')) {
1001               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1002               //                                                        for (int lookAhead = 0;
1003               //                                                                lookAhead < 3;
1004               //                                                                lookAhead++) {
1005               //                                                                if (currentPosition + lookAhead
1006               //                                                                        == source.length)
1007               //                                                                        break;
1008               //                                                                if (source[currentPosition + lookAhead]
1009               //                                                                        == '\n')
1010               //                                                                        break;
1011               //                                                                if (source[currentPosition + lookAhead]
1012               //                                                                        == '\'') {
1013               //                                                                        currentPosition += lookAhead + 1;
1014               //                                                                        break;
1015               //                                                                }
1016               //                                                        }
1017               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1018               //                                                }
1019               //                                                if (getNextChar('\\'))
1020               //                                                        scanEscapeCharacter();
1021               //                                                else { // consume next character
1022               //                                                        unicodeAsBackSlash = false;
1023               //                                                        if (((currentCharacter = source[currentPosition++])
1024               //                                                                == '\\')
1025               //                                                                && (source[currentPosition] == 'u')) {
1026               //                                                                getNextUnicodeChar();
1027               //                                                        } else {
1028               //                                                                if (withoutUnicodePtr != 0) {
1029               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1030               //                                                                                currentCharacter;
1031               //                                                                }
1032               //                                                        }
1033               //                                                }
1034               //                                                //            if (getNextChar('\''))
1035               //                                                //              return TokenNameCharacterLiteral;
1036               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1037               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1038               //                                                        if (currentPosition + lookAhead == source.length)
1039               //                                                                break;
1040               //                                                        if (source[currentPosition + lookAhead] == '\n')
1041               //                                                                break;
1042               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1043               //                                                                currentPosition += lookAhead + 1;
1044               //                                                                break;
1045               //                                                        }
1046               //                                                }
1047               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1048             case '\'' :
1049               try {
1050                 // consume next character
1051                 unicodeAsBackSlash = false;
1052                 currentCharacter = source[currentPosition++];
1053                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1054                 //                  && (source[currentPosition] == 'u')) {
1055                 //                  getNextUnicodeChar();
1056                 //                } else {
1057                 //                  if (withoutUnicodePtr != 0) {
1058                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1059                 //                      currentCharacter;
1060                 //                  }
1061                 //                }
1062
1063                 while (currentCharacter != '\'') {
1064
1065                   /**** in PHP \r and \n are valid in string literals ****/
1066                   //                  if ((currentCharacter == '\n')
1067                   //                    || (currentCharacter == '\r')) {
1068                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1069                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1070                   //                      if (currentPosition + lookAhead == source.length)
1071                   //                        break;
1072                   //                      if (source[currentPosition + lookAhead] == '\n')
1073                   //                        break;
1074                   //                      if (source[currentPosition + lookAhead] == '\"') {
1075                   //                        currentPosition += lookAhead + 1;
1076                   //                        break;
1077                   //                      }
1078                   //                    }
1079                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1080                   //                  }
1081                   if (currentCharacter == '\\') {
1082                     int escapeSize = currentPosition;
1083                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1084                     //the escape scan below has a side effect on unicodeAsBackSlash; keep its previous value, which is needed a few lines further down
1085                     scanSingleQuotedEscapeCharacter();
1086                     escapeSize = currentPosition - escapeSize;
1087                     if (withoutUnicodePtr == 0) {
1088                       //buffer all the entries that have been left aside....
1089                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1090                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1091                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1092                     } else { //overwrite the \ in the buffer
1093                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1094                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1095                         withoutUnicodePtr--;
1096                       }
1097                     }
1098                   }
1099                   // consume next character
1100                   unicodeAsBackSlash = false;
1101                   currentCharacter = source[currentPosition++];
1102                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1103                   //                    && (source[currentPosition] == 'u')) {
1104                   //                    getNextUnicodeChar();
1105                   //                  } else {
1106                   if (withoutUnicodePtr != 0) {
1107                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1108                   }
1109                   //                  }
1110
1111                 }
1112               } catch (IndexOutOfBoundsException e) {
1113                 throw new InvalidInputException(UNTERMINATED_STRING);
1114               } catch (InvalidInputException e) {
1115                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1116                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1117                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1118                     if (currentPosition + lookAhead == source.length)
1119                       break;
1120                     if (source[currentPosition + lookAhead] == '\n')
1121                       break;
1122                     if (source[currentPosition + lookAhead] == '\'') {
1123                       currentPosition += lookAhead + 1;
1124                       break;
1125                     }
1126                   }
1127
1128                 }
1129                 throw e; // rethrow
1130               }
1131               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1132                 if (currentLine == null) {
1133                   currentLine = new NLSLine();
1134                   lines.add(currentLine);
1135                 }
1136                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1137               }
1138               return TokenNameStringConstant;
1139             case '"' :
1140               try {
1141                 // consume next character
1142                 unicodeAsBackSlash = false;
1143                 currentCharacter = source[currentPosition++];
1144                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1145                 //                  && (source[currentPosition] == 'u')) {
1146                 //                  getNextUnicodeChar();
1147                 //                } else {
1148                 //                  if (withoutUnicodePtr != 0) {
1149                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1150                 //                      currentCharacter;
1151                 //                  }
1152                 //                }
1153
1154                 while (currentCharacter != '"') {
1155
1156                   /**** in PHP \r and \n are valid in string literals ****/
1157                   //                  if ((currentCharacter == '\n')
1158                   //                    || (currentCharacter == '\r')) {
1159                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1160                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1161                   //                      if (currentPosition + lookAhead == source.length)
1162                   //                        break;
1163                   //                      if (source[currentPosition + lookAhead] == '\n')
1164                   //                        break;
1165                   //                      if (source[currentPosition + lookAhead] == '\"') {
1166                   //                        currentPosition += lookAhead + 1;
1167                   //                        break;
1168                   //                      }
1169                   //                    }
1170                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1171                   //                  }
1172                   if (currentCharacter == '\\') {
1173                     int escapeSize = currentPosition;
1174                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1175                     //the escape scan below has a side effect on unicodeAsBackSlash; keep its previous value, which is needed a few lines further down
1176                     scanDoubleQuotedEscapeCharacter();
1177                     escapeSize = currentPosition - escapeSize;
1178                     if (withoutUnicodePtr == 0) {
1179                       //buffer all the entries that have been left aside....
1180                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1181                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1182                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1183                     } else { //overwrite the \ in the buffer
1184                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1185                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1186                         withoutUnicodePtr--;
1187                       }
1188                     }
1189                   }
1190                   // consume next character
1191                   unicodeAsBackSlash = false;
1192                   currentCharacter = source[currentPosition++];
1193                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1194                   //                    && (source[currentPosition] == 'u')) {
1195                   //                    getNextUnicodeChar();
1196                   //                  } else {
1197                   if (withoutUnicodePtr != 0) {
1198                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1199                   }
1200                   //                  }
1201
1202                 }
1203               } catch (IndexOutOfBoundsException e) {
1204                 throw new InvalidInputException(UNTERMINATED_STRING);
1205               } catch (InvalidInputException e) {
1206                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1207                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1208                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1209                     if (currentPosition + lookAhead == source.length)
1210                       break;
1211                     if (source[currentPosition + lookAhead] == '\n')
1212                       break;
1213                     if (source[currentPosition + lookAhead] == '\"') {
1214                       currentPosition += lookAhead + 1;
1215                       break;
1216                     }
1217                   }
1218
1219                 }
1220                 throw e; // rethrow
1221               }
1222               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1223                 if (currentLine == null) {
1224                   currentLine = new NLSLine();
1225                   lines.add(currentLine);
1226                 }
1227                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1228               }
1229               return TokenNameStringLiteral;
1230             case '`' :
1231               try {
1232                 // consume next character
1233                 unicodeAsBackSlash = false;
1234                 currentCharacter = source[currentPosition++];
1235                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1236                 //                  && (source[currentPosition] == 'u')) {
1237                 //                  getNextUnicodeChar();
1238                 //                } else {
1239                 //                  if (withoutUnicodePtr != 0) {
1240                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1241                 //                      currentCharacter;
1242                 //                  }
1243                 //                }
1244
1245                 while (currentCharacter != '`') {
1246
1247                   /**** in PHP \r and \n are valid in string literals ****/
1248                   //                if ((currentCharacter == '\n')
1249                   //                  || (currentCharacter == '\r')) {
1250                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1251                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1252                   //                    if (currentPosition + lookAhead == source.length)
1253                   //                      break;
1254                   //                    if (source[currentPosition + lookAhead] == '\n')
1255                   //                      break;
1256                   //                    if (source[currentPosition + lookAhead] == '\"') {
1257                   //                      currentPosition += lookAhead + 1;
1258                   //                      break;
1259                   //                    }
1260                   //                  }
1261                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1262                   //                }
1263                   if (currentCharacter == '\\') {
1264                     int escapeSize = currentPosition;
1265                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1266                     //the escape scan below has a side effect on unicodeAsBackSlash; keep its previous value, which is needed a few lines further down
1267                     scanDoubleQuotedEscapeCharacter();
1268                     escapeSize = currentPosition - escapeSize;
1269                     if (withoutUnicodePtr == 0) {
1270                       //buffer all the entries that have been left aside....
1271                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1272                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1273                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1274                     } else { //overwrite the \ in the buffer
1275                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1276                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1277                         withoutUnicodePtr--;
1278                       }
1279                     }
1280                   }
1281                   // consume next character
1282                   unicodeAsBackSlash = false;
1283                   currentCharacter = source[currentPosition++];
1284                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1285                   //                    && (source[currentPosition] == 'u')) {
1286                   //                    getNextUnicodeChar();
1287                   //                  } else {
1288                   if (withoutUnicodePtr != 0) {
1289                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1290                   }
1291                   //                  }
1292
1293                 }
1294               } catch (IndexOutOfBoundsException e) {
1295                 throw new InvalidInputException(UNTERMINATED_STRING);
1296               } catch (InvalidInputException e) {
1297                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1298                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1299                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1300                     if (currentPosition + lookAhead == source.length)
1301                       break;
1302                     if (source[currentPosition + lookAhead] == '\n')
1303                       break;
1304                     if (source[currentPosition + lookAhead] == '`') {
1305                       currentPosition += lookAhead + 1;
1306                       break;
1307                     }
1308                   }
1309
1310                 }
1311                 throw e; // rethrow
1312               }
1313               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1314                 if (currentLine == null) {
1315                   currentLine = new NLSLine();
1316                   lines.add(currentLine);
1317                 }
1318                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1319               }
1320               return TokenNameStringInterpolated;
1321             case '#' :
1322             case '/' :
1323               {
1324                 int test;
1325                 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1326                   //line comment 
1327                   int endPositionForLineComment = 0;
1328                   try { //get the next char 
1329                     currentCharacter = source[currentPosition++];
1330                     //                    if (((currentCharacter = source[currentPosition++])
1331                     //                      == '\\')
1332                     //                      && (source[currentPosition] == 'u')) {
1333                     //                      //-------------unicode traitement ------------
1334                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1335                     //                      currentPosition++;
1336                     //                      while (source[currentPosition] == 'u') {
1337                     //                        currentPosition++;
1338                     //                      }
1339                     //                      if ((c1 =
1340                     //                        Character.getNumericValue(source[currentPosition++]))
1341                     //                        > 15
1342                     //                        || c1 < 0
1343                     //                        || (c2 =
1344                     //                          Character.getNumericValue(source[currentPosition++]))
1345                     //                          > 15
1346                     //                        || c2 < 0
1347                     //                        || (c3 =
1348                     //                          Character.getNumericValue(source[currentPosition++]))
1349                     //                          > 15
1350                     //                        || c3 < 0
1351                     //                        || (c4 =
1352                     //                          Character.getNumericValue(source[currentPosition++]))
1353                     //                          > 15
1354                     //                        || c4 < 0) {
1355                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1356                     //                      } else {
1357                     //                        currentCharacter =
1358                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1359                     //                      }
1360                     //                    }
1361
1362                     //handle the \\u case manually into comment
1363                     //                    if (currentCharacter == '\\') {
1364                     //                      if (source[currentPosition] == '\\')
1365                     //                        currentPosition++;
1366                     //                    } //jump over the \\
1367                     boolean isUnicode = false;
1368                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1369                       if (currentCharacter == '?') {
1370                         if (getNextChar('>')) {
1371                           startPosition = currentPosition - 2;
1372                           phpMode = false;
1373                           return TokenNameStopPHP;
1374                         }
1375                       }
1376
1377                       //get the next char
1378                       isUnicode = false;
1379                       currentCharacter = source[currentPosition++];
1380                       //                      if (((currentCharacter = source[currentPosition++])
1381                       //                        == '\\')
1382                       //                        && (source[currentPosition] == 'u')) {
1383                       //                        isUnicode = true;
1384                       //                        //-------------unicode traitement ------------
1385                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1386                       //                        currentPosition++;
1387                       //                        while (source[currentPosition] == 'u') {
1388                       //                          currentPosition++;
1389                       //                        }
1390                       //                        if ((c1 =
1391                       //                          Character.getNumericValue(source[currentPosition++]))
1392                       //                          > 15
1393                       //                          || c1 < 0
1394                       //                          || (c2 =
1395                       //                            Character.getNumericValue(
1396                       //                              source[currentPosition++]))
1397                       //                            > 15
1398                       //                          || c2 < 0
1399                       //                          || (c3 =
1400                       //                            Character.getNumericValue(
1401                       //                              source[currentPosition++]))
1402                       //                            > 15
1403                       //                          || c3 < 0
1404                       //                          || (c4 =
1405                       //                            Character.getNumericValue(
1406                       //                              source[currentPosition++]))
1407                       //                            > 15
1408                       //                          || c4 < 0) {
1409                       //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1410                       //                        } else {
1411                       //                          currentCharacter =
1412                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1413                       //                        }
1414                       //                      }
1415                       //handle the \\u case manually into comment
1416                       //                      if (currentCharacter == '\\') {
1417                       //                        if (source[currentPosition] == '\\')
1418                       //                          currentPosition++;
1419                       //                      } //jump over the \\
1420                     }
1421                     if (isUnicode) {
1422                       endPositionForLineComment = currentPosition - 6;
1423                     } else {
1424                       endPositionForLineComment = currentPosition - 1;
1425                     }
1426                     recordComment(false);
1427                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1428                       checkNonExternalizeString();
1429                       if (recordLineSeparator) {
1430                         if (isUnicode) {
1431                           pushUnicodeLineSeparator();
1432                         } else {
1433                           pushLineSeparator();
1434                         }
1435                       } else {
1436                         currentLine = null;
1437                       }
1438                     }
1439                     if (tokenizeComments) {
1440                       if (!isUnicode) {
1441                         currentPosition = endPositionForLineComment;
1442                         // reset one character behind
1443                       }
1444                       return TokenNameCOMMENT_LINE;
1445                     }
1446                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1447                     if (tokenizeComments) {
1448                       currentPosition--;
1449                       // reset one character behind
1450                       return TokenNameCOMMENT_LINE;
1451                     }
1452                   }
1453                   break;
1454                 }
1455                 if (test > 0) {
1456                   //traditional and annotation comment
1457                   boolean isJavadoc = false, star = false;
1458                   // consume next character
1459                   unicodeAsBackSlash = false;
1460                   currentCharacter = source[currentPosition++];
1461                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1462                   //                    && (source[currentPosition] == 'u')) {
1463                   //                    getNextUnicodeChar();
1464                   //                  } else {
1465                   //                    if (withoutUnicodePtr != 0) {
1466                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1467                   //                        currentCharacter;
1468                   //                    }
1469                   //                  }
1470
1471                   if (currentCharacter == '*') {
1472                     isJavadoc = true;
1473                     star = true;
1474                   }
1475                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1476                     checkNonExternalizeString();
1477                     if (recordLineSeparator) {
1478                       pushLineSeparator();
1479                     } else {
1480                       currentLine = null;
1481                     }
1482                   }
1483                   try { //get the next char 
1484                     currentCharacter = source[currentPosition++];
1485                     //                    if (((currentCharacter = source[currentPosition++])
1486                     //                      == '\\')
1487                     //                      && (source[currentPosition] == 'u')) {
1488                     //                      //-------------unicode traitement ------------
1489                     //                      getNextUnicodeChar();
1490                     //                    }
1491                     //handle the \\u case manually into comment
1492                     //                    if (currentCharacter == '\\') {
1493                     //                      if (source[currentPosition] == '\\')
1494                     //                        currentPosition++;
1495                     //                      //jump over the \\
1496                     //                    }
1497                     // empty comment is not a javadoc /**/
1498                     if (currentCharacter == '/') {
1499                       isJavadoc = false;
1500                     }
1501                     //loop until end of comment */
1502                     while ((currentCharacter != '/') || (!star)) {
1503                       if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1504                         checkNonExternalizeString();
1505                         if (recordLineSeparator) {
1506                           pushLineSeparator();
1507                         } else {
1508                           currentLine = null;
1509                         }
1510                       }
1511                       star = currentCharacter == '*';
1512                       //get next char
1513                       currentCharacter = source[currentPosition++];
1514                       //                      if (((currentCharacter = source[currentPosition++])
1515                       //                        == '\\')
1516                       //                        && (source[currentPosition] == 'u')) {
1517                       //                        //-------------unicode traitement ------------
1518                       //                        getNextUnicodeChar();
1519                       //                      }
1520                       //handle the \\u case manually into comment
1521                       //                      if (currentCharacter == '\\') {
1522                       //                        if (source[currentPosition] == '\\')
1523                       //                          currentPosition++;
1524                       //                      } //jump over the \\
1525                     }
1526                     recordComment(isJavadoc);
1527                     if (tokenizeComments) {
1528                       if (isJavadoc)
1529                         return TokenNameCOMMENT_PHPDOC;
1530                       return TokenNameCOMMENT_BLOCK;
1531                     }
1532                   } catch (IndexOutOfBoundsException e) {
1533                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1534                   }
1535                   break;
1536                 }
1537                 if (getNextChar('='))
1538                   return TokenNameDIVIDE_EQUAL;
1539                 return TokenNameDIVIDE;
1540               }
1541             case '\u001a' :
1542               if (atEnd())
1543                 return TokenNameEOF;
1544               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1545               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1546
1547             default :
1548               if (currentCharacter == '$') {
1549                 while ((currentCharacter = source[currentPosition++]) == '$') {
1550                 }
1551                 if (currentCharacter == '{')
1552                   return TokenNameDOLLAR_LBRACE;
1553                 if (isPHPIdentifierStart(currentCharacter))
1554                   return scanIdentifierOrKeyword(true);
1555                 return TokenNameERROR;
1556               }
1557               if (isPHPIdentifierStart(currentCharacter))
1558                 return scanIdentifierOrKeyword(false);
1559               if (Character.isDigit(currentCharacter))
1560                 return scanNumber(false);
1561               return TokenNameERROR;
1562           }
1563         }
1564       } //-----------------end switch while try--------------------
1565       catch (IndexOutOfBoundsException e) {
1566       }
1567     }
1568     return TokenNameEOF;
1569   }
1570
1571   //  public final void getNextUnicodeChar()
1572   //    throws IndexOutOfBoundsException, InvalidInputException {
1573   //    //VOID
1574   //    //handle the case of unicode.
1575   //    //when a unicode appears then we must use a buffer that holds char internal values
1576   //    //At the end of this method currentCharacter holds the new visited char
1577   //    //and currentPosition points right next after it
1578   //
1579   //    //ALL getNextChar.... ARE OPTIMIZED COPIES 
1580   //
1581   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1582   //    currentPosition++;
1583   //    while (source[currentPosition] == 'u') {
1584   //      currentPosition++;
1585   //      unicodeSize++;
1586   //    }
1587   //
1588   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1589   //      || c1 < 0
1590   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1591   //      || c2 < 0
1592   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1593   //      || c3 < 0
1594   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1595   //      || c4 < 0) {
1596   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1597   //    } else {
1598   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1599   //      //need the unicode buffer
1600   //      if (withoutUnicodePtr == 0) {
1601   //        //buffer all the entries that have been left aside....
1602   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1603   //        System.arraycopy(
1604   //          source,
1605   //          startPosition,
1606   //          withoutUnicodeBuffer,
1607   //          1,
1608   //          withoutUnicodePtr);
1609   //      }
1610   //      //fill the buffer with the char
1611   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1612   //    }
1613   //    unicodeAsBackSlash = currentCharacter == '\\';
1614   //  }
1615   /* Tokenize a method body, assuming that curly brackets are properly balanced.
1616    */
1617   public final void jumpOverMethodBody() {
1618
1619     this.wasAcr = false;
1620     int found = 1;
1621     try {
1622       while (true) { //loop for jumping over comments
1623         // ---------Consume white space and handles startPosition---------
1624         boolean isWhiteSpace;
1625         do {
1626           startPosition = currentPosition;
1627           currentCharacter = source[currentPosition++];
1628           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1629           //            && (source[currentPosition] == 'u')) {
1630           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1631           //          } else {
1632           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1633             pushLineSeparator();
1634           isWhiteSpace = Character.isWhitespace(currentCharacter);
1635           //          }
1636         } while (isWhiteSpace);
1637
1638         // -------consume token until } is found---------
1639         switch (currentCharacter) {
1640           case '{' :
1641             found++;
1642             break;
1643           case '}' :
1644             found--;
1645             if (found == 0)
1646               return;
1647             break;
1648           case '\'' :
1649             {
1650               boolean test;
1651               test = getNextChar('\\');
1652               if (test) {
1653                 try {
1654                   scanDoubleQuotedEscapeCharacter();
1655                 } catch (InvalidInputException ex) {
1656                 };
1657               } else {
1658                 //                try { // consume next character
1659                 unicodeAsBackSlash = false;
1660                 currentCharacter = source[currentPosition++];
1661                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1662                 //                    && (source[currentPosition] == 'u')) {
1663                 //                    getNextUnicodeChar();
1664                 //                  } else {
1665                 if (withoutUnicodePtr != 0) {
1666                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1667                 }
1668                 //                  }
1669                 //                } catch (InvalidInputException ex) {
1670                 //                };
1671               }
1672               getNextChar('\'');
1673               break;
1674             }
1675           case '"' :
1676             try {
1677               //              try { // consume next character
1678               unicodeAsBackSlash = false;
1679               currentCharacter = source[currentPosition++];
1680               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1681               //                  && (source[currentPosition] == 'u')) {
1682               //                  getNextUnicodeChar();
1683               //                } else {
1684               if (withoutUnicodePtr != 0) {
1685                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1686               }
1687               //                }
1688               //              } catch (InvalidInputException ex) {
1689               //              };
1690               while (currentCharacter != '"') {
1691                 if (currentCharacter == '\r') {
1692                   if (source[currentPosition] == '\n')
1693                     currentPosition++;
1694                   break;
1695                   // the string cannot go further than the line
1696                 }
1697                 if (currentCharacter == '\n') {
1698                   break;
1699                   // the string cannot go further than the line
1700                 }
1701                 if (currentCharacter == '\\') {
1702                   try {
1703                     scanDoubleQuotedEscapeCharacter();
1704                   } catch (InvalidInputException ex) {
1705                   };
1706                 }
1707                 //                try { // consume next character
1708                 unicodeAsBackSlash = false;
1709                 currentCharacter = source[currentPosition++];
1710                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1711                 //                    && (source[currentPosition] == 'u')) {
1712                 //                    getNextUnicodeChar();
1713                 //                  } else {
1714                 if (withoutUnicodePtr != 0) {
1715                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1716                 }
1717                 //                  }
1718                 //                } catch (InvalidInputException ex) {
1719                 //                };
1720               }
1721             } catch (IndexOutOfBoundsException e) {
1722               return;
1723             }
1724             break;
1725           case '/' :
1726             {
1727               int test;
1728               if ((test = getNextChar('/', '*')) == 0) {
1729                 //line comment 
1730                 try {
1731                   //get the next char 
1732                   currentCharacter = source[currentPosition++];
1733                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1734                   //                    && (source[currentPosition] == 'u')) {
1735                   //                    //-------------unicode traitement ------------
1736                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1737                   //                    currentPosition++;
1738                   //                    while (source[currentPosition] == 'u') {
1739                   //                      currentPosition++;
1740                   //                    }
1741                   //                    if ((c1 =
1742                   //                      Character.getNumericValue(source[currentPosition++]))
1743                   //                      > 15
1744                   //                      || c1 < 0
1745                   //                      || (c2 =
1746                   //                        Character.getNumericValue(source[currentPosition++]))
1747                   //                        > 15
1748                   //                      || c2 < 0
1749                   //                      || (c3 =
1750                   //                        Character.getNumericValue(source[currentPosition++]))
1751                   //                        > 15
1752                   //                      || c3 < 0
1753                   //                      || (c4 =
1754                   //                        Character.getNumericValue(source[currentPosition++]))
1755                   //                        > 15
1756                   //                      || c4 < 0) {
1757                   //                      //error don't care of the value
1758                   //                      currentCharacter = 'A';
1759                   //                    } //something different from \n and \r
1760                   //                    else {
1761                   //                      currentCharacter =
1762                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1763                   //                    }
1764                   //                  }
1765
1766                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1767                     //get the next char 
1768                     currentCharacter = source[currentPosition++];
1769                     //                    if (((currentCharacter = source[currentPosition++])
1770                     //                      == '\\')
1771                     //                      && (source[currentPosition] == 'u')) {
1772                     //                      //-------------unicode traitement ------------
1773                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1774                     //                      currentPosition++;
1775                     //                      while (source[currentPosition] == 'u') {
1776                     //                        currentPosition++;
1777                     //                      }
1778                     //                      if ((c1 =
1779                     //                        Character.getNumericValue(source[currentPosition++]))
1780                     //                        > 15
1781                     //                        || c1 < 0
1782                     //                        || (c2 =
1783                     //                          Character.getNumericValue(source[currentPosition++]))
1784                     //                          > 15
1785                     //                        || c2 < 0
1786                     //                        || (c3 =
1787                     //                          Character.getNumericValue(source[currentPosition++]))
1788                     //                          > 15
1789                     //                        || c3 < 0
1790                     //                        || (c4 =
1791                     //                          Character.getNumericValue(source[currentPosition++]))
1792                     //                          > 15
1793                     //                        || c4 < 0) {
1794                     //                        //error don't care of the value
1795                     //                        currentCharacter = 'A';
1796                     //                      } //something different from \n and \r
1797                     //                      else {
1798                     //                        currentCharacter =
1799                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1800                     //                      }
1801                     //                    }
1802                   }
1803                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1804                     pushLineSeparator();
1805                 } catch (IndexOutOfBoundsException e) {
1806                 } //an eof will them be generated
1807                 break;
1808               }
1809               if (test > 0) {
1810                 //traditional and annotation comment
1811                 boolean star = false;
1812                 //                try { // consume next character
1813                 unicodeAsBackSlash = false;
1814                 currentCharacter = source[currentPosition++];
1815                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1816                 //                    && (source[currentPosition] == 'u')) {
1817                 //                    getNextUnicodeChar();
1818                 //                  } else {
1819                 if (withoutUnicodePtr != 0) {
1820                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1821                 }
1822                 //                  };
1823                 //                } catch (InvalidInputException ex) {
1824                 //                };
1825                 if (currentCharacter == '*') {
1826                   star = true;
1827                 }
1828                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1829                   pushLineSeparator();
1830                 try { //get the next char 
1831                   currentCharacter = source[currentPosition++];
1832                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1833                   //                    && (source[currentPosition] == 'u')) {
1834                   //                    //-------------unicode traitement ------------
1835                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1836                   //                    currentPosition++;
1837                   //                    while (source[currentPosition] == 'u') {
1838                   //                      currentPosition++;
1839                   //                    }
1840                   //                    if ((c1 =
1841                   //                      Character.getNumericValue(source[currentPosition++]))
1842                   //                      > 15
1843                   //                      || c1 < 0
1844                   //                      || (c2 =
1845                   //                        Character.getNumericValue(source[currentPosition++]))
1846                   //                        > 15
1847                   //                      || c2 < 0
1848                   //                      || (c3 =
1849                   //                        Character.getNumericValue(source[currentPosition++]))
1850                   //                        > 15
1851                   //                      || c3 < 0
1852                   //                      || (c4 =
1853                   //                        Character.getNumericValue(source[currentPosition++]))
1854                   //                        > 15
1855                   //                      || c4 < 0) {
1856                   //                      //error don't care of the value
1857                   //                      currentCharacter = 'A';
1858                   //                    } //something different from * and /
1859                   //                    else {
1860                   //                      currentCharacter =
1861                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1862                   //                    }
1863                   //                  }
1864                   //loop until end of comment */ 
1865                   while ((currentCharacter != '/') || (!star)) {
1866                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1867                       pushLineSeparator();
1868                     star = currentCharacter == '*';
1869                     //get next char
1870                     currentCharacter = source[currentPosition++];
1871                     //                    if (((currentCharacter = source[currentPosition++])
1872                     //                      == '\\')
1873                     //                      && (source[currentPosition] == 'u')) {
1874                     //                      //-------------unicode traitement ------------
1875                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1876                     //                      currentPosition++;
1877                     //                      while (source[currentPosition] == 'u') {
1878                     //                        currentPosition++;
1879                     //                      }
1880                     //                      if ((c1 =
1881                     //                        Character.getNumericValue(source[currentPosition++]))
1882                     //                        > 15
1883                     //                        || c1 < 0
1884                     //                        || (c2 =
1885                     //                          Character.getNumericValue(source[currentPosition++]))
1886                     //                          > 15
1887                     //                        || c2 < 0
1888                     //                        || (c3 =
1889                     //                          Character.getNumericValue(source[currentPosition++]))
1890                     //                          > 15
1891                     //                        || c3 < 0
1892                     //                        || (c4 =
1893                     //                          Character.getNumericValue(source[currentPosition++]))
1894                     //                          > 15
1895                     //                        || c4 < 0) {
1896                     //                        //error don't care of the value
1897                     //                        currentCharacter = 'A';
1898                     //                      } //something different from * and /
1899                     //                      else {
1900                     //                        currentCharacter =
1901                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1902                     //                      }
1903                     //                    }
1904                   }
1905                 } catch (IndexOutOfBoundsException e) {
1906                   return;
1907                 }
1908                 break;
1909               }
1910               break;
1911             }
1912
1913           default :
1914             if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1915               try {
1916                 scanIdentifierOrKeyword((currentCharacter == '$'));
1917               } catch (InvalidInputException ex) {
1918               };
1919               break;
1920             }
1921             if (Character.isDigit(currentCharacter)) {
1922               try {
1923                 scanNumber(false);
1924               } catch (InvalidInputException ex) {
1925               };
1926               break;
1927             }
1928         }
1929       }
1930       //-----------------end switch while try--------------------
1931     } catch (IndexOutOfBoundsException e) {
1932     } catch (InvalidInputException e) {
1933     }
1934     return;
1935   }
1936   //  public final boolean jumpOverUnicodeWhiteSpace()
1937   //    throws InvalidInputException {
1938   //    //BOOLEAN
1939   //    //handle the case of unicode. Jump over the next whiteSpace
1940   //    //making startPosition pointing on the next available char
1941   //    //On false, the currentCharacter is filled up with a potential
1942   //    //correct char
1943   //
1944   //    try {
1945   //      this.wasAcr = false;
1946   //      int c1, c2, c3, c4;
1947   //      int unicodeSize = 6;
1948   //      currentPosition++;
1949   //      while (source[currentPosition] == 'u') {
1950   //        currentPosition++;
1951   //        unicodeSize++;
1952   //      }
1953   //
1954   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1955   //        || c1 < 0)
1956   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1957   //          || c2 < 0)
1958   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1959   //          || c3 < 0)
1960   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1961   //          || c4 < 0)) {
1962   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1963   //      }
1964   //
1965   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1966   //      if (recordLineSeparator
1967   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1968   //        pushLineSeparator();
1969   //      if (Character.isWhitespace(currentCharacter))
1970   //        return true;
1971   //
1972   //      //buffer the new char which is not a white space
1973   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1974   //      //withoutUnicodePtr == 1 is true here
1975   //      return false;
1976   //    } catch (IndexOutOfBoundsException e) {
1977   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1978   //    }
1979   //  }
1980   public final int[] getLineEnds() {
1981     //return a bounded copy of this.lineEnds 
1982
1983     int[] copy;
1984     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
1985     return copy;
1986   }
1987
1988   public char[] getSource() {
1989     return this.source;
1990   }
1991   final char[] optimizedCurrentTokenSource1() {
1992     //return always the same char[] build only once
1993
1994     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
1995     char charOne = source[startPosition];
1996     switch (charOne) {
1997       case 'a' :
1998         return charArray_a;
1999       case 'b' :
2000         return charArray_b;
2001       case 'c' :
2002         return charArray_c;
2003       case 'd' :
2004         return charArray_d;
2005       case 'e' :
2006         return charArray_e;
2007       case 'f' :
2008         return charArray_f;
2009       case 'g' :
2010         return charArray_g;
2011       case 'h' :
2012         return charArray_h;
2013       case 'i' :
2014         return charArray_i;
2015       case 'j' :
2016         return charArray_j;
2017       case 'k' :
2018         return charArray_k;
2019       case 'l' :
2020         return charArray_l;
2021       case 'm' :
2022         return charArray_m;
2023       case 'n' :
2024         return charArray_n;
2025       case 'o' :
2026         return charArray_o;
2027       case 'p' :
2028         return charArray_p;
2029       case 'q' :
2030         return charArray_q;
2031       case 'r' :
2032         return charArray_r;
2033       case 's' :
2034         return charArray_s;
2035       case 't' :
2036         return charArray_t;
2037       case 'u' :
2038         return charArray_u;
2039       case 'v' :
2040         return charArray_v;
2041       case 'w' :
2042         return charArray_w;
2043       case 'x' :
2044         return charArray_x;
2045       case 'y' :
2046         return charArray_y;
2047       case 'z' :
2048         return charArray_z;
2049       default :
2050         return new char[] { charOne };
2051     }
2052   }
2053
2054   final char[] optimizedCurrentTokenSource2() {
2055     //try to return the same char[] build only once
2056
2057     char c0, c1;
2058     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2059     char[][] table = charArray_length[0][hash];
2060     int i = newEntry2;
2061     while (++i < InternalTableSize) {
2062       char[] charArray = table[i];
2063       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2064         return charArray;
2065     }
2066     //---------other side---------
2067     i = -1;
2068     int max = newEntry2;
2069     while (++i <= max) {
2070       char[] charArray = table[i];
2071       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2072         return charArray;
2073     }
2074     //--------add the entry-------
2075     if (++max >= InternalTableSize)
2076       max = 0;
2077     char[] r;
2078     table[max] = (r = new char[] { c0, c1 });
2079     newEntry2 = max;
2080     return r;
2081   }
2082
2083   final char[] optimizedCurrentTokenSource3() {
2084     //try to return the same char[] build only once
2085
2086     char c0, c1, c2;
2087     int hash =
2088       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2089         % TableSize;
2090     char[][] table = charArray_length[1][hash];
2091     int i = newEntry3;
2092     while (++i < InternalTableSize) {
2093       char[] charArray = table[i];
2094       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2095         return charArray;
2096     }
2097     //---------other side---------
2098     i = -1;
2099     int max = newEntry3;
2100     while (++i <= max) {
2101       char[] charArray = table[i];
2102       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2103         return charArray;
2104     }
2105     //--------add the entry-------
2106     if (++max >= InternalTableSize)
2107       max = 0;
2108     char[] r;
2109     table[max] = (r = new char[] { c0, c1, c2 });
2110     newEntry3 = max;
2111     return r;
2112   }
2113
2114   final char[] optimizedCurrentTokenSource4() {
2115     //try to return the same char[] build only once
2116
2117     char c0, c1, c2, c3;
2118     long hash =
2119       ((((long) (c0 = source[startPosition])) << 18)
2120         + ((c1 = source[startPosition + 1]) << 12)
2121         + ((c2 = source[startPosition + 2]) << 6)
2122         + (c3 = source[startPosition + 3]))
2123         % TableSize;
2124     char[][] table = charArray_length[2][(int) hash];
2125     int i = newEntry4;
2126     while (++i < InternalTableSize) {
2127       char[] charArray = table[i];
2128       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2129         return charArray;
2130     }
2131     //---------other side---------
2132     i = -1;
2133     int max = newEntry4;
2134     while (++i <= max) {
2135       char[] charArray = table[i];
2136       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2137         return charArray;
2138     }
2139     //--------add the entry-------
2140     if (++max >= InternalTableSize)
2141       max = 0;
2142     char[] r;
2143     table[max] = (r = new char[] { c0, c1, c2, c3 });
2144     newEntry4 = max;
2145     return r;
2146
2147   }
2148
2149   final char[] optimizedCurrentTokenSource5() {
2150     //try to return the same char[] build only once
2151
2152     char c0, c1, c2, c3, c4;
2153     long hash =
2154       ((((long) (c0 = source[startPosition])) << 24)
2155         + (((long) (c1 = source[startPosition + 1])) << 18)
2156         + ((c2 = source[startPosition + 2]) << 12)
2157         + ((c3 = source[startPosition + 3]) << 6)
2158         + (c4 = source[startPosition + 4]))
2159         % TableSize;
2160     char[][] table = charArray_length[3][(int) hash];
2161     int i = newEntry5;
2162     while (++i < InternalTableSize) {
2163       char[] charArray = table[i];
2164       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2165         return charArray;
2166     }
2167     //---------other side---------
2168     i = -1;
2169     int max = newEntry5;
2170     while (++i <= max) {
2171       char[] charArray = table[i];
2172       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2173         return charArray;
2174     }
2175     //--------add the entry-------
2176     if (++max >= InternalTableSize)
2177       max = 0;
2178     char[] r;
2179     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2180     newEntry5 = max;
2181     return r;
2182
2183   }
2184
2185   final char[] optimizedCurrentTokenSource6() {
2186     //try to return the same char[] build only once
2187
2188     char c0, c1, c2, c3, c4, c5;
2189     long hash =
2190       ((((long) (c0 = source[startPosition])) << 32)
2191         + (((long) (c1 = source[startPosition + 1])) << 24)
2192         + (((long) (c2 = source[startPosition + 2])) << 18)
2193         + ((c3 = source[startPosition + 3]) << 12)
2194         + ((c4 = source[startPosition + 4]) << 6)
2195         + (c5 = source[startPosition + 5]))
2196         % TableSize;
2197     char[][] table = charArray_length[4][(int) hash];
2198     int i = newEntry6;
2199     while (++i < InternalTableSize) {
2200       char[] charArray = table[i];
2201       if ((c0 == charArray[0])
2202         && (c1 == charArray[1])
2203         && (c2 == charArray[2])
2204         && (c3 == charArray[3])
2205         && (c4 == charArray[4])
2206         && (c5 == charArray[5]))
2207         return charArray;
2208     }
2209     //---------other side---------
2210     i = -1;
2211     int max = newEntry6;
2212     while (++i <= max) {
2213       char[] charArray = table[i];
2214       if ((c0 == charArray[0])
2215         && (c1 == charArray[1])
2216         && (c2 == charArray[2])
2217         && (c3 == charArray[3])
2218         && (c4 == charArray[4])
2219         && (c5 == charArray[5]))
2220         return charArray;
2221     }
2222     //--------add the entry-------
2223     if (++max >= InternalTableSize)
2224       max = 0;
2225     char[] r;
2226     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2227     newEntry6 = max;
2228     return r;
2229   }
2230
2231   public final void pushLineSeparator() throws InvalidInputException {
2232     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2233     final int INCREMENT = 250;
2234
2235     if (this.checkNonExternalizedStringLiterals) {
2236       // reinitialize the current line for non externalize strings purpose
2237       currentLine = null;
2238     }
2239     //currentCharacter is at position currentPosition-1
2240
2241     // cr 000D
2242     if (currentCharacter == '\r') {
2243       int separatorPos = currentPosition - 1;
2244       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2245         return;
2246       //System.out.println("CR-" + separatorPos);
2247       try {
2248         lineEnds[++linePtr] = separatorPos;
2249       } catch (IndexOutOfBoundsException e) {
2250         //linePtr value is correct
2251         int oldLength = lineEnds.length;
2252         int[] old = lineEnds;
2253         lineEnds = new int[oldLength + INCREMENT];
2254         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2255         lineEnds[linePtr] = separatorPos;
2256       }
2257       // look-ahead for merged cr+lf
2258       try {
2259         if (source[currentPosition] == '\n') {
2260           //System.out.println("look-ahead LF-" + currentPosition);                     
2261           lineEnds[linePtr] = currentPosition;
2262           currentPosition++;
2263           wasAcr = false;
2264         } else {
2265           wasAcr = true;
2266         }
2267       } catch (IndexOutOfBoundsException e) {
2268         wasAcr = true;
2269       }
2270     } else {
2271       // lf 000A
2272       if (currentCharacter == '\n') {
2273         //must merge eventual cr followed by lf
2274         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2275           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2276           lineEnds[linePtr] = currentPosition - 1;
2277         } else {
2278           int separatorPos = currentPosition - 1;
2279           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2280             return;
2281           // System.out.println("LF-" + separatorPos);                                                  
2282           try {
2283             lineEnds[++linePtr] = separatorPos;
2284           } catch (IndexOutOfBoundsException e) {
2285             //linePtr value is correct
2286             int oldLength = lineEnds.length;
2287             int[] old = lineEnds;
2288             lineEnds = new int[oldLength + INCREMENT];
2289             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2290             lineEnds[linePtr] = separatorPos;
2291           }
2292         }
2293         wasAcr = false;
2294       }
2295     }
2296   }
2297   public final void pushUnicodeLineSeparator() {
2298     // isUnicode means that the \r or \n has been read as a unicode character
2299
2300     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2301
2302     final int INCREMENT = 250;
2303     //currentCharacter is at position currentPosition-1
2304
2305     if (this.checkNonExternalizedStringLiterals) {
2306       // reinitialize the current line for non externalize strings purpose
2307       currentLine = null;
2308     }
2309
2310     // cr 000D
2311     if (currentCharacter == '\r') {
2312       int separatorPos = currentPosition - 6;
2313       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2314         return;
2315       //System.out.println("CR-" + separatorPos);
2316       try {
2317         lineEnds[++linePtr] = separatorPos;
2318       } catch (IndexOutOfBoundsException e) {
2319         //linePtr value is correct
2320         int oldLength = lineEnds.length;
2321         int[] old = lineEnds;
2322         lineEnds = new int[oldLength + INCREMENT];
2323         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2324         lineEnds[linePtr] = separatorPos;
2325       }
2326       // look-ahead for merged cr+lf
2327       if (source[currentPosition] == '\n') {
2328         //System.out.println("look-ahead LF-" + currentPosition);                       
2329         lineEnds[linePtr] = currentPosition;
2330         currentPosition++;
2331         wasAcr = false;
2332       } else {
2333         wasAcr = true;
2334       }
2335     } else {
2336       // lf 000A
2337       if (currentCharacter == '\n') {
2338         //must merge eventual cr followed by lf
2339         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2340           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2341           lineEnds[linePtr] = currentPosition - 6;
2342         } else {
2343           int separatorPos = currentPosition - 6;
2344           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2345             return;
2346           // System.out.println("LF-" + separatorPos);                                                  
2347           try {
2348             lineEnds[++linePtr] = separatorPos;
2349           } catch (IndexOutOfBoundsException e) {
2350             //linePtr value is correct
2351             int oldLength = lineEnds.length;
2352             int[] old = lineEnds;
2353             lineEnds = new int[oldLength + INCREMENT];
2354             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2355             lineEnds[linePtr] = separatorPos;
2356           }
2357         }
2358         wasAcr = false;
2359       }
2360     }
2361   }
2362   public final void recordComment(boolean isJavadoc) {
2363
2364     // a new annotation comment is recorded
2365     try {
2366       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2367     } catch (IndexOutOfBoundsException e) {
2368       int oldStackLength = commentStops.length;
2369       int[] oldStack = commentStops;
2370       commentStops = new int[oldStackLength + 30];
2371       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2372       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2373       //grows the positions buffers too
2374       int[] old = commentStarts;
2375       commentStarts = new int[oldStackLength + 30];
2376       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2377     }
2378
2379     //the buffer is of a correct size here
2380     commentStarts[commentPtr] = startPosition;
2381   }
2382   public void resetTo(int begin, int end) {
2383     //reset the scanner to a given position where it may rescan again
2384
2385     diet = false;
2386     initialPosition = startPosition = currentPosition = begin;
2387     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2388     commentPtr = -1; // reset comment stack
2389   }
2390
2391   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2392     // the string with "\\u" is a legal string of two chars \ and u
2393     //thus we use a direct access to the source (for regular cases).
2394
2395     //    if (unicodeAsBackSlash) {
2396     //      // consume next character
2397     //      unicodeAsBackSlash = false;
2398     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2399     //        && (source[currentPosition] == 'u')) {
2400     //        getNextUnicodeChar();
2401     //      } else {
2402     //        if (withoutUnicodePtr != 0) {
2403     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2404     //        }
2405     //      }
2406     //    } else
2407     currentCharacter = source[currentPosition++];
2408     switch (currentCharacter) {
2409       case '\'' :
2410         currentCharacter = '\'';
2411         break;
2412       case '\\' :
2413         currentCharacter = '\\';
2414         break;
2415       default :
2416         currentCharacter = '\\';
2417         currentPosition--;
2418     }
2419   }
2420
2421   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2422     // the string with "\\u" is a legal string of two chars \ and u
2423     //thus we use a direct access to the source (for regular cases).
2424
2425     //    if (unicodeAsBackSlash) {
2426     //      // consume next character
2427     //      unicodeAsBackSlash = false;
2428     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2429     //        && (source[currentPosition] == 'u')) {
2430     //        getNextUnicodeChar();
2431     //      } else {
2432     //        if (withoutUnicodePtr != 0) {
2433     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2434     //        }
2435     //      }
2436     //    } else
2437     currentCharacter = source[currentPosition++];
2438     switch (currentCharacter) {
2439       //      case 'b' :
2440       //        currentCharacter = '\b';
2441       //        break;
2442       case 't' :
2443         currentCharacter = '\t';
2444         break;
2445       case 'n' :
2446         currentCharacter = '\n';
2447         break;
2448         //      case 'f' :
2449         //        currentCharacter = '\f';
2450         //        break;
2451       case 'r' :
2452         currentCharacter = '\r';
2453         break;
2454       case '\"' :
2455         currentCharacter = '\"';
2456         break;
2457       case '\'' :
2458         currentCharacter = '\'';
2459         break;
2460       case '\\' :
2461         currentCharacter = '\\';
2462         break;
2463       case '$' :
2464         currentCharacter = '$';
2465         break;
2466       default :
2467         // -----------octal escape--------------
2468         // OctalDigit
2469         // OctalDigit OctalDigit
2470         // ZeroToThree OctalDigit OctalDigit
2471
2472         int number = Character.getNumericValue(currentCharacter);
2473         if (number >= 0 && number <= 7) {
2474           boolean zeroToThreeNot = number > 3;
2475           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2476             int digit = Character.getNumericValue(currentCharacter);
2477             if (digit >= 0 && digit <= 7) {
2478               number = (number * 8) + digit;
2479               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2480                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2481                   currentPosition--;
2482                 } else {
2483                   digit = Character.getNumericValue(currentCharacter);
2484                   if (digit >= 0 && digit <= 7) {
2485                     // has read \ZeroToThree OctalDigit OctalDigit
2486                     number = (number * 8) + digit;
2487                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2488                     currentPosition--;
2489                   }
2490                 }
2491               } else { // has read \OctalDigit NonDigit--> ignore last character
2492                 currentPosition--;
2493               }
2494             } else { // has read \OctalDigit NonOctalDigit--> ignore last character                                             
2495               currentPosition--;
2496             }
2497           } else { // has read \OctalDigit --> ignore last character
2498             currentPosition--;
2499           }
2500           if (number > 255)
2501             throw new InvalidInputException(INVALID_ESCAPE);
2502           currentCharacter = (char) number;
2503         }
2504         //else
2505         //     throw new InvalidInputException(INVALID_ESCAPE);
2506     }
2507   }
2508
2509   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2510   //    return scanIdentifierOrKeyword( false );
2511   //  }
2512
2513   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2514     //test keywords
2515
2516     //first dispatch on the first char.
2517     //then the length. If there are several
2518     //keywors with the same length AND the same first char, then do another
2519     //disptach on the second char :-)...cool....but fast !
2520
2521     useAssertAsAnIndentifier = false;
2522
2523     while (getNextCharAsJavaIdentifierPart()) {
2524     };
2525
2526     if (isVariable) {
2527       return TokenNameVariable;
2528     }
2529     int index, length;
2530     char[] data;
2531     char firstLetter;
2532     //    if (withoutUnicodePtr == 0)
2533
2534     //quick test on length == 1 but not on length > 12 while most identifier
2535     //have a length which is <= 12...but there are lots of identifier with
2536     //only one char....
2537
2538     //      {
2539     if ((length = currentPosition - startPosition) == 1)
2540       return TokenNameIdentifier;
2541     //  data = source;
2542     data = new char[length];
2543     index = startPosition;
2544     for (int i = 0; i < length; i++) {
2545       data[i] = Character.toLowerCase(source[index + i]);
2546     }
2547     index = 0;
2548     //    } else {
2549     //      if ((length = withoutUnicodePtr) == 1)
2550     //        return TokenNameIdentifier;
2551     //      // data = withoutUnicodeBuffer;
2552     //      data = new char[withoutUnicodeBuffer.length];
2553     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2554     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2555     //      }
2556     //      index = 1;
2557     //    }
2558
2559     firstLetter = data[index];
2560     switch (firstLetter) {
2561
2562       case 'a' : // as and array
2563         switch (length) {
2564           case 2 : //as
2565             if ((data[++index] == 's')) {
2566               return TokenNameas;
2567             } else {
2568               return TokenNameIdentifier;
2569             }
2570           case 3 : //and
2571             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2572               return TokenNameAND;
2573             } else {
2574               return TokenNameIdentifier;
2575             }
2576             //          case 5 :
2577             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2578             //              return TokenNamearray;
2579             //            else
2580             //              return TokenNameIdentifier;
2581           default :
2582             return TokenNameIdentifier;
2583         }
2584       case 'b' : //break
2585         switch (length) {
2586           case 5 :
2587             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2588               return TokenNamebreak;
2589             else
2590               return TokenNameIdentifier;
2591           default :
2592             return TokenNameIdentifier;
2593         }
2594
2595       case 'c' : //case class continue
2596         switch (length) {
2597           case 4 :
2598             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2599               return TokenNamecase;
2600             else
2601               return TokenNameIdentifier;
2602           case 5 :
2603             if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2604               return TokenNameclass;
2605             else
2606               return TokenNameIdentifier;
2607           case 8 :
2608             if ((data[++index] == 'o')
2609               && (data[++index] == 'n')
2610               && (data[++index] == 't')
2611               && (data[++index] == 'i')
2612               && (data[++index] == 'n')
2613               && (data[++index] == 'u')
2614               && (data[++index] == 'e'))
2615               return TokenNamecontinue;
2616             else
2617               return TokenNameIdentifier;
2618           default :
2619             return TokenNameIdentifier;
2620         }
2621
2622       case 'd' : //define default do 
2623         switch (length) {
2624           case 2 :
2625             if ((data[++index] == 'o'))
2626               return TokenNamedo;
2627             else
2628               return TokenNameIdentifier;
2629           case 6 :
2630             if ((data[++index] == 'e')
2631               && (data[++index] == 'f')
2632               && (data[++index] == 'i')
2633               && (data[++index] == 'n')
2634               && (data[++index] == 'e'))
2635               return TokenNamedefine;
2636             else
2637               return TokenNameIdentifier;
2638           case 7 :
2639             if ((data[++index] == 'e')
2640               && (data[++index] == 'f')
2641               && (data[++index] == 'a')
2642               && (data[++index] == 'u')
2643               && (data[++index] == 'l')
2644               && (data[++index] == 't'))
2645               return TokenNamedefault;
2646             else
2647               return TokenNameIdentifier;
2648           default :
2649             return TokenNameIdentifier;
2650         }
2651       case 'e' : //echo else elseif extends
2652         switch (length) {
2653           case 4 :
2654             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2655               return TokenNameecho;
2656             else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2657               return TokenNameelse;
2658             else
2659               return TokenNameIdentifier;
2660           case 5 : // endif
2661             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2662               return TokenNameendif;
2663             else
2664               return TokenNameIdentifier;
2665           case 6 : // endfor
2666             if ((data[++index] == 'n')
2667               && (data[++index] == 'd')
2668               && (data[++index] == 'f')
2669               && (data[++index] == 'o')
2670               && (data[++index] == 'r'))
2671               return TokenNameendfor;
2672             else if (
2673               (data[index] == 'l')
2674                 && (data[++index] == 's')
2675                 && (data[++index] == 'e')
2676                 && (data[++index] == 'i')
2677                 && (data[++index] == 'f'))
2678               return TokenNameelseif;
2679             else
2680               return TokenNameIdentifier;
2681           case 7 :
2682             if ((data[++index] == 'x')
2683               && (data[++index] == 't')
2684               && (data[++index] == 'e')
2685               && (data[++index] == 'n')
2686               && (data[++index] == 'd')
2687               && (data[++index] == 's'))
2688               return TokenNameextends;
2689             else
2690               return TokenNameIdentifier;
2691           case 8 : // endwhile
2692             if ((data[++index] == 'n')
2693               && (data[++index] == 'd')
2694               && (data[++index] == 'w')
2695               && (data[++index] == 'h')
2696               && (data[++index] == 'i')
2697               && (data[++index] == 'l')
2698               && (data[++index] == 'e'))
2699               return TokenNameendwhile;
2700             else
2701               return TokenNameIdentifier;
2702           case 9 : // endswitch
2703             if ((data[++index] == 'n')
2704               && (data[++index] == 'd')
2705               && (data[++index] == 's')
2706               && (data[++index] == 'w')
2707               && (data[++index] == 'i')
2708               && (data[++index] == 't')
2709               && (data[++index] == 'c')
2710               && (data[++index] == 'h'))
2711               return TokenNameendswitch;
2712             else
2713               return TokenNameIdentifier;
2714           case 10 : // endforeach
2715             if ((data[++index] == 'n')
2716               && (data[++index] == 'd')
2717               && (data[++index] == 'f')
2718               && (data[++index] == 'o')
2719               && (data[++index] == 'r')
2720               && (data[++index] == 'e')
2721               && (data[++index] == 'a')
2722               && (data[++index] == 'c')
2723               && (data[++index] == 'h'))
2724               return TokenNameendforeach;
2725             else
2726               return TokenNameIdentifier;
2727
2728           default :
2729             return TokenNameIdentifier;
2730         }
2731
2732       case 'f' : //for false function
2733         switch (length) {
2734           case 3 :
2735             if ((data[++index] == 'o') && (data[++index] == 'r'))
2736               return TokenNamefor;
2737             else
2738               return TokenNameIdentifier;
2739           case 5 :
2740             if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2741               return TokenNamefalse;
2742             else
2743               return TokenNameIdentifier;
2744           case 7 : // function
2745             if ((data[++index] == 'o')
2746               && (data[++index] == 'r')
2747               && (data[++index] == 'e')
2748               && (data[++index] == 'a')
2749               && (data[++index] == 'c')
2750               && (data[++index] == 'h'))
2751               return TokenNameforeach;
2752             else
2753               return TokenNameIdentifier;
2754           case 8 : // function
2755             if ((data[++index] == 'u')
2756               && (data[++index] == 'n')
2757               && (data[++index] == 'c')
2758               && (data[++index] == 't')
2759               && (data[++index] == 'i')
2760               && (data[++index] == 'o')
2761               && (data[++index] == 'n'))
2762               return TokenNamefunction;
2763             else
2764               return TokenNameIdentifier;
2765           default :
2766             return TokenNameIdentifier;
2767         }
2768       case 'g' : //global
2769         if (length == 6) {
2770           if ((data[++index] == 'l')
2771             && (data[++index] == 'o')
2772             && (data[++index] == 'b')
2773             && (data[++index] == 'a')
2774             && (data[++index] == 'l')) {
2775             return TokenNameglobal;
2776           }
2777         }
2778         return TokenNameIdentifier;
2779
2780       case 'i' : //if int 
2781         switch (length) {
2782           case 2 :
2783             if (data[++index] == 'f')
2784               return TokenNameif;
2785             else
2786               return TokenNameIdentifier;
2787             //          case 3 :
2788             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2789             //              return TokenNameint;
2790             //            else
2791             //              return TokenNameIdentifier;
2792           case 7 :
2793             if ((data[++index] == 'n')
2794               && (data[++index] == 'c')
2795               && (data[++index] == 'l')
2796               && (data[++index] == 'u')
2797               && (data[++index] == 'd')
2798               && (data[++index] == 'e'))
2799               return TokenNameinclude;
2800             else
2801               return TokenNameIdentifier;
2802           case 12 :
2803             if ((data[++index] == 'n')
2804               && (data[++index] == 'c')
2805               && (data[++index] == 'l')
2806               && (data[++index] == 'u')
2807               && (data[++index] == 'd')
2808               && (data[++index] == 'e')
2809               && (data[++index] == '_')
2810               && (data[++index] == 'o')
2811               && (data[++index] == 'n')
2812               && (data[++index] == 'c')
2813               && (data[++index] == 'e'))
2814               return TokenNameinclude_once;
2815             else
2816               return TokenNameIdentifier;
2817           default :
2818             return TokenNameIdentifier;
2819         }
2820
2821       case 'l' : //list
2822         if (length == 4) {
2823           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2824             return TokenNamelist;
2825           }
2826         }
2827         return TokenNameIdentifier;
2828
2829       case 'n' : // new null
2830         switch (length) {
2831           case 3 :
2832             if ((data[++index] == 'e') && (data[++index] == 'w'))
2833               return TokenNamenew;
2834             else
2835               return TokenNameIdentifier;
2836           case 4 :
2837             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2838               return TokenNamenull;
2839             else
2840               return TokenNameIdentifier;
2841
2842           default :
2843             return TokenNameIdentifier;
2844         }
2845       case 'o' : // or old_function
2846         if (length == 2) {
2847           if (data[++index] == 'r') {
2848             return TokenNameOR;
2849           }
2850         }
2851         //        if (length == 12) {
2852         //          if ((data[++index] == 'l')
2853         //            && (data[++index] == 'd')
2854         //            && (data[++index] == '_')
2855         //            && (data[++index] == 'f')
2856         //            && (data[++index] == 'u')
2857         //            && (data[++index] == 'n')
2858         //            && (data[++index] == 'c')
2859         //            && (data[++index] == 't')
2860         //            && (data[++index] == 'i')
2861         //            && (data[++index] == 'o')
2862         //            && (data[++index] == 'n')) {
2863         //            return TokenNameold_function;
2864         //          }
2865         //        }
2866         return TokenNameIdentifier;
2867
2868       case 'p' : // print
2869         if (length == 5) {
2870           if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2871             return TokenNameprint;
2872           }
2873         }
2874         return TokenNameIdentifier;
2875       case 'r' : //return require require_once
2876         if (length == 6) {
2877           if ((data[++index] == 'e')
2878             && (data[++index] == 't')
2879             && (data[++index] == 'u')
2880             && (data[++index] == 'r')
2881             && (data[++index] == 'n')) {
2882             return TokenNamereturn;
2883           }
2884         } else if (length == 7) {
2885           if ((data[++index] == 'e')
2886             && (data[++index] == 'q')
2887             && (data[++index] == 'u')
2888             && (data[++index] == 'i')
2889             && (data[++index] == 'r')
2890             && (data[++index] == 'e')) {
2891             return TokenNamerequire;
2892           }
2893         } else if (length == 12) {
2894           if ((data[++index] == 'e')
2895             && (data[++index] == 'q')
2896             && (data[++index] == 'u')
2897             && (data[++index] == 'i')
2898             && (data[++index] == 'r')
2899             && (data[++index] == 'e')
2900             && (data[++index] == '_')
2901             && (data[++index] == 'o')
2902             && (data[++index] == 'n')
2903             && (data[++index] == 'c')
2904             && (data[++index] == 'e')) {
2905             return TokenNamerequire_once;
2906           }
2907         } else
2908           return TokenNameIdentifier;
2909
2910       case 's' : //static switch 
2911         switch (length) {
2912           case 6 :
2913             if (data[++index] == 't')
2914               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2915                 return TokenNamestatic;
2916               } else
2917                 return TokenNameIdentifier;
2918             else if (
2919               (data[index] == 'w')
2920                 && (data[++index] == 'i')
2921                 && (data[++index] == 't')
2922                 && (data[++index] == 'c')
2923                 && (data[++index] == 'h'))
2924               return TokenNameswitch;
2925             else
2926               return TokenNameIdentifier;
2927           default :
2928             return TokenNameIdentifier;
2929         }
2930
2931       case 't' : // true
2932         switch (length) {
2933
2934           case 4 :
2935             if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2936               return TokenNametrue;
2937             else
2938               return TokenNameIdentifier;
2939             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2940             //              return TokenNamethis;
2941
2942           default :
2943             return TokenNameIdentifier;
2944         }
2945
2946       case 'v' : //var 
2947         switch (length) {
2948           case 3 :
2949             if ((data[++index] == 'a') && (data[++index] == 'r'))
2950               return TokenNamevar;
2951             else
2952               return TokenNameIdentifier;
2953
2954           default :
2955             return TokenNameIdentifier;
2956         }
2957
2958       case 'w' : //while 
2959         switch (length) {
2960           case 5 :
2961             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2962               return TokenNamewhile;
2963             else
2964               return TokenNameIdentifier;
2965             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2966             //return TokenNamewidefp ;
2967             //else
2968             //return TokenNameIdentifier;
2969           default :
2970             return TokenNameIdentifier;
2971         }
2972
2973       case 'x' : //xor
2974         switch (length) {
2975           case 3 :
2976             if ((data[++index] == 'o') && (data[++index] == 'r'))
2977               return TokenNameXOR;
2978             else
2979               return TokenNameIdentifier;
2980
2981           default :
2982             return TokenNameIdentifier;
2983         }
2984       default :
2985         return TokenNameIdentifier;
2986     }
2987   }
2988   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
2989
2990     //when entering this method the currentCharacter is the firt
2991     //digit of the number , i.e. it may be preceeded by a . when
2992     //dotPrefix is true
2993
2994     boolean floating = dotPrefix;
2995     if ((!dotPrefix) && (currentCharacter == '0')) {
2996       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
2997         //force the first char of the hexa number do exist...
2998         // consume next character
2999         unicodeAsBackSlash = false;
3000         currentCharacter = source[currentPosition++];
3001         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3002         //          && (source[currentPosition] == 'u')) {
3003         //          getNextUnicodeChar();
3004         //        } else {
3005         //          if (withoutUnicodePtr != 0) {
3006         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3007         //          }
3008         //        }
3009         if (Character.digit(currentCharacter, 16) == -1)
3010           throw new InvalidInputException(INVALID_HEXA);
3011         //---end forcing--
3012         while (getNextCharAsDigit(16)) {
3013         };
3014         //        if (getNextChar('l', 'L') >= 0)
3015         //          return TokenNameLongLiteral;
3016         //        else
3017         return TokenNameIntegerLiteral;
3018       }
3019
3020       //there is x or X in the number
3021       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3022       if (getNextCharAsDigit()) {
3023         //-------------potential octal-----------------
3024         while (getNextCharAsDigit()) {
3025         };
3026
3027         //        if (getNextChar('l', 'L') >= 0) {
3028         //          return TokenNameLongLiteral;
3029         //        }
3030         //
3031         //        if (getNextChar('f', 'F') >= 0) {
3032         //          return TokenNameFloatingPointLiteral;
3033         //        }
3034
3035         if (getNextChar('d', 'D') >= 0) {
3036           return TokenNameDoubleLiteral;
3037         } else { //make the distinction between octal and float ....
3038           if (getNextChar('.')) { //bingo ! ....
3039             while (getNextCharAsDigit()) {
3040             };
3041             if (getNextChar('e', 'E') >= 0) {
3042               // consume next character
3043               unicodeAsBackSlash = false;
3044               currentCharacter = source[currentPosition++];
3045               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3046               //                && (source[currentPosition] == 'u')) {
3047               //                getNextUnicodeChar();
3048               //              } else {
3049               //                if (withoutUnicodePtr != 0) {
3050               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3051               //                }
3052               //              }
3053
3054               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3055                 // consume next character
3056                 unicodeAsBackSlash = false;
3057                 currentCharacter = source[currentPosition++];
3058                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3059                 //                  && (source[currentPosition] == 'u')) {
3060                 //                  getNextUnicodeChar();
3061                 //                } else {
3062                 //                  if (withoutUnicodePtr != 0) {
3063                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3064                 //                      currentCharacter;
3065                 //                  }
3066                 //                }
3067               }
3068               if (!Character.isDigit(currentCharacter))
3069                 throw new InvalidInputException(INVALID_FLOAT);
3070               while (getNextCharAsDigit()) {
3071               };
3072             }
3073             //            if (getNextChar('f', 'F') >= 0)
3074             //              return TokenNameFloatingPointLiteral;
3075             getNextChar('d', 'D'); //jump over potential d or D
3076             return TokenNameDoubleLiteral;
3077           } else {
3078             return TokenNameIntegerLiteral;
3079           }
3080         }
3081       } else {
3082         /* carry on */
3083       }
3084     }
3085
3086     while (getNextCharAsDigit()) {
3087     };
3088
3089     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3090     //      return TokenNameLongLiteral;
3091
3092     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3093       while (getNextCharAsDigit()) {
3094       };
3095       floating = true;
3096     }
3097
3098     //if floating is true both exponant and suffix may be optional
3099
3100     if (getNextChar('e', 'E') >= 0) {
3101       floating = true;
3102       // consume next character
3103       unicodeAsBackSlash = false;
3104       currentCharacter = source[currentPosition++];
3105       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3106       //        && (source[currentPosition] == 'u')) {
3107       //        getNextUnicodeChar();
3108       //      } else {
3109       //        if (withoutUnicodePtr != 0) {
3110       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3111       //        }
3112       //      }
3113
3114       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3115         unicodeAsBackSlash = false;
3116         currentCharacter = source[currentPosition++];
3117         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3118         //          && (source[currentPosition] == 'u')) {
3119         //          getNextUnicodeChar();
3120         //        } else {
3121         //          if (withoutUnicodePtr != 0) {
3122         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3123         //          }
3124         //        }
3125       }
3126       if (!Character.isDigit(currentCharacter))
3127         throw new InvalidInputException(INVALID_FLOAT);
3128       while (getNextCharAsDigit()) {
3129       };
3130     }
3131
3132     if (getNextChar('d', 'D') >= 0)
3133       return TokenNameDoubleLiteral;
3134     //    if (getNextChar('f', 'F') >= 0)
3135     //      return TokenNameFloatingPointLiteral;
3136
3137     //the long flag has been tested before
3138
3139     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3140   }
3141   /**
3142    * Search the line number corresponding to a specific position
3143    *
3144    */
3145   public final int getLineNumber(int position) {
3146
3147     if (lineEnds == null)
3148       return 1;
3149     int length = linePtr + 1;
3150     if (length == 0)
3151       return 1;
3152     int g = 0, d = length - 1;
3153     int m = 0;
3154     while (g <= d) {
3155       m = (g + d) / 2;
3156       if (position < lineEnds[m]) {
3157         d = m - 1;
3158       } else if (position > lineEnds[m]) {
3159         g = m + 1;
3160       } else {
3161         return m + 1;
3162       }
3163     }
3164     if (position < lineEnds[m]) {
3165       return m + 1;
3166     }
3167     return m + 2;
3168   }
3169
3170   public void setPHPMode(boolean mode) {
3171     phpMode = mode;
3172   }
3173
3174   public final void setSource(char[] source) {
3175     //the source-buffer is set to sourceString
3176
3177     if (source == null) {
3178       this.source = new char[0];
3179     } else {
3180       this.source = source;
3181     }
3182     startPosition = -1;
3183     initialPosition = currentPosition = 0;
3184     containsAssertKeyword = false;
3185     withoutUnicodeBuffer = new char[this.source.length];
3186
3187   }
3188
3189   public String toString() {
3190     if (startPosition == source.length)
3191       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3192     if (currentPosition > source.length)
3193       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3194
3195     char front[] = new char[startPosition];
3196     System.arraycopy(source, 0, front, 0, startPosition);
3197
3198     int middleLength = (currentPosition - 1) - startPosition + 1;
3199     char middle[];
3200     if (middleLength > -1) {
3201       middle = new char[middleLength];
3202       System.arraycopy(source, startPosition, middle, 0, middleLength);
3203     } else {
3204       middle = new char[0];
3205     }
3206
3207     char end[] = new char[source.length - (currentPosition - 1)];
3208     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3209
3210     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3211     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3212     + new String(end);
3213   }
3214   public final String toStringAction(int act) {
3215
3216     switch (act) {
3217       case TokenNameERROR :
3218         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3219       case TokenNameStopPHP :
3220         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3221       case TokenNameIdentifier :
3222         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3223       case TokenNameVariable :
3224         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3225       case TokenNameas :
3226         return "as"; //$NON-NLS-1$
3227       case TokenNamebreak :
3228         return "break"; //$NON-NLS-1$
3229       case TokenNamecase :
3230         return "case"; //$NON-NLS-1$
3231       case TokenNameclass :
3232         return "class"; //$NON-NLS-1$
3233       case TokenNamecontinue :
3234         return "continue"; //$NON-NLS-1$
3235       case TokenNamedefault :
3236         return "default"; //$NON-NLS-1$
3237       case TokenNamedefine :
3238         return "define"; //$NON-NLS-1$
3239       case TokenNamedo :
3240         return "do"; //$NON-NLS-1$
3241       case TokenNameecho :
3242         return "echo"; //$NON-NLS-1$
3243       case TokenNameelse :
3244         return "else"; //$NON-NLS-1$
3245       case TokenNameelseif :
3246         return "elseif"; //$NON-NLS-1$
3247       case TokenNameendfor :
3248         return "endfor"; //$NON-NLS-1$
3249       case TokenNameendforeach :
3250         return "endforeach"; //$NON-NLS-1$
3251       case TokenNameendif :
3252         return "endif"; //$NON-NLS-1$
3253       case TokenNameendswitch :
3254         return "endswitch"; //$NON-NLS-1$
3255       case TokenNameendwhile :
3256         return "endwhile"; //$NON-NLS-1$
3257       case TokenNameextends :
3258         return "extends"; //$NON-NLS-1$
3259       case TokenNamefalse :
3260         return "false"; //$NON-NLS-1$
3261       case TokenNamefor :
3262         return "for"; //$NON-NLS-1$
3263       case TokenNameforeach :
3264         return "foreach"; //$NON-NLS-1$
3265       case TokenNamefunction :
3266         return "function"; //$NON-NLS-1$
3267       case TokenNameglobal :
3268         return "global"; //$NON-NLS-1$
3269       case TokenNameif :
3270         return "if"; //$NON-NLS-1$
3271       case TokenNameinclude :
3272         return "include"; //$NON-NLS-1$
3273       case TokenNameinclude_once :
3274         return "include_once"; //$NON-NLS-1$
3275       case TokenNamelist :
3276         return "list"; //$NON-NLS-1$
3277       case TokenNamenew :
3278         return "new"; //$NON-NLS-1$
3279       case TokenNamenull :
3280         return "null"; //$NON-NLS-1$
3281       case TokenNameprint :
3282         return "print"; //$NON-NLS-1$
3283       case TokenNamerequire :
3284         return "require"; //$NON-NLS-1$
3285       case TokenNamerequire_once :
3286         return "require_once"; //$NON-NLS-1$
3287       case TokenNamereturn :
3288         return "return"; //$NON-NLS-1$
3289       case TokenNamestatic :
3290         return "static"; //$NON-NLS-1$
3291       case TokenNameswitch :
3292         return "switch"; //$NON-NLS-1$
3293       case TokenNametrue :
3294         return "true"; //$NON-NLS-1$
3295       case TokenNamevar :
3296         return "var"; //$NON-NLS-1$
3297       case TokenNamewhile :
3298         return "while"; //$NON-NLS-1$
3299       case TokenNameIntegerLiteral :
3300         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3301       case TokenNameDoubleLiteral :
3302         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3303       case TokenNameStringLiteral :
3304         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3305       case TokenNameStringConstant :
3306         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3307       case TokenNameStringInterpolated :
3308         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3309       case TokenNameHEREDOC :
3310         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3311
3312       case TokenNamePLUS_PLUS :
3313         return "++"; //$NON-NLS-1$
3314       case TokenNameMINUS_MINUS :
3315         return "--"; //$NON-NLS-1$
3316       case TokenNameEQUAL_EQUAL :
3317         return "=="; //$NON-NLS-1$
3318       case TokenNameEQUAL_GREATER :
3319         return "=>"; //$NON-NLS-1$
3320       case TokenNameLESS_EQUAL :
3321         return "<="; //$NON-NLS-1$
3322       case TokenNameGREATER_EQUAL :
3323         return ">="; //$NON-NLS-1$
3324       case TokenNameNOT_EQUAL :
3325         return "!="; //$NON-NLS-1$
3326       case TokenNameLEFT_SHIFT :
3327         return "<<"; //$NON-NLS-1$
3328       case TokenNameRIGHT_SHIFT :
3329         return ">>"; //$NON-NLS-1$
3330       case TokenNamePLUS_EQUAL :
3331         return "+="; //$NON-NLS-1$
3332       case TokenNameMINUS_EQUAL :
3333         return "-="; //$NON-NLS-1$
3334       case TokenNameMULTIPLY_EQUAL :
3335         return "*="; //$NON-NLS-1$
3336       case TokenNameDIVIDE_EQUAL :
3337         return "/="; //$NON-NLS-1$
3338       case TokenNameAND_EQUAL :
3339         return "&="; //$NON-NLS-1$
3340       case TokenNameOR_EQUAL :
3341         return "|="; //$NON-NLS-1$
3342       case TokenNameXOR_EQUAL :
3343         return "^="; //$NON-NLS-1$
3344       case TokenNameREMAINDER_EQUAL :
3345         return "%="; //$NON-NLS-1$
3346       case TokenNameLEFT_SHIFT_EQUAL :
3347         return "<<="; //$NON-NLS-1$
3348       case TokenNameRIGHT_SHIFT_EQUAL :
3349         return ">>="; //$NON-NLS-1$
3350       case TokenNameOR_OR :
3351         return "||"; //$NON-NLS-1$
3352       case TokenNameAND_AND :
3353         return "&&"; //$NON-NLS-1$
3354       case TokenNamePLUS :
3355         return "+"; //$NON-NLS-1$
3356       case TokenNameMINUS :
3357         return "-"; //$NON-NLS-1$
3358       case TokenNameMINUS_GREATER :
3359         return "->";
3360       case TokenNameNOT :
3361         return "!"; //$NON-NLS-1$
3362       case TokenNameREMAINDER :
3363         return "%"; //$NON-NLS-1$
3364       case TokenNameXOR :
3365         return "^"; //$NON-NLS-1$
3366       case TokenNameAND :
3367         return "&"; //$NON-NLS-1$
3368       case TokenNameMULTIPLY :
3369         return "*"; //$NON-NLS-1$
3370       case TokenNameOR :
3371         return "|"; //$NON-NLS-1$
3372       case TokenNameTWIDDLE :
3373         return "~"; //$NON-NLS-1$
3374       case TokenNameTWIDDLE_EQUAL :
3375         return "~="; //$NON-NLS-1$
3376       case TokenNameDIVIDE :
3377         return "/"; //$NON-NLS-1$
3378       case TokenNameGREATER :
3379         return ">"; //$NON-NLS-1$
3380       case TokenNameLESS :
3381         return "<"; //$NON-NLS-1$
3382       case TokenNameLPAREN :
3383         return "("; //$NON-NLS-1$
3384       case TokenNameRPAREN :
3385         return ")"; //$NON-NLS-1$
3386       case TokenNameLBRACE :
3387         return "{"; //$NON-NLS-1$
3388       case TokenNameRBRACE :
3389         return "}"; //$NON-NLS-1$
3390       case TokenNameLBRACKET :
3391         return "["; //$NON-NLS-1$
3392       case TokenNameRBRACKET :
3393         return "]"; //$NON-NLS-1$
3394       case TokenNameSEMICOLON :
3395         return ";"; //$NON-NLS-1$
3396       case TokenNameQUESTION :
3397         return "?"; //$NON-NLS-1$
3398       case TokenNameCOLON :
3399         return ":"; //$NON-NLS-1$
3400       case TokenNameCOMMA :
3401         return ","; //$NON-NLS-1$
3402       case TokenNameDOT :
3403         return "."; //$NON-NLS-1$
3404       case TokenNameEQUAL :
3405         return "="; //$NON-NLS-1$
3406       case TokenNameAT :
3407         return "@";
3408       case TokenNameDOLLAR_LBRACE :
3409         return "${";
3410       case TokenNameEOF :
3411         return "EOF"; //$NON-NLS-1$
3412       case TokenNameWHITESPACE :
3413         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3414       case TokenNameCOMMENT_LINE :
3415         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3416       case TokenNameCOMMENT_BLOCK :
3417         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3418       case TokenNameCOMMENT_PHPDOC :
3419         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3420       case TokenNameHTML :
3421         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3422       default :
3423         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3424     }
3425   }
3426
/**
 * Creates a scanner with assert mode switched off.
 *
 * @param tokenizeComments value stored in <code>tokenizeComments</code>
 * @param tokenizeWhiteSpace value stored in <code>tokenizeWhiteSpace</code>
 * @param checkNonExternalizedStringLiterals value stored in
 *        <code>checkNonExternalizedStringLiterals</code>
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, /* assertMode */ false);
}

/**
 * Creates a scanner and records the given options. The end-of-file position
 * starts out as <code>Integer.MAX_VALUE</code>.
 *
 * @param tokenizeComments value stored in <code>tokenizeComments</code>
 * @param tokenizeWhiteSpace value stored in <code>tokenizeWhiteSpace</code>
 * @param checkNonExternalizedStringLiterals value stored in
 *        <code>checkNonExternalizedStringLiterals</code>
 * @param assertMode value stored in <code>assertMode</code>
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals, boolean assertMode) {
  // option flags
  this.assertMode = assertMode;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeComments = tokenizeComments;

  // no artificial end of file until a caller narrows the range
  this.eofPosition = Integer.MAX_VALUE;
}
3442
3443   private void checkNonExternalizeString() throws InvalidInputException {
3444     if (currentLine == null)
3445       return;
3446     parseTags(currentLine);
3447   }
3448
3449   private void parseTags(NLSLine line) throws InvalidInputException {
3450     String s = new String(getCurrentTokenSource());
3451     int pos = s.indexOf(TAG_PREFIX);
3452     int lineLength = line.size();
3453     while (pos != -1) {
3454       int start = pos + TAG_PREFIX_LENGTH;
3455       int end = s.indexOf(TAG_POSTFIX, start);
3456       String index = s.substring(start, end);
3457       int i = 0;
3458       try {
3459         i = Integer.parseInt(index) - 1;
3460         // Tags are one based not zero based.
3461       } catch (NumberFormatException e) {
3462         i = -1; // we don't want to consider this as a valid NLS tag
3463       }
3464       if (line.exists(i)) {
3465         line.set(i, null);
3466       }
3467       pos = s.indexOf(TAG_PREFIX, start);
3468     }
3469
3470     this.nonNLSStrings = new StringLiteral[lineLength];
3471     int nonNLSCounter = 0;
3472     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3473       StringLiteral literal = (StringLiteral) iterator.next();
3474       if (literal != null) {
3475         this.nonNLSStrings[nonNLSCounter++] = literal;
3476       }
3477     }
3478     if (nonNLSCounter == 0) {
3479       this.nonNLSStrings = null;
3480       currentLine = null;
3481       return;
3482     }
3483     this.wasNonExternalizedStringLiteral = true;
3484     if (nonNLSCounter != lineLength) {
3485       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
3486     }
3487     currentLine = null;
3488   }
3489 }