initial nl support for phphelp plugin
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
16
17 import net.sourceforge.phpdt.core.compiler.IScanner;
18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
21
22 public class Scanner implements IScanner, ITerminalSymbols {
23
  /* The APIs are:
   - getNextToken(), which returns the type of the current token
     (this value is not memorized by the scanner)
   - getCurrentTokenSource(), which provides the token's "REAL" source
     (i.e. all unicode escapes have been transformed into the correct char)
   - sourceStart gives the position in the stream
   - currentPosition-1 gives the sourceEnd position in the stream
  */
32
33   // 1.4 feature 
34   private boolean assertMode;
35   public boolean useAssertAsAnIndentifier = false;
36   //flag indicating if processed source contains occurrences of keyword assert 
37   public boolean containsAssertKeyword = false;
38
39   public boolean recordLineSeparator;
40   public boolean phpMode = false;
41
42   public char currentCharacter;
43   public int startPosition;
44   public int currentPosition;
45   public int initialPosition, eofPosition;
  // after this position, EOF is generated instead of real tokens from the source
47
48   public boolean tokenizeComments;
49   public boolean tokenizeWhiteSpace;
50
51   //source should be viewed as a window (aka a part)
52   //of a entire very large stream
53   public char source[];
54
55   //unicode support
56   public char[] withoutUnicodeBuffer;
57   public int withoutUnicodePtr;
58   //when == 0 ==> no unicode in the current token
59   public boolean unicodeAsBackSlash = false;
60
61   public boolean scanningFloatLiteral = false;
62
63   //support for /** comments
64   //public char[][] comments = new char[10][];
65   public int[] commentStops = new int[10];
66   public int[] commentStarts = new int[10];
67   public int commentPtr = -1; // no comment test with commentPtr value -1
68
69   //diet parsing support - jump over some method body when requested
70   public boolean diet = false;
71
72   //support for the  poor-line-debuggers ....
73   //remember the position of the cr/lf
74   public int[] lineEnds = new int[250];
75   public int linePtr = -1;
76   public boolean wasAcr = false;
77
78   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
79
80   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
81   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
82   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
83   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
84   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
85   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
86   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
87
88   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
89   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
90   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
91   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
92
  //----------------optimized identifier management------------------
94   static final char[] charArray_a = new char[] { 'a' },
95     charArray_b = new char[] { 'b' },
96     charArray_c = new char[] { 'c' },
97     charArray_d = new char[] { 'd' },
98     charArray_e = new char[] { 'e' },
99     charArray_f = new char[] { 'f' },
100     charArray_g = new char[] { 'g' },
101     charArray_h = new char[] { 'h' },
102     charArray_i = new char[] { 'i' },
103     charArray_j = new char[] { 'j' },
104     charArray_k = new char[] { 'k' },
105     charArray_l = new char[] { 'l' },
106     charArray_m = new char[] { 'm' },
107     charArray_n = new char[] { 'n' },
108     charArray_o = new char[] { 'o' },
109     charArray_p = new char[] { 'p' },
110     charArray_q = new char[] { 'q' },
111     charArray_r = new char[] { 'r' },
112     charArray_s = new char[] { 's' },
113     charArray_t = new char[] { 't' },
114     charArray_u = new char[] { 'u' },
115     charArray_v = new char[] { 'v' },
116     charArray_w = new char[] { 'w' },
117     charArray_x = new char[] { 'x' },
118     charArray_y = new char[] { 'y' },
119     charArray_z = new char[] { 'z' };
120
121   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
122   static final int TableSize = 30, InternalTableSize = 6;
123   //30*6 = 180 entries
124   public static final int OptimizedLength = 6;
125   public /*static*/
126   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
127   // support for detecting non-externalized string literals
128   int currentLineNr = -1;
129   int previousLineNr = -1;
130   NLSLine currentLine = null;
131   List lines = new ArrayList();
132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136   public StringLiteral[] nonNLSStrings = null;
137   public boolean checkNonExternalizedStringLiterals = true;
138   public boolean wasNonExternalizedStringLiteral = false;
139
140   /*static*/ {
141     for (int i = 0; i < 6; i++) {
142       for (int j = 0; j < TableSize; j++) {
143         for (int k = 0; k < InternalTableSize; k++) {
144           charArray_length[i][j][k] = initCharArray;
145         }
146       }
147     }
148   }
149   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
150
151   public static final int RoundBracket = 0;
152   public static final int SquareBracket = 1;
153   public static final int CurlyBracket = 2;
154   public static final int BracketKinds = 3;
155
156   public static final boolean DEBUG = false;
157
158   public Scanner() {
159     this(false, false);
160   }
161   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
162     this(tokenizeComments, tokenizeWhiteSpace, false);
163   }
164
165   /**
166    * Determines if the specified character is
167    * permissible as the first character in a PHP identifier
168    */
169   public static boolean isPHPIdentifierStart(char ch) {
170     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
171   }
172
173   /**
174    * Determines if the specified character may be part of a PHP identifier as
175    * other than the first character
176    */
177   public static boolean isPHPIdentifierPart(char ch) {
178     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
179   }
180
181   public final boolean atEnd() {
182     // This code is not relevant if source is 
183     // Only a part of the real stream input
184
185     return source.length == currentPosition;
186   }
187   public char[] getCurrentIdentifierSource() {
188     //return the token REAL source (aka unicodes are precomputed)
189
190     char[] result;
191     //    if (withoutUnicodePtr != 0)
192     //      //0 is used as a fast test flag so the real first char is in position 1
193     //      System.arraycopy(
194     //        withoutUnicodeBuffer,
195     //        1,
196     //        result = new char[withoutUnicodePtr],
197     //        0,
198     //        withoutUnicodePtr);
199     //    else {
200     int length = currentPosition - startPosition;
201     switch (length) { // see OptimizedLength
202       case 1 :
203         return optimizedCurrentTokenSource1();
204       case 2 :
205         return optimizedCurrentTokenSource2();
206       case 3 :
207         return optimizedCurrentTokenSource3();
208       case 4 :
209         return optimizedCurrentTokenSource4();
210       case 5 :
211         return optimizedCurrentTokenSource5();
212       case 6 :
213         return optimizedCurrentTokenSource6();
214     }
215     //no optimization
216     System.arraycopy(source, startPosition, result = new char[length], 0, length);
217     //   }
218     return result;
219   }
220   public int getCurrentTokenEndPosition() {
221     return this.currentPosition - 1;
222   }
223
224   public final char[] getCurrentTokenSource() {
225     // Return the token REAL source (aka unicodes are precomputed)
226
227     char[] result;
228     //    if (withoutUnicodePtr != 0)
229     //      // 0 is used as a fast test flag so the real first char is in position 1
230     //      System.arraycopy(
231     //        withoutUnicodeBuffer,
232     //        1,
233     //        result = new char[withoutUnicodePtr],
234     //        0,
235     //        withoutUnicodePtr);
236     //    else {
237     int length;
238     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
239     //    }
240     return result;
241   }
242
243   public final char[] getCurrentTokenSource(int startPos) {
244     // Return the token REAL source (aka unicodes are precomputed)
245
246     char[] result;
247     //    if (withoutUnicodePtr != 0)
248     //      // 0 is used as a fast test flag so the real first char is in position 1
249     //      System.arraycopy(
250     //        withoutUnicodeBuffer,
251     //        1,
252     //        result = new char[withoutUnicodePtr],
253     //        0,
254     //        withoutUnicodePtr);
255     //    else {
256     int length;
257     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
258     //  }
259     return result;
260   }
261
262   public final char[] getCurrentTokenSourceString() {
263     //return the token REAL source (aka unicodes are precomputed).
264     //REMOVE the two " that are at the beginning and the end.
265
266     char[] result;
267     if (withoutUnicodePtr != 0)
268       //0 is used as a fast test flag so the real first char is in position 1
269       System.arraycopy(withoutUnicodeBuffer, 2,
270       //2 is 1 (real start) + 1 (to jump over the ")
271       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
272     else {
273       int length;
274       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
275     }
276     return result;
277   }
278   public int getCurrentTokenStartPosition() {
279     return this.startPosition;
280   }
281
282   public final char[] getCurrentStringLiteralSource() {
283     // Return the token REAL source (aka unicodes are precomputed)
284
285     char[] result;
286
287     int length;
288     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
289     //    }
290     return result;
291   }
292
293   /*
294    * Search the source position corresponding to the end of a given line number
295    *
296    * Line numbers are 1-based, and relative to the scanner initialPosition. 
297    * Character positions are 0-based.
298    *
299    * In case the given line number is inconsistent, answers -1.
300    */
301   public final int getLineEnd(int lineNumber) {
302
303     if (lineEnds == null)
304       return -1;
305     if (lineNumber >= lineEnds.length)
306       return -1;
307     if (lineNumber <= 0)
308       return -1;
309
310     if (lineNumber == lineEnds.length - 1)
311       return eofPosition;
312     return lineEnds[lineNumber - 1];
313     // next line start one character behind the lineEnd of the previous line
314   }
315   /**
316    * Search the source position corresponding to the beginning of a given line number
317    *
318    * Line numbers are 1-based, and relative to the scanner initialPosition. 
319    * Character positions are 0-based.
320    *
321    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
322    *
323    * In case the given line number is inconsistent, answers -1.
324    */
325   public final int getLineStart(int lineNumber) {
326
327     if (lineEnds == null)
328       return -1;
329     if (lineNumber >= lineEnds.length)
330       return -1;
331     if (lineNumber <= 0)
332       return -1;
333
334     if (lineNumber == 1)
335       return initialPosition;
336     return lineEnds[lineNumber - 2] + 1;
337     // next line start one character behind the lineEnd of the previous line
338   }
339   public final boolean getNextChar(char testedChar) {
340     //BOOLEAN
341     //handle the case of unicode.
342     //when a unicode appears then we must use a buffer that holds char internal values
343     //At the end of this method currentCharacter holds the new visited char
344     //and currentPosition points right next after it
345     //Both previous lines are true if the currentCharacter is == to the testedChar
346     //On false, no side effect has occured.
347
348     //ALL getNextChar.... ARE OPTIMIZED COPIES 
349
350     int temp = currentPosition;
351     try {
352       currentCharacter = source[currentPosition++];
353       //      if (((currentCharacter = source[currentPosition++]) == '\\')
354       //        && (source[currentPosition] == 'u')) {
355       //        //-------------unicode traitement ------------
356       //        int c1, c2, c3, c4;
357       //        int unicodeSize = 6;
358       //        currentPosition++;
359       //        while (source[currentPosition] == 'u') {
360       //          currentPosition++;
361       //          unicodeSize++;
362       //        }
363       //
364       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
365       //          || c1 < 0)
366       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
367       //            || c2 < 0)
368       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
369       //            || c3 < 0)
370       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
371       //            || c4 < 0)) {
372       //          currentPosition = temp;
373       //          return false;
374       //        }
375       //
376       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
377       //        if (currentCharacter != testedChar) {
378       //          currentPosition = temp;
379       //          return false;
380       //        }
381       //        unicodeAsBackSlash = currentCharacter == '\\';
382       //
383       //        //need the unicode buffer
384       //        if (withoutUnicodePtr == 0) {
385       //          //buffer all the entries that have been left aside....
386       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
387       //          System.arraycopy(
388       //            source,
389       //            startPosition,
390       //            withoutUnicodeBuffer,
391       //            1,
392       //            withoutUnicodePtr);
393       //        }
394       //        //fill the buffer with the char
395       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
396       //        return true;
397       //
398       //      } //-------------end unicode traitement--------------
399       //      else {
400       if (currentCharacter != testedChar) {
401         currentPosition = temp;
402         return false;
403       }
404       unicodeAsBackSlash = false;
405       //        if (withoutUnicodePtr != 0)
406       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
407       return true;
408       //      }
409     } catch (IndexOutOfBoundsException e) {
410       unicodeAsBackSlash = false;
411       currentPosition = temp;
412       return false;
413     }
414   }
415   public final int getNextChar(char testedChar1, char testedChar2) {
416     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
417     //test can be done with (x==0) for the first and (x>0) for the second
418     //handle the case of unicode.
419     //when a unicode appears then we must use a buffer that holds char internal values
420     //At the end of this method currentCharacter holds the new visited char
421     //and currentPosition points right next after it
422     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
423     //On false, no side effect has occured.
424
425     //ALL getNextChar.... ARE OPTIMIZED COPIES 
426
427     int temp = currentPosition;
428     try {
429       int result;
430       currentCharacter = source[currentPosition++];
431       //      if (((currentCharacter = source[currentPosition++]) == '\\')
432       //        && (source[currentPosition] == 'u')) {
433       //        //-------------unicode traitement ------------
434       //        int c1, c2, c3, c4;
435       //        int unicodeSize = 6;
436       //        currentPosition++;
437       //        while (source[currentPosition] == 'u') {
438       //          currentPosition++;
439       //          unicodeSize++;
440       //        }
441       //
442       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
443       //          || c1 < 0)
444       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
445       //            || c2 < 0)
446       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
447       //            || c3 < 0)
448       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
449       //            || c4 < 0)) {
450       //          currentPosition = temp;
451       //          return 2;
452       //        }
453       //
454       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
455       //        if (currentCharacter == testedChar1)
456       //          result = 0;
457       //        else if (currentCharacter == testedChar2)
458       //          result = 1;
459       //        else {
460       //          currentPosition = temp;
461       //          return -1;
462       //        }
463       //
464       //        //need the unicode buffer
465       //        if (withoutUnicodePtr == 0) {
466       //          //buffer all the entries that have been left aside....
467       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
468       //          System.arraycopy(
469       //            source,
470       //            startPosition,
471       //            withoutUnicodeBuffer,
472       //            1,
473       //            withoutUnicodePtr);
474       //        }
475       //        //fill the buffer with the char
476       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
477       //        return result;
478       //      } //-------------end unicode traitement--------------
479       //      else {
480       if (currentCharacter == testedChar1)
481         result = 0;
482       else if (currentCharacter == testedChar2)
483         result = 1;
484       else {
485         currentPosition = temp;
486         return -1;
487       }
488
489       //        if (withoutUnicodePtr != 0)
490       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
491       return result;
492       //     }
493     } catch (IndexOutOfBoundsException e) {
494       currentPosition = temp;
495       return -1;
496     }
497   }
498   public final boolean getNextCharAsDigit() {
499     //BOOLEAN
500     //handle the case of unicode.
501     //when a unicode appears then we must use a buffer that holds char internal values
502     //At the end of this method currentCharacter holds the new visited char
503     //and currentPosition points right next after it
504     //Both previous lines are true if the currentCharacter is a digit
505     //On false, no side effect has occured.
506
507     //ALL getNextChar.... ARE OPTIMIZED COPIES 
508
509     int temp = currentPosition;
510     try {
511       currentCharacter = source[currentPosition++];
512       //      if (((currentCharacter = source[currentPosition++]) == '\\')
513       //        && (source[currentPosition] == 'u')) {
514       //        //-------------unicode traitement ------------
515       //        int c1, c2, c3, c4;
516       //        int unicodeSize = 6;
517       //        currentPosition++;
518       //        while (source[currentPosition] == 'u') {
519       //          currentPosition++;
520       //          unicodeSize++;
521       //        }
522       //
523       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
524       //          || c1 < 0)
525       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
526       //            || c2 < 0)
527       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
528       //            || c3 < 0)
529       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
530       //            || c4 < 0)) {
531       //          currentPosition = temp;
532       //          return false;
533       //        }
534       //
535       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
536       //        if (!Character.isDigit(currentCharacter)) {
537       //          currentPosition = temp;
538       //          return false;
539       //        }
540       //
541       //        //need the unicode buffer
542       //        if (withoutUnicodePtr == 0) {
543       //          //buffer all the entries that have been left aside....
544       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
545       //          System.arraycopy(
546       //            source,
547       //            startPosition,
548       //            withoutUnicodeBuffer,
549       //            1,
550       //            withoutUnicodePtr);
551       //        }
552       //        //fill the buffer with the char
553       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
554       //        return true;
555       //      } //-------------end unicode traitement--------------
556       //      else {
557       if (!Character.isDigit(currentCharacter)) {
558         currentPosition = temp;
559         return false;
560       }
561       //        if (withoutUnicodePtr != 0)
562       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
563       return true;
564       //      }
565     } catch (IndexOutOfBoundsException e) {
566       currentPosition = temp;
567       return false;
568     }
569   }
570   public final boolean getNextCharAsDigit(int radix) {
571     //BOOLEAN
572     //handle the case of unicode.
573     //when a unicode appears then we must use a buffer that holds char internal values
574     //At the end of this method currentCharacter holds the new visited char
575     //and currentPosition points right next after it
576     //Both previous lines are true if the currentCharacter is a digit base on radix
577     //On false, no side effect has occured.
578
579     //ALL getNextChar.... ARE OPTIMIZED COPIES 
580
581     int temp = currentPosition;
582     try {
583       currentCharacter = source[currentPosition++];
584       //      if (((currentCharacter = source[currentPosition++]) == '\\')
585       //        && (source[currentPosition] == 'u')) {
586       //        //-------------unicode traitement ------------
587       //        int c1, c2, c3, c4;
588       //        int unicodeSize = 6;
589       //        currentPosition++;
590       //        while (source[currentPosition] == 'u') {
591       //          currentPosition++;
592       //          unicodeSize++;
593       //        }
594       //
595       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
596       //          || c1 < 0)
597       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
598       //            || c2 < 0)
599       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
600       //            || c3 < 0)
601       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
602       //            || c4 < 0)) {
603       //          currentPosition = temp;
604       //          return false;
605       //        }
606       //
607       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
608       //        if (Character.digit(currentCharacter, radix) == -1) {
609       //          currentPosition = temp;
610       //          return false;
611       //        }
612       //
613       //        //need the unicode buffer
614       //        if (withoutUnicodePtr == 0) {
615       //          //buffer all the entries that have been left aside....
616       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
617       //          System.arraycopy(
618       //            source,
619       //            startPosition,
620       //            withoutUnicodeBuffer,
621       //            1,
622       //            withoutUnicodePtr);
623       //        }
624       //        //fill the buffer with the char
625       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
626       //        return true;
627       //      } //-------------end unicode traitement--------------
628       //      else {
629       if (Character.digit(currentCharacter, radix) == -1) {
630         currentPosition = temp;
631         return false;
632       }
633       //        if (withoutUnicodePtr != 0)
634       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
635       return true;
636       //      }
637     } catch (IndexOutOfBoundsException e) {
638       currentPosition = temp;
639       return false;
640     }
641   }
642   public boolean getNextCharAsJavaIdentifierPart() {
643     //BOOLEAN
644     //handle the case of unicode.
645     //when a unicode appears then we must use a buffer that holds char internal values
646     //At the end of this method currentCharacter holds the new visited char
647     //and currentPosition points right next after it
648     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
649     //On false, no side effect has occured.
650
651     //ALL getNextChar.... ARE OPTIMIZED COPIES 
652
653     int temp = currentPosition;
654     try {
655       currentCharacter = source[currentPosition++];
656       //      if (((currentCharacter = source[currentPosition++]) == '\\')
657       //        && (source[currentPosition] == 'u')) {
658       //        //-------------unicode traitement ------------
659       //        int c1, c2, c3, c4;
660       //        int unicodeSize = 6;
661       //        currentPosition++;
662       //        while (source[currentPosition] == 'u') {
663       //          currentPosition++;
664       //          unicodeSize++;
665       //        }
666       //
667       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
668       //          || c1 < 0)
669       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
670       //            || c2 < 0)
671       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
672       //            || c3 < 0)
673       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
674       //            || c4 < 0)) {
675       //          currentPosition = temp;
676       //          return false;
677       //        }
678       //
679       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
680       //        if (!isPHPIdentifierPart(currentCharacter)) {
681       //          currentPosition = temp;
682       //          return false;
683       //        }
684       //
685       //        //need the unicode buffer
686       //        if (withoutUnicodePtr == 0) {
687       //          //buffer all the entries that have been left aside....
688       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
689       //          System.arraycopy(
690       //            source,
691       //            startPosition,
692       //            withoutUnicodeBuffer,
693       //            1,
694       //            withoutUnicodePtr);
695       //        }
696       //        //fill the buffer with the char
697       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
698       //        return true;
699       //      } //-------------end unicode traitement--------------
700       //      else {
701       if (!isPHPIdentifierPart(currentCharacter)) {
702         currentPosition = temp;
703         return false;
704       }
705
706       //        if (withoutUnicodePtr != 0)
707       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
708       return true;
709       //      }
710     } catch (IndexOutOfBoundsException e) {
711       currentPosition = temp;
712       return false;
713     }
714   }
715
716   public int getNextToken() throws InvalidInputException {
717     int htmlPosition = currentPosition;
718     try {
719       while (!phpMode) {
720         currentCharacter = source[currentPosition++];
721         if (currentCharacter == '<') {
722           if (getNextChar('?')) {
723             currentCharacter = source[currentPosition++];
724             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
725               // <?
726               startPosition = currentPosition;
727               phpMode = true;
728               if (tokenizeWhiteSpace) {
729                 // && (whiteStart != currentPosition - 1)) {
730                 // reposition scanner in case we are interested by spaces as tokens
731                 startPosition = htmlPosition;
732                 return TokenNameHTML;
733               }
734             } else {
735               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
736               if (phpStart) {
737                 int test = getNextChar('H', 'h');
738                 if (test >= 0) {
739                   test = getNextChar('P', 'p');
740                   if (test >= 0) {
741                     // <?PHP  <?php
742                     startPosition = currentPosition;
743                     phpMode = true;
744
745                     if (tokenizeWhiteSpace) {
746                       // && (whiteStart != currentPosition - 1)) {
747                       // reposition scanner in case we are interested by spaces as tokens
748                       startPosition = htmlPosition;
749                       return TokenNameHTML;
750                     }
751                   }
752                 }
753               }
754             }
755           }
756         }
757
758         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
759           if (recordLineSeparator) {
760             pushLineSeparator();
761           } else {
762             currentLine = null;
763           }
764         }
765       }
766     } //-----------------end switch while try--------------------
767     catch (IndexOutOfBoundsException e) {
768       if (tokenizeWhiteSpace) {
769         // && (whiteStart != currentPosition - 1)) {
770         // reposition scanner in case we are interested in spaces as tokens
771         startPosition = htmlPosition;
772       }
773       return TokenNameEOF;
774     }
775
776     if (phpMode) {
777       this.wasAcr = false;
778       if (diet) {
779         jumpOverMethodBody();
780         diet = false;
781         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
782       }
783       try {
784         while (true) { //loop for jumping over comments
785           withoutUnicodePtr = 0;
786           //start with a new token (even comment written with unicode )
787
788           // ---------Consume white space and handles startPosition---------
789           int whiteStart = currentPosition;
790           boolean isWhiteSpace;
791           do {
792             startPosition = currentPosition;
793             currentCharacter = source[currentPosition++];
794             //            if (((currentCharacter = source[currentPosition++]) == '\\')
795             //              && (source[currentPosition] == 'u')) {
796             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
797             //            } else {
798             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
799               checkNonExternalizeString();
800               if (recordLineSeparator) {
801                 pushLineSeparator();
802               } else {
803                 currentLine = null;
804               }
805             }
806             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
807             //            }
808           } while (isWhiteSpace);
809           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
810             // reposition scanner in case we are interested in spaces as tokens
811             currentPosition--;
812             startPosition = whiteStart;
813             return TokenNameWHITESPACE;
814           }
815           //little trick to get out in the middle of a source computation
816           if (currentPosition > eofPosition)
817             return TokenNameEOF;
818
819           // ---------Identify the next token-------------
820
821           switch (currentCharacter) {
822             case '(' :
823               return TokenNameLPAREN;
824             case ')' :
825               return TokenNameRPAREN;
826             case '{' :
827               return TokenNameLBRACE;
828             case '}' :
829               return TokenNameRBRACE;
830             case '[' :
831               return TokenNameLBRACKET;
832             case ']' :
833               return TokenNameRBRACKET;
834             case ';' :
835               return TokenNameSEMICOLON;
836             case ',' :
837               return TokenNameCOMMA;
838
839             case '.' :
840               if (getNextCharAsDigit())
841                 return scanNumber(true);
842               return TokenNameDOT;
843             case '+' :
844               {
845                 int test;
846                 if ((test = getNextChar('+', '=')) == 0)
847                   return TokenNamePLUS_PLUS;
848                 if (test > 0)
849                   return TokenNamePLUS_EQUAL;
850                 return TokenNamePLUS;
851               }
852             case '-' :
853               {
854                 int test;
855                 if ((test = getNextChar('-', '=')) == 0)
856                   return TokenNameMINUS_MINUS;
857                 if (test > 0)
858                   return TokenNameMINUS_EQUAL;
859                 if (getNextChar('>'))
860                   return TokenNameMINUS_GREATER;
861
862                 return TokenNameMINUS;
863               }
864             case '~' :
865               if (getNextChar('='))
866                 return TokenNameTWIDDLE_EQUAL;
867               return TokenNameTWIDDLE;
868             case '!' :
869               if (getNextChar('=')) {
870                 if (getNextChar('=')) {
871                   return TokenNameNOT_EQUAL_EQUAL;
872                 }
873                 return TokenNameNOT_EQUAL;
874               }
875               return TokenNameNOT;
876             case '*' :
877               if (getNextChar('='))
878                 return TokenNameMULTIPLY_EQUAL;
879               return TokenNameMULTIPLY;
880             case '%' :
881               if (getNextChar('='))
882                 return TokenNameREMAINDER_EQUAL;
883               return TokenNameREMAINDER;
884             case '<' :
885               {
886                 int test;
887                 if ((test = getNextChar('=', '<')) == 0)
888                   return TokenNameLESS_EQUAL;
889                 if (test > 0) {
890                   if (getNextChar('='))
891                     return TokenNameLEFT_SHIFT_EQUAL;
892                   if (getNextChar('<')) {
893                     int heredocStart = currentPosition;
894                     int heredocLength = 0;
895                     currentCharacter = source[currentPosition++];
896                     if (isPHPIdentifierStart(currentCharacter)) {
897                       currentCharacter = source[currentPosition++];
898                     } else {
899                       return TokenNameERROR;
900                     }
901                     while (isPHPIdentifierPart(currentCharacter)) {
902                       currentCharacter = source[currentPosition++];
903                     }
904
905                     heredocLength = currentPosition - heredocStart - 1;
906
907                     // heredoc end-tag determination
908                     boolean endTag = true;
909                     char ch;
910                     do {
911                       ch = source[currentPosition++];
912                       if (ch == '\r' || ch == '\n') {
913                         if (recordLineSeparator) {
914                           pushLineSeparator();
915                         } else {
916                           currentLine = null;
917                         }
918                         for (int i = 0; i < heredocLength; i++) {
919                           if (source[currentPosition + i] != source[heredocStart + i]) {
920                             endTag = false;
921                             break;
922                           }
923                         }
924                         if (endTag) {
925                           currentPosition += heredocLength - 1;
926                           currentCharacter = source[currentPosition++];
927                           break; // do...while loop
928                         } else {
929                           endTag = true;
930                         }
931                       }
932
933                     } while (true);
934
935                     return TokenNameHEREDOC;
936                   }
937                   return TokenNameLEFT_SHIFT;
938                 }
939                 return TokenNameLESS;
940               }
941             case '>' :
942               {
943                 int test;
944                 if ((test = getNextChar('=', '>')) == 0)
945                   return TokenNameGREATER_EQUAL;
946                 if (test > 0) {
947                   if ((test = getNextChar('=', '>')) == 0)
948                     return TokenNameRIGHT_SHIFT_EQUAL;
949                   return TokenNameRIGHT_SHIFT;
950                 }
951                 return TokenNameGREATER;
952               }
953             case '=' :
954               if (getNextChar('=')) {
955                 if (getNextChar('=')) {
956                   return TokenNameEQUAL_EQUAL_EQUAL;
957                 }
958                 return TokenNameEQUAL_EQUAL;
959               }
960               if (getNextChar('>'))
961                 return TokenNameEQUAL_GREATER;
962               return TokenNameEQUAL;
963             case '&' :
964               {
965                 int test;
966                 if ((test = getNextChar('&', '=')) == 0)
967                   return TokenNameAND_AND;
968                 if (test > 0)
969                   return TokenNameAND_EQUAL;
970                 return TokenNameAND;
971               }
972             case '|' :
973               {
974                 int test;
975                 if ((test = getNextChar('|', '=')) == 0)
976                   return TokenNameOR_OR;
977                 if (test > 0)
978                   return TokenNameOR_EQUAL;
979                 return TokenNameOR;
980               }
981             case '^' :
982               if (getNextChar('='))
983                 return TokenNameXOR_EQUAL;
984               return TokenNameXOR;
985             case '?' :
986               if (getNextChar('>')) {
987                 phpMode = false;
988                 return TokenNameStopPHP;
989               }
990               return TokenNameQUESTION;
991             case ':' :
992               if (getNextChar(':'))
993                 return TokenNameCOLON_COLON;
994               return TokenNameCOLON;
995             case '@' :
996               return TokenNameAT;
997               //                                        case '\'' :
998               //                                                {
999               //                                                        int test;
1000               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
1001               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1002               //                                                        }
1003               //                                                        if (test > 0) {
1004               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1005               //                                                                for (int lookAhead = 0;
1006               //                                                                        lookAhead < 3;
1007               //                                                                        lookAhead++) {
1008               //                                                                        if (currentPosition + lookAhead
1009               //                                                                                == source.length)
1010               //                                                                                break;
1011               //                                                                        if (source[currentPosition + lookAhead]
1012               //                                                                                == '\n')
1013               //                                                                                break;
1014               //                                                                        if (source[currentPosition + lookAhead]
1015               //                                                                                == '\'') {
1016               //                                                                                currentPosition += lookAhead + 1;
1017               //                                                                                break;
1018               //                                                                        }
1019               //                                                                }
1020               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1021               //                                                        }
1022               //                                                }
1023               //                                                if (getNextChar('\'')) {
1024               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1025               //                                                        for (int lookAhead = 0;
1026               //                                                                lookAhead < 3;
1027               //                                                                lookAhead++) {
1028               //                                                                if (currentPosition + lookAhead
1029               //                                                                        == source.length)
1030               //                                                                        break;
1031               //                                                                if (source[currentPosition + lookAhead]
1032               //                                                                        == '\n')
1033               //                                                                        break;
1034               //                                                                if (source[currentPosition + lookAhead]
1035               //                                                                        == '\'') {
1036               //                                                                        currentPosition += lookAhead + 1;
1037               //                                                                        break;
1038               //                                                                }
1039               //                                                        }
1040               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1041               //                                                }
1042               //                                                if (getNextChar('\\'))
1043               //                                                        scanEscapeCharacter();
1044               //                                                else { // consume next character
1045               //                                                        unicodeAsBackSlash = false;
1046               //                                                        if (((currentCharacter = source[currentPosition++])
1047               //                                                                == '\\')
1048               //                                                                && (source[currentPosition] == 'u')) {
1049               //                                                                getNextUnicodeChar();
1050               //                                                        } else {
1051               //                                                                if (withoutUnicodePtr != 0) {
1052               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1053               //                                                                                currentCharacter;
1054               //                                                                }
1055               //                                                        }
1056               //                                                }
1057               //                                                //            if (getNextChar('\''))
1058               //                                                //              return TokenNameCharacterLiteral;
1059               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1060               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1061               //                                                        if (currentPosition + lookAhead == source.length)
1062               //                                                                break;
1063               //                                                        if (source[currentPosition + lookAhead] == '\n')
1064               //                                                                break;
1065               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1066               //                                                                currentPosition += lookAhead + 1;
1067               //                                                                break;
1068               //                                                        }
1069               //                                                }
1070               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1071             case '\'' :
1072               try {
1073                 // consume next character
1074                 unicodeAsBackSlash = false;
1075                 currentCharacter = source[currentPosition++];
1076                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1077                 //                  && (source[currentPosition] == 'u')) {
1078                 //                  getNextUnicodeChar();
1079                 //                } else {
1080                 //                  if (withoutUnicodePtr != 0) {
1081                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1082                 //                      currentCharacter;
1083                 //                  }
1084                 //                }
1085
1086                 while (currentCharacter != '\'') {
1087
1088                   /**** in PHP \r and \n are valid in string literals ****/
1089                   //                  if ((currentCharacter == '\n')
1090                   //                    || (currentCharacter == '\r')) {
1091                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1092                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1093                   //                      if (currentPosition + lookAhead == source.length)
1094                   //                        break;
1095                   //                      if (source[currentPosition + lookAhead] == '\n')
1096                   //                        break;
1097                   //                      if (source[currentPosition + lookAhead] == '\"') {
1098                   //                        currentPosition += lookAhead + 1;
1099                   //                        break;
1100                   //                      }
1101                   //                    }
1102                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1103                   //                  }
1104                   if (currentCharacter == '\\') {
1105                     int escapeSize = currentPosition;
1106                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1107                     //the escape-scanning call below may modify this value, and the previous value is needed a few lines down
1108                     scanSingleQuotedEscapeCharacter();
1109                     escapeSize = currentPosition - escapeSize;
1110                     if (withoutUnicodePtr == 0) {
1111                       //buffer all the entries that have been left aside....
1112                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1113                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1114                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1115                     } else { //overwrite the / in the buffer
1116                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1117                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1118                         withoutUnicodePtr--;
1119                       }
1120                     }
1121                   }
1122                   // consume next character
1123                   unicodeAsBackSlash = false;
1124                   currentCharacter = source[currentPosition++];
1125                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1126                   //                    && (source[currentPosition] == 'u')) {
1127                   //                    getNextUnicodeChar();
1128                   //                  } else {
1129                   if (withoutUnicodePtr != 0) {
1130                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1131                   }
1132                   //                  }
1133
1134                 }
1135               } catch (IndexOutOfBoundsException e) {
1136                 throw new InvalidInputException(UNTERMINATED_STRING);
1137               } catch (InvalidInputException e) {
1138                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1139                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1140                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1141                     if (currentPosition + lookAhead == source.length)
1142                       break;
1143                     if (source[currentPosition + lookAhead] == '\n')
1144                       break;
1145                     if (source[currentPosition + lookAhead] == '\'') {
1146                       currentPosition += lookAhead + 1;
1147                       break;
1148                     }
1149                   }
1150
1151                 }
1152                 throw e; // rethrow
1153               }
1154               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1155                 if (currentLine == null) {
1156                   currentLine = new NLSLine();
1157                   lines.add(currentLine);
1158                 }
1159                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1160               }
1161               return TokenNameStringConstant;
1162             case '"' :
1163               try {
1164                 // consume next character
1165                 unicodeAsBackSlash = false;
1166                 currentCharacter = source[currentPosition++];
1167                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1168                 //                  && (source[currentPosition] == 'u')) {
1169                 //                  getNextUnicodeChar();
1170                 //                } else {
1171                 //                  if (withoutUnicodePtr != 0) {
1172                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1173                 //                      currentCharacter;
1174                 //                  }
1175                 //                }
1176
1177                 while (currentCharacter != '"') {
1178
1179                   /**** in PHP \r and \n are valid in string literals ****/
1180                   //                  if ((currentCharacter == '\n')
1181                   //                    || (currentCharacter == '\r')) {
1182                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1183                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1184                   //                      if (currentPosition + lookAhead == source.length)
1185                   //                        break;
1186                   //                      if (source[currentPosition + lookAhead] == '\n')
1187                   //                        break;
1188                   //                      if (source[currentPosition + lookAhead] == '\"') {
1189                   //                        currentPosition += lookAhead + 1;
1190                   //                        break;
1191                   //                      }
1192                   //                    }
1193                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1194                   //                  }
1195                   if (currentCharacter == '\\') {
1196                     int escapeSize = currentPosition;
1197                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1198                     //the escape-scanning call below may modify this value, and the previous value is needed a few lines down
1199                     scanDoubleQuotedEscapeCharacter();
1200                     escapeSize = currentPosition - escapeSize;
1201                     if (withoutUnicodePtr == 0) {
1202                       //buffer all the entries that have been left aside....
1203                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1204                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1205                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1206                     } else { //overwrite the / in the buffer
1207                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1208                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1209                         withoutUnicodePtr--;
1210                       }
1211                     }
1212                   }
1213                   // consume next character
1214                   unicodeAsBackSlash = false;
1215                   currentCharacter = source[currentPosition++];
1216                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1217                   //                    && (source[currentPosition] == 'u')) {
1218                   //                    getNextUnicodeChar();
1219                   //                  } else {
1220                   if (withoutUnicodePtr != 0) {
1221                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1222                   }
1223                   //                  }
1224
1225                 }
1226               } catch (IndexOutOfBoundsException e) {
1227                 throw new InvalidInputException(UNTERMINATED_STRING);
1228               } catch (InvalidInputException e) {
1229                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1230                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1231                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1232                     if (currentPosition + lookAhead == source.length)
1233                       break;
1234                     if (source[currentPosition + lookAhead] == '\n')
1235                       break;
1236                     if (source[currentPosition + lookAhead] == '\"') {
1237                       currentPosition += lookAhead + 1;
1238                       break;
1239                     }
1240                   }
1241
1242                 }
1243                 throw e; // rethrow
1244               }
1245               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1246                 if (currentLine == null) {
1247                   currentLine = new NLSLine();
1248                   lines.add(currentLine);
1249                 }
1250                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1251               }
1252               return TokenNameStringLiteral;
1253             case '`' :
1254               try {
1255                 // consume next character
1256                 unicodeAsBackSlash = false;
1257                 currentCharacter = source[currentPosition++];
1258                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1259                 //                  && (source[currentPosition] == 'u')) {
1260                 //                  getNextUnicodeChar();
1261                 //                } else {
1262                 //                  if (withoutUnicodePtr != 0) {
1263                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1264                 //                      currentCharacter;
1265                 //                  }
1266                 //                }
1267
1268                 while (currentCharacter != '`') {
1269
1270                   /**** in PHP \r and \n are valid in string literals ****/
1271                   //                if ((currentCharacter == '\n')
1272                   //                  || (currentCharacter == '\r')) {
1273                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1274                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1275                   //                    if (currentPosition + lookAhead == source.length)
1276                   //                      break;
1277                   //                    if (source[currentPosition + lookAhead] == '\n')
1278                   //                      break;
1279                   //                    if (source[currentPosition + lookAhead] == '\"') {
1280                   //                      currentPosition += lookAhead + 1;
1281                   //                      break;
1282                   //                    }
1283                   //                  }
1284                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1285                   //                }
1286                   if (currentCharacter == '\\') {
1287                     int escapeSize = currentPosition;
1288                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1289                     //the escape-scanning call below may modify this value, and the previous value is needed a few lines down
1290                     scanDoubleQuotedEscapeCharacter();
1291                     escapeSize = currentPosition - escapeSize;
1292                     if (withoutUnicodePtr == 0) {
1293                       //buffer all the entries that have been left aside....
1294                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1295                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1296                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1297                     } else { //overwrite the / in the buffer
1298                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1299                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1300                         withoutUnicodePtr--;
1301                       }
1302                     }
1303                   }
1304                   // consume next character
1305                   unicodeAsBackSlash = false;
1306                   currentCharacter = source[currentPosition++];
1307                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1308                   //                    && (source[currentPosition] == 'u')) {
1309                   //                    getNextUnicodeChar();
1310                   //                  } else {
1311                   if (withoutUnicodePtr != 0) {
1312                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1313                   }
1314                   //                  }
1315
1316                 }
1317               } catch (IndexOutOfBoundsException e) {
1318                 throw new InvalidInputException(UNTERMINATED_STRING);
1319               } catch (InvalidInputException e) {
1320                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1321                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1322                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1323                     if (currentPosition + lookAhead == source.length)
1324                       break;
1325                     if (source[currentPosition + lookAhead] == '\n')
1326                       break;
1327                     if (source[currentPosition + lookAhead] == '`') {
1328                       currentPosition += lookAhead + 1;
1329                       break;
1330                     }
1331                   }
1332
1333                 }
1334                 throw e; // rethrow
1335               }
1336               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1337                 if (currentLine == null) {
1338                   currentLine = new NLSLine();
1339                   lines.add(currentLine);
1340                 }
1341                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1342               }
1343               return TokenNameStringInterpolated;
1344             case '#' :
1345             case '/' :
1346               {
1347                 int test;
1348                 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1349                   //line comment 
1350                   int endPositionForLineComment = 0;
1351                   try { //get the next char 
1352                     currentCharacter = source[currentPosition++];
1353                     //                    if (((currentCharacter = source[currentPosition++])
1354                     //                      == '\\')
1355                     //                      && (source[currentPosition] == 'u')) {
1356                     //                      //-------------unicode traitement ------------
1357                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1358                     //                      currentPosition++;
1359                     //                      while (source[currentPosition] == 'u') {
1360                     //                        currentPosition++;
1361                     //                      }
1362                     //                      if ((c1 =
1363                     //                        Character.getNumericValue(source[currentPosition++]))
1364                     //                        > 15
1365                     //                        || c1 < 0
1366                     //                        || (c2 =
1367                     //                          Character.getNumericValue(source[currentPosition++]))
1368                     //                          > 15
1369                     //                        || c2 < 0
1370                     //                        || (c3 =
1371                     //                          Character.getNumericValue(source[currentPosition++]))
1372                     //                          > 15
1373                     //                        || c3 < 0
1374                     //                        || (c4 =
1375                     //                          Character.getNumericValue(source[currentPosition++]))
1376                     //                          > 15
1377                     //                        || c4 < 0) {
1378                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1379                     //                      } else {
1380                     //                        currentCharacter =
1381                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1382                     //                      }
1383                     //                    }
1384
1385                     //handle the \\u case manually into comment
1386                     //                    if (currentCharacter == '\\') {
1387                     //                      if (source[currentPosition] == '\\')
1388                     //                        currentPosition++;
1389                     //                    } //jump over the \\
1390                     boolean isUnicode = false;
1391                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1392                       if (currentCharacter == '?') {
1393                         if (getNextChar('>')) {
1394                           startPosition = currentPosition - 2;
1395                           phpMode = false;
1396                           return TokenNameStopPHP;
1397                         }
1398                       }
1399
1400                       //get the next char
1401                       isUnicode = false;
1402                       currentCharacter = source[currentPosition++];
1403                       //                      if (((currentCharacter = source[currentPosition++])
1404                       //                        == '\\')
1405                       //                        && (source[currentPosition] == 'u')) {
1406                       //                        isUnicode = true;
1407                       //                        //-------------unicode traitement ------------
1408                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1409                       //                        currentPosition++;
1410                       //                        while (source[currentPosition] == 'u') {
1411                       //                          currentPosition++;
1412                       //                        }
1413                       //                        if ((c1 =
1414                       //                          Character.getNumericValue(source[currentPosition++]))
1415                       //                          > 15
1416                       //                          || c1 < 0
1417                       //                          || (c2 =
1418                       //                            Character.getNumericValue(
1419                       //                              source[currentPosition++]))
1420                       //                            > 15
1421                       //                          || c2 < 0
1422                       //                          || (c3 =
1423                       //                            Character.getNumericValue(
1424                       //                              source[currentPosition++]))
1425                       //                            > 15
1426                       //                          || c3 < 0
1427                       //                          || (c4 =
1428                       //                            Character.getNumericValue(
1429                       //                              source[currentPosition++]))
1430                       //                            > 15
1431                       //                          || c4 < 0) {
1432                       //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1433                       //                        } else {
1434                       //                          currentCharacter =
1435                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1436                       //                        }
1437                       //                      }
1438                       //handle the \\u case manually into comment
1439                       //                      if (currentCharacter == '\\') {
1440                       //                        if (source[currentPosition] == '\\')
1441                       //                          currentPosition++;
1442                       //                      } //jump over the \\
1443                     }
1444                     if (isUnicode) {
1445                       endPositionForLineComment = currentPosition - 6;
1446                     } else {
1447                       endPositionForLineComment = currentPosition - 1;
1448                     }
1449                     recordComment(false);
1450                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1451                       checkNonExternalizeString();
1452                       if (recordLineSeparator) {
1453                         if (isUnicode) {
1454                           pushUnicodeLineSeparator();
1455                         } else {
1456                           pushLineSeparator();
1457                         }
1458                       } else {
1459                         currentLine = null;
1460                       }
1461                     }
1462                     if (tokenizeComments) {
1463                       if (!isUnicode) {
1464                         currentPosition = endPositionForLineComment;
1465                         // reset one character behind
1466                       }
1467                       return TokenNameCOMMENT_LINE;
1468                     }
1469                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1470                     if (tokenizeComments) {
1471                       currentPosition--;
1472                       // reset one character behind
1473                       return TokenNameCOMMENT_LINE;
1474                     }
1475                   }
1476                   break;
1477                 }
1478                 if (test > 0) {
1479                   //traditional and annotation comment
1480                   boolean isJavadoc = false, star = false;
1481                   // consume next character
1482                   unicodeAsBackSlash = false;
1483                   currentCharacter = source[currentPosition++];
1484                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1485                   //                    && (source[currentPosition] == 'u')) {
1486                   //                    getNextUnicodeChar();
1487                   //                  } else {
1488                   //                    if (withoutUnicodePtr != 0) {
1489                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1490                   //                        currentCharacter;
1491                   //                    }
1492                   //                  }
1493
1494                   if (currentCharacter == '*') {
1495                     isJavadoc = true;
1496                     star = true;
1497                   }
1498                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1499                     checkNonExternalizeString();
1500                     if (recordLineSeparator) {
1501                       pushLineSeparator();
1502                     } else {
1503                       currentLine = null;
1504                     }
1505                   }
1506                   try { //get the next char 
1507                     currentCharacter = source[currentPosition++];
1508                     //                    if (((currentCharacter = source[currentPosition++])
1509                     //                      == '\\')
1510                     //                      && (source[currentPosition] == 'u')) {
1511                     //                      //-------------unicode traitement ------------
1512                     //                      getNextUnicodeChar();
1513                     //                    }
1514                     //handle the \\u case manually into comment
1515                     //                    if (currentCharacter == '\\') {
1516                     //                      if (source[currentPosition] == '\\')
1517                     //                        currentPosition++;
1518                     //                      //jump over the \\
1519                     //                    }
1520                     // empty comment is not a javadoc /**/
1521                     if (currentCharacter == '/') {
1522                       isJavadoc = false;
1523                     }
1524                     //loop until end of comment */
1525                     while ((currentCharacter != '/') || (!star)) {
1526                       if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1527                         checkNonExternalizeString();
1528                         if (recordLineSeparator) {
1529                           pushLineSeparator();
1530                         } else {
1531                           currentLine = null;
1532                         }
1533                       }
1534                       star = currentCharacter == '*';
1535                       //get next char
1536                       currentCharacter = source[currentPosition++];
1537                       //                      if (((currentCharacter = source[currentPosition++])
1538                       //                        == '\\')
1539                       //                        && (source[currentPosition] == 'u')) {
1540                       //                        //-------------unicode traitement ------------
1541                       //                        getNextUnicodeChar();
1542                       //                      }
1543                       //handle the \\u case manually into comment
1544                       //                      if (currentCharacter == '\\') {
1545                       //                        if (source[currentPosition] == '\\')
1546                       //                          currentPosition++;
1547                       //                      } //jump over the \\
1548                     }
1549                     recordComment(isJavadoc);
1550                     if (tokenizeComments) {
1551                       if (isJavadoc)
1552                         return TokenNameCOMMENT_PHPDOC;
1553                       return TokenNameCOMMENT_BLOCK;
1554                     }
1555                   } catch (IndexOutOfBoundsException e) {
1556                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1557                   }
1558                   break;
1559                 }
1560                 if (getNextChar('='))
1561                   return TokenNameDIVIDE_EQUAL;
1562                 return TokenNameDIVIDE;
1563               }
1564             case '\u001a' :
1565               if (atEnd())
1566                 return TokenNameEOF;
1567               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1568               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1569
1570             default :
1571               if (currentCharacter == '$') {
1572                 while ((currentCharacter = source[currentPosition++]) == '$') {
1573                 }
1574                 if (currentCharacter == '{')
1575                   return TokenNameDOLLAR_LBRACE;
1576                 if (isPHPIdentifierStart(currentCharacter))
1577                   return scanIdentifierOrKeyword(true);
1578                 return TokenNameERROR;
1579               }
1580               if (isPHPIdentifierStart(currentCharacter))
1581                 return scanIdentifierOrKeyword(false);
1582               if (Character.isDigit(currentCharacter))
1583                 return scanNumber(false);
1584               return TokenNameERROR;
1585           }
1586         }
1587       } //-----------------end switch while try--------------------
1588       catch (IndexOutOfBoundsException e) {
1589       }
1590     }
1591     return TokenNameEOF;
1592   }
1593
1594   //  public final void getNextUnicodeChar()
1595   //    throws IndexOutOfBoundsException, InvalidInputException {
1596   //    //VOID
1597   //    //handle the case of a unicode escape.
1598   //    //when a unicode escape appears we must use a buffer that holds the decoded char values
1599   //    //At the end of this method currentCharacter holds the newly visited char
1600   //    //and currentPosition points right after it
1601   //
1602   //    //ALL getNextChar.... ARE OPTIMIZED COPIES 
1603   //
1604   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1605   //    currentPosition++;
1606   //    while (source[currentPosition] == 'u') {
1607   //      currentPosition++;
1608   //      unicodeSize++;
1609   //    }
1610   //
1611   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1612   //      || c1 < 0
1613   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1614   //      || c2 < 0
1615   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1616   //      || c3 < 0
1617   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1618   //      || c4 < 0) {
1619   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1620   //    } else {
1621   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1622   //      //need the unicode buffer
1623   //      if (withoutUnicodePtr == 0) {
1624   //        //buffer all the entries that have been left aside....
1625   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1626   //        System.arraycopy(
1627   //          source,
1628   //          startPosition,
1629   //          withoutUnicodeBuffer,
1630   //          1,
1631   //          withoutUnicodePtr);
1632   //      }
1633   //      //fill the buffer with the char
1634   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1635   //    }
1636   //    unicodeAsBackSlash = currentCharacter == '\\';
1637   //  }
1638   /* Tokenize a method body, assuming that curly brackets are properly balanced.
1639    */
1640   public final void jumpOverMethodBody() {
1641
1642     this.wasAcr = false;
1643     int found = 1;
1644     try {
1645       while (true) { //loop for jumping over comments
1646         // ---------Consume white space and handles startPosition---------
1647         boolean isWhiteSpace;
1648         do {
1649           startPosition = currentPosition;
1650           currentCharacter = source[currentPosition++];
1651           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1652           //            && (source[currentPosition] == 'u')) {
1653           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1654           //          } else {
1655           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1656             pushLineSeparator();
1657           isWhiteSpace = Character.isWhitespace(currentCharacter);
1658           //          }
1659         } while (isWhiteSpace);
1660
1661         // -------consume token until } is found---------
1662         switch (currentCharacter) {
1663           case '{' :
1664             found++;
1665             break;
1666           case '}' :
1667             found--;
1668             if (found == 0)
1669               return;
1670             break;
1671           case '\'' :
1672             {
1673               boolean test;
1674               test = getNextChar('\\');
1675               if (test) {
1676                 try {
1677                   scanDoubleQuotedEscapeCharacter();
1678                 } catch (InvalidInputException ex) {
1679                 };
1680               } else {
1681                 //                try { // consume next character
1682                 unicodeAsBackSlash = false;
1683                 currentCharacter = source[currentPosition++];
1684                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1685                 //                    && (source[currentPosition] == 'u')) {
1686                 //                    getNextUnicodeChar();
1687                 //                  } else {
1688                 if (withoutUnicodePtr != 0) {
1689                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1690                 }
1691                 //                  }
1692                 //                } catch (InvalidInputException ex) {
1693                 //                };
1694               }
1695               getNextChar('\'');
1696               break;
1697             }
1698           case '"' :
1699             try {
1700               //              try { // consume next character
1701               unicodeAsBackSlash = false;
1702               currentCharacter = source[currentPosition++];
1703               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1704               //                  && (source[currentPosition] == 'u')) {
1705               //                  getNextUnicodeChar();
1706               //                } else {
1707               if (withoutUnicodePtr != 0) {
1708                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1709               }
1710               //                }
1711               //              } catch (InvalidInputException ex) {
1712               //              };
1713               while (currentCharacter != '"') {
1714                 if (currentCharacter == '\r') {
1715                   if (source[currentPosition] == '\n')
1716                     currentPosition++;
1717                   break;
1718                   // the string cannot go further than the line
1719                 }
1720                 if (currentCharacter == '\n') {
1721                   break;
1722                   // the string cannot go further than the line
1723                 }
1724                 if (currentCharacter == '\\') {
1725                   try {
1726                     scanDoubleQuotedEscapeCharacter();
1727                   } catch (InvalidInputException ex) {
1728                   };
1729                 }
1730                 //                try { // consume next character
1731                 unicodeAsBackSlash = false;
1732                 currentCharacter = source[currentPosition++];
1733                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1734                 //                    && (source[currentPosition] == 'u')) {
1735                 //                    getNextUnicodeChar();
1736                 //                  } else {
1737                 if (withoutUnicodePtr != 0) {
1738                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1739                 }
1740                 //                  }
1741                 //                } catch (InvalidInputException ex) {
1742                 //                };
1743               }
1744             } catch (IndexOutOfBoundsException e) {
1745               return;
1746             }
1747             break;
1748           case '/' :
1749             {
1750               int test;
1751               if ((test = getNextChar('/', '*')) == 0) {
1752                 //line comment 
1753                 try {
1754                   //get the next char 
1755                   currentCharacter = source[currentPosition++];
1756                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1757                   //                    && (source[currentPosition] == 'u')) {
1758                   //                    //-------------unicode traitement ------------
1759                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1760                   //                    currentPosition++;
1761                   //                    while (source[currentPosition] == 'u') {
1762                   //                      currentPosition++;
1763                   //                    }
1764                   //                    if ((c1 =
1765                   //                      Character.getNumericValue(source[currentPosition++]))
1766                   //                      > 15
1767                   //                      || c1 < 0
1768                   //                      || (c2 =
1769                   //                        Character.getNumericValue(source[currentPosition++]))
1770                   //                        > 15
1771                   //                      || c2 < 0
1772                   //                      || (c3 =
1773                   //                        Character.getNumericValue(source[currentPosition++]))
1774                   //                        > 15
1775                   //                      || c3 < 0
1776                   //                      || (c4 =
1777                   //                        Character.getNumericValue(source[currentPosition++]))
1778                   //                        > 15
1779                   //                      || c4 < 0) {
1780                   //                      //error don't care of the value
1781                   //                      currentCharacter = 'A';
1782                   //                    } //something different from \n and \r
1783                   //                    else {
1784                   //                      currentCharacter =
1785                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1786                   //                    }
1787                   //                  }
1788
1789                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1790                     //get the next char 
1791                     currentCharacter = source[currentPosition++];
1792                     //                    if (((currentCharacter = source[currentPosition++])
1793                     //                      == '\\')
1794                     //                      && (source[currentPosition] == 'u')) {
1795                     //                      //-------------unicode traitement ------------
1796                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1797                     //                      currentPosition++;
1798                     //                      while (source[currentPosition] == 'u') {
1799                     //                        currentPosition++;
1800                     //                      }
1801                     //                      if ((c1 =
1802                     //                        Character.getNumericValue(source[currentPosition++]))
1803                     //                        > 15
1804                     //                        || c1 < 0
1805                     //                        || (c2 =
1806                     //                          Character.getNumericValue(source[currentPosition++]))
1807                     //                          > 15
1808                     //                        || c2 < 0
1809                     //                        || (c3 =
1810                     //                          Character.getNumericValue(source[currentPosition++]))
1811                     //                          > 15
1812                     //                        || c3 < 0
1813                     //                        || (c4 =
1814                     //                          Character.getNumericValue(source[currentPosition++]))
1815                     //                          > 15
1816                     //                        || c4 < 0) {
1817                     //                        //error don't care of the value
1818                     //                        currentCharacter = 'A';
1819                     //                      } //something different from \n and \r
1820                     //                      else {
1821                     //                        currentCharacter =
1822                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1823                     //                      }
1824                     //                    }
1825                   }
1826                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1827                     pushLineSeparator();
1828                 } catch (IndexOutOfBoundsException e) {
1829                 } //an eof will them be generated
1830                 break;
1831               }
1832               if (test > 0) {
1833                 //traditional and annotation comment
1834                 boolean star = false;
1835                 //                try { // consume next character
1836                 unicodeAsBackSlash = false;
1837                 currentCharacter = source[currentPosition++];
1838                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1839                 //                    && (source[currentPosition] == 'u')) {
1840                 //                    getNextUnicodeChar();
1841                 //                  } else {
1842                 if (withoutUnicodePtr != 0) {
1843                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1844                 }
1845                 //                  };
1846                 //                } catch (InvalidInputException ex) {
1847                 //                };
1848                 if (currentCharacter == '*') {
1849                   star = true;
1850                 }
1851                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1852                   pushLineSeparator();
1853                 try { //get the next char 
1854                   currentCharacter = source[currentPosition++];
1855                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1856                   //                    && (source[currentPosition] == 'u')) {
1857                   //                    //-------------unicode traitement ------------
1858                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1859                   //                    currentPosition++;
1860                   //                    while (source[currentPosition] == 'u') {
1861                   //                      currentPosition++;
1862                   //                    }
1863                   //                    if ((c1 =
1864                   //                      Character.getNumericValue(source[currentPosition++]))
1865                   //                      > 15
1866                   //                      || c1 < 0
1867                   //                      || (c2 =
1868                   //                        Character.getNumericValue(source[currentPosition++]))
1869                   //                        > 15
1870                   //                      || c2 < 0
1871                   //                      || (c3 =
1872                   //                        Character.getNumericValue(source[currentPosition++]))
1873                   //                        > 15
1874                   //                      || c3 < 0
1875                   //                      || (c4 =
1876                   //                        Character.getNumericValue(source[currentPosition++]))
1877                   //                        > 15
1878                   //                      || c4 < 0) {
1879                   //                      //error don't care of the value
1880                   //                      currentCharacter = 'A';
1881                   //                    } //something different from * and /
1882                   //                    else {
1883                   //                      currentCharacter =
1884                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1885                   //                    }
1886                   //                  }
1887                   //loop until end of comment */ 
1888                   while ((currentCharacter != '/') || (!star)) {
1889                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1890                       pushLineSeparator();
1891                     star = currentCharacter == '*';
1892                     //get next char
1893                     currentCharacter = source[currentPosition++];
1894                     //                    if (((currentCharacter = source[currentPosition++])
1895                     //                      == '\\')
1896                     //                      && (source[currentPosition] == 'u')) {
1897                     //                      //-------------unicode traitement ------------
1898                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1899                     //                      currentPosition++;
1900                     //                      while (source[currentPosition] == 'u') {
1901                     //                        currentPosition++;
1902                     //                      }
1903                     //                      if ((c1 =
1904                     //                        Character.getNumericValue(source[currentPosition++]))
1905                     //                        > 15
1906                     //                        || c1 < 0
1907                     //                        || (c2 =
1908                     //                          Character.getNumericValue(source[currentPosition++]))
1909                     //                          > 15
1910                     //                        || c2 < 0
1911                     //                        || (c3 =
1912                     //                          Character.getNumericValue(source[currentPosition++]))
1913                     //                          > 15
1914                     //                        || c3 < 0
1915                     //                        || (c4 =
1916                     //                          Character.getNumericValue(source[currentPosition++]))
1917                     //                          > 15
1918                     //                        || c4 < 0) {
1919                     //                        //error don't care of the value
1920                     //                        currentCharacter = 'A';
1921                     //                      } //something different from * and /
1922                     //                      else {
1923                     //                        currentCharacter =
1924                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1925                     //                      }
1926                     //                    }
1927                   }
1928                 } catch (IndexOutOfBoundsException e) {
1929                   return;
1930                 }
1931                 break;
1932               }
1933               break;
1934             }
1935
1936           default :
1937             if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1938               try {
1939                 scanIdentifierOrKeyword((currentCharacter == '$'));
1940               } catch (InvalidInputException ex) {
1941               };
1942               break;
1943             }
1944             if (Character.isDigit(currentCharacter)) {
1945               try {
1946                 scanNumber(false);
1947               } catch (InvalidInputException ex) {
1948               };
1949               break;
1950             }
1951         }
1952       }
1953       //-----------------end switch while try--------------------
1954     } catch (IndexOutOfBoundsException e) {
1955     } catch (InvalidInputException e) {
1956     }
1957     return;
1958   }
1959   //  public final boolean jumpOverUnicodeWhiteSpace()
1960   //    throws InvalidInputException {
1961   //    //BOOLEAN
1962   //    //handle the case of unicode. Jump over the next whiteSpace
1963   //    //making startPosition pointing on the next available char
1964   //    //On false, the currentCharacter is filled up with a potential
1965   //    //correct char
1966   //
1967   //    try {
1968   //      this.wasAcr = false;
1969   //      int c1, c2, c3, c4;
1970   //      int unicodeSize = 6;
1971   //      currentPosition++;
1972   //      while (source[currentPosition] == 'u') {
1973   //        currentPosition++;
1974   //        unicodeSize++;
1975   //      }
1976   //
1977   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1978   //        || c1 < 0)
1979   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1980   //          || c2 < 0)
1981   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1982   //          || c3 < 0)
1983   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1984   //          || c4 < 0)) {
1985   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1986   //      }
1987   //
1988   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1989   //      if (recordLineSeparator
1990   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1991   //        pushLineSeparator();
1992   //      if (Character.isWhitespace(currentCharacter))
1993   //        return true;
1994   //
1995   //      //buffer the new char which is not a white space
1996   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1997   //      //withoutUnicodePtr == 1 is true here
1998   //      return false;
1999   //    } catch (IndexOutOfBoundsException e) {
2000   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2001   //    }
2002   //  }
2003   public final int[] getLineEnds() {
2004     //return a bounded copy of this.lineEnds 
2005
2006     int[] copy;
2007     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2008     return copy;
2009   }
2010
2011   public char[] getSource() {
2012     return this.source;
2013   }
2014   final char[] optimizedCurrentTokenSource1() {
2015     //return always the same char[] build only once
2016
2017     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2018     char charOne = source[startPosition];
2019     switch (charOne) {
2020       case 'a' :
2021         return charArray_a;
2022       case 'b' :
2023         return charArray_b;
2024       case 'c' :
2025         return charArray_c;
2026       case 'd' :
2027         return charArray_d;
2028       case 'e' :
2029         return charArray_e;
2030       case 'f' :
2031         return charArray_f;
2032       case 'g' :
2033         return charArray_g;
2034       case 'h' :
2035         return charArray_h;
2036       case 'i' :
2037         return charArray_i;
2038       case 'j' :
2039         return charArray_j;
2040       case 'k' :
2041         return charArray_k;
2042       case 'l' :
2043         return charArray_l;
2044       case 'm' :
2045         return charArray_m;
2046       case 'n' :
2047         return charArray_n;
2048       case 'o' :
2049         return charArray_o;
2050       case 'p' :
2051         return charArray_p;
2052       case 'q' :
2053         return charArray_q;
2054       case 'r' :
2055         return charArray_r;
2056       case 's' :
2057         return charArray_s;
2058       case 't' :
2059         return charArray_t;
2060       case 'u' :
2061         return charArray_u;
2062       case 'v' :
2063         return charArray_v;
2064       case 'w' :
2065         return charArray_w;
2066       case 'x' :
2067         return charArray_x;
2068       case 'y' :
2069         return charArray_y;
2070       case 'z' :
2071         return charArray_z;
2072       default :
2073         return new char[] { charOne };
2074     }
2075   }
2076
2077   final char[] optimizedCurrentTokenSource2() {
2078     //try to return the same char[] build only once
2079
2080     char c0, c1;
2081     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2082     char[][] table = charArray_length[0][hash];
2083     int i = newEntry2;
2084     while (++i < InternalTableSize) {
2085       char[] charArray = table[i];
2086       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2087         return charArray;
2088     }
2089     //---------other side---------
2090     i = -1;
2091     int max = newEntry2;
2092     while (++i <= max) {
2093       char[] charArray = table[i];
2094       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2095         return charArray;
2096     }
2097     //--------add the entry-------
2098     if (++max >= InternalTableSize)
2099       max = 0;
2100     char[] r;
2101     table[max] = (r = new char[] { c0, c1 });
2102     newEntry2 = max;
2103     return r;
2104   }
2105
2106   final char[] optimizedCurrentTokenSource3() {
2107     //try to return the same char[] build only once
2108
2109     char c0, c1, c2;
2110     int hash =
2111       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2112         % TableSize;
2113     char[][] table = charArray_length[1][hash];
2114     int i = newEntry3;
2115     while (++i < InternalTableSize) {
2116       char[] charArray = table[i];
2117       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2118         return charArray;
2119     }
2120     //---------other side---------
2121     i = -1;
2122     int max = newEntry3;
2123     while (++i <= max) {
2124       char[] charArray = table[i];
2125       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2126         return charArray;
2127     }
2128     //--------add the entry-------
2129     if (++max >= InternalTableSize)
2130       max = 0;
2131     char[] r;
2132     table[max] = (r = new char[] { c0, c1, c2 });
2133     newEntry3 = max;
2134     return r;
2135   }
2136
2137   final char[] optimizedCurrentTokenSource4() {
2138     //try to return the same char[] build only once
2139
2140     char c0, c1, c2, c3;
2141     long hash =
2142       ((((long) (c0 = source[startPosition])) << 18)
2143         + ((c1 = source[startPosition + 1]) << 12)
2144         + ((c2 = source[startPosition + 2]) << 6)
2145         + (c3 = source[startPosition + 3]))
2146         % TableSize;
2147     char[][] table = charArray_length[2][(int) hash];
2148     int i = newEntry4;
2149     while (++i < InternalTableSize) {
2150       char[] charArray = table[i];
2151       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2152         return charArray;
2153     }
2154     //---------other side---------
2155     i = -1;
2156     int max = newEntry4;
2157     while (++i <= max) {
2158       char[] charArray = table[i];
2159       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2160         return charArray;
2161     }
2162     //--------add the entry-------
2163     if (++max >= InternalTableSize)
2164       max = 0;
2165     char[] r;
2166     table[max] = (r = new char[] { c0, c1, c2, c3 });
2167     newEntry4 = max;
2168     return r;
2169
2170   }
2171
2172   final char[] optimizedCurrentTokenSource5() {
2173     //try to return the same char[] build only once
2174
2175     char c0, c1, c2, c3, c4;
2176     long hash =
2177       ((((long) (c0 = source[startPosition])) << 24)
2178         + (((long) (c1 = source[startPosition + 1])) << 18)
2179         + ((c2 = source[startPosition + 2]) << 12)
2180         + ((c3 = source[startPosition + 3]) << 6)
2181         + (c4 = source[startPosition + 4]))
2182         % TableSize;
2183     char[][] table = charArray_length[3][(int) hash];
2184     int i = newEntry5;
2185     while (++i < InternalTableSize) {
2186       char[] charArray = table[i];
2187       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2188         return charArray;
2189     }
2190     //---------other side---------
2191     i = -1;
2192     int max = newEntry5;
2193     while (++i <= max) {
2194       char[] charArray = table[i];
2195       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2196         return charArray;
2197     }
2198     //--------add the entry-------
2199     if (++max >= InternalTableSize)
2200       max = 0;
2201     char[] r;
2202     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2203     newEntry5 = max;
2204     return r;
2205
2206   }
2207
2208   final char[] optimizedCurrentTokenSource6() {
2209     //try to return the same char[] build only once
2210
2211     char c0, c1, c2, c3, c4, c5;
2212     long hash =
2213       ((((long) (c0 = source[startPosition])) << 32)
2214         + (((long) (c1 = source[startPosition + 1])) << 24)
2215         + (((long) (c2 = source[startPosition + 2])) << 18)
2216         + ((c3 = source[startPosition + 3]) << 12)
2217         + ((c4 = source[startPosition + 4]) << 6)
2218         + (c5 = source[startPosition + 5]))
2219         % TableSize;
2220     char[][] table = charArray_length[4][(int) hash];
2221     int i = newEntry6;
2222     while (++i < InternalTableSize) {
2223       char[] charArray = table[i];
2224       if ((c0 == charArray[0])
2225         && (c1 == charArray[1])
2226         && (c2 == charArray[2])
2227         && (c3 == charArray[3])
2228         && (c4 == charArray[4])
2229         && (c5 == charArray[5]))
2230         return charArray;
2231     }
2232     //---------other side---------
2233     i = -1;
2234     int max = newEntry6;
2235     while (++i <= max) {
2236       char[] charArray = table[i];
2237       if ((c0 == charArray[0])
2238         && (c1 == charArray[1])
2239         && (c2 == charArray[2])
2240         && (c3 == charArray[3])
2241         && (c4 == charArray[4])
2242         && (c5 == charArray[5]))
2243         return charArray;
2244     }
2245     //--------add the entry-------
2246     if (++max >= InternalTableSize)
2247       max = 0;
2248     char[] r;
2249     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2250     newEntry6 = max;
2251     return r;
2252   }
2253
2254   public final void pushLineSeparator() throws InvalidInputException {
2255     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2256     final int INCREMENT = 250;
2257
2258     if (this.checkNonExternalizedStringLiterals) {
2259       // reinitialize the current line for non externalize strings purpose
2260       currentLine = null;
2261     }
2262     //currentCharacter is at position currentPosition-1
2263
2264     // cr 000D
2265     if (currentCharacter == '\r') {
2266       int separatorPos = currentPosition - 1;
2267       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2268         return;
2269       //System.out.println("CR-" + separatorPos);
2270       try {
2271         lineEnds[++linePtr] = separatorPos;
2272       } catch (IndexOutOfBoundsException e) {
2273         //linePtr value is correct
2274         int oldLength = lineEnds.length;
2275         int[] old = lineEnds;
2276         lineEnds = new int[oldLength + INCREMENT];
2277         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2278         lineEnds[linePtr] = separatorPos;
2279       }
2280       // look-ahead for merged cr+lf
2281       try {
2282         if (source[currentPosition] == '\n') {
2283           //System.out.println("look-ahead LF-" + currentPosition);                     
2284           lineEnds[linePtr] = currentPosition;
2285           currentPosition++;
2286           wasAcr = false;
2287         } else {
2288           wasAcr = true;
2289         }
2290       } catch (IndexOutOfBoundsException e) {
2291         wasAcr = true;
2292       }
2293     } else {
2294       // lf 000A
2295       if (currentCharacter == '\n') {
2296         //must merge eventual cr followed by lf
2297         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2298           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2299           lineEnds[linePtr] = currentPosition - 1;
2300         } else {
2301           int separatorPos = currentPosition - 1;
2302           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2303             return;
2304           // System.out.println("LF-" + separatorPos);                                                  
2305           try {
2306             lineEnds[++linePtr] = separatorPos;
2307           } catch (IndexOutOfBoundsException e) {
2308             //linePtr value is correct
2309             int oldLength = lineEnds.length;
2310             int[] old = lineEnds;
2311             lineEnds = new int[oldLength + INCREMENT];
2312             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2313             lineEnds[linePtr] = separatorPos;
2314           }
2315         }
2316         wasAcr = false;
2317       }
2318     }
2319   }
2320   public final void pushUnicodeLineSeparator() {
2321     // isUnicode means that the \r or \n has been read as a unicode character
2322
2323     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2324
2325     final int INCREMENT = 250;
2326     //currentCharacter is at position currentPosition-1
2327
2328     if (this.checkNonExternalizedStringLiterals) {
2329       // reinitialize the current line for non externalize strings purpose
2330       currentLine = null;
2331     }
2332
2333     // cr 000D
2334     if (currentCharacter == '\r') {
2335       int separatorPos = currentPosition - 6;
2336       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2337         return;
2338       //System.out.println("CR-" + separatorPos);
2339       try {
2340         lineEnds[++linePtr] = separatorPos;
2341       } catch (IndexOutOfBoundsException e) {
2342         //linePtr value is correct
2343         int oldLength = lineEnds.length;
2344         int[] old = lineEnds;
2345         lineEnds = new int[oldLength + INCREMENT];
2346         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2347         lineEnds[linePtr] = separatorPos;
2348       }
2349       // look-ahead for merged cr+lf
2350       if (source[currentPosition] == '\n') {
2351         //System.out.println("look-ahead LF-" + currentPosition);                       
2352         lineEnds[linePtr] = currentPosition;
2353         currentPosition++;
2354         wasAcr = false;
2355       } else {
2356         wasAcr = true;
2357       }
2358     } else {
2359       // lf 000A
2360       if (currentCharacter == '\n') {
2361         //must merge eventual cr followed by lf
2362         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2363           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2364           lineEnds[linePtr] = currentPosition - 6;
2365         } else {
2366           int separatorPos = currentPosition - 6;
2367           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2368             return;
2369           // System.out.println("LF-" + separatorPos);                                                  
2370           try {
2371             lineEnds[++linePtr] = separatorPos;
2372           } catch (IndexOutOfBoundsException e) {
2373             //linePtr value is correct
2374             int oldLength = lineEnds.length;
2375             int[] old = lineEnds;
2376             lineEnds = new int[oldLength + INCREMENT];
2377             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2378             lineEnds[linePtr] = separatorPos;
2379           }
2380         }
2381         wasAcr = false;
2382       }
2383     }
2384   }
2385   public final void recordComment(boolean isJavadoc) {
2386
2387     // a new annotation comment is recorded
2388     try {
2389       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2390     } catch (IndexOutOfBoundsException e) {
2391       int oldStackLength = commentStops.length;
2392       int[] oldStack = commentStops;
2393       commentStops = new int[oldStackLength + 30];
2394       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2395       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2396       //grows the positions buffers too
2397       int[] old = commentStarts;
2398       commentStarts = new int[oldStackLength + 30];
2399       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2400     }
2401
2402     //the buffer is of a correct size here
2403     commentStarts[commentPtr] = startPosition;
2404   }
2405   public void resetTo(int begin, int end) {
2406     //reset the scanner to a given position where it may rescan again
2407
2408     diet = false;
2409     initialPosition = startPosition = currentPosition = begin;
2410     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2411     commentPtr = -1; // reset comment stack
2412   }
2413
2414   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2415     // the string with "\\u" is a legal string of two chars \ and u
2416     //thus we use a direct access to the source (for regular cases).
2417
2418     //    if (unicodeAsBackSlash) {
2419     //      // consume next character
2420     //      unicodeAsBackSlash = false;
2421     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2422     //        && (source[currentPosition] == 'u')) {
2423     //        getNextUnicodeChar();
2424     //      } else {
2425     //        if (withoutUnicodePtr != 0) {
2426     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2427     //        }
2428     //      }
2429     //    } else
2430     currentCharacter = source[currentPosition++];
2431     switch (currentCharacter) {
2432       case '\'' :
2433         currentCharacter = '\'';
2434         break;
2435       case '\\' :
2436         currentCharacter = '\\';
2437         break;
2438       default :
2439         currentCharacter = '\\';
2440         currentPosition--;
2441     }
2442   }
2443
2444   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2445     // the string with "\\u" is a legal string of two chars \ and u
2446     //thus we use a direct access to the source (for regular cases).
2447
2448     //    if (unicodeAsBackSlash) {
2449     //      // consume next character
2450     //      unicodeAsBackSlash = false;
2451     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2452     //        && (source[currentPosition] == 'u')) {
2453     //        getNextUnicodeChar();
2454     //      } else {
2455     //        if (withoutUnicodePtr != 0) {
2456     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2457     //        }
2458     //      }
2459     //    } else
2460     currentCharacter = source[currentPosition++];
2461     switch (currentCharacter) {
2462       //      case 'b' :
2463       //        currentCharacter = '\b';
2464       //        break;
2465       case 't' :
2466         currentCharacter = '\t';
2467         break;
2468       case 'n' :
2469         currentCharacter = '\n';
2470         break;
2471         //      case 'f' :
2472         //        currentCharacter = '\f';
2473         //        break;
2474       case 'r' :
2475         currentCharacter = '\r';
2476         break;
2477       case '\"' :
2478         currentCharacter = '\"';
2479         break;
2480       case '\'' :
2481         currentCharacter = '\'';
2482         break;
2483       case '\\' :
2484         currentCharacter = '\\';
2485         break;
2486       case '$' :
2487         currentCharacter = '$';
2488         break;
2489       default :
2490         // -----------octal escape--------------
2491         // OctalDigit
2492         // OctalDigit OctalDigit
2493         // ZeroToThree OctalDigit OctalDigit
2494
2495         int number = Character.getNumericValue(currentCharacter);
2496         if (number >= 0 && number <= 7) {
2497           boolean zeroToThreeNot = number > 3;
2498           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2499             int digit = Character.getNumericValue(currentCharacter);
2500             if (digit >= 0 && digit <= 7) {
2501               number = (number * 8) + digit;
2502               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2503                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2504                   currentPosition--;
2505                 } else {
2506                   digit = Character.getNumericValue(currentCharacter);
2507                   if (digit >= 0 && digit <= 7) {
2508                     // has read \ZeroToThree OctalDigit OctalDigit
2509                     number = (number * 8) + digit;
2510                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2511                     currentPosition--;
2512                   }
2513                 }
2514               } else { // has read \OctalDigit NonDigit--> ignore last character
2515                 currentPosition--;
2516               }
2517             } else { // has read \OctalDigit NonOctalDigit--> ignore last character                                             
2518               currentPosition--;
2519             }
2520           } else { // has read \OctalDigit --> ignore last character
2521             currentPosition--;
2522           }
2523           if (number > 255)
2524             throw new InvalidInputException(INVALID_ESCAPE);
2525           currentCharacter = (char) number;
2526         }
2527         //else
2528         //     throw new InvalidInputException(INVALID_ESCAPE);
2529     }
2530   }
2531
2532   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2533   //    return scanIdentifierOrKeyword( false );
2534   //  }
2535
2536   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2537     //test keywords
2538
2539     //first dispatch on the first char.
2540     //then the length. If there are several
2541     //keywors with the same length AND the same first char, then do another
2542     //disptach on the second char :-)...cool....but fast !
2543
2544     useAssertAsAnIndentifier = false;
2545
2546     while (getNextCharAsJavaIdentifierPart()) {
2547     };
2548
2549     if (isVariable) {
2550       if (new String(getCurrentTokenSource()).equals("$this")) {
2551         return TokenNamethis;
2552       }
2553       return TokenNameVariable;
2554     }
2555     int index, length;
2556     char[] data;
2557     char firstLetter;
2558     //    if (withoutUnicodePtr == 0)
2559
2560     //quick test on length == 1 but not on length > 12 while most identifier
2561     //have a length which is <= 12...but there are lots of identifier with
2562     //only one char....
2563
2564     //      {
2565     if ((length = currentPosition - startPosition) == 1)
2566       return TokenNameIdentifier;
2567     //  data = source;
2568     data = new char[length];
2569     index = startPosition;
2570     for (int i = 0; i < length; i++) {
2571       data[i] = Character.toLowerCase(source[index + i]);
2572     }
2573     index = 0;
2574     //    } else {
2575     //      if ((length = withoutUnicodePtr) == 1)
2576     //        return TokenNameIdentifier;
2577     //      // data = withoutUnicodeBuffer;
2578     //      data = new char[withoutUnicodeBuffer.length];
2579     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2580     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2581     //      }
2582     //      index = 1;
2583     //    }
2584
2585     firstLetter = data[index];
2586     switch (firstLetter) {
2587
2588       case 'a' : // as and array
2589         switch (length) {
2590           case 2 : //as
2591             if ((data[++index] == 's')) {
2592               return TokenNameas;
2593             } else {
2594               return TokenNameIdentifier;
2595             }
2596           case 3 : //and
2597             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2598               return TokenNameAND;
2599             } else {
2600               return TokenNameIdentifier;
2601             }
2602             //          case 5 :
2603             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2604             //              return TokenNamearray;
2605             //            else
2606             //              return TokenNameIdentifier;
2607           default :
2608             return TokenNameIdentifier;
2609         }
2610       case 'b' : //break
2611         switch (length) {
2612           case 5 :
2613             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2614               return TokenNamebreak;
2615             else
2616               return TokenNameIdentifier;
2617           default :
2618             return TokenNameIdentifier;
2619         }
2620
2621       case 'c' : //case class continue
2622         switch (length) {
2623           case 4 :
2624             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2625               return TokenNamecase;
2626             else
2627               return TokenNameIdentifier;
2628           case 5 :
2629             if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2630               return TokenNameclass;
2631             else
2632               return TokenNameIdentifier;
2633           case 8 :
2634             if ((data[++index] == 'o')
2635               && (data[++index] == 'n')
2636               && (data[++index] == 't')
2637               && (data[++index] == 'i')
2638               && (data[++index] == 'n')
2639               && (data[++index] == 'u')
2640               && (data[++index] == 'e'))
2641               return TokenNamecontinue;
2642             else
2643               return TokenNameIdentifier;
2644           default :
2645             return TokenNameIdentifier;
2646         }
2647
2648       case 'd' : //define default do 
2649         switch (length) {
2650           case 2 :
2651             if ((data[++index] == 'o'))
2652               return TokenNamedo;
2653             else
2654               return TokenNameIdentifier;
2655           case 6 :
2656             if ((data[++index] == 'e')
2657               && (data[++index] == 'f')
2658               && (data[++index] == 'i')
2659               && (data[++index] == 'n')
2660               && (data[++index] == 'e'))
2661               return TokenNamedefine;
2662             else
2663               return TokenNameIdentifier;
2664           case 7 :
2665             if ((data[++index] == 'e')
2666               && (data[++index] == 'f')
2667               && (data[++index] == 'a')
2668               && (data[++index] == 'u')
2669               && (data[++index] == 'l')
2670               && (data[++index] == 't'))
2671               return TokenNamedefault;
2672             else
2673               return TokenNameIdentifier;
2674           default :
2675             return TokenNameIdentifier;
2676         }
2677       case 'e' : //echo else elseif extends
2678         switch (length) {
2679           case 4 :
2680             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2681               return TokenNameecho;
2682             else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2683               return TokenNameelse;
2684             else
2685               return TokenNameIdentifier;
2686           case 5 : // endif
2687             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2688               return TokenNameendif;
2689             else
2690               return TokenNameIdentifier;
2691           case 6 : // endfor
2692             if ((data[++index] == 'n')
2693               && (data[++index] == 'd')
2694               && (data[++index] == 'f')
2695               && (data[++index] == 'o')
2696               && (data[++index] == 'r'))
2697               return TokenNameendfor;
2698             else if (
2699               (data[index] == 'l')
2700                 && (data[++index] == 's')
2701                 && (data[++index] == 'e')
2702                 && (data[++index] == 'i')
2703                 && (data[++index] == 'f'))
2704               return TokenNameelseif;
2705             else
2706               return TokenNameIdentifier;
2707           case 7 :
2708             if ((data[++index] == 'x')
2709               && (data[++index] == 't')
2710               && (data[++index] == 'e')
2711               && (data[++index] == 'n')
2712               && (data[++index] == 'd')
2713               && (data[++index] == 's'))
2714               return TokenNameextends;
2715             else
2716               return TokenNameIdentifier;
2717           case 8 : // endwhile
2718             if ((data[++index] == 'n')
2719               && (data[++index] == 'd')
2720               && (data[++index] == 'w')
2721               && (data[++index] == 'h')
2722               && (data[++index] == 'i')
2723               && (data[++index] == 'l')
2724               && (data[++index] == 'e'))
2725               return TokenNameendwhile;
2726             else
2727               return TokenNameIdentifier;
2728           case 9 : // endswitch
2729             if ((data[++index] == 'n')
2730               && (data[++index] == 'd')
2731               && (data[++index] == 's')
2732               && (data[++index] == 'w')
2733               && (data[++index] == 'i')
2734               && (data[++index] == 't')
2735               && (data[++index] == 'c')
2736               && (data[++index] == 'h'))
2737               return TokenNameendswitch;
2738             else
2739               return TokenNameIdentifier;
2740           case 10 : // endforeach
2741             if ((data[++index] == 'n')
2742               && (data[++index] == 'd')
2743               && (data[++index] == 'f')
2744               && (data[++index] == 'o')
2745               && (data[++index] == 'r')
2746               && (data[++index] == 'e')
2747               && (data[++index] == 'a')
2748               && (data[++index] == 'c')
2749               && (data[++index] == 'h'))
2750               return TokenNameendforeach;
2751             else
2752               return TokenNameIdentifier;
2753
2754           default :
2755             return TokenNameIdentifier;
2756         }
2757
2758       case 'f' : //for false function
2759         switch (length) {
2760           case 3 :
2761             if ((data[++index] == 'o') && (data[++index] == 'r'))
2762               return TokenNamefor;
2763             else
2764               return TokenNameIdentifier;
2765           case 5 :
2766             if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2767               return TokenNamefalse;
2768             else
2769               return TokenNameIdentifier;
2770           case 7 : // function
2771             if ((data[++index] == 'o')
2772               && (data[++index] == 'r')
2773               && (data[++index] == 'e')
2774               && (data[++index] == 'a')
2775               && (data[++index] == 'c')
2776               && (data[++index] == 'h'))
2777               return TokenNameforeach;
2778             else
2779               return TokenNameIdentifier;
2780           case 8 : // function
2781             if ((data[++index] == 'u')
2782               && (data[++index] == 'n')
2783               && (data[++index] == 'c')
2784               && (data[++index] == 't')
2785               && (data[++index] == 'i')
2786               && (data[++index] == 'o')
2787               && (data[++index] == 'n'))
2788               return TokenNamefunction;
2789             else
2790               return TokenNameIdentifier;
2791           default :
2792             return TokenNameIdentifier;
2793         }
2794       case 'g' : //global
2795         if (length == 6) {
2796           if ((data[++index] == 'l')
2797             && (data[++index] == 'o')
2798             && (data[++index] == 'b')
2799             && (data[++index] == 'a')
2800             && (data[++index] == 'l')) {
2801             return TokenNameglobal;
2802           }
2803         }
2804         return TokenNameIdentifier;
2805
2806       case 'i' : //if int 
2807         switch (length) {
2808           case 2 :
2809             if (data[++index] == 'f')
2810               return TokenNameif;
2811             else
2812               return TokenNameIdentifier;
2813             //          case 3 :
2814             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2815             //              return TokenNameint;
2816             //            else
2817             //              return TokenNameIdentifier;
2818           case 7 :
2819             if ((data[++index] == 'n')
2820               && (data[++index] == 'c')
2821               && (data[++index] == 'l')
2822               && (data[++index] == 'u')
2823               && (data[++index] == 'd')
2824               && (data[++index] == 'e'))
2825               return TokenNameinclude;
2826             else
2827               return TokenNameIdentifier;
2828           case 12 :
2829             if ((data[++index] == 'n')
2830               && (data[++index] == 'c')
2831               && (data[++index] == 'l')
2832               && (data[++index] == 'u')
2833               && (data[++index] == 'd')
2834               && (data[++index] == 'e')
2835               && (data[++index] == '_')
2836               && (data[++index] == 'o')
2837               && (data[++index] == 'n')
2838               && (data[++index] == 'c')
2839               && (data[++index] == 'e'))
2840               return TokenNameinclude_once;
2841             else
2842               return TokenNameIdentifier;
2843           default :
2844             return TokenNameIdentifier;
2845         }
2846
2847       case 'l' : //list
2848         if (length == 4) {
2849           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2850             return TokenNamelist;
2851           }
2852         }
2853         return TokenNameIdentifier;
2854
2855       case 'n' : // new null
2856         switch (length) {
2857           case 3 :
2858             if ((data[++index] == 'e') && (data[++index] == 'w'))
2859               return TokenNamenew;
2860             else
2861               return TokenNameIdentifier;
2862           case 4 :
2863             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2864               return TokenNamenull;
2865             else
2866               return TokenNameIdentifier;
2867
2868           default :
2869             return TokenNameIdentifier;
2870         }
2871       case 'o' : // or old_function
2872         if (length == 2) {
2873           if (data[++index] == 'r') {
2874             return TokenNameOR;
2875           }
2876         }
2877         //        if (length == 12) {
2878         //          if ((data[++index] == 'l')
2879         //            && (data[++index] == 'd')
2880         //            && (data[++index] == '_')
2881         //            && (data[++index] == 'f')
2882         //            && (data[++index] == 'u')
2883         //            && (data[++index] == 'n')
2884         //            && (data[++index] == 'c')
2885         //            && (data[++index] == 't')
2886         //            && (data[++index] == 'i')
2887         //            && (data[++index] == 'o')
2888         //            && (data[++index] == 'n')) {
2889         //            return TokenNameold_function;
2890         //          }
2891         //        }
2892         return TokenNameIdentifier;
2893
2894       case 'p' : // print
2895         if (length == 5) {
2896           if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2897             return TokenNameprint;
2898           }
2899         }
2900         return TokenNameIdentifier;
2901       case 'r' : //return require require_once
2902         if (length == 6) {
2903           if ((data[++index] == 'e')
2904             && (data[++index] == 't')
2905             && (data[++index] == 'u')
2906             && (data[++index] == 'r')
2907             && (data[++index] == 'n')) {
2908             return TokenNamereturn;
2909           }
2910         } else if (length == 7) {
2911           if ((data[++index] == 'e')
2912             && (data[++index] == 'q')
2913             && (data[++index] == 'u')
2914             && (data[++index] == 'i')
2915             && (data[++index] == 'r')
2916             && (data[++index] == 'e')) {
2917             return TokenNamerequire;
2918           }
2919         } else if (length == 12) {
2920           if ((data[++index] == 'e')
2921             && (data[++index] == 'q')
2922             && (data[++index] == 'u')
2923             && (data[++index] == 'i')
2924             && (data[++index] == 'r')
2925             && (data[++index] == 'e')
2926             && (data[++index] == '_')
2927             && (data[++index] == 'o')
2928             && (data[++index] == 'n')
2929             && (data[++index] == 'c')
2930             && (data[++index] == 'e')) {
2931             return TokenNamerequire_once;
2932           }
2933         } else
2934           return TokenNameIdentifier;
2935
2936       case 's' : //static switch 
2937         switch (length) {
2938           case 6 :
2939             if (data[++index] == 't')
2940               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2941                 return TokenNamestatic;
2942               } else
2943                 return TokenNameIdentifier;
2944             else if (
2945               (data[index] == 'w')
2946                 && (data[++index] == 'i')
2947                 && (data[++index] == 't')
2948                 && (data[++index] == 'c')
2949                 && (data[++index] == 'h'))
2950               return TokenNameswitch;
2951             else
2952               return TokenNameIdentifier;
2953           default :
2954             return TokenNameIdentifier;
2955         }
2956
2957       case 't' : // true
2958         switch (length) {
2959
2960           case 4 :
2961             if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2962               return TokenNametrue;
2963             else
2964               return TokenNameIdentifier;
2965             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2966             //              return TokenNamethis;
2967
2968           default :
2969             return TokenNameIdentifier;
2970         }
2971
2972       case 'v' : //var 
2973         switch (length) {
2974           case 3 :
2975             if ((data[++index] == 'a') && (data[++index] == 'r'))
2976               return TokenNamevar;
2977             else
2978               return TokenNameIdentifier;
2979
2980           default :
2981             return TokenNameIdentifier;
2982         }
2983
2984       case 'w' : //while 
2985         switch (length) {
2986           case 5 :
2987             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2988               return TokenNamewhile;
2989             else
2990               return TokenNameIdentifier;
2991             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2992             //return TokenNamewidefp ;
2993             //else
2994             //return TokenNameIdentifier;
2995           default :
2996             return TokenNameIdentifier;
2997         }
2998
2999       case 'x' : //xor
3000         switch (length) {
3001           case 3 :
3002             if ((data[++index] == 'o') && (data[++index] == 'r'))
3003               return TokenNameXOR;
3004             else
3005               return TokenNameIdentifier;
3006
3007           default :
3008             return TokenNameIdentifier;
3009         }
3010       default :
3011         return TokenNameIdentifier;
3012     }
3013   }
3014   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3015
3016     //when entering this method the currentCharacter is the firt
3017     //digit of the number , i.e. it may be preceeded by a . when
3018     //dotPrefix is true
3019
3020     boolean floating = dotPrefix;
3021     if ((!dotPrefix) && (currentCharacter == '0')) {
3022       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3023         //force the first char of the hexa number do exist...
3024         // consume next character
3025         unicodeAsBackSlash = false;
3026         currentCharacter = source[currentPosition++];
3027         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3028         //          && (source[currentPosition] == 'u')) {
3029         //          getNextUnicodeChar();
3030         //        } else {
3031         //          if (withoutUnicodePtr != 0) {
3032         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3033         //          }
3034         //        }
3035         if (Character.digit(currentCharacter, 16) == -1)
3036           throw new InvalidInputException(INVALID_HEXA);
3037         //---end forcing--
3038         while (getNextCharAsDigit(16)) {
3039         };
3040         //        if (getNextChar('l', 'L') >= 0)
3041         //          return TokenNameLongLiteral;
3042         //        else
3043         return TokenNameIntegerLiteral;
3044       }
3045
3046       //there is x or X in the number
3047       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3048       if (getNextCharAsDigit()) {
3049         //-------------potential octal-----------------
3050         while (getNextCharAsDigit()) {
3051         };
3052
3053         //        if (getNextChar('l', 'L') >= 0) {
3054         //          return TokenNameLongLiteral;
3055         //        }
3056         //
3057         //        if (getNextChar('f', 'F') >= 0) {
3058         //          return TokenNameFloatingPointLiteral;
3059         //        }
3060
3061         if (getNextChar('d', 'D') >= 0) {
3062           return TokenNameDoubleLiteral;
3063         } else { //make the distinction between octal and float ....
3064           if (getNextChar('.')) { //bingo ! ....
3065             while (getNextCharAsDigit()) {
3066             };
3067             if (getNextChar('e', 'E') >= 0) {
3068               // consume next character
3069               unicodeAsBackSlash = false;
3070               currentCharacter = source[currentPosition++];
3071               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3072               //                && (source[currentPosition] == 'u')) {
3073               //                getNextUnicodeChar();
3074               //              } else {
3075               //                if (withoutUnicodePtr != 0) {
3076               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3077               //                }
3078               //              }
3079
3080               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3081                 // consume next character
3082                 unicodeAsBackSlash = false;
3083                 currentCharacter = source[currentPosition++];
3084                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3085                 //                  && (source[currentPosition] == 'u')) {
3086                 //                  getNextUnicodeChar();
3087                 //                } else {
3088                 //                  if (withoutUnicodePtr != 0) {
3089                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3090                 //                      currentCharacter;
3091                 //                  }
3092                 //                }
3093               }
3094               if (!Character.isDigit(currentCharacter))
3095                 throw new InvalidInputException(INVALID_FLOAT);
3096               while (getNextCharAsDigit()) {
3097               };
3098             }
3099             //            if (getNextChar('f', 'F') >= 0)
3100             //              return TokenNameFloatingPointLiteral;
3101             getNextChar('d', 'D'); //jump over potential d or D
3102             return TokenNameDoubleLiteral;
3103           } else {
3104             return TokenNameIntegerLiteral;
3105           }
3106         }
3107       } else {
3108         /* carry on */
3109       }
3110     }
3111
3112     while (getNextCharAsDigit()) {
3113     };
3114
3115     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3116     //      return TokenNameLongLiteral;
3117
3118     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3119       while (getNextCharAsDigit()) {
3120       };
3121       floating = true;
3122     }
3123
3124     //if floating is true both exponant and suffix may be optional
3125
3126     if (getNextChar('e', 'E') >= 0) {
3127       floating = true;
3128       // consume next character
3129       unicodeAsBackSlash = false;
3130       currentCharacter = source[currentPosition++];
3131       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3132       //        && (source[currentPosition] == 'u')) {
3133       //        getNextUnicodeChar();
3134       //      } else {
3135       //        if (withoutUnicodePtr != 0) {
3136       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3137       //        }
3138       //      }
3139
3140       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3141         unicodeAsBackSlash = false;
3142         currentCharacter = source[currentPosition++];
3143         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3144         //          && (source[currentPosition] == 'u')) {
3145         //          getNextUnicodeChar();
3146         //        } else {
3147         //          if (withoutUnicodePtr != 0) {
3148         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3149         //          }
3150         //        }
3151       }
3152       if (!Character.isDigit(currentCharacter))
3153         throw new InvalidInputException(INVALID_FLOAT);
3154       while (getNextCharAsDigit()) {
3155       };
3156     }
3157
3158     if (getNextChar('d', 'D') >= 0)
3159       return TokenNameDoubleLiteral;
3160     //    if (getNextChar('f', 'F') >= 0)
3161     //      return TokenNameFloatingPointLiteral;
3162
3163     //the long flag has been tested before
3164
3165     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3166   }
3167   /**
3168    * Search the line number corresponding to a specific position
3169    *
3170    */
3171   public final int getLineNumber(int position) {
3172
3173     if (lineEnds == null)
3174       return 1;
3175     int length = linePtr + 1;
3176     if (length == 0)
3177       return 1;
3178     int g = 0, d = length - 1;
3179     int m = 0;
3180     while (g <= d) {
3181       m = (g + d) / 2;
3182       if (position < lineEnds[m]) {
3183         d = m - 1;
3184       } else if (position > lineEnds[m]) {
3185         g = m + 1;
3186       } else {
3187         return m + 1;
3188       }
3189     }
3190     if (position < lineEnds[m]) {
3191       return m + 1;
3192     }
3193     return m + 2;
3194   }
3195
3196   public void setPHPMode(boolean mode) {
3197     phpMode = mode;
3198   }
3199
3200   public final void setSource(char[] source) {
3201     //the source-buffer is set to sourceString
3202
3203     if (source == null) {
3204       this.source = new char[0];
3205     } else {
3206       this.source = source;
3207     }
3208     startPosition = -1;
3209     initialPosition = currentPosition = 0;
3210     containsAssertKeyword = false;
3211     withoutUnicodeBuffer = new char[this.source.length];
3212
3213   }
3214
3215   public String toString() {
3216     if (startPosition == source.length)
3217       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3218     if (currentPosition > source.length)
3219       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3220
3221     char front[] = new char[startPosition];
3222     System.arraycopy(source, 0, front, 0, startPosition);
3223
3224     int middleLength = (currentPosition - 1) - startPosition + 1;
3225     char middle[];
3226     if (middleLength > -1) {
3227       middle = new char[middleLength];
3228       System.arraycopy(source, startPosition, middle, 0, middleLength);
3229     } else {
3230       middle = new char[0];
3231     }
3232
3233     char end[] = new char[source.length - (currentPosition - 1)];
3234     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3235
3236     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3237     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3238     + new String(end);
3239   }
3240   public final String toStringAction(int act) {
3241
3242     switch (act) {
3243       case TokenNameERROR :
3244         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3245       case TokenNameStopPHP :
3246         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3247       case TokenNameIdentifier :
3248         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3249       case TokenNameVariable :
3250         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3251       case TokenNameas :
3252         return "as"; //$NON-NLS-1$
3253       case TokenNamebreak :
3254         return "break"; //$NON-NLS-1$
3255       case TokenNamecase :
3256         return "case"; //$NON-NLS-1$
3257       case TokenNameclass :
3258         return "class"; //$NON-NLS-1$
3259       case TokenNamecontinue :
3260         return "continue"; //$NON-NLS-1$
3261       case TokenNamedefault :
3262         return "default"; //$NON-NLS-1$
3263       case TokenNamedefine :
3264         return "define"; //$NON-NLS-1$
3265       case TokenNamedo :
3266         return "do"; //$NON-NLS-1$
3267       case TokenNameecho :
3268         return "echo"; //$NON-NLS-1$
3269       case TokenNameelse :
3270         return "else"; //$NON-NLS-1$
3271       case TokenNameelseif :
3272         return "elseif"; //$NON-NLS-1$
3273       case TokenNameendfor :
3274         return "endfor"; //$NON-NLS-1$
3275       case TokenNameendforeach :
3276         return "endforeach"; //$NON-NLS-1$
3277       case TokenNameendif :
3278         return "endif"; //$NON-NLS-1$
3279       case TokenNameendswitch :
3280         return "endswitch"; //$NON-NLS-1$
3281       case TokenNameendwhile :
3282         return "endwhile"; //$NON-NLS-1$
3283       case TokenNameextends :
3284         return "extends"; //$NON-NLS-1$
3285       case TokenNamefalse :
3286         return "false"; //$NON-NLS-1$
3287       case TokenNamefor :
3288         return "for"; //$NON-NLS-1$
3289       case TokenNameforeach :
3290         return "foreach"; //$NON-NLS-1$
3291       case TokenNamefunction :
3292         return "function"; //$NON-NLS-1$
3293       case TokenNameglobal :
3294         return "global"; //$NON-NLS-1$
3295       case TokenNameif :
3296         return "if"; //$NON-NLS-1$
3297       case TokenNameinclude :
3298         return "include"; //$NON-NLS-1$
3299       case TokenNameinclude_once :
3300         return "include_once"; //$NON-NLS-1$
3301       case TokenNamelist :
3302         return "list"; //$NON-NLS-1$
3303       case TokenNamenew :
3304         return "new"; //$NON-NLS-1$
3305       case TokenNamenull :
3306         return "null"; //$NON-NLS-1$
3307       case TokenNameprint :
3308         return "print"; //$NON-NLS-1$
3309       case TokenNamerequire :
3310         return "require"; //$NON-NLS-1$
3311       case TokenNamerequire_once :
3312         return "require_once"; //$NON-NLS-1$
3313       case TokenNamereturn :
3314         return "return"; //$NON-NLS-1$
3315       case TokenNamestatic :
3316         return "static"; //$NON-NLS-1$
3317       case TokenNameswitch :
3318         return "switch"; //$NON-NLS-1$
3319       case TokenNametrue :
3320         return "true"; //$NON-NLS-1$
3321       case TokenNamevar :
3322         return "var"; //$NON-NLS-1$
3323       case TokenNamewhile :
3324         return "while"; //$NON-NLS-1$
3325       case TokenNamethis :
3326         return "$this"; //$NON-NLS-1$
3327       case TokenNameIntegerLiteral :
3328         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3329       case TokenNameDoubleLiteral :
3330         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3331       case TokenNameStringLiteral :
3332         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3333       case TokenNameStringConstant :
3334         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3335       case TokenNameStringInterpolated :
3336         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3337       case TokenNameHEREDOC :
3338         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3339
3340       case TokenNamePLUS_PLUS :
3341         return "++"; //$NON-NLS-1$
3342       case TokenNameMINUS_MINUS :
3343         return "--"; //$NON-NLS-1$
3344       case TokenNameEQUAL_EQUAL :
3345         return "=="; //$NON-NLS-1$
3346       case TokenNameEQUAL_EQUAL_EQUAL :
3347         return "==="; //$NON-NLS-1$
3348       case TokenNameEQUAL_GREATER :
3349         return "=>"; //$NON-NLS-1$
3350       case TokenNameLESS_EQUAL :
3351         return "<="; //$NON-NLS-1$
3352       case TokenNameGREATER_EQUAL :
3353         return ">="; //$NON-NLS-1$
3354       case TokenNameNOT_EQUAL :
3355         return "!="; //$NON-NLS-1$
3356       case TokenNameNOT_EQUAL_EQUAL :
3357         return "!=="; //$NON-NLS-1$
3358       case TokenNameLEFT_SHIFT :
3359         return "<<"; //$NON-NLS-1$
3360       case TokenNameRIGHT_SHIFT :
3361         return ">>"; //$NON-NLS-1$
3362       case TokenNamePLUS_EQUAL :
3363         return "+="; //$NON-NLS-1$
3364       case TokenNameMINUS_EQUAL :
3365         return "-="; //$NON-NLS-1$
3366       case TokenNameMULTIPLY_EQUAL :
3367         return "*="; //$NON-NLS-1$
3368       case TokenNameDIVIDE_EQUAL :
3369         return "/="; //$NON-NLS-1$
3370       case TokenNameAND_EQUAL :
3371         return "&="; //$NON-NLS-1$
3372       case TokenNameOR_EQUAL :
3373         return "|="; //$NON-NLS-1$
3374       case TokenNameXOR_EQUAL :
3375         return "^="; //$NON-NLS-1$
3376       case TokenNameREMAINDER_EQUAL :
3377         return "%="; //$NON-NLS-1$
3378       case TokenNameLEFT_SHIFT_EQUAL :
3379         return "<<="; //$NON-NLS-1$
3380       case TokenNameRIGHT_SHIFT_EQUAL :
3381         return ">>="; //$NON-NLS-1$
3382       case TokenNameOR_OR :
3383         return "||"; //$NON-NLS-1$
3384       case TokenNameAND_AND :
3385         return "&&"; //$NON-NLS-1$
3386       case TokenNamePLUS :
3387         return "+"; //$NON-NLS-1$
3388       case TokenNameMINUS :
3389         return "-"; //$NON-NLS-1$
3390       case TokenNameMINUS_GREATER :
3391         return "->";
3392       case TokenNameNOT :
3393         return "!"; //$NON-NLS-1$
3394       case TokenNameREMAINDER :
3395         return "%"; //$NON-NLS-1$
3396       case TokenNameXOR :
3397         return "^"; //$NON-NLS-1$
3398       case TokenNameAND :
3399         return "&"; //$NON-NLS-1$
3400       case TokenNameMULTIPLY :
3401         return "*"; //$NON-NLS-1$
3402       case TokenNameOR :
3403         return "|"; //$NON-NLS-1$
3404       case TokenNameTWIDDLE :
3405         return "~"; //$NON-NLS-1$
3406       case TokenNameTWIDDLE_EQUAL :
3407         return "~="; //$NON-NLS-1$
3408       case TokenNameDIVIDE :
3409         return "/"; //$NON-NLS-1$
3410       case TokenNameGREATER :
3411         return ">"; //$NON-NLS-1$
3412       case TokenNameLESS :
3413         return "<"; //$NON-NLS-1$
3414       case TokenNameLPAREN :
3415         return "("; //$NON-NLS-1$
3416       case TokenNameRPAREN :
3417         return ")"; //$NON-NLS-1$
3418       case TokenNameLBRACE :
3419         return "{"; //$NON-NLS-1$
3420       case TokenNameRBRACE :
3421         return "}"; //$NON-NLS-1$
3422       case TokenNameLBRACKET :
3423         return "["; //$NON-NLS-1$
3424       case TokenNameRBRACKET :
3425         return "]"; //$NON-NLS-1$
3426       case TokenNameSEMICOLON :
3427         return ";"; //$NON-NLS-1$
3428       case TokenNameQUESTION :
3429         return "?"; //$NON-NLS-1$
3430       case TokenNameCOLON :
3431         return ":"; //$NON-NLS-1$
3432       case TokenNameCOMMA :
3433         return ","; //$NON-NLS-1$
3434       case TokenNameDOT :
3435         return "."; //$NON-NLS-1$
3436       case TokenNameEQUAL :
3437         return "="; //$NON-NLS-1$
3438       case TokenNameAT :
3439         return "@";
3440       case TokenNameDOLLAR_LBRACE :
3441         return "${";
3442       case TokenNameEOF :
3443         return "EOF"; //$NON-NLS-1$
3444       case TokenNameWHITESPACE :
3445         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3446       case TokenNameCOMMENT_LINE :
3447         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3448       case TokenNameCOMMENT_BLOCK :
3449         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3450       case TokenNameCOMMENT_PHPDOC :
3451         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3452       case TokenNameHTML :
3453         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3454       default :
3455         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3456     }
3457   }
3458
3459   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3460     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3461   }
3462
3463   public Scanner(
3464     boolean tokenizeComments,
3465     boolean tokenizeWhiteSpace,
3466     boolean checkNonExternalizedStringLiterals,
3467     boolean assertMode) {
3468     this.eofPosition = Integer.MAX_VALUE;
3469     this.tokenizeComments = tokenizeComments;
3470     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3471     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3472     this.assertMode = assertMode;
3473   }
3474
3475   private void checkNonExternalizeString() throws InvalidInputException {
3476     if (currentLine == null)
3477       return;
3478     parseTags(currentLine);
3479   }
3480
3481   private void parseTags(NLSLine line) throws InvalidInputException {
3482     String s = new String(getCurrentTokenSource());
3483     int pos = s.indexOf(TAG_PREFIX);
3484     int lineLength = line.size();
3485     while (pos != -1) {
3486       int start = pos + TAG_PREFIX_LENGTH;
3487       int end = s.indexOf(TAG_POSTFIX, start);
3488       String index = s.substring(start, end);
3489       int i = 0;
3490       try {
3491         i = Integer.parseInt(index) - 1;
3492         // Tags are one based not zero based.
3493       } catch (NumberFormatException e) {
3494         i = -1; // we don't want to consider this as a valid NLS tag
3495       }
3496       if (line.exists(i)) {
3497         line.set(i, null);
3498       }
3499       pos = s.indexOf(TAG_PREFIX, start);
3500     }
3501
3502     this.nonNLSStrings = new StringLiteral[lineLength];
3503     int nonNLSCounter = 0;
3504     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3505       StringLiteral literal = (StringLiteral) iterator.next();
3506       if (literal != null) {
3507         this.nonNLSStrings[nonNLSCounter++] = literal;
3508       }
3509     }
3510     if (nonNLSCounter == 0) {
3511       this.nonNLSStrings = null;
3512       currentLine = null;
3513       return;
3514     }
3515     this.wasNonExternalizedStringLiteral = true;
3516     if (nonNLSCounter != lineLength) {
3517       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
3518     }
3519     currentLine = null;
3520   }
3521 }