1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Stack;
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
24 public class Scanner implements IScanner, ITerminalSymbols {
26 * APIs are - getNextToken() which returns the current type of the token
27 * (this value is not memorized by the scanner) - getCurrentTokenSource()
28 * which provides the token "REAL" source (aka all unicode escapes have been
29 * transformed into a correct char) - sourceStart gives the position into the
30 * stream - currentPosition-1 gives the sourceEnd position into the stream
// ---- Scanner state ---------------------------------------------------------
// NOTE(review): assertMode is never read in the visible part of this file -
// confirm it is still used further down.
33 private boolean assertMode;
34 public boolean useAssertAsAnIndentifier = false;
35 //flag indicating if processed source contains occurrences of keyword assert
36 public boolean containsAssertKeyword = false;
37 public boolean recordLineSeparator;
// NOTE(review): presumably true while scanning inside a PHP section rather
// than inline HTML - confirm against getNextToken().
38 public boolean phpMode = false;
39 public Stack encapsedStringStack = null;
// Last character read from source by the getNextChar*/consumeString* helpers.
40 public char currentCharacter;
// Index in source of the first character of the current token
// (returned by getCurrentTokenStartPosition()).
41 public int startPosition;
// Index in source of the next character to read; currentPosition - 1 is the
// end of the current token (see getCurrentTokenEndPosition()).
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after this position eof are generated instead of real token from the
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
48 public boolean tokenizeStrings;
49 //source should be viewed as a window (aka a part)
50 //of a entire very large stream
// Copy of the current token in which escape sequences have been replaced by
// the characters they denote. Slot 0 is never used for data: the real first
// character lives at index 1 so that withoutUnicodePtr == 0 can mean "unused".
53 public char[] withoutUnicodeBuffer;
// Index of the last character stored in withoutUnicodeBuffer.
54 public int withoutUnicodePtr;
55 //when == 0 ==> no unicode in the current token
56 public boolean unicodeAsBackSlash = false;
57 public boolean scanningFloatLiteral = false;
58 //support for /** comments
59 public int[] commentStops = new int[10];
60 public int[] commentStarts = new int[10];
61 public int commentPtr = -1; // no comment test with commentPtr value -1
62 protected int lastCommentLinePosition = -1;
63 //diet parsing support - jump over some method body when requested
64 public boolean diet = false;
65 //support for the poor-line-debuggers ....
66 //remember the position of the cr/lf
// Preallocated with room for 250 entries; linePtr is the index of the last
// entry in use (-1 when no line end has been recorded yet).
67 public int[] lineEnds = new int[250];
68 public int linePtr = -1;
69 public boolean wasAcr = false;
// Message keys carried by InvalidInputException. Callers tell the failures
// apart by comparing e.getMessage() with these constants (for example the
// consumeString* methods test for INVALID_ESCAPE and throw UNTERMINATED_STRING).
70 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
71 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
72 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
73 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
74 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
75 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
76 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
77 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
78 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
79 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
80 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
81 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
82 //----------------optimized identifier management------------------
// Preallocated one-character arrays, one per lower-case ASCII letter.
83 static final char[] charArray_a = new char[]{'a'},
84 charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
85 charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
86 charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
87 charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
88 charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
89 charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
90 charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
91 charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
92 charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
93 charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
94 charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
95 charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
96 charArray_z = new char[]{'z'};
// Placeholder of six NUL characters; the instance initializer stores it in
// every slot of charArray_length.
97 static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
98 '\u0000', '\u0000', '\u0000'};
// Second and third dimension of charArray_length.
99 static final int TableSize = 30, InternalTableSize = 6;
// First dimension of charArray_length; getCurrentIdentifierSource() switches
// on the token length with this value as the upper limit.
101 public static final int OptimizedLength = 6;
// NOTE(review): presumably a per-length cache of short token sources used by
// the optimizedCurrentTokenSourceN() methods - confirm against their bodies.
103 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
104 // support for detecting non-externalized string literals
105 int currentLineNr = -1;
106 int previousLineNr = -1;
// Line currently collecting string literals; the consumeString* methods create
// it lazily and append it to lines.
107 NLSLine currentLine = null;
// All NLSLine objects created so far (raw List of NLSLine).
108 List lines = new ArrayList();
// Marker that opens and closes a "//$NON-NLS-<n>$" tag.
109 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
110 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
111 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
112 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
113 public StringLiteral[] nonNLSStrings = null;
// When true, the consumeString* methods record every scanned string literal
// in currentLine.
114 public boolean checkNonExternalizedStringLiterals = true;
115 public boolean wasNonExternalizedStringLiteral = false;
// Initializer loop: points every slot of charArray_length at the shared
// initCharArray placeholder.
// NOTE(review): the outer bound 6 duplicates OptimizedLength - keep the two in
// sync. The enclosing braces of this initializer are not visible here.
117 for (int i = 0; i < 6; i++) {
118 for (int j = 0; j < TableSize; j++) {
119 for (int k = 0; k < InternalTableSize; k++) {
120 charArray_length[i][j][k] = initCharArray;
// NOTE(review): presumably the next insertion index into charArray_length for
// tokens of length 2..6 - confirm against optimizedCurrentTokenSourceN().
125 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
// Bracket kind codes; BracketKinds is the number of distinct kinds.
127 public static final int RoundBracket = 0;
128 public static final int SquareBracket = 1;
129 public static final int CurlyBracket = 2;
130 public static final int BracketKinds = 3;
// Task tag support.
// NOTE(review): presumably the found* arrays are parallel and hold
// foundTaskCount valid entries, matched against taskTags/taskPriorities -
// confirm against the comment-scanning code.
132 public char[][] foundTaskTags = null;
133 public char[][] foundTaskMessages;
134 public char[][] foundTaskPriorities = null;
135 public int[][] foundTaskPositions;
136 public int foundTaskCount = 0;
137 public char[][] taskTags = null;
138 public char[][] taskPriorities = null;
// Compile-time switches for diagnostic output; both are off.
139 public static final boolean DEBUG = false;
140 public static final boolean TRACE = false;
143 * Determines if the specified character is permissible as the first
144 * character in a PHP identifier
146 public static boolean isPHPIdentifierStart(char ch) {
147 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
150 * Determines if the specified character may be part of a PHP identifier as
151 * other than the first character
153 public static boolean isPHPIdentifierPart(char ch) {
154 return Character.isLetterOrDigit(ch) || (ch == '_')
155 || (0x7F <= ch && ch <= 0xFF);
157 public final boolean atEnd() {
158 // This code is not relevant if source is
159 // Only a part of the real stream input
160 return source.length == currentPosition;
162 public char[] getCurrentIdentifierSource() {
163 //return the token REAL source (aka unicodes are precomputed)
165 // if (withoutUnicodePtr != 0)
166 // //0 is used as a fast test flag so the real first char is in position 1
168 // withoutUnicodeBuffer,
170 // result = new char[withoutUnicodePtr],
172 // withoutUnicodePtr);
174 int length = currentPosition - startPosition;
175 switch (length) { // see OptimizedLength
177 return optimizedCurrentTokenSource1();
179 return optimizedCurrentTokenSource2();
181 return optimizedCurrentTokenSource3();
183 return optimizedCurrentTokenSource4();
185 return optimizedCurrentTokenSource5();
187 return optimizedCurrentTokenSource6();
190 System.arraycopy(source, startPosition, result = new char[length], 0,
195 public int getCurrentTokenEndPosition() {
196 return this.currentPosition - 1;
198 public final char[] getCurrentTokenSource() {
199 // Return the token REAL source (aka unicodes are precomputed)
201 // if (withoutUnicodePtr != 0)
202 // // 0 is used as a fast test flag so the real first char is in position 1
204 // withoutUnicodeBuffer,
206 // result = new char[withoutUnicodePtr],
208 // withoutUnicodePtr);
211 System.arraycopy(source, startPosition,
212 result = new char[length = currentPosition - startPosition], 0, length);
216 public final char[] getCurrentTokenSource(int startPos) {
217 // Return the token REAL source (aka unicodes are precomputed)
219 // if (withoutUnicodePtr != 0)
220 // // 0 is used as a fast test flag so the real first char is in position 1
222 // withoutUnicodeBuffer,
224 // result = new char[withoutUnicodePtr],
226 // withoutUnicodePtr);
229 System.arraycopy(source, startPos,
230 result = new char[length = currentPosition - startPos], 0, length);
234 public final char[] getCurrentTokenSourceString() {
235 //return the token REAL source (aka unicodes are precomputed).
236 //REMOVE the two " that are at the beginning and the end.
238 if (withoutUnicodePtr != 0)
239 //0 is used as a fast test flag so the real first char is in position 1
240 System.arraycopy(withoutUnicodeBuffer, 2,
241 //2 is 1 (real start) + 1 (to jump over the ")
242 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
245 System.arraycopy(source, startPosition + 1,
246 result = new char[length = currentPosition - startPosition - 2], 0,
251 public int getCurrentTokenStartPosition() {
252 return this.startPosition;
254 public final char[] getCurrentStringLiteralSource() {
255 // Return the token REAL source (aka unicodes are precomputed)
258 System.arraycopy(source, startPosition + 1,
259 result = new char[length = currentPosition - startPosition - 2], 0,
265 * Search the source position corresponding to the end of a given line number
267 * Line numbers are 1-based, and relative to the scanner initialPosition.
268 * Character positions are 0-based.
270 * In case the given line number is inconsistent, answers -1.
272 public final int getLineEnd(int lineNumber) {
273 if (lineEnds == null)
275 if (lineNumber >= lineEnds.length)
279 if (lineNumber == lineEnds.length - 1)
281 return lineEnds[lineNumber - 1];
282 // next line start one character behind the lineEnd of the previous line
285 * Search the source position corresponding to the beginning of a given line
288 * Line numbers are 1-based, and relative to the scanner initialPosition.
289 * Character positions are 0-based.
291 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
293 * In case the given line number is inconsistent, answers -1.
295 public final int getLineStart(int lineNumber) {
296 if (lineEnds == null)
298 if (lineNumber >= lineEnds.length)
303 return initialPosition;
304 return lineEnds[lineNumber - 2] + 1;
305 // next line start one character behind the lineEnd of the previous line
307 public final boolean getNextChar(char testedChar) {
309 //handle the case of unicode.
310 //when a unicode appears then we must use a buffer that holds char
312 //At the end of this method currentCharacter holds the new visited char
313 //and currentPosition points right next after it
314 //Both previous lines are true if the currentCharacter is == to the
316 //On false, no side effect has occured.
317 //ALL getNextChar.... ARE OPTIMIZED COPIES
318 int temp = currentPosition;
320 currentCharacter = source[currentPosition++];
321 // if (((currentCharacter = source[currentPosition++]) == '\\')
322 // && (source[currentPosition] == 'u')) {
323 // //-------------unicode traitement ------------
324 // int c1, c2, c3, c4;
325 // int unicodeSize = 6;
326 // currentPosition++;
327 // while (source[currentPosition] == 'u') {
328 // currentPosition++;
332 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
334 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
336 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
338 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
340 // currentPosition = temp;
344 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
345 // if (currentCharacter != testedChar) {
346 // currentPosition = temp;
349 // unicodeAsBackSlash = currentCharacter == '\\';
351 // //need the unicode buffer
352 // if (withoutUnicodePtr == 0) {
353 // //buffer all the entries that have been left aside....
354 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
358 // withoutUnicodeBuffer,
360 // withoutUnicodePtr);
362 // //fill the buffer with the char
363 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
366 // } //-------------end unicode traitement--------------
368 if (currentCharacter != testedChar) {
369 currentPosition = temp;
372 unicodeAsBackSlash = false;
373 // if (withoutUnicodePtr != 0)
374 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
377 } catch (IndexOutOfBoundsException e) {
378 unicodeAsBackSlash = false;
379 currentPosition = temp;
383 public final int getNextChar(char testedChar1, char testedChar2) {
384 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
385 //test can be done with (x==0) for the first and (x>0) for the second
386 //handle the case of unicode.
387 //when a unicode appears then we must use a buffer that holds char
389 //At the end of this method currentCharacter holds the new visited char
390 //and currentPosition points right next after it
391 //Both previous lines are true if the currentCharacter is == to the
393 //On false, no side effect has occured.
394 //ALL getNextChar.... ARE OPTIMIZED COPIES
395 int temp = currentPosition;
398 currentCharacter = source[currentPosition++];
399 // if (((currentCharacter = source[currentPosition++]) == '\\')
400 // && (source[currentPosition] == 'u')) {
401 // //-------------unicode traitement ------------
402 // int c1, c2, c3, c4;
403 // int unicodeSize = 6;
404 // currentPosition++;
405 // while (source[currentPosition] == 'u') {
406 // currentPosition++;
410 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
412 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
414 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
416 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
418 // currentPosition = temp;
422 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
423 // if (currentCharacter == testedChar1)
425 // else if (currentCharacter == testedChar2)
428 // currentPosition = temp;
432 // //need the unicode buffer
433 // if (withoutUnicodePtr == 0) {
434 // //buffer all the entries that have been left aside....
435 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
439 // withoutUnicodeBuffer,
441 // withoutUnicodePtr);
443 // //fill the buffer with the char
444 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
446 // } //-------------end unicode traitement--------------
448 if (currentCharacter == testedChar1)
450 else if (currentCharacter == testedChar2)
453 currentPosition = temp;
456 // if (withoutUnicodePtr != 0)
457 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
460 } catch (IndexOutOfBoundsException e) {
461 currentPosition = temp;
465 public final boolean getNextCharAsDigit() {
467 //handle the case of unicode.
468 //when a unicode appears then we must use a buffer that holds char
470 //At the end of this method currentCharacter holds the new visited char
471 //and currentPosition points right next after it
472 //Both previous lines are true if the currentCharacter is a digit
473 //On false, no side effect has occured.
474 //ALL getNextChar.... ARE OPTIMIZED COPIES
475 int temp = currentPosition;
477 currentCharacter = source[currentPosition++];
478 // if (((currentCharacter = source[currentPosition++]) == '\\')
479 // && (source[currentPosition] == 'u')) {
480 // //-------------unicode traitement ------------
481 // int c1, c2, c3, c4;
482 // int unicodeSize = 6;
483 // currentPosition++;
484 // while (source[currentPosition] == 'u') {
485 // currentPosition++;
489 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
491 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
493 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
495 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
497 // currentPosition = temp;
501 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
502 // if (!Character.isDigit(currentCharacter)) {
503 // currentPosition = temp;
507 // //need the unicode buffer
508 // if (withoutUnicodePtr == 0) {
509 // //buffer all the entries that have been left aside....
510 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
514 // withoutUnicodeBuffer,
516 // withoutUnicodePtr);
518 // //fill the buffer with the char
519 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
521 // } //-------------end unicode traitement--------------
523 if (!Character.isDigit(currentCharacter)) {
524 currentPosition = temp;
527 // if (withoutUnicodePtr != 0)
528 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
531 } catch (IndexOutOfBoundsException e) {
532 currentPosition = temp;
536 public final boolean getNextCharAsDigit(int radix) {
538 //handle the case of unicode.
539 //when a unicode appears then we must use a buffer that holds char
541 //At the end of this method currentCharacter holds the new visited char
542 //and currentPosition points right next after it
543 //Both previous lines are true if the currentCharacter is a digit base on
545 //On false, no side effect has occured.
546 //ALL getNextChar.... ARE OPTIMIZED COPIES
547 int temp = currentPosition;
549 currentCharacter = source[currentPosition++];
550 // if (((currentCharacter = source[currentPosition++]) == '\\')
551 // && (source[currentPosition] == 'u')) {
552 // //-------------unicode traitement ------------
553 // int c1, c2, c3, c4;
554 // int unicodeSize = 6;
555 // currentPosition++;
556 // while (source[currentPosition] == 'u') {
557 // currentPosition++;
561 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
563 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
565 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
567 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
569 // currentPosition = temp;
573 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
574 // if (Character.digit(currentCharacter, radix) == -1) {
575 // currentPosition = temp;
579 // //need the unicode buffer
580 // if (withoutUnicodePtr == 0) {
581 // //buffer all the entries that have been left aside....
582 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
586 // withoutUnicodeBuffer,
588 // withoutUnicodePtr);
590 // //fill the buffer with the char
591 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
593 // } //-------------end unicode traitement--------------
595 if (Character.digit(currentCharacter, radix) == -1) {
596 currentPosition = temp;
599 // if (withoutUnicodePtr != 0)
600 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
603 } catch (IndexOutOfBoundsException e) {
604 currentPosition = temp;
608 public boolean getNextCharAsJavaIdentifierPart() {
610 //handle the case of unicode.
611 //when a unicode appears then we must use a buffer that holds char
613 //At the end of this method currentCharacter holds the new visited char
614 //and currentPosition points right next after it
615 //Both previous lines are true if the currentCharacter is a
616 // JavaIdentifierPart
617 //On false, no side effect has occured.
618 //ALL getNextChar.... ARE OPTIMIZED COPIES
619 int temp = currentPosition;
621 currentCharacter = source[currentPosition++];
622 // if (((currentCharacter = source[currentPosition++]) == '\\')
623 // && (source[currentPosition] == 'u')) {
624 // //-------------unicode traitement ------------
625 // int c1, c2, c3, c4;
626 // int unicodeSize = 6;
627 // currentPosition++;
628 // while (source[currentPosition] == 'u') {
629 // currentPosition++;
633 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
635 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
637 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
639 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
641 // currentPosition = temp;
645 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
646 // if (!isPHPIdentifierPart(currentCharacter)) {
647 // currentPosition = temp;
651 // //need the unicode buffer
652 // if (withoutUnicodePtr == 0) {
653 // //buffer all the entries that have been left aside....
654 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
658 // withoutUnicodeBuffer,
660 // withoutUnicodePtr);
662 // //fill the buffer with the char
663 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
665 // } //-------------end unicode traitement--------------
667 if (!isPHPIdentifierPart(currentCharacter)) {
668 currentPosition = temp;
671 // if (withoutUnicodePtr != 0)
672 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
675 } catch (IndexOutOfBoundsException e) {
676 currentPosition = temp;
680 public int getCastOrParen() {
681 int tempPosition = currentPosition;
682 char tempCharacter = currentCharacter;
683 int tempToken = TokenNameLPAREN;
684 boolean found = false;
685 StringBuffer buf = new StringBuffer();
688 currentCharacter = source[currentPosition++];
689 } while (currentCharacter == ' ' || currentCharacter == '\t');
690 while ((currentCharacter >= 'a' && currentCharacter <= 'z')
691 || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
692 buf.append(currentCharacter);
693 currentCharacter = source[currentPosition++];
695 if (buf.length() >= 3 && buf.length() <= 7) {
696 char[] data = buf.toString().toCharArray();
698 switch (data.length) {
701 if ((data[index] == 'i') && (data[++index] == 'n')
702 && (data[++index] == 't')) {
704 tempToken = TokenNameintCAST;
709 if ((data[index] == 'b') && (data[++index] == 'o')
710 && (data[++index] == 'o') && (data[++index] == 'l')) {
712 tempToken = TokenNameboolCAST;
715 if ((data[index] == 'r') && (data[++index] == 'e')
716 && (data[++index] == 'a') && (data[++index] == 'l')) {
718 tempToken = TokenNamedoubleCAST;
724 if ((data[index] == 'a') && (data[++index] == 'r')
725 && (data[++index] == 'r') && (data[++index] == 'a')
726 && (data[++index] == 'y')) {
728 tempToken = TokenNamearrayCAST;
731 if ((data[index] == 'u') && (data[++index] == 'n')
732 && (data[++index] == 's') && (data[++index] == 'e')
733 && (data[++index] == 't')) {
735 tempToken = TokenNameunsetCAST;
738 if ((data[index] == 'f') && (data[++index] == 'l')
739 && (data[++index] == 'o') && (data[++index] == 'a')
740 && (data[++index] == 't')) {
742 tempToken = TokenNamedoubleCAST;
748 // object string double
749 if ((data[index] == 'o') && (data[++index] == 'b')
750 && (data[++index] == 'j') && (data[++index] == 'e')
751 && (data[++index] == 'c') && (data[++index] == 't')) {
753 tempToken = TokenNameobjectCAST;
756 if ((data[index] == 's') && (data[++index] == 't')
757 && (data[++index] == 'r') && (data[++index] == 'i')
758 && (data[++index] == 'n') && (data[++index] == 'g')) {
760 tempToken = TokenNamestringCAST;
763 if ((data[index] == 'd') && (data[++index] == 'o')
764 && (data[++index] == 'u') && (data[++index] == 'b')
765 && (data[++index] == 'l') && (data[++index] == 'e')) {
767 tempToken = TokenNamedoubleCAST;
774 if ((data[index] == 'b') && (data[++index] == 'o')
775 && (data[++index] == 'o') && (data[++index] == 'l')
776 && (data[++index] == 'e') && (data[++index] == 'a')
777 && (data[++index] == 'n')) {
779 tempToken = TokenNameboolCAST;
782 if ((data[index] == 'i') && (data[++index] == 'n')
783 && (data[++index] == 't') && (data[++index] == 'e')
784 && (data[++index] == 'g') && (data[++index] == 'e')
785 && (data[++index] == 'r')) {
787 tempToken = TokenNameintCAST;
793 while (currentCharacter == ' ' || currentCharacter == '\t') {
794 currentCharacter = source[currentPosition++];
796 if (currentCharacter == ')') {
801 } catch (IndexOutOfBoundsException e) {
803 currentCharacter = tempCharacter;
804 currentPosition = tempPosition;
805 return TokenNameLPAREN;
/**
 * Consumes the remainder of a backtick-delimited string: characters are read
 * from source until the closing '`' is found.
 *
 * A backslash is handed to scanDoubleQuotedEscapeCharacter(). From the first
 * escape on, the token is mirrored into withoutUnicodeBuffer with the
 * character left in currentCharacter after the escape was scanned.
 *
 * Running past the end of source is reported as an InvalidInputException
 * carrying UNTERMINATED_STRING. When an INVALID_ESCAPE is caught, up to 50
 * characters are inspected for a closing backtick and, if one is found,
 * currentPosition is moved behind it.
 *
 * If checkNonExternalizedStringLiterals is set, the literal is recorded as a
 * StringLiteral in currentLine, which is created and added to lines on demand.
 *
 * NOTE(review): several lines of this method (the opening try, closing braces,
 * the statements following the look-ahead tests) are not visible in this
 * listing - check the complete file before changing it.
 *
 * @throws InvalidInputException if the string is unterminated
 */
807 public void consumeStringInterpolated() throws InvalidInputException {
809 // consume next character
810 unicodeAsBackSlash = false;
811 currentCharacter = source[currentPosition++];
812 // if (((currentCharacter = source[currentPosition++]) == '\\')
813 // && (source[currentPosition] == 'u')) {
814 // getNextUnicodeChar();
816 // if (withoutUnicodePtr != 0) {
817 // withoutUnicodeBuffer[++withoutUnicodePtr] =
// scan up to the closing backtick
821 while (currentCharacter != '`') {
822 /** ** in PHP \r and \n are valid in string literals *** */
823 // if ((currentCharacter == '\n')
824 // || (currentCharacter == '\r')) {
825 // // relocate if finding another quote fairly close: thus unicode
826 '/u000D' will be fully consumed
827 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
828 // if (currentPosition + lookAhead == source.length)
830 // if (source[currentPosition + lookAhead] == '\n')
832 // if (source[currentPosition + lookAhead] == '\"') {
833 // currentPosition += lookAhead + 1;
837 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
839 if (currentCharacter == '\\') {
// escapeSize first remembers the position after the backslash and then
// becomes the number of characters the escape scanner consumed
840 int escapeSize = currentPosition;
841 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
842 //scanEscapeCharacter make a side effect on this value and we need
843 // the previous value few lines down this one
844 scanDoubleQuotedEscapeCharacter();
845 escapeSize = currentPosition - escapeSize;
846 if (withoutUnicodePtr == 0) {
847 //buffer all the entries that have been left aside....
848 withoutUnicodePtr = currentPosition - escapeSize - 1
// data starts at index 1 of the buffer; index 0 is left unused
850 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
852 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
853 } else { //overwrite the / in the buffer
854 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
855 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
856 // where only one is correct
861 // consume next character
862 unicodeAsBackSlash = false;
863 currentCharacter = source[currentPosition++];
864 // if (((currentCharacter = source[currentPosition++]) == '\\')
865 // && (source[currentPosition] == 'u')) {
866 // getNextUnicodeChar();
// keep the mirror buffer in step once it is in use
868 if (withoutUnicodePtr != 0) {
869 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing backtick is missing
873 } catch (IndexOutOfBoundsException e) {
874 throw new InvalidInputException(UNTERMINATED_STRING);
875 } catch (InvalidInputException e) {
876 if (e.getMessage().equals(INVALID_ESCAPE)) {
877 // relocate if finding another quote fairly close: thus unicode
878 // '/u000D' will be fully consumed
879 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
880 if (currentPosition + lookAhead == source.length)
882 if (source[currentPosition + lookAhead] == '\n')
884 if (source[currentPosition + lookAhead] == '`') {
885 currentPosition += lookAhead + 1;
892 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
893 // //$NON-NLS-?$ where ? is an
895 if (currentLine == null) {
896 currentLine = new NLSLine();
897 lines.add(currentLine);
// record the literal content (delimiters stripped) with its source range
899 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
900 startPosition, currentPosition - 1));
/**
 * Consumes the remainder of a single-quoted string: characters are read from
 * source until the closing '\'' is found.
 *
 * A backslash is handed to scanSingleQuotedEscapeCharacter(). From the first
 * escape on, the token is mirrored into withoutUnicodeBuffer with the
 * character left in currentCharacter after the escape was scanned.
 *
 * Running past the end of source is reported as an InvalidInputException
 * carrying UNTERMINATED_STRING. When an INVALID_ESCAPE is caught, up to 50
 * characters are inspected for a closing single quote and, if one is found,
 * currentPosition is moved behind it.
 *
 * If checkNonExternalizedStringLiterals is set, the literal is recorded as a
 * StringLiteral in currentLine, which is created and added to lines on demand.
 *
 * NOTE(review): several lines of this method (the opening try, closing braces,
 * the statements following the look-ahead tests) are not visible in this
 * listing - check the complete file before changing it.
 *
 * @throws InvalidInputException if the string is unterminated
 */
903 public void consumeStringConstant() throws InvalidInputException {
905 // consume next character
906 unicodeAsBackSlash = false;
907 currentCharacter = source[currentPosition++];
908 // if (((currentCharacter = source[currentPosition++]) == '\\')
909 // && (source[currentPosition] == 'u')) {
910 // getNextUnicodeChar();
912 // if (withoutUnicodePtr != 0) {
913 // withoutUnicodeBuffer[++withoutUnicodePtr] =
// scan up to the closing single quote
917 while (currentCharacter != '\'') {
918 /** ** in PHP \r and \n are valid in string literals *** */
919 // if ((currentCharacter == '\n')
920 // || (currentCharacter == '\r')) {
921 // // relocate if finding another quote fairly close: thus unicode
922 '/u000D' will be fully consumed
923 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
924 // if (currentPosition + lookAhead == source.length)
926 // if (source[currentPosition + lookAhead] == '\n')
928 // if (source[currentPosition + lookAhead] == '\"') {
929 // currentPosition += lookAhead + 1;
933 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
935 if (currentCharacter == '\\') {
// escapeSize first remembers the position after the backslash and then
// becomes the number of characters the escape scanner consumed
936 int escapeSize = currentPosition;
937 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
938 //scanEscapeCharacter make a side effect on this value and we need
939 // the previous value few lines down this one
940 scanSingleQuotedEscapeCharacter();
941 escapeSize = currentPosition - escapeSize;
942 if (withoutUnicodePtr == 0) {
943 //buffer all the entries that have been left aside....
944 withoutUnicodePtr = currentPosition - escapeSize - 1
// data starts at index 1 of the buffer; index 0 is left unused
946 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
948 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
949 } else { //overwrite the / in the buffer
950 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
951 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
952 // where only one is correct
957 // consume next character
958 unicodeAsBackSlash = false;
959 currentCharacter = source[currentPosition++];
960 // if (((currentCharacter = source[currentPosition++]) == '\\')
961 // && (source[currentPosition] == 'u')) {
962 // getNextUnicodeChar();
// keep the mirror buffer in step once it is in use
964 if (withoutUnicodePtr != 0) {
965 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing quote is missing
969 } catch (IndexOutOfBoundsException e) {
970 throw new InvalidInputException(UNTERMINATED_STRING);
971 } catch (InvalidInputException e) {
972 if (e.getMessage().equals(INVALID_ESCAPE)) {
973 // relocate if finding another quote fairly close: thus unicode
974 // '/u000D' will be fully consumed
975 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
976 if (currentPosition + lookAhead == source.length)
978 if (source[currentPosition + lookAhead] == '\n')
980 if (source[currentPosition + lookAhead] == '\'') {
981 currentPosition += lookAhead + 1;
988 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
989 // //$NON-NLS-?$ where ? is an
991 if (currentLine == null) {
992 currentLine = new NLSLine();
993 lines.add(currentLine);
// record the literal content (delimiters stripped) with its source range
995 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
996 startPosition, currentPosition - 1));
/**
 * Consumes the characters of a double-quoted string literal, reading from
 * <code>source</code> until the closing <code>"</code> is reached.
 * <p>
 * A backslash is handed to <code>scanDoubleQuotedEscapeCharacter()</code>; once an
 * escape has been seen, the decoded text is accumulated in
 * <code>withoutUnicodeBuffer</code>. When
 * <code>checkNonExternalizedStringLiterals</code> is set, the literal is added to
 * the current <code>NLSLine</code> as a <code>StringLiteral</code> spanning
 * <code>startPosition</code> to <code>currentPosition - 1</code>.
 *
 * @throws InvalidInputException
 *           with <code>UNTERMINATED_STRING</code> when the source ends before the
 *           closing quote is found
 */
999 public void consumeStringLiteral() throws InvalidInputException {
1001 // consume next character
1002 unicodeAsBackSlash = false;
1003 currentCharacter = source[currentPosition++];
1004 // if (((currentCharacter = source[currentPosition++]) == '\\')
1005 // && (source[currentPosition] == 'u')) {
1006 // getNextUnicodeChar();
1008 // if (withoutUnicodePtr != 0) {
1009 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1010 // currentCharacter;
// main loop: keep reading until the closing double quote
1013 while (currentCharacter != '"') {
1014 /** ** in PHP \r and \n are valid in string literals *** */
1015 // if ((currentCharacter == '\n')
1016 // || (currentCharacter == '\r')) {
1017 // // relocate if finding another quote fairly close: thus unicode
1018 // '/u000D' will be fully consumed
1019 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1020 // if (currentPosition + lookAhead == source.length)
1022 // if (source[currentPosition + lookAhead] == '\n')
1024 // if (source[currentPosition + lookAhead] == '\"') {
1025 // currentPosition += lookAhead + 1;
1029 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1031 if (currentCharacter == '\\') {
// remember where the escape starts so its length can be computed afterwards
1032 int escapeSize = currentPosition;
1033 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1034 //scanEscapeCharacter make a side effect on this value and we need
1035 // the previous value few lines down this one
1036 scanDoubleQuotedEscapeCharacter();
1037 escapeSize = currentPosition - escapeSize;
// withoutUnicodePtr == 0 means nothing has been buffered for this token yet
1038 if (withoutUnicodePtr == 0) {
1039 //buffer all the entries that have been left aside....
1040 withoutUnicodePtr = currentPosition - escapeSize - 1
1042 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1044 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1045 } else { //overwrite the / in the buffer
1046 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1047 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1048 // where only one is correct
1049 withoutUnicodePtr--;
1053 // consume next character
1054 unicodeAsBackSlash = false;
1055 currentCharacter = source[currentPosition++];
1056 // if (((currentCharacter = source[currentPosition++]) == '\\')
1057 // && (source[currentPosition] == 'u')) {
1058 // getNextUnicodeChar();
// once buffering has started, every consumed character is appended to the buffer
1060 if (withoutUnicodePtr != 0) {
1061 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing quote was never found
1065 } catch (IndexOutOfBoundsException e) {
1066 throw new InvalidInputException(UNTERMINATED_STRING);
1067 } catch (InvalidInputException e) {
1068 if (e.getMessage().equals(INVALID_ESCAPE)) {
1069 // relocate if finding another quote fairly close: thus unicode
1070 // '/u000D' will be fully consumed
// the search for a closing quote is limited to the next 50 characters
1071 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1072 if (currentPosition + lookAhead == source.length)
1074 if (source[currentPosition + lookAhead] == '\n')
1076 if (source[currentPosition + lookAhead] == '\"') {
1077 currentPosition += lookAhead + 1;
1084 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1085 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line that collects the string literals found so far
1087 if (currentLine == null) {
1088 currentLine = new NLSLine();
1089 lines.add(currentLine);
1091 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1092 startPosition, currentPosition - 1));
/**
 * Scans forward from <code>currentPosition</code> and returns the type of the next
 * token.
 * <p>
 * The scan depends on the delimiter on top of <code>encapsedStringStack</code>:
 * inside an encapsed string the raw string content or the embedded variable
 * syntax is tokenized; otherwise whitespace is skipped and the regular PHP
 * tokens (punctuation, operators, heredoc, string literals, comments,
 * identifiers, numbers) are recognized from <code>currentCharacter</code>.
 * The method may also delegate to <code>getInlinedHTML(currentPosition)</code>.
 *
 * @return one of the <code>TokenName...</code> constants; <code>TokenNameEOF</code>
 *         when the end of <code>source</code> is reached
 * @throws InvalidInputException
 *           for malformed input, e.g. <code>UNTERMINATED_COMMENT</code> when a
 *           block comment is not closed
 */
1095 public int getNextToken() throws InvalidInputException {
1097 return getInlinedHTML(currentPosition);
1100 this.wasAcr = false;
1102 jumpOverMethodBody();
1104 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1108 withoutUnicodePtr = 0;
1109 //start with a new token
// ' ' is the default used when no encapsed-string delimiter is on the stack
1110 char encapsedChar = ' ';
1111 if (!encapsedStringStack.isEmpty()) {
1112 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// a delimiter other than '$' is on the stack: scan string content up to that delimiter
1114 if (encapsedChar != '$' && encapsedChar != ' ') {
1115 currentCharacter = source[currentPosition++];
1116 if (currentCharacter == encapsedChar) {
1117 switch (currentCharacter) {
1119 return TokenNameEncapsedString0;
1121 return TokenNameEncapsedString1;
1123 return TokenNameEncapsedString2;
1126 while (currentCharacter != encapsedChar) {
1127 /** ** in PHP \r and \n are valid in string literals *** */
1128 switch (currentCharacter) {
1130 int escapeSize = currentPosition;
1131 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1132 //scanEscapeCharacter make a side effect on this value and
1133 // we need the previous value few lines down this one
1134 scanDoubleQuotedEscapeCharacter();
1135 escapeSize = currentPosition - escapeSize;
1136 if (withoutUnicodePtr == 0) {
1137 //buffer all the entries that have been left aside....
1138 withoutUnicodePtr = currentPosition - escapeSize - 1
1140 System.arraycopy(source, startPosition,
1141 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1142 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1143 } else { //overwrite the / in the buffer
1144 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1145 if (backSlashAsUnicodeInString) { //there are TWO \ in
1146 withoutUnicodePtr--;
// the following character starts an identifier or is '{': push '$' so that the
// next call tokenizes the embedded variable, and end the string chunk here
1151 if (isPHPIdentifierStart(source[currentPosition])
1152 || source[currentPosition] == '{') {
1154 encapsedStringStack.push(new Character('$'));
1155 return TokenNameSTRING;
1159 if (source[currentPosition] == '$') { // CURLY_OPEN
1161 encapsedStringStack.push(new Character('$'));
1162 return TokenNameSTRING;
1165 // consume next character
1166 unicodeAsBackSlash = false;
1167 currentCharacter = source[currentPosition++];
1168 if (withoutUnicodePtr != 0) {
1169 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1174 return TokenNameSTRING;
1176 // ---------Consume white space and handles startPosition---------
1177 int whiteStart = currentPosition;
1178 startPosition = currentPosition;
1179 currentCharacter = source[currentPosition++];
// '$' on the stack: the scanner is tokenizing variable syntax inside an encapsed string
1180 if (encapsedChar == '$') {
1181 switch (currentCharacter) {
1183 currentCharacter = source[currentPosition++];
1184 return TokenNameSTRING;
1186 if (encapsedChar == '$') {
1187 if (getNextChar('$'))
1188 return TokenNameCURLY_OPEN;
1190 return TokenNameLBRACE;
1192 return TokenNameRBRACE;
1194 return TokenNameLBRACKET;
1196 return TokenNameRBRACKET;
1198 if (tokenizeStrings) {
1199 consumeStringConstant();
1200 return TokenNameStringSingleQuote;
1202 return TokenNameEncapsedString1;
1204 return TokenNameEncapsedString2;
1206 if (tokenizeStrings) {
1207 consumeStringInterpolated();
1208 return TokenNameStringInterpolated;
1210 return TokenNameEncapsedString0;
1212 if (getNextChar('>'))
1213 return TokenNameMINUS_GREATER;
1214 return TokenNameSTRING;
1216 if (currentCharacter == '$') {
// keep the position so the scanner can back up when '$' does not start a variable
1217 int oldPosition = currentPosition;
1219 currentCharacter = source[currentPosition++];
1220 if (currentCharacter == '{') {
1221 return TokenNameDOLLAR_LBRACE;
1223 if (isPHPIdentifierStart(currentCharacter)) {
1224 return scanIdentifierOrKeyword(true);
1226 currentPosition = oldPosition;
1227 return TokenNameSTRING;
1229 } catch (IndexOutOfBoundsException e) {
1230 currentPosition = oldPosition;
1231 return TokenNameSTRING;
1234 if (isPHPIdentifierStart(currentCharacter))
1235 return scanIdentifierOrKeyword(false);
1236 if (Character.isDigit(currentCharacter))
1237 return scanNumber(false);
1238 return TokenNameERROR;
1241 // boolean isWhiteSpace;
// skip whitespace, moving startPosition along and recording line separators
1243 while ((currentCharacter == ' ')
1244 || Character.isWhitespace(currentCharacter)) {
1245 startPosition = currentPosition;
1246 currentCharacter = source[currentPosition++];
1247 // if (((currentCharacter = source[currentPosition++]) == '\\')
1248 // && (source[currentPosition] == 'u')) {
1249 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1251 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1252 checkNonExternalizeString();
1253 if (recordLineSeparator) {
1254 pushLineSeparator();
1259 // isWhiteSpace = (currentCharacter == ' ')
1260 // || Character.isWhitespace(currentCharacter);
1263 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1264 // reposition scanner in case we are interested by spaces as tokens
1266 startPosition = whiteStart;
1267 return TokenNameWHITESPACE;
1269 //little trick to get out in the middle of a source computation
1270 if (currentPosition > eofPosition)
1271 return TokenNameEOF;
1272 // ---------Identify the next token-------------
1273 switch (currentCharacter) {
1275 return getCastOrParen();
1277 return TokenNameRPAREN;
1279 return TokenNameLBRACE;
1281 return TokenNameRBRACE;
1283 return TokenNameLBRACKET;
1285 return TokenNameRBRACKET;
1287 return TokenNameSEMICOLON;
1289 return TokenNameCOMMA;
1291 if (getNextChar('='))
1292 return TokenNameDOT_EQUAL;
1293 if (getNextCharAsDigit())
1294 return scanNumber(true);
1295 return TokenNameDOT;
1299 if ((test = getNextChar('+', '=')) == 0)
1300 return TokenNamePLUS_PLUS;
1302 return TokenNamePLUS_EQUAL;
1303 return TokenNamePLUS;
1308 if ((test = getNextChar('-', '=')) == 0)
1309 return TokenNameMINUS_MINUS;
1311 return TokenNameMINUS_EQUAL;
1312 if (getNextChar('>'))
1313 return TokenNameMINUS_GREATER;
1314 return TokenNameMINUS;
1317 if (getNextChar('='))
1318 return TokenNameTWIDDLE_EQUAL;
1319 return TokenNameTWIDDLE;
1321 if (getNextChar('=')) {
1322 if (getNextChar('=')) {
1323 return TokenNameNOT_EQUAL_EQUAL;
1325 return TokenNameNOT_EQUAL;
1327 return TokenNameNOT;
1329 if (getNextChar('='))
1330 return TokenNameMULTIPLY_EQUAL;
1331 return TokenNameMULTIPLY;
1333 if (getNextChar('='))
1334 return TokenNameREMAINDER_EQUAL;
1335 return TokenNameREMAINDER;
1338 int oldPosition = currentPosition;
1340 currentCharacter = source[currentPosition++];
1341 } catch (IndexOutOfBoundsException e) {
1342 currentPosition = oldPosition;
1343 return TokenNameLESS;
1345 switch (currentCharacter) {
1347 return TokenNameLESS_EQUAL;
1349 return TokenNameNOT_EQUAL;
1351 if (getNextChar('='))
1352 return TokenNameLEFT_SHIFT_EQUAL;
// a third '<' introduces a heredoc: skip whitespace, then read the identifier
1353 if (getNextChar('<')) {
1354 currentCharacter = source[currentPosition++];
1355 while (Character.isWhitespace(currentCharacter)) {
1356 currentCharacter = source[currentPosition++];
// index of the character just read, i.e. the first character of the heredoc identifier
1358 int heredocStart = currentPosition - 1;
1359 int heredocLength = 0;
1360 if (isPHPIdentifierStart(currentCharacter)) {
1361 currentCharacter = source[currentPosition++];
1363 return TokenNameERROR;
1365 while (isPHPIdentifierPart(currentCharacter)) {
1366 currentCharacter = source[currentPosition++];
1368 heredocLength = currentPosition - heredocStart - 1;
1369 // heredoc end-tag determination
1370 boolean endTag = true;
1373 ch = source[currentPosition++];
1374 if (ch == '\r' || ch == '\n') {
1375 if (recordLineSeparator) {
1376 pushLineSeparator();
// compare the text at currentPosition with the heredoc identifier, char by char
1380 for (int i = 0; i < heredocLength; i++) {
1381 if (source[currentPosition + i] != source[heredocStart
1388 currentPosition += heredocLength - 1;
1389 currentCharacter = source[currentPosition++];
1390 break; // do...while loop
1396 return TokenNameHEREDOC;
1398 return TokenNameLEFT_SHIFT;
1400 currentPosition = oldPosition;
1401 return TokenNameLESS;
1406 if ((test = getNextChar('=', '>')) == 0)
1407 return TokenNameGREATER_EQUAL;
1409 if ((test = getNextChar('=', '>')) == 0)
1410 return TokenNameRIGHT_SHIFT_EQUAL;
1411 return TokenNameRIGHT_SHIFT;
1413 return TokenNameGREATER;
1416 if (getNextChar('=')) {
1417 if (getNextChar('=')) {
1418 return TokenNameEQUAL_EQUAL_EQUAL;
1420 return TokenNameEQUAL_EQUAL;
1422 if (getNextChar('>'))
1423 return TokenNameEQUAL_GREATER;
1424 return TokenNameEQUAL;
1428 if ((test = getNextChar('&', '=')) == 0)
1429 return TokenNameAND_AND;
1431 return TokenNameAND_EQUAL;
1432 return TokenNameAND;
1437 if ((test = getNextChar('|', '=')) == 0)
1438 return TokenNameOR_OR;
1440 return TokenNameOR_EQUAL;
1444 if (getNextChar('='))
1445 return TokenNameXOR_EQUAL;
1446 return TokenNameXOR;
// '?>' hands over to inline HTML scanning
1448 if (getNextChar('>')) {
1450 if (currentPosition == source.length) {
1452 return TokenNameINLINE_HTML;
1454 return getInlinedHTML(currentPosition - 2);
1456 return TokenNameQUESTION;
1458 if (getNextChar(':'))
1459 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1460 return TokenNameCOLON;
1464 consumeStringConstant();
1465 return TokenNameStringSingleQuote;
1467 if (tokenizeStrings) {
1468 consumeStringLiteral();
1469 return TokenNameStringDoubleQuote;
1471 return TokenNameEncapsedString2;
1473 if (tokenizeStrings) {
1474 consumeStringInterpolated();
1475 return TokenNameStringInterpolated;
1477 return TokenNameEncapsedString0;
1481 char startChar = currentCharacter;
1482 if (getNextChar('=')) {
1483 return TokenNameDIVIDE_EQUAL;
// '#' always starts a line comment; for '/', a result of 0 from the two-argument
// getNextChar denotes the first candidate (a second '/'), as in the operator cases above
1486 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1488 this.lastCommentLinePosition = this.currentPosition;
1489 int endPositionForLineComment = 0;
1490 try { //get the next char
1491 currentCharacter = source[currentPosition++];
1492 // if (((currentCharacter = source[currentPosition++])
1494 // && (source[currentPosition] == 'u')) {
1495 // //-------------unicode traitement ------------
1496 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1497 // currentPosition++;
1498 // while (source[currentPosition] == 'u') {
1499 // currentPosition++;
1502 // Character.getNumericValue(source[currentPosition++]))
1506 // Character.getNumericValue(source[currentPosition++]))
1510 // Character.getNumericValue(source[currentPosition++]))
1514 // Character.getNumericValue(source[currentPosition++]))
1518 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1520 // currentCharacter =
1521 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1524 //handle the \\u case manually into comment
1525 // if (currentCharacter == '\\') {
1526 // if (source[currentPosition] == '\\')
1527 // currentPosition++;
1528 // } //jump over the \\
1529 boolean isUnicode = false;
1530 while (currentCharacter != '\r' && currentCharacter != '\n') {
1531 this.lastCommentLinePosition = this.currentPosition;
// a '?>' inside the line comment yields an INLINE_HTML token starting at the '?>'
1532 if (currentCharacter == '?') {
1533 if (getNextChar('>')) {
1534 startPosition = currentPosition - 2;
1536 return TokenNameINLINE_HTML;
1541 currentCharacter = source[currentPosition++];
1542 // if (((currentCharacter = source[currentPosition++])
1544 // && (source[currentPosition] == 'u')) {
1545 // isUnicode = true;
1546 // //-------------unicode traitement ------------
1547 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1548 // currentPosition++;
1549 // while (source[currentPosition] == 'u') {
1550 // currentPosition++;
1553 // Character.getNumericValue(source[currentPosition++]))
1557 // Character.getNumericValue(
1558 // source[currentPosition++]))
1562 // Character.getNumericValue(
1563 // source[currentPosition++]))
1567 // Character.getNumericValue(
1568 // source[currentPosition++]))
1572 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1574 // currentCharacter =
1575 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1578 //handle the \\u case manually into comment
1579 // if (currentCharacter == '\\') {
1580 // if (source[currentPosition] == '\\')
1581 // currentPosition++;
1582 // } //jump over the \\
1585 endPositionForLineComment = currentPosition - 6;
1587 endPositionForLineComment = currentPosition - 1;
1589 // recordComment(false);
1590 recordComment(TokenNameCOMMENT_LINE);
1591 if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1592 if ((currentCharacter == '\r')
1593 || (currentCharacter == '\n')) {
1594 checkNonExternalizeString();
1595 if (recordLineSeparator) {
1597 pushUnicodeLineSeparator();
1599 pushLineSeparator();
1605 if (tokenizeComments) {
1607 currentPosition = endPositionForLineComment;
1608 // reset one character behind
1610 return TokenNameCOMMENT_LINE;
1612 } catch (IndexOutOfBoundsException e) { //an eof will then
1614 if (tokenizeComments) {
1616 // reset one character behind
1617 return TokenNameCOMMENT_LINE;
1623 //traditional and annotation comment
1624 boolean isJavadoc = false, star = false;
1625 // consume next character
1626 unicodeAsBackSlash = false;
1627 currentCharacter = source[currentPosition++];
1628 // if (((currentCharacter = source[currentPosition++]) ==
1630 // && (source[currentPosition] == 'u')) {
1631 // getNextUnicodeChar();
1633 // if (withoutUnicodePtr != 0) {
1634 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1635 // currentCharacter;
1638 if (currentCharacter == '*') {
1642 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1643 checkNonExternalizeString();
1644 if (recordLineSeparator) {
1645 pushLineSeparator();
1650 try { //get the next char
1651 currentCharacter = source[currentPosition++];
1652 // if (((currentCharacter = source[currentPosition++])
1654 // && (source[currentPosition] == 'u')) {
1655 // //-------------unicode traitement ------------
1656 // getNextUnicodeChar();
1658 //handle the \\u case manually into comment
1659 // if (currentCharacter == '\\') {
1660 // if (source[currentPosition] == '\\')
1661 // currentPosition++;
1662 // //jump over the \\
1664 // empty comment is not a javadoc /**/
1665 if (currentCharacter == '/') {
1668 //loop until end of comment */
// the comment ends on a '/' that directly follows a '*' (tracked by star)
1669 while ((currentCharacter != '/') || (!star)) {
1670 if ((currentCharacter == '\r')
1671 || (currentCharacter == '\n')) {
1672 checkNonExternalizeString();
1673 if (recordLineSeparator) {
1674 pushLineSeparator();
1679 star = currentCharacter == '*';
1681 currentCharacter = source[currentPosition++];
1682 // if (((currentCharacter = source[currentPosition++])
1684 // && (source[currentPosition] == 'u')) {
1685 // //-------------unicode traitement ------------
1686 // getNextUnicodeChar();
1688 //handle the \\u case manually into comment
1689 // if (currentCharacter == '\\') {
1690 // if (source[currentPosition] == '\\')
1691 // currentPosition++;
1692 // } //jump over the \\
1694 //recordComment(isJavadoc);
1696 recordComment(TokenNameCOMMENT_PHPDOC);
1698 recordComment(TokenNameCOMMENT_BLOCK);
1701 if (tokenizeComments) {
1703 return TokenNameCOMMENT_PHPDOC;
1704 return TokenNameCOMMENT_BLOCK;
// running off the end of source inside a block comment means it was never closed
1706 } catch (IndexOutOfBoundsException e) {
1707 throw new InvalidInputException(UNTERMINATED_COMMENT);
1711 return TokenNameDIVIDE;
1715 return TokenNameEOF;
1716 //the atEnd may not be <currentPosition == source.length> if
1717 // source is only some part of a real (external) stream
1718 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1720 if (currentCharacter == '$') {
1721 int oldPosition = currentPosition;
1723 currentCharacter = source[currentPosition++];
1724 if (isPHPIdentifierStart(currentCharacter)) {
1725 return scanIdentifierOrKeyword(true);
1727 currentPosition = oldPosition;
1728 return TokenNameDOLLAR;
1730 } catch (IndexOutOfBoundsException e) {
1731 currentPosition = oldPosition;
1732 return TokenNameDOLLAR;
1735 if (isPHPIdentifierStart(currentCharacter))
1736 return scanIdentifierOrKeyword(false);
1737 if (Character.isDigit(currentCharacter))
1738 return scanNumber(false);
1739 return TokenNameERROR;
1742 } //-----------------end switch while try--------------------
// reading past the end of source anywhere above ends the token stream
1743 catch (IndexOutOfBoundsException e) {
1746 return TokenNameEOF;
/**
 * Obtains the next token through <code>getInlinedHTMLToken(start)</code> and tests
 * whether it is <code>TokenNameINLINE_HTML</code>. The code that would inspect the
 * HTML tags of such a token is commented out below.
 *
 * @param start
 *          position passed on to <code>getInlinedHTMLToken</code>
 * @throws InvalidInputException
 *           declared by this method; <code>getInlinedHTMLToken</code> declares it too
 */
1749 private int getInlinedHTML(int start) throws InvalidInputException {
1750 int token = getInlinedHTMLToken(start);
1751 if (token == TokenNameINLINE_HTML) {
1752 // Stack stack = new Stack();
1753 // // scan html for errors
1754 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1755 // int lastPHPEndPos=0;
1756 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1757 // Tag tag=(Tag)i.next();
1759 // if (tag instanceof StartTag) {
1760 // StartTag startTag=(StartTag)tag;
1761 // // System.out.println("startTag: "+tag);
1762 // if (startTag.isServerTag()) {
1763 // // TODO : what to do with a server tag ?
1765 // // do whatever with HTML start tag
1766 // // use startTag.getElement() to find the element corresponding
1767 // // to this start tag which may be useful if you implement code
1769 // stack.push(startTag);
1772 // EndTag endTag=(EndTag)tag;
1773 // StartTag stag = (StartTag) stack.peek();
1774 //// System.out.println("endTag: "+tag);
1775 // // do whatever with HTML end tag.
1783 * InvalidInputException
/**
 * Scans inline HTML beginning at <code>currentPosition</code>, looking for a
 * <code>&lt;?</code> opener that is followed either by whitespace or by the
 * letters p, h, p (upper or lower case). Line separators met on the way are
 * recorded when <code>recordLineSeparator</code> is set.
 *
 * @param start
 *          value assigned to <code>startPosition</code> for the produced token
 * @return <code>TokenNameEOF</code> when <code>currentPosition</code> is already
 *         beyond <code>source.length</code>; <code>TokenNameINLINE_HTML</code> on
 *         the other visible return paths, including when the end of
 *         <code>source</code> is hit while scanning
 * @throws InvalidInputException
 *           declared by this method; no throwing statement is visible in this block
 */
1785 private int getInlinedHTMLToken(int start) throws InvalidInputException {
1786 // int htmlPosition = start;
// clamp a position that ran past the end and report end of file
1787 if (currentPosition > source.length) {
1788 currentPosition = source.length;
1789 return TokenNameEOF;
1791 startPosition = start;
1794 currentCharacter = source[currentPosition++];
1795 if (currentCharacter == '<') {
1796 if (getNextChar('?')) {
1797 currentCharacter = source[currentPosition++];
// short open tag: "<?" followed by whitespace
1798 if ((currentCharacter == ' ')
1799 || Character.isWhitespace(currentCharacter)) {
1802 return TokenNameINLINE_HTML;
// long open tag: "<?" followed by "php" in any letter case
1804 boolean phpStart = (currentCharacter == 'P')
1805 || (currentCharacter == 'p');
1807 int test = getNextChar('H', 'h');
1809 test = getNextChar('P', 'p');
1813 return TokenNameINLINE_HTML;
1820 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1821 if (recordLineSeparator) {
1822 pushLineSeparator();
1827 } //-----------------while--------------------
1829 return TokenNameINLINE_HTML;
1830 } //-----------------try--------------------
// the end of source was reached while scanning
1831 catch (IndexOutOfBoundsException e) {
1832 startPosition = start;
1836 return TokenNameINLINE_HTML;
1838 // public final void getNextUnicodeChar()
1839 // throws IndexOutOfBoundsException, InvalidInputException {
1841 // //handle the case of unicode.
1842 // //when a unicode appears then we must use a buffer that holds char
1844 // //At the end of this method currentCharacter holds the new visited char
1845 // //and currentPosition points right next after it
1847 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1849 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1850 // currentPosition++;
1851 // while (source[currentPosition] == 'u') {
1852 // currentPosition++;
1856 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1858 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1860 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1862 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1864 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1866 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1867 // //need the unicode buffer
1868 // if (withoutUnicodePtr == 0) {
1869 // //buffer all the entries that have been left aside....
1870 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1871 // System.arraycopy(
1874 // withoutUnicodeBuffer,
1876 // withoutUnicodePtr);
1878 // //fill the buffer with the char
1879 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1881 // unicodeAsBackSlash = currentCharacter == '\\';
1884 * Tokenize a method body, assuming that curly brackets are properly
/**
 * Advances the scanner over the text of a method body without returning tokens.
 * Whitespace, quoted strings (with their escapes), line and block comments,
 * identifiers and digits are consumed as they are met; line separators are
 * recorded when <code>recordLineSeparator</code> is set.
 * <p>
 * The method declares no checked exception: the
 * <code>IndexOutOfBoundsException</code> and <code>InvalidInputException</code>
 * raised while scanning are caught inside it.
 * NOTE(review): how curly-bracket nesting ends the scan is not visible in this
 * block - confirm before relying on it.
 */
1887 public final void jumpOverMethodBody() {
1888 this.wasAcr = false;
1891 while (true) { //loop for jumping over comments
1892 // ---------Consume white space and handles startPosition---------
1893 boolean isWhiteSpace;
1895 startPosition = currentPosition;
1896 currentCharacter = source[currentPosition++];
1897 // if (((currentCharacter = source[currentPosition++]) == '\\')
1898 // && (source[currentPosition] == 'u')) {
1899 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1901 if (recordLineSeparator
1902 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1903 pushLineSeparator();
1904 isWhiteSpace = Character.isWhitespace(currentCharacter);
1906 } while (isWhiteSpace);
1907 // -------consume token until } is found---------
1908 switch (currentCharacter) {
1920 test = getNextChar('\\');
1923 scanDoubleQuotedEscapeCharacter();
1924 } catch (InvalidInputException ex) {
1927 // try { // consume next character
1928 unicodeAsBackSlash = false;
1929 currentCharacter = source[currentPosition++];
1930 // if (((currentCharacter = source[currentPosition++]) == '\\')
1931 // && (source[currentPosition] == 'u')) {
1932 // getNextUnicodeChar();
1934 if (withoutUnicodePtr != 0) {
1935 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1938 // } catch (InvalidInputException ex) {
1946 // try { // consume next character
1947 unicodeAsBackSlash = false;
1948 currentCharacter = source[currentPosition++];
1949 // if (((currentCharacter = source[currentPosition++]) == '\\')
1950 // && (source[currentPosition] == 'u')) {
1951 // getNextUnicodeChar();
1953 if (withoutUnicodePtr != 0) {
1954 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1957 // } catch (InvalidInputException ex) {
// consume a double-quoted string up to its closing quote
1959 while (currentCharacter != '"') {
1960 if (currentCharacter == '\r') {
1961 if (source[currentPosition] == '\n')
1964 // the string cannot go further than the line
1966 if (currentCharacter == '\n') {
1968 // the string cannot go further than the line
1970 if (currentCharacter == '\\') {
1972 scanDoubleQuotedEscapeCharacter();
1973 } catch (InvalidInputException ex) {
1976 // try { // consume next character
1977 unicodeAsBackSlash = false;
1978 currentCharacter = source[currentPosition++];
1979 // if (((currentCharacter = source[currentPosition++]) == '\\')
1980 // && (source[currentPosition] == 'u')) {
1981 // getNextUnicodeChar();
1983 if (withoutUnicodePtr != 0) {
1984 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1987 // } catch (InvalidInputException ex) {
1990 } catch (IndexOutOfBoundsException e) {
// getNextChar('/', '*') == 0 is the line-comment case: skip to the end of the line
1997 if ((test = getNextChar('/', '*')) == 0) {
2001 currentCharacter = source[currentPosition++];
2002 // if (((currentCharacter = source[currentPosition++]) ==
2004 // && (source[currentPosition] == 'u')) {
2005 // //-------------unicode traitement ------------
2006 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2007 // currentPosition++;
2008 // while (source[currentPosition] == 'u') {
2009 // currentPosition++;
2012 // Character.getNumericValue(source[currentPosition++]))
2016 // Character.getNumericValue(source[currentPosition++]))
2020 // Character.getNumericValue(source[currentPosition++]))
2024 // Character.getNumericValue(source[currentPosition++]))
2027 // //error don't care of the value
2028 // currentCharacter = 'A';
2029 // } //something different from \n and \r
2031 // currentCharacter =
2032 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2035 while (currentCharacter != '\r' && currentCharacter != '\n') {
2037 currentCharacter = source[currentPosition++];
2038 // if (((currentCharacter = source[currentPosition++])
2040 // && (source[currentPosition] == 'u')) {
2041 // //-------------unicode traitement ------------
2042 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2043 // currentPosition++;
2044 // while (source[currentPosition] == 'u') {
2045 // currentPosition++;
2048 // Character.getNumericValue(source[currentPosition++]))
2052 // Character.getNumericValue(source[currentPosition++]))
2056 // Character.getNumericValue(source[currentPosition++]))
2060 // Character.getNumericValue(source[currentPosition++]))
2063 // //error don't care of the value
2064 // currentCharacter = 'A';
2065 // } //something different from \n and \r
2067 // currentCharacter =
2068 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2072 if (recordLineSeparator
2073 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2074 pushLineSeparator();
2075 } catch (IndexOutOfBoundsException e) {
2076 } //an eof will then be generated
2080 //traditional and annotation comment
2081 boolean star = false;
2082 // try { // consume next character
2083 unicodeAsBackSlash = false;
2084 currentCharacter = source[currentPosition++];
2085 // if (((currentCharacter = source[currentPosition++]) == '\\')
2086 // && (source[currentPosition] == 'u')) {
2087 // getNextUnicodeChar();
2089 if (withoutUnicodePtr != 0) {
2090 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2093 // } catch (InvalidInputException ex) {
2095 if (currentCharacter == '*') {
2098 if (recordLineSeparator
2099 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2100 pushLineSeparator();
2101 try { //get the next char
2102 currentCharacter = source[currentPosition++];
2103 // if (((currentCharacter = source[currentPosition++]) ==
2105 // && (source[currentPosition] == 'u')) {
2106 // //-------------unicode traitement ------------
2107 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2108 // currentPosition++;
2109 // while (source[currentPosition] == 'u') {
2110 // currentPosition++;
2113 // Character.getNumericValue(source[currentPosition++]))
2117 // Character.getNumericValue(source[currentPosition++]))
2121 // Character.getNumericValue(source[currentPosition++]))
2125 // Character.getNumericValue(source[currentPosition++]))
2128 // //error don't care of the value
2129 // currentCharacter = 'A';
2130 // } //something different from * and /
2132 // currentCharacter =
2133 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2136 //loop until end of comment */
// the block comment ends on a '/' that directly follows a '*' (tracked by star)
2137 while ((currentCharacter != '/') || (!star)) {
2138 if (recordLineSeparator
2139 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2140 pushLineSeparator();
2141 star = currentCharacter == '*';
2143 currentCharacter = source[currentPosition++];
2144 // if (((currentCharacter = source[currentPosition++])
2146 // && (source[currentPosition] == 'u')) {
2147 // //-------------unicode traitement ------------
2148 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2149 // currentPosition++;
2150 // while (source[currentPosition] == 'u') {
2151 // currentPosition++;
2154 // Character.getNumericValue(source[currentPosition++]))
2158 // Character.getNumericValue(source[currentPosition++]))
2162 // Character.getNumericValue(source[currentPosition++]))
2166 // Character.getNumericValue(source[currentPosition++]))
2169 // //error don't care of the value
2170 // currentCharacter = 'A';
2171 // } //something different from * and /
2173 // currentCharacter =
2174 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2178 } catch (IndexOutOfBoundsException e) {
// identifiers and '$' variables are scanned only to move past them
2186 if (isPHPIdentifierStart(currentCharacter)
2187 || currentCharacter == '$') {
2189 scanIdentifierOrKeyword((currentCharacter == '$'));
2190 } catch (InvalidInputException ex) {
2194 if (Character.isDigit(currentCharacter)) {
2197 } catch (InvalidInputException ex) {
2203 //-----------------end switch while try--------------------
2204 } catch (IndexOutOfBoundsException e) {
2205 } catch (InvalidInputException e) {
2209 // public final boolean jumpOverUnicodeWhiteSpace()
2210 // throws InvalidInputException {
2212 // //handle the case of unicode. Jump over the next whiteSpace
2213 // //making startPosition pointing on the next available char
2214 // //On false, the currentCharacter is filled up with a potential
2218 // this.wasAcr = false;
2219 // int c1, c2, c3, c4;
2220 // int unicodeSize = 6;
2221 // currentPosition++;
2222 // while (source[currentPosition] == 'u') {
2223 // currentPosition++;
2227 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2229 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2231 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2233 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2235 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2238 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2239 // if (recordLineSeparator
2240 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2241 // pushLineSeparator();
2242 // if (Character.isWhitespace(currentCharacter))
2245 // //buffer the new char which is not a white space
2246 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2247 // //withoutUnicodePtr == 1 is true here
2249 // } catch (IndexOutOfBoundsException e) {
2250 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2253 public final int[] getLineEnds() {
2254 //return a bounded copy of this.lineEnds
2256 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor returning a <code>char[]</code>.
 * NOTE(review): the body is not visible in this block; presumably it returns the
 * scanner's <code>source</code> buffer - confirm.
 */
2259 public char[] getSource() {
2262 public static boolean isIdentifierOrKeyword(int token) {
2263 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Returns the source of a one-character token, read from
 * <code>source[startPosition]</code>. The visible fallback allocates a fresh
 * one-element array for the character.
 * NOTE(review): the existing comments indicate that a shared array is returned
 * for common characters; that part is not visible in this block - confirm.
 */
2265 final char[] optimizedCurrentTokenSource1() {
2266 //return always the same char[] build only once
2267 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2268 char charOne = source[startPosition];
2323 return new char[]{charOne};
/**
 * Looks up the two-character token starting at <code>startPosition</code> in the
 * cache <code>charArray_length[0]</code>, so that an already stored
 * <code>char[]</code> with the same characters can be reused. The bucket is chosen
 * by a hash of both characters and searched in two passes; when no entry matches,
 * a new array is created and stored in the bucket.
 */
2326 final char[] optimizedCurrentTokenSource2() {
2327 //try to return the same char[] build only once
// the hash combines both characters (first one shifted left by 6 bits)
2329 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2331 char[][] table = charArray_length[0][hash];
// first pass: advance i through the bucket while it stays below InternalTableSize
2333 while (++i < InternalTableSize) {
2334 char[] charArray = table[i];
2335 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2338 //---------other side---------
// second pass: entries up to the index held in newEntry2
2340 int max = newEntry2;
2341 while (++i <= max) {
2342 char[] charArray = table[i];
2343 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2346 //--------add the entry-------
2347 if (++max >= InternalTableSize)
2350 table[max] = (r = new char[]{c0, c1});
/**
 * Looks up the three-character token starting at <code>startPosition</code> in the
 * cache <code>charArray_length[1]</code>, so that an already stored
 * <code>char[]</code> with the same characters can be reused. The bucket is chosen
 * by a hash of the three characters and searched in two passes; when no entry
 * matches, a new array is created and stored in the bucket.
 */
2354 final char[] optimizedCurrentTokenSource3() {
2355 //try to return the same char[] build only once
// the hash combines the three characters with shifts of 12, 6 and 0 bits
2357 int hash = (((c0 = source[startPosition]) << 12)
2358 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2360 char[][] table = charArray_length[1][hash];
// first pass: advance i through the bucket while it stays below InternalTableSize
2362 while (++i < InternalTableSize) {
2363 char[] charArray = table[i];
2364 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2367 //---------other side---------
// second pass: entries up to the index held in newEntry3
2369 int max = newEntry3;
2370 while (++i <= max) {
2371 char[] charArray = table[i];
2372 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2375 //--------add the entry-------
2376 if (++max >= InternalTableSize)
2379 table[max] = (r = new char[]{c0, c1, c2});
// Returns a char[] holding the four characters of the current token, reusing a
// previously stored array with the same content when one is found.
// The four characters starting at source[startPosition] are combined into a long
// hash; its int value selects a bucket in charArray_length[2].
// NOTE(review): the reduction of the hash to a bucket index and the initial value
// of the scan index are not visible here - confirm against the full file.
2383 final char[] optimizedCurrentTokenSource4() {
2384 //try to return the same char[] build only once
2385 char c0, c1, c2, c3;
// the leading term is widened to long before shifting so the sum is computed as a long
2386 long hash = ((((long) (c0 = source[startPosition])) << 18)
2387 + ((c1 = source[startPosition + 1]) << 12)
2388 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2390 char[][] table = charArray_length[2][(int) hash];
// first scan: bucket slots up to InternalTableSize
2392 while (++i < InternalTableSize) {
2393 char[] charArray = table[i];
2394 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2395 && (c3 == charArray[3]))
2398 //---------other side---------
// second scan: bucket slots up to and including newEntry4
2400 int max = newEntry4;
2401 while (++i <= max) {
2402 char[] charArray = table[i];
2403 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2404 && (c3 == charArray[3]))
2407 //--------add the entry-------
// no match: advance the slot index (checked against InternalTableSize) and store a new array there
2408 if (++max >= InternalTableSize)
2411 table[max] = (r = new char[]{c0, c1, c2, c3});
// Returns a char[] holding the five characters of the current token, reusing a
// previously stored array with the same content when one is found.
// The five characters starting at source[startPosition] are combined into a long
// hash; its int value selects a bucket in charArray_length[3].
// NOTE(review): the reduction of the hash to a bucket index and the initial value
// of the scan index are not visible here - confirm against the full file.
2415 final char[] optimizedCurrentTokenSource5() {
2416 //try to return the same char[] build only once
2417 char c0, c1, c2, c3, c4;
// the two leading terms are widened to long before shifting
2418 long hash = ((((long) (c0 = source[startPosition])) << 24)
2419 + (((long) (c1 = source[startPosition + 1])) << 18)
2420 + ((c2 = source[startPosition + 2]) << 12)
2421 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2423 char[][] table = charArray_length[3][(int) hash];
// first scan: bucket slots up to InternalTableSize
2425 while (++i < InternalTableSize) {
2426 char[] charArray = table[i];
2427 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2428 && (c3 == charArray[3]) && (c4 == charArray[4]))
2431 //---------other side---------
// second scan: bucket slots up to and including newEntry5
2433 int max = newEntry5;
2434 while (++i <= max) {
2435 char[] charArray = table[i];
2436 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2437 && (c3 == charArray[3]) && (c4 == charArray[4]))
2440 //--------add the entry-------
// no match: advance the slot index (checked against InternalTableSize) and store a new array there
2441 if (++max >= InternalTableSize)
2444 table[max] = (r = new char[]{c0, c1, c2, c3, c4});
// Returns a char[] holding the six characters of the current token, reusing a
// previously stored array with the same content when one is found.
// The six characters starting at source[startPosition] are combined into a long
// hash; its int value selects a bucket in charArray_length[4].
// NOTE(review): the reduction of the hash to a bucket index and the initial value
// of the scan index are not visible here - confirm against the full file.
2448 final char[] optimizedCurrentTokenSource6() {
2449 //try to return the same char[] build only once
2450 char c0, c1, c2, c3, c4, c5;
// NOTE(review): the first shift is 32 while the remaining ones step by 6 (24, 18, 12, 6);
// this only changes how hash values are distributed, but looks unintended - confirm.
2451 long hash = ((((long) (c0 = source[startPosition])) << 32)
2452 + (((long) (c1 = source[startPosition + 1])) << 24)
2453 + (((long) (c2 = source[startPosition + 2])) << 18)
2454 + ((c3 = source[startPosition + 3]) << 12)
2455 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2457 char[][] table = charArray_length[4][(int) hash];
// first scan: bucket slots up to InternalTableSize
2459 while (++i < InternalTableSize) {
2460 char[] charArray = table[i];
2461 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2462 && (c3 == charArray[3]) && (c4 == charArray[4])
2463 && (c5 == charArray[5]))
2466 //---------other side---------
// second scan: bucket slots up to and including newEntry6
2468 int max = newEntry6;
2469 while (++i <= max) {
2470 char[] charArray = table[i];
2471 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2472 && (c3 == charArray[3]) && (c4 == charArray[4])
2473 && (c5 == charArray[5]))
2476 //--------add the entry-------
// no match: advance the slot index (checked against InternalTableSize) and store a new array there
2477 if (++max >= InternalTableSize)
2480 table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2484 public final void pushLineSeparator() throws InvalidInputException {
2485 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2486 final int INCREMENT = 250;
2487 if (this.checkNonExternalizedStringLiterals) {
2488 // reinitialize the current line for non externalize strings purpose
2491 //currentCharacter is at position currentPosition-1
2493 if (currentCharacter == '\r') {
2494 int separatorPos = currentPosition - 1;
2495 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2497 //System.out.println("CR-" + separatorPos);
2499 lineEnds[++linePtr] = separatorPos;
2500 } catch (IndexOutOfBoundsException e) {
2501 //linePtr value is correct
2502 int oldLength = lineEnds.length;
2503 int[] old = lineEnds;
2504 lineEnds = new int[oldLength + INCREMENT];
2505 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2506 lineEnds[linePtr] = separatorPos;
2508 // look-ahead for merged cr+lf
2510 if (source[currentPosition] == '\n') {
2511 //System.out.println("look-ahead LF-" + currentPosition);
2512 lineEnds[linePtr] = currentPosition;
2518 } catch (IndexOutOfBoundsException e) {
2523 if (currentCharacter == '\n') {
2524 //must merge eventual cr followed by lf
2525 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2526 //System.out.println("merge LF-" + (currentPosition - 1));
2527 lineEnds[linePtr] = currentPosition - 1;
2529 int separatorPos = currentPosition - 1;
2530 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2532 // System.out.println("LF-" + separatorPos);
2534 lineEnds[++linePtr] = separatorPos;
2535 } catch (IndexOutOfBoundsException e) {
2536 //linePtr value is correct
2537 int oldLength = lineEnds.length;
2538 int[] old = lineEnds;
2539 lineEnds = new int[oldLength + INCREMENT];
2540 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2541 lineEnds[linePtr] = separatorPos;
2548 public final void pushUnicodeLineSeparator() {
2549 // isUnicode means that the \r or \n has been read as a unicode character
2550 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2551 final int INCREMENT = 250;
2552 //currentCharacter is at position currentPosition-1
2553 if (this.checkNonExternalizedStringLiterals) {
2554 // reinitialize the current line for non externalize strings purpose
2558 if (currentCharacter == '\r') {
2559 int separatorPos = currentPosition - 6;
2560 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2562 //System.out.println("CR-" + separatorPos);
2564 lineEnds[++linePtr] = separatorPos;
2565 } catch (IndexOutOfBoundsException e) {
2566 //linePtr value is correct
2567 int oldLength = lineEnds.length;
2568 int[] old = lineEnds;
2569 lineEnds = new int[oldLength + INCREMENT];
2570 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2571 lineEnds[linePtr] = separatorPos;
2573 // look-ahead for merged cr+lf
2574 if (source[currentPosition] == '\n') {
2575 //System.out.println("look-ahead LF-" + currentPosition);
2576 lineEnds[linePtr] = currentPosition;
2584 if (currentCharacter == '\n') {
2585 //must merge eventual cr followed by lf
2586 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2587 //System.out.println("merge LF-" + (currentPosition - 1));
2588 lineEnds[linePtr] = currentPosition - 6;
2590 int separatorPos = currentPosition - 6;
2591 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2593 // System.out.println("LF-" + separatorPos);
2595 lineEnds[++linePtr] = separatorPos;
2596 } catch (IndexOutOfBoundsException e) {
2597 //linePtr value is correct
2598 int oldLength = lineEnds.length;
2599 int[] old = lineEnds;
2600 lineEnds = new int[oldLength + INCREMENT];
2601 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2602 lineEnds[linePtr] = separatorPos;
2609 public void recordComment(int token) {
2611 int stopPosition = this.currentPosition;
2613 case TokenNameCOMMENT_LINE:
2614 stopPosition = -this.lastCommentLinePosition;
2616 case TokenNameCOMMENT_BLOCK:
2617 stopPosition = -this.currentPosition;
2621 // a new comment is recorded
2622 int length = this.commentStops.length;
2623 if (++this.commentPtr >= length) {
2624 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2625 //grows the positions buffers too
2626 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2628 this.commentStops[this.commentPtr] = stopPosition;
2629 this.commentStarts[this.commentPtr] = this.startPosition;
2631 // public final void recordComment(boolean isJavadoc) {
2632 // // a new annotation comment is recorded
2634 // commentStops[++commentPtr] = isJavadoc
2635 // ? currentPosition
2636 // : -currentPosition;
2637 // } catch (IndexOutOfBoundsException e) {
2638 // int oldStackLength = commentStops.length;
2639 // int[] oldStack = commentStops;
2640 // commentStops = new int[oldStackLength + 30];
2641 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2642 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2643 // //grows the positions buffers too
2644 // int[] old = commentStarts;
2645 // commentStarts = new int[oldStackLength + 30];
2646 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2648 // //the buffer is of a correct size here
2649 // commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that scanning can restart at 'begin' and stop after 'end'.
// begin: position assigned to initialPosition, startPosition and currentPosition.
// end:   last position to scan; eofPosition is set one past it.
// NOTE(review): one statement of the body is not visible here - confirm
// against the full file before reordering anything.
2651 public void resetTo(int begin, int end) {
2652 //reset the scanner to a given position where it may rescan again
2654 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, so that value is used as is
2655 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2656 commentPtr = -1; // reset comment stack
// Reads the next character from the source (advancing currentPosition) and stores
// the character it stands for in currentCharacter.
// The visible assignments produce either a single quote or a backslash.
// NOTE(review): the case labels of the switch, and any statements following the
// last assignment, are not visible here; presumably the cases are '\'' and '\\'
// plus a default - confirm.
2658 public final void scanSingleQuotedEscapeCharacter()
2659 throws InvalidInputException {
2660 // the string with "\\u" is a legal string of two chars \ and u
2661 //thus we use a direct access to the source (for regular cases).
2662 // if (unicodeAsBackSlash) {
2663 // // consume next character
2664 // unicodeAsBackSlash = false;
2665 // if (((currentCharacter = source[currentPosition++]) == '\\')
2666 // && (source[currentPosition] == 'u')) {
2667 // getNextUnicodeChar();
2669 // if (withoutUnicodePtr != 0) {
2670 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the escaped character is read straight from the source buffer
2674 currentCharacter = source[currentPosition++];
2675 switch (currentCharacter) {
2677 currentCharacter = '\'';
2680 currentCharacter = '\\';
2683 currentCharacter = '\\';
// Reads the next character from the source (advancing currentPosition) and stores
// the character it stands for in currentCharacter.
// The visible assignments produce tab, newline, carriage return, double quote,
// single quote, backslash and '$'; the assignments for '\b' and '\f' are commented out.
// Otherwise the character is tried as the start of an octal escape of up to three
// digits, and the accumulated value is stored as a char.
// Throws InvalidInputException(INVALID_ESCAPE) from the octal branch.
// NOTE(review): the case labels, the "ignore last character" statements and the
// condition guarding the throw are not visible here - confirm.
2687 public final void scanDoubleQuotedEscapeCharacter()
2688 throws InvalidInputException {
2689 // the string with "\\u" is a legal string of two chars \ and u
2690 //thus we use a direct access to the source (for regular cases).
2691 // if (unicodeAsBackSlash) {
2692 // // consume next character
2693 // unicodeAsBackSlash = false;
2694 // if (((currentCharacter = source[currentPosition++]) == '\\')
2695 // && (source[currentPosition] == 'u')) {
2696 // getNextUnicodeChar();
2698 // if (withoutUnicodePtr != 0) {
2699 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the escaped character is read straight from the source buffer
2703 currentCharacter = source[currentPosition++];
2704 switch (currentCharacter) {
2706 // currentCharacter = '\b';
2709 currentCharacter = '\t';
2712 currentCharacter = '\n';
2715 // currentCharacter = '\f';
2718 currentCharacter = '\r';
2721 currentCharacter = '\"';
2724 currentCharacter = '\'';
2727 currentCharacter = '\\';
2730 currentCharacter = '$';
2733 // -----------octal escape--------------
2735 // OctalDigit OctalDigit
2736 // ZeroToThree OctalDigit OctalDigit
// first digit: values outside 0..7 are not an octal escape
2737 int number = Character.getNumericValue(currentCharacter);
2738 if (number >= 0 && number <= 7) {
// a leading digit above 3 allows at most one more octal digit
2739 boolean zeroToThreeNot = number > 3;
2740 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2741 int digit = Character.getNumericValue(currentCharacter);
2742 if (digit >= 0 && digit <= 7) {
2743 number = (number * 8) + digit;
2745 .isDigit(currentCharacter = source[currentPosition++])) {
2746 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2747 // Digit --> ignore last character
2750 digit = Character.getNumericValue(currentCharacter);
2751 if (digit >= 0 && digit <= 7) {
2752 // has read \ZeroToThree OctalDigit OctalDigit
2753 number = (number * 8) + digit;
2754 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2755 // --> ignore last character
2759 } else { // has read \OctalDigit NonDigit--> ignore last
2763 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2767 } else { // has read \OctalDigit --> ignore last character
2771 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
2772 currentCharacter = (char) number;
2775 // throw new InvalidInputException(INVALID_ESCAPE);
2778 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2779 // return scanIdentifierOrKeyword( false );
2781 public int scanIdentifierOrKeyword(boolean isVariable)
2782 throws InvalidInputException {
2784 //first dispatch on the first char.
2785 //then the length. If there are several
2786 //keywors with the same length AND the same first char, then do another
2787 //disptach on the second char :-)...cool....but fast !
2788 useAssertAsAnIndentifier = false;
2789 while (getNextCharAsJavaIdentifierPart()) {
2792 // if (new String(getCurrentTokenSource()).equals("$this")) {
2793 // return TokenNamethis;
2795 return TokenNameVariable;
2800 // if (withoutUnicodePtr == 0)
2801 //quick test on length == 1 but not on length > 12 while most identifier
2802 //have a length which is <= 12...but there are lots of identifier with
2805 if ((length = currentPosition - startPosition) == 1)
2806 return TokenNameIdentifier;
2808 data = new char[length];
2809 index = startPosition;
2810 for (int i = 0; i < length; i++) {
2811 data[i] = Character.toLowerCase(source[index + i]);
2815 // if ((length = withoutUnicodePtr) == 1)
2816 // return TokenNameIdentifier;
2817 // // data = withoutUnicodeBuffer;
2818 // data = new char[withoutUnicodeBuffer.length];
2819 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2820 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2824 firstLetter = data[index];
2825 switch (firstLetter) {
2830 if ((data[++index] == '_') && (data[++index] == 'f')
2831 && (data[++index] == 'i') && (data[++index] == 'l')
2832 && (data[++index] == 'e') && (data[++index] == '_')
2833 && (data[++index] == '_'))
2834 return TokenNameFILE;
2835 index = 0; //__LINE__
2836 if ((data[++index] == '_') && (data[++index] == 'l')
2837 && (data[++index] == 'i') && (data[++index] == 'n')
2838 && (data[++index] == 'e') && (data[++index] == '_')
2839 && (data[++index] == '_'))
2840 return TokenNameLINE;
2844 if ((data[++index] == '_') && (data[++index] == 'c')
2845 && (data[++index] == 'l') && (data[++index] == 'a')
2846 && (data[++index] == 's') && (data[++index] == 's')
2847 && (data[++index] == '_') && (data[++index] == '_'))
2848 return TokenNameCLASS_C;
2852 if ((data[++index] == '_') && (data[++index] == 'm')
2853 && (data[++index] == 'e') && (data[++index] == 't')
2854 && (data[++index] == 'h') && (data[++index] == 'o')
2855 && (data[++index] == 'd') && (data[++index] == '_')
2856 && (data[++index] == '_'))
2857 return TokenNameMETHOD_C;
2861 if ((data[++index] == '_') && (data[++index] == 'f')
2862 && (data[++index] == 'u') && (data[++index] == 'n')
2863 && (data[++index] == 'c') && (data[++index] == 't')
2864 && (data[++index] == 'i') && (data[++index] == 'o')
2865 && (data[++index] == 'n') && (data[++index] == '_')
2866 && (data[++index] == '_'))
2867 return TokenNameFUNC_C;
2870 return TokenNameIdentifier;
2872 // as and array abstract
2876 if ((data[++index] == 's')) {
2879 return TokenNameIdentifier;
2883 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2884 return TokenNameand;
2886 return TokenNameIdentifier;
2890 if ((data[++index] == 'r') && (data[++index] == 'r')
2891 && (data[++index] == 'a') && (data[++index] == 'y'))
2892 return TokenNamearray;
2894 return TokenNameIdentifier;
2896 if ((data[++index] == 'b') && (data[++index] == 's')
2897 && (data[++index] == 't') && (data[++index] == 'r')
2898 && (data[++index] == 'a') && (data[++index] == 'c')
2899 && (data[++index] == 't'))
2900 return TokenNameabstract;
2902 return TokenNameIdentifier;
2904 return TokenNameIdentifier;
2910 if ((data[++index] == 'r') && (data[++index] == 'e')
2911 && (data[++index] == 'a') && (data[++index] == 'k'))
2912 return TokenNamebreak;
2914 return TokenNameIdentifier;
2916 return TokenNameIdentifier;
2919 //case catch class clone const continue
2922 if ((data[++index] == 'a') && (data[++index] == 's')
2923 && (data[++index] == 'e'))
2924 return TokenNamecase;
2926 return TokenNameIdentifier;
2928 if ((data[++index] == 'a') && (data[++index] == 't')
2929 && (data[++index] == 'c') && (data[++index] == 'h'))
2930 return TokenNamecatch;
2932 if ((data[++index] == 'l') && (data[++index] == 'a')
2933 && (data[++index] == 's') && (data[++index] == 's'))
2934 return TokenNameclass;
2936 if ((data[++index] == 'l') && (data[++index] == 'o')
2937 && (data[++index] == 'n') && (data[++index] == 'e'))
2938 return TokenNameclone;
2940 if ((data[++index] == 'o') && (data[++index] == 'n')
2941 && (data[++index] == 's') && (data[++index] == 't'))
2942 return TokenNameconst;
2944 return TokenNameIdentifier;
2946 if ((data[++index] == 'o') && (data[++index] == 'n')
2947 && (data[++index] == 't') && (data[++index] == 'i')
2948 && (data[++index] == 'n') && (data[++index] == 'u')
2949 && (data[++index] == 'e'))
2950 return TokenNamecontinue;
2952 return TokenNameIdentifier;
2954 return TokenNameIdentifier;
2957 // declare default do die
2958 // TODO delete define ==> no keyword !
2961 if ((data[++index] == 'o'))
2964 return TokenNameIdentifier;
2966 // if ((data[++index] == 'e')
2967 // && (data[++index] == 'f')
2968 // && (data[++index] == 'i')
2969 // && (data[++index] == 'n')
2970 // && (data[++index] == 'e'))
2971 // return TokenNamedefine;
2973 // return TokenNameIdentifier;
2975 if ((data[++index] == 'e') && (data[++index] == 'c')
2976 && (data[++index] == 'l') && (data[++index] == 'a')
2977 && (data[++index] == 'r') && (data[++index] == 'e'))
2978 return TokenNamedeclare;
2980 if ((data[++index] == 'e') && (data[++index] == 'f')
2981 && (data[++index] == 'a') && (data[++index] == 'u')
2982 && (data[++index] == 'l') && (data[++index] == 't'))
2983 return TokenNamedefault;
2985 return TokenNameIdentifier;
2987 return TokenNameIdentifier;
2990 //echo else exit elseif extends eval
2993 if ((data[++index] == 'c') && (data[++index] == 'h')
2994 && (data[++index] == 'o'))
2995 return TokenNameecho;
2996 else if ((data[index] == 'l') && (data[++index] == 's')
2997 && (data[++index] == 'e'))
2998 return TokenNameelse;
2999 else if ((data[index] == 'x') && (data[++index] == 'i')
3000 && (data[++index] == 't'))
3001 return TokenNameexit;
3002 else if ((data[index] == 'v') && (data[++index] == 'a')
3003 && (data[++index] == 'l'))
3004 return TokenNameeval;
3006 return TokenNameIdentifier;
3009 if ((data[++index] == 'n') && (data[++index] == 'd')
3010 && (data[++index] == 'i') && (data[++index] == 'f'))
3011 return TokenNameendif;
3012 if ((data[index] == 'm') && (data[++index] == 'p')
3013 && (data[++index] == 't') && (data[++index] == 'y'))
3014 return TokenNameempty;
3016 return TokenNameIdentifier;
3019 if ((data[++index] == 'n') && (data[++index] == 'd')
3020 && (data[++index] == 'f') && (data[++index] == 'o')
3021 && (data[++index] == 'r'))
3022 return TokenNameendfor;
3023 else if ((data[index] == 'l') && (data[++index] == 's')
3024 && (data[++index] == 'e') && (data[++index] == 'i')
3025 && (data[++index] == 'f'))
3026 return TokenNameelseif;
3028 return TokenNameIdentifier;
3030 if ((data[++index] == 'x') && (data[++index] == 't')
3031 && (data[++index] == 'e') && (data[++index] == 'n')
3032 && (data[++index] == 'd') && (data[++index] == 's'))
3033 return TokenNameextends;
3035 return TokenNameIdentifier;
3038 if ((data[++index] == 'n') && (data[++index] == 'd')
3039 && (data[++index] == 'w') && (data[++index] == 'h')
3040 && (data[++index] == 'i') && (data[++index] == 'l')
3041 && (data[++index] == 'e'))
3042 return TokenNameendwhile;
3044 return TokenNameIdentifier;
3047 if ((data[++index] == 'n') && (data[++index] == 'd')
3048 && (data[++index] == 's') && (data[++index] == 'w')
3049 && (data[++index] == 'i') && (data[++index] == 't')
3050 && (data[++index] == 'c') && (data[++index] == 'h'))
3051 return TokenNameendswitch;
3053 return TokenNameIdentifier;
3056 if ((data[++index] == 'n') && (data[++index] == 'd')
3057 && (data[++index] == 'd') && (data[++index] == 'e')
3058 && (data[++index] == 'c') && (data[++index] == 'l')
3059 && (data[++index] == 'a') && (data[++index] == 'r')
3060 && (data[++index] == 'e'))
3061 return TokenNameendforeach;
3063 if ((data[++index] == 'n') // endforeach
3064 && (data[++index] == 'd') && (data[++index] == 'f')
3065 && (data[++index] == 'o') && (data[++index] == 'r')
3066 && (data[++index] == 'e') && (data[++index] == 'a')
3067 && (data[++index] == 'c') && (data[++index] == 'h'))
3068 return TokenNameendforeach;
3070 return TokenNameIdentifier;
3072 return TokenNameIdentifier;
3075 //for false final function
3078 if ((data[++index] == 'o') && (data[++index] == 'r'))
3079 return TokenNamefor;
3081 return TokenNameIdentifier;
3083 // if ((data[++index] == 'a') && (data[++index] == 'l')
3084 // && (data[++index] == 's') && (data[++index] == 'e'))
3085 // return TokenNamefalse;
3086 if ((data[++index] == 'i') && (data[++index] == 'n')
3087 && (data[++index] == 'a') && (data[++index] == 'l'))
3088 return TokenNamefinal;
3090 return TokenNameIdentifier;
3093 if ((data[++index] == 'o') && (data[++index] == 'r')
3094 && (data[++index] == 'e') && (data[++index] == 'a')
3095 && (data[++index] == 'c') && (data[++index] == 'h'))
3096 return TokenNameforeach;
3098 return TokenNameIdentifier;
3101 if ((data[++index] == 'u') && (data[++index] == 'n')
3102 && (data[++index] == 'c') && (data[++index] == 't')
3103 && (data[++index] == 'i') && (data[++index] == 'o')
3104 && (data[++index] == 'n'))
3105 return TokenNamefunction;
3107 return TokenNameIdentifier;
3109 return TokenNameIdentifier;
3114 if ((data[++index] == 'l') && (data[++index] == 'o')
3115 && (data[++index] == 'b') && (data[++index] == 'a')
3116 && (data[++index] == 'l')) {
3117 return TokenNameglobal;
3120 return TokenNameIdentifier;
3122 //if int isset include include_once instanceof interface implements
3125 if (data[++index] == 'f')
3128 return TokenNameIdentifier;
3130 // if ((data[++index] == 'n') && (data[++index] == 't'))
3131 // return TokenNameint;
3133 // return TokenNameIdentifier;
3135 if ((data[++index] == 's') && (data[++index] == 's')
3136 && (data[++index] == 'e') && (data[++index] == 't'))
3137 return TokenNameisset;
3139 return TokenNameIdentifier;
3141 if ((data[++index] == 'n') && (data[++index] == 'c')
3142 && (data[++index] == 'l') && (data[++index] == 'u')
3143 && (data[++index] == 'd') && (data[++index] == 'e'))
3144 return TokenNameinclude;
3146 return TokenNameIdentifier;
3149 if ((data[++index] == 'n') && (data[++index] == 't')
3150 && (data[++index] == 'e') && (data[++index] == 'r')
3151 && (data[++index] == 'f') && (data[++index] == 'a')
3152 && (data[++index] == 'c') && (data[++index] == 'e'))
3153 return TokenNameinterface;
3155 return TokenNameIdentifier;
3158 if ((data[++index] == 'n') && (data[++index] == 's')
3159 && (data[++index] == 't') && (data[++index] == 'a')
3160 && (data[++index] == 'n') && (data[++index] == 'c')
3161 && (data[++index] == 'e') && (data[++index] == 'o')
3162 && (data[++index] == 'f'))
3163 return TokenNameinstanceof;
3164 if ((data[index] == 'm') && (data[++index] == 'p')
3165 && (data[++index] == 'l') && (data[++index] == 'e')
3166 && (data[++index] == 'm') && (data[++index] == 'e')
3167 && (data[++index] == 'n') && (data[++index] == 't')
3168 && (data[++index] == 's'))
3169 return TokenNameimplements;
3171 return TokenNameIdentifier;
3173 if ((data[++index] == 'n') && (data[++index] == 'c')
3174 && (data[++index] == 'l') && (data[++index] == 'u')
3175 && (data[++index] == 'd') && (data[++index] == 'e')
3176 && (data[++index] == '_') && (data[++index] == 'o')
3177 && (data[++index] == 'n') && (data[++index] == 'c')
3178 && (data[++index] == 'e'))
3179 return TokenNameinclude_once;
3181 return TokenNameIdentifier;
3183 return TokenNameIdentifier;
3188 if ((data[++index] == 'i') && (data[++index] == 's')
3189 && (data[++index] == 't')) {
3190 return TokenNamelist;
3193 return TokenNameIdentifier;
3198 if ((data[++index] == 'e') && (data[++index] == 'w'))
3199 return TokenNamenew;
3201 return TokenNameIdentifier;
3203 // if ((data[++index] == 'u') && (data[++index] == 'l')
3204 // && (data[++index] == 'l'))
3205 // return TokenNamenull;
3207 // return TokenNameIdentifier;
3209 return TokenNameIdentifier;
3214 if (data[++index] == 'r') {
3218 // if (length == 12) {
3219 // if ((data[++index] == 'l')
3220 // && (data[++index] == 'd')
3221 // && (data[++index] == '_')
3222 // && (data[++index] == 'f')
3223 // && (data[++index] == 'u')
3224 // && (data[++index] == 'n')
3225 // && (data[++index] == 'c')
3226 // && (data[++index] == 't')
3227 // && (data[++index] == 'i')
3228 // && (data[++index] == 'o')
3229 // && (data[++index] == 'n')) {
3230 // return TokenNameold_function;
3233 return TokenNameIdentifier;
3235 // print public private protected
3238 if ((data[++index] == 'r') && (data[++index] == 'i')
3239 && (data[++index] == 'n') && (data[++index] == 't')) {
3240 return TokenNameprint;
3242 return TokenNameIdentifier;
3244 if ((data[++index] == 'u') && (data[++index] == 'b')
3245 && (data[++index] == 'l') && (data[++index] == 'i')
3246 && (data[++index] == 'c')) {
3247 return TokenNamepublic;
3249 return TokenNameIdentifier;
3251 if ((data[++index] == 'r') && (data[++index] == 'i')
3252 && (data[++index] == 'v') && (data[++index] == 'a')
3253 && (data[++index] == 't') && (data[++index] == 'e')) {
3254 return TokenNameprivate;
3256 return TokenNameIdentifier;
3258 if ((data[++index] == 'r') && (data[++index] == 'o')
3259 && (data[++index] == 't') && (data[++index] == 'e')
3260 && (data[++index] == 'c') && (data[++index] == 't')
3261 && (data[++index] == 'e') && (data[++index] == 'd')) {
3262 return TokenNameprotected;
3264 return TokenNameIdentifier;
3266 return TokenNameIdentifier;
3268 //return require require_once
3270 if ((data[++index] == 'e') && (data[++index] == 't')
3271 && (data[++index] == 'u') && (data[++index] == 'r')
3272 && (data[++index] == 'n')) {
3273 return TokenNamereturn;
3275 } else if (length == 7) {
3276 if ((data[++index] == 'e') && (data[++index] == 'q')
3277 && (data[++index] == 'u') && (data[++index] == 'i')
3278 && (data[++index] == 'r') && (data[++index] == 'e')) {
3279 return TokenNamerequire;
3281 } else if (length == 12) {
3282 if ((data[++index] == 'e') && (data[++index] == 'q')
3283 && (data[++index] == 'u') && (data[++index] == 'i')
3284 && (data[++index] == 'r') && (data[++index] == 'e')
3285 && (data[++index] == '_') && (data[++index] == 'o')
3286 && (data[++index] == 'n') && (data[++index] == 'c')
3287 && (data[++index] == 'e')) {
3288 return TokenNamerequire_once;
3291 return TokenNameIdentifier;
3296 if (data[++index] == 't')
3297 if ((data[++index] == 'a') && (data[++index] == 't')
3298 && (data[++index] == 'i') && (data[++index] == 'c')) {
3299 return TokenNamestatic;
3301 return TokenNameIdentifier;
3302 else if ((data[index] == 'w') && (data[++index] == 'i')
3303 && (data[++index] == 't') && (data[++index] == 'c')
3304 && (data[++index] == 'h'))
3305 return TokenNameswitch;
3307 return TokenNameIdentifier;
3309 return TokenNameIdentifier;
3315 if ((data[++index] == 'r') && (data[++index] == 'y'))
3316 return TokenNametry;
3318 return TokenNameIdentifier;
3320 // if ((data[++index] == 'r') && (data[++index] == 'u')
3321 // && (data[++index] == 'e'))
3322 // return TokenNametrue;
3324 // return TokenNameIdentifier;
3326 if ((data[++index] == 'h') && (data[++index] == 'r')
3327 && (data[++index] == 'o') && (data[++index] == 'w'))
3328 return TokenNamethrow;
3330 return TokenNameIdentifier;
3332 return TokenNameIdentifier;
3338 if ((data[++index] == 's') && (data[++index] == 'e'))
3339 return TokenNameuse;
3341 return TokenNameIdentifier;
3343 if ((data[++index] == 'n') && (data[++index] == 's')
3344 && (data[++index] == 'e') && (data[++index] == 't'))
3345 return TokenNameunset;
3347 return TokenNameIdentifier;
3349 return TokenNameIdentifier;
3355 if ((data[++index] == 'a') && (data[++index] == 'r'))
3356 return TokenNamevar;
3358 return TokenNameIdentifier;
3360 return TokenNameIdentifier;
3366 if ((data[++index] == 'h') && (data[++index] == 'i')
3367 && (data[++index] == 'l') && (data[++index] == 'e'))
3368 return TokenNamewhile;
3370 return TokenNameIdentifier;
3371 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3372 // (data[++index]=='e') && (data[++index]=='f')&&
3373 // (data[++index]=='p'))
3374 //return TokenNamewidefp ;
3376 //return TokenNameIdentifier;
3378 return TokenNameIdentifier;
3384 if ((data[++index] == 'o') && (data[++index] == 'r'))
3385 return TokenNamexor;
3387 return TokenNameIdentifier;
3389 return TokenNameIdentifier;
3392 return TokenNameIdentifier;
// Scans a numeric literal whose first digit is already in currentCharacter and
// returns its token kind.
// dotPrefix: true when the caller has already consumed a leading '.', in which
//            case the literal starts out as floating.
// Returns TokenNameIntegerLiteral for hexadecimal literals and plain integers, and
// TokenNameDoubleLiteral when a 'd'/'D' suffix is consumed or the literal is floating.
// Throws InvalidInputException(INVALID_HEXA) when "0x" is not followed by a hex
// digit, and InvalidInputException(INVALID_FLOAT) when an exponent has no digit.
// NOTE(review): several closing braces and short statements (for example where
// 'floating' is set) are not visible here - confirm against the full file.
3395 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3396 //when entering this method the currentCharacter is the first
3397 //digit of the number , i.e. it may be preceded by a . when
3399 boolean floating = dotPrefix;
// a leading '0' without dot prefix may start a hexadecimal or octal-looking literal
3400 if ((!dotPrefix) && (currentCharacter == '0')) {
3401 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3402 //force the first char of the hexa number do exist...
3403 // consume next character
3404 unicodeAsBackSlash = false;
3405 currentCharacter = source[currentPosition++];
3406 // if (((currentCharacter = source[currentPosition++]) == '\\')
3407 // && (source[currentPosition] == 'u')) {
3408 // getNextUnicodeChar();
3410 // if (withoutUnicodePtr != 0) {
3411 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3414 if (Character.digit(currentCharacter, 16) == -1)
3415 throw new InvalidInputException(INVALID_HEXA);
// consume the remaining hexadecimal digits
3417 while (getNextCharAsDigit(16)) {
3419 // if (getNextChar('l', 'L') >= 0)
3420 // return TokenNameLongLiteral;
3422 return TokenNameIntegerLiteral;
3424 //there is x or X in the number
3425 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3426 // 00078.0 is true !!!!! crazy language
3427 if (getNextCharAsDigit()) {
3428 //-------------potential octal-----------------
3429 while (getNextCharAsDigit()) {
3431 // if (getNextChar('l', 'L') >= 0) {
3432 // return TokenNameLongLiteral;
3435 // if (getNextChar('f', 'F') >= 0) {
3436 // return TokenNameFloatingPointLiteral;
3438 if (getNextChar('d', 'D') >= 0) {
3439 return TokenNameDoubleLiteral;
3440 } else { //make the distinction between octal and float ....
3441 if (getNextChar('.')) { //bingo ! ....
3442 while (getNextCharAsDigit()) {
3444 if (getNextChar('e', 'E') >= 0) {
3445 // consume next character
3446 unicodeAsBackSlash = false;
3447 currentCharacter = source[currentPosition++];
3448 // if (((currentCharacter = source[currentPosition++]) == '\\')
3449 // && (source[currentPosition] == 'u')) {
3450 // getNextUnicodeChar();
3452 // if (withoutUnicodePtr != 0) {
3453 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional exponent sign
3456 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3457 // consume next character
3458 unicodeAsBackSlash = false;
3459 currentCharacter = source[currentPosition++];
3460 // if (((currentCharacter = source[currentPosition++]) == '\\')
3461 // && (source[currentPosition] == 'u')) {
3462 // getNextUnicodeChar();
3464 // if (withoutUnicodePtr != 0) {
3465 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3466 // currentCharacter;
// the exponent needs at least one digit
3470 if (!Character.isDigit(currentCharacter))
3471 throw new InvalidInputException(INVALID_FLOAT);
3472 while (getNextCharAsDigit()) {
3475 // if (getNextChar('f', 'F') >= 0)
3476 // return TokenNameFloatingPointLiteral;
3477 getNextChar('d', 'D'); //jump over potential d or D
3478 return TokenNameDoubleLiteral;
3480 return TokenNameIntegerLiteral;
// general case: consume the integer part
3487 while (getNextCharAsDigit()) {
3489 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3490 // return TokenNameLongLiteral;
3491 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3492 while (getNextCharAsDigit()) {
3496 //if floating is true both exponent and suffix may be optional
3497 if (getNextChar('e', 'E') >= 0) {
3499 // consume next character
3500 unicodeAsBackSlash = false;
3501 currentCharacter = source[currentPosition++];
3502 // if (((currentCharacter = source[currentPosition++]) == '\\')
3503 // && (source[currentPosition] == 'u')) {
3504 // getNextUnicodeChar();
3506 // if (withoutUnicodePtr != 0) {
3507 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3510 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3513 unicodeAsBackSlash = false;
3514 currentCharacter = source[currentPosition++];
3515 // if (((currentCharacter = source[currentPosition++]) == '\\')
3516 // && (source[currentPosition] == 'u')) {
3517 // getNextUnicodeChar();
3519 // if (withoutUnicodePtr != 0) {
3520 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the exponent needs at least one digit
3524 if (!Character.isDigit(currentCharacter))
3525 throw new InvalidInputException(INVALID_FLOAT);
3526 while (getNextCharAsDigit()) {
3529 if (getNextChar('d', 'D') >= 0)
3530 return TokenNameDoubleLiteral;
3531 // if (getNextChar('f', 'F') >= 0)
3532 // return TokenNameFloatingPointLiteral;
3533 //the long flag has been tested before
3534 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3537 * Search the line number corresponding to a specific position
// Resolves a character offset to a line number using the recorded line-end
// offsets: only the first (linePtr + 1) entries of lineEnds are considered, and
// position is compared against lineEnds[m] to decide which side to continue on.
// NOTE(review): g and d look like the low/high bounds of a binary search with m
// as the probe index, and the null check presumably short-circuits when no line
// ends have been recorded -- confirm against the statements between these lines.
3540 public final int getLineNumber(int position) {
3541 if (lineEnds == null)
3543 int length = linePtr + 1;
3546 int g = 0, d = length - 1;
3550 if (position < lineEnds[m]) {
3552 } else if (position > lineEnds[m]) {
3558 if (position < lineEnds[m]) {
// Setter taking the desired PHP-mode flag.
// NOTE(review): presumably stores mode in the public phpMode field declared at
// the top of the class -- confirm against the method body.
3563 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to be scanned and resets per-source state.
 *
 * @param source the characters to scan; null is accepted and treated as an
 *               empty buffer. The array is stored by reference, not copied.
 */
3566 public final void setSource(char[] source) {
3567 //the source-buffer is set to sourceString
// a null argument is replaced by a zero-length buffer
3568 if (source == null) {
3569 this.source = new char[0];
3571 this.source = source;
// scanning restarts from offset 0 of the new buffer
3574 initialPosition = currentPosition = 0;
// the "assert keyword seen" flag belongs to the previous source
3575 containsAssertKeyword = false;
// scratch buffer sized to the new source (this.source is never null here)
3576 withoutUnicodeBuffer = new char[this.source.length];
// fresh, empty stack for this source
3577 encapsedStringStack = new Stack();
3579 public String toString() {
3580 if (startPosition == source.length)
3581 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3582 if (currentPosition > source.length)
3583 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3584 char front[] = new char[startPosition];
3585 System.arraycopy(source, 0, front, 0, startPosition);
3586 int middleLength = (currentPosition - 1) - startPosition + 1;
3588 if (middleLength > -1) {
3589 middle = new char[middleLength];
3590 System.arraycopy(source, startPosition, middle, 0, middleLength);
3592 middle = new char[0];
3594 char end[] = new char[source.length - (currentPosition - 1)];
3595 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3596 - (currentPosition - 1) - 1);
3597 return new String(front)
3598 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3599 + new String(middle)
3600 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a human-readable rendering of the token kind <code>act</code>,
 * intended for diagnostics.
 * <p>
 * Keywords, operators and punctuation are rendered as a fixed spelling. Token
 * kinds that carry text (inline HTML, identifiers, variables, numeric and
 * string literals, heredocs, whitespace and comments) are rendered as
 * <code>Kind(text)</code>, where the text is taken from
 * getCurrentTokenSource(); for those kinds the result therefore describes the
 * token the scanner is currently positioned on, not an arbitrary token.
 *
 * @param act the token kind (one of the TokenName... constants)
 * @return the printable description
 */
3603 public final String toStringAction(int act) {
3605 case TokenNameERROR :
3606 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3608 case TokenNameINLINE_HTML :
3609 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3610 case TokenNameIdentifier :
3611 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3612 case TokenNameVariable :
3613 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords: rendered as their spelling ----
3614 case TokenNameabstract :
3615 return "abstract"; //$NON-NLS-1$
3617 return "AND"; //$NON-NLS-1$
3618 case TokenNamearray :
3619 return "array"; //$NON-NLS-1$
3621 return "as"; //$NON-NLS-1$
3622 case TokenNamebreak :
3623 return "break"; //$NON-NLS-1$
3624 case TokenNamecase :
3625 return "case"; //$NON-NLS-1$
3626 case TokenNameclass :
3627 return "class"; //$NON-NLS-1$
3628 case TokenNamecatch :
3629 return "catch"; //$NON-NLS-1$
3630 case TokenNameclone :
3633 case TokenNameconst :
3636 case TokenNamecontinue :
3637 return "continue"; //$NON-NLS-1$
3638 case TokenNamedefault :
3639 return "default"; //$NON-NLS-1$
3640 // case TokenNamedefine :
3641 // return "define"; //$NON-NLS-1$
3643 return "do"; //$NON-NLS-1$
3644 case TokenNameecho :
3645 return "echo"; //$NON-NLS-1$
3646 case TokenNameelse :
3647 return "else"; //$NON-NLS-1$
3648 case TokenNameelseif :
3649 return "elseif"; //$NON-NLS-1$
3650 case TokenNameendfor :
3651 return "endfor"; //$NON-NLS-1$
3652 case TokenNameendforeach :
3653 return "endforeach"; //$NON-NLS-1$
3654 case TokenNameendif :
3655 return "endif"; //$NON-NLS-1$
3656 case TokenNameendswitch :
3657 return "endswitch"; //$NON-NLS-1$
3658 case TokenNameendwhile :
3659 return "endwhile"; //$NON-NLS-1$
3662 case TokenNameextends :
3663 return "extends"; //$NON-NLS-1$
3664 // case TokenNamefalse :
3665 // return "false"; //$NON-NLS-1$
3666 case TokenNamefinal :
3667 return "final"; //$NON-NLS-1$
3669 return "for"; //$NON-NLS-1$
3670 case TokenNameforeach :
3671 return "foreach"; //$NON-NLS-1$
3672 case TokenNamefunction :
3673 return "function"; //$NON-NLS-1$
3674 case TokenNameglobal :
3675 return "global"; //$NON-NLS-1$
3677 return "if"; //$NON-NLS-1$
3678 case TokenNameimplements :
3679 return "implements"; //$NON-NLS-1$
3680 case TokenNameinclude :
3681 return "include"; //$NON-NLS-1$
3682 case TokenNameinclude_once :
3683 return "include_once"; //$NON-NLS-1$
3684 case TokenNameinstanceof :
3685 return "instanceof"; //$NON-NLS-1$
3686 case TokenNameinterface :
3687 return "interface"; //$NON-NLS-1$
3688 case TokenNameisset :
3689 return "isset"; //$NON-NLS-1$
3690 case TokenNamelist :
3691 return "list"; //$NON-NLS-1$
3693 return "new"; //$NON-NLS-1$
3694 // case TokenNamenull :
3695 // return "null"; //$NON-NLS-1$
3697 return "OR"; //$NON-NLS-1$
3698 case TokenNameprint :
3699 return "print"; //$NON-NLS-1$
3700 case TokenNameprivate :
3701 return "private"; //$NON-NLS-1$
3702 case TokenNameprotected :
3703 return "protected"; //$NON-NLS-1$
3704 case TokenNamepublic :
3705 return "public"; //$NON-NLS-1$
3706 case TokenNamerequire :
3707 return "require"; //$NON-NLS-1$
3708 case TokenNamerequire_once :
3709 return "require_once"; //$NON-NLS-1$
3710 case TokenNamereturn :
3711 return "return"; //$NON-NLS-1$
3712 case TokenNamestatic :
3713 return "static"; //$NON-NLS-1$
3714 case TokenNameswitch :
3715 return "switch"; //$NON-NLS-1$
3716 // case TokenNametrue :
3717 // return "true"; //$NON-NLS-1$
3718 case TokenNameunset :
3719 return "unset"; //$NON-NLS-1$
3721 return "var"; //$NON-NLS-1$
3722 case TokenNamewhile :
3723 return "while"; //$NON-NLS-1$
3725 return "XOR"; //$NON-NLS-1$
3726 // case TokenNamethis :
3727 // return "$this"; //$NON-NLS-1$
// ---- literals: rendered as Kind(text) using the current token source ----
3728 case TokenNameIntegerLiteral :
3729 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3730 case TokenNameDoubleLiteral :
3731 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3732 case TokenNameStringDoubleQuote :
3733 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3734 case TokenNameStringSingleQuote :
3735 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3736 case TokenNameStringInterpolated :
3737 return "StringInterpolated(" + new String(getCurrentTokenSource())
3738 + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// the three EncapsedString kinds are rendered as their delimiter character
3739 case TokenNameEncapsedString0 :
3740 return "`"; //$NON-NLS-1$
3741 case TokenNameEncapsedString1 :
3742 return "\'"; //$NON-NLS-1$
3743 case TokenNameEncapsedString2 :
3744 return "\""; //$NON-NLS-1$
3745 case TokenNameSTRING :
3746 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3747 case TokenNameHEREDOC :
3748 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- operators and punctuation: rendered as their spelling ----
3749 case TokenNamePLUS_PLUS :
3750 return "++"; //$NON-NLS-1$
3751 case TokenNameMINUS_MINUS :
3752 return "--"; //$NON-NLS-1$
3753 case TokenNameEQUAL_EQUAL :
3754 return "=="; //$NON-NLS-1$
3755 case TokenNameEQUAL_EQUAL_EQUAL :
3756 return "==="; //$NON-NLS-1$
3757 case TokenNameEQUAL_GREATER :
3758 return "=>"; //$NON-NLS-1$
3759 case TokenNameLESS_EQUAL :
3760 return "<="; //$NON-NLS-1$
3761 case TokenNameGREATER_EQUAL :
3762 return ">="; //$NON-NLS-1$
3763 case TokenNameNOT_EQUAL :
3764 return "!="; //$NON-NLS-1$
3765 case TokenNameNOT_EQUAL_EQUAL :
3766 return "!=="; //$NON-NLS-1$
3767 case TokenNameLEFT_SHIFT :
3768 return "<<"; //$NON-NLS-1$
3769 case TokenNameRIGHT_SHIFT :
3770 return ">>"; //$NON-NLS-1$
3771 case TokenNamePLUS_EQUAL :
3772 return "+="; //$NON-NLS-1$
3773 case TokenNameMINUS_EQUAL :
3774 return "-="; //$NON-NLS-1$
3775 case TokenNameMULTIPLY_EQUAL :
3776 return "*="; //$NON-NLS-1$
3777 case TokenNameDIVIDE_EQUAL :
3778 return "/="; //$NON-NLS-1$
3779 case TokenNameAND_EQUAL :
3780 return "&="; //$NON-NLS-1$
3781 case TokenNameOR_EQUAL :
3782 return "|="; //$NON-NLS-1$
3783 case TokenNameXOR_EQUAL :
3784 return "^="; //$NON-NLS-1$
3785 case TokenNameREMAINDER_EQUAL :
3786 return "%="; //$NON-NLS-1$
3787 case TokenNameDOT_EQUAL :
3788 return ".="; //$NON-NLS-1$
3789 case TokenNameLEFT_SHIFT_EQUAL :
3790 return "<<="; //$NON-NLS-1$
3791 case TokenNameRIGHT_SHIFT_EQUAL :
3792 return ">>="; //$NON-NLS-1$
3793 case TokenNameOR_OR :
3794 return "||"; //$NON-NLS-1$
3795 case TokenNameAND_AND :
3796 return "&&"; //$NON-NLS-1$
3797 case TokenNamePLUS :
3798 return "+"; //$NON-NLS-1$
3799 case TokenNameMINUS :
3800 return "-"; //$NON-NLS-1$
3801 case TokenNameMINUS_GREATER :
3804 return "!"; //$NON-NLS-1$
3805 case TokenNameREMAINDER :
3806 return "%"; //$NON-NLS-1$
3808 return "^"; //$NON-NLS-1$
3810 return "&"; //$NON-NLS-1$
3811 case TokenNameMULTIPLY :
3812 return "*"; //$NON-NLS-1$
3814 return "|"; //$NON-NLS-1$
3815 case TokenNameTWIDDLE :
3816 return "~"; //$NON-NLS-1$
3817 case TokenNameTWIDDLE_EQUAL :
3818 return "~="; //$NON-NLS-1$
3819 case TokenNameDIVIDE :
3820 return "/"; //$NON-NLS-1$
3821 case TokenNameGREATER :
3822 return ">"; //$NON-NLS-1$
3823 case TokenNameLESS :
3824 return "<"; //$NON-NLS-1$
3825 case TokenNameLPAREN :
3826 return "("; //$NON-NLS-1$
3827 case TokenNameRPAREN :
3828 return ")"; //$NON-NLS-1$
3829 case TokenNameLBRACE :
3830 return "{"; //$NON-NLS-1$
3831 case TokenNameRBRACE :
3832 return "}"; //$NON-NLS-1$
3833 case TokenNameLBRACKET :
3834 return "["; //$NON-NLS-1$
3835 case TokenNameRBRACKET :
3836 return "]"; //$NON-NLS-1$
3837 case TokenNameSEMICOLON :
3838 return ";"; //$NON-NLS-1$
3839 case TokenNameQUESTION :
3840 return "?"; //$NON-NLS-1$
3841 case TokenNameCOLON :
3842 return ":"; //$NON-NLS-1$
3843 case TokenNameCOMMA :
3844 return ","; //$NON-NLS-1$
3846 return "."; //$NON-NLS-1$
3847 case TokenNameEQUAL :
3848 return "="; //$NON-NLS-1$
3851 case TokenNameDOLLAR :
3853 case TokenNameDOLLAR_LBRACE :
3856 return "EOF"; //$NON-NLS-1$
// ---- whitespace and comments: rendered as Kind(text) ----
3857 case TokenNameWHITESPACE :
3858 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3859 case TokenNameCOMMENT_LINE :
3860 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3861 case TokenNameCOMMENT_BLOCK :
3862 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3863 case TokenNameCOMMENT_PHPDOC :
3864 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3865 // case TokenNameHTML :
3866 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---- magic constants ----
3868 case TokenNameFILE :
3869 return "__FILE__"; //$NON-NLS-1$
3870 case TokenNameLINE :
3871 return "__LINE__"; //$NON-NLS-1$
3872 case TokenNameCLASS_C :
3873 return "__CLASS__"; //$NON-NLS-1$
3874 case TokenNameMETHOD_C :
3875 return "__METHOD__"; //$NON-NLS-1$
3876 case TokenNameFUNC_C :
3877 return "__FUNCTION__"; //$NON-NLS-1$
// ---- cast tokens ----
3878 case TokenNameboolCAST :
3879 return "( bool )"; //$NON-NLS-1$
3880 case TokenNameintCAST :
3881 return "( int )"; //$NON-NLS-1$
3882 case TokenNamedoubleCAST :
3883 return "( double )"; //$NON-NLS-1$
3884 case TokenNameobjectCAST :
3885 return "( object )"; //$NON-NLS-1$
3886 case TokenNamestringCAST :
3887 return "( string )"; //$NON-NLS-1$
// Fallback rendering: "not-a-token(<act>) " followed by the current token source.
// NOTE(review): presumably the default branch of the switch -- confirm. The
// explicit new Integer(act) boxing is unnecessary; concatenating the int
// directly would produce the same text.
3889 return "not-a-token(" + (new Integer(act)) + ") "
3890 + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Convenience constructor: delegates to the three-argument constructor with
 * checkNonExternalizedStringLiterals set to false.
 */
3897 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
3898 this(tokenizeComments, tokenizeWhiteSpace, false);
/**
 * Convenience constructor: delegates to the four-argument constructor with
 * assertMode set to false.
 */
3900 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3901 boolean checkNonExternalizedStringLiterals) {
3902 this(tokenizeComments, tokenizeWhiteSpace,
3903 checkNonExternalizedStringLiterals, false);
/**
 * Convenience constructor: delegates to the full constructor with
 * tokenizeStrings set to false and null for both task-related arguments.
 */
3905 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3906 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
3907 this(tokenizeComments, tokenizeWhiteSpace,
3908 checkNonExternalizedStringLiterals, assertMode, false, null, null);
/**
 * Full constructor; the shorter constructors all end up here. Each argument is
 * stored in the field of the same name. No source buffer is installed:
 * eofPosition starts at Integer.MAX_VALUE and encapsedStringStack is left
 * null (setSource allocates the stack).
 * <p>
 * taskTags and taskPriorities may be null, as the four-argument constructor
 * passes null for both.
 */
3910 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3911 boolean checkNonExternalizedStringLiterals, boolean assertMode,
3912 boolean tokenizeStrings,
3914 char[][] taskPriorities) {
3915 this.eofPosition = Integer.MAX_VALUE;
3916 this.tokenizeComments = tokenizeComments;
3917 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3918 this.tokenizeStrings = tokenizeStrings;
3919 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3920 this.assertMode = assertMode;
3921 this.encapsedStringStack = null;
3922 this.taskTags = taskTags;
3923 this.taskPriorities = taskPriorities;
// Hands the pending currentLine to parseTags so the NLS tags in the current
// token can be matched against the string literals recorded for that line.
// NOTE(review): the null check presumably returns early when no line is
// pending -- confirm against the statement it guards.
3925 private void checkNonExternalizeString() throws InvalidInputException {
3926 if (currentLine == null)
3928 parseTags(currentLine);
3930 private void parseTags(NLSLine line) throws InvalidInputException {
3931 String s = new String(getCurrentTokenSource());
3932 int pos = s.indexOf(TAG_PREFIX);
3933 int lineLength = line.size();
3935 int start = pos + TAG_PREFIX_LENGTH;
3936 int end = s.indexOf(TAG_POSTFIX, start);
3937 String index = s.substring(start, end);
3940 i = Integer.parseInt(index) - 1;
3941 // Tags are one based not zero based.
3942 } catch (NumberFormatException e) {
3943 i = -1; // we don't want to consider this as a valid NLS tag
3945 if (line.exists(i)) {
3948 pos = s.indexOf(TAG_PREFIX, start);
3950 this.nonNLSStrings = new StringLiteral[lineLength];
3951 int nonNLSCounter = 0;
3952 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3953 StringLiteral literal = (StringLiteral) iterator.next();
3954 if (literal != null) {
3955 this.nonNLSStrings[nonNLSCounter++] = literal;
3958 if (nonNLSCounter == 0) {
3959 this.nonNLSStrings = null;
3963 this.wasNonExternalizedStringLiteral = true;
3964 if (nonNLSCounter != lineLength) {
3965 System.arraycopy(this.nonNLSStrings, 0,
3966 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
/**
 * Translates the escape sequence at the scan position into the character it
 * denotes, leaving the result in currentCharacter and advancing
 * currentPosition past the characters it consumed.
 * <p>
 * The simple escapes map to backspace, tab, newline, form feed, carriage
 * return, double quote, single quote and backslash. Anything else is tried as
 * an octal escape of one to three digits; a third digit is only accepted when
 * the first digit is in the range 0-3.
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the sequence is not a
 *         recognised escape
 */
3971 public final void scanEscapeCharacter() throws InvalidInputException {
3972 // the string with "\\u" is a legal string of two chars \ and u
3973 //thus we use a direct access to the source (for regular cases).
3974 if (unicodeAsBackSlash) {
3975 // consume next character
3976 unicodeAsBackSlash = false;
3977 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
3978 // (source[currentPosition] == 'u')) {
3979 // getNextUnicodeChar();
3981 if (withoutUnicodePtr != 0) {
3982 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// regular case: read the escape selector straight from the source buffer
3986 currentCharacter = source[currentPosition++];
// NOTE(review): each assignment below presumably sits under the case label of
// the matching escape letter -- confirm against the labels.
3987 switch (currentCharacter) {
3989 currentCharacter = '\b';
3992 currentCharacter = '\t';
3995 currentCharacter = '\n';
3998 currentCharacter = '\f';
4001 currentCharacter = '\r';
4004 currentCharacter = '\"';
4007 currentCharacter = '\'';
4010 currentCharacter = '\\';
4013 // -----------octal escape--------------
4015 // OctalDigit OctalDigit
4016 // ZeroToThree OctalDigit OctalDigit
4017 int number = Character.getNumericValue(currentCharacter);
4018 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a three-digit octal escape
4019 boolean zeroToThreeNot = number > 3;
4020 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4021 int digit = Character.getNumericValue(currentCharacter);
4022 if (digit >= 0 && digit <= 7) {
// accumulate the value base 8
4023 number = (number * 8) + digit;
4025 .isDigit(currentCharacter = source[currentPosition++])) {
4026 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4027 // Digit --> ignore last character
4030 digit = Character.getNumericValue(currentCharacter);
4031 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4032 // OctalDigit OctalDigit
4033 number = (number * 8) + digit;
4034 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4035 // --> ignore last character
4039 } else { // has read \OctalDigit NonDigit--> ignore last
4043 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4047 } else { // has read \OctalDigit --> ignore last character
4051 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
4052 currentCharacter = (char) number;
4054 throw new InvalidInputException(INVALID_ESCAPE);
4057 // check presence of task: tags
4058 public void checkTaskTag(int commentStart, int commentEnd) {
4059 // only look for newer task: tags
4060 if (this.foundTaskCount > 0
4061 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4064 int foundTaskIndex = this.foundTaskCount;
4065 nextChar : for (int i = commentStart; i < commentEnd
4066 && i < this.eofPosition; i++) {
4068 char[] priority = null;
4069 // check for tag occurrence
4070 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4071 tag = this.taskTags[itag];
4072 priority = this.taskPriorities != null
4073 && itag < this.taskPriorities.length
4074 ? this.taskPriorities[itag]
4076 int tagLength = tag.length;
4077 for (int t = 0; t < tagLength; t++) {
4078 if (this.source[i + t] != tag[t])
4081 if (this.foundTaskTags == null) {
4082 this.foundTaskTags = new char[5][];
4083 this.foundTaskMessages = new char[5][];
4084 this.foundTaskPriorities = new char[5][];
4085 this.foundTaskPositions = new int[5][];
4086 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4087 System.arraycopy(this.foundTaskTags, 0,
4088 this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4089 this.foundTaskCount);
4090 System.arraycopy(this.foundTaskMessages, 0,
4091 this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4092 this.foundTaskCount);
4093 System.arraycopy(this.foundTaskPriorities, 0,
4094 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4095 0, this.foundTaskCount);
4096 System.arraycopy(this.foundTaskPositions, 0,
4097 this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4098 this.foundTaskCount);
4100 this.foundTaskTags[this.foundTaskCount] = tag;
4101 this.foundTaskPriorities[this.foundTaskCount] = priority;
4102 this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
4104 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4105 this.foundTaskCount++;
4106 i += tagLength - 1; // will be incremented when looping
4109 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4110 // retrieve message start and end positions
4111 int msgStart = this.foundTaskPositions[i][0]
4112 + this.foundTaskTags[i].length;
4113 int max_value = i + 1 < this.foundTaskCount
4114 ? this.foundTaskPositions[i + 1][0] - 1
4116 // at most beginning of next task
4117 if (max_value < msgStart)
4118 max_value = msgStart; // would only occur if tag is before EOF.
4121 for (int j = msgStart; j < max_value; j++) {
4122 if ((c = this.source[j]) == '\n' || c == '\r') {
4128 for (int j = max_value; j > msgStart; j--) {
4129 if ((c = this.source[j]) == '*') {
4137 if (msgStart == end)
4140 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4142 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4144 // update the end position of the task
4145 this.foundTaskPositions[i][1] = end;
4146 // get the message source
4147 final int messageLength = end - msgStart + 1;
4148 char[] message = new char[messageLength];
4149 System.arraycopy(source, msgStart, message, 0, messageLength);
4150 this.foundTaskMessages[i] = message;