1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Stack;
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
24 public class Scanner implements IScanner, ITerminalSymbols {
26 * APIs are - getNextToken() which returns the current type of the token
27 * (this value is not memorized by the scanner) - getCurrentTokenSource()
28 * which provides the token "REAL" source (aka all unicode have been
29 * transformed into a correct char) - sourceStart gives the position into the
30 * stream - currentPosition-1 gives the sourceEnd position into the stream
33 private boolean assertMode;
34 public boolean useAssertAsAnIndentifier = false;
35 //flag indicating if processed source contains occurrences of keyword assert
36 public boolean containsAssertKeyword = false;
37 public boolean recordLineSeparator;
38 public boolean phpMode = false;
39 public Stack encapsedStringStack = null;
40 public char currentCharacter;
41 public int startPosition;
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after this position eof are generated instead of real token from the
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
48 public boolean tokenizeStrings;
49 //source should be viewed as a window (aka a part)
50 //of an entire very large stream
53 public char[] withoutUnicodeBuffer;
54 public int withoutUnicodePtr;
55 //when == 0 ==> no unicode in the current token
56 public boolean unicodeAsBackSlash = false;
57 public boolean scanningFloatLiteral = false;
58 //support for /** comments
59 public int[] commentStops = new int[10];
60 public int[] commentStarts = new int[10];
61 public int commentPtr = -1; // no comment test with commentPtr value -1
62 protected int lastCommentLinePosition = -1;
63 //diet parsing support - jump over some method body when requested
64 public boolean diet = false;
65 //support for the poor-line-debuggers ....
66 //remember the position of the cr/lf
67 public int[] lineEnds = new int[250];
68 public int linePtr = -1;
69 public boolean wasAcr = false;
70 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
71 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
72 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
73 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
74 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
75 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
76 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
77 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
78 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
79 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
80 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
81 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
82 //----------------optimized identifier management------------------
83 static final char[] charArray_a = new char[]{'a'},
84 charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
85 charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
86 charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
87 charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
88 charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
89 charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
90 charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
91 charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
92 charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
93 charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
94 charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
95 charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
96 charArray_z = new char[]{'z'};
97 static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
98 '\u0000', '\u0000', '\u0000'};
99 static final int TableSize = 30, InternalTableSize = 6;
101 public static final int OptimizedLength = 6;
103 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
104 // support for detecting non-externalized string literals
105 int currentLineNr = -1;
106 int previousLineNr = -1;
107 NLSLine currentLine = null;
108 List lines = new ArrayList();
109 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
110 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
111 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
112 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
113 public StringLiteral[] nonNLSStrings = null;
114 public boolean checkNonExternalizedStringLiterals = true;
115 public boolean wasNonExternalizedStringLiteral = false;
117 for (int i = 0; i < 6; i++) {
118 for (int j = 0; j < TableSize; j++) {
119 for (int k = 0; k < InternalTableSize; k++) {
120 charArray_length[i][j][k] = initCharArray;
125 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
127 public static final int RoundBracket = 0;
128 public static final int SquareBracket = 1;
129 public static final int CurlyBracket = 2;
130 public static final int BracketKinds = 3;
132 public char[][] foundTaskTags = null;
133 public char[][] foundTaskMessages;
134 public char[][] foundTaskPriorities = null;
135 public int[][] foundTaskPositions;
136 public int foundTaskCount = 0;
137 public char[][] taskTags = null;
138 public char[][] taskPriorities = null;
139 public static final boolean DEBUG = false;
140 public static final boolean TRACE = false;
143 * Determines if the specified character is permissible as the first
144 * character in a PHP identifier
146 public static boolean isPHPIdentifierStart(char ch) {
147 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
150 * Determines if the specified character may be part of a PHP identifier as
151 * other than the first character
153 public static boolean isPHPIdentifierPart(char ch) {
154 return Character.isLetterOrDigit(ch) || (ch == '_')
155 || (0x7F <= ch && ch <= 0xFF);
157 public final boolean atEnd() {
158 // This code is not relevant if source is
159 // Only a part of the real stream input
160 return source.length == currentPosition;
162 public char[] getCurrentIdentifierSource() {
163 //return the token REAL source (aka unicodes are precomputed)
165 // if (withoutUnicodePtr != 0)
166 // //0 is used as a fast test flag so the real first char is in position 1
168 // withoutUnicodeBuffer,
170 // result = new char[withoutUnicodePtr],
172 // withoutUnicodePtr);
174 int length = currentPosition - startPosition;
175 switch (length) { // see OptimizedLength
177 return optimizedCurrentTokenSource1();
179 return optimizedCurrentTokenSource2();
181 return optimizedCurrentTokenSource3();
183 return optimizedCurrentTokenSource4();
185 return optimizedCurrentTokenSource5();
187 return optimizedCurrentTokenSource6();
190 System.arraycopy(source, startPosition, result = new char[length], 0,
195 public int getCurrentTokenEndPosition() {
196 return this.currentPosition - 1;
198 public final char[] getCurrentTokenSource() {
199 // Return the token REAL source (aka unicodes are precomputed)
201 // if (withoutUnicodePtr != 0)
202 // // 0 is used as a fast test flag so the real first char is in position 1
204 // withoutUnicodeBuffer,
206 // result = new char[withoutUnicodePtr],
208 // withoutUnicodePtr);
211 System.arraycopy(source, startPosition,
212 result = new char[length = currentPosition - startPosition], 0, length);
216 public final char[] getCurrentTokenSource(int startPos) {
217 // Return the token REAL source (aka unicodes are precomputed)
219 // if (withoutUnicodePtr != 0)
220 // // 0 is used as a fast test flag so the real first char is in position 1
222 // withoutUnicodeBuffer,
224 // result = new char[withoutUnicodePtr],
226 // withoutUnicodePtr);
229 System.arraycopy(source, startPos,
230 result = new char[length = currentPosition - startPos], 0, length);
234 public final char[] getCurrentTokenSourceString() {
235 //return the token REAL source (aka unicodes are precomputed).
236 //REMOVE the two " that are at the beginning and the end.
238 if (withoutUnicodePtr != 0)
239 //0 is used as a fast test flag so the real first char is in position 1
240 System.arraycopy(withoutUnicodeBuffer, 2,
241 //2 is 1 (real start) + 1 (to jump over the ")
242 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
245 System.arraycopy(source, startPosition + 1,
246 result = new char[length = currentPosition - startPosition - 2], 0,
251 public int getCurrentTokenStartPosition() {
252 return this.startPosition;
254 public final char[] getCurrentStringLiteralSource() {
255 // Return the token REAL source (aka unicodes are precomputed)
258 System.arraycopy(source, startPosition + 1,
259 result = new char[length = currentPosition - startPosition - 2], 0,
265 * Search the source position corresponding to the end of a given line number
267 * Line numbers are 1-based, and relative to the scanner initialPosition.
268 * Character positions are 0-based.
270 * In case the given line number is inconsistent, answers -1.
272 public final int getLineEnd(int lineNumber) {
273 if (lineEnds == null)
275 if (lineNumber >= lineEnds.length)
279 if (lineNumber == lineEnds.length - 1)
281 return lineEnds[lineNumber - 1];
282 // next line start one character behind the lineEnd of the previous line
285 * Search the source position corresponding to the beginning of a given line
288 * Line numbers are 1-based, and relative to the scanner initialPosition.
289 * Character positions are 0-based.
291 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
293 * In case the given line number is inconsistent, answers -1.
295 public final int getLineStart(int lineNumber) {
296 if (lineEnds == null)
298 if (lineNumber >= lineEnds.length)
303 return initialPosition;
304 return lineEnds[lineNumber - 2] + 1;
305 // next line start one character behind the lineEnd of the previous line
307 public final boolean getNextChar(char testedChar) {
309 //handle the case of unicode.
310 //when a unicode appears then we must use a buffer that holds char
312 //At the end of this method currentCharacter holds the new visited char
313 //and currentPosition points right next after it
314 //Both previous lines are true if the currentCharacter is == to the
316 //On false, no side effect has occured.
317 //ALL getNextChar.... ARE OPTIMIZED COPIES
318 int temp = currentPosition;
320 currentCharacter = source[currentPosition++];
321 // if (((currentCharacter = source[currentPosition++]) == '\\')
322 // && (source[currentPosition] == 'u')) {
323 // //-------------unicode traitement ------------
324 // int c1, c2, c3, c4;
325 // int unicodeSize = 6;
326 // currentPosition++;
327 // while (source[currentPosition] == 'u') {
328 // currentPosition++;
332 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
334 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
336 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
338 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
340 // currentPosition = temp;
344 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
345 // if (currentCharacter != testedChar) {
346 // currentPosition = temp;
349 // unicodeAsBackSlash = currentCharacter == '\\';
351 // //need the unicode buffer
352 // if (withoutUnicodePtr == 0) {
353 // //buffer all the entries that have been left aside....
354 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
358 // withoutUnicodeBuffer,
360 // withoutUnicodePtr);
362 // //fill the buffer with the char
363 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
366 // } //-------------end unicode traitement--------------
368 if (currentCharacter != testedChar) {
369 currentPosition = temp;
372 unicodeAsBackSlash = false;
373 // if (withoutUnicodePtr != 0)
374 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
377 } catch (IndexOutOfBoundsException e) {
378 unicodeAsBackSlash = false;
379 currentPosition = temp;
383 public final int getNextChar(char testedChar1, char testedChar2) {
384 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
385 //test can be done with (x==0) for the first and (x>0) for the second
386 //handle the case of unicode.
387 //when a unicode appears then we must use a buffer that holds char
389 //At the end of this method currentCharacter holds the new visited char
390 //and currentPosition points right next after it
391 //Both previous lines are true if the currentCharacter is == to the
393 //On false, no side effect has occured.
394 //ALL getNextChar.... ARE OPTIMIZED COPIES
395 int temp = currentPosition;
398 currentCharacter = source[currentPosition++];
399 // if (((currentCharacter = source[currentPosition++]) == '\\')
400 // && (source[currentPosition] == 'u')) {
401 // //-------------unicode traitement ------------
402 // int c1, c2, c3, c4;
403 // int unicodeSize = 6;
404 // currentPosition++;
405 // while (source[currentPosition] == 'u') {
406 // currentPosition++;
410 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
412 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
414 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
416 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
418 // currentPosition = temp;
422 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
423 // if (currentCharacter == testedChar1)
425 // else if (currentCharacter == testedChar2)
428 // currentPosition = temp;
432 // //need the unicode buffer
433 // if (withoutUnicodePtr == 0) {
434 // //buffer all the entries that have been left aside....
435 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
439 // withoutUnicodeBuffer,
441 // withoutUnicodePtr);
443 // //fill the buffer with the char
444 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
446 // } //-------------end unicode traitement--------------
448 if (currentCharacter == testedChar1)
450 else if (currentCharacter == testedChar2)
453 currentPosition = temp;
456 // if (withoutUnicodePtr != 0)
457 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
460 } catch (IndexOutOfBoundsException e) {
461 currentPosition = temp;
465 public final boolean getNextCharAsDigit() {
467 //handle the case of unicode.
468 //when a unicode appears then we must use a buffer that holds char
470 //At the end of this method currentCharacter holds the new visited char
471 //and currentPosition points right next after it
472 //Both previous lines are true if the currentCharacter is a digit
473 //On false, no side effect has occured.
474 //ALL getNextChar.... ARE OPTIMIZED COPIES
475 int temp = currentPosition;
477 currentCharacter = source[currentPosition++];
478 // if (((currentCharacter = source[currentPosition++]) == '\\')
479 // && (source[currentPosition] == 'u')) {
480 // //-------------unicode traitement ------------
481 // int c1, c2, c3, c4;
482 // int unicodeSize = 6;
483 // currentPosition++;
484 // while (source[currentPosition] == 'u') {
485 // currentPosition++;
489 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
491 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
493 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
495 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
497 // currentPosition = temp;
501 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
502 // if (!Character.isDigit(currentCharacter)) {
503 // currentPosition = temp;
507 // //need the unicode buffer
508 // if (withoutUnicodePtr == 0) {
509 // //buffer all the entries that have been left aside....
510 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
514 // withoutUnicodeBuffer,
516 // withoutUnicodePtr);
518 // //fill the buffer with the char
519 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
521 // } //-------------end unicode traitement--------------
523 if (!Character.isDigit(currentCharacter)) {
524 currentPosition = temp;
527 // if (withoutUnicodePtr != 0)
528 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
531 } catch (IndexOutOfBoundsException e) {
532 currentPosition = temp;
536 public final boolean getNextCharAsDigit(int radix) {
538 //handle the case of unicode.
539 //when a unicode appears then we must use a buffer that holds char
541 //At the end of this method currentCharacter holds the new visited char
542 //and currentPosition points right next after it
543 //Both previous lines are true if the currentCharacter is a digit base on
545 //On false, no side effect has occured.
546 //ALL getNextChar.... ARE OPTIMIZED COPIES
547 int temp = currentPosition;
549 currentCharacter = source[currentPosition++];
550 // if (((currentCharacter = source[currentPosition++]) == '\\')
551 // && (source[currentPosition] == 'u')) {
552 // //-------------unicode traitement ------------
553 // int c1, c2, c3, c4;
554 // int unicodeSize = 6;
555 // currentPosition++;
556 // while (source[currentPosition] == 'u') {
557 // currentPosition++;
561 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
563 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
565 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
567 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
569 // currentPosition = temp;
573 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
574 // if (Character.digit(currentCharacter, radix) == -1) {
575 // currentPosition = temp;
579 // //need the unicode buffer
580 // if (withoutUnicodePtr == 0) {
581 // //buffer all the entries that have been left aside....
582 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
586 // withoutUnicodeBuffer,
588 // withoutUnicodePtr);
590 // //fill the buffer with the char
591 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
593 // } //-------------end unicode traitement--------------
595 if (Character.digit(currentCharacter, radix) == -1) {
596 currentPosition = temp;
599 // if (withoutUnicodePtr != 0)
600 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
603 } catch (IndexOutOfBoundsException e) {
604 currentPosition = temp;
608 public boolean getNextCharAsJavaIdentifierPart() {
610 //handle the case of unicode.
611 //when a unicode appears then we must use a buffer that holds char
613 //At the end of this method currentCharacter holds the new visited char
614 //and currentPosition points right next after it
615 //Both previous lines are true if the currentCharacter is a
616 // JavaIdentifierPart
617 //On false, no side effect has occured.
618 //ALL getNextChar.... ARE OPTIMIZED COPIES
619 int temp = currentPosition;
621 currentCharacter = source[currentPosition++];
622 // if (((currentCharacter = source[currentPosition++]) == '\\')
623 // && (source[currentPosition] == 'u')) {
624 // //-------------unicode traitement ------------
625 // int c1, c2, c3, c4;
626 // int unicodeSize = 6;
627 // currentPosition++;
628 // while (source[currentPosition] == 'u') {
629 // currentPosition++;
633 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
635 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
637 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
639 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
641 // currentPosition = temp;
645 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
646 // if (!isPHPIdentifierPart(currentCharacter)) {
647 // currentPosition = temp;
651 // //need the unicode buffer
652 // if (withoutUnicodePtr == 0) {
653 // //buffer all the entries that have been left aside....
654 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
658 // withoutUnicodeBuffer,
660 // withoutUnicodePtr);
662 // //fill the buffer with the char
663 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
665 // } //-------------end unicode traitement--------------
667 if (!isPHPIdentifierPart(currentCharacter)) {
668 currentPosition = temp;
671 // if (withoutUnicodePtr != 0)
672 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
675 } catch (IndexOutOfBoundsException e) {
676 currentPosition = temp;
680 public int getCastOrParen() {
681 int tempPosition = currentPosition;
682 char tempCharacter = currentCharacter;
683 int tempToken = TokenNameLPAREN;
684 boolean found = false;
685 StringBuffer buf = new StringBuffer();
688 currentCharacter = source[currentPosition++];
689 } while (currentCharacter == ' ' || currentCharacter == '\t');
690 while ((currentCharacter >= 'a' && currentCharacter <= 'z')
691 || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
692 buf.append(currentCharacter);
693 currentCharacter = source[currentPosition++];
695 if (buf.length() >= 3 && buf.length() <= 7) {
696 char[] data = buf.toString().toCharArray();
698 switch (data.length) {
701 if ((data[index] == 'i') && (data[++index] == 'n')
702 && (data[++index] == 't')) {
704 tempToken = TokenNameintCAST;
709 if ((data[index] == 'b') && (data[++index] == 'o')
710 && (data[++index] == 'o') && (data[++index] == 'l')) {
712 tempToken = TokenNameboolCAST;
715 if ((data[index] == 'r') && (data[++index] == 'e')
716 && (data[++index] == 'a') && (data[++index] == 'l')) {
718 tempToken = TokenNamedoubleCAST;
724 if ((data[index] == 'a') && (data[++index] == 'r')
725 && (data[++index] == 'r') && (data[++index] == 'a')
726 && (data[++index] == 'y')) {
728 tempToken = TokenNamearrayCAST;
731 if ((data[index] == 'u') && (data[++index] == 'n')
732 && (data[++index] == 's') && (data[++index] == 'e')
733 && (data[++index] == 't')) {
735 tempToken = TokenNameunsetCAST;
738 if ((data[index] == 'f') && (data[++index] == 'l')
739 && (data[++index] == 'o') && (data[++index] == 'a')
740 && (data[++index] == 't')) {
742 tempToken = TokenNamedoubleCAST;
748 // object string double
749 if ((data[index] == 'o') && (data[++index] == 'b')
750 && (data[++index] == 'j') && (data[++index] == 'e')
751 && (data[++index] == 'c') && (data[++index] == 't')) {
753 tempToken = TokenNameobjectCAST;
756 if ((data[index] == 's') && (data[++index] == 't')
757 && (data[++index] == 'r') && (data[++index] == 'i')
758 && (data[++index] == 'n') && (data[++index] == 'g')) {
760 tempToken = TokenNamestringCAST;
763 if ((data[index] == 'd') && (data[++index] == 'o')
764 && (data[++index] == 'u') && (data[++index] == 'b')
765 && (data[++index] == 'l') && (data[++index] == 'e')) {
767 tempToken = TokenNamedoubleCAST;
774 if ((data[index] == 'b') && (data[++index] == 'o')
775 && (data[++index] == 'o') && (data[++index] == 'l')
776 && (data[++index] == 'e') && (data[++index] == 'a')
777 && (data[++index] == 'n')) {
779 tempToken = TokenNameboolCAST;
782 if ((data[index] == 'i') && (data[++index] == 'n')
783 && (data[++index] == 't') && (data[++index] == 'e')
784 && (data[++index] == 'g') && (data[++index] == 'e')
785 && (data[++index] == 'r')) {
787 tempToken = TokenNameintCAST;
793 while (currentCharacter == ' ' || currentCharacter == '\t') {
794 currentCharacter = source[currentPosition++];
796 if (currentCharacter == ')') {
801 } catch (IndexOutOfBoundsException e) {
803 currentCharacter = tempCharacter;
804 currentPosition = tempPosition;
805 return TokenNameLPAREN;
807 public void consumeStringInterpolated() throws InvalidInputException {
809 // consume next character
810 unicodeAsBackSlash = false;
811 currentCharacter = source[currentPosition++];
812 // if (((currentCharacter = source[currentPosition++]) == '\\')
813 // && (source[currentPosition] == 'u')) {
814 // getNextUnicodeChar();
816 // if (withoutUnicodePtr != 0) {
817 // withoutUnicodeBuffer[++withoutUnicodePtr] =
821 while (currentCharacter != '`') {
822 /** ** in PHP \r and \n are valid in string literals *** */
823 // if ((currentCharacter == '\n')
824 // || (currentCharacter == '\r')) {
825 // // relocate if finding another quote fairly close: thus unicode
826 // '/u000D' will be fully consumed
827 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
828 // if (currentPosition + lookAhead == source.length)
830 // if (source[currentPosition + lookAhead] == '\n')
832 // if (source[currentPosition + lookAhead] == '\"') {
833 // currentPosition += lookAhead + 1;
837 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
839 if (currentCharacter == '\\') {
840 int escapeSize = currentPosition;
841 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
842 //scanEscapeCharacter make a side effect on this value and we need
843 // the previous value few lines down this one
844 scanDoubleQuotedEscapeCharacter();
845 escapeSize = currentPosition - escapeSize;
846 if (withoutUnicodePtr == 0) {
847 //buffer all the entries that have been left aside....
848 withoutUnicodePtr = currentPosition - escapeSize - 1
850 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
852 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
853 } else { //overwrite the / in the buffer
854 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
855 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
856 // where only one is correct
861 // consume next character
862 unicodeAsBackSlash = false;
863 currentCharacter = source[currentPosition++];
864 // if (((currentCharacter = source[currentPosition++]) == '\\')
865 // && (source[currentPosition] == 'u')) {
866 // getNextUnicodeChar();
868 if (withoutUnicodePtr != 0) {
869 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
873 } catch (IndexOutOfBoundsException e) {
874 // reset end position for error reporting
876 throw new InvalidInputException(UNTERMINATED_STRING);
877 } catch (InvalidInputException e) {
878 if (e.getMessage().equals(INVALID_ESCAPE)) {
879 // relocate if finding another quote fairly close: thus unicode
880 // '/u000D' will be fully consumed
881 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
882 if (currentPosition + lookAhead == source.length)
884 if (source[currentPosition + lookAhead] == '\n')
886 if (source[currentPosition + lookAhead] == '`') {
887 currentPosition += lookAhead + 1;
894 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
895 // //$NON-NLS-?$ where ? is an
897 if (currentLine == null) {
898 currentLine = new NLSLine();
899 lines.add(currentLine);
901 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
902 startPosition, currentPosition - 1));
905 public void consumeStringConstant() throws InvalidInputException {
907 // consume next character
908 unicodeAsBackSlash = false;
909 currentCharacter = source[currentPosition++];
910 // if (((currentCharacter = source[currentPosition++]) == '\\')
911 // && (source[currentPosition] == 'u')) {
912 // getNextUnicodeChar();
914 // if (withoutUnicodePtr != 0) {
915 // withoutUnicodeBuffer[++withoutUnicodePtr] =
919 while (currentCharacter != '\'') {
920 /** ** in PHP \r and \n are valid in string literals *** */
921 // if ((currentCharacter == '\n')
922 // || (currentCharacter == '\r')) {
923 // // relocate if finding another quote fairly close: thus unicode
924 // '/u000D' will be fully consumed
925 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
926 // if (currentPosition + lookAhead == source.length)
928 // if (source[currentPosition + lookAhead] == '\n')
930 // if (source[currentPosition + lookAhead] == '\"') {
931 // currentPosition += lookAhead + 1;
935 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
937 if (currentCharacter == '\\') {
938 int escapeSize = currentPosition;
939 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
940 //scanEscapeCharacter make a side effect on this value and we need
941 // the previous value few lines down this one
942 scanSingleQuotedEscapeCharacter();
943 escapeSize = currentPosition - escapeSize;
944 if (withoutUnicodePtr == 0) {
945 //buffer all the entries that have been left aside....
946 withoutUnicodePtr = currentPosition - escapeSize - 1
948 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
950 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
951 } else { //overwrite the / in the buffer
952 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
953 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
954 // where only one is correct
959 // consume next character
960 unicodeAsBackSlash = false;
961 currentCharacter = source[currentPosition++];
962 // if (((currentCharacter = source[currentPosition++]) == '\\')
963 // && (source[currentPosition] == 'u')) {
964 // getNextUnicodeChar();
966 if (withoutUnicodePtr != 0) {
967 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
971 } catch (IndexOutOfBoundsException e) {
972 // reset end position for error reporting
974 throw new InvalidInputException(UNTERMINATED_STRING);
975 } catch (InvalidInputException e) {
976 if (e.getMessage().equals(INVALID_ESCAPE)) {
977 // relocate if finding another quote fairly close: thus unicode
978 // '/u000D' will be fully consumed
979 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
980 if (currentPosition + lookAhead == source.length)
982 if (source[currentPosition + lookAhead] == '\n')
984 if (source[currentPosition + lookAhead] == '\'') {
985 currentPosition += lookAhead + 1;
992 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
993 // //$NON-NLS-?$ where ? is an
995 if (currentLine == null) {
996 currentLine = new NLSLine();
997 lines.add(currentLine);
999 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1000 startPosition, currentPosition - 1));
// Consumes the rest of a double-quoted string literal: characters are read
// from source until a '"' is reached, and every backslash sequence is handed
// to scanDoubleQuotedEscapeCharacter(). Once an escape has been seen, the
// decoded characters are collected in withoutUnicodeBuffer.
// Throws InvalidInputException(UNTERMINATED_STRING) when the source ends
// before the closing quote.
// NOTE(review): the embedded line numbering has gaps, so some statements of
// this method (e.g. the try opener, closing braces) are not visible here.
1003 public void consumeStringLiteral() throws InvalidInputException {
1005 // consume next character
1006 unicodeAsBackSlash = false;
1007 currentCharacter = source[currentPosition++];
1008 // if (((currentCharacter = source[currentPosition++]) == '\\')
1009 // && (source[currentPosition] == 'u')) {
1010 // getNextUnicodeChar();
1012 // if (withoutUnicodePtr != 0) {
1013 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1014 // currentCharacter;
// scan until the closing double quote
1017 while (currentCharacter != '"') {
1018 /** ** in PHP \r and \n are valid in string literals *** */
1019 // if ((currentCharacter == '\n')
1020 // || (currentCharacter == '\r')) {
1021 // // relocate if finding another quote fairly close: thus unicode
1022 // '/u000D' will be fully consumed
1023 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1024 // if (currentPosition + lookAhead == source.length)
1026 // if (source[currentPosition + lookAhead] == '\n')
1028 // if (source[currentPosition + lookAhead] == '\"') {
1029 // currentPosition += lookAhead + 1;
1033 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
// backslash: decode the escape sequence that follows
1035 if (currentCharacter == '\\') {
// start offset of the escape; converted into its length two statements below
1036 int escapeSize = currentPosition;
1037 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1038 //scanEscapeCharacter make a side effect on this value and we need
1039 // the previous value few lines down this one
1040 scanDoubleQuotedEscapeCharacter();
1041 escapeSize = currentPosition - escapeSize;
// buffer not in use yet: copy the raw characters scanned so far, then append
// the decoded character
1042 if (withoutUnicodePtr == 0) {
1043 //buffer all the entries that have been left aside....
1044 withoutUnicodePtr = currentPosition - escapeSize - 1
1046 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1048 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1049 } else { //overwrite the / in the buffer
1050 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1051 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1052 // where only one is correct
1053 withoutUnicodePtr--;
1057 // consume next character
1058 unicodeAsBackSlash = false;
1059 currentCharacter = source[currentPosition++];
1060 // if (((currentCharacter = source[currentPosition++]) == '\\')
1061 // && (source[currentPosition] == 'u')) {
1062 // getNextUnicodeChar();
// keep the buffer in step with the source once it is in use
1064 if (withoutUnicodePtr != 0) {
1065 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing quote never came
1069 } catch (IndexOutOfBoundsException e) {
1070 // reset end position for error reporting
1072 throw new InvalidInputException(UNTERMINATED_STRING);
1073 } catch (InvalidInputException e) {
// error recovery for an invalid escape: search at most 50 characters ahead
// for a closing '"' and move currentPosition just past it
1074 if (e.getMessage().equals(INVALID_ESCAPE)) {
1075 // relocate if finding another quote fairly close: thus unicode
1076 // '/u000D' will be fully consumed
1077 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1078 if (currentPosition + lookAhead == source.length)
1080 if (source[currentPosition + lookAhead] == '\n')
1082 if (source[currentPosition + lookAhead] == '\"') {
1083 currentPosition += lookAhead + 1;
// when NLS checking is enabled, record this literal on the current NLSLine
// (created and added to lines on first use)
1090 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1091 // //$NON-NLS-?$ where ? is an
1093 if (currentLine == null) {
1094 currentLine = new NLSLine();
1095 lines.add(currentLine);
1097 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1098 startPosition, currentPosition - 1));
// Scans forward from currentPosition and returns the type of the next token
// (one of the TokenName... constants).
// Visible flow: (1) shortcuts into getInlinedHTML() / jumpOverMethodBody(),
// (2) continuation of an interpolated string while encapsedStringStack is
// non-empty, (3) whitespace skipping, (4) a switch on currentCharacter for
// punctuation, operators, strings, heredocs and comments, (5) variables,
// identifiers and numbers. Running off the end of source
// (IndexOutOfBoundsException) yields TokenNameEOF.
// Throws InvalidInputException, e.g. UNTERMINATED_COMMENT for an unclosed
// block comment.
// NOTE(review): the embedded line numbering has gaps; the guards and case
// labels of many branches are on lines not visible here - confirm against the
// full file before changing control flow.
1101 public int getNextToken() throws InvalidInputException {
// NOTE(review): presumably taken while the scanner is outside PHP code - the
// guard is not visible here
1103 return getInlinedHTML(currentPosition);
1106 this.wasAcr = false;
// after skipping a body, report RBRACE, or EOF when currentPosition has moved
// beyond source.length
1108 jumpOverMethodBody();
1110 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1114 withoutUnicodePtr = 0;
1115 //start with a new token
// top of encapsedStringStack, or ' ' when the stack is empty
1116 char encapsedChar = ' ';
1117 if (!encapsedStringStack.isEmpty()) {
1118 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// stack top is a string delimiter: continue scanning the body of that string
1120 if (encapsedChar != '$' && encapsedChar != ' ') {
1121 currentCharacter = source[currentPosition++];
// delimiter reached: emit the EncapsedString token selected by the switch
1122 if (currentCharacter == encapsedChar) {
1123 switch (currentCharacter) {
1125 return TokenNameEncapsedString0;
1127 return TokenNameEncapsedString1;
1129 return TokenNameEncapsedString2;
// collect plain string text up to the delimiter
1132 while (currentCharacter != encapsedChar) {
1133 /** ** in PHP \r and \n are valid in string literals *** */
1134 switch (currentCharacter) {
// escape handling, same buffer bookkeeping as in consumeStringLiteral()
1136 int escapeSize = currentPosition;
1137 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1138 //scanEscapeCharacter make a side effect on this value and
1139 // we need the previous value few lines down this one
1140 scanDoubleQuotedEscapeCharacter();
1141 escapeSize = currentPosition - escapeSize;
1142 if (withoutUnicodePtr == 0) {
1143 //buffer all the entries that have been left aside....
1144 withoutUnicodePtr = currentPosition - escapeSize - 1
1146 System.arraycopy(source, startPosition,
1147 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1148 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1149 } else { //overwrite the / in the buffer
1150 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1151 if (backSlashAsUnicodeInString) { //there are TWO \ in
1152 withoutUnicodePtr--;
// next char starts an identifier or is '{': push the '$' marker and return
// the text scanned so far as TokenNameSTRING
1157 if (isPHPIdentifierStart(source[currentPosition])
1158 || source[currentPosition] == '{') {
1160 encapsedStringStack.push(new Character('$'));
1161 return TokenNameSTRING;
1165 if (source[currentPosition] == '$') { // CURLY_OPEN
1167 encapsedStringStack.push(new Character('$'));
1168 return TokenNameSTRING;
1171 // consume next character
1172 unicodeAsBackSlash = false;
1173 currentCharacter = source[currentPosition++];
1174 if (withoutUnicodePtr != 0) {
1175 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1180 return TokenNameSTRING;
1182 // ---------Consume white space and handles startPosition---------
1183 int whiteStart = currentPosition;
1184 startPosition = currentPosition;
1185 currentCharacter = source[currentPosition++];
// '$' marker on the stack: tokenize the expression embedded in a string
// (NOTE(review): case labels of this switch are not visible here)
1186 if (encapsedChar == '$') {
1187 switch (currentCharacter) {
1189 currentCharacter = source[currentPosition++];
1190 return TokenNameSTRING;
1192 if (encapsedChar == '$') {
1193 if (getNextChar('$'))
1194 return TokenNameLBRACE_DOLLAR;
1196 return TokenNameLBRACE;
1198 return TokenNameRBRACE;
1200 return TokenNameLBRACKET;
1202 return TokenNameRBRACKET;
1204 if (tokenizeStrings) {
1205 consumeStringConstant();
1206 return TokenNameStringSingleQuote;
1208 return TokenNameEncapsedString1;
1210 return TokenNameEncapsedString2;
1212 if (tokenizeStrings) {
1213 consumeStringInterpolated();
1214 return TokenNameStringInterpolated;
1216 return TokenNameEncapsedString0;
1218 if (getNextChar('>'))
1219 return TokenNameMINUS_GREATER;
1220 return TokenNameSTRING;
// '$': either "${" or a variable name; anything else (including end of
// source) rewinds to just after the '$' and returns TokenNameSTRING
1222 if (currentCharacter == '$') {
1223 int oldPosition = currentPosition;
1225 currentCharacter = source[currentPosition++];
1226 if (currentCharacter == '{') {
1227 return TokenNameDOLLAR_LBRACE;
1229 if (isPHPIdentifierStart(currentCharacter)) {
1230 return scanIdentifierOrKeyword(true);
1232 currentPosition = oldPosition;
1233 return TokenNameSTRING;
1235 } catch (IndexOutOfBoundsException e) {
1236 currentPosition = oldPosition;
1237 return TokenNameSTRING;
1240 if (isPHPIdentifierStart(currentCharacter))
1241 return scanIdentifierOrKeyword(false);
1242 if (Character.isDigit(currentCharacter))
1243 return scanNumber(false);
1244 return TokenNameERROR;
1247 // boolean isWhiteSpace;
// skip whitespace; startPosition follows along and line ends are recorded
1249 while ((currentCharacter == ' ')
1250 || Character.isWhitespace(currentCharacter)) {
1251 startPosition = currentPosition;
1252 currentCharacter = source[currentPosition++];
1253 // if (((currentCharacter = source[currentPosition++]) == '\\')
1254 // && (source[currentPosition] == 'u')) {
1255 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1257 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1258 checkNonExternalizeString();
1259 if (recordLineSeparator) {
1260 pushLineSeparator();
1265 // isWhiteSpace = (currentCharacter == ' ')
1266 // || Character.isWhitespace(currentCharacter);
// whitespace was skipped and the client wants it reported as a token
1269 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1270 // reposition scanner in case we are interested by spaces as tokens
1272 startPosition = whiteStart;
1273 return TokenNameWHITESPACE;
1275 //little trick to get out in the middle of a source computation
1276 if (currentPosition > eofPosition)
1277 return TokenNameEOF;
1278 // ---------Identify the next token-------------
// NOTE(review): the case labels of this switch are on lines not visible here;
// the branches are identified below by the tokens they return
1279 switch (currentCharacter) {
1281 return getCastOrParen();
1283 return TokenNameRPAREN;
1285 return TokenNameLBRACE;
1287 return TokenNameRBRACE;
1289 return TokenNameLBRACKET;
1291 return TokenNameRBRACKET;
1293 return TokenNameSEMICOLON;
1295 return TokenNameCOMMA;
// dot: ".=", a number with a leading dot, or a plain DOT
1297 if (getNextChar('='))
1298 return TokenNameDOT_EQUAL;
1299 if (getNextCharAsDigit())
1300 return scanNumber(true);
1301 return TokenNameDOT;
// getNextChar(a, b) == 0 is used for "the next char is a" (first alternative)
1305 if ((test = getNextChar('+', '=')) == 0)
1306 return TokenNamePLUS_PLUS;
1308 return TokenNamePLUS_EQUAL;
1309 return TokenNamePLUS;
1314 if ((test = getNextChar('-', '=')) == 0)
1315 return TokenNameMINUS_MINUS;
1317 return TokenNameMINUS_EQUAL;
1318 if (getNextChar('>'))
1319 return TokenNameMINUS_GREATER;
1320 return TokenNameMINUS;
1323 if (getNextChar('='))
1324 return TokenNameTWIDDLE_EQUAL;
1325 return TokenNameTWIDDLE;
1327 if (getNextChar('=')) {
1328 if (getNextChar('=')) {
1329 return TokenNameNOT_EQUAL_EQUAL;
1331 return TokenNameNOT_EQUAL;
1333 return TokenNameNOT;
1335 if (getNextChar('='))
1336 return TokenNameMULTIPLY_EQUAL;
1337 return TokenNameMULTIPLY;
1339 if (getNextChar('='))
1340 return TokenNameREMAINDER_EQUAL;
1341 return TokenNameREMAINDER;
// less-than family: read one more char; at end of source fall back to LESS
1344 int oldPosition = currentPosition;
1346 currentCharacter = source[currentPosition++];
1347 } catch (IndexOutOfBoundsException e) {
1348 currentPosition = oldPosition;
1349 return TokenNameLESS;
1351 switch (currentCharacter) {
1353 return TokenNameLESS_EQUAL;
1355 return TokenNameNOT_EQUAL;
1357 if (getNextChar('='))
1358 return TokenNameLEFT_SHIFT_EQUAL;
// a further '<' starts a heredoc: skip whitespace, then read the label
1359 if (getNextChar('<')) {
1360 currentCharacter = source[currentPosition++];
1361 while (Character.isWhitespace(currentCharacter)) {
1362 currentCharacter = source[currentPosition++];
// heredocStart/heredocLength delimit the label inside source
1364 int heredocStart = currentPosition - 1;
1365 int heredocLength = 0;
1366 if (isPHPIdentifierStart(currentCharacter)) {
1367 currentCharacter = source[currentPosition++];
1369 return TokenNameERROR;
1371 while (isPHPIdentifierPart(currentCharacter)) {
1372 currentCharacter = source[currentPosition++];
1374 heredocLength = currentPosition - heredocStart - 1;
1375 // heredoc end-tag determination
1376 boolean endTag = true;
1379 ch = source[currentPosition++];
1380 if (ch == '\r' || ch == '\n') {
1381 if (recordLineSeparator) {
1382 pushLineSeparator();
// compare the text at currentPosition with the label, char by char
1386 for (int i = 0; i < heredocLength; i++) {
1387 if (source[currentPosition + i] != source[heredocStart
// step over the label text
1394 currentPosition += heredocLength - 1;
1395 currentCharacter = source[currentPosition++];
1396 break; // do...while loop
1402 return TokenNameHEREDOC;
1404 return TokenNameLEFT_SHIFT;
1406 currentPosition = oldPosition;
1407 return TokenNameLESS;
1412 if ((test = getNextChar('=', '>')) == 0)
1413 return TokenNameGREATER_EQUAL;
1415 if ((test = getNextChar('=', '>')) == 0)
1416 return TokenNameRIGHT_SHIFT_EQUAL;
1417 return TokenNameRIGHT_SHIFT;
1419 return TokenNameGREATER;
1422 if (getNextChar('=')) {
1423 if (getNextChar('=')) {
1424 return TokenNameEQUAL_EQUAL_EQUAL;
1426 return TokenNameEQUAL_EQUAL;
1428 if (getNextChar('>'))
1429 return TokenNameEQUAL_GREATER;
1430 return TokenNameEQUAL;
1434 if ((test = getNextChar('&', '=')) == 0)
1435 return TokenNameAND_AND;
1437 return TokenNameAND_EQUAL;
1438 return TokenNameAND;
1443 if ((test = getNextChar('|', '=')) == 0)
1444 return TokenNameOR_OR;
1446 return TokenNameOR_EQUAL;
1450 if (getNextChar('='))
1451 return TokenNameXOR_EQUAL;
1452 return TokenNameXOR;
// a following '>' leads into inline HTML (directly at end of source, else via
// getInlinedHTML); otherwise this is a plain QUESTION token
1454 if (getNextChar('>')) {
1456 if (currentPosition == source.length) {
1458 return TokenNameINLINE_HTML;
1460 return getInlinedHTML(currentPosition - 2);
1462 return TokenNameQUESTION;
1464 if (getNextChar(':'))
1465 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1466 return TokenNameCOLON;
// string starts: consumed whole here, or (for the tokenizeStrings-guarded
// branches) reported as an EncapsedString token when the flag is off
1470 consumeStringConstant();
1471 return TokenNameStringSingleQuote;
1473 if (tokenizeStrings) {
1474 consumeStringLiteral();
1475 return TokenNameStringDoubleQuote;
1477 return TokenNameEncapsedString2;
1479 if (tokenizeStrings) {
1480 consumeStringInterpolated();
1481 return TokenNameStringInterpolated;
1483 return TokenNameEncapsedString0;
// comments and division; startChar keeps the char that opened this branch
1487 char startChar = currentCharacter;
1488 if (getNextChar('=')) {
1489 return TokenNameDIVIDE_EQUAL;
// line comment: started by '#', or by a second '/'
1492 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1494 this.lastCommentLinePosition = this.currentPosition;
1495 int endPositionForLineComment = 0;
1496 try { //get the next char
1497 currentCharacter = source[currentPosition++];
1498 // if (((currentCharacter = source[currentPosition++])
1500 // && (source[currentPosition] == 'u')) {
1501 // //-------------unicode traitement ------------
1502 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1503 // currentPosition++;
1504 // while (source[currentPosition] == 'u') {
1505 // currentPosition++;
1508 // Character.getNumericValue(source[currentPosition++]))
1512 // Character.getNumericValue(source[currentPosition++]))
1516 // Character.getNumericValue(source[currentPosition++]))
1520 // Character.getNumericValue(source[currentPosition++]))
1524 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1526 // currentCharacter =
1527 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1530 //handle the \\u case manually into comment
1531 // if (currentCharacter == '\\') {
1532 // if (source[currentPosition] == '\\')
1533 // currentPosition++;
1534 // } //jump over the \\
1535 boolean isUnicode = false;
// run to the end of the line
1536 while (currentCharacter != '\r' && currentCharacter != '\n') {
1537 this.lastCommentLinePosition = this.currentPosition;
// "?>" inside a line comment: startPosition is set to the '?' and
// TokenNameINLINE_HTML is returned
1538 if (currentCharacter == '?') {
1539 if (getNextChar('>')) {
1540 startPosition = currentPosition - 2;
1542 return TokenNameINLINE_HTML;
1547 currentCharacter = source[currentPosition++];
1548 // if (((currentCharacter = source[currentPosition++])
1550 // && (source[currentPosition] == 'u')) {
1551 // isUnicode = true;
1552 // //-------------unicode traitement ------------
1553 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1554 // currentPosition++;
1555 // while (source[currentPosition] == 'u') {
1556 // currentPosition++;
1559 // Character.getNumericValue(source[currentPosition++]))
1563 // Character.getNumericValue(
1564 // source[currentPosition++]))
1568 // Character.getNumericValue(
1569 // source[currentPosition++]))
1573 // Character.getNumericValue(
1574 // source[currentPosition++]))
1578 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1580 // currentCharacter =
1581 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1584 //handle the \\u case manually into comment
1585 // if (currentCharacter == '\\') {
1586 // if (source[currentPosition] == '\\')
1587 // currentPosition++;
1588 // } //jump over the \\
// end offset of the comment text; NOTE(review): the "- 6" variant presumably
// belongs to the isUnicode case - the condition is not visible here
1591 endPositionForLineComment = currentPosition - 6;
1593 endPositionForLineComment = currentPosition - 1;
1595 // recordComment(false);
1596 recordComment(TokenNameCOMMENT_LINE);
1597 if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1598 if ((currentCharacter == '\r')
1599 || (currentCharacter == '\n')) {
1600 checkNonExternalizeString();
1601 if (recordLineSeparator) {
1603 pushUnicodeLineSeparator();
1605 pushLineSeparator();
// comments are only returned as tokens when tokenizeComments is set
1611 if (tokenizeComments) {
1613 currentPosition = endPositionForLineComment;
1614 // reset one character behind
1616 return TokenNameCOMMENT_LINE;
1618 } catch (IndexOutOfBoundsException e) { //an eof will then
1620 if (tokenizeComments) {
1622 // reset one character behind
1623 return TokenNameCOMMENT_LINE;
1629 //traditional and annotation comment
// star is true when the previously read char was '*'
1630 boolean isJavadoc = false, star = false;
1631 // consume next character
1632 unicodeAsBackSlash = false;
1633 currentCharacter = source[currentPosition++];
1634 // if (((currentCharacter = source[currentPosition++]) ==
1636 // && (source[currentPosition] == 'u')) {
1637 // getNextUnicodeChar();
1639 // if (withoutUnicodePtr != 0) {
1640 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1641 // currentCharacter;
1644 if (currentCharacter == '*') {
1648 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1649 checkNonExternalizeString();
1650 if (recordLineSeparator) {
1651 pushLineSeparator();
1656 try { //get the next char
1657 currentCharacter = source[currentPosition++];
1658 // if (((currentCharacter = source[currentPosition++])
1660 // && (source[currentPosition] == 'u')) {
1661 // //-------------unicode traitement ------------
1662 // getNextUnicodeChar();
1664 //handle the \\u case manually into comment
1665 // if (currentCharacter == '\\') {
1666 // if (source[currentPosition] == '\\')
1667 // currentPosition++;
1668 // //jump over the \\
1670 // empty comment is not a javadoc /**/
1671 if (currentCharacter == '/') {
1674 //loop until end of comment */
1675 while ((currentCharacter != '/') || (!star)) {
1676 if ((currentCharacter == '\r')
1677 || (currentCharacter == '\n')) {
1678 checkNonExternalizeString();
1679 if (recordLineSeparator) {
1680 pushLineSeparator();
1685 star = currentCharacter == '*';
1687 currentCharacter = source[currentPosition++];
1688 // if (((currentCharacter = source[currentPosition++])
1690 // && (source[currentPosition] == 'u')) {
1691 // //-------------unicode traitement ------------
1692 // getNextUnicodeChar();
1694 //handle the \\u case manually into comment
1695 // if (currentCharacter == '\\') {
1696 // if (source[currentPosition] == '\\')
1697 // currentPosition++;
1698 // } //jump over the \\
1700 //recordComment(isJavadoc);
// record the comment as PHPDOC or BLOCK (NOTE(review): presumably selected by
// isJavadoc - the condition is not visible here)
1702 recordComment(TokenNameCOMMENT_PHPDOC);
1704 recordComment(TokenNameCOMMENT_BLOCK);
1707 if (tokenizeComments) {
1709 return TokenNameCOMMENT_PHPDOC;
1710 return TokenNameCOMMENT_BLOCK;
// source ended inside the block comment
1712 } catch (IndexOutOfBoundsException e) {
1713 // reset end position for error reporting
1715 throw new InvalidInputException(UNTERMINATED_COMMENT);
1719 return TokenNameDIVIDE;
1723 return TokenNameEOF;
1724 //the atEnd may not be <currentPosition == source.length> if
1725 // source is only some part of a real (external) stream
1726 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// '$' followed by an identifier start is a variable; otherwise rewind to just
// after the '$' and return TokenNameDOLLAR
1728 if (currentCharacter == '$') {
1729 int oldPosition = currentPosition;
1731 currentCharacter = source[currentPosition++];
1732 if (isPHPIdentifierStart(currentCharacter)) {
1733 return scanIdentifierOrKeyword(true);
1735 currentPosition = oldPosition;
1736 return TokenNameDOLLAR;
1738 } catch (IndexOutOfBoundsException e) {
1739 currentPosition = oldPosition;
1740 return TokenNameDOLLAR;
// identifiers/keywords, then numbers; anything else is an error token
1743 if (isPHPIdentifierStart(currentCharacter))
1744 return scanIdentifierOrKeyword(false);
1745 if (Character.isDigit(currentCharacter))
1746 return scanNumber(false);
1747 return TokenNameERROR;
1750 } //-----------------end switch while try--------------------
// reading past the end of source ends the token stream
1751 catch (IndexOutOfBoundsException e) {
1754 return TokenNameEOF;
// Obtains a token from getInlinedHTMLToken(start). For TokenNameINLINE_HTML
// the only visible handling is a commented-out sketch that would scan the
// HTML text and track start/end tags on a stack.
// NOTE(review): the return statement is on a line not visible here;
// presumably the token is returned unchanged - confirm.
// Throws InvalidInputException as declared; the visible code only propagates
// it from getInlinedHTMLToken().
1757 private int getInlinedHTML(int start) throws InvalidInputException {
1758 int token = getInlinedHTMLToken(start);
1759 if (token == TokenNameINLINE_HTML) {
1760 // Stack stack = new Stack();
1761 // // scan html for errors
1762 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1763 // int lastPHPEndPos=0;
1764 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1765 // Tag tag=(Tag)i.next();
1767 // if (tag instanceof StartTag) {
1768 // StartTag startTag=(StartTag)tag;
1769 // // System.out.println("startTag: "+tag);
1770 // if (startTag.isServerTag()) {
1771 // // TODO : what to do with a server tag ?
1773 // // do whatever with HTML start tag
1774 // // use startTag.getElement() to find the element corresponding
1775 // // to this start tag which may be useful if you implement code
1777 // stack.push(startTag);
1780 // EndTag endTag=(EndTag)tag;
1781 // StartTag stag = (StartTag) stack.peek();
1782 //// System.out.println("endTag: "+tag);
1783 // // do whatever with HTML end tag.
1791 * InvalidInputException
// Scans inline HTML: sets startPosition to start, then reads characters
// looking for a "<?" opening tag, recording line separators on the way.
// Returns TokenNameEOF when currentPosition is already beyond the source
// (clamping it to source.length); every other visible return is
// TokenNameINLINE_HTML, including the one reached when the source ends
// (IndexOutOfBoundsException).
// NOTE(review): the loop header and the statements executed on a recognised
// opening tag are on lines not visible here.
1793 private int getInlinedHTMLToken(int start) throws InvalidInputException {
1794 // int htmlPosition = start;
1795 if (currentPosition > source.length) {
1796 currentPosition = source.length;
1797 return TokenNameEOF;
1799 startPosition = start;
1802 currentCharacter = source[currentPosition++];
1803 if (currentCharacter == '<') {
1804 if (getNextChar('?')) {
1805 currentCharacter = source[currentPosition++];
// "<?" followed by whitespace
1806 if ((currentCharacter == ' ')
1807 || Character.isWhitespace(currentCharacter)) {
1810 return TokenNameINLINE_HTML;
// "<?" followed by the letters p, h, p in either case
1812 boolean phpStart = (currentCharacter == 'P')
1813 || (currentCharacter == 'p');
1815 int test = getNextChar('H', 'h');
1817 test = getNextChar('P', 'p');
1821 return TokenNameINLINE_HTML;
1828 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1829 if (recordLineSeparator) {
1830 pushLineSeparator();
1835 } //-----------------while--------------------
1837 return TokenNameINLINE_HTML;
1838 } //-----------------try--------------------
// source ended while scanning HTML
1839 catch (IndexOutOfBoundsException e) {
1840 startPosition = start;
1844 return TokenNameINLINE_HTML;
1846 // public final void getNextUnicodeChar()
1847 // throws IndexOutOfBoundsException, InvalidInputException {
1849 // //handle the case of unicode.
1850 // //when a unicode appears then we must use a buffer that holds char
1852 // //At the end of this method currentCharacter holds the new visited char
1853 // //and currentPosition points right next after it
1855 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1857 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1858 // currentPosition++;
1859 // while (source[currentPosition] == 'u') {
1860 // currentPosition++;
1864 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1866 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1868 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1870 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1872 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1874 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1875 // //need the unicode buffer
1876 // if (withoutUnicodePtr == 0) {
1877 // //buffer all the entries that have been left aside....
1878 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1879 // System.arraycopy(
1882 // withoutUnicodeBuffer,
1884 // withoutUnicodePtr);
1886 // //fill the buffer with the char
1887 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1889 // unicodeAsBackSlash = currentCharacter == '\\';
1892 * Tokenize a method body, assuming that curly brackets are properly
// Advances the scanner through a method body without returning tokens:
// whitespace is skipped (recording line separators), and string literals,
// line comments, block comments, identifiers and numbers are consumed.
// IndexOutOfBoundsException and InvalidInputException are caught at the end
// of the method, so nothing is propagated to the caller.
// NOTE(review): the embedded line numbering has gaps; the case labels and the
// brace handling of the switch are on lines not visible here.
1895 public final void jumpOverMethodBody() {
1896 this.wasAcr = false;
1899 while (true) { //loop for jumping over comments
1900 // ---------Consume white space and handles startPosition---------
1901 boolean isWhiteSpace;
1903 startPosition = currentPosition;
1904 currentCharacter = source[currentPosition++];
1905 // if (((currentCharacter = source[currentPosition++]) == '\\')
1906 // && (source[currentPosition] == 'u')) {
1907 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1909 if (recordLineSeparator
1910 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1911 pushLineSeparator();
1912 isWhiteSpace = Character.isWhitespace(currentCharacter);
1914 } while (isWhiteSpace);
1915 // -------consume token until } is found---------
1916 switch (currentCharacter) {
1928 test = getNextChar('\\');
// an invalid escape is caught here rather than propagated
1931 scanDoubleQuotedEscapeCharacter();
1932 } catch (InvalidInputException ex) {
1935 // try { // consume next character
1936 unicodeAsBackSlash = false;
1937 currentCharacter = source[currentPosition++];
1938 // if (((currentCharacter = source[currentPosition++]) == '\\')
1939 // && (source[currentPosition] == 'u')) {
1940 // getNextUnicodeChar();
1942 if (withoutUnicodePtr != 0) {
1943 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1946 // } catch (InvalidInputException ex) {
1954 // try { // consume next character
1955 unicodeAsBackSlash = false;
1956 currentCharacter = source[currentPosition++];
1957 // if (((currentCharacter = source[currentPosition++]) == '\\')
1958 // && (source[currentPosition] == 'u')) {
1959 // getNextUnicodeChar();
1961 if (withoutUnicodePtr != 0) {
1962 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1965 // } catch (InvalidInputException ex) {
// consume a double-quoted string up to its closing quote
1967 while (currentCharacter != '"') {
1968 if (currentCharacter == '\r') {
1969 if (source[currentPosition] == '\n')
1972 // the string cannot go further that the line
1974 if (currentCharacter == '\n') {
1976 // the string cannot go further that the line
1978 if (currentCharacter == '\\') {
1980 scanDoubleQuotedEscapeCharacter();
1981 } catch (InvalidInputException ex) {
1984 // try { // consume next character
1985 unicodeAsBackSlash = false;
1986 currentCharacter = source[currentPosition++];
1987 // if (((currentCharacter = source[currentPosition++]) == '\\')
1988 // && (source[currentPosition] == 'u')) {
1989 // getNextUnicodeChar();
1991 if (withoutUnicodePtr != 0) {
1992 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1995 // } catch (InvalidInputException ex) {
1998 } catch (IndexOutOfBoundsException e) {
// getNextChar('/', '*') == 0: a second '/' follows, i.e. a line comment
2005 if ((test = getNextChar('/', '*')) == 0) {
2009 currentCharacter = source[currentPosition++];
2010 // if (((currentCharacter = source[currentPosition++]) ==
2012 // && (source[currentPosition] == 'u')) {
2013 // //-------------unicode traitement ------------
2014 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2015 // currentPosition++;
2016 // while (source[currentPosition] == 'u') {
2017 // currentPosition++;
2020 // Character.getNumericValue(source[currentPosition++]))
2024 // Character.getNumericValue(source[currentPosition++]))
2028 // Character.getNumericValue(source[currentPosition++]))
2032 // Character.getNumericValue(source[currentPosition++]))
2035 // //error don't care of the value
2036 // currentCharacter = 'A';
2037 // } //something different from \n and \r
2039 // currentCharacter =
2040 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// skip to the end of the line
2043 while (currentCharacter != '\r' && currentCharacter != '\n') {
2045 currentCharacter = source[currentPosition++];
2046 // if (((currentCharacter = source[currentPosition++])
2048 // && (source[currentPosition] == 'u')) {
2049 // //-------------unicode traitement ------------
2050 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2051 // currentPosition++;
2052 // while (source[currentPosition] == 'u') {
2053 // currentPosition++;
2056 // Character.getNumericValue(source[currentPosition++]))
2060 // Character.getNumericValue(source[currentPosition++]))
2064 // Character.getNumericValue(source[currentPosition++]))
2068 // Character.getNumericValue(source[currentPosition++]))
2071 // //error don't care of the value
2072 // currentCharacter = 'A';
2073 // } //something different from \n and \r
2075 // currentCharacter =
2076 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2080 if (recordLineSeparator
2081 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2082 pushLineSeparator();
2083 } catch (IndexOutOfBoundsException e) {
2084 } //an eof will then be generated
2088 //traditional and annotation comment
// star is true when the previously read char was '*'
2089 boolean star = false;
2090 // try { // consume next character
2091 unicodeAsBackSlash = false;
2092 currentCharacter = source[currentPosition++];
2093 // if (((currentCharacter = source[currentPosition++]) == '\\')
2094 // && (source[currentPosition] == 'u')) {
2095 // getNextUnicodeChar();
2097 if (withoutUnicodePtr != 0) {
2098 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2101 // } catch (InvalidInputException ex) {
2103 if (currentCharacter == '*') {
2106 if (recordLineSeparator
2107 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2108 pushLineSeparator();
2109 try { //get the next char
2110 currentCharacter = source[currentPosition++];
2111 // if (((currentCharacter = source[currentPosition++]) ==
2113 // && (source[currentPosition] == 'u')) {
2114 // //-------------unicode traitement ------------
2115 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2116 // currentPosition++;
2117 // while (source[currentPosition] == 'u') {
2118 // currentPosition++;
2121 // Character.getNumericValue(source[currentPosition++]))
2125 // Character.getNumericValue(source[currentPosition++]))
2129 // Character.getNumericValue(source[currentPosition++]))
2133 // Character.getNumericValue(source[currentPosition++]))
2136 // //error don't care of the value
2137 // currentCharacter = 'A';
2138 // } //something different from * and /
2140 // currentCharacter =
2141 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2144 //loop until end of comment */
2145 while ((currentCharacter != '/') || (!star)) {
2146 if (recordLineSeparator
2147 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2148 pushLineSeparator();
2149 star = currentCharacter == '*';
2151 currentCharacter = source[currentPosition++];
2152 // if (((currentCharacter = source[currentPosition++])
2154 // && (source[currentPosition] == 'u')) {
2155 // //-------------unicode traitement ------------
2156 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2157 // currentPosition++;
2158 // while (source[currentPosition] == 'u') {
2159 // currentPosition++;
2162 // Character.getNumericValue(source[currentPosition++]))
2166 // Character.getNumericValue(source[currentPosition++]))
2170 // Character.getNumericValue(source[currentPosition++]))
2174 // Character.getNumericValue(source[currentPosition++]))
2177 // //error don't care of the value
2178 // currentCharacter = 'A';
2179 // } //something different from * and /
2181 // currentCharacter =
2182 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2186 } catch (IndexOutOfBoundsException e) {
// variables ('$'-prefixed) and identifiers: scanned with the result discarded
2194 if (isPHPIdentifierStart(currentCharacter)
2195 || currentCharacter == '$') {
2197 scanIdentifierOrKeyword((currentCharacter == '$'));
2198 } catch (InvalidInputException ex) {
2202 if (Character.isDigit(currentCharacter)) {
2205 } catch (InvalidInputException ex) {
2211 //-----------------end switch while try--------------------
// both end of source and invalid input are caught here
2212 } catch (IndexOutOfBoundsException e) {
2213 } catch (InvalidInputException e) {
2217 // public final boolean jumpOverUnicodeWhiteSpace()
2218 // throws InvalidInputException {
2220 // //handle the case of unicode. Jump over the next whiteSpace
2221 // //making startPosition pointing on the next available char
2222 // //On false, the currentCharacter is filled up with a potential
2226 // this.wasAcr = false;
2227 // int c1, c2, c3, c4;
2228 // int unicodeSize = 6;
2229 // currentPosition++;
2230 // while (source[currentPosition] == 'u') {
2231 // currentPosition++;
2235 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2237 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2239 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2241 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2243 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2246 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2247 // if (recordLineSeparator
2248 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2249 // pushLineSeparator();
2250 // if (Character.isWhitespace(currentCharacter))
2253 // //buffer the new char which is not a white space
2254 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2255 // //withoutUnicodePtr == 1 is true here
2257 // } catch (IndexOutOfBoundsException e) {
2258 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Copies the first linePtr + 1 entries of lineEnds into a newly allocated
// array, so the internal array is not handed out.
// NOTE(review): the declaration of copy and the return statement are on lines
// not visible here; presumably the copy is what gets returned.
2261 public final int[] getLineEnds() {
2262 //return a bounded copy of this.lineEnds
2264 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor returning a char[].
// NOTE(review): the body is on lines not visible here; presumably it returns
// the scanner's source array itself (no copy) - confirm.
2267 public char[] getSource() {
// Returns true when token equals TokenNameIdentifier or is numerically
// greater than TokenNameKEYWORD.
// NOTE(review): this relies on keyword token constants being numbered above
// TokenNameKEYWORD - verify against ITerminalSymbols.
2270 public static boolean isIdentifierOrKeyword(int token) {
2271 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
// Returns the single-character token at source[startPosition] as a char[].
// The one return visible here allocates a new one-element array.
// NOTE(review): original lines 2277-2330 are not visible; per the existing
// comment they presumably return shared arrays for common characters.
2273 final char[] optimizedCurrentTokenSource1() {
2274 //return always the same char[] build only once
2275 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2276 char charOne = source[startPosition];
2331 return new char[]{charOne};
// Looks up the two-character token starting at source[startPosition] in the
// cache table charArray_length[0][hash] so that equal tokens can share one
// char[]. The table is searched in two passes (up to InternalTableSize, then
// up to newEntry2); on a miss a new {c0, c1} array is stored at the next slot.
// NOTE(review): the hash modulus, the return statements and the handling of a
// full table are on lines not visible here.
2334 final char[] optimizedCurrentTokenSource2() {
2335 //try to return the same char[] build only once
// hash mixes both characters: first char shifted left by 6, plus the second
2337 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2339 char[][] table = charArray_length[0][hash];
// first pass: slots after i, up to the end of the table
2341 while (++i < InternalTableSize) {
2342 char[] charArray = table[i];
2343 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2346 //---------other side---------
// second pass: slots up to the most recently added entry
2348 int max = newEntry2;
2349 while (++i <= max) {
2350 char[] charArray = table[i];
2351 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2354 //--------add the entry-------
2355 if (++max >= InternalTableSize)
2358 table[max] = (r = new char[]{c0, c1});
// Looks up the three-character token starting at source[startPosition] in the
// cache table charArray_length[1][hash] so that equal tokens can share one
// char[]. The table is searched in two passes (up to InternalTableSize, then
// up to newEntry3); on a miss a new {c0, c1, c2} array is stored at the next
// slot.
// NOTE(review): the hash modulus, the return statements and the handling of a
// full table are on lines not visible here.
2362 final char[] optimizedCurrentTokenSource3() {
2363 //try to return the same char[] build only once
// hash mixes the three characters with shifts of 12, 6 and 0 bits
2365 int hash = (((c0 = source[startPosition]) << 12)
2366 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2368 char[][] table = charArray_length[1][hash];
// first pass: slots after i, up to the end of the table
2370 while (++i < InternalTableSize) {
2371 char[] charArray = table[i];
2372 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2375 //---------other side---------
// second pass: slots up to the most recently added entry
2377 int max = newEntry3;
2378 while (++i <= max) {
2379 char[] charArray = table[i];
2380 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2383 //--------add the entry-------
2384 if (++max >= InternalTableSize)
2387 table[max] = (r = new char[]{c0, c1, c2});
/**
 * Builds the source of a four-character token located at startPosition,
 * reusing a previously created char[] from the charArray_length[2] cache
 * when an entry with the same four characters is found.
 */
2391 final char[] optimizedCurrentTokenSource4() {
2392 //try to return the same char[] build only once
2393 char c0, c1, c2, c3;
// the leading character is widened to long before shifting, so the sum is
// computed as a long; it is narrowed back to int when indexing the cache
2394 long hash = ((((long) (c0 = source[startPosition])) << 18)
2395 + ((c1 = source[startPosition + 1]) << 12)
2396 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2398 char[][] table = charArray_length[2][(int) hash];
// first pass: scan bucket slots upward until InternalTableSize is reached
2400 while (++i < InternalTableSize) {
2401 char[] charArray = table[i];
2402 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2403 && (c3 == charArray[3]))
2406 //---------other side---------
// second pass: scan slots up to newEntry4, the last slot written for length 4
2408 int max = newEntry4;
2409 while (++i <= max) {
2410 char[] charArray = table[i];
2411 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2412 && (c3 == charArray[3]))
2415 //--------add the entry-------
// no match: the overflow action taken when the next slot index reaches
// InternalTableSize is not visible here (presumably wraps to 0 - confirm)
2416 if (++max >= InternalTableSize)
// store the newly allocated array in the cache; r is the value handed back
2419 table[max] = (r = new char[]{c0, c1, c2, c3});
/**
 * Builds the source of a five-character token located at startPosition,
 * reusing a previously created char[] from the charArray_length[3] cache
 * when an entry with the same five characters is found.
 */
2423 final char[] optimizedCurrentTokenSource5() {
2424 //try to return the same char[] build only once
2425 char c0, c1, c2, c3, c4;
// the two leading characters are widened to long before shifting (24 and 18
// bits); the result is narrowed back to int when indexing the cache
2426 long hash = ((((long) (c0 = source[startPosition])) << 24)
2427 + (((long) (c1 = source[startPosition + 1])) << 18)
2428 + ((c2 = source[startPosition + 2]) << 12)
2429 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2431 char[][] table = charArray_length[3][(int) hash];
// first pass: scan bucket slots upward until InternalTableSize is reached
2433 while (++i < InternalTableSize) {
2434 char[] charArray = table[i];
2435 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2436 && (c3 == charArray[3]) && (c4 == charArray[4]))
2439 //---------other side---------
// second pass: scan slots up to newEntry5, the last slot written for length 5
2441 int max = newEntry5;
2442 while (++i <= max) {
2443 char[] charArray = table[i];
2444 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2445 && (c3 == charArray[3]) && (c4 == charArray[4]))
2448 //--------add the entry-------
// no match: the overflow action taken when the next slot index reaches
// InternalTableSize is not visible here (presumably wraps to 0 - confirm)
2449 if (++max >= InternalTableSize)
// store the newly allocated array in the cache; r is the value handed back
2452 table[max] = (r = new char[]{c0, c1, c2, c3, c4});
/**
 * Builds the source of a six-character token located at startPosition,
 * reusing a previously created char[] from the charArray_length[4] cache
 * when an entry with the same six characters is found.
 */
2456 final char[] optimizedCurrentTokenSource6() {
2457 //try to return the same char[] build only once
2458 char c0, c1, c2, c3, c4, c5;
// the three leading characters are widened to long before shifting (32, 24
// and 18 bits); the result is narrowed back to int when indexing the cache
2459 long hash = ((((long) (c0 = source[startPosition])) << 32)
2460 + (((long) (c1 = source[startPosition + 1])) << 24)
2461 + (((long) (c2 = source[startPosition + 2])) << 18)
2462 + ((c3 = source[startPosition + 3]) << 12)
2463 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2465 char[][] table = charArray_length[4][(int) hash];
// first pass: scan bucket slots upward until InternalTableSize is reached
2467 while (++i < InternalTableSize) {
2468 char[] charArray = table[i];
2469 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2470 && (c3 == charArray[3]) && (c4 == charArray[4])
2471 && (c5 == charArray[5]))
2474 //---------other side---------
// second pass: scan slots up to newEntry6, the last slot written for length 6
2476 int max = newEntry6;
2477 while (++i <= max) {
2478 char[] charArray = table[i];
2479 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2480 && (c3 == charArray[3]) && (c4 == charArray[4])
2481 && (c5 == charArray[5]))
2484 //--------add the entry-------
// no match: the overflow action taken when the next slot index reaches
// InternalTableSize is not visible here (presumably wraps to 0 - confirm)
2485 if (++max >= InternalTableSize)
// store the newly allocated array in the cache; r is the value handed back
2488 table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
/**
 * Records the end-of-line position for the line separator held in
 * currentCharacter ('\r' or '\n'), appending it to lineEnds and growing that
 * array by INCREMENT entries when it is full. A '\r' immediately followed by
 * '\n' is recorded as a single line end located on the '\n'.
 *
 * @throws InvalidInputException declared by the signature; no throw site is
 *         visible in this excerpt
 */
2492 public final void pushLineSeparator() throws InvalidInputException {
2493 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2494 final int INCREMENT = 250;
2495 if (this.checkNonExternalizedStringLiterals) {
2496 // reinitialize the current line for non externalize strings purpose
2499 //currentCharacter is at position currentPosition-1
2501 if (currentCharacter == '\r') {
2502 int separatorPos = currentPosition - 1;
// guard against a position at or before the last recorded line end
// NOTE(review): the guarded statement is not visible here (presumably an early return)
2503 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2505 //System.out.println("CR-" + separatorPos);
2507 lineEnds[++linePtr] = separatorPos;
2508 } catch (IndexOutOfBoundsException e) {
// array full: linePtr was already incremented before the failed store,
// so grow the array and store at the same index
2509 //linePtr value is correct
2510 int oldLength = lineEnds.length;
2511 int[] old = lineEnds;
2512 lineEnds = new int[oldLength + INCREMENT];
2513 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2514 lineEnds[linePtr] = separatorPos;
2516 // look-ahead for merged cr+lf
2518 if (source[currentPosition] == '\n') {
2519 //System.out.println("look-ahead LF-" + currentPosition);
// move the line end just recorded for '\r' onto the following '\n'
2520 lineEnds[linePtr] = currentPosition;
// reading source[currentPosition] can run past the end of the buffer
2526 } catch (IndexOutOfBoundsException e) {
2531 if (currentCharacter == '\n') {
2532 //must merge eventual cr followed by lf
// previous char was a '\r' whose line end sits right before this '\n'
2533 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2534 //System.out.println("merge LF-" + (currentPosition - 1));
2535 lineEnds[linePtr] = currentPosition - 1;
2537 int separatorPos = currentPosition - 1;
2538 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2540 // System.out.println("LF-" + separatorPos);
2542 lineEnds[++linePtr] = separatorPos;
2543 } catch (IndexOutOfBoundsException e) {
2544 //linePtr value is correct
2545 int oldLength = lineEnds.length;
2546 int[] old = lineEnds;
2547 lineEnds = new int[oldLength + INCREMENT];
2548 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2549 lineEnds[linePtr] = separatorPos;
/**
 * Variant of pushLineSeparator() for a '\r' or '\n' that was read as a unicode
 * escape: the recorded position is currentPosition - 6 instead of
 * currentPosition - 1.
 */
// NOTE(review): the offset 6 presumably is the length of a backslash-u escape
// with four hex digits - confirm.
2556 public final void pushUnicodeLineSeparator() {
2557 // isUnicode means that the \r or \n has been read as a unicode character
2558 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2559 final int INCREMENT = 250;
2560 //currentCharacter is at position currentPosition-1
2561 if (this.checkNonExternalizedStringLiterals) {
2562 // reinitialize the current line for non externalize strings purpose
2566 if (currentCharacter == '\r') {
2567 int separatorPos = currentPosition - 6;
// guard against a position at or before the last recorded line end
// NOTE(review): the guarded statement is not visible here (presumably an early return)
2568 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2570 //System.out.println("CR-" + separatorPos);
2572 lineEnds[++linePtr] = separatorPos;
2573 } catch (IndexOutOfBoundsException e) {
// array full: linePtr was already incremented before the failed store,
// so grow the array by INCREMENT and store at the same index
2574 //linePtr value is correct
2575 int oldLength = lineEnds.length;
2576 int[] old = lineEnds;
2577 lineEnds = new int[oldLength + INCREMENT];
2578 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2579 lineEnds[linePtr] = separatorPos;
2581 // look-ahead for merged cr+lf
// NOTE(review): no catch for IndexOutOfBoundsException is visible around this
// read, unlike the equivalent look-ahead in pushLineSeparator(); a '\r' escape
// ending the buffer may throw here - confirm.
2582 if (source[currentPosition] == '\n') {
2583 //System.out.println("look-ahead LF-" + currentPosition);
2584 lineEnds[linePtr] = currentPosition;
2592 if (currentCharacter == '\n') {
2593 //must merge eventual cr followed by lf
// previous char was a '\r' whose line end sits right before this escape
2594 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2595 //System.out.println("merge LF-" + (currentPosition - 1));
2596 lineEnds[linePtr] = currentPosition - 6;
2598 int separatorPos = currentPosition - 6;
2599 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2601 // System.out.println("LF-" + separatorPos);
2603 lineEnds[++linePtr] = separatorPos;
2604 } catch (IndexOutOfBoundsException e) {
2605 //linePtr value is correct
2606 int oldLength = lineEnds.length;
2607 int[] old = lineEnds;
2608 lineEnds = new int[oldLength + INCREMENT];
2609 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2610 lineEnds[linePtr] = separatorPos;
/**
 * Records the comment that was just scanned: its start (startPosition) goes
 * into commentStarts and its stop position into commentStops, both at index
 * commentPtr after incrementing it. Both arrays grow by 30 entries when full.
 *
 * @param token the comment token kind; line and block comments are stored
 *        with a negated stop position, any other kind keeps currentPosition
 */
2617 public void recordComment(int token) {
// default stop position, kept for token kinds not matched below
2619 int stopPosition = this.currentPosition;
2621 case TokenNameCOMMENT_LINE:
// line comments use lastCommentLinePosition, stored negated
2622 stopPosition = -this.lastCommentLinePosition;
2624 case TokenNameCOMMENT_BLOCK:
// block comments use the current position, stored negated
2625 stopPosition = -this.currentPosition;
2629 // a new comment is recorded
2630 int length = this.commentStops.length;
2631 if (++this.commentPtr >= length) {
// each arraycopy allocates the larger array inline and copies the old content
2632 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2633 //grows the positions buffers too
2634 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2636 this.commentStops[this.commentPtr] = stopPosition;
2637 this.commentStarts[this.commentPtr] = this.startPosition;
2639 // public final void recordComment(boolean isJavadoc) {
2640 // // a new annotation comment is recorded
2642 // commentStops[++commentPtr] = isJavadoc
2643 // ? currentPosition
2644 // : -currentPosition;
2645 // } catch (IndexOutOfBoundsException e) {
2646 // int oldStackLength = commentStops.length;
2647 // int[] oldStack = commentStops;
2648 // commentStops = new int[oldStackLength + 30];
2649 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2650 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2651 // //grows the positions buffers too
2652 // int[] old = commentStarts;
2653 // commentStarts = new int[oldStackLength + 30];
2654 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2656 // //the buffer is of a correct size here
2657 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning restarts at begin and end-of-file
 * is reported after end.
 *
 * @param begin position assigned to initialPosition, startPosition and
 *        currentPosition
 * @param end last position to scan; eofPosition becomes end + 1
 */
2659 public void resetTo(int begin, int end) {
2660 //reset the scanner to a given position where it may rescan again
2662 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, so that value is kept as is
2663 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2664 commentPtr = -1; // reset comment stack
/**
 * Handles the character following a backslash inside a single-quoted string:
 * reads the next source character into currentCharacter and maps it to a
 * single quote or a backslash.
 *
 * @throws InvalidInputException declared by the signature; no throw site is
 *         visible in this excerpt
 */
// NOTE(review): the case labels of the switch are not visible here, so which
// input character maps to which assignment cannot be confirmed from this
// excerpt.
2666 public final void scanSingleQuotedEscapeCharacter()
2667 throws InvalidInputException {
2668 // the string with "\\u" is a legal string of two chars \ and u
2669 //thus we use a direct access to the source (for regular cases).
2670 // if (unicodeAsBackSlash) {
2671 // // consume next character
2672 // unicodeAsBackSlash = false;
2673 // if (((currentCharacter = source[currentPosition++]) == '\\')
2674 // && (source[currentPosition] == 'u')) {
2675 // getNextUnicodeChar();
2677 // if (withoutUnicodePtr != 0) {
2678 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the escaped character
2682 currentCharacter = source[currentPosition++];
2683 switch (currentCharacter) {
2685 currentCharacter = '\'';
2688 currentCharacter = '\\';
2691 currentCharacter = '\\';
/**
 * Handles the character following a backslash inside a double-quoted string:
 * reads the next source character and replaces currentCharacter with the
 * character the escape stands for (tab, newline, carriage return, quotes,
 * backslash, dollar sign), or decodes an octal escape of up to three digits.
 *
 * @throws InvalidInputException with INVALID_ESCAPE on a path of the octal
 *         branch (its guarding condition is not visible in this excerpt)
 */
// NOTE(review): the case labels of the switch are not visible here; the
// mapping from escape letter to assignment is inferred from the assigned values.
2695 public final void scanDoubleQuotedEscapeCharacter()
2696 throws InvalidInputException {
2697 // the string with "\\u" is a legal string of two chars \ and u
2698 //thus we use a direct access to the source (for regular cases).
2699 // if (unicodeAsBackSlash) {
2700 // // consume next character
2701 // unicodeAsBackSlash = false;
2702 // if (((currentCharacter = source[currentPosition++]) == '\\')
2703 // && (source[currentPosition] == 'u')) {
2704 // getNextUnicodeChar();
2706 // if (withoutUnicodePtr != 0) {
2707 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the escaped character
2711 currentCharacter = source[currentPosition++];
2712 switch (currentCharacter) {
2714 // currentCharacter = '\b';
2717 currentCharacter = '\t';
2720 currentCharacter = '\n';
2723 // currentCharacter = '\f';
2726 currentCharacter = '\r';
2729 currentCharacter = '\"';
2732 currentCharacter = '\'';
2735 currentCharacter = '\\';
2738 currentCharacter = '$';
2741 // -----------octal escape--------------
2743 // OctalDigit OctalDigit
2744 // ZeroToThree OctalDigit OctalDigit
2745 int number = Character.getNumericValue(currentCharacter);
2746 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a three-digit escape
2747 boolean zeroToThreeNot = number > 3;
2748 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2749 int digit = Character.getNumericValue(currentCharacter);
2750 if (digit >= 0 && digit <= 7) {
// accumulate the second octal digit
2751 number = (number * 8) + digit;
2753 .isDigit(currentCharacter = source[currentPosition++])) {
2754 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2755 // Digit --> ignore last character
2758 digit = Character.getNumericValue(currentCharacter);
2759 if (digit >= 0 && digit <= 7) {
2760 // has read \ZeroToThree OctalDigit OctalDigit
2761 number = (number * 8) + digit;
2762 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2763 // --> ignore last character
2767 } else { // has read \OctalDigit NonDigit--> ignore last
2771 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2775 } else { // has read \OctalDigit --> ignore last character
2779 throw new InvalidInputException(INVALID_ESCAPE);
// the decoded octal value becomes the current character
2780 currentCharacter = (char) number;
2783 // throw new InvalidInputException(INVALID_ESCAPE);
2786 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2787 // return scanIdentifierOrKeyword( false );
/**
 * Scans the rest of an identifier and classifies it. The remaining identifier
 * characters are consumed with getNextCharAsJavaIdentifierPart(); the token
 * text is then lower-cased into a local array, so keyword matching is
 * case-insensitive, and compared letter by letter against the PHP keywords
 * and the magic constants (__file__, __line__, __class__, __method__,
 * __function__).
 *
 * @param isVariable presumably true when the token is a variable, in which
 *        case TokenNameVariable is returned without any keyword lookup (the
 *        guarding condition is not visible in this excerpt)
 * @return the matching keyword token, TokenNameVariable, or
 *         TokenNameIdentifier when nothing matches
 * @throws InvalidInputException declared by the signature; no throw site is
 *         visible in this excerpt
 */
2789 public int scanIdentifierOrKeyword(boolean isVariable)
2790 throws InvalidInputException {
2792 //first dispatch on the first char.
2793 //then the length. If there are several
2794 //keywords with the same length AND the same first char, then do another
2795 //dispatch on the second char :-)...cool....but fast !
2796 useAssertAsAnIndentifier = false;
2797 while (getNextCharAsJavaIdentifierPart()) {
2800 // if (new String(getCurrentTokenSource()).equals("$this")) {
2801 // return TokenNamethis;
2803 return TokenNameVariable;
2808 // if (withoutUnicodePtr == 0)
2809 //quick test on length == 1 but not on length > 12 while most identifier
2810 //have a length which is <= 12...but there are lots of identifier with
// a one-character token can never be a keyword
2813 if ((length = currentPosition - startPosition) == 1)
2814 return TokenNameIdentifier;
// lower-cased copy of the token text, used for all comparisons below
2816 data = new char[length];
2817 index = startPosition;
2818 for (int i = 0; i < length; i++) {
2819 data[i] = Character.toLowerCase(source[index + i]);
2823 // if ((length = withoutUnicodePtr) == 1)
2824 // return TokenNameIdentifier;
2825 // // data = withoutUnicodeBuffer;
2826 // data = new char[withoutUnicodeBuffer.length];
2827 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2828 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2832 firstLetter = data[index];
2833 switch (firstLetter) {
// each comparison chain pre-increments index, so index is reset to 0 before a
// second candidate of the same length is tried
2838 if ((data[++index] == '_') && (data[++index] == 'f')
2839 && (data[++index] == 'i') && (data[++index] == 'l')
2840 && (data[++index] == 'e') && (data[++index] == '_')
2841 && (data[++index] == '_'))
2842 return TokenNameFILE;
2843 index = 0; //__LINE__
2844 if ((data[++index] == '_') && (data[++index] == 'l')
2845 && (data[++index] == 'i') && (data[++index] == 'n')
2846 && (data[++index] == 'e') && (data[++index] == '_')
2847 && (data[++index] == '_'))
2848 return TokenNameLINE;
2852 if ((data[++index] == '_') && (data[++index] == 'c')
2853 && (data[++index] == 'l') && (data[++index] == 'a')
2854 && (data[++index] == 's') && (data[++index] == 's')
2855 && (data[++index] == '_') && (data[++index] == '_'))
2856 return TokenNameCLASS_C;
2860 if ((data[++index] == '_') && (data[++index] == 'm')
2861 && (data[++index] == 'e') && (data[++index] == 't')
2862 && (data[++index] == 'h') && (data[++index] == 'o')
2863 && (data[++index] == 'd') && (data[++index] == '_')
2864 && (data[++index] == '_'))
2865 return TokenNameMETHOD_C;
2869 if ((data[++index] == '_') && (data[++index] == 'f')
2870 && (data[++index] == 'u') && (data[++index] == 'n')
2871 && (data[++index] == 'c') && (data[++index] == 't')
2872 && (data[++index] == 'i') && (data[++index] == 'o')
2873 && (data[++index] == 'n') && (data[++index] == '_')
2874 && (data[++index] == '_'))
2875 return TokenNameFUNC_C;
2878 return TokenNameIdentifier;
2880 // as and array abstract
2884 if ((data[++index] == 's')) {
2887 return TokenNameIdentifier;
2891 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2892 return TokenNameand;
2894 return TokenNameIdentifier;
2898 if ((data[++index] == 'r') && (data[++index] == 'r')
2899 && (data[++index] == 'a') && (data[++index] == 'y'))
2900 return TokenNamearray;
2902 return TokenNameIdentifier;
2904 if ((data[++index] == 'b') && (data[++index] == 's')
2905 && (data[++index] == 't') && (data[++index] == 'r')
2906 && (data[++index] == 'a') && (data[++index] == 'c')
2907 && (data[++index] == 't'))
2908 return TokenNameabstract;
2910 return TokenNameIdentifier;
2912 return TokenNameIdentifier;
2918 if ((data[++index] == 'r') && (data[++index] == 'e')
2919 && (data[++index] == 'a') && (data[++index] == 'k'))
2920 return TokenNamebreak;
2922 return TokenNameIdentifier;
2924 return TokenNameIdentifier;
2927 //case catch class clone const continue
2930 if ((data[++index] == 'a') && (data[++index] == 's')
2931 && (data[++index] == 'e'))
2932 return TokenNamecase;
2934 return TokenNameIdentifier;
2936 if ((data[++index] == 'a') && (data[++index] == 't')
2937 && (data[++index] == 'c') && (data[++index] == 'h'))
2938 return TokenNamecatch;
2940 if ((data[++index] == 'l') && (data[++index] == 'a')
2941 && (data[++index] == 's') && (data[++index] == 's'))
2942 return TokenNameclass;
2944 if ((data[++index] == 'l') && (data[++index] == 'o')
2945 && (data[++index] == 'n') && (data[++index] == 'e'))
2946 return TokenNameclone;
2948 if ((data[++index] == 'o') && (data[++index] == 'n')
2949 && (data[++index] == 's') && (data[++index] == 't'))
2950 return TokenNameconst;
2952 return TokenNameIdentifier;
2954 if ((data[++index] == 'o') && (data[++index] == 'n')
2955 && (data[++index] == 't') && (data[++index] == 'i')
2956 && (data[++index] == 'n') && (data[++index] == 'u')
2957 && (data[++index] == 'e'))
2958 return TokenNamecontinue;
2960 return TokenNameIdentifier;
2962 return TokenNameIdentifier;
2965 // declare default do die
2966 // TODO delete define ==> no keyword !
2969 if ((data[++index] == 'o'))
2972 return TokenNameIdentifier;
2974 // if ((data[++index] == 'e')
2975 // && (data[++index] == 'f')
2976 // && (data[++index] == 'i')
2977 // && (data[++index] == 'n')
2978 // && (data[++index] == 'e'))
2979 // return TokenNamedefine;
2981 // return TokenNameIdentifier;
2983 if ((data[++index] == 'e') && (data[++index] == 'c')
2984 && (data[++index] == 'l') && (data[++index] == 'a')
2985 && (data[++index] == 'r') && (data[++index] == 'e'))
2986 return TokenNamedeclare;
2988 if ((data[++index] == 'e') && (data[++index] == 'f')
2989 && (data[++index] == 'a') && (data[++index] == 'u')
2990 && (data[++index] == 'l') && (data[++index] == 't'))
2991 return TokenNamedefault;
2993 return TokenNameIdentifier;
2995 return TokenNameIdentifier;
2998 //echo else exit elseif extends eval
// the else-if branches re-test data[index] without incrementing: the failed
// first comparison left index on the second character
3001 if ((data[++index] == 'c') && (data[++index] == 'h')
3002 && (data[++index] == 'o'))
3003 return TokenNameecho;
3004 else if ((data[index] == 'l') && (data[++index] == 's')
3005 && (data[++index] == 'e'))
3006 return TokenNameelse;
3007 else if ((data[index] == 'x') && (data[++index] == 'i')
3008 && (data[++index] == 't'))
3009 return TokenNameexit;
3010 else if ((data[index] == 'v') && (data[++index] == 'a')
3011 && (data[++index] == 'l'))
3012 return TokenNameeval;
3014 return TokenNameIdentifier;
3017 if ((data[++index] == 'n') && (data[++index] == 'd')
3018 && (data[++index] == 'i') && (data[++index] == 'f'))
3019 return TokenNameendif;
3020 if ((data[index] == 'm') && (data[++index] == 'p')
3021 && (data[++index] == 't') && (data[++index] == 'y'))
3022 return TokenNameempty;
3024 return TokenNameIdentifier;
3027 if ((data[++index] == 'n') && (data[++index] == 'd')
3028 && (data[++index] == 'f') && (data[++index] == 'o')
3029 && (data[++index] == 'r'))
3030 return TokenNameendfor;
3031 else if ((data[index] == 'l') && (data[++index] == 's')
3032 && (data[++index] == 'e') && (data[++index] == 'i')
3033 && (data[++index] == 'f'))
3034 return TokenNameelseif;
3036 return TokenNameIdentifier;
3038 if ((data[++index] == 'x') && (data[++index] == 't')
3039 && (data[++index] == 'e') && (data[++index] == 'n')
3040 && (data[++index] == 'd') && (data[++index] == 's'))
3041 return TokenNameextends;
3043 return TokenNameIdentifier;
3046 if ((data[++index] == 'n') && (data[++index] == 'd')
3047 && (data[++index] == 'w') && (data[++index] == 'h')
3048 && (data[++index] == 'i') && (data[++index] == 'l')
3049 && (data[++index] == 'e'))
3050 return TokenNameendwhile;
3052 return TokenNameIdentifier;
3055 if ((data[++index] == 'n') && (data[++index] == 'd')
3056 && (data[++index] == 's') && (data[++index] == 'w')
3057 && (data[++index] == 'i') && (data[++index] == 't')
3058 && (data[++index] == 'c') && (data[++index] == 'h'))
3059 return TokenNameendswitch;
3061 return TokenNameIdentifier;
// NOTE(review): the letters compared below spell "enddeclare", yet the token
// returned is TokenNameendforeach, the same as the endforeach branch that
// follows - confirm whether a dedicated enddeclare token was intended.
3064 if ((data[++index] == 'n') && (data[++index] == 'd')
3065 && (data[++index] == 'd') && (data[++index] == 'e')
3066 && (data[++index] == 'c') && (data[++index] == 'l')
3067 && (data[++index] == 'a') && (data[++index] == 'r')
3068 && (data[++index] == 'e'))
3069 return TokenNameendforeach;
3071 if ((data[++index] == 'n') // endforeach
3072 && (data[++index] == 'd') && (data[++index] == 'f')
3073 && (data[++index] == 'o') && (data[++index] == 'r')
3074 && (data[++index] == 'e') && (data[++index] == 'a')
3075 && (data[++index] == 'c') && (data[++index] == 'h'))
3076 return TokenNameendforeach;
3078 return TokenNameIdentifier;
3080 return TokenNameIdentifier;
3083 //for false final function
3086 if ((data[++index] == 'o') && (data[++index] == 'r'))
3087 return TokenNamefor;
3089 return TokenNameIdentifier;
3091 // if ((data[++index] == 'a') && (data[++index] == 'l')
3092 // && (data[++index] == 's') && (data[++index] == 'e'))
3093 // return TokenNamefalse;
3094 if ((data[++index] == 'i') && (data[++index] == 'n')
3095 && (data[++index] == 'a') && (data[++index] == 'l'))
3096 return TokenNamefinal;
3098 return TokenNameIdentifier;
3101 if ((data[++index] == 'o') && (data[++index] == 'r')
3102 && (data[++index] == 'e') && (data[++index] == 'a')
3103 && (data[++index] == 'c') && (data[++index] == 'h'))
3104 return TokenNameforeach;
3106 return TokenNameIdentifier;
3109 if ((data[++index] == 'u') && (data[++index] == 'n')
3110 && (data[++index] == 'c') && (data[++index] == 't')
3111 && (data[++index] == 'i') && (data[++index] == 'o')
3112 && (data[++index] == 'n'))
3113 return TokenNamefunction;
3115 return TokenNameIdentifier;
3117 return TokenNameIdentifier;
3122 if ((data[++index] == 'l') && (data[++index] == 'o')
3123 && (data[++index] == 'b') && (data[++index] == 'a')
3124 && (data[++index] == 'l')) {
3125 return TokenNameglobal;
3128 return TokenNameIdentifier;
3130 //if int isset include include_once instanceof interface implements
3133 if (data[++index] == 'f')
3136 return TokenNameIdentifier;
3138 // if ((data[++index] == 'n') && (data[++index] == 't'))
3139 // return TokenNameint;
3141 // return TokenNameIdentifier;
3143 if ((data[++index] == 's') && (data[++index] == 's')
3144 && (data[++index] == 'e') && (data[++index] == 't'))
3145 return TokenNameisset;
3147 return TokenNameIdentifier;
3149 if ((data[++index] == 'n') && (data[++index] == 'c')
3150 && (data[++index] == 'l') && (data[++index] == 'u')
3151 && (data[++index] == 'd') && (data[++index] == 'e'))
3152 return TokenNameinclude;
3154 return TokenNameIdentifier;
3157 if ((data[++index] == 'n') && (data[++index] == 't')
3158 && (data[++index] == 'e') && (data[++index] == 'r')
3159 && (data[++index] == 'f') && (data[++index] == 'a')
3160 && (data[++index] == 'c') && (data[++index] == 'e'))
3161 return TokenNameinterface;
3163 return TokenNameIdentifier;
3166 if ((data[++index] == 'n') && (data[++index] == 's')
3167 && (data[++index] == 't') && (data[++index] == 'a')
3168 && (data[++index] == 'n') && (data[++index] == 'c')
3169 && (data[++index] == 'e') && (data[++index] == 'o')
3170 && (data[++index] == 'f'))
3171 return TokenNameinstanceof;
3172 if ((data[index] == 'm') && (data[++index] == 'p')
3173 && (data[++index] == 'l') && (data[++index] == 'e')
3174 && (data[++index] == 'm') && (data[++index] == 'e')
3175 && (data[++index] == 'n') && (data[++index] == 't')
3176 && (data[++index] == 's'))
3177 return TokenNameimplements;
3179 return TokenNameIdentifier;
3181 if ((data[++index] == 'n') && (data[++index] == 'c')
3182 && (data[++index] == 'l') && (data[++index] == 'u')
3183 && (data[++index] == 'd') && (data[++index] == 'e')
3184 && (data[++index] == '_') && (data[++index] == 'o')
3185 && (data[++index] == 'n') && (data[++index] == 'c')
3186 && (data[++index] == 'e'))
3187 return TokenNameinclude_once;
3189 return TokenNameIdentifier;
3191 return TokenNameIdentifier;
3196 if ((data[++index] == 'i') && (data[++index] == 's')
3197 && (data[++index] == 't')) {
3198 return TokenNamelist;
3201 return TokenNameIdentifier;
3206 if ((data[++index] == 'e') && (data[++index] == 'w'))
3207 return TokenNamenew;
3209 return TokenNameIdentifier;
3211 // if ((data[++index] == 'u') && (data[++index] == 'l')
3212 // && (data[++index] == 'l'))
3213 // return TokenNamenull;
3215 // return TokenNameIdentifier;
3217 return TokenNameIdentifier;
3222 if (data[++index] == 'r') {
3226 // if (length == 12) {
3227 // if ((data[++index] == 'l')
3228 // && (data[++index] == 'd')
3229 // && (data[++index] == '_')
3230 // && (data[++index] == 'f')
3231 // && (data[++index] == 'u')
3232 // && (data[++index] == 'n')
3233 // && (data[++index] == 'c')
3234 // && (data[++index] == 't')
3235 // && (data[++index] == 'i')
3236 // && (data[++index] == 'o')
3237 // && (data[++index] == 'n')) {
3238 // return TokenNameold_function;
3241 return TokenNameIdentifier;
3243 // print public private protected
3246 if ((data[++index] == 'r') && (data[++index] == 'i')
3247 && (data[++index] == 'n') && (data[++index] == 't')) {
3248 return TokenNameprint;
3250 return TokenNameIdentifier;
3252 if ((data[++index] == 'u') && (data[++index] == 'b')
3253 && (data[++index] == 'l') && (data[++index] == 'i')
3254 && (data[++index] == 'c')) {
3255 return TokenNamepublic;
3257 return TokenNameIdentifier;
3259 if ((data[++index] == 'r') && (data[++index] == 'i')
3260 && (data[++index] == 'v') && (data[++index] == 'a')
3261 && (data[++index] == 't') && (data[++index] == 'e')) {
3262 return TokenNameprivate;
3264 return TokenNameIdentifier;
3266 if ((data[++index] == 'r') && (data[++index] == 'o')
3267 && (data[++index] == 't') && (data[++index] == 'e')
3268 && (data[++index] == 'c') && (data[++index] == 't')
3269 && (data[++index] == 'e') && (data[++index] == 'd')) {
3270 return TokenNameprotected;
3272 return TokenNameIdentifier;
3274 return TokenNameIdentifier;
3276 //return require require_once
3278 if ((data[++index] == 'e') && (data[++index] == 't')
3279 && (data[++index] == 'u') && (data[++index] == 'r')
3280 && (data[++index] == 'n')) {
3281 return TokenNamereturn;
3283 } else if (length == 7) {
3284 if ((data[++index] == 'e') && (data[++index] == 'q')
3285 && (data[++index] == 'u') && (data[++index] == 'i')
3286 && (data[++index] == 'r') && (data[++index] == 'e')) {
3287 return TokenNamerequire;
3289 } else if (length == 12) {
3290 if ((data[++index] == 'e') && (data[++index] == 'q')
3291 && (data[++index] == 'u') && (data[++index] == 'i')
3292 && (data[++index] == 'r') && (data[++index] == 'e')
3293 && (data[++index] == '_') && (data[++index] == 'o')
3294 && (data[++index] == 'n') && (data[++index] == 'c')
3295 && (data[++index] == 'e')) {
3296 return TokenNamerequire_once;
3299 return TokenNameIdentifier;
3304 if (data[++index] == 't')
3305 if ((data[++index] == 'a') && (data[++index] == 't')
3306 && (data[++index] == 'i') && (data[++index] == 'c')) {
3307 return TokenNamestatic;
3309 return TokenNameIdentifier;
3310 else if ((data[index] == 'w') && (data[++index] == 'i')
3311 && (data[++index] == 't') && (data[++index] == 'c')
3312 && (data[++index] == 'h'))
3313 return TokenNameswitch;
3315 return TokenNameIdentifier;
3317 return TokenNameIdentifier;
3323 if ((data[++index] == 'r') && (data[++index] == 'y'))
3324 return TokenNametry;
3326 return TokenNameIdentifier;
3328 // if ((data[++index] == 'r') && (data[++index] == 'u')
3329 // && (data[++index] == 'e'))
3330 // return TokenNametrue;
3332 // return TokenNameIdentifier;
3334 if ((data[++index] == 'h') && (data[++index] == 'r')
3335 && (data[++index] == 'o') && (data[++index] == 'w'))
3336 return TokenNamethrow;
3338 return TokenNameIdentifier;
3340 return TokenNameIdentifier;
3346 if ((data[++index] == 's') && (data[++index] == 'e'))
3347 return TokenNameuse;
3349 return TokenNameIdentifier;
3351 if ((data[++index] == 'n') && (data[++index] == 's')
3352 && (data[++index] == 'e') && (data[++index] == 't'))
3353 return TokenNameunset;
3355 return TokenNameIdentifier;
3357 return TokenNameIdentifier;
3363 if ((data[++index] == 'a') && (data[++index] == 'r'))
3364 return TokenNamevar;
3366 return TokenNameIdentifier;
3368 return TokenNameIdentifier;
3374 if ((data[++index] == 'h') && (data[++index] == 'i')
3375 && (data[++index] == 'l') && (data[++index] == 'e'))
3376 return TokenNamewhile;
3378 return TokenNameIdentifier;
3379 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3380 // (data[++index]=='e') && (data[++index]=='f')&&
3381 // (data[++index]=='p'))
3382 //return TokenNamewidefp ;
3384 //return TokenNameIdentifier;
3386 return TokenNameIdentifier;
3392 if ((data[++index] == 'o') && (data[++index] == 'r'))
3393 return TokenNamexor;
3395 return TokenNameIdentifier;
3397 return TokenNameIdentifier;
3400 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already in currentCharacter
 * and returns its token kind: TokenNameIntegerLiteral for hexadecimal
 * (0x / 0X) and plain integer literals, TokenNameDoubleLiteral when a decimal
 * point, an exponent or a d / D suffix is present.
 *
 * @param dotPrefix true when the digits were preceded by a '.', which makes
 *        the literal floating from the start
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when no hex digit follows
 *         the 0x prefix, or INVALID_FLOAT when no digit follows an exponent
 */
3403 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3404 //when entering this method the currentCharacter is the first
3405 //digit of the number , i.e. it may be preceded by a . when
3407 boolean floating = dotPrefix;
3408 if ((!dotPrefix) && (currentCharacter == '0')) {
3409 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3410 //force the first char of the hexa number to exist...
3411 // consume next character
3412 unicodeAsBackSlash = false;
3413 currentCharacter = source[currentPosition++];
3414 // if (((currentCharacter = source[currentPosition++]) == '\\')
3415 // && (source[currentPosition] == 'u')) {
3416 // getNextUnicodeChar();
3418 // if (withoutUnicodePtr != 0) {
3419 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3422 if (Character.digit(currentCharacter, 16) == -1)
3423 throw new InvalidInputException(INVALID_HEXA);
// consume the remaining hex digits
3425 while (getNextCharAsDigit(16)) {
3427 // if (getNextChar('l', 'L') >= 0)
3428 // return TokenNameLongLiteral;
3430 return TokenNameIntegerLiteral;
3432 //there is no x or X in the number (the hexa case has returned above)
3433 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3434 // 00078.0 is true !!!!! crazy language
3435 if (getNextCharAsDigit()) {
3436 //-------------potential octal-----------------
3437 while (getNextCharAsDigit()) {
3439 // if (getNextChar('l', 'L') >= 0) {
3440 // return TokenNameLongLiteral;
3443 // if (getNextChar('f', 'F') >= 0) {
3444 // return TokenNameFloatingPointLiteral;
3446 if (getNextChar('d', 'D') >= 0) {
3447 return TokenNameDoubleLiteral;
3448 } else { //make the distinction between octal and float ....
3449 if (getNextChar('.')) { //bingo ! ....
3450 while (getNextCharAsDigit()) {
3452 if (getNextChar('e', 'E') >= 0) {
3453 // consume next character
3454 unicodeAsBackSlash = false;
3455 currentCharacter = source[currentPosition++];
3456 // if (((currentCharacter = source[currentPosition++]) == '\\')
3457 // && (source[currentPosition] == 'u')) {
3458 // getNextUnicodeChar();
3460 // if (withoutUnicodePtr != 0) {
3461 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3464 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3465 // consume next character
3466 unicodeAsBackSlash = false;
3467 currentCharacter = source[currentPosition++];
3468 // if (((currentCharacter = source[currentPosition++]) == '\\')
3469 // && (source[currentPosition] == 'u')) {
3470 // getNextUnicodeChar();
3472 // if (withoutUnicodePtr != 0) {
3473 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3474 // currentCharacter;
// an exponent needs at least one digit
3478 if (!Character.isDigit(currentCharacter))
3479 throw new InvalidInputException(INVALID_FLOAT);
3480 while (getNextCharAsDigit()) {
3483 // if (getNextChar('f', 'F') >= 0)
3484 // return TokenNameFloatingPointLiteral;
3485 getNextChar('d', 'D'); //jump over potential d or D
3486 return TokenNameDoubleLiteral;
3488 return TokenNameIntegerLiteral;
// general case: consume the integer part
3495 while (getNextCharAsDigit()) {
3497 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3498 // return TokenNameLongLiteral;
3499 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3500 while (getNextCharAsDigit()) {
3504 //if floating is true both exponent and suffix may be optional
3505 if (getNextChar('e', 'E') >= 0) {
3507 // consume next character
3508 unicodeAsBackSlash = false;
3509 currentCharacter = source[currentPosition++];
3510 // if (((currentCharacter = source[currentPosition++]) == '\\')
3511 // && (source[currentPosition] == 'u')) {
3512 // getNextUnicodeChar();
3514 // if (withoutUnicodePtr != 0) {
3515 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3518 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3521 unicodeAsBackSlash = false;
3522 currentCharacter = source[currentPosition++];
3523 // if (((currentCharacter = source[currentPosition++]) == '\\')
3524 // && (source[currentPosition] == 'u')) {
3525 // getNextUnicodeChar();
3527 // if (withoutUnicodePtr != 0) {
3528 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// an exponent needs at least one digit
3532 if (!Character.isDigit(currentCharacter))
3533 throw new InvalidInputException(INVALID_FLOAT);
3534 while (getNextCharAsDigit()) {
3537 if (getNextChar('d', 'D') >= 0)
3538 return TokenNameDoubleLiteral;
3539 // if (getNextChar('f', 'F') >= 0)
3540 // return TokenNameFloatingPointLiteral;
3541 //the long flag has been tested before
3542 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3545 * Search the line number corresponding to a specific position
3548 public final int getLineNumber(int position) {
3549 if (lineEnds == null)
3551 int length = linePtr + 1;
3554 int g = 0, d = length - 1;
3558 if (position < lineEnds[m]) {
3560 } else if (position > lineEnds[m]) {
3566 if (position < lineEnds[m]) {
/**
 * Setter taking the PHP-mode flag.
 * NOTE(review): the method body is not visible in this listing; presumably it
 * stores {@code mode} in the public {@code phpMode} field - confirm.
 *
 * @param mode the new value of the flag
 */
3571 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to scan and resets the scanning state that
 * depends on it.
 *
 * @param source the characters to scan; a null argument is handled by
 *               installing an empty array
 */
3574 public final void setSource(char[] source) {
3575 //the source-buffer is set to sourceString
3576 if (source == null) {
3577 this.source = new char[0];
3579 this.source = source;
// scanning restarts at the beginning of the new buffer
3582 initialPosition = currentPosition = 0;
3583 containsAssertKeyword = false;
// sized from this.source (not the parameter) so a null argument is safe here
3584 withoutUnicodeBuffer = new char[this.source.length];
// a fresh, empty stack for every new source
3585 encapsedStringStack = new Stack();
3587 public String toString() {
3588 if (startPosition == source.length)
3589 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3590 if (currentPosition > source.length)
3591 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3592 char front[] = new char[startPosition];
3593 System.arraycopy(source, 0, front, 0, startPosition);
3594 int middleLength = (currentPosition - 1) - startPosition + 1;
3596 if (middleLength > -1) {
3597 middle = new char[middleLength];
3598 System.arraycopy(source, startPosition, middle, 0, middleLength);
3600 middle = new char[0];
3602 char end[] = new char[source.length - (currentPosition - 1)];
3603 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3604 - (currentPosition - 1) - 1);
3605 return new String(front)
3606 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3607 + new String(middle)
3608 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable rendering of a token kind.
 * <p>
 * Tokens that carry text (inline HTML, identifiers, variables, number and
 * string literals, heredocs, whitespace and comments) are rendered as a label
 * wrapping the current token source; keywords, operators, punctuation, magic
 * constants and casts are rendered as a fixed spelling. A value that matches
 * no case is rendered as "not-a-token(act) " followed by the current token
 * source.
 *
 * @param act the token kind, compared against the TokenName* constants
 * @return the printable description of {@code act}
 */
3611 public final String toStringAction(int act) {
3613 case TokenNameERROR :
3614 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3616 case TokenNameINLINE_HTML :
3617 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3618 case TokenNameIdentifier :
3619 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3620 case TokenNameVariable :
3621 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3622 case TokenNameabstract :
3623 return "abstract"; //$NON-NLS-1$
3625 return "AND"; //$NON-NLS-1$
3626 case TokenNamearray :
3627 return "array"; //$NON-NLS-1$
3629 return "as"; //$NON-NLS-1$
3630 case TokenNamebreak :
3631 return "break"; //$NON-NLS-1$
3632 case TokenNamecase :
3633 return "case"; //$NON-NLS-1$
3634 case TokenNameclass :
3635 return "class"; //$NON-NLS-1$
3636 case TokenNamecatch :
3637 return "catch"; //$NON-NLS-1$
3638 case TokenNameclone :
3641 case TokenNameconst :
3644 case TokenNamecontinue :
3645 return "continue"; //$NON-NLS-1$
3646 case TokenNamedefault :
3647 return "default"; //$NON-NLS-1$
3648 // case TokenNamedefine :
3649 // return "define"; //$NON-NLS-1$
3651 return "do"; //$NON-NLS-1$
3652 case TokenNameecho :
3653 return "echo"; //$NON-NLS-1$
3654 case TokenNameelse :
3655 return "else"; //$NON-NLS-1$
3656 case TokenNameelseif :
3657 return "elseif"; //$NON-NLS-1$
3658 case TokenNameendfor :
3659 return "endfor"; //$NON-NLS-1$
3660 case TokenNameendforeach :
3661 return "endforeach"; //$NON-NLS-1$
3662 case TokenNameendif :
3663 return "endif"; //$NON-NLS-1$
3664 case TokenNameendswitch :
3665 return "endswitch"; //$NON-NLS-1$
3666 case TokenNameendwhile :
3667 return "endwhile"; //$NON-NLS-1$
3670 case TokenNameextends :
3671 return "extends"; //$NON-NLS-1$
3672 // case TokenNamefalse :
3673 // return "false"; //$NON-NLS-1$
3674 case TokenNamefinal :
3675 return "final"; //$NON-NLS-1$
3677 return "for"; //$NON-NLS-1$
3678 case TokenNameforeach :
3679 return "foreach"; //$NON-NLS-1$
3680 case TokenNamefunction :
3681 return "function"; //$NON-NLS-1$
3682 case TokenNameglobal :
3683 return "global"; //$NON-NLS-1$
3685 return "if"; //$NON-NLS-1$
3686 case TokenNameimplements :
3687 return "implements"; //$NON-NLS-1$
3688 case TokenNameinclude :
3689 return "include"; //$NON-NLS-1$
3690 case TokenNameinclude_once :
3691 return "include_once"; //$NON-NLS-1$
3692 case TokenNameinstanceof :
3693 return "instanceof"; //$NON-NLS-1$
3694 case TokenNameinterface :
3695 return "interface"; //$NON-NLS-1$
3696 case TokenNameisset :
3697 return "isset"; //$NON-NLS-1$
3698 case TokenNamelist :
3699 return "list"; //$NON-NLS-1$
3701 return "new"; //$NON-NLS-1$
3702 // case TokenNamenull :
3703 // return "null"; //$NON-NLS-1$
3705 return "OR"; //$NON-NLS-1$
3706 case TokenNameprint :
3707 return "print"; //$NON-NLS-1$
3708 case TokenNameprivate :
3709 return "private"; //$NON-NLS-1$
3710 case TokenNameprotected :
3711 return "protected"; //$NON-NLS-1$
3712 case TokenNamepublic :
3713 return "public"; //$NON-NLS-1$
3714 case TokenNamerequire :
3715 return "require"; //$NON-NLS-1$
3716 case TokenNamerequire_once :
3717 return "require_once"; //$NON-NLS-1$
3718 case TokenNamereturn :
3719 return "return"; //$NON-NLS-1$
3720 case TokenNamestatic :
3721 return "static"; //$NON-NLS-1$
3722 case TokenNameswitch :
3723 return "switch"; //$NON-NLS-1$
3724 // case TokenNametrue :
3725 // return "true"; //$NON-NLS-1$
3726 case TokenNameunset :
3727 return "unset"; //$NON-NLS-1$
3729 return "var"; //$NON-NLS-1$
3730 case TokenNamewhile :
3731 return "while"; //$NON-NLS-1$
3733 return "XOR"; //$NON-NLS-1$
3734 // case TokenNamethis :
3735 // return "$this"; //$NON-NLS-1$
3736 case TokenNameIntegerLiteral :
3737 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3738 case TokenNameDoubleLiteral :
3739 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3740 case TokenNameStringDoubleQuote :
3741 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3742 case TokenNameStringSingleQuote :
3743 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3744 case TokenNameStringInterpolated :
3745 return "StringInterpolated(" + new String(getCurrentTokenSource())
3746 + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3747 case TokenNameEncapsedString0 :
3748 return "`"; //$NON-NLS-1$
3749 case TokenNameEncapsedString1 :
3750 return "\'"; //$NON-NLS-1$
3751 case TokenNameEncapsedString2 :
3752 return "\""; //$NON-NLS-1$
3753 case TokenNameSTRING :
3754 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3755 case TokenNameHEREDOC :
3756 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3757 case TokenNamePLUS_PLUS :
3758 return "++"; //$NON-NLS-1$
3759 case TokenNameMINUS_MINUS :
3760 return "--"; //$NON-NLS-1$
3761 case TokenNameEQUAL_EQUAL :
3762 return "=="; //$NON-NLS-1$
3763 case TokenNameEQUAL_EQUAL_EQUAL :
3764 return "==="; //$NON-NLS-1$
3765 case TokenNameEQUAL_GREATER :
3766 return "=>"; //$NON-NLS-1$
3767 case TokenNameLESS_EQUAL :
3768 return "<="; //$NON-NLS-1$
3769 case TokenNameGREATER_EQUAL :
3770 return ">="; //$NON-NLS-1$
3771 case TokenNameNOT_EQUAL :
3772 return "!="; //$NON-NLS-1$
3773 case TokenNameNOT_EQUAL_EQUAL :
3774 return "!=="; //$NON-NLS-1$
3775 case TokenNameLEFT_SHIFT :
3776 return "<<"; //$NON-NLS-1$
3777 case TokenNameRIGHT_SHIFT :
3778 return ">>"; //$NON-NLS-1$
3779 case TokenNamePLUS_EQUAL :
3780 return "+="; //$NON-NLS-1$
3781 case TokenNameMINUS_EQUAL :
3782 return "-="; //$NON-NLS-1$
3783 case TokenNameMULTIPLY_EQUAL :
3784 return "*="; //$NON-NLS-1$
3785 case TokenNameDIVIDE_EQUAL :
3786 return "/="; //$NON-NLS-1$
3787 case TokenNameAND_EQUAL :
3788 return "&="; //$NON-NLS-1$
3789 case TokenNameOR_EQUAL :
3790 return "|="; //$NON-NLS-1$
3791 case TokenNameXOR_EQUAL :
3792 return "^="; //$NON-NLS-1$
3793 case TokenNameREMAINDER_EQUAL :
3794 return "%="; //$NON-NLS-1$
3795 case TokenNameDOT_EQUAL :
3796 return ".="; //$NON-NLS-1$
3797 case TokenNameLEFT_SHIFT_EQUAL :
3798 return "<<="; //$NON-NLS-1$
3799 case TokenNameRIGHT_SHIFT_EQUAL :
3800 return ">>="; //$NON-NLS-1$
3801 case TokenNameOR_OR :
3802 return "||"; //$NON-NLS-1$
3803 case TokenNameAND_AND :
3804 return "&&"; //$NON-NLS-1$
3805 case TokenNamePLUS :
3806 return "+"; //$NON-NLS-1$
3807 case TokenNameMINUS :
3808 return "-"; //$NON-NLS-1$
3809 case TokenNameMINUS_GREATER :
3812 return "!"; //$NON-NLS-1$
3813 case TokenNameREMAINDER :
3814 return "%"; //$NON-NLS-1$
3816 return "^"; //$NON-NLS-1$
3818 return "&"; //$NON-NLS-1$
3819 case TokenNameMULTIPLY :
3820 return "*"; //$NON-NLS-1$
3822 return "|"; //$NON-NLS-1$
3823 case TokenNameTWIDDLE :
3824 return "~"; //$NON-NLS-1$
3825 case TokenNameTWIDDLE_EQUAL :
3826 return "~="; //$NON-NLS-1$
3827 case TokenNameDIVIDE :
3828 return "/"; //$NON-NLS-1$
3829 case TokenNameGREATER :
3830 return ">"; //$NON-NLS-1$
3831 case TokenNameLESS :
3832 return "<"; //$NON-NLS-1$
3833 case TokenNameLPAREN :
3834 return "("; //$NON-NLS-1$
3835 case TokenNameRPAREN :
3836 return ")"; //$NON-NLS-1$
3837 case TokenNameLBRACE :
3838 return "{"; //$NON-NLS-1$
3839 case TokenNameRBRACE :
3840 return "}"; //$NON-NLS-1$
3841 case TokenNameLBRACKET :
3842 return "["; //$NON-NLS-1$
3843 case TokenNameRBRACKET :
3844 return "]"; //$NON-NLS-1$
3845 case TokenNameSEMICOLON :
3846 return ";"; //$NON-NLS-1$
3847 case TokenNameQUESTION :
3848 return "?"; //$NON-NLS-1$
3849 case TokenNameCOLON :
3850 return ":"; //$NON-NLS-1$
3851 case TokenNameCOMMA :
3852 return ","; //$NON-NLS-1$
3854 return "."; //$NON-NLS-1$
3855 case TokenNameEQUAL :
3856 return "="; //$NON-NLS-1$
3859 case TokenNameDOLLAR :
3861 case TokenNameDOLLAR_LBRACE :
3863 case TokenNameLBRACE_DOLLAR :
3866 return "EOF"; //$NON-NLS-1$
3867 case TokenNameWHITESPACE :
3868 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3869 case TokenNameCOMMENT_LINE :
3870 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3871 case TokenNameCOMMENT_BLOCK :
3872 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3873 case TokenNameCOMMENT_PHPDOC :
3874 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3875 // case TokenNameHTML :
3876 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
3878 case TokenNameFILE :
3879 return "__FILE__"; //$NON-NLS-1$
3880 case TokenNameLINE :
3881 return "__LINE__"; //$NON-NLS-1$
3882 case TokenNameCLASS_C :
3883 return "__CLASS__"; //$NON-NLS-1$
3884 case TokenNameMETHOD_C :
3885 return "__METHOD__"; //$NON-NLS-1$
3886 case TokenNameFUNC_C :
3887 return "__FUNCTION__"; //$NON-NLS-1$
3888 case TokenNameboolCAST :
3889 return "( bool )"; //$NON-NLS-1$
3890 case TokenNameintCAST :
3891 return "( int )"; //$NON-NLS-1$
3892 case TokenNamedoubleCAST :
3893 return "( double )"; //$NON-NLS-1$
3894 case TokenNameobjectCAST :
3895 return "( object )"; //$NON-NLS-1$
3896 case TokenNamestringCAST :
3897 return "( string )"; //$NON-NLS-1$
// fallback rendering: the numeric token kind plus the current token source
3899 return "not-a-token(" + (new Integer(act)) + ") "
3900 + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with the check for non-externalized string literals
 * switched off; every other setting is left to the three-argument
 * constructor this one delegates to.
 *
 * @param tokenizeComments   forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      false /* checkNonExternalizedStringLiterals */);
}
/**
 * Creates a scanner with assert mode switched off; delegates to the
 * four-argument constructor.
 *
 * @param tokenizeComments                   forwarded unchanged
 * @param tokenizeWhiteSpace                 forwarded unchanged
 * @param checkNonExternalizedStringLiterals forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals,
      false /* assertMode */);
}
/**
 * Creates a scanner that does not tokenize strings and has no task tags or
 * task priorities; delegates to the seven-argument constructor.
 *
 * @param tokenizeComments                   forwarded unchanged
 * @param tokenizeWhiteSpace                 forwarded unchanged
 * @param checkNonExternalizedStringLiterals forwarded unchanged
 * @param assertMode                         forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals, boolean assertMode) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals,
      assertMode,
      false /* tokenizeStrings */,
      null /* taskTags */,
      null /* taskPriorities */);
}
/**
 * Creates a scanner and records its configuration. No source is attached
 * here: the end-of-file position starts at {@code Integer.MAX_VALUE} and the
 * encapsed-string stack starts out {@code null}.
 *
 * @param tokenizeComments                   stored in the field of the same name
 * @param tokenizeWhiteSpace                 stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode                         stored in the field of the same name
 * @param tokenizeStrings                    stored in the field of the same name
 * @param taskTags                           stored in the field of the same name; may be null
 * @param taskPriorities                     stored in the field of the same name; may be null
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals, boolean assertMode,
    boolean tokenizeStrings,
    char[][] taskTags,
    char[][] taskPriorities) {
  // what gets reported as a token
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;

  // optional checks
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;

  // task tag configuration
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;

  // initial scanning state
  this.eofPosition = Integer.MAX_VALUE;
  this.encapsedStringStack = null;
}
3935 private void checkNonExternalizeString() throws InvalidInputException {
3936 if (currentLine == null)
3938 parseTags(currentLine);
3940 private void parseTags(NLSLine line) throws InvalidInputException {
3941 String s = new String(getCurrentTokenSource());
3942 int pos = s.indexOf(TAG_PREFIX);
3943 int lineLength = line.size();
3945 int start = pos + TAG_PREFIX_LENGTH;
3946 int end = s.indexOf(TAG_POSTFIX, start);
3947 String index = s.substring(start, end);
3950 i = Integer.parseInt(index) - 1;
3951 // Tags are one based not zero based.
3952 } catch (NumberFormatException e) {
3953 i = -1; // we don't want to consider this as a valid NLS tag
3955 if (line.exists(i)) {
3958 pos = s.indexOf(TAG_PREFIX, start);
3960 this.nonNLSStrings = new StringLiteral[lineLength];
3961 int nonNLSCounter = 0;
3962 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3963 StringLiteral literal = (StringLiteral) iterator.next();
3964 if (literal != null) {
3965 this.nonNLSStrings[nonNLSCounter++] = literal;
3968 if (nonNLSCounter == 0) {
3969 this.nonNLSStrings = null;
3973 this.wasNonExternalizedStringLiteral = true;
3974 if (nonNLSCounter != lineLength) {
3975 System.arraycopy(this.nonNLSStrings, 0,
3976 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
/**
 * Scans the character(s) following a backslash and stores the character the
 * escape sequence stands for in {@code currentCharacter}.
 * <p>
 * The single-character escapes produce backspace, tab, newline, form feed,
 * carriage return, double quote, single quote and backslash. Anything else is
 * tried as an octal escape of up to three digits; the octal branch reads ahead
 * from {@code source} while collecting digits.
 *
 * @throws InvalidInputException with {@code INVALID_ESCAPE} when the character
 *                               after the backslash starts no recognised escape
 */
3981 public final void scanEscapeCharacter() throws InvalidInputException {
3982 // the string with "\\u" is a legal string of two chars \ and u
3983 //thus we use a direct access to the source (for regular cases).
3984 if (unicodeAsBackSlash) {
3985 // consume next character
3986 unicodeAsBackSlash = false;
3987 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
3988 // (source[currentPosition] == 'u')) {
3989 // getNextUnicodeChar();
// mirror the character into the unicode-free buffer once that buffer is in use
3991 if (withoutUnicodePtr != 0) {
3992 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the character that selects the escape
3996 currentCharacter = source[currentPosition++];
3997 switch (currentCharacter) {
3999 currentCharacter = '\b';
4002 currentCharacter = '\t';
4005 currentCharacter = '\n';
4008 currentCharacter = '\f';
4011 currentCharacter = '\r';
4014 currentCharacter = '\"';
4017 currentCharacter = '\'';
4020 currentCharacter = '\\';
4023 // -----------octal escape--------------
4025 // OctalDigit OctalDigit
4026 // ZeroToThree OctalDigit OctalDigit
4027 int number = Character.getNumericValue(currentCharacter);
4028 if (number >= 0 && number <= 7) {
// a leading digit above 3 allows at most one further octal digit
4029 boolean zeroToThreeNot = number > 3;
4030 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4031 int digit = Character.getNumericValue(currentCharacter);
4032 if (digit >= 0 && digit <= 7) {
4033 number = (number * 8) + digit;
4035 .isDigit(currentCharacter = source[currentPosition++])) {
4036 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4037 // Digit --> ignore last character
4040 digit = Character.getNumericValue(currentCharacter);
4041 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4042 // OctalDigit OctalDigit
4043 number = (number * 8) + digit;
4044 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4045 // --> ignore last character
4049 } else { // has read \OctalDigit NonDigit--> ignore last
4053 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4057 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): the condition guarding this throw is not visible in this
// listing; presumably a range check on the octal value - confirm
4061 throw new InvalidInputException(INVALID_ESCAPE);
4062 currentCharacter = (char) number;
// not an octal digit either: the escape is invalid
4064 throw new InvalidInputException(INVALID_ESCAPE);
4067 // check presence of task: tags
4068 public void checkTaskTag(int commentStart, int commentEnd) {
4069 // only look for newer task: tags
4070 if (this.foundTaskCount > 0
4071 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4074 int foundTaskIndex = this.foundTaskCount;
4075 nextChar : for (int i = commentStart; i < commentEnd
4076 && i < this.eofPosition; i++) {
4078 char[] priority = null;
4079 // check for tag occurrence
4080 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4081 tag = this.taskTags[itag];
4082 priority = this.taskPriorities != null
4083 && itag < this.taskPriorities.length
4084 ? this.taskPriorities[itag]
4086 int tagLength = tag.length;
4087 for (int t = 0; t < tagLength; t++) {
4088 if (this.source[i + t] != tag[t])
4091 if (this.foundTaskTags == null) {
4092 this.foundTaskTags = new char[5][];
4093 this.foundTaskMessages = new char[5][];
4094 this.foundTaskPriorities = new char[5][];
4095 this.foundTaskPositions = new int[5][];
4096 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4097 System.arraycopy(this.foundTaskTags, 0,
4098 this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4099 this.foundTaskCount);
4100 System.arraycopy(this.foundTaskMessages, 0,
4101 this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4102 this.foundTaskCount);
4103 System.arraycopy(this.foundTaskPriorities, 0,
4104 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4105 0, this.foundTaskCount);
4106 System.arraycopy(this.foundTaskPositions, 0,
4107 this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4108 this.foundTaskCount);
4110 this.foundTaskTags[this.foundTaskCount] = tag;
4111 this.foundTaskPriorities[this.foundTaskCount] = priority;
4112 this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
4114 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4115 this.foundTaskCount++;
4116 i += tagLength - 1; // will be incremented when looping
4119 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4120 // retrieve message start and end positions
4121 int msgStart = this.foundTaskPositions[i][0]
4122 + this.foundTaskTags[i].length;
4123 int max_value = i + 1 < this.foundTaskCount
4124 ? this.foundTaskPositions[i + 1][0] - 1
4126 // at most beginning of next task
4127 if (max_value < msgStart)
4128 max_value = msgStart; // would only occur if tag is before EOF.
4131 for (int j = msgStart; j < max_value; j++) {
4132 if ((c = this.source[j]) == '\n' || c == '\r') {
4138 for (int j = max_value; j > msgStart; j--) {
4139 if ((c = this.source[j]) == '*') {
4147 if (msgStart == end)
4150 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4152 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4154 // update the end position of the task
4155 this.foundTaskPositions[i][1] = end;
4156 // get the message source
4157 final int messageLength = end - msgStart + 1;
4158 char[] message = new char[messageLength];
4159 System.arraycopy(source, msgStart, message, 0, messageLength);
4160 this.foundTaskMessages[i] = message;