1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Stack;
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
/*
 * The main APIs are:
 * - getNextToken(), which returns the type of the current token (this value
 *   is not memorized by the scanner);
 * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e.
 *   all unicode escapes have been transformed into the correct char);
 * - sourceStart, which gives the token's start position in the stream;
 * - currentPosition-1, which gives the sourceEnd position in the stream.
 */
// Mutable scanner state. NOTE(review): some original lines of this run are
// not visible in this excerpt (for instance the declaration of the source
// array that the methods below index) - confirm against the full file.
31 private boolean assertMode;
32 public boolean useAssertAsAnIndentifier = false;
33 //flag indicating if processed source contains occurrences of keyword assert
34 public boolean containsAssertKeyword = false;
35 public boolean recordLineSeparator;
36 public boolean phpMode = false;
37 public Stack encapsedStringStack = null;
// Character most recently read from the source by the getNextChar... and
// consumeString... methods.
38 public char currentCharacter;
// Index of the first character of the current token (answered by
// getCurrentTokenStartPosition()).
39 public int startPosition;
// Index of the next character to read; currentPosition - 1 is answered by
// getCurrentTokenEndPosition().
40 public int currentPosition;
// initialPosition is answered by getLineStart(1); eofPosition is answered by
// getLineEnd() for the last line it accepts.
41 public int initialPosition, eofPosition;
42 // after this position eof are generated instead of real token from the
44 public boolean tokenizeComments;
45 public boolean tokenizeWhiteSpace;
46 public boolean tokenizeStrings;
47 //source should be viewed as a window (aka a part)
48 //of a entire very large stream
// Copy of the current token in which escape sequences have been replaced by
// the characters they denote; filled by the consumeString... methods. The
// first real character is stored at index 1, index 0 is unused.
51 public char[] withoutUnicodeBuffer;
// Index of the last character stored in withoutUnicodeBuffer.
52 public int withoutUnicodePtr;
53 //when == 0 ==> no unicode in the current token
54 public boolean unicodeAsBackSlash = false;
55 public boolean scanningFloatLiteral = false;
56 //support for /** comments
57 public int[] commentStops = new int[10];
58 public int[] commentStarts = new int[10];
59 public int commentPtr = -1; // no comment test with commentPtr value -1
60 protected int lastCommentLinePosition = -1;
61 //diet parsing support - jump over some method body when requested
62 public boolean diet = false;
63 //support for the poor-line-debuggers ....
64 //remember the position of the cr/lf
// lineEnds[n - 1] is what getLineEnd(n) answers; lineEnds[n - 2] + 1 is what
// getLineStart(n) answers for n > 1.
65 public int[] lineEnds = new int[250];
66 public int linePtr = -1;
67 public boolean wasAcr = false;
// Message keys for scanning problems. In this file UNTERMINATED_STRING is
// the message of the InvalidInputException thrown by the consumeString...
// methods, and INVALID_ESCAPE is the message they compare against before
// trying to resynchronise on a closing quote.
68 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
69 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
70 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
71 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
72 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
73 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
74 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
75 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
76 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
77 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
78 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
79 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
80 //----------------optimized identifier management------------------
// One shared single-character array per lower-case ASCII letter.
// NOTE(review): presumably handed out for one-letter identifiers by
// optimizedCurrentTokenSource1() - confirm there.
81 static final char[] charArray_a = new char[]{'a'},
82 charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
83 charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
84 charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
85 charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
86 charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
87 charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
88 charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
89 charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
90 charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
91 charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
92 charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
93 charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
94 charArray_z = new char[]{'z'};
// Placeholder value stored in every slot of charArray_length by the instance
// initializer.
95 static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
96 '\u0000', '\u0000', '\u0000'};
// Second and third dimensions of charArray_length.
97 static final int TableSize = 30, InternalTableSize = 6;
// First dimension of charArray_length; getCurrentIdentifierSource() switches
// on the token length with a reference to this constant.
99 public static final int OptimizedLength = 6;
101 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
102 // support for detecting non-externalized string literals
103 int currentLineNr = -1;
104 int previousLineNr = -1;
// Line record to which the consumeString... methods add each StringLiteral
// they build; created on demand and appended to lines.
105 NLSLine currentLine = null;
106 List lines = new ArrayList();
107 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
108 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
109 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
110 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
111 public StringLiteral[] nonNLSStrings = null;
// When true, the consumeString... methods record each scanned literal on
// currentLine.
112 public boolean checkNonExternalizedStringLiterals = true;
113 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: stores the initCharArray placeholder in every slot
// of charArray_length. The outer bound is OptimizedLength, the constant that
// sizes the first dimension of the table, instead of a repeated literal 6,
// so the loop cannot drift out of step with the allocation.
{
  for (int i = 0; i < OptimizedLength; i++) {
    for (int j = 0; j < TableSize; j++) {
      for (int k = 0; k < InternalTableSize; k++) {
        charArray_length[i][j][k] = initCharArray;
      }
    }
  }
}
// NOTE(review): this declaration continues on a line that is not visible in
// this excerpt - confirm the remaining declarator(s) against the full file.
123 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
// Bracket kind codes; BracketKinds equals the number of kinds listed above it.
125 public static final int RoundBracket = 0;
126 public static final int SquareBracket = 1;
127 public static final int CurlyBracket = 2;
128 public static final int BracketKinds = 3;
// Task-tag bookkeeping. NOTE(review): presumably taskTags/taskPriorities are
// the configured inputs and the foundTask... arrays the results - none of
// them is read or written in the visible part of the file, so verify.
130 public char[][] foundTaskTags = null;
131 public char[][] foundTaskMessages;
132 public char[][] foundTaskPriorities = null;
133 public int[][] foundTaskPositions;
134 public int foundTaskCount = 0;
135 public char[][] taskTags = null;
136 public char[][] taskPriorities = null;
// Compile-time switches, both off.
137 public static final boolean DEBUG = false;
138 public static final boolean TRACE = false;
141 * Determines if the specified character is permissible as the first
142 * character in a PHP identifier
144 public static boolean isPHPIdentifierStart(char ch) {
145 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
148 * Determines if the specified character may be part of a PHP identifier as
149 * other than the first character
151 public static boolean isPHPIdentifierPart(char ch) {
152 return Character.isLetterOrDigit(ch) || (ch == '_')
153 || (0x7F <= ch && ch <= 0xFF);
155 public final boolean atEnd() {
156 // This code is not relevant if source is
157 // Only a part of the real stream input
158 return source.length == currentPosition;
160 public char[] getCurrentIdentifierSource() {
161 //return the token REAL source (aka unicodes are precomputed)
163 // if (withoutUnicodePtr != 0)
164 // //0 is used as a fast test flag so the real first char is in position 1
166 // withoutUnicodeBuffer,
168 // result = new char[withoutUnicodePtr],
170 // withoutUnicodePtr);
172 int length = currentPosition - startPosition;
173 switch (length) { // see OptimizedLength
175 return optimizedCurrentTokenSource1();
177 return optimizedCurrentTokenSource2();
179 return optimizedCurrentTokenSource3();
181 return optimizedCurrentTokenSource4();
183 return optimizedCurrentTokenSource5();
185 return optimizedCurrentTokenSource6();
188 System.arraycopy(source, startPosition, result = new char[length], 0,
193 public int getCurrentTokenEndPosition() {
194 return this.currentPosition - 1;
196 public final char[] getCurrentTokenSource() {
197 // Return the token REAL source (aka unicodes are precomputed)
199 // if (withoutUnicodePtr != 0)
200 // // 0 is used as a fast test flag so the real first char is in position 1
202 // withoutUnicodeBuffer,
204 // result = new char[withoutUnicodePtr],
206 // withoutUnicodePtr);
209 System.arraycopy(source, startPosition,
210 result = new char[length = currentPosition - startPosition], 0, length);
214 public final char[] getCurrentTokenSource(int startPos) {
215 // Return the token REAL source (aka unicodes are precomputed)
217 // if (withoutUnicodePtr != 0)
218 // // 0 is used as a fast test flag so the real first char is in position 1
220 // withoutUnicodeBuffer,
222 // result = new char[withoutUnicodePtr],
224 // withoutUnicodePtr);
227 System.arraycopy(source, startPos,
228 result = new char[length = currentPosition - startPos], 0, length);
232 public final char[] getCurrentTokenSourceString() {
233 //return the token REAL source (aka unicodes are precomputed).
234 //REMOVE the two " that are at the beginning and the end.
236 if (withoutUnicodePtr != 0)
237 //0 is used as a fast test flag so the real first char is in position 1
238 System.arraycopy(withoutUnicodeBuffer, 2,
239 //2 is 1 (real start) + 1 (to jump over the ")
240 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
243 System.arraycopy(source, startPosition + 1,
244 result = new char[length = currentPosition - startPosition - 2], 0,
249 public int getCurrentTokenStartPosition() {
250 return this.startPosition;
252 public final char[] getCurrentStringLiteralSource() {
253 // Return the token REAL source (aka unicodes are precomputed)
256 System.arraycopy(source, startPosition + 1,
257 result = new char[length = currentPosition - startPosition - 2], 0,
263 * Search the source position corresponding to the end of a given line number
265 * Line numbers are 1-based, and relative to the scanner initialPosition.
266 * Character positions are 0-based.
268 * In case the given line number is inconsistent, answers -1.
270 public final int getLineEnd(int lineNumber) {
271 if (lineEnds == null)
273 if (lineNumber >= lineEnds.length)
277 if (lineNumber == lineEnds.length - 1)
279 return lineEnds[lineNumber - 1];
280 // next line start one character behind the lineEnd of the previous line
283 * Search the source position corresponding to the beginning of a given line
286 * Line numbers are 1-based, and relative to the scanner initialPosition.
287 * Character positions are 0-based.
289 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
291 * In case the given line number is inconsistent, answers -1.
293 public final int getLineStart(int lineNumber) {
294 if (lineEnds == null)
296 if (lineNumber >= lineEnds.length)
301 return initialPosition;
302 return lineEnds[lineNumber - 2] + 1;
303 // next line start one character behind the lineEnd of the previous line
305 public final boolean getNextChar(char testedChar) {
307 //handle the case of unicode.
308 //when a unicode appears then we must use a buffer that holds char
310 //At the end of this method currentCharacter holds the new visited char
311 //and currentPosition points right next after it
312 //Both previous lines are true if the currentCharacter is == to the
314 //On false, no side effect has occured.
315 //ALL getNextChar.... ARE OPTIMIZED COPIES
316 int temp = currentPosition;
318 currentCharacter = source[currentPosition++];
319 // if (((currentCharacter = source[currentPosition++]) == '\\')
320 // && (source[currentPosition] == 'u')) {
321 // //-------------unicode traitement ------------
322 // int c1, c2, c3, c4;
323 // int unicodeSize = 6;
324 // currentPosition++;
325 // while (source[currentPosition] == 'u') {
326 // currentPosition++;
330 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
332 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
334 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
336 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
338 // currentPosition = temp;
342 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
343 // if (currentCharacter != testedChar) {
344 // currentPosition = temp;
347 // unicodeAsBackSlash = currentCharacter == '\\';
349 // //need the unicode buffer
350 // if (withoutUnicodePtr == 0) {
351 // //buffer all the entries that have been left aside....
352 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
356 // withoutUnicodeBuffer,
358 // withoutUnicodePtr);
360 // //fill the buffer with the char
361 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
364 // } //-------------end unicode traitement--------------
366 if (currentCharacter != testedChar) {
367 currentPosition = temp;
370 unicodeAsBackSlash = false;
371 // if (withoutUnicodePtr != 0)
372 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
375 } catch (IndexOutOfBoundsException e) {
376 unicodeAsBackSlash = false;
377 currentPosition = temp;
381 public final int getNextChar(char testedChar1, char testedChar2) {
382 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
383 //test can be done with (x==0) for the first and (x>0) for the second
384 //handle the case of unicode.
385 //when a unicode appears then we must use a buffer that holds char
387 //At the end of this method currentCharacter holds the new visited char
388 //and currentPosition points right next after it
389 //Both previous lines are true if the currentCharacter is == to the
391 //On false, no side effect has occured.
392 //ALL getNextChar.... ARE OPTIMIZED COPIES
393 int temp = currentPosition;
396 currentCharacter = source[currentPosition++];
397 // if (((currentCharacter = source[currentPosition++]) == '\\')
398 // && (source[currentPosition] == 'u')) {
399 // //-------------unicode traitement ------------
400 // int c1, c2, c3, c4;
401 // int unicodeSize = 6;
402 // currentPosition++;
403 // while (source[currentPosition] == 'u') {
404 // currentPosition++;
408 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
410 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
412 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
414 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
416 // currentPosition = temp;
420 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
421 // if (currentCharacter == testedChar1)
423 // else if (currentCharacter == testedChar2)
426 // currentPosition = temp;
430 // //need the unicode buffer
431 // if (withoutUnicodePtr == 0) {
432 // //buffer all the entries that have been left aside....
433 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
437 // withoutUnicodeBuffer,
439 // withoutUnicodePtr);
441 // //fill the buffer with the char
442 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
444 // } //-------------end unicode traitement--------------
446 if (currentCharacter == testedChar1)
448 else if (currentCharacter == testedChar2)
451 currentPosition = temp;
454 // if (withoutUnicodePtr != 0)
455 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
458 } catch (IndexOutOfBoundsException e) {
459 currentPosition = temp;
463 public final boolean getNextCharAsDigit() {
465 //handle the case of unicode.
466 //when a unicode appears then we must use a buffer that holds char
468 //At the end of this method currentCharacter holds the new visited char
469 //and currentPosition points right next after it
470 //Both previous lines are true if the currentCharacter is a digit
471 //On false, no side effect has occured.
472 //ALL getNextChar.... ARE OPTIMIZED COPIES
473 int temp = currentPosition;
475 currentCharacter = source[currentPosition++];
476 // if (((currentCharacter = source[currentPosition++]) == '\\')
477 // && (source[currentPosition] == 'u')) {
478 // //-------------unicode traitement ------------
479 // int c1, c2, c3, c4;
480 // int unicodeSize = 6;
481 // currentPosition++;
482 // while (source[currentPosition] == 'u') {
483 // currentPosition++;
487 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
489 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
491 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
493 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
495 // currentPosition = temp;
499 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
500 // if (!Character.isDigit(currentCharacter)) {
501 // currentPosition = temp;
505 // //need the unicode buffer
506 // if (withoutUnicodePtr == 0) {
507 // //buffer all the entries that have been left aside....
508 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
512 // withoutUnicodeBuffer,
514 // withoutUnicodePtr);
516 // //fill the buffer with the char
517 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
519 // } //-------------end unicode traitement--------------
521 if (!Character.isDigit(currentCharacter)) {
522 currentPosition = temp;
525 // if (withoutUnicodePtr != 0)
526 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
529 } catch (IndexOutOfBoundsException e) {
530 currentPosition = temp;
534 public final boolean getNextCharAsDigit(int radix) {
536 //handle the case of unicode.
537 //when a unicode appears then we must use a buffer that holds char
539 //At the end of this method currentCharacter holds the new visited char
540 //and currentPosition points right next after it
541 //Both previous lines are true if the currentCharacter is a digit base on
543 //On false, no side effect has occured.
544 //ALL getNextChar.... ARE OPTIMIZED COPIES
545 int temp = currentPosition;
547 currentCharacter = source[currentPosition++];
548 // if (((currentCharacter = source[currentPosition++]) == '\\')
549 // && (source[currentPosition] == 'u')) {
550 // //-------------unicode traitement ------------
551 // int c1, c2, c3, c4;
552 // int unicodeSize = 6;
553 // currentPosition++;
554 // while (source[currentPosition] == 'u') {
555 // currentPosition++;
559 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
561 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
563 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
565 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
567 // currentPosition = temp;
571 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
572 // if (Character.digit(currentCharacter, radix) == -1) {
573 // currentPosition = temp;
577 // //need the unicode buffer
578 // if (withoutUnicodePtr == 0) {
579 // //buffer all the entries that have been left aside....
580 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
584 // withoutUnicodeBuffer,
586 // withoutUnicodePtr);
588 // //fill the buffer with the char
589 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
591 // } //-------------end unicode traitement--------------
593 if (Character.digit(currentCharacter, radix) == -1) {
594 currentPosition = temp;
597 // if (withoutUnicodePtr != 0)
598 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
601 } catch (IndexOutOfBoundsException e) {
602 currentPosition = temp;
606 public boolean getNextCharAsJavaIdentifierPart() {
608 //handle the case of unicode.
609 //when a unicode appears then we must use a buffer that holds char
611 //At the end of this method currentCharacter holds the new visited char
612 //and currentPosition points right next after it
613 //Both previous lines are true if the currentCharacter is a
614 // JavaIdentifierPart
615 //On false, no side effect has occured.
616 //ALL getNextChar.... ARE OPTIMIZED COPIES
617 int temp = currentPosition;
619 currentCharacter = source[currentPosition++];
620 // if (((currentCharacter = source[currentPosition++]) == '\\')
621 // && (source[currentPosition] == 'u')) {
622 // //-------------unicode traitement ------------
623 // int c1, c2, c3, c4;
624 // int unicodeSize = 6;
625 // currentPosition++;
626 // while (source[currentPosition] == 'u') {
627 // currentPosition++;
631 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
633 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
635 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
637 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
639 // currentPosition = temp;
643 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
644 // if (!isPHPIdentifierPart(currentCharacter)) {
645 // currentPosition = temp;
649 // //need the unicode buffer
650 // if (withoutUnicodePtr == 0) {
651 // //buffer all the entries that have been left aside....
652 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
656 // withoutUnicodeBuffer,
658 // withoutUnicodePtr);
660 // //fill the buffer with the char
661 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
663 // } //-------------end unicode traitement--------------
665 if (!isPHPIdentifierPart(currentCharacter)) {
666 currentPosition = temp;
669 // if (withoutUnicodePtr != 0)
670 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
673 } catch (IndexOutOfBoundsException e) {
674 currentPosition = temp;
678 public int getCastOrParen() {
679 int tempPosition = currentPosition;
680 char tempCharacter = currentCharacter;
681 int tempToken = TokenNameLPAREN;
682 boolean found = false;
683 StringBuffer buf = new StringBuffer();
686 currentCharacter = source[currentPosition++];
687 } while (currentCharacter == ' ' || currentCharacter == '\t');
688 while ((currentCharacter >= 'a' && currentCharacter <= 'z')
689 || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
690 buf.append(currentCharacter);
691 currentCharacter = source[currentPosition++];
693 if (buf.length() >= 3 && buf.length() <= 7) {
694 char[] data = buf.toString().toCharArray();
696 switch (data.length) {
699 if ((data[index] == 'i') && (data[++index] == 'n')
700 && (data[++index] == 't')) {
702 tempToken = TokenNameintCAST;
707 if ((data[index] == 'b') && (data[++index] == 'o')
708 && (data[++index] == 'o') && (data[++index] == 'l')) {
710 tempToken = TokenNameboolCAST;
713 if ((data[index] == 'r') && (data[++index] == 'e')
714 && (data[++index] == 'a') && (data[++index] == 'l')) {
716 tempToken = TokenNamedoubleCAST;
722 if ((data[index] == 'a') && (data[++index] == 'r')
723 && (data[++index] == 'r') && (data[++index] == 'a')
724 && (data[++index] == 'y')) {
726 tempToken = TokenNamearrayCAST;
729 if ((data[index] == 'u') && (data[++index] == 'n')
730 && (data[++index] == 's') && (data[++index] == 'e')
731 && (data[++index] == 't')) {
733 tempToken = TokenNameunsetCAST;
736 if ((data[index] == 'f') && (data[++index] == 'l')
737 && (data[++index] == 'o') && (data[++index] == 'a')
738 && (data[++index] == 't')) {
740 tempToken = TokenNamedoubleCAST;
746 // object string double
747 if ((data[index] == 'o') && (data[++index] == 'b')
748 && (data[++index] == 'j') && (data[++index] == 'e')
749 && (data[++index] == 'c') && (data[++index] == 't')) {
751 tempToken = TokenNameobjectCAST;
754 if ((data[index] == 's') && (data[++index] == 't')
755 && (data[++index] == 'r') && (data[++index] == 'i')
756 && (data[++index] == 'n') && (data[++index] == 'g')) {
758 tempToken = TokenNamestringCAST;
761 if ((data[index] == 'd') && (data[++index] == 'o')
762 && (data[++index] == 'u') && (data[++index] == 'b')
763 && (data[++index] == 'l') && (data[++index] == 'e')) {
765 tempToken = TokenNamedoubleCAST;
772 if ((data[index] == 'b') && (data[++index] == 'o')
773 && (data[++index] == 'o') && (data[++index] == 'l')
774 && (data[++index] == 'e') && (data[++index] == 'a')
775 && (data[++index] == 'n')) {
777 tempToken = TokenNameboolCAST;
780 if ((data[index] == 'i') && (data[++index] == 'n')
781 && (data[++index] == 't') && (data[++index] == 'e')
782 && (data[++index] == 'g') && (data[++index] == 'e')
783 && (data[++index] == 'r')) {
785 tempToken = TokenNameintCAST;
791 while (currentCharacter == ' ' || currentCharacter == '\t') {
792 currentCharacter = source[currentPosition++];
794 if (currentCharacter == ')') {
799 } catch (IndexOutOfBoundsException e) {
801 currentCharacter = tempCharacter;
802 currentPosition = tempPosition;
803 return TokenNameLPAREN;
/**
 * Scans the remainder of a backquote-delimited string: characters are read
 * from the source until the current character is a backquote.
 *
 * A backslash is handed to scanDoubleQuotedEscapeCharacter(), and the scanned
 * characters are mirrored into withoutUnicodeBuffer once the first escape has
 * been met. Reading past the end of the source is reported as an
 * InvalidInputException carrying UNTERMINATED_STRING. When the checkNonExternalizedStringLiterals flag is
 * set, the scanned literal is recorded on currentLine.
 *
 * NOTE(review): several short lines of this method (the opening "try {",
 * closing braces, "break;" statements and whatever ends the second catch
 * block) are not visible in this excerpt - confirm against the full file.
 *
 * @throws InvalidInputException if the string is unterminated, or if an
 *         escape sequence is rejected by scanDoubleQuotedEscapeCharacter()
 */
805 public void consumeStringInterpolated() throws InvalidInputException {
807 // consume next character
808 unicodeAsBackSlash = false;
809 currentCharacter = source[currentPosition++];
810 // if (((currentCharacter = source[currentPosition++]) == '\\')
811 // && (source[currentPosition] == 'u')) {
812 // getNextUnicodeChar();
814 // if (withoutUnicodePtr != 0) {
815 // withoutUnicodeBuffer[++withoutUnicodePtr] =
819 while (currentCharacter != '`') {
820 /** ** in PHP \r and \n are valid in string literals *** */
821 // if ((currentCharacter == '\n')
822 // || (currentCharacter == '\r')) {
823 // // relocate if finding another quote fairly close: thus unicode
824 // '/u000D' will be fully consumed
825 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
826 // if (currentPosition + lookAhead == source.length)
828 // if (source[currentPosition + lookAhead] == '\n')
830 // if (source[currentPosition + lookAhead] == '\"') {
831 // currentPosition += lookAhead + 1;
835 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
837 if (currentCharacter == '\\') {
// escapeSize first holds the position before the escape is scanned and is
// then turned into the number of source characters the escape consumed.
838 int escapeSize = currentPosition;
839 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
840 //scanEscapeCharacter make a side effect on this value and we need
841 // the previous value few lines down this one
842 scanDoubleQuotedEscapeCharacter();
843 escapeSize = currentPosition - escapeSize;
844 if (withoutUnicodePtr == 0) {
845 //buffer all the entries that have been left aside....
846 withoutUnicodePtr = currentPosition - escapeSize - 1
// The copy starts at buffer index 1; index 0 stays unused.
848 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
850 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
851 } else { //overwrite the / in the buffer
852 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
853 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
854 // where only one is correct
859 // consume next character
860 unicodeAsBackSlash = false;
861 currentCharacter = source[currentPosition++];
862 // if (((currentCharacter = source[currentPosition++]) == '\\')
863 // && (source[currentPosition] == 'u')) {
864 // getNextUnicodeChar();
866 if (withoutUnicodePtr != 0) {
867 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Running past the end of the source array means no closing backquote was
// found.
871 } catch (IndexOutOfBoundsException e) {
872 throw new InvalidInputException(UNTERMINATED_STRING);
873 } catch (InvalidInputException e) {
874 if (e.getMessage().equals(INVALID_ESCAPE)) {
875 // relocate if finding another quote fairly close: thus unicode
876 // '/u000D' will be fully consumed
// The look-ahead inspects at most 50 characters and also tests for the end of
// the source and for a newline; on a backquote it moves currentPosition just
// past it.
877 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
878 if (currentPosition + lookAhead == source.length)
880 if (source[currentPosition + lookAhead] == '\n')
882 if (source[currentPosition + lookAhead] == '`') {
883 currentPosition += lookAhead + 1;
890 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
891 // //$NON-NLS-?$ where ? is an
893 if (currentLine == null) {
894 currentLine = new NLSLine();
895 lines.add(currentLine);
// Record the literal (content without delimiters) with its start and end
// positions in the source.
897 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
898 startPosition, currentPosition - 1));
/**
 * Scans the remainder of a single-quoted string: characters are read from
 * the source until the current character is a single quote.
 *
 * A backslash is handed to scanSingleQuotedEscapeCharacter(), and the scanned
 * characters are mirrored into withoutUnicodeBuffer once the first escape has
 * been met. Reading past the end of the source is reported as an
 * InvalidInputException carrying UNTERMINATED_STRING. When the checkNonExternalizedStringLiterals flag is
 * set, the scanned literal is recorded on currentLine.
 *
 * NOTE(review): several short lines of this method (the opening "try {",
 * closing braces, "break;" statements and whatever ends the second catch
 * block) are not visible in this excerpt - confirm against the full file.
 *
 * @throws InvalidInputException if the string is unterminated, or if an
 *         escape sequence is rejected by scanSingleQuotedEscapeCharacter()
 */
901 public void consumeStringConstant() throws InvalidInputException {
903 // consume next character
904 unicodeAsBackSlash = false;
905 currentCharacter = source[currentPosition++];
906 // if (((currentCharacter = source[currentPosition++]) == '\\')
907 // && (source[currentPosition] == 'u')) {
908 // getNextUnicodeChar();
910 // if (withoutUnicodePtr != 0) {
911 // withoutUnicodeBuffer[++withoutUnicodePtr] =
915 while (currentCharacter != '\'') {
916 /** ** in PHP \r and \n are valid in string literals *** */
917 // if ((currentCharacter == '\n')
918 // || (currentCharacter == '\r')) {
919 // // relocate if finding another quote fairly close: thus unicode
920 // '/u000D' will be fully consumed
921 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
922 // if (currentPosition + lookAhead == source.length)
924 // if (source[currentPosition + lookAhead] == '\n')
926 // if (source[currentPosition + lookAhead] == '\"') {
927 // currentPosition += lookAhead + 1;
931 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
933 if (currentCharacter == '\\') {
// escapeSize first holds the position before the escape is scanned and is
// then turned into the number of source characters the escape consumed.
934 int escapeSize = currentPosition;
935 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
936 //scanEscapeCharacter make a side effect on this value and we need
937 // the previous value few lines down this one
938 scanSingleQuotedEscapeCharacter();
939 escapeSize = currentPosition - escapeSize;
940 if (withoutUnicodePtr == 0) {
941 //buffer all the entries that have been left aside....
942 withoutUnicodePtr = currentPosition - escapeSize - 1
// The copy starts at buffer index 1; index 0 stays unused.
944 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
946 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
947 } else { //overwrite the / in the buffer
948 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
949 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
950 // where only one is correct
955 // consume next character
956 unicodeAsBackSlash = false;
957 currentCharacter = source[currentPosition++];
958 // if (((currentCharacter = source[currentPosition++]) == '\\')
959 // && (source[currentPosition] == 'u')) {
960 // getNextUnicodeChar();
962 if (withoutUnicodePtr != 0) {
963 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Running past the end of the source array means no closing quote was found.
967 } catch (IndexOutOfBoundsException e) {
968 throw new InvalidInputException(UNTERMINATED_STRING);
969 } catch (InvalidInputException e) {
970 if (e.getMessage().equals(INVALID_ESCAPE)) {
971 // relocate if finding another quote fairly close: thus unicode
972 // '/u000D' will be fully consumed
// The look-ahead inspects at most 50 characters and also tests for the end of
// the source and for a newline; on a single quote it moves currentPosition
// just past it.
973 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
974 if (currentPosition + lookAhead == source.length)
976 if (source[currentPosition + lookAhead] == '\n')
978 if (source[currentPosition + lookAhead] == '\'') {
979 currentPosition += lookAhead + 1;
986 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
987 // //$NON-NLS-?$ where ? is an
989 if (currentLine == null) {
990 currentLine = new NLSLine();
991 lines.add(currentLine);
// Record the literal (content without delimiters) with its start and end
// positions in the source.
993 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
994 startPosition, currentPosition - 1));
/**
 * Consumes a double-quoted string literal from {@code source}, reading one
 * character at a time until the closing double quote is found.
 * <p>
 * A backslash hands control to {@code scanDoubleQuotedEscapeCharacter()};
 * {@code currentCharacter} is then stored in {@code withoutUnicodeBuffer}.
 * Line breaks are accepted inside the literal: the check that used to reject
 * them is commented out because PHP allows them.
 * <p>
 * When {@code checkNonExternalizedStringLiterals} is set, the literal is
 * added to {@code currentLine} as a {@code StringLiteral} so that NLS tags
 * can be checked later.
 *
 * @throws InvalidInputException with {@code UNTERMINATED_STRING} when the end
 *         of {@code source} is reached before the closing quote
 */
997 public void consumeStringLiteral() throws InvalidInputException {
999 // consume next character
1000 unicodeAsBackSlash = false;
1001 currentCharacter = source[currentPosition++];
1002 // if (((currentCharacter = source[currentPosition++]) == '\\')
1003 // && (source[currentPosition] == 'u')) {
1004 // getNextUnicodeChar();
1006 // if (withoutUnicodePtr != 0) {
1007 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1008 // currentCharacter;
1011 while (currentCharacter != '"') {
1012 /** ** in PHP \r and \n are valid in string literals *** */
1013 // if ((currentCharacter == '\n')
1014 // || (currentCharacter == '\r')) {
1015 // // relocate if finding another quote fairly close: thus unicode
1016 // '/u000D' will be fully consumed
1017 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1018 // if (currentPosition + lookAhead == source.length)
1020 // if (source[currentPosition + lookAhead] == '\n')
1022 // if (source[currentPosition + lookAhead] == '\"') {
1023 // currentPosition += lookAhead + 1;
1027 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1029 if (currentCharacter == '\\') {
// remember where the escape starts so that its length can be computed below
1030 int escapeSize = currentPosition;
1031 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1032 //scanEscapeCharacter make a side effect on this value and we need
1033 // the previous value few lines down this one
1034 scanDoubleQuotedEscapeCharacter();
// number of source characters consumed by the escape sequence
1035 escapeSize = currentPosition - escapeSize;
// a pointer of 0 means the buffer is not in use yet for this token
1036 if (withoutUnicodePtr == 0) {
1037 //buffer all the entries that have been left aside....
1038 withoutUnicodePtr = currentPosition - escapeSize - 1
1040 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1042 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1043 } else { //overwrite the / in the buffer
1044 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1045 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1046 // where only one is correct
1047 withoutUnicodePtr--;
1051 // consume next character
1052 unicodeAsBackSlash = false;
1053 currentCharacter = source[currentPosition++];
1054 // if (((currentCharacter = source[currentPosition++]) == '\\')
1055 // && (source[currentPosition] == 'u')) {
1056 // getNextUnicodeChar();
// once the buffer is in use, every consumed character is appended to it
1058 if (withoutUnicodePtr != 0) {
1059 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing quote is missing
1063 } catch (IndexOutOfBoundsException e) {
1064 throw new InvalidInputException(UNTERMINATED_STRING);
1065 } catch (InvalidInputException e) {
// NOTE(review): presumably the exception is rethrown after the relocation below - confirm
1066 if (e.getMessage().equals(INVALID_ESCAPE)) {
1067 // relocate if finding another quote fairly close: thus unicode
1068 // '/u000D' will be fully consumed
// look at most 50 characters ahead for a double quote to resume after
1069 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1070 if (currentPosition + lookAhead == source.length)
1072 if (source[currentPosition + lookAhead] == '\n')
1074 if (source[currentPosition + lookAhead] == '\"') {
1075 currentPosition += lookAhead + 1;
1082 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1083 // //$NON-NLS-?$ where ? is an
// start a new NLS line on demand and register it in lines
1085 if (currentLine == null) {
1086 currentLine = new NLSLine();
1087 lines.add(currentLine);
1089 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1090 startPosition, currentPosition - 1));
/**
 * Scans the next token starting at {@code currentPosition} and returns its
 * type as one of the {@code TokenName} constants.
 * <p>
 * The scan depends on the character on top of {@code encapsedStringStack}:
 * <ul>
 * <li>a string delimiter on top: the text up to the matching delimiter is
 * consumed (escape sequences go through
 * {@code scanDoubleQuotedEscapeCharacter()}) and an encapsed string token or
 * {@code TokenNameSTRING} is returned;</li>
 * <li>a dollar sign on top: a reduced token set is recognized (braces,
 * brackets, arrow, variables, identifiers and numbers);</li>
 * <li>otherwise: white space is skipped, then operators, string literals,
 * heredocs, comments, variables, identifiers and numbers are recognized.</li>
 * </ul>
 * Running past the end of {@code source} in the outermost scan yields
 * {@code TokenNameEOF}.
 *
 * @return the type of the token that was scanned
 * @throws InvalidInputException for malformed input, for instance with
 *         {@code UNTERMINATED_COMMENT} when a block comment is never closed
 */
1093 public int getNextToken() throws InvalidInputException {
// NOTE(review): presumably only reached while outside PHP code - the guarding condition is not visible here, confirm
1095 return getInlinedHTML(currentPosition);
1098 this.wasAcr = false;
// NOTE(review): presumably only done in diet mode - the guarding condition is not visible here, confirm
1100 jumpOverMethodBody();
1102 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1106 withoutUnicodePtr = 0;
1107 //start with a new token
1108 char encapsedChar = ' ';
// the top of the stack, if any, selects the scanning mode for this call
1109 if (!encapsedStringStack.isEmpty()) {
1110 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// a delimiter is on top: the raw text of an encapsed string is being scanned
1112 if (encapsedChar != '$' && encapsedChar != ' ') {
1113 currentCharacter = source[currentPosition++];
// the closing delimiter comes first: report it as its own token
1114 if (currentCharacter == encapsedChar) {
1115 switch (currentCharacter) {
1117 return TokenNameEncapsedString0;
1119 return TokenNameEncapsedString1;
1121 return TokenNameEncapsedString2;
1124 while (currentCharacter != encapsedChar) {
1125 /** ** in PHP \r and \n are valid in string literals *** */
1126 switch (currentCharacter) {
1128 int escapeSize = currentPosition;
1129 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1130 //scanEscapeCharacter make a side effect on this value and
1131 // we need the previous value few lines down this one
1132 scanDoubleQuotedEscapeCharacter();
1133 escapeSize = currentPosition - escapeSize;
1134 if (withoutUnicodePtr == 0) {
1135 //buffer all the entries that have been left aside....
1136 withoutUnicodePtr = currentPosition - escapeSize - 1
1138 System.arraycopy(source, startPosition,
1139 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1140 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1141 } else { //overwrite the / in the buffer
1142 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1143 if (backSlashAsUnicodeInString) { //there are TWO \ in
1144 withoutUnicodePtr--;
1149 if (isPHPIdentifierStart(source[currentPosition])
1150 || source[currentPosition] == '{') {
// pushing a dollar sign makes the following calls scan an embedded expression
1152 encapsedStringStack.push(new Character('$'));
1153 return TokenNameSTRING;
1157 if (source[currentPosition] == '$') { // CURLY_OPEN
1159 encapsedStringStack.push(new Character('$'));
1160 return TokenNameSTRING;
1163 // consume next character
1164 unicodeAsBackSlash = false;
1165 currentCharacter = source[currentPosition++];
1166 if (withoutUnicodePtr != 0) {
1167 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1172 return TokenNameSTRING;
1174 // ---------Consume white space and handles startPosition---------
1175 int whiteStart = currentPosition;
1176 startPosition = currentPosition;
1177 currentCharacter = source[currentPosition++];
// a dollar sign is on top: scan the tokens of an expression embedded in a string
1178 if (encapsedChar == '$') {
1179 switch (currentCharacter) {
1181 currentCharacter = source[currentPosition++];
1182 return TokenNameSTRING;
1184 if (encapsedChar == '$') {
1185 if (getNextChar('$'))
1186 return TokenNameCURLY_OPEN;
1188 return TokenNameLBRACE;
1190 return TokenNameRBRACE;
1192 return TokenNameLBRACKET;
1194 return TokenNameRBRACKET;
1196 if (tokenizeStrings) {
1197 consumeStringConstant();
1198 return TokenNameStringConstant;
1200 return TokenNameEncapsedString1;
1202 return TokenNameEncapsedString2;
1204 if (tokenizeStrings) {
1205 consumeStringInterpolated();
1206 return TokenNameStringInterpolated;
1208 return TokenNameEncapsedString0;
1210 if (getNextChar('>'))
1211 return TokenNameMINUS_GREATER;
1212 return TokenNameSTRING;
1214 if (currentCharacter == '$') {
// saved so that the scanner can step back when no variable follows the dollar sign
1215 int oldPosition = currentPosition;
1217 currentCharacter = source[currentPosition++];
1218 if (currentCharacter == '{') {
1219 return TokenNameDOLLAR_LBRACE;
1221 if (isPHPIdentifierStart(currentCharacter)) {
1222 return scanIdentifierOrKeyword(true);
1224 currentPosition = oldPosition;
1225 return TokenNameSTRING;
1227 } catch (IndexOutOfBoundsException e) {
1228 currentPosition = oldPosition;
1229 return TokenNameSTRING;
1232 if (isPHPIdentifierStart(currentCharacter))
1233 return scanIdentifierOrKeyword(false);
1234 if (Character.isDigit(currentCharacter))
1235 return scanNumber(false);
1236 return TokenNameERROR;
1239 // boolean isWhiteSpace;
1241 while ((currentCharacter == ' ')
1242 || Character.isWhitespace(currentCharacter)) {
1243 startPosition = currentPosition;
1244 currentCharacter = source[currentPosition++];
1245 // if (((currentCharacter = source[currentPosition++]) == '\\')
1246 // && (source[currentPosition] == 'u')) {
1247 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1249 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1250 checkNonExternalizeString();
1251 if (recordLineSeparator) {
1252 pushLineSeparator();
1257 // isWhiteSpace = (currentCharacter == ' ')
1258 // || Character.isWhitespace(currentCharacter);
1261 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1262 // reposition scanner in case we are interested by spaces as tokens
1264 startPosition = whiteStart;
1265 return TokenNameWHITESPACE;
1267 //little trick to get out in the middle of a source computation
1268 if (currentPosition > eofPosition)
1269 return TokenNameEOF;
1270 // ---------Identify the next token-------------
1271 switch (currentCharacter) {
1273 return getCastOrParen();
1275 return TokenNameRPAREN;
1277 return TokenNameLBRACE;
1279 return TokenNameRBRACE;
1281 return TokenNameLBRACKET;
1283 return TokenNameRBRACKET;
1285 return TokenNameSEMICOLON;
1287 return TokenNameCOMMA;
1289 if (getNextChar('='))
1290 return TokenNameDOT_EQUAL;
1291 if (getNextCharAsDigit())
1292 return scanNumber(true);
1293 return TokenNameDOT;
// the two-argument getNextChar yields 0 when the first candidate matched
1297 if ((test = getNextChar('+', '=')) == 0)
1298 return TokenNamePLUS_PLUS;
1300 return TokenNamePLUS_EQUAL;
1301 return TokenNamePLUS;
1306 if ((test = getNextChar('-', '=')) == 0)
1307 return TokenNameMINUS_MINUS;
1309 return TokenNameMINUS_EQUAL;
1310 if (getNextChar('>'))
1311 return TokenNameMINUS_GREATER;
1312 return TokenNameMINUS;
1315 if (getNextChar('='))
1316 return TokenNameTWIDDLE_EQUAL;
1317 return TokenNameTWIDDLE;
1319 if (getNextChar('=')) {
1320 if (getNextChar('=')) {
1321 return TokenNameNOT_EQUAL_EQUAL;
1323 return TokenNameNOT_EQUAL;
1325 return TokenNameNOT;
1327 if (getNextChar('='))
1328 return TokenNameMULTIPLY_EQUAL;
1329 return TokenNameMULTIPLY;
1331 if (getNextChar('='))
1332 return TokenNameREMAINDER_EQUAL;
1333 return TokenNameREMAINDER;
1336 int oldPosition = currentPosition;
1338 currentCharacter = source[currentPosition++];
1339 } catch (IndexOutOfBoundsException e) {
1340 currentPosition = oldPosition;
1341 return TokenNameLESS;
1343 switch (currentCharacter) {
1345 return TokenNameLESS_EQUAL;
1347 return TokenNameNOT_EQUAL;
1349 if (getNextChar('='))
1350 return TokenNameLEFT_SHIFT_EQUAL;
// a third less-than sign starts a heredoc: skip white space, then read the label
1351 if (getNextChar('<')) {
1352 currentCharacter = source[currentPosition++];
1353 while (Character.isWhitespace(currentCharacter)) {
1354 currentCharacter = source[currentPosition++];
1356 int heredocStart = currentPosition - 1;
1357 int heredocLength = 0;
1358 if (isPHPIdentifierStart(currentCharacter)) {
1359 currentCharacter = source[currentPosition++];
1361 return TokenNameERROR;
1363 while (isPHPIdentifierPart(currentCharacter)) {
1364 currentCharacter = source[currentPosition++];
1366 heredocLength = currentPosition - heredocStart - 1;
1367 // heredoc end-tag determination
1368 boolean endTag = true;
1371 ch = source[currentPosition++];
1372 if (ch == '\r' || ch == '\n') {
1373 if (recordLineSeparator) {
1374 pushLineSeparator();
// after a line break, compare the following characters with the heredoc label
1378 for (int i = 0; i < heredocLength; i++) {
1379 if (source[currentPosition + i] != source[heredocStart
1386 currentPosition += heredocLength - 1;
1387 currentCharacter = source[currentPosition++];
1388 break; // do...while loop
1394 return TokenNameHEREDOC;
1396 return TokenNameLEFT_SHIFT;
1398 currentPosition = oldPosition;
1399 return TokenNameLESS;
1404 if ((test = getNextChar('=', '>')) == 0)
1405 return TokenNameGREATER_EQUAL;
1407 if ((test = getNextChar('=', '>')) == 0)
1408 return TokenNameRIGHT_SHIFT_EQUAL;
1409 return TokenNameRIGHT_SHIFT;
1411 return TokenNameGREATER;
1414 if (getNextChar('=')) {
1415 if (getNextChar('=')) {
1416 return TokenNameEQUAL_EQUAL_EQUAL;
1418 return TokenNameEQUAL_EQUAL;
1420 if (getNextChar('>'))
1421 return TokenNameEQUAL_GREATER;
1422 return TokenNameEQUAL;
1426 if ((test = getNextChar('&', '=')) == 0)
1427 return TokenNameAND_AND;
1429 return TokenNameAND_EQUAL;
1430 return TokenNameAND;
1435 if ((test = getNextChar('|', '=')) == 0)
1436 return TokenNameOR_OR;
1438 return TokenNameOR_EQUAL;
1442 if (getNextChar('='))
1443 return TokenNameXOR_EQUAL;
1444 return TokenNameXOR;
// a question mark followed by a greater-than sign: the inline HTML that follows is scanned
1446 if (getNextChar('>')) {
1448 if (currentPosition == source.length) {
1450 return TokenNameINLINE_HTML;
1452 return getInlinedHTML(currentPosition - 2);
1454 return TokenNameQUESTION;
1456 if (getNextChar(':'))
1457 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1458 return TokenNameCOLON;
1462 consumeStringConstant();
1463 return TokenNameStringConstant;
1465 if (tokenizeStrings) {
1466 consumeStringLiteral();
1467 return TokenNameStringLiteral;
1469 return TokenNameEncapsedString2;
1471 if (tokenizeStrings) {
1472 consumeStringInterpolated();
1473 return TokenNameStringInterpolated;
1475 return TokenNameEncapsedString0;
1479 char startChar = currentCharacter;
1480 if (getNextChar('=')) {
1481 return TokenNameDIVIDE_EQUAL;
// a hash sign, or a slash followed by a second slash, starts a line comment
1484 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1486 this.lastCommentLinePosition = this.currentPosition;
1487 int endPositionForLineComment = 0;
1488 try { //get the next char
1489 currentCharacter = source[currentPosition++];
1490 // if (((currentCharacter = source[currentPosition++])
1492 // && (source[currentPosition] == 'u')) {
1493 // //-------------unicode handling ------------
1494 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1495 // currentPosition++;
1496 // while (source[currentPosition] == 'u') {
1497 // currentPosition++;
1500 // Character.getNumericValue(source[currentPosition++]))
1504 // Character.getNumericValue(source[currentPosition++]))
1508 // Character.getNumericValue(source[currentPosition++]))
1512 // Character.getNumericValue(source[currentPosition++]))
1516 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1518 // currentCharacter =
1519 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1522 //handle the \\u case manually into comment
1523 // if (currentCharacter == '\\') {
1524 // if (source[currentPosition] == '\\')
1525 // currentPosition++;
1526 // } //jump over the \\
1527 boolean isUnicode = false;
1528 while (currentCharacter != '\r' && currentCharacter != '\n') {
1529 this.lastCommentLinePosition = this.currentPosition;
// a question mark followed by a greater-than sign ends the line comment early
1530 if (currentCharacter == '?') {
1531 if (getNextChar('>')) {
1532 startPosition = currentPosition - 2;
1534 return TokenNameINLINE_HTML;
1539 currentCharacter = source[currentPosition++];
1540 // if (((currentCharacter = source[currentPosition++])
1542 // && (source[currentPosition] == 'u')) {
1543 // isUnicode = true;
1544 // //-------------unicode handling ------------
1545 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1546 // currentPosition++;
1547 // while (source[currentPosition] == 'u') {
1548 // currentPosition++;
1551 // Character.getNumericValue(source[currentPosition++]))
1555 // Character.getNumericValue(
1556 // source[currentPosition++]))
1560 // Character.getNumericValue(
1561 // source[currentPosition++]))
1565 // Character.getNumericValue(
1566 // source[currentPosition++]))
1570 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1572 // currentCharacter =
1573 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1576 //handle the \\u case manually into comment
1577 // if (currentCharacter == '\\') {
1578 // if (source[currentPosition] == '\\')
1579 // currentPosition++;
1580 // } //jump over the \\
1583 endPositionForLineComment = currentPosition - 6;
1585 endPositionForLineComment = currentPosition - 1;
1587 // recordComment(false);
1588 recordComment(TokenNameCOMMENT_LINE);
1589 if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1590 if ((currentCharacter == '\r')
1591 || (currentCharacter == '\n')) {
1592 checkNonExternalizeString();
1593 if (recordLineSeparator) {
1595 pushUnicodeLineSeparator();
1597 pushLineSeparator();
1603 if (tokenizeComments) {
1605 currentPosition = endPositionForLineComment;
1606 // reset one character behind
1608 return TokenNameCOMMENT_LINE;
1610 } catch (IndexOutOfBoundsException e) { //an eof will then
1612 if (tokenizeComments) {
1614 // reset one character behind
1615 return TokenNameCOMMENT_LINE;
1621 //traditional and annotation comment
1622 boolean isJavadoc = false, star = false;
1623 // consume next character
1624 unicodeAsBackSlash = false;
1625 currentCharacter = source[currentPosition++];
1626 // if (((currentCharacter = source[currentPosition++]) ==
1628 // && (source[currentPosition] == 'u')) {
1629 // getNextUnicodeChar();
1631 // if (withoutUnicodePtr != 0) {
1632 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1633 // currentCharacter;
1636 if (currentCharacter == '*') {
1640 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1641 checkNonExternalizeString();
1642 if (recordLineSeparator) {
1643 pushLineSeparator();
1648 try { //get the next char
1649 currentCharacter = source[currentPosition++];
1650 // if (((currentCharacter = source[currentPosition++])
1652 // && (source[currentPosition] == 'u')) {
1653 // //-------------unicode handling ------------
1654 // getNextUnicodeChar();
1656 //handle the \\u case manually into comment
1657 // if (currentCharacter == '\\') {
1658 // if (source[currentPosition] == '\\')
1659 // currentPosition++;
1660 // //jump over the \\
1662 // empty comment is not a javadoc /**/
1663 if (currentCharacter == '/') {
1666 //loop until end of comment */
// star remembers whether the previous character was an asterisk
1667 while ((currentCharacter != '/') || (!star)) {
1668 if ((currentCharacter == '\r')
1669 || (currentCharacter == '\n')) {
1670 checkNonExternalizeString();
1671 if (recordLineSeparator) {
1672 pushLineSeparator();
1677 star = currentCharacter == '*';
1679 currentCharacter = source[currentPosition++];
1680 // if (((currentCharacter = source[currentPosition++])
1682 // && (source[currentPosition] == 'u')) {
1683 // //-------------unicode handling ------------
1684 // getNextUnicodeChar();
1686 //handle the \\u case manually into comment
1687 // if (currentCharacter == '\\') {
1688 // if (source[currentPosition] == '\\')
1689 // currentPosition++;
1690 // } //jump over the \\
1692 //recordComment(isJavadoc);
1694 recordComment(TokenNameCOMMENT_PHPDOC);
1696 recordComment(TokenNameCOMMENT_BLOCK);
1699 if (tokenizeComments) {
1701 return TokenNameCOMMENT_PHPDOC;
1702 return TokenNameCOMMENT_BLOCK;
// the end of source was reached before the block comment was closed
1704 } catch (IndexOutOfBoundsException e) {
1705 throw new InvalidInputException(UNTERMINATED_COMMENT);
1709 return TokenNameDIVIDE;
1713 return TokenNameEOF;
1714 //the atEnd may not be <currentPosition == source.length> if
1715 // source is only some part of a real (external) stream
1716 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1718 if (currentCharacter == '$') {
// saved so that the scanner can step back when no variable name follows
1719 int oldPosition = currentPosition;
1721 currentCharacter = source[currentPosition++];
1722 if (isPHPIdentifierStart(currentCharacter)) {
1723 return scanIdentifierOrKeyword(true);
1725 currentPosition = oldPosition;
1726 return TokenNameDOLLAR;
1728 } catch (IndexOutOfBoundsException e) {
1729 currentPosition = oldPosition;
1730 return TokenNameDOLLAR;
1733 if (isPHPIdentifierStart(currentCharacter))
1734 return scanIdentifierOrKeyword(false);
1735 if (Character.isDigit(currentCharacter))
1736 return scanNumber(false);
1737 return TokenNameERROR;
1740 } //-----------------end switch while try--------------------
// running off the end of source while looking for a token means end of file
1741 catch (IndexOutOfBoundsException e) {
1744 return TokenNameEOF;
/**
 * Scans inline HTML by delegating to {@code getInlinedHTMLToken(start)}.
 * <p>
 * The extra pass that used to check the HTML tags when the token is
 * {@code TokenNameINLINE_HTML} is commented out, so that branch currently
 * does no additional work.
 *
 * @param start position passed on to {@code getInlinedHTMLToken}
 * @return NOTE(review): presumably the token obtained from
 *         {@code getInlinedHTMLToken} - the return statement is not visible
 *         here, confirm
 * @throws InvalidInputException declared for the delegated scan
 */
1747 private int getInlinedHTML(int start) throws InvalidInputException {
1748 int token = getInlinedHTMLToken(start);
1749 if (token == TokenNameINLINE_HTML) {
1750 // Stack stack = new Stack();
1751 // // scan html for errors
1752 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1753 // int lastPHPEndPos=0;
1754 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1755 // Tag tag=(Tag)i.next();
1757 // if (tag instanceof StartTag) {
1758 // StartTag startTag=(StartTag)tag;
1759 // // System.out.println("startTag: "+tag);
1760 // if (startTag.isServerTag()) {
1761 // // TODO : what to do with a server tag ?
1763 // // do whatever with HTML start tag
1764 // // use startTag.getElement() to find the element corresponding
1765 // // to this start tag which may be useful if you implement code
1767 // stack.push(startTag);
1770 // EndTag endTag=(EndTag)tag;
1771 // StartTag stag = (StartTag) stack.peek();
1772 //// System.out.println("endTag: "+tag);
1773 // // do whatever with HTML end tag.
1781 * InvalidInputException
1783 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Consumes inline HTML character by character while watching for a PHP open
// tag: a less-than sign and a question mark followed either by white space
// or by the letters p, h, p in any case. The token starts at the given
// start position and is reported as TokenNameINLINE_HTML.
1784 // int htmlPosition = start;
// already beyond the end of source: clamp the position and report end of file
1785 if (currentPosition > source.length) {
1786 currentPosition = source.length;
1787 return TokenNameEOF;
1789 startPosition = start;
1792 currentCharacter = source[currentPosition++];
1793 if (currentCharacter == '<') {
1794 if (getNextChar('?')) {
1795 currentCharacter = source[currentPosition++];
// short open tag: the question mark is directly followed by white space
1796 if ((currentCharacter == ' ')
1797 || Character.isWhitespace(currentCharacter)) {
1800 return TokenNameINLINE_HTML;
// long open tag: check for the letters p, h, p, each in upper or lower case
1802 boolean phpStart = (currentCharacter == 'P')
1803 || (currentCharacter == 'p');
1805 int test = getNextChar('H', 'h');
1807 test = getNextChar('P', 'p');
1811 return TokenNameINLINE_HTML;
1818 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1819 if (recordLineSeparator) {
1820 pushLineSeparator();
1825 } //-----------------while--------------------
1827 return TokenNameINLINE_HTML;
1828 } //-----------------try--------------------
// the end of source was reached: whatever was read is still reported as inline HTML
1829 catch (IndexOutOfBoundsException e) {
1830 startPosition = start;
1834 return TokenNameINLINE_HTML;
1836 // public final void getNextUnicodeChar()
1837 // throws IndexOutOfBoundsException, InvalidInputException {
1839 // //handle the case of unicode.
1840 // //when a unicode appears then we must use a buffer that holds char
1842 // //At the end of this method currentCharacter holds the new visited char
1843 // //and currentPosition points right next after it
1845 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1847 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1848 // currentPosition++;
1849 // while (source[currentPosition] == 'u') {
1850 // currentPosition++;
1854 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1856 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1858 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1860 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1862 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1864 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1865 // //need the unicode buffer
1866 // if (withoutUnicodePtr == 0) {
1867 // //buffer all the entries that have been left aside....
1868 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1869 // System.arraycopy(
1872 // withoutUnicodeBuffer,
1874 // withoutUnicodePtr);
1876 // //fill the buffer with the char
1877 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1879 // unicodeAsBackSlash = currentCharacter == '\\';
1882 * Tokenize a method body, assuming that curly brackets are properly
1885 public final void jumpOverMethodBody() {
// Skips over the content of a method body without producing tokens. In a
// loop it consumes white space, quoted literals (escape sequences go through
// scanDoubleQuotedEscapeCharacter), line and block comments, identifiers
// (with or without a leading dollar sign) and numbers. Line separators are
// still recorded when recordLineSeparator is set.
// NOTE(review): the catch clauses at the bottom appear to swallow both
// IndexOutOfBoundsException and InvalidInputException - confirm
1886 this.wasAcr = false;
1889 while (true) { //loop for jumping over comments
1890 // ---------Consume white space and handles startPosition---------
1891 boolean isWhiteSpace;
1893 startPosition = currentPosition;
1894 currentCharacter = source[currentPosition++];
1895 // if (((currentCharacter = source[currentPosition++]) == '\\')
1896 // && (source[currentPosition] == 'u')) {
1897 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1899 if (recordLineSeparator
1900 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1901 pushLineSeparator();
1902 isWhiteSpace = Character.isWhitespace(currentCharacter);
1904 } while (isWhiteSpace);
1905 // -------consume token until } is found---------
1906 switch (currentCharacter) {
1918 test = getNextChar('\\');
1921 scanDoubleQuotedEscapeCharacter();
1922 } catch (InvalidInputException ex) {
1925 // try { // consume next character
1926 unicodeAsBackSlash = false;
1927 currentCharacter = source[currentPosition++];
1928 // if (((currentCharacter = source[currentPosition++]) == '\\')
1929 // && (source[currentPosition] == 'u')) {
1930 // getNextUnicodeChar();
1932 if (withoutUnicodePtr != 0) {
1933 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1936 // } catch (InvalidInputException ex) {
1944 // try { // consume next character
1945 unicodeAsBackSlash = false;
1946 currentCharacter = source[currentPosition++];
1947 // if (((currentCharacter = source[currentPosition++]) == '\\')
1948 // && (source[currentPosition] == 'u')) {
1949 // getNextUnicodeChar();
1951 if (withoutUnicodePtr != 0) {
1952 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1955 // } catch (InvalidInputException ex) {
// read up to the closing double quote of the string literal
1957 while (currentCharacter != '"') {
1958 if (currentCharacter == '\r') {
1959 if (source[currentPosition] == '\n')
1962 // the string cannot go further that the line
1964 if (currentCharacter == '\n') {
1966 // the string cannot go further that the line
1968 if (currentCharacter == '\\') {
1970 scanDoubleQuotedEscapeCharacter();
1971 } catch (InvalidInputException ex) {
1974 // try { // consume next character
1975 unicodeAsBackSlash = false;
1976 currentCharacter = source[currentPosition++];
1977 // if (((currentCharacter = source[currentPosition++]) == '\\')
1978 // && (source[currentPosition] == 'u')) {
1979 // getNextUnicodeChar();
1981 if (withoutUnicodePtr != 0) {
1982 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1985 // } catch (InvalidInputException ex) {
1988 } catch (IndexOutOfBoundsException e) {
// 0 means a second slash followed the first one: this is a line comment
1995 if ((test = getNextChar('/', '*')) == 0) {
1999 currentCharacter = source[currentPosition++];
2000 // if (((currentCharacter = source[currentPosition++]) ==
2002 // && (source[currentPosition] == 'u')) {
2003 // //-------------unicode handling ------------
2004 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2005 // currentPosition++;
2006 // while (source[currentPosition] == 'u') {
2007 // currentPosition++;
2010 // Character.getNumericValue(source[currentPosition++]))
2014 // Character.getNumericValue(source[currentPosition++]))
2018 // Character.getNumericValue(source[currentPosition++]))
2022 // Character.getNumericValue(source[currentPosition++]))
2025 // //error don't care of the value
2026 // currentCharacter = 'A';
2027 // } //something different from \n and \r
2029 // currentCharacter =
2030 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// skip the rest of the line
2033 while (currentCharacter != '\r' && currentCharacter != '\n') {
2035 currentCharacter = source[currentPosition++];
2036 // if (((currentCharacter = source[currentPosition++])
2038 // && (source[currentPosition] == 'u')) {
2039 // //-------------unicode handling ------------
2040 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2041 // currentPosition++;
2042 // while (source[currentPosition] == 'u') {
2043 // currentPosition++;
2046 // Character.getNumericValue(source[currentPosition++]))
2050 // Character.getNumericValue(source[currentPosition++]))
2054 // Character.getNumericValue(source[currentPosition++]))
2058 // Character.getNumericValue(source[currentPosition++]))
2061 // //error don't care of the value
2062 // currentCharacter = 'A';
2063 // } //something different from \n and \r
2065 // currentCharacter =
2066 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2070 if (recordLineSeparator
2071 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2072 pushLineSeparator();
2073 } catch (IndexOutOfBoundsException e) {
2074 } //an eof will then be generated
2078 //traditional and annotation comment
2079 boolean star = false;
2080 // try { // consume next character
2081 unicodeAsBackSlash = false;
2082 currentCharacter = source[currentPosition++];
2083 // if (((currentCharacter = source[currentPosition++]) == '\\')
2084 // && (source[currentPosition] == 'u')) {
2085 // getNextUnicodeChar();
2087 if (withoutUnicodePtr != 0) {
2088 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2091 // } catch (InvalidInputException ex) {
2093 if (currentCharacter == '*') {
2096 if (recordLineSeparator
2097 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2098 pushLineSeparator();
2099 try { //get the next char
2100 currentCharacter = source[currentPosition++];
2101 // if (((currentCharacter = source[currentPosition++]) ==
2103 // && (source[currentPosition] == 'u')) {
2104 // //-------------unicode handling ------------
2105 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2106 // currentPosition++;
2107 // while (source[currentPosition] == 'u') {
2108 // currentPosition++;
2111 // Character.getNumericValue(source[currentPosition++]))
2115 // Character.getNumericValue(source[currentPosition++]))
2119 // Character.getNumericValue(source[currentPosition++]))
2123 // Character.getNumericValue(source[currentPosition++]))
2126 // //error don't care of the value
2127 // currentCharacter = 'A';
2128 // } //something different from * and /
2130 // currentCharacter =
2131 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2134 //loop until end of comment */
// star remembers whether the previous character was an asterisk
2135 while ((currentCharacter != '/') || (!star)) {
2136 if (recordLineSeparator
2137 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2138 pushLineSeparator();
2139 star = currentCharacter == '*';
2141 currentCharacter = source[currentPosition++];
2142 // if (((currentCharacter = source[currentPosition++])
2144 // && (source[currentPosition] == 'u')) {
2145 // //-------------unicode handling ------------
2146 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2147 // currentPosition++;
2148 // while (source[currentPosition] == 'u') {
2149 // currentPosition++;
2152 // Character.getNumericValue(source[currentPosition++]))
2156 // Character.getNumericValue(source[currentPosition++]))
2160 // Character.getNumericValue(source[currentPosition++]))
2164 // Character.getNumericValue(source[currentPosition++]))
2167 // //error don't care of the value
2168 // currentCharacter = 'A';
2169 // } //something different from * and /
2171 // currentCharacter =
2172 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2176 } catch (IndexOutOfBoundsException e) {
// identifiers and variables are scanned only to move past them
2184 if (isPHPIdentifierStart(currentCharacter)
2185 || currentCharacter == '$') {
2187 scanIdentifierOrKeyword((currentCharacter == '$'));
2188 } catch (InvalidInputException ex) {
2192 if (Character.isDigit(currentCharacter)) {
2195 } catch (InvalidInputException ex) {
2201 //-----------------end switch while try--------------------
2202 } catch (IndexOutOfBoundsException e) {
2203 } catch (InvalidInputException e) {
2207 // public final boolean jumpOverUnicodeWhiteSpace()
2208 // throws InvalidInputException {
2210 // //handle the case of unicode. Jump over the next whiteSpace
2211 // //making startPosition pointing on the next available char
2212 // //On false, the currentCharacter is filled up with a potential
2216 // this.wasAcr = false;
2217 // int c1, c2, c3, c4;
2218 // int unicodeSize = 6;
2219 // currentPosition++;
2220 // while (source[currentPosition] == 'u') {
2221 // currentPosition++;
2225 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2227 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2229 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2231 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2233 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2236 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2237 // if (recordLineSeparator
2238 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2239 // pushLineSeparator();
2240 // if (Character.isWhitespace(currentCharacter))
2243 // //buffer the new char which is not a white space
2244 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2245 // //withoutUnicodePtr == 1 is true here
2247 // } catch (IndexOutOfBoundsException e) {
2248 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Builds a copy of {@code lineEnds} limited to its first
 * {@code linePtr + 1} entries, so that callers do not get the internal array.
 *
 * @return the bounded copy of the recorded line ends
 */
2251 public final int[] getLineEnds() {
2252 //return a bounded copy of this.lineEnds
// the new array is allocated and assigned to copy inside the arraycopy call
2254 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor for the character source of this scanner.
 * NOTE(review): presumably returns the {@code source} array itself rather
 * than a copy - the method body is not visible here, confirm.
 */
2257 public char[] getSource() {
/**
 * Returns the source of a one-character token, taken from
 * {@code source[startPosition]}.
 * <p>
 * According to the original comments, the most frequent single-character
 * identifiers are served from arrays that are built only once. The final
 * return statement allocates a fresh one-element array.
 * NOTE(review): the lookup of the shared arrays is not visible here - confirm.
 *
 * @return a one-element array holding the token character
 */
2260 final char[] optimizedCurrentTokenSource1() {
2261 //return always the same char[] build only once
2262 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2263 char charOne = source[startPosition];
// fallback: allocate a new array for this character
2318 return new char[]{charOne};
/**
 * Returns the source of a two-character token starting at
 * {@code startPosition}, trying to reuse an equal array cached in
 * {@code charArray_length[0]}.
 * <p>
 * The two characters are combined into a hash that selects one table. That
 * table is searched in two passes, the second one bounded by
 * {@code newEntry2}. When no equal entry is found, a new array is created
 * and stored in the table.
 * NOTE(review): presumably a matching cached array is returned as soon as it
 * is found, and the write index wraps around at {@code InternalTableSize} -
 * those statements are not visible here, confirm.
 *
 * @return a two-element array holding the token characters
 */
2321 final char[] optimizedCurrentTokenSource2() {
2322 //try to return the same char[] build only once
// c0 and c1 are assigned while the hash is being computed
2324 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2326 char[][] table = charArray_length[0][hash];
2328 while (++i < InternalTableSize) {
2329 char[] charArray = table[i];
2330 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2333 //---------other side---------
2335 int max = newEntry2;
2336 while (++i <= max) {
2337 char[] charArray = table[i];
2338 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2341 //--------add the entry-------
2342 if (++max >= InternalTableSize)
2345 table[max] = (r = new char[]{c0, c1});
/**
 * Returns the source of a three-character token starting at
 * {@code startPosition}, trying to reuse an equal array cached in
 * {@code charArray_length[1]}.
 * <p>
 * The three characters are combined into a hash that selects one table. That
 * table is searched in two passes, the second one bounded by
 * {@code newEntry3}. When no equal entry is found, a new array is created
 * and stored in the table.
 * NOTE(review): presumably a matching cached array is returned as soon as it
 * is found, and the write index wraps around at {@code InternalTableSize} -
 * those statements are not visible here, confirm.
 *
 * @return a three-element array holding the token characters
 */
2349 final char[] optimizedCurrentTokenSource3() {
2350 //try to return the same char[] build only once
// c0, c1 and c2 are assigned while the hash is being computed
2352 int hash = (((c0 = source[startPosition]) << 12)
2353 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2355 char[][] table = charArray_length[1][hash];
2357 while (++i < InternalTableSize) {
2358 char[] charArray = table[i];
2359 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2362 //---------other side---------
2364 int max = newEntry3;
2365 while (++i <= max) {
2366 char[] charArray = table[i];
2367 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2370 //--------add the entry-------
2371 if (++max >= InternalTableSize)
2374 table[max] = (r = new char[]{c0, c1, c2});
// Produces the char[] for the four-character token starting at
// startPosition, trying to hand back an array that was already built for the
// same text.
//
// The characters are folded into a long hash (shifts of 18, 12, 6 and 0; the
// expression continues on a line missing from this listing), which is cast to
// int to select a bucket in charArray_length[2]. The bucket is searched up to
// InternalTableSize, then ("other side") up to newEntry4; on a miss a new
// array is created and stored in the bucket.
// NOTE(review): declarations of i and r, the return statements and the
// wrap-around of `max` are not part of this listing.
2378 final char[] optimizedCurrentTokenSource4() {
2379 //try to return the same char[] build only once
2380 char c0, c1, c2, c3;
// only c0 is widened to long before shifting; the other terms are int shifts
2381 long hash = ((((long) (c0 = source[startPosition])) << 18)
2382 + ((c1 = source[startPosition + 1]) << 12)
2383 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2385 char[][] table = charArray_length[2][(int) hash];
// first pass: entries after the starting index, up to the end of the bucket
2387 while (++i < InternalTableSize) {
2388 char[] charArray = table[i];
2389 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2390 && (c3 == charArray[3]))
2393 //---------other side---------
// second pass: entries up to and including the slot recorded in newEntry4
2395 int max = newEntry4;
2396 while (++i <= max) {
2397 char[] charArray = table[i];
2398 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2399 && (c3 == charArray[3]))
2402 //--------add the entry-------
// miss: the slot after newEntry4 receives a freshly built array
2403 if (++max >= InternalTableSize)
2406 table[max] = (r = new char[]{c0, c1, c2, c3});
// Produces the char[] for the five-character token starting at
// startPosition, trying to hand back an array that was already built for the
// same text.
//
// The characters are folded into a long hash (shifts of 24, 18, 12, 6 and 0;
// the expression continues on a line missing from this listing), which is
// cast to int to select a bucket in charArray_length[3]. The bucket is
// searched up to InternalTableSize, then ("other side") up to newEntry5; on a
// miss a new array is created and stored in the bucket.
// NOTE(review): declarations of i and r, the return statements and the
// wrap-around of `max` are not part of this listing.
2410 final char[] optimizedCurrentTokenSource5() {
2411 //try to return the same char[] build only once
2412 char c0, c1, c2, c3, c4;
// c0 and c1 are widened to long before shifting; the remaining terms are int shifts
2413 long hash = ((((long) (c0 = source[startPosition])) << 24)
2414 + (((long) (c1 = source[startPosition + 1])) << 18)
2415 + ((c2 = source[startPosition + 2]) << 12)
2416 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2418 char[][] table = charArray_length[3][(int) hash];
// first pass: entries after the starting index, up to the end of the bucket
2420 while (++i < InternalTableSize) {
2421 char[] charArray = table[i];
2422 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2423 && (c3 == charArray[3]) && (c4 == charArray[4]))
2426 //---------other side---------
// second pass: entries up to and including the slot recorded in newEntry5
2428 int max = newEntry5;
2429 while (++i <= max) {
2430 char[] charArray = table[i];
2431 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2432 && (c3 == charArray[3]) && (c4 == charArray[4]))
2435 //--------add the entry-------
// miss: the slot after newEntry5 receives a freshly built array
2436 if (++max >= InternalTableSize)
2439 table[max] = (r = new char[]{c0, c1, c2, c3, c4});
// Produces the char[] for the six-character token starting at startPosition,
// trying to hand back an array that was already built for the same text.
//
// The characters are folded into a long hash (the expression continues on a
// line missing from this listing), which is cast to int to select a bucket in
// charArray_length[4]. The bucket is searched up to InternalTableSize, then
// ("other side") up to newEntry6; on a miss a new array is created and stored
// in the bucket.
// NOTE(review): declarations of i and r, the return statements and the
// wrap-around of `max` are not part of this listing.
2443 final char[] optimizedCurrentTokenSource6() {
2444 //try to return the same char[] build only once
2445 char c0, c1, c2, c3, c4, c5;
// NOTE(review): the shifts here are 32/24/18/12/6/0, whereas the 2- to
// 5-character variants step by 6 bits each (which would give 30 for c0).
// This only affects bucket selection, not matching -- confirm whether the
// irregular step is intentional.
2446 long hash = ((((long) (c0 = source[startPosition])) << 32)
2447 + (((long) (c1 = source[startPosition + 1])) << 24)
2448 + (((long) (c2 = source[startPosition + 2])) << 18)
2449 + ((c3 = source[startPosition + 3]) << 12)
2450 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2452 char[][] table = charArray_length[4][(int) hash];
// first pass: entries after the starting index, up to the end of the bucket
2454 while (++i < InternalTableSize) {
2455 char[] charArray = table[i];
2456 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2457 && (c3 == charArray[3]) && (c4 == charArray[4])
2458 && (c5 == charArray[5]))
2461 //---------other side---------
// second pass: entries up to and including the slot recorded in newEntry6
2463 int max = newEntry6;
2464 while (++i <= max) {
2465 char[] charArray = table[i];
2466 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2467 && (c3 == charArray[3]) && (c4 == charArray[4])
2468 && (c5 == charArray[5]))
2471 //--------add the entry-------
// miss: the slot after newEntry6 receives a freshly built array
2472 if (++max >= InternalTableSize)
2475 table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
// Records the position of the line separator held in currentCharacter
// ('\r' or '\n', located at currentPosition - 1) in lineEnds.
//
// - For '\r' the position is appended; if the next source character is '\n'
//   the entry just written is moved forward to that '\n', so CR+LF yields a
//   single line end.
// - For '\n' the previous entry is extended when it belongs to a '\r' that
//   immediately preceded it (wasAcr); otherwise a new entry is appended.
// - A position is not appended when the last recorded line end is already at
//   or beyond it (the guard's consequence is on a line missing from this
//   listing).
//
// lineEnds is grown by INCREMENT slots on demand: the store is attempted
// first and the array is enlarged in the IndexOutOfBoundsException handler.
// NOTE(review): several short lines (try/else/braces, updates of wasAcr, the
// body of the checkNonExternalizedStringLiterals branch) are missing from
// this listing. No throw of InvalidInputException is visible here.
2479 public final void pushLineSeparator() throws InvalidInputException {
2480 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2481 final int INCREMENT = 250;
2482 if (this.checkNonExternalizedStringLiterals) {
2483 // reinitialize the current line for non externalize strings purpose
2486 //currentCharacter is at position currentPosition-1
2488 if (currentCharacter == '\r') {
2489 int separatorPos = currentPosition - 1;
2490 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2492 //System.out.println("CR-" + separatorPos);
2494 lineEnds[++linePtr] = separatorPos;
2495 } catch (IndexOutOfBoundsException e) {
// ++linePtr was evaluated before the failed store, so linePtr already
// designates the slot to fill once the array has been enlarged
2496 //linePtr value is correct
2497 int oldLength = lineEnds.length;
2498 int[] old = lineEnds;
2499 lineEnds = new int[oldLength + INCREMENT];
2500 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2501 lineEnds[linePtr] = separatorPos;
2503 // look-ahead for merged cr+lf
2505 if (source[currentPosition] == '\n') {
2506 //System.out.println("look-ahead LF-" + currentPosition);
2507 lineEnds[linePtr] = currentPosition;
// reading source[currentPosition] can run past the end of the source; that
// case is caught here (handler body not part of this listing)
2513 } catch (IndexOutOfBoundsException e) {
2518 if (currentCharacter == '\n') {
2519 //must merge eventual cr followed by lf
2520 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2521 //System.out.println("merge LF-" + (currentPosition - 1));
2522 lineEnds[linePtr] = currentPosition - 1;
2524 int separatorPos = currentPosition - 1;
2525 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2527 // System.out.println("LF-" + separatorPos);
2529 lineEnds[++linePtr] = separatorPos;
2530 } catch (IndexOutOfBoundsException e) {
// same grow-on-overflow handling as in the '\r' branch
2531 //linePtr value is correct
2532 int oldLength = lineEnds.length;
2533 int[] old = lineEnds;
2534 lineEnds = new int[oldLength + INCREMENT];
2535 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2536 lineEnds[linePtr] = separatorPos;
// Variant of pushLineSeparator() for a '\r' or '\n' that was read as a
// unicode escape (see the original comment below). The structure is the same,
// but the recorded position is currentPosition - 6 instead of
// currentPosition - 1, and the CR/LF merge test compares against
// currentPosition - 7.
// NOTE(review): the offset 6 presumably corresponds to the six source
// characters of a \\uXXXX escape -- confirm. Several short lines
// (try/else/braces, updates of wasAcr) are missing from this listing.
2543 public final void pushUnicodeLineSeparator() {
2544 // isUnicode means that the \r or \n has been read as a unicode character
2545 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2546 final int INCREMENT = 250;
2547 //currentCharacter is at position currentPosition-1
2548 if (this.checkNonExternalizedStringLiterals) {
2549 // reinitialize the current line for non externalize strings purpose
2553 if (currentCharacter == '\r') {
2554 int separatorPos = currentPosition - 6;
2555 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2557 //System.out.println("CR-" + separatorPos);
2559 lineEnds[++linePtr] = separatorPos;
2560 } catch (IndexOutOfBoundsException e) {
// ++linePtr was evaluated before the failed store, so linePtr already
// designates the slot to fill once the array has been enlarged
2561 //linePtr value is correct
2562 int oldLength = lineEnds.length;
2563 int[] old = lineEnds;
2564 lineEnds = new int[oldLength + INCREMENT];
2565 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2566 lineEnds[linePtr] = separatorPos;
2568 // look-ahead for merged cr+lf
// NOTE(review): unlike pushLineSeparator(), no catch for reading past the
// end of source is visible around this look-ahead -- confirm in the full file
2569 if (source[currentPosition] == '\n') {
2570 //System.out.println("look-ahead LF-" + currentPosition);
2571 lineEnds[linePtr] = currentPosition;
2579 if (currentCharacter == '\n') {
2580 //must merge eventual cr followed by lf
2581 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2582 //System.out.println("merge LF-" + (currentPosition - 1));
2583 lineEnds[linePtr] = currentPosition - 6;
2585 int separatorPos = currentPosition - 6;
2586 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2588 // System.out.println("LF-" + separatorPos);
2590 lineEnds[++linePtr] = separatorPos;
2591 } catch (IndexOutOfBoundsException e) {
// same grow-on-overflow handling as in the '\r' branch
2592 //linePtr value is correct
2593 int oldLength = lineEnds.length;
2594 int[] old = lineEnds;
2595 lineEnds = new int[oldLength + INCREMENT];
2596 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2597 lineEnds[linePtr] = separatorPos;
// Pushes the comment that was just scanned onto the parallel commentStarts /
// commentStops stacks.
//
// token: the comment's token kind. The start is always this.startPosition.
// The stop defaults to this.currentPosition; for TokenNameCOMMENT_LINE it is
// stored as the negated lastCommentLinePosition and for
// TokenNameCOMMENT_BLOCK as the negated currentPosition, so the sign of the
// stored stop distinguishes those kinds from the default case.
// Both arrays are enlarged by 30 slots when the stack is full.
// NOTE(review): the switch header and break statements are missing from this
// listing.
2604 public void recordComment(int token) {
2606 int stopPosition = this.currentPosition;
2608 case TokenNameCOMMENT_LINE:
2609 stopPosition = -this.lastCommentLinePosition;
2611 case TokenNameCOMMENT_BLOCK:
2612 stopPosition = -this.currentPosition;
2616 // a new comment is recorded
2617 int length = this.commentStops.length;
2618 if (++this.commentPtr >= length) {
// the new arrays are assigned to the fields inside the arraycopy call itself
2619 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2620 //grows the positions buffers too
2621 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2623 this.commentStops[this.commentPtr] = stopPosition;
2624 this.commentStarts[this.commentPtr] = this.startPosition;
2626 // public final void recordComment(boolean isJavadoc) {
2627 // // a new annotation comment is recorded
2629 // commentStops[++commentPtr] = isJavadoc
2630 // ? currentPosition
2631 // : -currentPosition;
2632 // } catch (IndexOutOfBoundsException e) {
2633 // int oldStackLength = commentStops.length;
2634 // int[] oldStack = commentStops;
2635 // commentStops = new int[oldStackLength + 30];
2636 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2637 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2638 // //grows the positions buffers too
2639 // int[] old = commentStarts;
2640 // commentStarts = new int[oldStackLength + 30];
2641 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2643 // //the buffer is of a correct size here
2644 // commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that scanning can restart on a sub-range of the
// source.
//
// begin: assigned to initialPosition, startPosition and currentPosition.
// end:   last position to scan; eofPosition becomes end + 1, except when end
//        is Integer.MAX_VALUE, where it is used as-is to avoid overflow.
// The comment stack is emptied (commentPtr = -1).
// NOTE(review): at least one short line of this method is missing from this
// listing.
2646 public void resetTo(int begin, int end) {
2647 //reset the scanner to a given position where it may rescan again
2649 initialPosition = startPosition = currentPosition = begin;
2650 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2651 commentPtr = -1; // reset comment stack
// Reads the character that follows a backslash inside a single-quoted string
// (source[currentPosition], advancing currentPosition) and replaces
// currentCharacter with the character the escape denotes.
// NOTE(review): the case labels of the switch are missing from this listing,
// so which input maps to which assignment cannot be read off here; the
// visible results are a single quote and a backslash. No throw of
// InvalidInputException is visible in this method.
2653 public final void scanSingleQuotedEscapeCharacter()
2654 throws InvalidInputException {
2655 // the string with "\\u" is a legal string of two chars \ and u
2656 //thus we use a direct access to the source (for regular cases).
2657 // if (unicodeAsBackSlash) {
2658 // // consume next character
2659 // unicodeAsBackSlash = false;
2660 // if (((currentCharacter = source[currentPosition++]) == '\\')
2661 // && (source[currentPosition] == 'u')) {
2662 // getNextUnicodeChar();
2664 // if (withoutUnicodePtr != 0) {
2665 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2669 currentCharacter = source[currentPosition++];
2670 switch (currentCharacter) {
2672 currentCharacter = '\'';
2675 currentCharacter = '\\';
2678 currentCharacter = '\\';
// Reads the character that follows a backslash inside a double-quoted string
// (source[currentPosition], advancing currentPosition) and replaces
// currentCharacter with the character the escape denotes.
//
// Visible results: tab, line feed, carriage return, double quote, single
// quote, backslash and '$'. The assignments for backspace and form feed are
// commented out. Anything else is tried as an octal escape of one to three
// digits, where a three-digit form is only accepted when the first digit is
// in 0..3; the accumulated value becomes currentCharacter.
// Throws InvalidInputException(INVALID_ESCAPE) from the octal branch.
// NOTE(review): the case labels, break statements and the "ignore last
// character" adjustments are missing from this listing.
2682 public final void scanDoubleQuotedEscapeCharacter()
2683 throws InvalidInputException {
2684 // the string with "\\u" is a legal string of two chars \ and u
2685 //thus we use a direct access to the source (for regular cases).
2686 // if (unicodeAsBackSlash) {
2687 // // consume next character
2688 // unicodeAsBackSlash = false;
2689 // if (((currentCharacter = source[currentPosition++]) == '\\')
2690 // && (source[currentPosition] == 'u')) {
2691 // getNextUnicodeChar();
2693 // if (withoutUnicodePtr != 0) {
2694 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2698 currentCharacter = source[currentPosition++];
2699 switch (currentCharacter) {
2701 // currentCharacter = '\b';
2704 currentCharacter = '\t';
2707 currentCharacter = '\n';
2710 // currentCharacter = '\f';
2713 currentCharacter = '\r';
2716 currentCharacter = '\"';
2719 currentCharacter = '\'';
2722 currentCharacter = '\\';
2725 currentCharacter = '$';
2728 // -----------octal escape--------------
2730 // OctalDigit OctalDigit
2731 // ZeroToThree OctalDigit OctalDigit
2732 int number = Character.getNumericValue(currentCharacter);
2733 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7, i.e. a third digit must not be consumed
2734 boolean zeroToThreeNot = number > 3;
2735 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2736 int digit = Character.getNumericValue(currentCharacter);
2737 if (digit >= 0 && digit <= 7) {
2738 number = (number * 8) + digit;
2740 .isDigit(currentCharacter = source[currentPosition++])) {
2741 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2742 // Digit --> ignore last character
2745 digit = Character.getNumericValue(currentCharacter);
2746 if (digit >= 0 && digit <= 7) {
2747 // has read \ZeroToThree OctalDigit OctalDigit
2748 number = (number * 8) + digit;
2749 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2750 // --> ignore last character
2754 } else { // has read \OctalDigit NonDigit--> ignore last
2758 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2762 } else { // has read \OctalDigit --> ignore last character
2766 throw new InvalidInputException(INVALID_ESCAPE);
2767 currentCharacter = (char) number;
2770 // throw new InvalidInputException(INVALID_ESCAPE);
2773 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2774 // return scanIdentifierOrKeyword( false );
// Consumes the rest of an identifier and classifies it as a PHP keyword
// token, a magic constant token, TokenNameVariable or TokenNameIdentifier.
//
// isVariable: NOTE(review) the test on this flag is missing from this
//   listing; presumably it guards the `return TokenNameVariable` below.
// Returns: one of the TokenName... constants.
//
// How the matching works, as far as visible here:
// - identifier characters are consumed with getNextCharAsJavaIdentifierPart();
// - a one-character identifier is returned as TokenNameIdentifier at once;
// - otherwise the token text is copied into `data` in lower case, so keyword
//   recognition is case-insensitive;
// - dispatch is on the first letter, then on the token length, then the
//   remaining characters are compared one by one through data[++index].
// NOTE(review): the switch/case/else lines, several short returns and the
// `index = 0` resets are missing from this listing, so the exact nesting
// cannot be read off here.
2776 public int scanIdentifierOrKeyword(boolean isVariable)
2777 throws InvalidInputException {
2779 //first dispatch on the first char.
2780 //then the length. If there are several
2781 //keywords with the same length AND the same first char, then do another
2782 //dispatch on the second char :-)...cool....but fast !
2783 useAssertAsAnIndentifier = false;
2784 while (getNextCharAsJavaIdentifierPart()) {
2787 // if (new String(getCurrentTokenSource()).equals("$this")) {
2788 // return TokenNamethis;
2790 return TokenNameVariable;
2795 // if (withoutUnicodePtr == 0)
2796 //quick test on length == 1 but not on length > 12 while most identifier
2797 //have a length which is <= 12...but there are lots of identifier with
2800 if ((length = currentPosition - startPosition) == 1)
2801 return TokenNameIdentifier;
// lower-cased private copy of the token text; the comparisons below run on it
2803 data = new char[length];
2804 index = startPosition;
2805 for (int i = 0; i < length; i++) {
2806 data[i] = Character.toLowerCase(source[index + i]);
2810 // if ((length = withoutUnicodePtr) == 1)
2811 // return TokenNameIdentifier;
2812 // // data = withoutUnicodeBuffer;
2813 // data = new char[withoutUnicodeBuffer.length];
2814 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2815 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2819 firstLetter = data[index];
2820 switch (firstLetter) {
// magic constants: __file__, __line__, __class__, __method__, __function__
2825 if ((data[++index] == '_') && (data[++index] == 'f')
2826 && (data[++index] == 'i') && (data[++index] == 'l')
2827 && (data[++index] == 'e') && (data[++index] == '_')
2828 && (data[++index] == '_'))
2829 return TokenNameFILE;
2830 index = 0; //__LINE__
2831 if ((data[++index] == '_') && (data[++index] == 'l')
2832 && (data[++index] == 'i') && (data[++index] == 'n')
2833 && (data[++index] == 'e') && (data[++index] == '_')
2834 && (data[++index] == '_'))
2835 return TokenNameLINE;
2839 if ((data[++index] == '_') && (data[++index] == 'c')
2840 && (data[++index] == 'l') && (data[++index] == 'a')
2841 && (data[++index] == 's') && (data[++index] == 's')
2842 && (data[++index] == '_') && (data[++index] == '_'))
2843 return TokenNameCLASS_C;
2847 if ((data[++index] == '_') && (data[++index] == 'm')
2848 && (data[++index] == 'e') && (data[++index] == 't')
2849 && (data[++index] == 'h') && (data[++index] == 'o')
2850 && (data[++index] == 'd') && (data[++index] == '_')
2851 && (data[++index] == '_'))
2852 return TokenNameMETHOD_C;
2856 if ((data[++index] == '_') && (data[++index] == 'f')
2857 && (data[++index] == 'u') && (data[++index] == 'n')
2858 && (data[++index] == 'c') && (data[++index] == 't')
2859 && (data[++index] == 'i') && (data[++index] == 'o')
2860 && (data[++index] == 'n') && (data[++index] == '_')
2861 && (data[++index] == '_'))
2862 return TokenNameFUNC_C;
2865 return TokenNameIdentifier;
2867 // as and array abstract
2871 if ((data[++index] == 's')) {
2874 return TokenNameIdentifier;
2878 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2879 return TokenNameand;
2881 return TokenNameIdentifier;
2885 if ((data[++index] == 'r') && (data[++index] == 'r')
2886 && (data[++index] == 'a') && (data[++index] == 'y'))
2887 return TokenNamearray;
2889 return TokenNameIdentifier;
2891 if ((data[++index] == 'b') && (data[++index] == 's')
2892 && (data[++index] == 't') && (data[++index] == 'r')
2893 && (data[++index] == 'a') && (data[++index] == 'c')
2894 && (data[++index] == 't'))
2895 return TokenNameabstract;
2897 return TokenNameIdentifier;
2899 return TokenNameIdentifier;
// break
2905 if ((data[++index] == 'r') && (data[++index] == 'e')
2906 && (data[++index] == 'a') && (data[++index] == 'k'))
2907 return TokenNamebreak;
2909 return TokenNameIdentifier;
2911 return TokenNameIdentifier;
2914 //case catch class clone const continue
2917 if ((data[++index] == 'a') && (data[++index] == 's')
2918 && (data[++index] == 'e'))
2919 return TokenNamecase;
2921 return TokenNameIdentifier;
2923 if ((data[++index] == 'a') && (data[++index] == 't')
2924 && (data[++index] == 'c') && (data[++index] == 'h'))
2925 return TokenNamecatch;
2927 if ((data[++index] == 'l') && (data[++index] == 'a')
2928 && (data[++index] == 's') && (data[++index] == 's'))
2929 return TokenNameclass;
2931 if ((data[++index] == 'l') && (data[++index] == 'o')
2932 && (data[++index] == 'n') && (data[++index] == 'e'))
2933 return TokenNameclone;
2935 if ((data[++index] == 'o') && (data[++index] == 'n')
2936 && (data[++index] == 's') && (data[++index] == 't'))
2937 return TokenNameconst;
2939 return TokenNameIdentifier;
2941 if ((data[++index] == 'o') && (data[++index] == 'n')
2942 && (data[++index] == 't') && (data[++index] == 'i')
2943 && (data[++index] == 'n') && (data[++index] == 'u')
2944 && (data[++index] == 'e'))
2945 return TokenNamecontinue;
2947 return TokenNameIdentifier;
2949 return TokenNameIdentifier;
2952 // declare default do die
2953 // TODO delete define ==> no keyword !
2956 if ((data[++index] == 'o'))
2959 return TokenNameIdentifier;
2961 // if ((data[++index] == 'e')
2962 // && (data[++index] == 'f')
2963 // && (data[++index] == 'i')
2964 // && (data[++index] == 'n')
2965 // && (data[++index] == 'e'))
2966 // return TokenNamedefine;
2968 // return TokenNameIdentifier;
2970 if ((data[++index] == 'e') && (data[++index] == 'c')
2971 && (data[++index] == 'l') && (data[++index] == 'a')
2972 && (data[++index] == 'r') && (data[++index] == 'e'))
2973 return TokenNamedeclare;
2975 if ((data[++index] == 'e') && (data[++index] == 'f')
2976 && (data[++index] == 'a') && (data[++index] == 'u')
2977 && (data[++index] == 'l') && (data[++index] == 't'))
2978 return TokenNamedefault;
2980 return TokenNameIdentifier;
2982 return TokenNameIdentifier;
2985 //echo else exit elseif extends eval
// NOTE(review): the else-if branches below test data[index] with whatever
// value `index` was left at by the short-circuited condition before them.
// That is position 1 only when the very first comparison failed; after a
// partial match (e.g. second letter 'c', third letter not 'h') they examine a
// later position and may then read past the end of `data`. Confirm against
// the full file whether this is guarded.
2988 if ((data[++index] == 'c') && (data[++index] == 'h')
2989 && (data[++index] == 'o'))
2990 return TokenNameecho;
2991 else if ((data[index] == 'l') && (data[++index] == 's')
2992 && (data[++index] == 'e'))
2993 return TokenNameelse;
2994 else if ((data[index] == 'x') && (data[++index] == 'i')
2995 && (data[++index] == 't'))
2996 return TokenNameexit;
2997 else if ((data[index] == 'v') && (data[++index] == 'a')
2998 && (data[++index] == 'l'))
2999 return TokenNameeval;
3001 return TokenNameIdentifier;
// endif / empty (same reliance on the leftover value of index as above)
3004 if ((data[++index] == 'n') && (data[++index] == 'd')
3005 && (data[++index] == 'i') && (data[++index] == 'f'))
3006 return TokenNameendif;
3007 if ((data[index] == 'm') && (data[++index] == 'p')
3008 && (data[++index] == 't') && (data[++index] == 'y'))
3009 return TokenNameempty;
3011 return TokenNameIdentifier;
// endfor / elseif
3014 if ((data[++index] == 'n') && (data[++index] == 'd')
3015 && (data[++index] == 'f') && (data[++index] == 'o')
3016 && (data[++index] == 'r'))
3017 return TokenNameendfor;
3018 else if ((data[index] == 'l') && (data[++index] == 's')
3019 && (data[++index] == 'e') && (data[++index] == 'i')
3020 && (data[++index] == 'f'))
3021 return TokenNameelseif;
3023 return TokenNameIdentifier;
3025 if ((data[++index] == 'x') && (data[++index] == 't')
3026 && (data[++index] == 'e') && (data[++index] == 'n')
3027 && (data[++index] == 'd') && (data[++index] == 's'))
3028 return TokenNameextends;
3030 return TokenNameIdentifier;
3033 if ((data[++index] == 'n') && (data[++index] == 'd')
3034 && (data[++index] == 'w') && (data[++index] == 'h')
3035 && (data[++index] == 'i') && (data[++index] == 'l')
3036 && (data[++index] == 'e'))
3037 return TokenNameendwhile;
3039 return TokenNameIdentifier;
3042 if ((data[++index] == 'n') && (data[++index] == 'd')
3043 && (data[++index] == 's') && (data[++index] == 'w')
3044 && (data[++index] == 'i') && (data[++index] == 't')
3045 && (data[++index] == 'c') && (data[++index] == 'h'))
3046 return TokenNameendswitch;
3048 return TokenNameIdentifier;
// NOTE(review): the comparison below spells "enddeclare", yet the token
// returned is TokenNameendforeach -- the same token as the genuine
// "endforeach" test that follows. This looks like a copy/paste slip; confirm
// whether a dedicated enddeclare token was intended.
3051 if ((data[++index] == 'n') && (data[++index] == 'd')
3052 && (data[++index] == 'd') && (data[++index] == 'e')
3053 && (data[++index] == 'c') && (data[++index] == 'l')
3054 && (data[++index] == 'a') && (data[++index] == 'r')
3055 && (data[++index] == 'e'))
3056 return TokenNameendforeach;
3058 if ((data[++index] == 'n') // endforeach
3059 && (data[++index] == 'd') && (data[++index] == 'f')
3060 && (data[++index] == 'o') && (data[++index] == 'r')
3061 && (data[++index] == 'e') && (data[++index] == 'a')
3062 && (data[++index] == 'c') && (data[++index] == 'h'))
3063 return TokenNameendforeach;
3065 return TokenNameIdentifier;
3067 return TokenNameIdentifier;
3070 //for false final function
3073 if ((data[++index] == 'o') && (data[++index] == 'r'))
3074 return TokenNamefor;
3076 return TokenNameIdentifier;
3078 // if ((data[++index] == 'a') && (data[++index] == 'l')
3079 // && (data[++index] == 's') && (data[++index] == 'e'))
3080 // return TokenNamefalse;
3081 if ((data[++index] == 'i') && (data[++index] == 'n')
3082 && (data[++index] == 'a') && (data[++index] == 'l'))
3083 return TokenNamefinal;
3085 return TokenNameIdentifier;
3088 if ((data[++index] == 'o') && (data[++index] == 'r')
3089 && (data[++index] == 'e') && (data[++index] == 'a')
3090 && (data[++index] == 'c') && (data[++index] == 'h'))
3091 return TokenNameforeach;
3093 return TokenNameIdentifier;
3096 if ((data[++index] == 'u') && (data[++index] == 'n')
3097 && (data[++index] == 'c') && (data[++index] == 't')
3098 && (data[++index] == 'i') && (data[++index] == 'o')
3099 && (data[++index] == 'n'))
3100 return TokenNamefunction;
3102 return TokenNameIdentifier;
3104 return TokenNameIdentifier;
// global
3109 if ((data[++index] == 'l') && (data[++index] == 'o')
3110 && (data[++index] == 'b') && (data[++index] == 'a')
3111 && (data[++index] == 'l')) {
3112 return TokenNameglobal;
3115 return TokenNameIdentifier;
3117 //if int isset include include_once instanceof interface implements
3120 if (data[++index] == 'f')
3123 return TokenNameIdentifier;
3125 // if ((data[++index] == 'n') && (data[++index] == 't'))
3126 // return TokenNameint;
3128 // return TokenNameIdentifier;
3130 if ((data[++index] == 's') && (data[++index] == 's')
3131 && (data[++index] == 'e') && (data[++index] == 't'))
3132 return TokenNameisset;
3134 return TokenNameIdentifier;
3136 if ((data[++index] == 'n') && (data[++index] == 'c')
3137 && (data[++index] == 'l') && (data[++index] == 'u')
3138 && (data[++index] == 'd') && (data[++index] == 'e'))
3139 return TokenNameinclude;
3141 return TokenNameIdentifier;
3144 if ((data[++index] == 'n') && (data[++index] == 't')
3145 && (data[++index] == 'e') && (data[++index] == 'r')
3146 && (data[++index] == 'f') && (data[++index] == 'a')
3147 && (data[++index] == 'c') && (data[++index] == 'e'))
3148 return TokenNameinterface;
3150 return TokenNameIdentifier;
// instanceof / implements (the second test reuses the leftover value of index)
3153 if ((data[++index] == 'n') && (data[++index] == 's')
3154 && (data[++index] == 't') && (data[++index] == 'a')
3155 && (data[++index] == 'n') && (data[++index] == 'c')
3156 && (data[++index] == 'e') && (data[++index] == 'o')
3157 && (data[++index] == 'f'))
3158 return TokenNameinstanceof;
3159 if ((data[index] == 'm') && (data[++index] == 'p')
3160 && (data[++index] == 'l') && (data[++index] == 'e')
3161 && (data[++index] == 'm') && (data[++index] == 'e')
3162 && (data[++index] == 'n') && (data[++index] == 't')
3163 && (data[++index] == 's'))
3164 return TokenNameimplements;
3166 return TokenNameIdentifier;
3168 if ((data[++index] == 'n') && (data[++index] == 'c')
3169 && (data[++index] == 'l') && (data[++index] == 'u')
3170 && (data[++index] == 'd') && (data[++index] == 'e')
3171 && (data[++index] == '_') && (data[++index] == 'o')
3172 && (data[++index] == 'n') && (data[++index] == 'c')
3173 && (data[++index] == 'e'))
3174 return TokenNameinclude_once;
3176 return TokenNameIdentifier;
3178 return TokenNameIdentifier;
// list
3183 if ((data[++index] == 'i') && (data[++index] == 's')
3184 && (data[++index] == 't')) {
3185 return TokenNamelist;
3188 return TokenNameIdentifier;
// new (the test for "null" is commented out)
3193 if ((data[++index] == 'e') && (data[++index] == 'w'))
3194 return TokenNamenew;
3196 return TokenNameIdentifier;
3198 // if ((data[++index] == 'u') && (data[++index] == 'l')
3199 // && (data[++index] == 'l'))
3200 // return TokenNamenull;
3202 // return TokenNameIdentifier;
3204 return TokenNameIdentifier;
// second letter 'r' (result line missing from this listing); the test for
// "old_function" is commented out
3209 if (data[++index] == 'r') {
3213 // if (length == 12) {
3214 // if ((data[++index] == 'l')
3215 // && (data[++index] == 'd')
3216 // && (data[++index] == '_')
3217 // && (data[++index] == 'f')
3218 // && (data[++index] == 'u')
3219 // && (data[++index] == 'n')
3220 // && (data[++index] == 'c')
3221 // && (data[++index] == 't')
3222 // && (data[++index] == 'i')
3223 // && (data[++index] == 'o')
3224 // && (data[++index] == 'n')) {
3225 // return TokenNameold_function;
3228 return TokenNameIdentifier;
3230 // print public private protected
3233 if ((data[++index] == 'r') && (data[++index] == 'i')
3234 && (data[++index] == 'n') && (data[++index] == 't')) {
3235 return TokenNameprint;
3237 return TokenNameIdentifier;
3239 if ((data[++index] == 'u') && (data[++index] == 'b')
3240 && (data[++index] == 'l') && (data[++index] == 'i')
3241 && (data[++index] == 'c')) {
3242 return TokenNamepublic;
3244 return TokenNameIdentifier;
3246 if ((data[++index] == 'r') && (data[++index] == 'i')
3247 && (data[++index] == 'v') && (data[++index] == 'a')
3248 && (data[++index] == 't') && (data[++index] == 'e')) {
3249 return TokenNameprivate;
3251 return TokenNameIdentifier;
3253 if ((data[++index] == 'r') && (data[++index] == 'o')
3254 && (data[++index] == 't') && (data[++index] == 'e')
3255 && (data[++index] == 'c') && (data[++index] == 't')
3256 && (data[++index] == 'e') && (data[++index] == 'd')) {
3257 return TokenNameprotected;
3259 return TokenNameIdentifier;
3261 return TokenNameIdentifier;
3263 //return require require_once
3265 if ((data[++index] == 'e') && (data[++index] == 't')
3266 && (data[++index] == 'u') && (data[++index] == 'r')
3267 && (data[++index] == 'n')) {
3268 return TokenNamereturn;
3270 } else if (length == 7) {
3271 if ((data[++index] == 'e') && (data[++index] == 'q')
3272 && (data[++index] == 'u') && (data[++index] == 'i')
3273 && (data[++index] == 'r') && (data[++index] == 'e')) {
3274 return TokenNamerequire;
3276 } else if (length == 12) {
3277 if ((data[++index] == 'e') && (data[++index] == 'q')
3278 && (data[++index] == 'u') && (data[++index] == 'i')
3279 && (data[++index] == 'r') && (data[++index] == 'e')
3280 && (data[++index] == '_') && (data[++index] == 'o')
3281 && (data[++index] == 'n') && (data[++index] == 'c')
3282 && (data[++index] == 'e')) {
3283 return TokenNamerequire_once;
3286 return TokenNameIdentifier;
// static / switch
3291 if (data[++index] == 't')
3292 if ((data[++index] == 'a') && (data[++index] == 't')
3293 && (data[++index] == 'i') && (data[++index] == 'c')) {
3294 return TokenNamestatic;
3296 return TokenNameIdentifier;
3297 else if ((data[index] == 'w') && (data[++index] == 'i')
3298 && (data[++index] == 't') && (data[++index] == 'c')
3299 && (data[++index] == 'h'))
3300 return TokenNameswitch;
3302 return TokenNameIdentifier;
3304 return TokenNameIdentifier;
// try / throw (the test for "true" is commented out)
3310 if ((data[++index] == 'r') && (data[++index] == 'y'))
3311 return TokenNametry;
3313 return TokenNameIdentifier;
3315 // if ((data[++index] == 'r') && (data[++index] == 'u')
3316 // && (data[++index] == 'e'))
3317 // return TokenNametrue;
3319 // return TokenNameIdentifier;
3321 if ((data[++index] == 'h') && (data[++index] == 'r')
3322 && (data[++index] == 'o') && (data[++index] == 'w'))
3323 return TokenNamethrow;
3325 return TokenNameIdentifier;
3327 return TokenNameIdentifier;
// use / unset
3333 if ((data[++index] == 's') && (data[++index] == 'e'))
3334 return TokenNameuse;
3336 return TokenNameIdentifier;
3338 if ((data[++index] == 'n') && (data[++index] == 's')
3339 && (data[++index] == 'e') && (data[++index] == 't'))
3340 return TokenNameunset;
3342 return TokenNameIdentifier;
3344 return TokenNameIdentifier;
// var
3350 if ((data[++index] == 'a') && (data[++index] == 'r'))
3351 return TokenNamevar;
3353 return TokenNameIdentifier;
3355 return TokenNameIdentifier;
// while
3361 if ((data[++index] == 'h') && (data[++index] == 'i')
3362 && (data[++index] == 'l') && (data[++index] == 'e'))
3363 return TokenNamewhile;
3365 return TokenNameIdentifier;
3366 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3367 // (data[++index]=='e') && (data[++index]=='f')&&
3368 // (data[++index]=='p'))
3369 //return TokenNamewidefp ;
3371 //return TokenNameIdentifier;
3373 return TokenNameIdentifier;
// xor
3379 if ((data[++index] == 'o') && (data[++index] == 'r'))
3380 return TokenNamexor;
3382 return TokenNameIdentifier;
3384 return TokenNameIdentifier;
3387 return TokenNameIdentifier;
// Scans a numeric literal whose first digit is already in currentCharacter
// and returns its token kind.
//
// dotPrefix: true when the digit was preceded by a '.'; the literal then
//   starts out as floating and the hexadecimal/octal branch is skipped.
// Returns: TokenNameIntegerLiteral or TokenNameDoubleLiteral.
// Throws: InvalidInputException(INVALID_HEXA) when "0x"/"0X" is not followed
//   by a hexadecimal digit; InvalidInputException(INVALID_FLOAT) when an
//   exponent marker (with optional sign) is not followed by a digit.
//
// Shape of the scan, as far as visible here:
// - leading '0' followed by x/X: hexadecimal digits, integer literal;
// - leading '0' followed by digits: octal candidate, which still becomes a
//   double when a d/D suffix or a '.' (with optional exponent) follows;
// - otherwise: decimal digits, optional fraction, optional exponent and
//   optional d/D suffix; the final result depends on `floating`.
// NOTE(review): several short lines (closing braces, else branches and
// assignments to `floating`) are missing from this listing.
3390 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3391 //when entering this method the currentCharacter is the first
3392 //digit of the number , i.e. it may be preceded by a . when
3394 boolean floating = dotPrefix;
3395 if ((!dotPrefix) && (currentCharacter == '0')) {
3396 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3397 //force the first char of the hexa number to exist...
3398 // consume next character
3399 unicodeAsBackSlash = false;
3400 currentCharacter = source[currentPosition++];
3401 // if (((currentCharacter = source[currentPosition++]) == '\\')
3402 // && (source[currentPosition] == 'u')) {
3403 // getNextUnicodeChar();
3405 // if (withoutUnicodePtr != 0) {
3406 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3409 if (Character.digit(currentCharacter, 16) == -1)
3410 throw new InvalidInputException(INVALID_HEXA);
3412 while (getNextCharAsDigit(16)) {
3414 // if (getNextChar('l', 'L') >= 0)
3415 // return TokenNameLongLiteral;
3417 return TokenNameIntegerLiteral;
3419 //there is x or X in the number
3420 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3421 // 00078.0 is true !!!!! crazy language
3422 if (getNextCharAsDigit()) {
3423 //-------------potential octal-----------------
3424 while (getNextCharAsDigit()) {
3426 // if (getNextChar('l', 'L') >= 0) {
3427 // return TokenNameLongLiteral;
3430 // if (getNextChar('f', 'F') >= 0) {
3431 // return TokenNameFloatingPointLiteral;
3433 if (getNextChar('d', 'D') >= 0) {
3434 return TokenNameDoubleLiteral;
3435 } else { //make the distinction between octal and float ....
3436 if (getNextChar('.')) { //bingo ! ....
3437 while (getNextCharAsDigit()) {
3439 if (getNextChar('e', 'E') >= 0) {
3440 // consume next character
3441 unicodeAsBackSlash = false;
3442 currentCharacter = source[currentPosition++];
3443 // if (((currentCharacter = source[currentPosition++]) == '\\')
3444 // && (source[currentPosition] == 'u')) {
3445 // getNextUnicodeChar();
3447 // if (withoutUnicodePtr != 0) {
3448 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3451 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3452 // consume next character
3453 unicodeAsBackSlash = false;
3454 currentCharacter = source[currentPosition++];
3455 // if (((currentCharacter = source[currentPosition++]) == '\\')
3456 // && (source[currentPosition] == 'u')) {
3457 // getNextUnicodeChar();
3459 // if (withoutUnicodePtr != 0) {
3460 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3461 // currentCharacter;
3465 if (!Character.isDigit(currentCharacter))
3466 throw new InvalidInputException(INVALID_FLOAT);
3467 while (getNextCharAsDigit()) {
3470 // if (getNextChar('f', 'F') >= 0)
3471 // return TokenNameFloatingPointLiteral;
3472 getNextChar('d', 'D'); //jump over potential d or D
3473 return TokenNameDoubleLiteral;
3475 return TokenNameIntegerLiteral;
// ---- plain decimal literal ----
3482 while (getNextCharAsDigit()) {
3484 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3485 // return TokenNameLongLiteral;
3486 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3487 while (getNextCharAsDigit()) {
3491 //if floating is true both exponent and suffix may be optional
3492 if (getNextChar('e', 'E') >= 0) {
3494 // consume next character
3495 unicodeAsBackSlash = false;
3496 currentCharacter = source[currentPosition++];
3497 // if (((currentCharacter = source[currentPosition++]) == '\\')
3498 // && (source[currentPosition] == 'u')) {
3499 // getNextUnicodeChar();
3501 // if (withoutUnicodePtr != 0) {
3502 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3505 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3508 unicodeAsBackSlash = false;
3509 currentCharacter = source[currentPosition++];
3510 // if (((currentCharacter = source[currentPosition++]) == '\\')
3511 // && (source[currentPosition] == 'u')) {
3512 // getNextUnicodeChar();
3514 // if (withoutUnicodePtr != 0) {
3515 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3519 if (!Character.isDigit(currentCharacter))
3520 throw new InvalidInputException(INVALID_FLOAT);
3521 while (getNextCharAsDigit()) {
3524 if (getNextChar('d', 'D') >= 0)
3525 return TokenNameDoubleLiteral;
3526 // if (getNextChar('f', 'F') >= 0)
3527 // return TokenNameFloatingPointLiteral;
3528 //the long flag has been tested before
3529 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3532 * Search the line number corresponding to a specific position
3535 public final int getLineNumber(int position) {
3536 if (lineEnds == null)
3538 int length = linePtr + 1;
3541 int g = 0, d = length - 1;
3545 if (position < lineEnds[m]) {
3547 } else if (position > lineEnds[m]) {
3553 if (position < lineEnds[m]) {
3558 public void setPHPMode(boolean mode) {
3561 public final void setSource(char[] source) {
3562 //the source-buffer is set to sourceString
3563 if (source == null) {
3564 this.source = new char[0];
3566 this.source = source;
3569 initialPosition = currentPosition = 0;
3570 containsAssertKeyword = false;
3571 withoutUnicodeBuffer = new char[this.source.length];
3572 encapsedStringStack = new Stack();
3574 public String toString() {
3575 if (startPosition == source.length)
3576 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3577 if (currentPosition > source.length)
3578 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3579 char front[] = new char[startPosition];
3580 System.arraycopy(source, 0, front, 0, startPosition);
3581 int middleLength = (currentPosition - 1) - startPosition + 1;
3583 if (middleLength > -1) {
3584 middle = new char[middleLength];
3585 System.arraycopy(source, startPosition, middle, 0, middleLength);
3587 middle = new char[0];
3589 char end[] = new char[source.length - (currentPosition - 1)];
3590 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3591 - (currentPosition - 1) - 1);
3592 return new String(front)
3593 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3594 + new String(middle)
3595 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
// Returns a printable form of the terminal symbol id "act", for debugging.
// Fixed tokens (keywords, operators, punctuation, magic constants, casts)
// are rendered as their literal spelling; tokens that carry text
// (identifiers, variables, literals, strings, comments, whitespace, ...) are
// rendered as Kind(<current token source>) using getCurrentTokenSource().
// NOTE(review): in this listing a number of short lines are absent - for
// example the case labels in front of the "AND", "as", "do", "for", "if",
// "new", "OR", "var" and "XOR" returns and the returns for clone/const - so
// some returns appear without a label here. Verify against the upstream
// file before editing the switch.
3598 public final String toStringAction(int act) {
3600 case TokenNameERROR :
3601 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
// tokens that carry source text
3603 case TokenNameINLINE_HTML :
3604 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3605 case TokenNameIdentifier :
3606 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3607 case TokenNameVariable :
3608 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// keywords
3609 case TokenNameabstract :
3610 return "abstract"; //$NON-NLS-1$
3612 return "AND"; //$NON-NLS-1$
3613 case TokenNamearray :
3614 return "array"; //$NON-NLS-1$
3616 return "as"; //$NON-NLS-1$
3617 case TokenNamebreak :
3618 return "break"; //$NON-NLS-1$
3619 case TokenNamecase :
3620 return "case"; //$NON-NLS-1$
3621 case TokenNameclass :
3622 return "class"; //$NON-NLS-1$
3623 case TokenNamecatch :
3624 return "catch"; //$NON-NLS-1$
3625 case TokenNameclone :
3628 case TokenNameconst :
3631 case TokenNamecontinue :
3632 return "continue"; //$NON-NLS-1$
3633 case TokenNamedefault :
3634 return "default"; //$NON-NLS-1$
3635 // case TokenNamedefine :
3636 // return "define"; //$NON-NLS-1$
3638 return "do"; //$NON-NLS-1$
3639 case TokenNameecho :
3640 return "echo"; //$NON-NLS-1$
3641 case TokenNameelse :
3642 return "else"; //$NON-NLS-1$
3643 case TokenNameelseif :
3644 return "elseif"; //$NON-NLS-1$
3645 case TokenNameendfor :
3646 return "endfor"; //$NON-NLS-1$
3647 case TokenNameendforeach :
3648 return "endforeach"; //$NON-NLS-1$
3649 case TokenNameendif :
3650 return "endif"; //$NON-NLS-1$
3651 case TokenNameendswitch :
3652 return "endswitch"; //$NON-NLS-1$
3653 case TokenNameendwhile :
3654 return "endwhile"; //$NON-NLS-1$
3657 case TokenNameextends :
3658 return "extends"; //$NON-NLS-1$
3659 // case TokenNamefalse :
3660 // return "false"; //$NON-NLS-1$
3661 case TokenNamefinal :
3662 return "final"; //$NON-NLS-1$
3664 return "for"; //$NON-NLS-1$
3665 case TokenNameforeach :
3666 return "foreach"; //$NON-NLS-1$
3667 case TokenNamefunction :
3668 return "function"; //$NON-NLS-1$
3669 case TokenNameglobal :
3670 return "global"; //$NON-NLS-1$
3672 return "if"; //$NON-NLS-1$
3673 case TokenNameimplements :
3674 return "implements"; //$NON-NLS-1$
3675 case TokenNameinclude :
3676 return "include"; //$NON-NLS-1$
3677 case TokenNameinclude_once :
3678 return "include_once"; //$NON-NLS-1$
3679 case TokenNameinstanceof :
3680 return "instanceof"; //$NON-NLS-1$
3681 case TokenNameinterface :
3682 return "interface"; //$NON-NLS-1$
3683 case TokenNameisset :
3684 return "isset"; //$NON-NLS-1$
3685 case TokenNamelist :
3686 return "list"; //$NON-NLS-1$
3688 return "new"; //$NON-NLS-1$
3689 // case TokenNamenull :
3690 // return "null"; //$NON-NLS-1$
3692 return "OR"; //$NON-NLS-1$
3693 case TokenNameprint :
3694 return "print"; //$NON-NLS-1$
3695 case TokenNameprivate :
3696 return "private"; //$NON-NLS-1$
3697 case TokenNameprotected :
3698 return "protected"; //$NON-NLS-1$
3699 case TokenNamepublic :
3700 return "public"; //$NON-NLS-1$
3701 case TokenNamerequire :
3702 return "require"; //$NON-NLS-1$
3703 case TokenNamerequire_once :
3704 return "require_once"; //$NON-NLS-1$
3705 case TokenNamereturn :
3706 return "return"; //$NON-NLS-1$
3707 case TokenNamestatic :
3708 return "static"; //$NON-NLS-1$
3709 case TokenNameswitch :
3710 return "switch"; //$NON-NLS-1$
3711 // case TokenNametrue :
3712 // return "true"; //$NON-NLS-1$
3713 case TokenNameunset :
3714 return "unset"; //$NON-NLS-1$
3716 return "var"; //$NON-NLS-1$
3717 case TokenNamewhile :
3718 return "while"; //$NON-NLS-1$
3720 return "XOR"; //$NON-NLS-1$
3721 // case TokenNamethis :
3722 // return "$this"; //$NON-NLS-1$
// literals and strings: shown together with their source text
3723 case TokenNameIntegerLiteral :
3724 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3725 case TokenNameDoubleLiteral :
3726 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3727 case TokenNameStringLiteral :
3728 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3729 case TokenNameStringConstant :
3730 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3731 case TokenNameStringInterpolated :
3732 return "StringInterpolated(" + new String(getCurrentTokenSource())
3733 + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// the three string delimiters: backquote, single quote, double quote
3734 case TokenNameEncapsedString0 :
3735 return "`"; //$NON-NLS-1$
3736 case TokenNameEncapsedString1 :
3737 return "\'"; //$NON-NLS-1$
3738 case TokenNameEncapsedString2 :
3739 return "\""; //$NON-NLS-1$
3740 case TokenNameSTRING :
3741 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3742 case TokenNameHEREDOC :
3743 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// operators and punctuation
3744 case TokenNamePLUS_PLUS :
3745 return "++"; //$NON-NLS-1$
3746 case TokenNameMINUS_MINUS :
3747 return "--"; //$NON-NLS-1$
3748 case TokenNameEQUAL_EQUAL :
3749 return "=="; //$NON-NLS-1$
3750 case TokenNameEQUAL_EQUAL_EQUAL :
3751 return "==="; //$NON-NLS-1$
3752 case TokenNameEQUAL_GREATER :
3753 return "=>"; //$NON-NLS-1$
3754 case TokenNameLESS_EQUAL :
3755 return "<="; //$NON-NLS-1$
3756 case TokenNameGREATER_EQUAL :
3757 return ">="; //$NON-NLS-1$
3758 case TokenNameNOT_EQUAL :
3759 return "!="; //$NON-NLS-1$
3760 case TokenNameNOT_EQUAL_EQUAL :
3761 return "!=="; //$NON-NLS-1$
3762 case TokenNameLEFT_SHIFT :
3763 return "<<"; //$NON-NLS-1$
3764 case TokenNameRIGHT_SHIFT :
3765 return ">>"; //$NON-NLS-1$
3766 case TokenNamePLUS_EQUAL :
3767 return "+="; //$NON-NLS-1$
3768 case TokenNameMINUS_EQUAL :
3769 return "-="; //$NON-NLS-1$
3770 case TokenNameMULTIPLY_EQUAL :
3771 return "*="; //$NON-NLS-1$
3772 case TokenNameDIVIDE_EQUAL :
3773 return "/="; //$NON-NLS-1$
3774 case TokenNameAND_EQUAL :
3775 return "&="; //$NON-NLS-1$
3776 case TokenNameOR_EQUAL :
3777 return "|="; //$NON-NLS-1$
3778 case TokenNameXOR_EQUAL :
3779 return "^="; //$NON-NLS-1$
3780 case TokenNameREMAINDER_EQUAL :
3781 return "%="; //$NON-NLS-1$
3782 case TokenNameDOT_EQUAL :
3783 return ".="; //$NON-NLS-1$
3784 case TokenNameLEFT_SHIFT_EQUAL :
3785 return "<<="; //$NON-NLS-1$
3786 case TokenNameRIGHT_SHIFT_EQUAL :
3787 return ">>="; //$NON-NLS-1$
3788 case TokenNameOR_OR :
3789 return "||"; //$NON-NLS-1$
3790 case TokenNameAND_AND :
3791 return "&&"; //$NON-NLS-1$
3792 case TokenNamePLUS :
3793 return "+"; //$NON-NLS-1$
3794 case TokenNameMINUS :
3795 return "-"; //$NON-NLS-1$
3796 case TokenNameMINUS_GREATER :
3799 return "!"; //$NON-NLS-1$
3800 case TokenNameREMAINDER :
3801 return "%"; //$NON-NLS-1$
3803 return "^"; //$NON-NLS-1$
3805 return "&"; //$NON-NLS-1$
3806 case TokenNameMULTIPLY :
3807 return "*"; //$NON-NLS-1$
3809 return "|"; //$NON-NLS-1$
3810 case TokenNameTWIDDLE :
3811 return "~"; //$NON-NLS-1$
3812 case TokenNameTWIDDLE_EQUAL :
3813 return "~="; //$NON-NLS-1$
3814 case TokenNameDIVIDE :
3815 return "/"; //$NON-NLS-1$
3816 case TokenNameGREATER :
3817 return ">"; //$NON-NLS-1$
3818 case TokenNameLESS :
3819 return "<"; //$NON-NLS-1$
3820 case TokenNameLPAREN :
3821 return "("; //$NON-NLS-1$
3822 case TokenNameRPAREN :
3823 return ")"; //$NON-NLS-1$
3824 case TokenNameLBRACE :
3825 return "{"; //$NON-NLS-1$
3826 case TokenNameRBRACE :
3827 return "}"; //$NON-NLS-1$
3828 case TokenNameLBRACKET :
3829 return "["; //$NON-NLS-1$
3830 case TokenNameRBRACKET :
3831 return "]"; //$NON-NLS-1$
3832 case TokenNameSEMICOLON :
3833 return ";"; //$NON-NLS-1$
3834 case TokenNameQUESTION :
3835 return "?"; //$NON-NLS-1$
3836 case TokenNameCOLON :
3837 return ":"; //$NON-NLS-1$
3838 case TokenNameCOMMA :
3839 return ","; //$NON-NLS-1$
3841 return "."; //$NON-NLS-1$
3842 case TokenNameEQUAL :
3843 return "="; //$NON-NLS-1$
3846 case TokenNameDOLLAR :
3848 case TokenNameDOLLAR_LBRACE :
3851 return "EOF"; //$NON-NLS-1$
// whitespace and comments: shown together with their source text
3852 case TokenNameWHITESPACE :
3853 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3854 case TokenNameCOMMENT_LINE :
3855 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3856 case TokenNameCOMMENT_BLOCK :
3857 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3858 case TokenNameCOMMENT_PHPDOC :
3859 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3860 // case TokenNameHTML :
3861 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// magic constants
3863 case TokenNameFILE :
3864 return "__FILE__"; //$NON-NLS-1$
3865 case TokenNameLINE :
3866 return "__LINE__"; //$NON-NLS-1$
3867 case TokenNameCLASS_C :
3868 return "__CLASS__"; //$NON-NLS-1$
3869 case TokenNameMETHOD_C :
3870 return "__METHOD__"; //$NON-NLS-1$
3871 case TokenNameFUNC_C :
3872 return "__FUNCTION__"; //$NON-NLS-1$
// cast operators
3873 case TokenNameboolCAST :
3874 return "( bool )"; //$NON-NLS-1$
3875 case TokenNameintCAST :
3876 return "( int )"; //$NON-NLS-1$
3877 case TokenNamedoubleCAST :
3878 return "( double )"; //$NON-NLS-1$
3879 case TokenNameobjectCAST :
3880 return "( object )"; //$NON-NLS-1$
3881 case TokenNamestringCAST :
3882 return "( string )"; //$NON-NLS-1$
// fallback rendering that includes the numeric id and the token source
// (presumably the default branch; its label is not visible in this listing)
3884 return "not-a-token(" + (new Integer(act)) + ") "
3885 + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with the check for non-externalized string literals
 * switched off.
 *
 * @param tokenizeComments passed through to the full constructor
 * @param tokenizeWhiteSpace passed through to the full constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
/**
 * Creates a scanner with assert mode switched off.
 *
 * @param tokenizeComments passed through to the full constructor
 * @param tokenizeWhiteSpace passed through to the full constructor
 * @param checkNonExternalizedStringLiterals passed through to the full
 *          constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals, false);
}
/**
 * Creates a scanner that does not tokenize strings and has no task tags or
 * task priorities configured.
 *
 * @param tokenizeComments passed through to the full constructor
 * @param tokenizeWhiteSpace passed through to the full constructor
 * @param checkNonExternalizedStringLiterals passed through to the full
 *          constructor
 * @param assertMode passed through to the full constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals, boolean assertMode) {
  this(tokenizeComments, tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals, assertMode, false, null, null);
}
/**
 * Creates a scanner and stores its configuration. No source is installed
 * yet: <code>eofPosition</code> starts at <code>Integer.MAX_VALUE</code> and
 * <code>encapsedStringStack</code> is left <code>null</code>.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same
 *          name
 * @param assertMode stored in the field of the same name
 * @param tokenizeStrings stored in the field of the same name
 * @param taskTags the tags that checkTaskTag looks for inside comments; may
 *          be <code>null</code> (the shorter constructors pass
 *          <code>null</code>)
 * @param taskPriorities priorities matched to <code>taskTags</code> by
 *          index; may be <code>null</code>
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals, boolean assertMode,
    boolean tokenizeStrings,
    char[][] taskTags,
    char[][] taskPriorities) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
}
3920 private void checkNonExternalizeString() throws InvalidInputException {
3921 if (currentLine == null)
3923 parseTags(currentLine);
3925 private void parseTags(NLSLine line) throws InvalidInputException {
3926 String s = new String(getCurrentTokenSource());
3927 int pos = s.indexOf(TAG_PREFIX);
3928 int lineLength = line.size();
3930 int start = pos + TAG_PREFIX_LENGTH;
3931 int end = s.indexOf(TAG_POSTFIX, start);
3932 String index = s.substring(start, end);
3935 i = Integer.parseInt(index) - 1;
3936 // Tags are one based not zero based.
3937 } catch (NumberFormatException e) {
3938 i = -1; // we don't want to consider this as a valid NLS tag
3940 if (line.exists(i)) {
3943 pos = s.indexOf(TAG_PREFIX, start);
3945 this.nonNLSStrings = new StringLiteral[lineLength];
3946 int nonNLSCounter = 0;
3947 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3948 StringLiteral literal = (StringLiteral) iterator.next();
3949 if (literal != null) {
3950 this.nonNLSStrings[nonNLSCounter++] = literal;
3953 if (nonNLSCounter == 0) {
3954 this.nonNLSStrings = null;
3958 this.wasNonExternalizedStringLiteral = true;
3959 if (nonNLSCounter != lineLength) {
3960 System.arraycopy(this.nonNLSStrings, 0,
3961 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
// Decodes the escape sequence at the current position and leaves the
// resulting character in currentCharacter.
// The visible assignments cover the single-character escapes (backspace,
// tab, newline, form feed, carriage return, double quote, single quote and
// backslash); the remaining branch handles octal escapes of up to three
// digits, stepping back over characters that do not belong to the escape.
// Throws InvalidInputException(INVALID_ESCAPE) for input it does not accept.
// NOTE(review): the case labels, the currentPosition adjustments and the
// condition in front of the first throw are absent from this listing, as is
// the line that closes the unicodeAsBackSlash branch; verify the exact
// structure against the upstream file before changing anything here.
3966 public final void scanEscapeCharacter() throws InvalidInputException {
3967 // the string with "\\u" is a legal string of two chars \ and u
3968 //thus we use a direct access to the source (for regular cases).
3969 if (unicodeAsBackSlash) {
3970 // consume next character
3971 unicodeAsBackSlash = false;
// the unicode-escape lookahead is disabled (commented out) in this scanner
3972 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
3973 // (source[currentPosition] == 'u')) {
3974 // getNextUnicodeChar();
3976 if (withoutUnicodePtr != 0) {
3977 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3981 currentCharacter = source[currentPosition++];
// map the character after the backslash to the character it denotes
3982 switch (currentCharacter) {
3984 currentCharacter = '\b';
3987 currentCharacter = '\t';
3990 currentCharacter = '\n';
3993 currentCharacter = '\f';
3996 currentCharacter = '\r';
3999 currentCharacter = '\"';
4002 currentCharacter = '\'';
4005 currentCharacter = '\\';
4008 // -----------octal escape--------------
4010 // OctalDigit OctalDigit
4011 // ZeroToThree OctalDigit OctalDigit
4012 int number = Character.getNumericValue(currentCharacter);
4013 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out the three-digit form
4014 boolean zeroToThreeNot = number > 3;
4015 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4016 int digit = Character.getNumericValue(currentCharacter);
4017 if (digit >= 0 && digit <= 7) {
4018 number = (number * 8) + digit;
4020 .isDigit(currentCharacter = source[currentPosition++])) {
4021 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4022 // Digit --> ignore last character
4025 digit = Character.getNumericValue(currentCharacter);
4026 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4027 // OctalDigit OctalDigit
4028 number = (number * 8) + digit;
4029 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4030 // --> ignore last character
4034 } else { // has read \OctalDigit NonDigit--> ignore last
4038 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4042 } else { // has read \OctalDigit --> ignore last character
4046 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the decoded character
4047 currentCharacter = (char) number;
4049 throw new InvalidInputException(INVALID_ESCAPE);
4052 // check presence of task: tags
// Scans the comment between commentStart and commentEnd for the configured
// task tags and records each occurrence in the parallel foundTask* arrays
// (tag, priority, message and {start, end} position), growing those arrays
// by doubling when they are full. A second pass then extracts and trims the
// message text that follows each newly found tag.
// NOTE(review): this.taskTags is dereferenced without a null check although
// the shorter constructors pass null for it - confirm that callers guard
// this.
// NOTE(review): several short lines (early return, continue/break
// statements, closing braces) are absent from this listing; verify against
// the upstream file.
4053 public void checkTaskTag(int commentStart, int commentEnd) {
4054 // only look for newer task: tags
4055 if (this.foundTaskCount > 0
4056 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// tasks found from here on are the ones whose messages still need extracting
4059 int foundTaskIndex = this.foundTaskCount;
4060 nextChar : for (int i = commentStart; i < commentEnd
4061 && i < this.eofPosition; i++) {
4063 char[] priority = null;
4064 // check for tag occurrence
4065 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4066 tag = this.taskTags[itag];
// priorities are matched to tags by index and may be missing altogether
4067 priority = this.taskPriorities != null
4068 && itag < this.taskPriorities.length
4069 ? this.taskPriorities[itag]
4071 int tagLength = tag.length;
// NOTE(review): source[i + t] is read without a bound check against the
// source length - confirm a tag prefix at the very end cannot overrun.
4072 for (int t = 0; t < tagLength; t++) {
4073 if (this.source[i + t] != tag[t])
// first hit allocates the result arrays; later hits double them when full
4076 if (this.foundTaskTags == null) {
4077 this.foundTaskTags = new char[5][];
4078 this.foundTaskMessages = new char[5][];
4079 this.foundTaskPriorities = new char[5][];
4080 this.foundTaskPositions = new int[5][];
4081 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4082 System.arraycopy(this.foundTaskTags, 0,
4083 this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4084 this.foundTaskCount);
4085 System.arraycopy(this.foundTaskMessages, 0,
4086 this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4087 this.foundTaskCount);
4088 System.arraycopy(this.foundTaskPriorities, 0,
4089 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4090 0, this.foundTaskCount);
4091 System.arraycopy(this.foundTaskPositions, 0,
4092 this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4093 this.foundTaskCount);
4095 this.foundTaskTags[this.foundTaskCount] = tag;
4096 this.foundTaskPriorities[this.foundTaskCount] = priority;
4097 this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
// the message starts out empty and is filled in by the second pass below
4099 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4100 this.foundTaskCount++;
4101 i += tagLength - 1; // will be incremented when looping
// second pass: determine the message belonging to each newly found task
4104 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4105 // retrieve message start and end positions
4106 int msgStart = this.foundTaskPositions[i][0]
4107 + this.foundTaskTags[i].length;
4108 int max_value = i + 1 < this.foundTaskCount
4109 ? this.foundTaskPositions[i + 1][0] - 1
4111 // at most beginning of next task
4112 if (max_value < msgStart)
4113 max_value = msgStart; // would only occur if tag is before EOF.
// look for a line break that ends the message
4116 for (int j = msgStart; j < max_value; j++) {
4117 if ((c = this.source[j]) == '\n' || c == '\r') {
// otherwise search backwards for a '*'
4123 for (int j = max_value; j > msgStart; j--) {
4124 if ((c = this.source[j]) == '*') {
4132 if (msgStart == end)
// NOTE(review): both trim loops read the array before testing
// msgStart <= end, so the range check cannot prevent the access.
4135 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4137 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4139 // update the end position of the task
4140 this.foundTaskPositions[i][1] = end;
4141 // get the message source
4142 final int messageLength = end - msgStart + 1;
4143 char[] message = new char[messageLength];
4144 System.arraycopy(source, msgStart, message, 0, messageLength);
4145 this.foundTaskMessages[i] = message;