1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Stack;
16 import net.sourceforge.phpdt.core.compiler.CharOperation;
17 import net.sourceforge.phpdt.core.compiler.IScanner;
18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
21 public class Scanner implements IScanner, ITerminalSymbols {
23 * APIs are - getNextToken() which returns the current type of the token
24 * (this value is not memorized by the scanner) - getCurrentTokenSource()
25 * which provides with the token "REAL" source (aka all unicode have been
26 * transformed into a correct char) - sourceStart gives the position into the
27 * stream - currentPosition-1 gives the sourceEnd position into the stream
30 private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
32 //flag indicating if processed source contains occurrences of keyword assert
33 public boolean containsAssertKeyword = false;
34 public boolean recordLineSeparator;
35 public boolean phpMode = false;
36 public Stack encapsedStringStack = null;
37 public char currentCharacter;
38 public int startPosition;
39 public int currentPosition;
40 public int initialPosition, eofPosition;
41 // after this position eof are generated instead of real token from the
43 public boolean tokenizeComments;
44 public boolean tokenizeWhiteSpace;
45 public boolean tokenizeStrings;
46 //source should be viewed as a window (aka a part)
47 //of a entire very large stream
50 public char[] withoutUnicodeBuffer;
51 public int withoutUnicodePtr;
52 //when == 0 ==> no unicode in the current token
53 public boolean unicodeAsBackSlash = false;
54 public boolean scanningFloatLiteral = false;
55 //support for /** comments
56 //public char[][] comments = new char[10][];
57 public int[] commentStops = new int[10];
58 public int[] commentStarts = new int[10];
59 public int commentPtr = -1; // no comment test with commentPtr value -1
60 //diet parsing support - jump over some method body when requested
61 public boolean diet = false;
62 //support for the poor-line-debuggers ....
63 //remember the position of the cr/lf
64 public int[] lineEnds = new int[250];
65 public int linePtr = -1;
66 public boolean wasAcr = false;
67 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
68 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
69 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
70 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
71 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
72 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
73 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
74 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
75 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
76 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
77 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
78 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
79 //----------------optimized identifier management------------------
80 static final char[] charArray_a = new char[]{'a'},
81 charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
82 charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
83 charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
84 charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
85 charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
86 charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
87 charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
88 charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
89 charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
90 charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
91 charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
92 charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
93 charArray_z = new char[]{'z'};
94 static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
95 '\u0000', '\u0000', '\u0000'};
96 static final int TableSize = 30, InternalTableSize = 6;
98 public static final int OptimizedLength = 6;
100 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
101 // support for detecting non-externalized string literals
102 int currentLineNr = -1;
103 int previousLineNr = -1;
104 NLSLine currentLine = null;
105 List lines = new ArrayList();
106 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
107 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
108 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
109 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
110 public StringLiteral[] nonNLSStrings = null;
111 public boolean checkNonExternalizedStringLiterals = true;
112 public boolean wasNonExternalizedStringLiteral = false;
// Pre-fills every slot of the charArray_length table with the shared
// initCharArray placeholder. The outer bound 6 matches OptimizedLength, the
// first dimension charArray_length is declared with.
// NOTE(review): the opening and closing lines of this block are not visible in
// this listing; since charArray_length is a non-static field this is presumably
// an instance initializer - confirm.
114 for (int i = 0; i < 6; i++) {
115 for (int j = 0; j < TableSize; j++) {
116 for (int k = 0; k < InternalTableSize; k++) {
// All slots alias the same array instance; no per-slot copy is made.
117 charArray_length[i][j][k] = initCharArray;
122 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
124 public static final int RoundBracket = 0;
125 public static final int SquareBracket = 1;
126 public static final int CurlyBracket = 2;
127 public static final int BracketKinds = 3;
129 public char[][] foundTaskTags = null;
130 public char[][] foundTaskMessages;
131 public char[][] foundTaskPriorities = null;
132 public int[][] foundTaskPositions;
133 public int foundTaskCount = 0;
134 public char[][] taskTags = null;
135 public char[][] taskPriorities = null;
136 public static final boolean DEBUG = false;
137 public static final boolean TRACE = false;
140 * Determines if the specified character is permissible as the first
141 * character in a PHP identifier
143 public static boolean isPHPIdentifierStart(char ch) {
144 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
147 * Determines if the specified character may be part of a PHP identifier as
148 * other than the first character
150 public static boolean isPHPIdentifierPart(char ch) {
151 return Character.isLetterOrDigit(ch) || (ch == '_')
152 || (0x7F <= ch && ch <= 0xFF);
154 public final boolean atEnd() {
155 // This code is not relevant if source is
156 // Only a part of the real stream input
157 return source.length == currentPosition;
// Returns the characters of the current identifier token, taken from the
// source range that starts at startPosition and has length
// currentPosition - startPosition.
// The switch dispatches on that length to the optimizedCurrentTokenSource1..6
// helpers; the arraycopy at the end copies the token into a freshly allocated
// char[length].
// NOTE(review): the case labels, the declaration of 'result' and the final
// return are not visible in this listing. Presumably the helpers serve
// lengths 1..6 (see OptimizedLength) and the copy is the default path - confirm.
159 public char[] getCurrentIdentifierSource() {
160 //return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer path below is commented out; only the raw source is read.
162 // if (withoutUnicodePtr != 0)
163 // //0 is used as a fast test flag so the real first char is in position 1
165 // withoutUnicodeBuffer,
167 // result = new char[withoutUnicodePtr],
169 // withoutUnicodePtr);
171 int length = currentPosition - startPosition;
172 switch (length) { // see OptimizedLength
174 return optimizedCurrentTokenSource1();
176 return optimizedCurrentTokenSource2();
178 return optimizedCurrentTokenSource3();
180 return optimizedCurrentTokenSource4();
182 return optimizedCurrentTokenSource5();
184 return optimizedCurrentTokenSource6();
187 System.arraycopy(source, startPosition, result = new char[length], 0,
192 public int getCurrentTokenEndPosition() {
193 return this.currentPosition - 1;
// Copies the current token out of the source array: allocates a new array of
// currentPosition - startPosition chars and fills it from source, starting at
// startPosition.
// NOTE(review): the declarations of 'result'/'length' and the return statement
// are not visible in this listing; presumably 'result' is returned - confirm.
195 public final char[] getCurrentTokenSource() {
196 // Return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer path below is commented out; only the raw source is read.
198 // if (withoutUnicodePtr != 0)
199 // // 0 is used as a fast test flag so the real first char is in position 1
201 // withoutUnicodeBuffer,
203 // result = new char[withoutUnicodePtr],
205 // withoutUnicodePtr);
208 System.arraycopy(source, startPosition,
// 'length' is assigned inside the allocation expression and reused as the
// copy count.
209 result = new char[length = currentPosition - startPosition], 0, length);
// Variant of getCurrentTokenSource() with a caller-supplied start: allocates a
// new array of currentPosition - startPos chars and fills it from source,
// starting at startPos.
// No bounds check on startPos is visible here.
// NOTE(review): the declarations of 'result'/'length' and the return statement
// are not visible in this listing; presumably 'result' is returned - confirm.
213 public final char[] getCurrentTokenSource(int startPos) {
214 // Return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer path below is commented out; only the raw source is read.
216 // if (withoutUnicodePtr != 0)
217 // // 0 is used as a fast test flag so the real first char is in position 1
219 // withoutUnicodeBuffer,
221 // result = new char[withoutUnicodePtr],
223 // withoutUnicodePtr);
226 System.arraycopy(source, startPos,
227 result = new char[length = currentPosition - startPos], 0, length);
// Returns the content of the current string token without its two delimiter
// characters.
// Two sources are used:
//  - withoutUnicodePtr != 0: the token was buffered in withoutUnicodeBuffer
//    (real content starts at index 1), so copying starts at index 2 and takes
//    withoutUnicodePtr - 2 chars;
//  - otherwise: the chars are taken from source, starting one past
//    startPosition, currentPosition - startPosition - 2 chars in total.
// NOTE(review): the 'else', the declarations of 'result'/'length' and the
// return statement are not visible in this listing - confirm.
231 public final char[] getCurrentTokenSourceString() {
232 //return the token REAL source (aka unicodes are precomputed).
233 //REMOVE the two " that are at the beginning and the end.
235 if (withoutUnicodePtr != 0)
236 //0 is used as a fast test flag so the real first char is in position 1
237 System.arraycopy(withoutUnicodeBuffer, 2,
238 //2 is 1 (real start) + 1 (to jump over the ")
239 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
242 System.arraycopy(source, startPosition + 1,
243 result = new char[length = currentPosition - startPosition - 2], 0,
248 public int getCurrentTokenStartPosition() {
249 return this.startPosition;
// Copies the current string literal without its first and last character:
// currentPosition - startPosition - 2 chars are taken from source, starting
// at startPosition + 1. Unlike getCurrentTokenSourceString(), no use of
// withoutUnicodeBuffer is visible here.
// NOTE(review): the declarations of 'result'/'length', the tail of the
// arraycopy call and the return statement are not visible in this listing.
251 public final char[] getCurrentStringLiteralSource() {
252 // Return the token REAL source (aka unicodes are precomputed)
255 System.arraycopy(source, startPosition + 1,
256 result = new char[length = currentPosition - startPosition - 2], 0,
262 * Search the source position corresponding to the end of a given line number
264 * Line numbers are 1-based, and relative to the scanner initialPosition.
265 * Character positions are 0-based.
267 * In case the given line number is inconsistent, answers -1.
// Looks up the end position of a 1-based line number in lineEnds; for an
// ordinary line the answer is lineEnds[lineNumber - 1].
// NOTE(review): the statements executed by the three guards below are not
// visible in this listing. The method description says inconsistent line
// numbers answer -1, so the first two presumably return -1; what the
// 'lineEnds.length - 1' case returns cannot be told from here - confirm.
// NOTE(review): the guards compare against lineEnds.length (the array
// capacity), not against linePtr (the field tracking recorded line ends).
269 public final int getLineEnd(int lineNumber) {
270 if (lineEnds == null)
272 if (lineNumber >= lineEnds.length)
276 if (lineNumber == lineEnds.length - 1)
278 return lineEnds[lineNumber - 1];
279 // next line start one character behind the lineEnd of the previous line
282 * Search the source position corresponding to the beginning of a given line
285 * Line numbers are 1-based, and relative to the scanner initialPosition.
286 * Character positions are 0-based.
288 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
290 * In case the given line number is inconsistent, answers -1.
// Looks up the start position of a 1-based line number: a line starts one
// character after the end of the previous line, i.e. at
// lineEnds[lineNumber - 2] + 1. One special case answers initialPosition.
// NOTE(review): the statements executed by the guards, and the condition in
// front of 'return initialPosition', are not visible in this listing. Per the
// method description (getLineStart(1) --> 0, inconsistent numbers answer -1)
// the guards presumably return -1 and the special case is line 1 - confirm.
292 public final int getLineStart(int lineNumber) {
293 if (lineEnds == null)
295 if (lineNumber >= lineEnds.length)
300 return initialPosition;
301 return lineEnds[lineNumber - 2] + 1;
302 // next line start one character behind the lineEnd of the previous line
// Conditionally consumes one character: reads source[currentPosition] into
// currentCharacter, advances currentPosition, and keeps the advance only if
// the character equals testedChar. On a mismatch, or when the read runs past
// the end of source (IndexOutOfBoundsException), currentPosition is reset to
// its value on entry.
// The unicode-escape handling is entirely commented out, so a backslash-u
// sequence is not decoded here.
// NOTE(review): the 'try {' line and the return statements are not visible in
// this listing; presumably true is returned on a match and false otherwise.
// NOTE(review): currentCharacter is overwritten before the comparison and the
// visible code restores only currentPosition, so the "no side effect" remark
// below does not hold for currentCharacter (nor for unicodeAsBackSlash in the
// catch branch) - confirm.
304 public final boolean getNextChar(char testedChar) {
306 //handle the case of unicode.
307 //when a unicode appears then we must use a buffer that holds char
309 //At the end of this method currentCharacter holds the new visited char
310 //and currentPosition points right next after it
311 //Both previous lines are true if the currentCharacter is == to the
313 //On false, no side effect has occured.
314 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
315 int temp = currentPosition;
317 currentCharacter = source[currentPosition++];
318 // if (((currentCharacter = source[currentPosition++]) == '\\')
319 // && (source[currentPosition] == 'u')) {
320 // //-------------unicode traitement ------------
321 // int c1, c2, c3, c4;
322 // int unicodeSize = 6;
323 // currentPosition++;
324 // while (source[currentPosition] == 'u') {
325 // currentPosition++;
329 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
331 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
333 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
335 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
337 // currentPosition = temp;
341 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
342 // if (currentCharacter != testedChar) {
343 // currentPosition = temp;
346 // unicodeAsBackSlash = currentCharacter == '\\';
348 // //need the unicode buffer
349 // if (withoutUnicodePtr == 0) {
350 // //buffer all the entries that have been left aside....
351 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
355 // withoutUnicodeBuffer,
357 // withoutUnicodePtr);
359 // //fill the buffer with the char
360 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
363 // } //-------------end unicode traitement--------------
// Mismatch: undo the advance.
365 if (currentCharacter != testedChar) {
366 currentPosition = temp;
369 unicodeAsBackSlash = false;
370 // if (withoutUnicodePtr != 0)
371 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is treated like a mismatch.
374 } catch (IndexOutOfBoundsException e) {
375 unicodeAsBackSlash = false;
376 currentPosition = temp;
// Two-candidate variant of getNextChar(char): reads source[currentPosition]
// into currentCharacter, advances currentPosition, and compares the character
// with testedChar1 and then testedChar2. If neither matches, or the read runs
// past the end of source, currentPosition is reset to its value on entry.
// Per the comment below the result encodes the outcome: 0 for testedChar1,
// 1 for testedChar2, -1 otherwise.
// The unicode-escape handling is entirely commented out.
// NOTE(review): the 'try {' line and all return statements are not visible in
// this listing - confirm the result codes against the full source.
// NOTE(review): currentCharacter is overwritten before the comparison and is
// not visibly restored when nothing matches.
380 public final int getNextChar(char testedChar1, char testedChar2) {
381 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
382 //test can be done with (x==0) for the first and (x>0) for the second
383 //handle the case of unicode.
384 //when a unicode appears then we must use a buffer that holds char
386 //At the end of this method currentCharacter holds the new visited char
387 //and currentPosition points right next after it
388 //Both previous lines are true if the currentCharacter is == to the
390 //On false, no side effect has occured.
391 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
392 int temp = currentPosition;
395 currentCharacter = source[currentPosition++];
396 // if (((currentCharacter = source[currentPosition++]) == '\\')
397 // && (source[currentPosition] == 'u')) {
398 // //-------------unicode traitement ------------
399 // int c1, c2, c3, c4;
400 // int unicodeSize = 6;
401 // currentPosition++;
402 // while (source[currentPosition] == 'u') {
403 // currentPosition++;
407 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
409 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
411 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
413 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
415 // currentPosition = temp;
419 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
420 // if (currentCharacter == testedChar1)
422 // else if (currentCharacter == testedChar2)
425 // currentPosition = temp;
429 // //need the unicode buffer
430 // if (withoutUnicodePtr == 0) {
431 // //buffer all the entries that have been left aside....
432 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
436 // withoutUnicodeBuffer,
438 // withoutUnicodePtr);
440 // //fill the buffer with the char
441 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
443 // } //-------------end unicode traitement--------------
445 if (currentCharacter == testedChar1)
447 else if (currentCharacter == testedChar2)
// Neither candidate matched: undo the advance.
450 currentPosition = temp;
453 // if (withoutUnicodePtr != 0)
454 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is treated like "no match".
457 } catch (IndexOutOfBoundsException e) {
458 currentPosition = temp;
// Conditionally consumes one decimal digit: reads source[currentPosition]
// into currentCharacter, advances currentPosition, and keeps the advance only
// if Character.isDigit(currentCharacter) holds. Otherwise, or when the read
// runs past the end of source, currentPosition is reset to its entry value.
// The unicode-escape handling is entirely commented out.
// NOTE(review): the 'try {' line and the return statements are not visible in
// this listing; presumably true is returned for a digit and false otherwise.
// NOTE(review): Character.isDigit also accepts non-ASCII Unicode digits, and
// currentCharacter is not visibly restored on failure.
462 public final boolean getNextCharAsDigit() {
464 //handle the case of unicode.
465 //when a unicode appears then we must use a buffer that holds char
467 //At the end of this method currentCharacter holds the new visited char
468 //and currentPosition points right next after it
469 //Both previous lines are true if the currentCharacter is a digit
470 //On false, no side effect has occured.
471 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
472 int temp = currentPosition;
474 currentCharacter = source[currentPosition++];
475 // if (((currentCharacter = source[currentPosition++]) == '\\')
476 // && (source[currentPosition] == 'u')) {
477 // //-------------unicode traitement ------------
478 // int c1, c2, c3, c4;
479 // int unicodeSize = 6;
480 // currentPosition++;
481 // while (source[currentPosition] == 'u') {
482 // currentPosition++;
486 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
488 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
490 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
492 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
494 // currentPosition = temp;
498 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
499 // if (!Character.isDigit(currentCharacter)) {
500 // currentPosition = temp;
504 // //need the unicode buffer
505 // if (withoutUnicodePtr == 0) {
506 // //buffer all the entries that have been left aside....
507 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
511 // withoutUnicodeBuffer,
513 // withoutUnicodePtr);
515 // //fill the buffer with the char
516 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
518 // } //-------------end unicode traitement--------------
// Not a digit: undo the advance.
520 if (!Character.isDigit(currentCharacter)) {
521 currentPosition = temp;
524 // if (withoutUnicodePtr != 0)
525 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is treated like "not a digit".
528 } catch (IndexOutOfBoundsException e) {
529 currentPosition = temp;
// Radix-aware variant of getNextCharAsDigit(): reads source[currentPosition]
// into currentCharacter, advances currentPosition, and keeps the advance only
// if Character.digit(currentCharacter, radix) is not -1, i.e. the character
// is a valid digit in the given radix. Otherwise, or when the read runs past
// the end of source, currentPosition is reset to its entry value.
// The unicode-escape handling is entirely commented out.
// NOTE(review): the 'try {' line and the return statements are not visible in
// this listing; presumably true is returned for a digit and false otherwise.
// NOTE(review): currentCharacter is not visibly restored on failure.
533 public final boolean getNextCharAsDigit(int radix) {
535 //handle the case of unicode.
536 //when a unicode appears then we must use a buffer that holds char
538 //At the end of this method currentCharacter holds the new visited char
539 //and currentPosition points right next after it
540 //Both previous lines are true if the currentCharacter is a digit base on
542 //On false, no side effect has occured.
543 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
544 int temp = currentPosition;
546 currentCharacter = source[currentPosition++];
547 // if (((currentCharacter = source[currentPosition++]) == '\\')
548 // && (source[currentPosition] == 'u')) {
549 // //-------------unicode traitement ------------
550 // int c1, c2, c3, c4;
551 // int unicodeSize = 6;
552 // currentPosition++;
553 // while (source[currentPosition] == 'u') {
554 // currentPosition++;
558 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
560 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
562 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
564 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
566 // currentPosition = temp;
570 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
571 // if (Character.digit(currentCharacter, radix) == -1) {
572 // currentPosition = temp;
576 // //need the unicode buffer
577 // if (withoutUnicodePtr == 0) {
578 // //buffer all the entries that have been left aside....
579 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
583 // withoutUnicodeBuffer,
585 // withoutUnicodePtr);
587 // //fill the buffer with the char
588 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
590 // } //-------------end unicode traitement--------------
// Not a digit in this radix: undo the advance.
592 if (Character.digit(currentCharacter, radix) == -1) {
593 currentPosition = temp;
596 // if (withoutUnicodePtr != 0)
597 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is treated like "not a digit".
600 } catch (IndexOutOfBoundsException e) {
601 currentPosition = temp;
// Conditionally consumes one identifier character: reads
// source[currentPosition] into currentCharacter, advances currentPosition,
// and keeps the advance only if isPHPIdentifierPart(currentCharacter) holds.
// Otherwise, or when the read runs past the end of source, currentPosition is
// reset to its entry value.
// Despite the "Java" in the method name and in the comment below, the test
// applied is the PHP identifier rule (isPHPIdentifierPart).
// The unicode-escape handling is entirely commented out.
// NOTE(review): the 'try {' line and the return statements are not visible in
// this listing; presumably true is returned on acceptance and false otherwise.
// NOTE(review): currentCharacter is not visibly restored on failure.
605 public boolean getNextCharAsJavaIdentifierPart() {
607 //handle the case of unicode.
608 //when a unicode appears then we must use a buffer that holds char
610 //At the end of this method currentCharacter holds the new visited char
611 //and currentPosition points right next after it
612 //Both previous lines are true if the currentCharacter is a
613 // JavaIdentifierPart
614 //On false, no side effect has occured.
615 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
616 int temp = currentPosition;
618 currentCharacter = source[currentPosition++];
619 // if (((currentCharacter = source[currentPosition++]) == '\\')
620 // && (source[currentPosition] == 'u')) {
621 // //-------------unicode traitement ------------
622 // int c1, c2, c3, c4;
623 // int unicodeSize = 6;
624 // currentPosition++;
625 // while (source[currentPosition] == 'u') {
626 // currentPosition++;
630 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
632 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
634 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
636 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
638 // currentPosition = temp;
642 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
643 // if (!isPHPIdentifierPart(currentCharacter)) {
644 // currentPosition = temp;
648 // //need the unicode buffer
649 // if (withoutUnicodePtr == 0) {
650 // //buffer all the entries that have been left aside....
651 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
655 // withoutUnicodeBuffer,
657 // withoutUnicodePtr);
659 // //fill the buffer with the char
660 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
662 // } //-------------end unicode traitement--------------
// Not an identifier character: undo the advance.
664 if (!isPHPIdentifierPart(currentCharacter)) {
665 currentPosition = temp;
668 // if (withoutUnicodePtr != 0)
669 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is treated like "not accepted".
672 } catch (IndexOutOfBoundsException e) {
673 currentPosition = temp;
// Looks ahead from the current position to decide whether the upcoming text is
// a PHP cast keyword followed by ')' or an ordinary parenthesis.
// Steps visible here:
//  1. save currentPosition and currentCharacter;
//  2. skip spaces and tabs, then collect a run of ASCII letters into 'buf';
//  3. if the word is 3..7 letters long, compare it with the cast keywords and
//     pick the matching token:
//       int, integer   -> TokenNameintCAST
//       bool, boolean  -> TokenNameboolCAST
//       real, float, double -> TokenNamedoubleCAST
//       array -> TokenNamearrayCAST, unset -> TokenNameunsetCAST,
//       object -> TokenNameobjectCAST, string -> TokenNamestringCAST;
//  4. skip spaces and tabs again and test for ')';
//  5. otherwise, or when reading runs past the end of source, restore the
//     saved position and character and answer TokenNameLPAREN.
// NOTE(review): the 'try {'/'do {' lines, the case labels, the declaration of
// 'index', the statements that set 'found', and the statements executed when
// ')' is found are not visible in this listing; presumably the cast token is
// returned there without restoring the position - confirm.
// NOTE(review): the visible comparisons are against lowercase letters only and
// no case folding is visible, although upper-case letters are collected into
// 'buf'; confirm whether casts such as (INT) are meant to be recognised.
677 public int getCastOrParen() {
// Saved so the lookahead can be undone.
678 int tempPosition = currentPosition;
679 char tempCharacter = currentCharacter;
680 int tempToken = TokenNameLPAREN;
681 boolean found = false;
682 StringBuffer buf = new StringBuffer();
685 currentCharacter = source[currentPosition++];
686 } while (currentCharacter == ' ' || currentCharacter == '\t');
687 while ((currentCharacter >= 'a' && currentCharacter <= 'z')
688 || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
689 buf.append(currentCharacter);
690 currentCharacter = source[currentPosition++];
// Cast keywords are between 3 ("int") and 7 ("boolean"/"integer") letters.
692 if (buf.length() >= 3 && buf.length() <= 7) {
693 char[] data = buf.toString().toCharArray();
695 switch (data.length) {
698 if ((data[index] == 'i') && (data[++index] == 'n')
699 && (data[++index] == 't')) {
701 tempToken = TokenNameintCAST;
706 if ((data[index] == 'b') && (data[++index] == 'o')
707 && (data[++index] == 'o') && (data[++index] == 'l')) {
709 tempToken = TokenNameboolCAST;
712 if ((data[index] == 'r') && (data[++index] == 'e')
713 && (data[++index] == 'a') && (data[++index] == 'l')) {
715 tempToken = TokenNamedoubleCAST;
721 if ((data[index] == 'a') && (data[++index] == 'r')
722 && (data[++index] == 'r') && (data[++index] == 'a')
723 && (data[++index] == 'y')) {
725 tempToken = TokenNamearrayCAST;
728 if ((data[index] == 'u') && (data[++index] == 'n')
729 && (data[++index] == 's') && (data[++index] == 'e')
730 && (data[++index] == 't')) {
732 tempToken = TokenNameunsetCAST;
735 if ((data[index] == 'f') && (data[++index] == 'l')
736 && (data[++index] == 'o') && (data[++index] == 'a')
737 && (data[++index] == 't')) {
739 tempToken = TokenNamedoubleCAST;
745 // object string double
746 if ((data[index] == 'o') && (data[++index] == 'b')
747 && (data[++index] == 'j') && (data[++index] == 'e')
748 && (data[++index] == 'c') && (data[++index] == 't')) {
750 tempToken = TokenNameobjectCAST;
753 if ((data[index] == 's') && (data[++index] == 't')
754 && (data[++index] == 'r') && (data[++index] == 'i')
755 && (data[++index] == 'n') && (data[++index] == 'g')) {
757 tempToken = TokenNamestringCAST;
760 if ((data[index] == 'd') && (data[++index] == 'o')
761 && (data[++index] == 'u') && (data[++index] == 'b')
762 && (data[++index] == 'l') && (data[++index] == 'e')) {
764 tempToken = TokenNamedoubleCAST;
771 if ((data[index] == 'b') && (data[++index] == 'o')
772 && (data[++index] == 'o') && (data[++index] == 'l')
773 && (data[++index] == 'e') && (data[++index] == 'a')
774 && (data[++index] == 'n')) {
776 tempToken = TokenNameboolCAST;
779 if ((data[index] == 'i') && (data[++index] == 'n')
780 && (data[++index] == 't') && (data[++index] == 'e')
781 && (data[++index] == 'g') && (data[++index] == 'e')
782 && (data[++index] == 'r')) {
784 tempToken = TokenNameintCAST;
790 while (currentCharacter == ' ' || currentCharacter == '\t') {
791 currentCharacter = source[currentPosition++];
793 if (currentCharacter == ')') {
798 } catch (IndexOutOfBoundsException e) {
// Fallback: undo the lookahead and report a plain '('.
800 currentCharacter = tempCharacter;
801 currentPosition = tempPosition;
802 return TokenNameLPAREN;
// Consumes the remainder of a backtick-delimited string: characters are read
// from source until a '`' is found.
//  - A backslash triggers scanDoubleQuotedEscapeCharacter(); the resulting
//    currentCharacter is stored in withoutUnicodeBuffer, which is first seeded
//    from source if it was not in use yet (withoutUnicodePtr == 0).
//  - Once the buffer is in use, every further character is appended to it.
//  - Running past the end of source raises
//    InvalidInputException(UNTERMINATED_STRING).
//  - On an InvalidInputException whose message is INVALID_ESCAPE, up to 50
//    characters are scanned ahead for a closing '`' (the scan has exits for
//    end of source and for '\n'); if found, currentPosition is moved just
//    past it.
//  - Finally, when checkNonExternalizedStringLiterals is set, a StringLiteral
//    for the token is added to currentLine (created and registered in 'lines'
//    on first use).
// NOTE(review): the 'try {' line, several closing braces, the loop exits in
// the recovery scan and whatever follows it (presumably a rethrow) are not
// visible in this listing - confirm against the full source.
804 public void consumeStringInterpolated() throws InvalidInputException {
806 // consume next character
807 unicodeAsBackSlash = false;
808 currentCharacter = source[currentPosition++];
809 // if (((currentCharacter = source[currentPosition++]) == '\\')
810 // && (source[currentPosition] == 'u')) {
811 // getNextUnicodeChar();
813 // if (withoutUnicodePtr != 0) {
814 // withoutUnicodeBuffer[++withoutUnicodePtr] =
818 while (currentCharacter != '`') {
819 /** ** in PHP \r and \n are valid in string literals *** */
820 // if ((currentCharacter == '\n')
821 // || (currentCharacter == '\r')) {
822 // // relocate if finding another quote fairly close: thus unicode
823 // '/u000D' will be fully consumed
824 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
825 // if (currentPosition + lookAhead == source.length)
827 // if (source[currentPosition + lookAhead] == '\n')
829 // if (source[currentPosition + lookAhead] == '\"') {
830 // currentPosition += lookAhead + 1;
834 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
836 if (currentCharacter == '\\') {
// escapeSize first holds the position before the escape, then its length.
837 int escapeSize = currentPosition;
838 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
839 //scanEscapeCharacter make a side effect on this value and we need
840 // the previous value few lines down this one
841 scanDoubleQuotedEscapeCharacter();
842 escapeSize = currentPosition - escapeSize;
843 if (withoutUnicodePtr == 0) {
844 //buffer all the entries that have been left aside....
845 withoutUnicodePtr = currentPosition - escapeSize - 1
847 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
849 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
850 } else { //overwrite the / in the buffer
851 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
852 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
853 // where only one is correct
858 // consume next character
859 unicodeAsBackSlash = false;
860 currentCharacter = source[currentPosition++];
861 // if (((currentCharacter = source[currentPosition++]) == '\\')
862 // && (source[currentPosition] == 'u')) {
863 // getNextUnicodeChar();
865 if (withoutUnicodePtr != 0) {
866 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
870 } catch (IndexOutOfBoundsException e) {
871 throw new InvalidInputException(UNTERMINATED_STRING);
872 } catch (InvalidInputException e) {
873 if (e.getMessage().equals(INVALID_ESCAPE)) {
874 // relocate if finding another quote fairly close: thus unicode
875 // '/u000D' will be fully consumed
876 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
877 if (currentPosition + lookAhead == source.length)
879 if (source[currentPosition + lookAhead] == '\n')
881 if (source[currentPosition + lookAhead] == '`') {
882 currentPosition += lookAhead + 1;
889 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
890 // //$NON-NLS-?$ where ? is an
892 if (currentLine == null) {
893 currentLine = new NLSLine();
894 lines.add(currentLine);
896 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
897 startPosition, currentPosition - 1));
// Consumes the remainder of a single-quoted string: characters are read from
// source until a '\'' is found.
//  - A backslash triggers scanSingleQuotedEscapeCharacter(); the resulting
//    currentCharacter is stored in withoutUnicodeBuffer, which is first seeded
//    from source if it was not in use yet (withoutUnicodePtr == 0).
//  - Once the buffer is in use, every further character is appended to it.
//  - Running past the end of source raises
//    InvalidInputException(UNTERMINATED_STRING).
//  - On an InvalidInputException whose message is INVALID_ESCAPE, up to 50
//    characters are scanned ahead for a closing '\'' (the scan has exits for
//    end of source and for '\n'); if found, currentPosition is moved just
//    past it.
//  - Finally, when checkNonExternalizedStringLiterals is set, a StringLiteral
//    for the token is added to currentLine (created and registered in 'lines'
//    on first use).
// NOTE(review): the 'try {' line, several closing braces, the loop exits in
// the recovery scan and whatever follows it (presumably a rethrow) are not
// visible in this listing - confirm against the full source.
900 public void consumeStringConstant() throws InvalidInputException {
902 // consume next character
903 unicodeAsBackSlash = false;
904 currentCharacter = source[currentPosition++];
905 // if (((currentCharacter = source[currentPosition++]) == '\\')
906 // && (source[currentPosition] == 'u')) {
907 // getNextUnicodeChar();
909 // if (withoutUnicodePtr != 0) {
910 // withoutUnicodeBuffer[++withoutUnicodePtr] =
914 while (currentCharacter != '\'') {
915 /** ** in PHP \r and \n are valid in string literals *** */
916 // if ((currentCharacter == '\n')
917 // || (currentCharacter == '\r')) {
918 // // relocate if finding another quote fairly close: thus unicode
919 // '/u000D' will be fully consumed
920 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
921 // if (currentPosition + lookAhead == source.length)
923 // if (source[currentPosition + lookAhead] == '\n')
925 // if (source[currentPosition + lookAhead] == '\"') {
926 // currentPosition += lookAhead + 1;
930 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
932 if (currentCharacter == '\\') {
// escapeSize first holds the position before the escape, then its length.
933 int escapeSize = currentPosition;
934 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
935 //scanEscapeCharacter make a side effect on this value and we need
936 // the previous value few lines down this one
937 scanSingleQuotedEscapeCharacter();
938 escapeSize = currentPosition - escapeSize;
939 if (withoutUnicodePtr == 0) {
940 //buffer all the entries that have been left aside....
941 withoutUnicodePtr = currentPosition - escapeSize - 1
943 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
945 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
946 } else { //overwrite the / in the buffer
947 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
948 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
949 // where only one is correct
954 // consume next character
955 unicodeAsBackSlash = false;
956 currentCharacter = source[currentPosition++];
957 // if (((currentCharacter = source[currentPosition++]) == '\\')
958 // && (source[currentPosition] == 'u')) {
959 // getNextUnicodeChar();
961 if (withoutUnicodePtr != 0) {
962 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
966 } catch (IndexOutOfBoundsException e) {
967 throw new InvalidInputException(UNTERMINATED_STRING);
968 } catch (InvalidInputException e) {
969 if (e.getMessage().equals(INVALID_ESCAPE)) {
970 // relocate if finding another quote fairly close: thus unicode
971 // '/u000D' will be fully consumed
972 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
973 if (currentPosition + lookAhead == source.length)
975 if (source[currentPosition + lookAhead] == '\n')
977 if (source[currentPosition + lookAhead] == '\'') {
978 currentPosition += lookAhead + 1;
985 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
986 // //$NON-NLS-?$ where ? is an
988 if (currentLine == null) {
989 currentLine = new NLSLine();
990 lines.add(currentLine);
992 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
993 startPosition, currentPosition - 1));
/**
 * Consumes the rest of a double-quoted string literal: characters are read
 * from source one at a time until the closing double quote is found (line
 * breaks are allowed inside PHP strings, see the comment in the loop).
 * A backslash is handed to scanDoubleQuotedEscapeCharacter(); from the first
 * escape on, the decoded characters are accumulated in withoutUnicodeBuffer.
 * When checkNonExternalizedStringLiterals is set, the literal is recorded on
 * the current NLSLine as a StringLiteral spanning startPosition to
 * currentPosition - 1.
 *
 * @throws InvalidInputException with UNTERMINATED_STRING when the end of
 *         source is reached before the closing quote. An INVALID_ESCAPE
 *         failure first triggers a short look-ahead that repositions the
 *         scanner after a nearby closing quote.
 *         NOTE(review): the exception is presumably rethrown afterwards - confirm.
 */
996 public void consumeStringLiteral() throws InvalidInputException {
998 // consume next character
999 unicodeAsBackSlash = false;
1000 currentCharacter = source[currentPosition++];
1001 // if (((currentCharacter = source[currentPosition++]) == '\\')
1002 // && (source[currentPosition] == 'u')) {
1003 // getNextUnicodeChar();
1005 // if (withoutUnicodePtr != 0) {
1006 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1007 // currentCharacter;
1010 while (currentCharacter != '"') {
1011 /** ** in PHP \r and \n are valid in string literals *** */
1012 // if ((currentCharacter == '\n')
1013 // || (currentCharacter == '\r')) {
1014 // // relocate if finding another quote fairly close: thus unicode
1015 // '/u000D' will be fully consumed
1016 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1017 // if (currentPosition + lookAhead == source.length)
1019 // if (source[currentPosition + lookAhead] == '\n')
1021 // if (source[currentPosition + lookAhead] == '\"') {
1022 // currentPosition += lookAhead + 1;
1026 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1028 if (currentCharacter == '\\') {
// escapeSize first holds the position before the escape, then the number of
// source characters the escape sequence consumed
1029 int escapeSize = currentPosition;
1030 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1031 //scanEscapeCharacter make a side effect on this value and we need
1032 // the previous value few lines down this one
1033 scanDoubleQuotedEscapeCharacter();
1034 escapeSize = currentPosition - escapeSize;
1035 if (withoutUnicodePtr == 0) {
1036 //buffer all the entries that have been left aside....
1037 withoutUnicodePtr = currentPosition - escapeSize - 1
1039 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1041 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1042 } else { //overwrite the / in the buffer
1043 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1044 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1045 // where only one is correct
1046 withoutUnicodePtr--;
1050 // consume next character
1051 unicodeAsBackSlash = false;
1052 currentCharacter = source[currentPosition++];
1053 // if (((currentCharacter = source[currentPosition++]) == '\\')
1054 // && (source[currentPosition] == 'u')) {
1055 // getNextUnicodeChar();
// once buffering has started every consumed character is mirrored into the buffer
1057 if (withoutUnicodePtr != 0) {
1058 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing quote was never found
1062 } catch (IndexOutOfBoundsException e) {
1063 throw new InvalidInputException(UNTERMINATED_STRING);
1064 } catch (InvalidInputException e) {
1065 if (e.getMessage().equals(INVALID_ESCAPE)) {
1066 // relocate if finding another quote fairly close: thus unicode
1067 // '/u000D' will be fully consumed
// the look-ahead is bounded to 50 characters and also tests for the end of
// source and for a line feed
1068 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1069 if (currentPosition + lookAhead == source.length)
1071 if (source[currentPosition + lookAhead] == '\n')
1073 if (source[currentPosition + lookAhead] == '\"') {
1074 currentPosition += lookAhead + 1;
1081 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1082 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line record the first time a literal is seen on it
1084 if (currentLine == null) {
1085 currentLine = new NLSLine();
1086 lines.add(currentLine);
1088 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1089 startPosition, currentPosition - 1));
/**
 * Scans the next token starting at currentPosition and returns its terminal
 * symbol (one of the TokenName... constants). The token source is delimited
 * by startPosition and currentPosition, which this method updates as it reads.
 *
 * Rough order of work, as far as the code below shows:
 * - an early exit that delegates to getInlinedHTML(currentPosition)
 *   (NOTE(review): presumably taken when the scanner is not in PHP mode - confirm);
 * - an optional jump over a method body via jumpOverMethodBody();
 * - scanning inside an encapsed string when encapsedStringStack is not empty;
 * - skipping white space (recording line separators on request) and optionally
 *   returning it as TokenNameWHITESPACE;
 * - a switch on the current character that recognises punctuation, operators,
 *   heredocs, strings, comments, variables, identifiers and numbers.
 *
 * @return the terminal symbol of the scanned token; TokenNameEOF once
 *         currentPosition has moved past eofPosition
 * @throws InvalidInputException for malformed input, for example
 *         UNTERMINATED_COMMENT when a block comment is not closed
 */
1092 public int getNextToken() throws InvalidInputException {
1094 return getInlinedHTML(currentPosition);
1097 this.wasAcr = false;
1099 jumpOverMethodBody();
1101 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1105 withoutUnicodePtr = 0;
1106 //start with a new token
// a blank encapsedChar means no encapsed string is open; otherwise the top of
// the stack is either the string delimiter or a dollar marker
1107 char encapsedChar = ' ';
1108 if (!encapsedStringStack.isEmpty()) {
1109 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// the stack top is a string delimiter: scan the literal text of the encapsed string
1111 if (encapsedChar != '$' && encapsedChar != ' ') {
1112 currentCharacter = source[currentPosition++];
// reaching the delimiter right away yields the matching EncapsedString token
1113 if (currentCharacter == encapsedChar) {
1114 switch (currentCharacter) {
1116 return TokenNameEncapsedString0;
1118 return TokenNameEncapsedString1;
1120 return TokenNameEncapsedString2;
1123 while (currentCharacter != encapsedChar) {
1124 /** ** in PHP \r and \n are valid in string literals *** */
1125 switch (currentCharacter) {
1127 int escapeSize = currentPosition;
1128 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1129 //scanEscapeCharacter make a side effect on this value and
1130 // we need the previous value few lines down this one
1131 scanDoubleQuotedEscapeCharacter();
1132 escapeSize = currentPosition - escapeSize;
1133 if (withoutUnicodePtr == 0) {
1134 //buffer all the entries that have been left aside....
1135 withoutUnicodePtr = currentPosition - escapeSize - 1
1137 System.arraycopy(source, startPosition,
1138 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1139 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1140 } else { //overwrite the / in the buffer
1141 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1142 if (backSlashAsUnicodeInString) { //there are TWO \ in
1143 withoutUnicodePtr--;
// an identifier start or an opening brace comes next: push a dollar marker so
// the following calls scan the interpolation, and return the text read so far
1148 if (isPHPIdentifierStart(source[currentPosition])
1149 || source[currentPosition] == '{') {
1151 encapsedStringStack.push(new Character('$'));
1152 return TokenNameSTRING;
1156 if (source[currentPosition] == '$') { // CURLY_OPEN
1158 encapsedStringStack.push(new Character('$'));
1159 return TokenNameSTRING;
1162 // consume next character
1163 unicodeAsBackSlash = false;
1164 currentCharacter = source[currentPosition++];
1165 if (withoutUnicodePtr != 0) {
1166 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1171 return TokenNameSTRING;
1173 // ---------Consume white space and handles startPosition---------
1174 int whiteStart = currentPosition;
1175 startPosition = currentPosition;
1176 currentCharacter = source[currentPosition++];
// a dollar marker on top of the stack: the scanner is inside an interpolation
// of an encapsed string and uses this reduced token set
1177 if (encapsedChar == '$') {
1178 switch (currentCharacter) {
1180 currentCharacter = source[currentPosition++];
1181 return TokenNameSTRING;
1183 if (encapsedChar == '$') {
1184 if (getNextChar('$'))
1185 return TokenNameCURLY_OPEN;
1187 return TokenNameLBRACE;
1189 return TokenNameRBRACE;
1191 return TokenNameLBRACKET;
1193 return TokenNameRBRACKET;
1195 if (tokenizeStrings) {
1196 consumeStringConstant();
1197 return TokenNameStringConstant;
1199 return TokenNameEncapsedString1;
1201 return TokenNameEncapsedString2;
1203 if (tokenizeStrings) {
1204 consumeStringInterpolated();
1205 return TokenNameStringInterpolated;
1207 return TokenNameEncapsedString0;
1209 if (getNextChar('>'))
1210 return TokenNameMINUS_GREATER;
1211 return TokenNameSTRING;
1213 if (currentCharacter == '$') {
// remember the position so the scanner can back up when the dollar sign is
// followed by neither an opening brace nor an identifier start
1214 int oldPosition = currentPosition;
1216 currentCharacter = source[currentPosition++];
1217 if (currentCharacter == '{') {
1218 return TokenNameDOLLAR_LBRACE;
1220 if (isPHPIdentifierStart(currentCharacter)) {
1221 return scanIdentifierOrKeyword(true);
1223 currentPosition = oldPosition;
1224 return TokenNameSTRING;
1226 } catch (IndexOutOfBoundsException e) {
1227 currentPosition = oldPosition;
1228 return TokenNameSTRING;
1231 if (isPHPIdentifierStart(currentCharacter))
1232 return scanIdentifierOrKeyword(false);
1233 if (Character.isDigit(currentCharacter))
1234 return scanNumber(false);
1235 return TokenNameERROR;
1238 // boolean isWhiteSpace;
// skip white space; startPosition follows along so that it ends up on the
// first character of the token
1240 while ((currentCharacter == ' ')
1241 || Character.isWhitespace(currentCharacter)) {
1242 startPosition = currentPosition;
1243 currentCharacter = source[currentPosition++];
1244 // if (((currentCharacter = source[currentPosition++]) == '\\')
1245 // && (source[currentPosition] == 'u')) {
1246 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1248 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1249 checkNonExternalizeString();
1250 if (recordLineSeparator) {
1251 pushLineSeparator();
1256 // isWhiteSpace = (currentCharacter == ' ')
1257 // || Character.isWhitespace(currentCharacter);
1260 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1261 // reposition scanner in case we are interested by spaces as tokens
1263 startPosition = whiteStart;
1264 return TokenNameWHITESPACE;
1266 //little trick to get out in the middle of a source computation
1267 if (currentPosition > eofPosition)
1268 return TokenNameEOF;
1269 // ---------Identify the next token-------------
1270 switch (currentCharacter) {
1272 return getCastOrParen();
1274 return TokenNameRPAREN;
1276 return TokenNameLBRACE;
1278 return TokenNameRBRACE;
1280 return TokenNameLBRACKET;
1282 return TokenNameRBRACKET;
1284 return TokenNameSEMICOLON;
1286 return TokenNameCOMMA;
1288 if (getNextChar('='))
1289 return TokenNameDOT_EQUAL;
1290 if (getNextCharAsDigit())
1291 return scanNumber(true);
1292 return TokenNameDOT;
// the two-argument getNextChar yields 0 when the first candidate matched, as
// the PLUS_PLUS and MINUS_MINUS results below show
1296 if ((test = getNextChar('+', '=')) == 0)
1297 return TokenNamePLUS_PLUS;
1299 return TokenNamePLUS_EQUAL;
1300 return TokenNamePLUS;
1305 if ((test = getNextChar('-', '=')) == 0)
1306 return TokenNameMINUS_MINUS;
1308 return TokenNameMINUS_EQUAL;
1309 if (getNextChar('>'))
1310 return TokenNameMINUS_GREATER;
1311 return TokenNameMINUS;
1314 if (getNextChar('='))
1315 return TokenNameTWIDDLE_EQUAL;
1316 return TokenNameTWIDDLE;
1318 if (getNextChar('=')) {
1319 if (getNextChar('=')) {
1320 return TokenNameNOT_EQUAL_EQUAL;
1322 return TokenNameNOT_EQUAL;
1324 return TokenNameNOT;
1326 if (getNextChar('='))
1327 return TokenNameMULTIPLY_EQUAL;
1328 return TokenNameMULTIPLY;
1330 if (getNextChar('='))
1331 return TokenNameREMAINDER_EQUAL;
1332 return TokenNameREMAINDER;
1335 int oldPosition = currentPosition;
1337 currentCharacter = source[currentPosition++];
1338 } catch (IndexOutOfBoundsException e) {
1339 currentPosition = oldPosition;
1340 return TokenNameLESS;
1342 switch (currentCharacter) {
1344 return TokenNameLESS_EQUAL;
1346 return TokenNameNOT_EQUAL;
1348 if (getNextChar('='))
1349 return TokenNameLEFT_SHIFT_EQUAL;
// a third less-than sign introduces a heredoc: skip white space, then read
// the heredoc identifier
1350 if (getNextChar('<')) {
1351 currentCharacter = source[currentPosition++];
1352 while (Character.isWhitespace(currentCharacter)) {
1353 currentCharacter = source[currentPosition++];
// heredocStart and heredocLength delimit the identifier so that the end tag
// can be compared against it further down
1355 int heredocStart = currentPosition - 1;
1356 int heredocLength = 0;
1357 if (isPHPIdentifierStart(currentCharacter)) {
1358 currentCharacter = source[currentPosition++];
1360 return TokenNameERROR;
1362 while (isPHPIdentifierPart(currentCharacter)) {
1363 currentCharacter = source[currentPosition++];
1365 heredocLength = currentPosition - heredocStart - 1;
1366 // heredoc end-tag determination
1367 boolean endTag = true;
1370 ch = source[currentPosition++];
1371 if (ch == '\r' || ch == '\n') {
1372 if (recordLineSeparator) {
1373 pushLineSeparator();
// after a line break, compare the following characters with the identifier
1377 for (int i = 0; i < heredocLength; i++) {
1378 if (source[currentPosition + i] != source[heredocStart
1385 currentPosition += heredocLength - 1;
1386 currentCharacter = source[currentPosition++];
1387 break; // do...while loop
1393 return TokenNameHEREDOC;
1395 return TokenNameLEFT_SHIFT;
1397 currentPosition = oldPosition;
1398 return TokenNameLESS;
1403 if ((test = getNextChar('=', '>')) == 0)
1404 return TokenNameGREATER_EQUAL;
1406 if ((test = getNextChar('=', '>')) == 0)
1407 return TokenNameRIGHT_SHIFT_EQUAL;
1408 return TokenNameRIGHT_SHIFT;
1410 return TokenNameGREATER;
1413 if (getNextChar('=')) {
1414 if (getNextChar('=')) {
1415 return TokenNameEQUAL_EQUAL_EQUAL;
1417 return TokenNameEQUAL_EQUAL;
1419 if (getNextChar('>'))
1420 return TokenNameEQUAL_GREATER;
1421 return TokenNameEQUAL;
1425 if ((test = getNextChar('&', '=')) == 0)
1426 return TokenNameAND_AND;
1428 return TokenNameAND_EQUAL;
1429 return TokenNameAND;
1434 if ((test = getNextChar('|', '=')) == 0)
1435 return TokenNameOR_OR;
1437 return TokenNameOR_EQUAL;
1441 if (getNextChar('='))
1442 return TokenNameXOR_EQUAL;
1443 return TokenNameXOR;
// a question mark followed by a greater-than sign closes the PHP section; the
// text after it is scanned as inline HTML
1445 if (getNextChar('>')) {
1447 if (currentPosition == source.length) {
1449 return TokenNameINLINE_HTML;
1451 return getInlinedHTML(currentPosition - 2);
1453 return TokenNameQUESTION;
1455 if (getNextChar(':'))
1456 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1457 return TokenNameCOLON;
1461 consumeStringConstant();
1462 return TokenNameStringConstant;
1464 if (tokenizeStrings) {
1465 consumeStringLiteral();
1466 return TokenNameStringLiteral;
1468 return TokenNameEncapsedString2;
1470 if (tokenizeStrings) {
1471 consumeStringInterpolated();
1472 return TokenNameStringInterpolated;
1474 return TokenNameEncapsedString0;
1478 char startChar = currentCharacter;
1479 if (getNextChar('=')) {
1480 return TokenNameDIVIDE_EQUAL;
// a hash sign, or a slash followed by a second slash, starts a line comment
1483 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1485 int endPositionForLineComment = 0;
1486 try { //get the next char
1487 currentCharacter = source[currentPosition++];
1488 // if (((currentCharacter = source[currentPosition++])
1490 // && (source[currentPosition] == 'u')) {
1491 // //-------------unicode traitement ------------
1492 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1493 // currentPosition++;
1494 // while (source[currentPosition] == 'u') {
1495 // currentPosition++;
1498 // Character.getNumericValue(source[currentPosition++]))
1502 // Character.getNumericValue(source[currentPosition++]))
1506 // Character.getNumericValue(source[currentPosition++]))
1510 // Character.getNumericValue(source[currentPosition++]))
1514 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1516 // currentCharacter =
1517 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1520 //handle the \\u case manually into comment
1521 // if (currentCharacter == '\\') {
1522 // if (source[currentPosition] == '\\')
1523 // currentPosition++;
1524 // } //jump over the \\
1525 boolean isUnicode = false;
1526 while (currentCharacter != '\r' && currentCharacter != '\n') {
// a PHP closing tag inside a line comment also ends the comment
1527 if (currentCharacter == '?') {
1528 if (getNextChar('>')) {
1529 startPosition = currentPosition - 2;
1531 return TokenNameINLINE_HTML;
1536 currentCharacter = source[currentPosition++];
1537 // if (((currentCharacter = source[currentPosition++])
1539 // && (source[currentPosition] == 'u')) {
1540 // isUnicode = true;
1541 // //-------------unicode traitement ------------
1542 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1543 // currentPosition++;
1544 // while (source[currentPosition] == 'u') {
1545 // currentPosition++;
1548 // Character.getNumericValue(source[currentPosition++]))
1552 // Character.getNumericValue(
1553 // source[currentPosition++]))
1557 // Character.getNumericValue(
1558 // source[currentPosition++]))
1562 // Character.getNumericValue(
1563 // source[currentPosition++]))
1567 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1569 // currentCharacter =
1570 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1573 //handle the \\u case manually into comment
1574 // if (currentCharacter == '\\') {
1575 // if (source[currentPosition] == '\\')
1576 // currentPosition++;
1577 // } //jump over the \\
1580 endPositionForLineComment = currentPosition - 6;
1582 endPositionForLineComment = currentPosition - 1;
1584 recordComment(false);
1585 if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1586 if ((currentCharacter == '\r')
1587 || (currentCharacter == '\n')) {
1588 checkNonExternalizeString();
1589 if (recordLineSeparator) {
1591 pushUnicodeLineSeparator();
1593 pushLineSeparator();
1599 if (tokenizeComments) {
1601 currentPosition = endPositionForLineComment;
1602 // reset one character behind
1604 return TokenNameCOMMENT_LINE;
1606 } catch (IndexOutOfBoundsException e) { //an eof will them
1608 if (tokenizeComments) {
1610 // reset one character behind
1611 return TokenNameCOMMENT_LINE;
1617 //traditional and annotation comment
1618 boolean isJavadoc = false, star = false;
1619 // consume next character
1620 unicodeAsBackSlash = false;
1621 currentCharacter = source[currentPosition++];
1622 // if (((currentCharacter = source[currentPosition++]) ==
1624 // && (source[currentPosition] == 'u')) {
1625 // getNextUnicodeChar();
1627 // if (withoutUnicodePtr != 0) {
1628 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1629 // currentCharacter;
1632 if (currentCharacter == '*') {
1636 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1637 checkNonExternalizeString();
1638 if (recordLineSeparator) {
1639 pushLineSeparator();
1644 try { //get the next char
1645 currentCharacter = source[currentPosition++];
1646 // if (((currentCharacter = source[currentPosition++])
1648 // && (source[currentPosition] == 'u')) {
1649 // //-------------unicode traitement ------------
1650 // getNextUnicodeChar();
1652 //handle the \\u case manually into comment
1653 // if (currentCharacter == '\\') {
1654 // if (source[currentPosition] == '\\')
1655 // currentPosition++;
1656 // //jump over the \\
1658 // empty comment is not a javadoc /**/
1659 if (currentCharacter == '/') {
1662 //loop until end of comment */
// star records whether the previous character was a star, so the loop ends
// only on a slash that directly follows a star
1663 while ((currentCharacter != '/') || (!star)) {
1664 if ((currentCharacter == '\r')
1665 || (currentCharacter == '\n')) {
1666 checkNonExternalizeString();
1667 if (recordLineSeparator) {
1668 pushLineSeparator();
1673 star = currentCharacter == '*';
1675 currentCharacter = source[currentPosition++];
1676 // if (((currentCharacter = source[currentPosition++])
1678 // && (source[currentPosition] == 'u')) {
1679 // //-------------unicode traitement ------------
1680 // getNextUnicodeChar();
1682 //handle the \\u case manually into comment
1683 // if (currentCharacter == '\\') {
1684 // if (source[currentPosition] == '\\')
1685 // currentPosition++;
1686 // } //jump over the \\
1688 recordComment(isJavadoc);
1689 if (tokenizeComments) {
1691 return TokenNameCOMMENT_PHPDOC;
1692 return TokenNameCOMMENT_BLOCK;
1694 } catch (IndexOutOfBoundsException e) {
1695 throw new InvalidInputException(UNTERMINATED_COMMENT);
1699 return TokenNameDIVIDE;
1703 return TokenNameEOF;
1704 //the atEnd may not be <currentPosition == source.length> if
1705 // source is only some part of a real (external) stream
1706 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// outside of strings a dollar sign followed by an identifier start is scanned
// as a variable; otherwise the scanner backs up and reports a plain DOLLAR
1708 if (currentCharacter == '$') {
1709 int oldPosition = currentPosition;
1711 currentCharacter = source[currentPosition++];
1712 if (isPHPIdentifierStart(currentCharacter)) {
1713 return scanIdentifierOrKeyword(true);
1715 currentPosition = oldPosition;
1716 return TokenNameDOLLAR;
1718 } catch (IndexOutOfBoundsException e) {
1719 currentPosition = oldPosition;
1720 return TokenNameDOLLAR;
1723 if (isPHPIdentifierStart(currentCharacter))
1724 return scanIdentifierOrKeyword(false);
1725 if (Character.isDigit(currentCharacter))
1726 return scanNumber(false);
1727 return TokenNameERROR;
1730 } //-----------------end switch while try--------------------
// NOTE(review): running off the end of source while scanning appears to be
// reported as a regular EOF token - confirm
1731 catch (IndexOutOfBoundsException e) {
1734 return TokenNameEOF;
1738 * InvalidInputException
1740 private int getInlinedHTML(int start) throws InvalidInputException {
// Scans text outside of PHP code. start becomes the startPosition of the
// returned token. The scan reads source from currentPosition and looks for a
// less-than sign followed by a question mark; the character after that is then
// tested for white space or for the letters of php in either case.
// Returns TokenNameINLINE_HTML, or TokenNameEOF when currentPosition is
// already beyond the end of source.
// NOTE(review): switching the scanner back into PHP mode presumably happens
// next to the INLINE_HTML returns - confirm.
1741 // int htmlPosition = start;
// already past the end: clamp the position and report EOF
1742 if (currentPosition > source.length) {
1743 currentPosition = source.length;
1744 return TokenNameEOF;
1746 startPosition = start;
1749 currentCharacter = source[currentPosition++];
1750 if (currentCharacter == '<') {
1751 if (getNextChar('?')) {
1752 currentCharacter = source[currentPosition++];
// short open tag: the opening sequence is directly followed by white space
1753 if ((currentCharacter == ' ')
1754 || Character.isWhitespace(currentCharacter)) {
1757 return TokenNameINLINE_HTML;
// long open tag: match the three letters p, h, p case-insensitively
1759 boolean phpStart = (currentCharacter == 'P')
1760 || (currentCharacter == 'p');
1762 int test = getNextChar('H', 'h');
1764 test = getNextChar('P', 'p');
1768 return TokenNameINLINE_HTML;
1775 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1776 if (recordLineSeparator) {
1777 pushLineSeparator();
1782 } //-----------------while--------------------
1784 return TokenNameINLINE_HTML;
1785 } //-----------------try--------------------
// the end of source was reached without finding an open tag: the remaining
// text is still returned as inline HTML starting at start
1786 catch (IndexOutOfBoundsException e) {
1787 startPosition = start;
1791 return TokenNameINLINE_HTML;
1793 // public final void getNextUnicodeChar()
1794 // throws IndexOutOfBoundsException, InvalidInputException {
1796 // //handle the case of unicode.
1797 // //when a unicode appears then we must use a buffer that holds char
1799 // //At the end of this method currentCharacter holds the new visited char
1800 // //and currentPosition points right next after it
1802 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1804 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1805 // currentPosition++;
1806 // while (source[currentPosition] == 'u') {
1807 // currentPosition++;
1811 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1813 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1815 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1817 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1819 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1821 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1822 // //need the unicode buffer
1823 // if (withoutUnicodePtr == 0) {
1824 // //buffer all the entries that have been left aside....
1825 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1826 // System.arraycopy(
1829 // withoutUnicodeBuffer,
1831 // withoutUnicodePtr);
1833 // //fill the buffer with the char
1834 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1836 // unicodeAsBackSlash = currentCharacter == '\\';
1839 * Tokenize a method body, assuming that curly brackets are properly
1842 public final void jumpOverMethodBody() {
// Advances currentPosition over the tokens of a method body without
// returning them. White space, string literals, line comments, block
// comments, identifiers and numbers are consumed here; line separators are
// still recorded when recordLineSeparator is set. Nothing is reported to the
// caller: IndexOutOfBoundsException and InvalidInputException are caught at
// the end of the method.
// NOTE(review): the loop presumably stops at the closing brace that balances
// the method body - confirm.
1843 this.wasAcr = false;
1846 while (true) { //loop for jumping over comments
1847 // ---------Consume white space and handles startPosition---------
1848 boolean isWhiteSpace;
1850 startPosition = currentPosition;
1851 currentCharacter = source[currentPosition++];
1852 // if (((currentCharacter = source[currentPosition++]) == '\\')
1853 // && (source[currentPosition] == 'u')) {
1854 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1856 if (recordLineSeparator
1857 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1858 pushLineSeparator();
1859 isWhiteSpace = Character.isWhitespace(currentCharacter);
1861 } while (isWhiteSpace);
1862 // -------consume token until } is found---------
1863 switch (currentCharacter) {
1875 test = getNextChar('\\');
1878 scanDoubleQuotedEscapeCharacter();
1879 } catch (InvalidInputException ex) {
1882 // try { // consume next character
1883 unicodeAsBackSlash = false;
1884 currentCharacter = source[currentPosition++];
1885 // if (((currentCharacter = source[currentPosition++]) == '\\')
1886 // && (source[currentPosition] == 'u')) {
1887 // getNextUnicodeChar();
1889 if (withoutUnicodePtr != 0) {
1890 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1893 // } catch (InvalidInputException ex) {
1901 // try { // consume next character
1902 unicodeAsBackSlash = false;
1903 currentCharacter = source[currentPosition++];
1904 // if (((currentCharacter = source[currentPosition++]) == '\\')
1905 // && (source[currentPosition] == 'u')) {
1906 // getNextUnicodeChar();
1908 if (withoutUnicodePtr != 0) {
1909 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1912 // } catch (InvalidInputException ex) {
// skip the content of a double-quoted string up to its closing quote
1914 while (currentCharacter != '"') {
1915 if (currentCharacter == '\r') {
1916 if (source[currentPosition] == '\n')
1919 // the string cannot go further than the line
1921 if (currentCharacter == '\n') {
1923 // the string cannot go further than the line
1925 if (currentCharacter == '\\') {
1927 scanDoubleQuotedEscapeCharacter();
1928 } catch (InvalidInputException ex) {
1931 // try { // consume next character
1932 unicodeAsBackSlash = false;
1933 currentCharacter = source[currentPosition++];
1934 // if (((currentCharacter = source[currentPosition++]) == '\\')
1935 // && (source[currentPosition] == 'u')) {
1936 // getNextUnicodeChar();
1938 if (withoutUnicodePtr != 0) {
1939 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1942 // } catch (InvalidInputException ex) {
1945 } catch (IndexOutOfBoundsException e) {
// a result of 0 means the first candidate (a second slash) matched: line comment
1952 if ((test = getNextChar('/', '*')) == 0) {
1956 currentCharacter = source[currentPosition++];
1957 // if (((currentCharacter = source[currentPosition++]) ==
1959 // && (source[currentPosition] == 'u')) {
1960 // //-------------unicode traitement ------------
1961 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1962 // currentPosition++;
1963 // while (source[currentPosition] == 'u') {
1964 // currentPosition++;
1967 // Character.getNumericValue(source[currentPosition++]))
1971 // Character.getNumericValue(source[currentPosition++]))
1975 // Character.getNumericValue(source[currentPosition++]))
1979 // Character.getNumericValue(source[currentPosition++]))
1982 // //error don't care of the value
1983 // currentCharacter = 'A';
1984 // } //something different from \n and \r
1986 // currentCharacter =
1987 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1990 while (currentCharacter != '\r' && currentCharacter != '\n') {
1992 currentCharacter = source[currentPosition++];
1993 // if (((currentCharacter = source[currentPosition++])
1995 // && (source[currentPosition] == 'u')) {
1996 // //-------------unicode traitement ------------
1997 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1998 // currentPosition++;
1999 // while (source[currentPosition] == 'u') {
2000 // currentPosition++;
2003 // Character.getNumericValue(source[currentPosition++]))
2007 // Character.getNumericValue(source[currentPosition++]))
2011 // Character.getNumericValue(source[currentPosition++]))
2015 // Character.getNumericValue(source[currentPosition++]))
2018 // //error don't care of the value
2019 // currentCharacter = 'A';
2020 // } //something different from \n and \r
2022 // currentCharacter =
2023 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2027 if (recordLineSeparator
2028 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2029 pushLineSeparator();
2030 } catch (IndexOutOfBoundsException e) {
2031 } //an eof will then be generated
2035 //traditional and annotation comment
2036 boolean star = false;
2037 // try { // consume next character
2038 unicodeAsBackSlash = false;
2039 currentCharacter = source[currentPosition++];
2040 // if (((currentCharacter = source[currentPosition++]) == '\\')
2041 // && (source[currentPosition] == 'u')) {
2042 // getNextUnicodeChar();
2044 if (withoutUnicodePtr != 0) {
2045 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2048 // } catch (InvalidInputException ex) {
2050 if (currentCharacter == '*') {
2053 if (recordLineSeparator
2054 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2055 pushLineSeparator();
2056 try { //get the next char
2057 currentCharacter = source[currentPosition++];
2058 // if (((currentCharacter = source[currentPosition++]) ==
2060 // && (source[currentPosition] == 'u')) {
2061 // //-------------unicode traitement ------------
2062 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2063 // currentPosition++;
2064 // while (source[currentPosition] == 'u') {
2065 // currentPosition++;
2068 // Character.getNumericValue(source[currentPosition++]))
2072 // Character.getNumericValue(source[currentPosition++]))
2076 // Character.getNumericValue(source[currentPosition++]))
2080 // Character.getNumericValue(source[currentPosition++]))
2083 // //error don't care of the value
2084 // currentCharacter = 'A';
2085 // } //something different from * and /
2087 // currentCharacter =
2088 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2091 //loop until end of comment */
// star records whether the previous character was a star, so the loop ends
// only on a slash that directly follows a star
2092 while ((currentCharacter != '/') || (!star)) {
2093 if (recordLineSeparator
2094 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2095 pushLineSeparator();
2096 star = currentCharacter == '*';
2098 currentCharacter = source[currentPosition++];
2099 // if (((currentCharacter = source[currentPosition++])
2101 // && (source[currentPosition] == 'u')) {
2102 // //-------------unicode traitement ------------
2103 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2104 // currentPosition++;
2105 // while (source[currentPosition] == 'u') {
2106 // currentPosition++;
2109 // Character.getNumericValue(source[currentPosition++]))
2113 // Character.getNumericValue(source[currentPosition++]))
2117 // Character.getNumericValue(source[currentPosition++]))
2121 // Character.getNumericValue(source[currentPosition++]))
2124 // //error don't care of the value
2125 // currentCharacter = 'A';
2126 // } //something different from * and /
2128 // currentCharacter =
2129 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2133 } catch (IndexOutOfBoundsException e) {
// identifiers and variables are scanned only to move past them; the
// resulting token type is not used
2141 if (isPHPIdentifierStart(currentCharacter)
2142 || currentCharacter == '$') {
2144 scanIdentifierOrKeyword((currentCharacter == '$'));
2145 } catch (InvalidInputException ex) {
2149 if (Character.isDigit(currentCharacter)) {
2152 } catch (InvalidInputException ex) {
2158 //-----------------end switch while try--------------------
2159 } catch (IndexOutOfBoundsException e) {
2160 } catch (InvalidInputException e) {
2164 // public final boolean jumpOverUnicodeWhiteSpace()
2165 // throws InvalidInputException {
2167 // //handle the case of unicode. Jump over the next whiteSpace
2168 // //making startPosition pointing on the next available char
2169 // //On false, the currentCharacter is filled up with a potential
2173 // this.wasAcr = false;
2174 // int c1, c2, c3, c4;
2175 // int unicodeSize = 6;
2176 // currentPosition++;
2177 // while (source[currentPosition] == 'u') {
2178 // currentPosition++;
2182 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2184 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2186 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2188 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2190 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2193 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2194 // if (recordLineSeparator
2195 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2196 // pushLineSeparator();
2197 // if (Character.isWhitespace(currentCharacter))
2200 // //buffer the new char which is not a white space
2201 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2202 // //withoutUnicodePtr == 1 is true here
2204 // } catch (IndexOutOfBoundsException e) {
2205 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Builds a trimmed copy of the line-end table: a new int array of length
 * linePtr + 1 is allocated and filled with the first linePtr + 1 entries of
 * lineEnds, so callers never see the unused tail of the internal array.
 */
2208 public final int[] getLineEnds() {
2209 //return a bounded copy of this.lineEnds
// the destination array is allocated inline as the third arraycopy argument
2211 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor for the character buffer being scanned.
 * NOTE(review): presumably returns the internal source array itself rather
 * than a copy - confirm before letting callers modify the result.
 */
2214 public char[] getSource() {
/**
 * Returns the source of a one-character token, taken from
 * source[startPosition]. Per the comments below, the aim is to hand out
 * arrays that are built only once for the common single-character
 * identifiers; the last statement allocates a fresh one-element array.
 * NOTE(review): the fresh array is presumably the fallback for characters
 * without a shared array - confirm.
 */
2217 final char[] optimizedCurrentTokenSource1() {
2218 //return always the same char[] build only once
2219 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2220 char charOne = source[startPosition];
2275 return new char[]{charOne};
/**
 * Returns the source of a two-character token starting at startPosition,
 * trying to reuse an array cached in charArray_length[0]. A hash of the two
 * characters selects a bucket. The bucket is searched in two passes, first
 * up to InternalTableSize and then up to newEntry2, comparing both
 * characters. When nothing matches, a new two-element array is created and
 * stored in the bucket.
 * NOTE(review): a matching entry is presumably returned directly, and the
 * insertion index presumably wraps once it reaches InternalTableSize -
 * confirm.
 */
2278 final char[] optimizedCurrentTokenSource2() {
2279 //try to return the same char[] build only once
// the characters are captured into c0 and c1 while the hash is computed
2281 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2283 char[][] table = charArray_length[0][hash];
2285 while (++i < InternalTableSize) {
2286 char[] charArray = table[i];
2287 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2290 //---------other side---------
2292 int max = newEntry2;
2293 while (++i <= max) {
2294 char[] charArray = table[i];
2295 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2298 //--------add the entry-------
2299 if (++max >= InternalTableSize)
2302 table[max] = (r = new char[]{c0, c1});
/**
 * Returns the source of a three-character token starting at startPosition,
 * trying to reuse an array cached in charArray_length[1]. A hash of the
 * three characters selects a bucket. The bucket is searched in two passes,
 * first up to InternalTableSize and then up to newEntry3, comparing all
 * three characters. When nothing matches, a new three-element array is
 * created and stored in the bucket.
 * NOTE(review): a matching entry is presumably returned directly, and the
 * insertion index presumably wraps once it reaches InternalTableSize -
 * confirm.
 */
2306 final char[] optimizedCurrentTokenSource3() {
2307 //try to return the same char[] build only once
// the characters are captured into c0, c1 and c2 while the hash is computed
2309 int hash = (((c0 = source[startPosition]) << 12)
2310 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2312 char[][] table = charArray_length[1][hash];
2314 while (++i < InternalTableSize) {
2315 char[] charArray = table[i];
2316 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2319 //---------other side---------
2321 int max = newEntry3;
2322 while (++i <= max) {
2323 char[] charArray = table[i];
2324 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2327 //--------add the entry-------
2328 if (++max >= InternalTableSize)
2331 table[max] = (r = new char[]{c0, c1, c2});
/**
 * Returns the source of a four-character token starting at startPosition,
 * trying to reuse an array cached in charArray_length[2]. The hash is
 * computed as a long and cast to int to select a bucket. The bucket is
 * searched in two passes, first up to InternalTableSize and then up to
 * newEntry4, comparing all four characters. When nothing matches, a new
 * four-element array is created and stored in the bucket.
 * NOTE(review): a matching entry is presumably returned directly, and the
 * insertion index presumably wraps once it reaches InternalTableSize -
 * confirm.
 */
2335 final char[] optimizedCurrentTokenSource4() {
2336 //try to return the same char[] build only once
2337 char c0, c1, c2, c3;
// the leading character is widened to long before it is shifted
2338 long hash = ((((long) (c0 = source[startPosition])) << 18)
2339 + ((c1 = source[startPosition + 1]) << 12)
2340 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2342 char[][] table = charArray_length[2][(int) hash];
2344 while (++i < InternalTableSize) {
2345 char[] charArray = table[i];
2346 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2347 && (c3 == charArray[3]))
2350 //---------other side---------
2352 int max = newEntry4;
2353 while (++i <= max) {
2354 char[] charArray = table[i];
2355 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2356 && (c3 == charArray[3]))
2359 //--------add the entry-------
2360 if (++max >= InternalTableSize)
2363 table[max] = (r = new char[]{c0, c1, c2, c3});
/**
 * Returns the source of a five-character token starting at startPosition,
 * trying to reuse an array cached in charArray_length[3]. The hash is
 * computed as a long and cast to int to select a bucket. The bucket is
 * searched in two passes, first up to InternalTableSize and then up to
 * newEntry5, comparing all five characters. When nothing matches, a new
 * five-element array is created and stored in the bucket.
 * NOTE(review): a matching entry is presumably returned directly, and the
 * insertion index presumably wraps once it reaches InternalTableSize -
 * confirm.
 */
2367 final char[] optimizedCurrentTokenSource5() {
2368 //try to return the same char[] build only once
2369 char c0, c1, c2, c3, c4;
// the two leading characters are widened to long before they are shifted
2370 long hash = ((((long) (c0 = source[startPosition])) << 24)
2371 + (((long) (c1 = source[startPosition + 1])) << 18)
2372 + ((c2 = source[startPosition + 2]) << 12)
2373 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2375 char[][] table = charArray_length[3][(int) hash];
2377 while (++i < InternalTableSize) {
2378 char[] charArray = table[i];
2379 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2380 && (c3 == charArray[3]) && (c4 == charArray[4]))
2383 //---------other side---------
2385 int max = newEntry5;
2386 while (++i <= max) {
2387 char[] charArray = table[i];
2388 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2389 && (c3 == charArray[3]) && (c4 == charArray[4]))
2392 //--------add the entry-------
2393 if (++max >= InternalTableSize)
2396 table[max] = (r = new char[]{c0, c1, c2, c3, c4});
// Returns a char[] holding the six characters of the current token
// (source[startPosition] .. source[startPosition + 5]), comparing against the
// arrays cached in charArray_length[4] before a new one is created.
// NOTE(review): some short lines of this method (end of the hash expression,
// loop initialisation, returns, closing braces) are not visible in this
// listing - check the complete file before editing.
2400 final char[] optimizedCurrentTokenSource6() {
2401 //try to return the same char[] build only once
2402 char c0, c1, c2, c3, c4, c5;
// NOTE(review): the shift amounts here are 32, 24, 18, 12, 6 whereas the
// shorter variants step by 6 bits throughout; this only affects how tokens are
// spread over the buckets, but confirm whether 30/24 was intended.
2403 long hash = ((((long) (c0 = source[startPosition])) << 32)
2404 + (((long) (c1 = source[startPosition + 1])) << 24)
2405 + (((long) (c2 = source[startPosition + 2])) << 18)
2406 + ((c3 = source[startPosition + 3]) << 12)
2407 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2409 char[][] table = charArray_length[4][(int) hash];
// first pass: compare the token with the entries up to the end of the bucket
2411 while (++i < InternalTableSize) {
2412 char[] charArray = table[i];
2413 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2414 && (c3 == charArray[3]) && (c4 == charArray[4])
2415 && (c5 == charArray[5]))
2418 //---------other side---------
// second pass: compare the token with the entries up to index newEntry6
2420 int max = newEntry6;
2421 while (++i <= max) {
2422 char[] charArray = table[i];
2423 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2424 && (c3 == charArray[3]) && (c4 == charArray[4])
2425 && (c5 == charArray[5]))
2428 //--------add the entry-------
// nothing matched: a fresh array is stored in the slot after newEntry6
2429 if (++max >= InternalTableSize)
2432 table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
// Records the end of the current line in lineEnds when currentCharacter is
// '\r' or '\n'. A '\r' that is directly followed by '\n' counts as a single
// line end located at the '\n'. lineEnds is enlarged by INCREMENT slots when
// the store at ++linePtr runs past the end of the array.
// NOTE(review): some short lines of this method (e.g. the statements that
// maintain wasAcr, the early returns and the closing braces) are not visible
// in this listing - check the complete file before editing.
2436 public final void pushLineSeparator() throws InvalidInputException {
2437 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2438 final int INCREMENT = 250;
2439 if (this.checkNonExternalizedStringLiterals) {
2440 // reinitialize the current line for non externalize strings purpose
2443 //currentCharacter is at position currentPosition-1
2445 if (currentCharacter == '\r') {
2446 int separatorPos = currentPosition - 1;
// guard for a separator at or before the line end recorded last
2447 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2449 //System.out.println("CR-" + separatorPos);
2451 lineEnds[++linePtr] = separatorPos;
2452 } catch (IndexOutOfBoundsException e) {
2453 //linePtr value is correct
// the array was full: linePtr has already been incremented, so only the
// storage has to be enlarged before the store is repeated
2454 int oldLength = lineEnds.length;
2455 int[] old = lineEnds;
2456 lineEnds = new int[oldLength + INCREMENT];
2457 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2458 lineEnds[linePtr] = separatorPos;
2460 // look-ahead for merged cr+lf
2462 if (source[currentPosition] == '\n') {
2463 //System.out.println("look-ahead LF-" + currentPosition);
// move the line end just recorded from the '\r' to the '\n'
2464 lineEnds[linePtr] = currentPosition;
2470 } catch (IndexOutOfBoundsException e) {
2475 if (currentCharacter == '\n') {
2476 //must merge eventual cr followed by lf
// a '\r' recorded at the previous position (currentPosition - 2) and this
// '\n' (currentPosition - 1) form one separator: overwrite instead of adding
2477 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2478 //System.out.println("merge LF-" + (currentPosition - 1));
2479 lineEnds[linePtr] = currentPosition - 1;
2481 int separatorPos = currentPosition - 1;
2482 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2484 // System.out.println("LF-" + separatorPos);
2486 lineEnds[++linePtr] = separatorPos;
2487 } catch (IndexOutOfBoundsException e) {
2488 //linePtr value is correct
2489 int oldLength = lineEnds.length;
2490 int[] old = lineEnds;
2491 lineEnds = new int[oldLength + INCREMENT];
2492 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2493 lineEnds[linePtr] = separatorPos;
// Variant of the line-end bookkeeping for a '\r' or '\n' that was read as a
// unicode character: the recorded position is currentPosition - 6 instead of
// currentPosition - 1 (presumably the length of a six-character escape such as
// \\u000a - TODO confirm). lineEnds is enlarged by INCREMENT slots when the
// store at ++linePtr runs past the end of the array.
// NOTE(review): some short lines of this method (e.g. the statements that
// maintain wasAcr, the early returns and the closing braces) are not visible
// in this listing - check the complete file before editing.
2500 public final void pushUnicodeLineSeparator() {
2501 // isUnicode means that the \r or \n has been read as a unicode character
2502 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2503 final int INCREMENT = 250;
2504 //currentCharacter is at position currentPosition-1
2505 if (this.checkNonExternalizedStringLiterals) {
2506 // reinitialize the current line for non externalize strings purpose
2510 if (currentCharacter == '\r') {
2511 int separatorPos = currentPosition - 6;
// guard for a separator at or before the line end recorded last
2512 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2514 //System.out.println("CR-" + separatorPos);
2516 lineEnds[++linePtr] = separatorPos;
2517 } catch (IndexOutOfBoundsException e) {
2518 //linePtr value is correct
2519 int oldLength = lineEnds.length;
2520 int[] old = lineEnds;
2521 lineEnds = new int[oldLength + INCREMENT];
2522 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2523 lineEnds[linePtr] = separatorPos;
2525 // look-ahead for merged cr+lf
// NOTE(review): pushLineSeparator shows a catch for IndexOutOfBoundsException
// after the same look-ahead, none is visible here; if currentPosition can be
// equal to source.length at this point the read below would throw - confirm.
2526 if (source[currentPosition] == '\n') {
2527 //System.out.println("look-ahead LF-" + currentPosition);
2528 lineEnds[linePtr] = currentPosition;
2536 if (currentCharacter == '\n') {
2537 //must merge eventual cr followed by lf
// a '\r' recorded at currentPosition - 7 and this '\n' form one separator:
// overwrite the recorded line end instead of adding a second one
2538 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2539 //System.out.println("merge LF-" + (currentPosition - 1));
2540 lineEnds[linePtr] = currentPosition - 6;
2542 int separatorPos = currentPosition - 6;
2543 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2545 // System.out.println("LF-" + separatorPos);
2547 lineEnds[++linePtr] = separatorPos;
2548 } catch (IndexOutOfBoundsException e) {
2549 //linePtr value is correct
2550 int oldLength = lineEnds.length;
2551 int[] old = lineEnds;
2552 lineEnds = new int[oldLength + INCREMENT];
2553 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2554 lineEnds[linePtr] = separatorPos;
2561 public final void recordComment(boolean isJavadoc) {
2562 // a new annotation comment is recorded
2564 commentStops[++commentPtr] = isJavadoc
2567 } catch (IndexOutOfBoundsException e) {
2568 int oldStackLength = commentStops.length;
2569 int[] oldStack = commentStops;
2570 commentStops = new int[oldStackLength + 30];
2571 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2572 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2573 //grows the positions buffers too
2574 int[] old = commentStarts;
2575 commentStarts = new int[oldStackLength + 30];
2576 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2578 //the buffer is of a correct size here
2579 commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that scanning can restart at 'begin':
// initialPosition, startPosition and currentPosition are all set to 'begin',
// eofPosition is set to end + 1 (or left at 'end' when end is already
// Integer.MAX_VALUE, which avoids overflowing the int) and the comment stack
// is emptied.
// NOTE(review): the embedded line numbers skip 2583, so one short statement
// of this method is not visible in this listing - check the complete file.
2581 public void resetTo(int begin, int end) {
2582 //reset the scanner to a given position where it may rescan again
2584 initialPosition = startPosition = currentPosition = begin;
2585 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2586 commentPtr = -1; // reset comment stack
// Reads the next source character into currentCharacter (advancing
// currentPosition) and maps it through a switch; the visible branches replace
// currentCharacter with a single quote or a backslash.
// NOTE(review): the case labels and break statements of the switch are not
// visible in this listing, so which input character selects which branch has
// to be checked against the complete file.
2588 public final void scanSingleQuotedEscapeCharacter()
2589 throws InvalidInputException {
2590 // the string with "\\u" is a legal string of two chars \ and u
2591 //thus we use a direct access to the source (for regular cases).
2592 // if (unicodeAsBackSlash) {
2593 // // consume next character
2594 // unicodeAsBackSlash = false;
2595 // if (((currentCharacter = source[currentPosition++]) == '\\')
2596 // && (source[currentPosition] == 'u')) {
2597 // getNextUnicodeChar();
2599 // if (withoutUnicodePtr != 0) {
2600 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2604 currentCharacter = source[currentPosition++];
2605 switch (currentCharacter) {
2607 currentCharacter = '\'';
2610 currentCharacter = '\\';
2613 currentCharacter = '\\';
// Reads the next source character into currentCharacter (advancing
// currentPosition) and replaces it with the character the escape stands for:
// the visible branches produce tab, newline, carriage return, double quote,
// single quote, backslash and '$'. Any other character is tried as an octal
// escape of up to three digits, whose numeric value becomes currentCharacter.
// An InvalidInputException(INVALID_ESCAPE) is thrown in the octal branch.
// NOTE(review): the case labels, several conditions (including the one
// guarding the throw) and the closing braces are not visible in this listing -
// check the complete file before editing.
2617 public final void scanDoubleQuotedEscapeCharacter()
2618 throws InvalidInputException {
2619 // the string with "\\u" is a legal string of two chars \ and u
2620 //thus we use a direct access to the source (for regular cases).
2621 // if (unicodeAsBackSlash) {
2622 // // consume next character
2623 // unicodeAsBackSlash = false;
2624 // if (((currentCharacter = source[currentPosition++]) == '\\')
2625 // && (source[currentPosition] == 'u')) {
2626 // getNextUnicodeChar();
2628 // if (withoutUnicodePtr != 0) {
2629 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2633 currentCharacter = source[currentPosition++];
2634 switch (currentCharacter) {
2636 // currentCharacter = '\b';
2639 currentCharacter = '\t';
2642 currentCharacter = '\n';
2645 // currentCharacter = '\f';
2648 currentCharacter = '\r';
2651 currentCharacter = '\"';
2654 currentCharacter = '\'';
2657 currentCharacter = '\\';
2660 currentCharacter = '$';
2663 // -----------octal escape--------------
2665 // OctalDigit OctalDigit
2666 // ZeroToThree OctalDigit OctalDigit
2667 int number = Character.getNumericValue(currentCharacter);
2668 if (number >= 0 && number <= 7) {
// a leading digit above 3 cannot start a three-digit octal escape
2669 boolean zeroToThreeNot = number > 3;
2670 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2671 int digit = Character.getNumericValue(currentCharacter);
2672 if (digit >= 0 && digit <= 7) {
2673 number = (number * 8) + digit;
2675 .isDigit(currentCharacter = source[currentPosition++])) {
2676 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2677 // Digit --> ignore last character
2680 digit = Character.getNumericValue(currentCharacter);
2681 if (digit >= 0 && digit <= 7) {
2682 // has read \ZeroToThree OctalDigit OctalDigit
2683 number = (number * 8) + digit;
2684 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2685 // --> ignore last character
2689 } else { // has read \OctalDigit NonDigit--> ignore last
2693 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2697 } else { // has read \OctalDigit --> ignore last character
2701 throw new InvalidInputException(INVALID_ESCAPE);
2702 currentCharacter = (char) number;
2705 // throw new InvalidInputException(INVALID_ESCAPE);
2708 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2709 // return scanIdentifierOrKeyword( false );
// Classifies the word that starts at startPosition. The remaining identifier
// characters are consumed first; the method then returns TokenNameVariable
// (presumably when isVariable is set - the condition is not visible here),
// the token of a recognised keyword, or TokenNameIdentifier.
// Keywords are matched on a lower-cased copy of the word (data), so the match
// is case-insensitive. A word of length 1 is always an identifier.
// NOTE(review): the embedded line numbers skip many values: the case labels,
// the switch statements on the word length, most 'else' lines and the closing
// braces are not visible in this listing. The review notes further down point
// at defects that are visible nevertheless; fix them in the complete file.
2711 public int scanIdentifierOrKeyword(boolean isVariable)
2712 throws InvalidInputException {
2714 //first dispatch on the first char.
2715 //then the length. If there are several
2716 //keywords with the same length AND the same first char, then do another
2717 //dispatch on the second char :-)...cool....but fast !
2718 useAssertAsAnIndentifier = false;
2719 while (getNextCharAsJavaIdentifierPart()) {
2722 // if (new String(getCurrentTokenSource()).equals("$this")) {
2723 // return TokenNamethis;
2725 return TokenNameVariable;
2730 // if (withoutUnicodePtr == 0)
2731 //quick test on length == 1 but not on length > 12 while most identifier
2732 //have a length which is <= 12...but there are lots of identifier with
2735 if ((length = currentPosition - startPosition) == 1)
2736 return TokenNameIdentifier;
// lower-cased copy of the word; all keyword comparisons below read this copy
2738 data = new char[length];
2739 index = startPosition;
2740 for (int i = 0; i < length; i++) {
2741 data[i] = Character.toLowerCase(source[index + i]);
2745 // if ((length = withoutUnicodePtr) == 1)
2746 // return TokenNameIdentifier;
2747 // // data = withoutUnicodeBuffer;
2748 // data = new char[withoutUnicodeBuffer.length];
2749 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2750 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2754 firstLetter = data[index];
2755 switch (firstLetter) {
// words starting with '_': __file__, __line__, __class__, __method__,
// __function__
2760 if ((data[++index] == '_') && (data[++index] == 'f')
2761 && (data[++index] == 'i') && (data[++index] == 'l')
2762 && (data[++index] == 'e') && (data[++index] == '_')
2763 && (data[++index] == '_'))
2764 return TokenNameFILE;
2765 index = 0; //__LINE__
2766 if ((data[++index] == '_') && (data[++index] == 'l')
2767 && (data[++index] == 'i') && (data[++index] == 'n')
2768 && (data[++index] == 'e') && (data[++index] == '_')
2769 && (data[++index] == '_'))
2770 return TokenNameLINE;
2774 if ((data[++index] == '_') && (data[++index] == 'c')
2775 && (data[++index] == 'l') && (data[++index] == 'a')
2776 && (data[++index] == 's') && (data[++index] == 's')
2777 && (data[++index] == '_') && (data[++index] == '_'))
2778 return TokenNameCLASS_C;
2782 if ((data[++index] == '_') && (data[++index] == 'm')
2783 && (data[++index] == 'e') && (data[++index] == 't')
2784 && (data[++index] == 'h') && (data[++index] == 'o')
2785 && (data[++index] == 'd') && (data[++index] == '_')
2786 && (data[++index] == '_'))
2787 return TokenNameMETHOD_C;
2791 if ((data[++index] == '_') && (data[++index] == 'f')
2792 && (data[++index] == 'u') && (data[++index] == 'n')
2793 && (data[++index] == 'c') && (data[++index] == 't')
2794 && (data[++index] == 'i') && (data[++index] == 'o')
2795 && (data[++index] == 'n') && (data[++index] == '_')
2796 && (data[++index] == '_'))
2797 return TokenNameFUNC_C;
2800 return TokenNameIdentifier;
2802 // as and array abstract
2806 if ((data[++index] == 's')) {
2809 return TokenNameIdentifier;
2813 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2814 return TokenNameand;
2816 return TokenNameIdentifier;
2820 if ((data[++index] == 'r') && (data[++index] == 'r')
2821 && (data[++index] == 'a') && (data[++index] == 'y'))
2822 return TokenNamearray;
2824 return TokenNameIdentifier;
2826 if ((data[++index] == 'b') && (data[++index] == 's')
2827 && (data[++index] == 't') && (data[++index] == 'r')
2828 && (data[++index] == 'a') && (data[++index] == 'c')
2829 && (data[++index] == 't'))
2830 return TokenNameabstract;
2832 return TokenNameIdentifier;
2834 return TokenNameIdentifier;
2840 if ((data[++index] == 'r') && (data[++index] == 'e')
2841 && (data[++index] == 'a') && (data[++index] == 'k'))
2842 return TokenNamebreak;
2844 return TokenNameIdentifier;
2846 return TokenNameIdentifier;
2849 //case catch class clone const continue
2852 if ((data[++index] == 'a') && (data[++index] == 's')
2853 && (data[++index] == 'e'))
2854 return TokenNamecase;
2856 return TokenNameIdentifier;
2858 if ((data[++index] == 'a') && (data[++index] == 't')
2859 && (data[++index] == 'c') && (data[++index] == 'h'))
2860 return TokenNamecatch;
2862 if ((data[++index] == 'l') && (data[++index] == 'a')
2863 && (data[++index] == 's') && (data[++index] == 's'))
2864 return TokenNameclass;
2866 if ((data[++index] == 'l') && (data[++index] == 'o')
2867 && (data[++index] == 'n') && (data[++index] == 'e'))
2868 return TokenNameclone;
2870 if ((data[++index] == 'o') && (data[++index] == 'n')
2871 && (data[++index] == 's') && (data[++index] == 't'))
2872 return TokenNameconst;
2874 return TokenNameIdentifier;
2876 if ((data[++index] == 'o') && (data[++index] == 'n')
2877 && (data[++index] == 't') && (data[++index] == 'i')
2878 && (data[++index] == 'n') && (data[++index] == 'u')
2879 && (data[++index] == 'e'))
2880 return TokenNamecontinue;
2882 return TokenNameIdentifier;
2884 return TokenNameIdentifier;
2887 // declare default do die
2888 // TODO delete define ==> no keyword !
2891 if ((data[++index] == 'o'))
2894 return TokenNameIdentifier;
2896 // if ((data[++index] == 'e')
2897 // && (data[++index] == 'f')
2898 // && (data[++index] == 'i')
2899 // && (data[++index] == 'n')
2900 // && (data[++index] == 'e'))
2901 // return TokenNamedefine;
2903 // return TokenNameIdentifier;
2905 if ((data[++index] == 'e') && (data[++index] == 'c')
2906 && (data[++index] == 'l') && (data[++index] == 'a')
2907 && (data[++index] == 'r') && (data[++index] == 'e'))
2908 return TokenNamedeclare;
2910 if ((data[++index] == 'e') && (data[++index] == 'f')
2911 && (data[++index] == 'a') && (data[++index] == 'u')
2912 && (data[++index] == 'l') && (data[++index] == 't'))
2913 return TokenNamedefault;
2915 return TokenNameIdentifier;
2917 return TokenNameIdentifier;
2920 //echo else exit elseif extends eval
// NOTE(review): the 'else if' tests below read data[index] after the first
// test may already have advanced index past the second character. Assuming
// this is the branch for words of length 4, a word such as "ecls" fails the
// "echo" test with index == 2, then matches 'l' and 's' and finally reads
// data[4], which is outside the array. Each alternative should restart from
// the second character - confirm and fix in the complete file.
2923 if ((data[++index] == 'c') && (data[++index] == 'h')
2924 && (data[++index] == 'o'))
2925 return TokenNameecho;
2926 else if ((data[index] == 'l') && (data[++index] == 's')
2927 && (data[++index] == 'e'))
2928 return TokenNameelse;
2929 else if ((data[index] == 'x') && (data[++index] == 'i')
2930 && (data[++index] == 't'))
2931 return TokenNameexit;
2932 else if ((data[index] == 'v') && (data[++index] == 'a')
2933 && (data[++index] == 'l'))
2934 return TokenNameeval;
2936 return TokenNameIdentifier;
// NOTE(review): same index-reuse pattern as above: after a partial match of
// "endif" the "empty" test starts at the wrong character and can read past
// the end of data (e.g. a five-letter word "enmpt") - confirm.
2939 if ((data[++index] == 'n') && (data[++index] == 'd')
2940 && (data[++index] == 'i') && (data[++index] == 'f'))
2941 return TokenNameendif;
2942 if ((data[index] == 'm') && (data[++index] == 'p')
2943 && (data[++index] == 't') && (data[++index] == 'y'))
2944 return TokenNameempty;
2946 return TokenNameIdentifier;
// NOTE(review): same index-reuse pattern between "endfor" and "elseif".
2949 if ((data[++index] == 'n') && (data[++index] == 'd')
2950 && (data[++index] == 'f') && (data[++index] == 'o')
2951 && (data[++index] == 'r'))
2952 return TokenNameendfor;
2953 else if ((data[index] == 'l') && (data[++index] == 's')
2954 && (data[++index] == 'e') && (data[++index] == 'i')
2955 && (data[++index] == 'f'))
2956 return TokenNameelseif;
2958 return TokenNameIdentifier;
2960 if ((data[++index] == 'x') && (data[++index] == 't')
2961 && (data[++index] == 'e') && (data[++index] == 'n')
2962 && (data[++index] == 'd') && (data[++index] == 's'))
2963 return TokenNameextends;
2965 return TokenNameIdentifier;
2968 if ((data[++index] == 'n') && (data[++index] == 'd')
2969 && (data[++index] == 'w') && (data[++index] == 'h')
2970 && (data[++index] == 'i') && (data[++index] == 'l')
2971 && (data[++index] == 'e'))
2972 return TokenNameendwhile;
2974 return TokenNameIdentifier;
2977 if ((data[++index] == 'n') && (data[++index] == 'd')
2978 && (data[++index] == 's') && (data[++index] == 'w')
2979 && (data[++index] == 'i') && (data[++index] == 't')
2980 && (data[++index] == 'c') && (data[++index] == 'h'))
2981 return TokenNameendswitch;
2983 return TokenNameIdentifier;
// NOTE(review): the characters tested here spell "enddeclare", yet the token
// returned is TokenNameendforeach - the same token as for the "endforeach"
// test that follows. This looks like a copy/paste slip; a dedicated token for
// enddeclare is presumably intended (check ITerminalSymbols) - confirm.
2986 if ((data[++index] == 'n') && (data[++index] == 'd')
2987 && (data[++index] == 'd') && (data[++index] == 'e')
2988 && (data[++index] == 'c') && (data[++index] == 'l')
2989 && (data[++index] == 'a') && (data[++index] == 'r')
2990 && (data[++index] == 'e'))
2991 return TokenNameendforeach;
2993 if ((data[++index] == 'n') // endforeach
2994 && (data[++index] == 'd') && (data[++index] == 'f')
2995 && (data[++index] == 'o') && (data[++index] == 'r')
2996 && (data[++index] == 'e') && (data[++index] == 'a')
2997 && (data[++index] == 'c') && (data[++index] == 'h'))
2998 return TokenNameendforeach;
3000 return TokenNameIdentifier;
3002 return TokenNameIdentifier;
3005 //for false final function
3008 if ((data[++index] == 'o') && (data[++index] == 'r'))
3009 return TokenNamefor;
3011 return TokenNameIdentifier;
3013 // if ((data[++index] == 'a') && (data[++index] == 'l')
3014 // && (data[++index] == 's') && (data[++index] == 'e'))
3015 // return TokenNamefalse;
3016 if ((data[++index] == 'i') && (data[++index] == 'n')
3017 && (data[++index] == 'a') && (data[++index] == 'l'))
3018 return TokenNamefinal;
3020 return TokenNameIdentifier;
3023 if ((data[++index] == 'o') && (data[++index] == 'r')
3024 && (data[++index] == 'e') && (data[++index] == 'a')
3025 && (data[++index] == 'c') && (data[++index] == 'h'))
3026 return TokenNameforeach;
3028 return TokenNameIdentifier;
3031 if ((data[++index] == 'u') && (data[++index] == 'n')
3032 && (data[++index] == 'c') && (data[++index] == 't')
3033 && (data[++index] == 'i') && (data[++index] == 'o')
3034 && (data[++index] == 'n'))
3035 return TokenNamefunction;
3037 return TokenNameIdentifier;
3039 return TokenNameIdentifier;
3044 if ((data[++index] == 'l') && (data[++index] == 'o')
3045 && (data[++index] == 'b') && (data[++index] == 'a')
3046 && (data[++index] == 'l')) {
3047 return TokenNameglobal;
3050 return TokenNameIdentifier;
3052 //if int isset include include_once instanceof interface implements
3055 if (data[++index] == 'f')
3058 return TokenNameIdentifier;
3060 // if ((data[++index] == 'n') && (data[++index] == 't'))
3061 // return TokenNameint;
3063 // return TokenNameIdentifier;
3065 if ((data[++index] == 's') && (data[++index] == 's')
3066 && (data[++index] == 'e') && (data[++index] == 't'))
3067 return TokenNameisset;
3069 return TokenNameIdentifier;
3071 if ((data[++index] == 'n') && (data[++index] == 'c')
3072 && (data[++index] == 'l') && (data[++index] == 'u')
3073 && (data[++index] == 'd') && (data[++index] == 'e'))
3074 return TokenNameinclude;
3076 return TokenNameIdentifier;
3079 if ((data[++index] == 'n') && (data[++index] == 't')
3080 && (data[++index] == 'e') && (data[++index] == 'r')
3081 && (data[++index] == 'f') && (data[++index] == 'a')
3082 && (data[++index] == 'c') && (data[++index] == 'e'))
3083 return TokenNameinterface;
3085 return TokenNameIdentifier;
// NOTE(review): same index-reuse pattern as in the 'e' branch: after a
// partial match of "instanceof" the "implements" test starts at the wrong
// character and can read past the end of data - confirm.
3088 if ((data[++index] == 'n') && (data[++index] == 's')
3089 && (data[++index] == 't') && (data[++index] == 'a')
3090 && (data[++index] == 'n') && (data[++index] == 'c')
3091 && (data[++index] == 'e') && (data[++index] == 'o')
3092 && (data[++index] == 'f'))
3093 return TokenNameinstanceof;
3094 if ((data[index] == 'm') && (data[++index] == 'p')
3095 && (data[++index] == 'l') && (data[++index] == 'e')
3096 && (data[++index] == 'm') && (data[++index] == 'e')
3097 && (data[++index] == 'n') && (data[++index] == 't')
3098 && (data[++index] == 's'))
3099 return TokenNameimplements;
3101 return TokenNameIdentifier;
3103 if ((data[++index] == 'n') && (data[++index] == 'c')
3104 && (data[++index] == 'l') && (data[++index] == 'u')
3105 && (data[++index] == 'd') && (data[++index] == 'e')
3106 && (data[++index] == '_') && (data[++index] == 'o')
3107 && (data[++index] == 'n') && (data[++index] == 'c')
3108 && (data[++index] == 'e'))
3109 return TokenNameinclude_once;
3111 return TokenNameIdentifier;
3113 return TokenNameIdentifier;
3118 if ((data[++index] == 'i') && (data[++index] == 's')
3119 && (data[++index] == 't')) {
3120 return TokenNamelist;
3123 return TokenNameIdentifier;
3128 if ((data[++index] == 'e') && (data[++index] == 'w'))
3129 return TokenNamenew;
3131 return TokenNameIdentifier;
3133 // if ((data[++index] == 'u') && (data[++index] == 'l')
3134 // && (data[++index] == 'l'))
3135 // return TokenNamenull;
3137 // return TokenNameIdentifier;
3139 return TokenNameIdentifier;
3144 if (data[++index] == 'r') {
3148 // if (length == 12) {
3149 // if ((data[++index] == 'l')
3150 // && (data[++index] == 'd')
3151 // && (data[++index] == '_')
3152 // && (data[++index] == 'f')
3153 // && (data[++index] == 'u')
3154 // && (data[++index] == 'n')
3155 // && (data[++index] == 'c')
3156 // && (data[++index] == 't')
3157 // && (data[++index] == 'i')
3158 // && (data[++index] == 'o')
3159 // && (data[++index] == 'n')) {
3160 // return TokenNameold_function;
3163 return TokenNameIdentifier;
3165 // print public private protected
3168 if ((data[++index] == 'r') && (data[++index] == 'i')
3169 && (data[++index] == 'n') && (data[++index] == 't')) {
3170 return TokenNameprint;
3172 return TokenNameIdentifier;
3174 if ((data[++index] == 'u') && (data[++index] == 'b')
3175 && (data[++index] == 'l') && (data[++index] == 'i')
3176 && (data[++index] == 'c')) {
3177 return TokenNamepublic;
3179 return TokenNameIdentifier;
3181 if ((data[++index] == 'r') && (data[++index] == 'i')
3182 && (data[++index] == 'v') && (data[++index] == 'a')
3183 && (data[++index] == 't') && (data[++index] == 'e')) {
3184 return TokenNameprivate;
3186 return TokenNameIdentifier;
3188 if ((data[++index] == 'r') && (data[++index] == 'o')
3189 && (data[++index] == 't') && (data[++index] == 'e')
3190 && (data[++index] == 'c') && (data[++index] == 't')
3191 && (data[++index] == 'e') && (data[++index] == 'd')) {
3192 return TokenNameprotected;
3194 return TokenNameIdentifier;
3196 return TokenNameIdentifier;
3198 //return require require_once
3200 if ((data[++index] == 'e') && (data[++index] == 't')
3201 && (data[++index] == 'u') && (data[++index] == 'r')
3202 && (data[++index] == 'n')) {
3203 return TokenNamereturn;
3205 } else if (length == 7) {
3206 if ((data[++index] == 'e') && (data[++index] == 'q')
3207 && (data[++index] == 'u') && (data[++index] == 'i')
3208 && (data[++index] == 'r') && (data[++index] == 'e')) {
3209 return TokenNamerequire;
3211 } else if (length == 12) {
3212 if ((data[++index] == 'e') && (data[++index] == 'q')
3213 && (data[++index] == 'u') && (data[++index] == 'i')
3214 && (data[++index] == 'r') && (data[++index] == 'e')
3215 && (data[++index] == '_') && (data[++index] == 'o')
3216 && (data[++index] == 'n') && (data[++index] == 'c')
3217 && (data[++index] == 'e')) {
3218 return TokenNamerequire_once;
3221 return TokenNameIdentifier;
3226 if (data[++index] == 't')
3227 if ((data[++index] == 'a') && (data[++index] == 't')
3228 && (data[++index] == 'i') && (data[++index] == 'c')) {
3229 return TokenNamestatic;
3231 return TokenNameIdentifier;
3232 else if ((data[index] == 'w') && (data[++index] == 'i')
3233 && (data[++index] == 't') && (data[++index] == 'c')
3234 && (data[++index] == 'h'))
3235 return TokenNameswitch;
3237 return TokenNameIdentifier;
3239 return TokenNameIdentifier;
3245 if ((data[++index] == 'r') && (data[++index] == 'y'))
3246 return TokenNametry;
3248 return TokenNameIdentifier;
3250 // if ((data[++index] == 'r') && (data[++index] == 'u')
3251 // && (data[++index] == 'e'))
3252 // return TokenNametrue;
3254 // return TokenNameIdentifier;
3256 if ((data[++index] == 'h') && (data[++index] == 'r')
3257 && (data[++index] == 'o') && (data[++index] == 'w'))
3258 return TokenNamethrow;
3260 return TokenNameIdentifier;
3262 return TokenNameIdentifier;
3268 if ((data[++index] == 's') && (data[++index] == 'e'))
3269 return TokenNameuse;
3271 return TokenNameIdentifier;
3273 if ((data[++index] == 'n') && (data[++index] == 's')
3274 && (data[++index] == 'e') && (data[++index] == 't'))
3275 return TokenNameunset;
3277 return TokenNameIdentifier;
3279 return TokenNameIdentifier;
3285 if ((data[++index] == 'a') && (data[++index] == 'r'))
3286 return TokenNamevar;
3288 return TokenNameIdentifier;
3290 return TokenNameIdentifier;
3296 if ((data[++index] == 'h') && (data[++index] == 'i')
3297 && (data[++index] == 'l') && (data[++index] == 'e'))
3298 return TokenNamewhile;
3300 return TokenNameIdentifier;
3301 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3302 // (data[++index]=='e') && (data[++index]=='f')&&
3303 // (data[++index]=='p'))
3304 //return TokenNamewidefp ;
3306 //return TokenNameIdentifier;
3308 return TokenNameIdentifier;
3314 if ((data[++index] == 'o') && (data[++index] == 'r'))
3315 return TokenNamexor;
3317 return TokenNameIdentifier;
3319 return TokenNameIdentifier;
3322 return TokenNameIdentifier;
// Scans a numeric literal whose first digit is currentCharacter and returns
// its token kind: TokenNameIntegerLiteral for a hexadecimal literal (0x / 0X
// followed by hex digits) and for a digit run without a fractional part, or
// TokenNameDoubleLiteral when a fraction, an exponent or a d/D suffix has been
// consumed. dotPrefix tells that the literal was introduced by a '.', which
// makes it floating from the start.
// Throws InvalidInputException(INVALID_HEXA) when 0x is not followed by a hex
// digit and InvalidInputException(INVALID_FLOAT) when an exponent has no
// digit.
// NOTE(review): some short lines of this method (e.g. the assignments to
// 'floating', 'else' lines and closing braces) are not visible in this
// listing - check the complete file before editing.
3325 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3326 //when entering this method the currentCharacter is the first
3327 //digit of the number , i.e. it may be preceded by a . when
3329 boolean floating = dotPrefix;
3330 if ((!dotPrefix) && (currentCharacter == '0')) {
3331 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3332 //force the first char of the hexa number do exist...
3333 // consume next character
3334 unicodeAsBackSlash = false;
3335 currentCharacter = source[currentPosition++];
3336 // if (((currentCharacter = source[currentPosition++]) == '\\')
3337 // && (source[currentPosition] == 'u')) {
3338 // getNextUnicodeChar();
3340 // if (withoutUnicodePtr != 0) {
3341 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3344 if (Character.digit(currentCharacter, 16) == -1)
3345 throw new InvalidInputException(INVALID_HEXA);
3347 while (getNextCharAsDigit(16)) {
3349 // if (getNextChar('l', 'L') >= 0)
3350 // return TokenNameLongLiteral;
3352 return TokenNameIntegerLiteral;
3354 //there is x or X in the number
3355 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3356 // 00078.0 is true !!!!! crazy language
3357 if (getNextCharAsDigit()) {
3358 //-------------potential octal-----------------
3359 while (getNextCharAsDigit()) {
3361 // if (getNextChar('l', 'L') >= 0) {
3362 // return TokenNameLongLiteral;
3365 // if (getNextChar('f', 'F') >= 0) {
3366 // return TokenNameFloatingPointLiteral;
3368 if (getNextChar('d', 'D') >= 0) {
3369 return TokenNameDoubleLiteral;
3370 } else { //make the distinction between octal and float ....
3371 if (getNextChar('.')) { //bingo ! ....
3372 while (getNextCharAsDigit()) {
3374 if (getNextChar('e', 'E') >= 0) {
3375 // consume next character
3376 unicodeAsBackSlash = false;
3377 currentCharacter = source[currentPosition++];
3378 // if (((currentCharacter = source[currentPosition++]) == '\\')
3379 // && (source[currentPosition] == 'u')) {
3380 // getNextUnicodeChar();
3382 // if (withoutUnicodePtr != 0) {
3383 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3386 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3387 // consume next character
3388 unicodeAsBackSlash = false;
3389 currentCharacter = source[currentPosition++];
3390 // if (((currentCharacter = source[currentPosition++]) == '\\')
3391 // && (source[currentPosition] == 'u')) {
3392 // getNextUnicodeChar();
3394 // if (withoutUnicodePtr != 0) {
3395 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3396 // currentCharacter;
// the exponent needs at least one digit after the optional sign
3400 if (!Character.isDigit(currentCharacter))
3401 throw new InvalidInputException(INVALID_FLOAT);
3402 while (getNextCharAsDigit()) {
3405 // if (getNextChar('f', 'F') >= 0)
3406 // return TokenNameFloatingPointLiteral;
3407 getNextChar('d', 'D'); //jump over potential d or D
3408 return TokenNameDoubleLiteral;
3410 return TokenNameIntegerLiteral;
3417 while (getNextCharAsDigit()) {
3419 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3420 // return TokenNameLongLiteral;
3421 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3422 while (getNextCharAsDigit()) {
3426 //if floating is true both exponent and suffix may be optional
3427 if (getNextChar('e', 'E') >= 0) {
3429 // consume next character
3430 unicodeAsBackSlash = false;
3431 currentCharacter = source[currentPosition++];
3432 // if (((currentCharacter = source[currentPosition++]) == '\\')
3433 // && (source[currentPosition] == 'u')) {
3434 // getNextUnicodeChar();
3436 // if (withoutUnicodePtr != 0) {
3437 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3440 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3443 unicodeAsBackSlash = false;
3444 currentCharacter = source[currentPosition++];
3445 // if (((currentCharacter = source[currentPosition++]) == '\\')
3446 // && (source[currentPosition] == 'u')) {
3447 // getNextUnicodeChar();
3449 // if (withoutUnicodePtr != 0) {
3450 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the exponent needs at least one digit after the optional sign
3454 if (!Character.isDigit(currentCharacter))
3455 throw new InvalidInputException(INVALID_FLOAT);
3456 while (getNextCharAsDigit()) {
3459 if (getNextChar('d', 'D') >= 0)
3460 return TokenNameDoubleLiteral;
3461 // if (getNextChar('f', 'F') >= 0)
3462 // return TokenNameFloatingPointLiteral;
3463 //the long flag has been tested before
3464 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3467 * Search the line number corresponding to a specific position
// Looks up the line that contains 'position' by comparing it with the
// recorded line ends lineEnds[0 .. linePtr]. The bounds g / d and the probe
// index m suggest a binary search over that range - TODO confirm, the lines
// that compute m and the return statements are not visible in this listing.
// When no line ends have been recorded (lineEnds == null) the method leaves
// early; the value returned in that case is not visible here either.
3470 public final int getLineNumber(int position) {
3471 if (lineEnds == null)
3473 int length = linePtr + 1;
3476 int g = 0, d = length - 1;
3480 if (position < lineEnds[m]) {
3482 } else if (position > lineEnds[m]) {
3488 if (position < lineEnds[m]) {
// Setter taking the new PHP-mode flag.
// NOTE(review): the body of this method is not visible in this listing;
// presumably it stores 'mode' in the public phpMode field - confirm against
// the complete file.
3493 public void setPHPMode(boolean mode) {
// Installs a new source buffer and resets the scanning state that depends on
// it: a null argument is replaced by an empty array, initialPosition and
// currentPosition go back to 0, the assert-keyword flag is cleared, and a
// withoutUnicodeBuffer of the same length as the source and an empty
// encapsedStringStack are allocated.
// NOTE(review): the embedded line numbers skip 3503, so one short statement
// of this method is not visible in this listing - check the complete file.
3496 public final void setSource(char[] source) {
3497 //the source-buffer is set to sourceString
3498 if (source == null) {
3499 this.source = new char[0];
3501 this.source = source;
3504 initialPosition = currentPosition = 0;
3505 containsAssertKeyword = false;
3506 withoutUnicodeBuffer = new char[this.source.length];
3507 encapsedStringStack = new Stack();
3509 public String toString() {
3510 if (startPosition == source.length)
3511 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3512 if (currentPosition > source.length)
3513 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3514 char front[] = new char[startPosition];
3515 System.arraycopy(source, 0, front, 0, startPosition);
3516 int middleLength = (currentPosition - 1) - startPosition + 1;
3518 if (middleLength > -1) {
3519 middle = new char[middleLength];
3520 System.arraycopy(source, startPosition, middle, 0, middleLength);
3522 middle = new char[0];
3524 char end[] = new char[source.length - (currentPosition - 1)];
3525 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3526 - (currentPosition - 1) - 1);
3527 return new String(front)
3528 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3529 + new String(middle)
3530 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
// Debug helper: maps the token id `act` to a printable description.
// Tokens with a fixed spelling (keywords, operators, punctuation, magic
// constants, casts) yield that spelling; tokens with variable text yield a
// label wrapped around getCurrentTokenSource(), i.e. the text of the token
// the scanner is currently positioned on - not text derived from `act`.
// NOTE(review): the embedded line numbers have gaps (e.g. 3533 -> 3535,
// 3545 -> 3547, 3560 -> 3563), so the switch header, several case labels and
// a few return statements are missing from this excerpt; a `return` with no
// visible label above it presumably belongs to one of those missing labels.
3533 public final String toStringAction(int act) {
3535 case TokenNameERROR :
3536 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
// ---- tokens described together with their source text ----
3538 case TokenNameINLINE_HTML :
3539 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3540 case TokenNameIdentifier :
3541 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3542 case TokenNameVariable :
3543 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords ----
3544 case TokenNameabstract :
3545 return "abstract"; //$NON-NLS-1$
3547 return "AND"; //$NON-NLS-1$
3548 case TokenNamearray :
3549 return "array"; //$NON-NLS-1$
3551 return "as"; //$NON-NLS-1$
3552 case TokenNamebreak :
3553 return "break"; //$NON-NLS-1$
3554 case TokenNamecase :
3555 return "case"; //$NON-NLS-1$
3556 case TokenNameclass :
3557 return "class"; //$NON-NLS-1$
3558 case TokenNamecatch :
3559 return "catch"; //$NON-NLS-1$
// The statements belonging to the next two labels are not visible in this excerpt.
3560 case TokenNameclone :
3563 case TokenNameconst :
3566 case TokenNamecontinue :
3567 return "continue"; //$NON-NLS-1$
3568 case TokenNamedefault :
3569 return "default"; //$NON-NLS-1$
3570 // case TokenNamedefine :
3571 // return "define"; //$NON-NLS-1$
3573 return "do"; //$NON-NLS-1$
3574 case TokenNameecho :
3575 return "echo"; //$NON-NLS-1$
3576 case TokenNameelse :
3577 return "else"; //$NON-NLS-1$
3578 case TokenNameelseif :
3579 return "elseif"; //$NON-NLS-1$
3580 case TokenNameendfor :
3581 return "endfor"; //$NON-NLS-1$
3582 case TokenNameendforeach :
3583 return "endforeach"; //$NON-NLS-1$
3584 case TokenNameendif :
3585 return "endif"; //$NON-NLS-1$
3586 case TokenNameendswitch :
3587 return "endswitch"; //$NON-NLS-1$
3588 case TokenNameendwhile :
3589 return "endwhile"; //$NON-NLS-1$
3592 case TokenNameextends :
3593 return "extends"; //$NON-NLS-1$
3594 // case TokenNamefalse :
3595 // return "false"; //$NON-NLS-1$
3596 case TokenNamefinal :
3597 return "final"; //$NON-NLS-1$
3599 return "for"; //$NON-NLS-1$
3600 case TokenNameforeach :
3601 return "foreach"; //$NON-NLS-1$
3602 case TokenNamefunction :
3603 return "function"; //$NON-NLS-1$
3604 case TokenNameglobal :
3605 return "global"; //$NON-NLS-1$
3607 return "if"; //$NON-NLS-1$
3608 case TokenNameimplements :
3609 return "implements"; //$NON-NLS-1$
3610 case TokenNameinclude :
3611 return "include"; //$NON-NLS-1$
3612 case TokenNameinclude_once :
3613 return "include_once"; //$NON-NLS-1$
3614 case TokenNameinstanceof :
3615 return "instanceof"; //$NON-NLS-1$
3616 case TokenNameinterface :
3617 return "interface"; //$NON-NLS-1$
3618 case TokenNameisset :
3619 return "isset"; //$NON-NLS-1$
3620 case TokenNamelist :
3621 return "list"; //$NON-NLS-1$
3623 return "new"; //$NON-NLS-1$
3624 // case TokenNamenull :
3625 // return "null"; //$NON-NLS-1$
3627 return "OR"; //$NON-NLS-1$
3628 case TokenNameprint :
3629 return "print"; //$NON-NLS-1$
3630 case TokenNameprivate :
3631 return "private"; //$NON-NLS-1$
3632 case TokenNameprotected :
3633 return "protected"; //$NON-NLS-1$
3634 case TokenNamepublic :
3635 return "public"; //$NON-NLS-1$
3636 case TokenNamerequire :
3637 return "require"; //$NON-NLS-1$
3638 case TokenNamerequire_once :
3639 return "require_once"; //$NON-NLS-1$
3640 case TokenNamereturn :
3641 return "return"; //$NON-NLS-1$
3642 case TokenNamestatic :
3643 return "static"; //$NON-NLS-1$
3644 case TokenNameswitch :
3645 return "switch"; //$NON-NLS-1$
3646 // case TokenNametrue :
3647 // return "true"; //$NON-NLS-1$
3648 case TokenNameunset :
3649 return "unset"; //$NON-NLS-1$
3651 return "var"; //$NON-NLS-1$
3652 case TokenNamewhile :
3653 return "while"; //$NON-NLS-1$
3655 return "XOR"; //$NON-NLS-1$
3656 // case TokenNamethis :
3657 // return "$this"; //$NON-NLS-1$
// ---- literals and strings (label plus current token source) ----
3658 case TokenNameIntegerLiteral :
3659 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3660 case TokenNameDoubleLiteral :
3661 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3662 case TokenNameStringLiteral :
3663 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3664 case TokenNameStringConstant :
3665 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3666 case TokenNameStringInterpolated :
3667 return "StringInterpolated(" + new String(getCurrentTokenSource())
3668 + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// The three encapsed-string tokens are rendered as their quote character.
3669 case TokenNameEncapsedString0 :
3670 return "`"; //$NON-NLS-1$
3671 case TokenNameEncapsedString1 :
3672 return "\'"; //$NON-NLS-1$
3673 case TokenNameEncapsedString2 :
3674 return "\""; //$NON-NLS-1$
3675 case TokenNameSTRING :
3676 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3677 case TokenNameHEREDOC :
3678 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// ---- operators ----
3679 case TokenNamePLUS_PLUS :
3680 return "++"; //$NON-NLS-1$
3681 case TokenNameMINUS_MINUS :
3682 return "--"; //$NON-NLS-1$
3683 case TokenNameEQUAL_EQUAL :
3684 return "=="; //$NON-NLS-1$
3685 case TokenNameEQUAL_EQUAL_EQUAL :
3686 return "==="; //$NON-NLS-1$
3687 case TokenNameEQUAL_GREATER :
3688 return "=>"; //$NON-NLS-1$
3689 case TokenNameLESS_EQUAL :
3690 return "<="; //$NON-NLS-1$
3691 case TokenNameGREATER_EQUAL :
3692 return ">="; //$NON-NLS-1$
3693 case TokenNameNOT_EQUAL :
3694 return "!="; //$NON-NLS-1$
3695 case TokenNameNOT_EQUAL_EQUAL :
3696 return "!=="; //$NON-NLS-1$
3697 case TokenNameLEFT_SHIFT :
3698 return "<<"; //$NON-NLS-1$
3699 case TokenNameRIGHT_SHIFT :
3700 return ">>"; //$NON-NLS-1$
3701 case TokenNamePLUS_EQUAL :
3702 return "+="; //$NON-NLS-1$
3703 case TokenNameMINUS_EQUAL :
3704 return "-="; //$NON-NLS-1$
3705 case TokenNameMULTIPLY_EQUAL :
3706 return "*="; //$NON-NLS-1$
3707 case TokenNameDIVIDE_EQUAL :
3708 return "/="; //$NON-NLS-1$
3709 case TokenNameAND_EQUAL :
3710 return "&="; //$NON-NLS-1$
3711 case TokenNameOR_EQUAL :
3712 return "|="; //$NON-NLS-1$
3713 case TokenNameXOR_EQUAL :
3714 return "^="; //$NON-NLS-1$
3715 case TokenNameREMAINDER_EQUAL :
3716 return "%="; //$NON-NLS-1$
3717 case TokenNameDOT_EQUAL :
3718 return ".="; //$NON-NLS-1$
3719 case TokenNameLEFT_SHIFT_EQUAL :
3720 return "<<="; //$NON-NLS-1$
3721 case TokenNameRIGHT_SHIFT_EQUAL :
3722 return ">>="; //$NON-NLS-1$
3723 case TokenNameOR_OR :
3724 return "||"; //$NON-NLS-1$
3725 case TokenNameAND_AND :
3726 return "&&"; //$NON-NLS-1$
3727 case TokenNamePLUS :
3728 return "+"; //$NON-NLS-1$
3729 case TokenNameMINUS :
3730 return "-"; //$NON-NLS-1$
// The return for MINUS_GREATER and the label for "!" are not visible in this excerpt.
3731 case TokenNameMINUS_GREATER :
3734 return "!"; //$NON-NLS-1$
3735 case TokenNameREMAINDER :
3736 return "%"; //$NON-NLS-1$
3738 return "^"; //$NON-NLS-1$
3740 return "&"; //$NON-NLS-1$
3741 case TokenNameMULTIPLY :
3742 return "*"; //$NON-NLS-1$
3744 return "|"; //$NON-NLS-1$
3745 case TokenNameTWIDDLE :
3746 return "~"; //$NON-NLS-1$
3747 case TokenNameTWIDDLE_EQUAL :
3748 return "~="; //$NON-NLS-1$
3749 case TokenNameDIVIDE :
3750 return "/"; //$NON-NLS-1$
3751 case TokenNameGREATER :
3752 return ">"; //$NON-NLS-1$
3753 case TokenNameLESS :
3754 return "<"; //$NON-NLS-1$
// ---- brackets and punctuation ----
3755 case TokenNameLPAREN :
3756 return "("; //$NON-NLS-1$
3757 case TokenNameRPAREN :
3758 return ")"; //$NON-NLS-1$
3759 case TokenNameLBRACE :
3760 return "{"; //$NON-NLS-1$
3761 case TokenNameRBRACE :
3762 return "}"; //$NON-NLS-1$
3763 case TokenNameLBRACKET :
3764 return "["; //$NON-NLS-1$
3765 case TokenNameRBRACKET :
3766 return "]"; //$NON-NLS-1$
3767 case TokenNameSEMICOLON :
3768 return ";"; //$NON-NLS-1$
3769 case TokenNameQUESTION :
3770 return "?"; //$NON-NLS-1$
3771 case TokenNameCOLON :
3772 return ":"; //$NON-NLS-1$
3773 case TokenNameCOMMA :
3774 return ","; //$NON-NLS-1$
3776 return "."; //$NON-NLS-1$
3777 case TokenNameEQUAL :
3778 return "="; //$NON-NLS-1$
// The statements for the two DOLLAR labels and the label for "EOF" are not visible in this excerpt.
3781 case TokenNameDOLLAR :
3783 case TokenNameDOLLAR_LBRACE :
3786 return "EOF"; //$NON-NLS-1$
// ---- whitespace and comments (label plus current token source) ----
3787 case TokenNameWHITESPACE :
3788 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3789 case TokenNameCOMMENT_LINE :
3790 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3791 case TokenNameCOMMENT_BLOCK :
3792 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3793 case TokenNameCOMMENT_PHPDOC :
3794 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3795 // case TokenNameHTML :
3796 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---- magic constants ----
3798 case TokenNameFILE :
3799 return "__FILE__"; //$NON-NLS-1$
3800 case TokenNameLINE :
3801 return "__LINE__"; //$NON-NLS-1$
3802 case TokenNameCLASS_C :
3803 return "__CLASS__"; //$NON-NLS-1$
3804 case TokenNameMETHOD_C :
3805 return "__METHOD__"; //$NON-NLS-1$
3806 case TokenNameFUNC_C :
3807 return "__FUNCTION__"; //$NON-NLS-1$
// ---- cast operators ----
3808 case TokenNameboolCAST :
3809 return "( bool )"; //$NON-NLS-1$
3810 case TokenNameintCAST :
3811 return "( int )"; //$NON-NLS-1$
3812 case TokenNamedoubleCAST :
3813 return "( double )"; //$NON-NLS-1$
3814 case TokenNameobjectCAST :
3815 return "( object )"; //$NON-NLS-1$
3816 case TokenNamestringCAST :
3817 return "( string )"; //$NON-NLS-1$
// Fallback text made of the numeric id and the current token source;
// presumably the default branch (its label is not visible in this excerpt).
3819 return "not-a-token(" + (new Integer(act)) + ") "
3820 + new String(getCurrentTokenSource()); //$NON-NLS-1$
// Convenience constructor: delegates to the three-argument form with
// checkNonExternalizedStringLiterals set to false.
3827 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
3828 this(tokenizeComments, tokenizeWhiteSpace, false);
// Convenience constructor: delegates to the four-argument form with
// assertMode set to false.
3830 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3831 boolean checkNonExternalizedStringLiterals) {
3832 this(tokenizeComments, tokenizeWhiteSpace,
3833 checkNonExternalizedStringLiterals, false);
// Convenience constructor: delegates to the seven-argument form with
// tokenizeStrings set to false and null for both trailing arguments
// (the task tags and task priorities of that constructor).
3835 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3836 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
3837 this(tokenizeComments, tokenizeWhiteSpace,
3838 checkNonExternalizedStringLiterals, assertMode, false, null, null);
// Primary constructor: stores every configuration flag and the task-tag
// tables in the corresponding fields. No source is attached here.
// NOTE(review): embedded line 3843 is missing from this excerpt; it presumably
// declares the `taskTags` parameter that is assigned below (the four-boolean
// overload passes seven arguments) - confirm against the full file.
3840 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3841 boolean checkNonExternalizedStringLiterals, boolean assertMode,
3842 boolean tokenizeStrings,
3844 char[][] taskPriorities) {
// Largest possible value, so the `i < this.eofPosition` style bounds used by
// the scanner do not cut anything off until a caller lowers it.
3845 this.eofPosition = Integer.MAX_VALUE;
3846 this.tokenizeComments = tokenizeComments;
3847 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3848 this.tokenizeStrings = tokenizeStrings;
3849 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3850 this.assertMode = assertMode;
// No stack yet; setSource(char[]) creates one when a source is installed.
3851 this.encapsedStringStack = null;
3852 this.taskTags = taskTags;
3853 this.taskPriorities = taskPriorities;
// Hands the NLS line currently being tracked (currentLine) to parseTags.
3855 private void checkNonExternalizeString() throws InvalidInputException {
// Nothing is tracked for this line. The statement guarded by this test is not
// visible in this excerpt (presumably an early return - TODO confirm).
3856 if (currentLine == null)
3858 parseTags(currentLine);
// Reads the NLS tags (TAG_PREFIX <number> TAG_POSTFIX) contained in the
// current token source, then collects the string literals still present in
// `line` into this.nonNLSStrings.
// NOTE(review): gaps in the embedded line numbers (3863 -> 3865, 3867 -> 3870,
// 3875 -> 3878, 3889 -> 3893 ...) show that the loop header, the try header and
// a few statements are missing from this excerpt.
3860 private void parseTags(NLSLine line) throws InvalidInputException {
3861 String s = new String(getCurrentTokenSource());
3862 int pos = s.indexOf(TAG_PREFIX);
3863 int lineLength = line.size();
// The tag number is the text between the prefix and the next postfix.
3865 int start = pos + TAG_PREFIX_LENGTH;
3866 int end = s.indexOf(TAG_POSTFIX, start);
// NOTE(review): when TAG_POSTFIX does not occur after `start`, `end` is -1 and
// substring(start, end) throws StringIndexOutOfBoundsException.
3867 String index = s.substring(start, end);
3870 i = Integer.parseInt(index) - 1;
3871 // Tags are one based not zero based.
3872 } catch (NumberFormatException e) {
3873 i = -1; // we don't want to consider this as a valid NLS tag
// What happens to an existing entry i is not visible in this excerpt.
3875 if (line.exists(i)) {
// Continue with the next tag prefix, if any.
3878 pos = s.indexOf(TAG_PREFIX, start);
// Gather the non-null literals of the line, packed at the front of an array
// that is first sized for the whole line.
3880 this.nonNLSStrings = new StringLiteral[lineLength];
3881 int nonNLSCounter = 0;
3882 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3883 StringLiteral literal = (StringLiteral) iterator.next();
3884 if (literal != null) {
3885 this.nonNLSStrings[nonNLSCounter++] = literal;
// No literal collected: the result array is dropped again.
3888 if (nonNLSCounter == 0) {
3889 this.nonNLSStrings = null;
3893 this.wasNonExternalizedStringLiteral = true;
// Shrink the array to the number of literals actually collected; the final
// arraycopy argument (the element count) is not visible in this excerpt.
3894 if (nonNLSCounter != lineLength) {
3895 System.arraycopy(this.nonNLSStrings, 0,
3896 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
// Decodes the escape sequence at the current position: reads the character
// that follows the backslash and replaces currentCharacter with the character
// the escape stands for. Octal escapes of up to three digits are folded into a
// single char value.
// Throws InvalidInputException(INVALID_ESCAPE) for sequences it rejects.
// NOTE(review): gaps in the embedded line numbers (3917 -> 3919 -> 3922 ...,
// 3977 -> 3981) show that the case labels of the switch and the conditions
// guarding the two throws are missing from this excerpt.
3901 public final void scanEscapeCharacter() throws InvalidInputException {
3902 // the string with "\\u" is a legal string of two chars \ and u
3903 //thus we use a direct access to the source (for regular cases).
3904 if (unicodeAsBackSlash) {
3905 // consume next character
3906 unicodeAsBackSlash = false;
3907 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
3908 // (source[currentPosition] == 'u')) {
3909 // getNextUnicodeChar();
// A non-zero withoutUnicodePtr means the buffer is in use, so the character is appended to it.
3911 if (withoutUnicodePtr != 0) {
3912 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3916 currentCharacter = source[currentPosition++];
3917 switch (currentCharacter) {
// Each assignment below stores the character denoted by one recognized escape
// letter; the matching case labels are not visible in this excerpt.
3919 currentCharacter = '\b';
3922 currentCharacter = '\t';
3925 currentCharacter = '\n';
3928 currentCharacter = '\f';
3931 currentCharacter = '\r';
3934 currentCharacter = '\"';
3937 currentCharacter = '\'';
3940 currentCharacter = '\\';
3943 // -----------octal escape--------------
3945 // OctalDigit OctalDigit
3946 // ZeroToThree OctalDigit OctalDigit
3947 int number = Character.getNumericValue(currentCharacter);
3948 if (number >= 0 && number <= 7) {
// A leading digit above 3 rules out a third octal digit (see the grammar above).
3949 boolean zeroToThreeNot = number > 3;
3950 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3951 int digit = Character.getNumericValue(currentCharacter);
3952 if (digit >= 0 && digit <= 7) {
// Accumulate the value in base 8.
3953 number = (number * 8) + digit;
3955 .isDigit(currentCharacter = source[currentPosition++])) {
3956 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3957 // Digit --> ignore last character
3960 digit = Character.getNumericValue(currentCharacter);
3961 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3962 // OctalDigit OctalDigit
3963 number = (number * 8) + digit;
3964 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3965 // --> ignore last character
3969 } else { // has read \OctalDigit NonDigit--> ignore last
3973 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3977 } else { // has read \OctalDigit --> ignore last character
// The condition guarding this throw is not visible in this excerpt
// (presumably a range check on `number` - TODO confirm).
3981 throw new InvalidInputException(INVALID_ESCAPE);
// The accumulated octal value becomes the resulting character.
3982 currentCharacter = (char) number;
3984 throw new InvalidInputException(INVALID_ESCAPE);
3987 // check presence of task: tags
// Scans the source between commentStart (inclusive) and commentEnd
// (exclusive, also bounded by eofPosition) for the configured task tags and
// records tag, priority, position and message of every hit in the
// foundTask* tables.
// NOTE(review): gaps in the embedded line numbers (3991 -> 3994, 4008 -> 4011,
// 4052 -> 4058 -> 4067 ...) show that several statements are missing from this
// excerpt, and the closing lines of the method are not visible either.
3988 public void checkTaskTag(int commentStart, int commentEnd) {
3989 // only look for newer task: tags
3990 if (this.foundTaskCount > 0
3991 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// Tasks recorded from this index onward are the ones found by this call;
// the second loop below fills in their messages.
3994 int foundTaskIndex = this.foundTaskCount;
3995 nextChar : for (int i = commentStart; i < commentEnd
3996 && i < this.eofPosition; i++) {
3998 char[] priority = null;
3999 // check for tag occurrence
// NOTE(review): this.taskTags is dereferenced without a visible null check,
// while the four-boolean constructor passes null for it; callers presumably
// guard the call - confirm.
4000 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4001 tag = this.taskTags[itag];
4002 priority = this.taskPriorities != null
4003 && itag < this.taskPriorities.length
4004 ? this.taskPriorities[itag]
4006 int tagLength = tag.length;
// Compare the tag character by character with the source at position i.
// NOTE(review): no bounds check on i + t is visible here; a partial tag at the
// very end of the source could index past the array - confirm against the
// missing lines.
4007 for (int t = 0; t < tagLength; t++) {
4008 if (this.source[i + t] != tag[t])
// Result tables are created lazily with room for 5 tasks and are doubled,
// all four in lockstep, once they are full.
4011 if (this.foundTaskTags == null) {
4012 this.foundTaskTags = new char[5][];
4013 this.foundTaskMessages = new char[5][];
4014 this.foundTaskPriorities = new char[5][];
4015 this.foundTaskPositions = new int[5][];
4016 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4017 System.arraycopy(this.foundTaskTags, 0,
4018 this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4019 this.foundTaskCount);
4020 System.arraycopy(this.foundTaskMessages, 0,
4021 this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4022 this.foundTaskCount);
4023 System.arraycopy(this.foundTaskPriorities, 0,
4024 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4025 0, this.foundTaskCount);
4026 System.arraycopy(this.foundTaskPositions, 0,
4027 this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4028 this.foundTaskCount);
4030 this.foundTaskTags[this.foundTaskCount] = tag;
4031 this.foundTaskPriorities[this.foundTaskCount] = priority;
4032 this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
// Placeholder message; the real text is extracted in the loop below.
4034 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4035 this.foundTaskCount++;
4036 i += tagLength - 1; // will be incremented when looping
// Second pass: determine the message text of every task found by this call.
4039 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4040 // retrieve message start and end positions
4041 int msgStart = this.foundTaskPositions[i][0]
4042 + this.foundTaskTags[i].length;
4043 int max_value = i + 1 < this.foundTaskCount
4044 ? this.foundTaskPositions[i + 1][0] - 1
4046 // at most beginning of next task
4047 if (max_value < msgStart)
4048 max_value = msgStart; // would only occur if tag is before EOF.
// Look for a line break inside the candidate range; what is done when one is
// found is not visible in this excerpt.
4051 for (int j = msgStart; j < max_value; j++) {
4052 if ((c = this.source[j]) == '\n' || c == '\r') {
// Walk backwards looking for a '*'; what is done when one is found is not
// visible in this excerpt.
4058 for (int j = max_value; j > msgStart; j--) {
4059 if ((c = this.source[j]) == '*') {
4067 if (msgStart == end)
// Whitespace is skipped at both ends of the message (the loop bodies are not
// visible in this excerpt).
// NOTE(review): both loops index the source before testing msgStart <= end,
// so the bounds test cannot prevent an out-of-range access.
4070 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4072 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4074 // update the end position of the task
4075 this.foundTaskPositions[i][1] = end;
4076 // get the message source
4077 final int messageLength = end - msgStart + 1;
4078 char[] message = new char[messageLength];
4079 System.arraycopy(source, msgStart, message, 0, messageLength);
4080 this.foundTaskMessages[i] = message;