1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
21 public class Scanner implements IScanner, ITerminalSymbols {
23 * APIs are - getNextToken() which returns the current type of the token (this
24 * value is not memorized by the scanner) - getCurrentTokenSource() which
25 * provides with the token "REAL" source (aka all unicode have been
26 * transformed into a correct char) - sourceStart gives the position into the
27 * stream - currentPosition-1 gives the sourceEnd position into the stream
// ---- scanner options and cursor state ----
30 // private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
33 // flag indicating if processed source contains occurrences of keyword assert
34 public boolean containsAssertKeyword = false;
// when true, line separators met while scanning are reported through pushLineSeparator()
36 public boolean recordLineSeparator;
38 public boolean ignorePHPOneLiner = false;
// NOTE(review): presumably true while the scanner is inside a PHP section - confirm at the use sites
40 public boolean phpMode = false;
42 public boolean phpExpressionTag = false;
44 // public Stack encapsedStringStack = null;
// character most recently read from the source by the getNextChar... methods
46 public char currentCharacter;
// index in the source of the first character of the current token
48 public int startPosition;
// index of the next character to read; currentPosition - 1 is the end of the current token
50 public int currentPosition;
// initialPosition is what getLineStart() reports as the start of line 1
52 public int initialPosition, eofPosition;
54 // after eofPosition, EOF is generated instead of real tokens from the source
56 public boolean tokenizeComments;
58 public boolean tokenizeWhiteSpace;
60 public boolean tokenizeStrings;
62 // source should be viewed as a window (aka a part)
63 // of an entire, very large stream
// copy of the current token with escapes resolved; slot 0 is left unused, the first char is in slot 1
67 public char[] withoutUnicodeBuffer;
// index of the last used slot of withoutUnicodeBuffer
69 public int withoutUnicodePtr;
71 // when == 0 ==> no unicode in the current token
72 public boolean unicodeAsBackSlash = false;
74 public boolean scanningFloatLiteral = false;
76 // support for /** comments
77 public int[] commentStops = new int[10];
79 public int[] commentStarts = new int[10];
81 public int commentPtr = -1; // no comment test with commentPtr value -1
83 protected int lastCommentLinePosition = -1;
85 // diet parsing support - jump over some method body when requested
86 public boolean diet = false;
88 // support for the poor-line-debuggers ....
89 // remember the position of the cr/lf
90 public int[] lineEnds = new int[250];
// NOTE(review): presumably the index of the last filled slot of lineEnds (-1 = nothing recorded) - confirm
92 public int linePtr = -1;
94 public boolean wasAcr = false;
// ---- message keys carried by InvalidInputException ----
// Handlers compare them against getMessage(), e.g. the INVALID_ESCAPE recovery
// in consumeStringInterpolated(), so the literal values must stay stable.
96 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
98 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
100 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
102 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
104 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
106 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
108 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
110 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
112 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
114 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
116 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
118 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
120 // ----------------optimized identifier management------------------
// Shared one-character arrays, one per lowercase letter.
// NOTE(review): presumably handed out by the optimizedCurrentTokenSource helpers to avoid allocations - confirm
121 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
122 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
123 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
124 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
125 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
126 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
127 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
128 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
129 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
// Same idea for two-character variable names: a dollar sign followed by one lowercase letter.
131 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
132 '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
133 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
134 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
135 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
136 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
137 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
138 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
139 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
// Number of character codes covered by the classification table below (codes 0..255).
141 public final static int MAX_OBVIOUS = 256;
// Character class per character code; entries that are never assigned keep the int default 0.
143 static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
// Character class values stored in ObviousIdentCharNatures.
145 public final static int C_DOLLAR = 8;
147 public final static int C_LETTER = 4;
149 public final static int C_DIGIT = 3;
151 public final static int C_SEPARATOR = 2;
153 public final static int C_SPACE = 1;
// Fills the character-class table.
// NOTE(review): these statements presumably sit inside a static initializer whose
// opening and closing lines are not visible in this excerpt.
// Decimal digits.
155 for (int i = '0'; i <= '9'; i++)
156 ObviousIdentCharNatures[i] = C_DIGIT;
// Letters: a-z, A-Z, the underscore and every code from 127 to 255.
158 for (int i = 'a'; i <= 'z'; i++)
159 ObviousIdentCharNatures[i] = C_LETTER;
160 for (int i = 'A'; i <= 'Z'; i++)
161 ObviousIdentCharNatures[i] = C_LETTER;
162 ObviousIdentCharNatures['_'] = C_LETTER;
163 for (int i = 127; i <= 255; i++)
164 ObviousIdentCharNatures[i] = C_LETTER;
// The dollar sign gets its own class so variable starts can be told apart from identifier starts.
166 ObviousIdentCharNatures['$'] = C_DOLLAR;
// White space.
168 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
169 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
170 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
171 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
172 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL TABULATION
// Punctuation, operators and quote characters.
174 ObviousIdentCharNatures['.'] = C_SEPARATOR;
175 ObviousIdentCharNatures[':'] = C_SEPARATOR;
176 ObviousIdentCharNatures[';'] = C_SEPARATOR;
177 ObviousIdentCharNatures[','] = C_SEPARATOR;
178 ObviousIdentCharNatures['['] = C_SEPARATOR;
179 ObviousIdentCharNatures[']'] = C_SEPARATOR;
180 ObviousIdentCharNatures['('] = C_SEPARATOR;
181 ObviousIdentCharNatures[')'] = C_SEPARATOR;
182 ObviousIdentCharNatures['{'] = C_SEPARATOR;
183 ObviousIdentCharNatures['}'] = C_SEPARATOR;
184 ObviousIdentCharNatures['+'] = C_SEPARATOR;
185 ObviousIdentCharNatures['-'] = C_SEPARATOR;
186 ObviousIdentCharNatures['*'] = C_SEPARATOR;
187 ObviousIdentCharNatures['/'] = C_SEPARATOR;
188 ObviousIdentCharNatures['='] = C_SEPARATOR;
189 ObviousIdentCharNatures['&'] = C_SEPARATOR;
190 ObviousIdentCharNatures['|'] = C_SEPARATOR;
191 ObviousIdentCharNatures['?'] = C_SEPARATOR;
192 ObviousIdentCharNatures['<'] = C_SEPARATOR;
193 ObviousIdentCharNatures['>'] = C_SEPARATOR;
194 ObviousIdentCharNatures['!'] = C_SEPARATOR;
195 ObviousIdentCharNatures['%'] = C_SEPARATOR;
196 ObviousIdentCharNatures['^'] = C_SEPARATOR;
197 ObviousIdentCharNatures['~'] = C_SEPARATOR;
198 ObviousIdentCharNatures['"'] = C_SEPARATOR;
199 ObviousIdentCharNatures['\''] = C_SEPARATOR;
// Placeholder entry (six NUL characters) that every slot of charArray_length initially points to.
202 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
204 static final int TableSize = 30, InternalTableSize = 6;
206 // 30*6 = 180 entries
207 public static final int OptimizedLength = 6;
// Per-instance table dimensioned [OptimizedLength][TableSize][InternalTableSize].
// NOTE(review): presumably a cache of short token sources used by the optimizedCurrentTokenSource helpers - confirm
210 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
212 // support for detecting non-externalized string literals
213 int currentLineNr = -1;
215 int previousLineNr = -1;
// line record that collects the string literals found on the line being scanned
217 NLSLine currentLine = null;
// all NLSLine records created so far
219 List lines = new ArrayList();
221 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
223 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
225 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
227 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
229 public StringLiteral[] nonNLSStrings = null;
231 public boolean checkNonExternalizedStringLiterals = true;
233 public boolean wasNonExternalizedStringLiteral = false;
// Points every slot of charArray_length at the shared placeholder array.
// NOTE(review): presumably an instance initializer whose braces are not visible in this excerpt.
236 for (int i = 0; i < 6; i++) {
237 for (int j = 0; j < TableSize; j++) {
238 for (int k = 0; k < InternalTableSize; k++) {
239 charArray_length[i][j][k] = initCharArray;
245 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
// Bracket kind indexes; BracketKinds is the number of kinds.
247 public static final int RoundBracket = 0;
249 public static final int SquareBracket = 1;
251 public static final int CurlyBracket = 2;
253 public static final int BracketKinds = 3;
// ---- task tag support ----
// NOTE(review): presumably filled while scanning comments for tags such as TODO - confirm
256 public char[][] foundTaskTags = null;
258 public char[][] foundTaskMessages;
260 public char[][] foundTaskPriorities = null;
262 public int[][] foundTaskPositions;
264 public int foundTaskCount = 0;
266 public char[][] taskTags = null;
268 public char[][] taskPriorities = null;
270 public boolean isTaskCaseSensitive = true;
272 public static final boolean DEBUG = false;
274 public static final boolean TRACE = false;
276 public ICompilationUnit compilationUnit = null;
279 * Determines if the specified character is permissible as the first character
280 * in a PHP identifier or variable
282 * The '$' character for PHP variables is regarded as a correct first
286 public static boolean isPHPIdentOrVarStart(char ch) {
// Codes below MAX_OBVIOUS are classified by table lookup: accepted when the table
// marks them as a letter (a-z, A-Z, underscore, 127..255) or as the dollar sign.
287 if (ch < MAX_OBVIOUS) {
288 return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DOLLAR;
// NOTE(review): the result for ch >= MAX_OBVIOUS comes from a statement not visible in
// this excerpt. The disabled expression below is a Character-based alternative.
291 // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <=
292 // ch && ch <= 0xFF);
296 * Determines if the specified character is permissible as the first character
297 * in a PHP identifier.
299 * The '$' character for PHP variables isn't regarded as the first character !
301 public static boolean isPHPIdentifierStart(char ch) {
// Codes below MAX_OBVIOUS are accepted only when the table marks them as a letter
// (a-z, A-Z, underscore, 127..255); digits and the dollar sign are rejected.
302 if (ch < MAX_OBVIOUS) {
303 return ObviousIdentCharNatures[ch] == C_LETTER;
// NOTE(review): the result for ch >= MAX_OBVIOUS comes from a statement not visible here.
306 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
311 * Determines if the specified character may be part of a PHP identifier as
312 * other than the first character
314 public static boolean isPHPIdentifierPart(char ch) {
// Codes below MAX_OBVIOUS are accepted when the table marks them as a letter
// (a-z, A-Z, underscore, 127..255) or as a decimal digit.
315 if (ch < MAX_OBVIOUS) {
316 return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
// NOTE(review): the result for ch >= MAX_OBVIOUS comes from a statement not visible here.
319 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch
/**
 * Determines if the specified character may be part of an SQL identifier.
 * The visible table test is the same one isPHPIdentifierPart applies: letter
 * class or decimal digit for codes below MAX_OBVIOUS.
 *
 * @param ch the character to classify
 * @return true for a table letter or digit; NOTE(review): the result for
 *         ch >= MAX_OBVIOUS comes from a statement not visible in this excerpt
 */
323 public static boolean isSQLIdentifierPart(char ch) {
324 if (ch < MAX_OBVIOUS) {
325 return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
/**
 * Tells whether the read position has reached the end of the source array.
 *
 * @return true when currentPosition equals source.length
 */
330 public final boolean atEnd() {
331 // This test is not meaningful when source holds
332 // only a part of the real input stream
333 return source.length == currentPosition;
/**
 * Returns the characters of the current token, i.e. the source range from
 * startPosition (inclusive) to currentPosition (exclusive).
 * Short tokens (see OptimizedLength) are delegated to the
 * optimizedCurrentTokenSource1..6 helpers; any other length is copied into a
 * newly allocated array.
 *
 * @return the token characters; NOTE(review): the helpers presumably return
 *         shared arrays that callers must not modify - confirm
 */
336 public char[] getCurrentIdentifierSource() {
337 // return the token REAL source (aka unicodes are precomputed)
339 // if (withoutUnicodePtr != 0)
340 // //0 is used as a fast test flag so the real first char is in position 1
342 // withoutUnicodeBuffer,
344 // result = new char[withoutUnicodePtr],
346 // withoutUnicodePtr);
348 int length = currentPosition - startPosition;
349 switch (length) { // see OptimizedLength
351 return optimizedCurrentTokenSource1();
353 return optimizedCurrentTokenSource2();
355 return optimizedCurrentTokenSource3();
357 return optimizedCurrentTokenSource4();
359 return optimizedCurrentTokenSource5();
361 return optimizedCurrentTokenSource6();
// no optimized helper applies: copy the token out of the source
364 System.arraycopy(source, startPosition, result = new char[length], 0, length);
/**
 * Returns the source index of the last character of the current token.
 *
 * @return currentPosition - 1
 */
369 public int getCurrentTokenEndPosition() {
370 return this.currentPosition - 1;
/**
 * Copies the current token, i.e. the source range from startPosition
 * (inclusive) to currentPosition (exclusive), into a newly allocated array.
 * The withoutUnicodeBuffer path is disabled, so the raw source is always used.
 */
373 public final char[] getCurrentTokenSource() {
374 // Return the token REAL source (aka unicodes are precomputed)
376 // if (withoutUnicodePtr != 0)
377 // // 0 is used as a fast test flag so the real first char is in position 1
379 // withoutUnicodeBuffer,
381 // result = new char[withoutUnicodePtr],
383 // withoutUnicodePtr);
// length is assigned inside the allocation expression and reused as the copy count
386 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
/**
 * Copies the source range from startPos (inclusive) to currentPosition
 * (exclusive) into a newly allocated array.
 *
 * @param startPos source index of the first character to copy; a value above
 *                 currentPosition makes the array allocation fail with a
 *                 NegativeArraySizeException
 */
391 public final char[] getCurrentTokenSource(int startPos) {
392 // Return the token REAL source (aka unicodes are precomputed)
394 // if (withoutUnicodePtr != 0)
395 // // 0 is used as a fast test flag so the real first char is in position 1
397 // withoutUnicodeBuffer,
399 // result = new char[withoutUnicodePtr],
401 // withoutUnicodePtr);
// length is assigned inside the allocation expression and reused as the copy count
404 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
/**
 * Returns the content of the current string token without its first and last
 * character (the delimiters).
 * When withoutUnicodePtr is non-zero the content is taken from
 * withoutUnicodeBuffer (escapes already resolved); the other copy reads the
 * raw source between startPosition + 1 and currentPosition - 1.
 * NOTE(review): the line joining the two copies (presumably an else) is not
 * visible in this excerpt.
 */
409 public final char[] getCurrentTokenSourceString() {
410 // return the token REAL source (aka unicodes are precomputed).
411 // REMOVE the two " that are at the beginning and the end.
413 if (withoutUnicodePtr != 0)
414 // 0 is used as a fast test flag so the real first char is in position 1
415 System.arraycopy(withoutUnicodeBuffer, 2,
416 // 2 is 1 (real start) + 1 (to jump over the ")
417 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
420 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
/**
 * Compares the current token with the given word without allocating a copy.
 * The lengths are checked first, then the characters are compared one by one
 * against the source starting at startPosition.
 *
 * @param word the characters to compare with; must not be null
 */
425 public final boolean equalsCurrentTokenSource(char[] word) {
// different length: the token cannot match
426 if (word.length != currentPosition - startPosition) {
429 for (int i = 0; i < word.length; i++) {
430 if (word[i] != source[startPosition + i]) {
/**
 * Copies the raw source that follows the current position into a new array.
 * The copy starts at currentPosition and holds eofPosition - currentPosition - 1
 * characters, so it stops one character before eofPosition.
 * A currentPosition at or beyond eofPosition yields a negative length and the
 * array allocation throws NegativeArraySizeException.
 */
437 public final char[] getRawTokenSourceEnd() {
438 int length = this.eofPosition - this.currentPosition - 1;
439 char[] sourceEnd = new char[length];
440 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
/**
 * Returns the source index of the first character of the current token.
 *
 * @return startPosition
 */
444 public int getCurrentTokenStartPosition() {
445 return this.startPosition;
/**
 * Returns the current string literal as a String, built from the characters
 * delivered by getCurrentStringLiteralSource().
 */
448 public final String getCurrentStringLiteral() {
449 char[] result = getCurrentStringLiteralSource();
450 return new String(result);
/**
 * Returns the characters of the current string literal without its first and
 * last character (the delimiters), copied from the raw source.
 * Tokens of at most one character are handled separately by the guard below;
 * NOTE(review): the statement executed in that case is not visible in this
 * excerpt.
 */
453 public final char[] getCurrentStringLiteralSource() {
454 // Return the token REAL source (aka unicodes are precomputed)
455 if (startPosition + 1 >= currentPosition) {
// skip the opening delimiter and leave out the closing one
460 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
/**
 * Returns the characters between startPos + 1 (inclusive) and
 * currentPosition - 1 (exclusive), i.e. the literal that starts at startPos
 * without its first and last character.
 * Unlike the no-argument variant, no guard for very short ranges is visible:
 * when currentPosition - startPos is below 2 the array allocation throws
 * NegativeArraySizeException.
 *
 * @param startPos source index of the opening delimiter
 */
465 public final char[] getCurrentStringLiteralSource(int startPos) {
466 // Return the token REAL source (aka unicodes are precomputed)
469 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
475 * Search the source position corresponding to the end of a given line number
477 * Line numbers are 1-based, and relative to the scanner initialPosition.
478 * Character positions are 0-based.
480 * In case the given line number is inconsistent, answers -1.
482 public final int getLineEnd(int lineNumber) {
// no line table at all
483 if (lineEnds == null)
// the bound is the capacity of lineEnds, not the number of recorded lines
485 if (lineNumber >= lineEnds.length)
// NOTE(review): special case for the last table slot; the value returned here is not visible in this excerpt
489 if (lineNumber == lineEnds.length - 1)
// line numbers are 1-based, the table is 0-based
491 return lineEnds[lineNumber - 1];
492 // next line start one character behind the lineEnd of the previous line
496 * Search the source position corresponding to the beginning of a given line
499 * Line numbers are 1-based, and relative to the scanner initialPosition.
500 * Character positions are 0-based.
502 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
504 * In case the given line number is inconsistent, answers -1.
506 public final int getLineStart(int lineNumber) {
// no line table at all
507 if (lineEnds == null)
// the bound is the capacity of lineEnds, not the number of recorded lines
509 if (lineNumber >= lineEnds.length)
// NOTE(review): the condition guarding this return (presumably the first line) is not visible in this excerpt
514 return initialPosition;
// a line starts one character after the end of the previous line; lineNumber is 1-based
515 return lineEnds[lineNumber - 2] + 1;
516 // next line start one character behind the lineEnd of the previous line
/**
 * Conditionally consumes the next source character: reads it into
 * currentCharacter and keeps the advanced currentPosition only when it equals
 * testedChar. On a mismatch, or when the read runs past the end of the source
 * (IndexOutOfBoundsException), currentPosition is restored to its value on
 * entry. Note that currentCharacter has already been overwritten by the read
 * in the mismatch case, so only the position is rolled back.
 * The unicode escape handling inherited from the Java scanner is disabled
 * (kept below as commented-out code).
 *
 * @param testedChar the character expected next
 * @return NOTE(review): the return statements are not visible in this excerpt;
 *         per the inline comments true means the character matched and was consumed
 */
519 public final boolean getNextChar(char testedChar) {
521 // handle the case of unicode.
522 // when a unicode appears then we must use a buffer that holds char
524 // At the end of this method currentCharacter holds the new visited char
525 // and currentPosition points right next after it
526 // Both previous lines are true if the currentCharacter is == to the
528 // On false, the position is restored.
529 // ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the position so it can be rolled back
530 int temp = currentPosition;
532 currentCharacter = source[currentPosition++];
533 // if (((currentCharacter = source[currentPosition++]) == '\\')
534 // && (source[currentPosition] == 'u')) {
535 // //-------------unicode handling ------------
536 // int c1, c2, c3, c4;
537 // int unicodeSize = 6;
538 // currentPosition++;
539 // while (source[currentPosition] == 'u') {
540 // currentPosition++;
544 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
546 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
548 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
550 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
552 // currentPosition = temp;
556 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
557 // if (currentCharacter != testedChar) {
558 // currentPosition = temp;
561 // unicodeAsBackSlash = currentCharacter == '\\';
563 // //need the unicode buffer
564 // if (withoutUnicodePtr == 0) {
565 // //buffer all the entries that have been left aside....
566 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
570 // withoutUnicodeBuffer,
572 // withoutUnicodePtr);
574 // //fill the buffer with the char
575 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
578 // } //-------------end unicode handling--------------
// mismatch: un-read the character
580 if (currentCharacter != testedChar) {
581 currentPosition = temp;
584 unicodeAsBackSlash = false;
585 // if (withoutUnicodePtr != 0)
586 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of the source is treated like a mismatch
589 } catch (IndexOutOfBoundsException e) {
590 unicodeAsBackSlash = false;
591 currentPosition = temp;
/**
 * Conditionally consumes the next source character when it equals one of two
 * candidates. The character is read into currentCharacter; when it matches
 * neither candidate, or when the read runs past the end of the source
 * (IndexOutOfBoundsException), currentPosition is restored to its value on
 * entry. currentCharacter keeps the character that was read.
 *
 * @param testedChar1 first accepted character
 * @param testedChar2 second accepted character
 * @return NOTE(review): the return statements are not visible in this excerpt;
 *         per the first inline comment 0 means testedChar1, 1 means
 *         testedChar2 and -1 means no match
 */
596 public final int getNextChar(char testedChar1, char testedChar2) {
597 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
598 // test can be done with (x==0) for the first and (x>0) for the second
599 // handle the case of unicode.
600 // when a unicode appears then we must use a buffer that holds char
602 // At the end of this method currentCharacter holds the new visited char
603 // and currentPosition points right next after it
604 // Both previous lines are true if the currentCharacter is == to the
606 // On false, the position is restored.
607 // ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the position so it can be rolled back
608 int temp = currentPosition;
611 currentCharacter = source[currentPosition++];
612 // if (((currentCharacter = source[currentPosition++]) == '\\')
613 // && (source[currentPosition] == 'u')) {
614 // //-------------unicode handling ------------
615 // int c1, c2, c3, c4;
616 // int unicodeSize = 6;
617 // currentPosition++;
618 // while (source[currentPosition] == 'u') {
619 // currentPosition++;
623 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
625 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
627 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
629 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
631 // currentPosition = temp;
635 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
636 // if (currentCharacter == testedChar1)
638 // else if (currentCharacter == testedChar2)
641 // currentPosition = temp;
645 // //need the unicode buffer
646 // if (withoutUnicodePtr == 0) {
647 // //buffer all the entries that have been left aside....
648 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
652 // withoutUnicodeBuffer,
654 // withoutUnicodePtr);
656 // //fill the buffer with the char
657 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
659 // } //-------------end unicode handling--------------
661 if (currentCharacter == testedChar1)
663 else if (currentCharacter == testedChar2)
// neither candidate matched: un-read the character
666 currentPosition = temp;
669 // if (withoutUnicodePtr != 0)
670 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of the source is treated like a mismatch
673 } catch (IndexOutOfBoundsException e) {
674 currentPosition = temp;
/**
 * Conditionally consumes the next source character when it is a digit.
 * The test is Character.isDigit, so any Unicode decimal digit is accepted,
 * not only 0-9. When the character is not a digit, or when the read runs
 * past the end of the source (IndexOutOfBoundsException), currentPosition is
 * restored to its value on entry; currentCharacter keeps the character read.
 *
 * @return NOTE(review): the return statements are not visible in this excerpt;
 *         per the inline comments true means a digit was consumed
 */
679 public final boolean getNextCharAsDigit() {
681 // handle the case of unicode.
682 // when a unicode appears then we must use a buffer that holds char
684 // At the end of this method currentCharacter holds the new visited char
685 // and currentPosition points right next after it
686 // Both previous lines are true if the currentCharacter is a digit
687 // On false, the position is restored.
688 // ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the position so it can be rolled back
689 int temp = currentPosition;
691 currentCharacter = source[currentPosition++];
692 // if (((currentCharacter = source[currentPosition++]) == '\\')
693 // && (source[currentPosition] == 'u')) {
694 // //-------------unicode handling ------------
695 // int c1, c2, c3, c4;
696 // int unicodeSize = 6;
697 // currentPosition++;
698 // while (source[currentPosition] == 'u') {
699 // currentPosition++;
703 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
705 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
707 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
709 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
711 // currentPosition = temp;
715 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
716 // if (!Character.isDigit(currentCharacter)) {
717 // currentPosition = temp;
721 // //need the unicode buffer
722 // if (withoutUnicodePtr == 0) {
723 // //buffer all the entries that have been left aside....
724 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
728 // withoutUnicodeBuffer,
730 // withoutUnicodePtr);
732 // //fill the buffer with the char
733 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
735 // } //-------------end unicode handling--------------
// not a digit: un-read the character
737 if (!Character.isDigit(currentCharacter)) {
738 currentPosition = temp;
741 // if (withoutUnicodePtr != 0)
742 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of the source is treated like a mismatch
745 } catch (IndexOutOfBoundsException e) {
746 currentPosition = temp;
/**
 * Conditionally consumes the next source character when it is a digit in the
 * given radix, as decided by Character.digit(char, radix). When it is not, or
 * when the read runs past the end of the source (IndexOutOfBoundsException),
 * currentPosition is restored to its value on entry; currentCharacter keeps
 * the character read.
 *
 * @param radix the number base the digit must be valid in (e.g. 8 or 16)
 * @return NOTE(review): the return statements are not visible in this excerpt;
 *         per the inline comments true means a digit was consumed
 */
751 public final boolean getNextCharAsDigit(int radix) {
753 // handle the case of unicode.
754 // when a unicode appears then we must use a buffer that holds char
756 // At the end of this method currentCharacter holds the new visited char
757 // and currentPosition points right next after it
758 // Both previous lines are true if the currentCharacter is a digit base on
760 // On false, the position is restored.
761 // ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the position so it can be rolled back
762 int temp = currentPosition;
764 currentCharacter = source[currentPosition++];
765 // if (((currentCharacter = source[currentPosition++]) == '\\')
766 // && (source[currentPosition] == 'u')) {
767 // //-------------unicode handling ------------
768 // int c1, c2, c3, c4;
769 // int unicodeSize = 6;
770 // currentPosition++;
771 // while (source[currentPosition] == 'u') {
772 // currentPosition++;
776 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
778 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
780 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
782 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
784 // currentPosition = temp;
788 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
789 // if (Character.digit(currentCharacter, radix) == -1) {
790 // currentPosition = temp;
794 // //need the unicode buffer
795 // if (withoutUnicodePtr == 0) {
796 // //buffer all the entries that have been left aside....
797 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
801 // withoutUnicodeBuffer,
803 // withoutUnicodePtr);
805 // //fill the buffer with the char
806 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
808 // } //-------------end unicode handling--------------
// Character.digit answers -1 when the character is not a digit in this radix
810 if (Character.digit(currentCharacter, radix) == -1) {
811 currentPosition = temp;
814 // if (withoutUnicodePtr != 0)
815 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of the source is treated like a mismatch
818 } catch (IndexOutOfBoundsException e) {
819 currentPosition = temp;
/**
 * Conditionally consumes the next source character when it may continue an
 * identifier. Despite the method name, the test applied is
 * isPHPIdentifierPart, not the Java identifier rules. When the character is
 * rejected, or when the read runs past the end of the source
 * (IndexOutOfBoundsException), currentPosition is restored to its value on
 * entry; currentCharacter keeps the character read.
 *
 * @return NOTE(review): the return statements are not visible in this excerpt;
 *         per the inline comments true means the character was consumed
 */
824 public boolean getNextCharAsJavaIdentifierPart() {
826 // handle the case of unicode.
827 // when a unicode appears then we must use a buffer that holds char
829 // At the end of this method currentCharacter holds the new visited char
830 // and currentPosition points right next after it
831 // Both previous lines are true if the currentCharacter is a
832 // PHP identifier part
833 // On false, the position is restored.
834 // ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the position so it can be rolled back
835 int temp = currentPosition;
837 currentCharacter = source[currentPosition++];
838 // if (((currentCharacter = source[currentPosition++]) == '\\')
839 // && (source[currentPosition] == 'u')) {
840 // //-------------unicode handling ------------
841 // int c1, c2, c3, c4;
842 // int unicodeSize = 6;
843 // currentPosition++;
844 // while (source[currentPosition] == 'u') {
845 // currentPosition++;
849 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
851 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
853 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
855 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
857 // currentPosition = temp;
861 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
862 // if (!isPHPIdentifierPart(currentCharacter)) {
863 // currentPosition = temp;
867 // //need the unicode buffer
868 // if (withoutUnicodePtr == 0) {
869 // //buffer all the entries that have been left aside....
870 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
874 // withoutUnicodeBuffer,
876 // withoutUnicodePtr);
878 // //fill the buffer with the char
879 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
881 // } //-------------end unicode handling--------------
// not an identifier character: un-read it
883 if (!isPHPIdentifierPart(currentCharacter)) {
884 currentPosition = temp;
887 // if (withoutUnicodePtr != 0)
888 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of the source is treated like a mismatch
891 } catch (IndexOutOfBoundsException e) {
892 currentPosition = temp;
/**
 * Looks ahead in the source to decide whether the upcoming text is a PHP type
 * cast keyword followed by a closing parenthesis.
 * NOTE(review): presumably called right after an opening parenthesis was read - confirm.
 *
 * The lookahead skips blanks and tabs, collects a run of letter-class
 * characters, and compares a word of 3 to 7 characters against the cast
 * keywords int, bool, real, array, unset, float, object, string, double,
 * boolean and integer. real, float and double all map to TokenNamedoubleCAST;
 * boolean maps to TokenNameboolCAST and integer to TokenNameintCAST.
 * Blanks and tabs before the closing parenthesis are skipped as well.
 *
 * When no cast is recognized, or when the lookahead runs past the end of the
 * source, currentCharacter and currentPosition are restored to their values
 * on entry and TokenNameLPAREN is returned.
 *
 * NOTE(review): the comparison is case-sensitive and only matches lowercase
 * spellings, whereas PHP itself accepts cast keywords in any letter case.
 * NOTE(review): the case labels and the statement executed once the closing
 * parenthesis is found are not visible in this excerpt; presumably tempToken
 * is returned there.
 */
897 public int getCastOrParen() {
// saved so that a failed lookahead can be undone
898 int tempPosition = currentPosition;
899 char tempCharacter = currentCharacter;
900 int tempToken = TokenNameLPAREN;
901 boolean found = false;
902 StringBuffer buf = new StringBuffer();
// skip leading blanks and tabs
905 currentCharacter = source[currentPosition++];
906 } while (currentCharacter == ' ' || currentCharacter == '\t');
// Collect the keyword candidate. A character code of 256 or above is outside the
// table and raises an index exception, which the catch below turns into no cast.
907 while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
908 // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
909 // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
910 buf.append(currentCharacter);
911 currentCharacter = source[currentPosition++];
// only words as long as the shortest (int) to the longest (boolean, integer) keyword can match
913 if (buf.length() >= 3 && buf.length() <= 7) {
914 char[] data = buf.toString().toCharArray();
916 switch (data.length) {
919 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
921 tempToken = TokenNameintCAST;
926 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
928 tempToken = TokenNameboolCAST;
931 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
933 tempToken = TokenNamedoubleCAST;
939 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
940 && (data[++index] == 'y')) {
942 tempToken = TokenNamearrayCAST;
945 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
946 && (data[++index] == 't')) {
948 tempToken = TokenNameunsetCAST;
951 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
952 && (data[++index] == 't')) {
954 tempToken = TokenNamedoubleCAST;
960 // object string double
961 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
962 && (data[++index] == 'c') && (data[++index] == 't')) {
964 tempToken = TokenNameobjectCAST;
967 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
968 && (data[++index] == 'n') && (data[++index] == 'g')) {
970 tempToken = TokenNamestringCAST;
973 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
974 && (data[++index] == 'l') && (data[++index] == 'e')) {
976 tempToken = TokenNamedoubleCAST;
983 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
984 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
986 tempToken = TokenNameboolCAST;
989 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
990 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
992 tempToken = TokenNameintCAST;
// skip blanks and tabs between the keyword and the closing parenthesis
998 while (currentCharacter == ' ' || currentCharacter == '\t') {
999 currentCharacter = source[currentPosition++];
1001 if (currentCharacter == ')') {
// end of source (or a character outside the table) during lookahead: fall through to the reset
1006 } catch (IndexOutOfBoundsException e) {
// not a cast: undo the lookahead and report a plain opening parenthesis
1008 currentCharacter = tempCharacter;
1009 currentPosition = tempPosition;
1010 return TokenNameLPAREN;
/**
 * Scans the rest of a backtick-delimited string.
 * <p>
 * Characters are read from {@code source} at {@code currentPosition} until
 * {@code currentCharacter} is a backtick. A backslash is handed to
 * {@code scanDoubleQuotedEscapeCharacter()} and the character it leaves in
 * {@code currentCharacter} is stored in {@code withoutUnicodeBuffer}; a '\r'
 * or '\n' is reported through {@code pushLineSeparator()} when
 * {@code recordLineSeparator} is set. When
 * {@code checkNonExternalizedStringLiterals} is set, the scanned token is
 * added to {@code currentLine} as a {@code StringLiteral}.
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when reading runs past the end of
 *           {@code source}
 */
1013 public void consumeStringInterpolated() throws InvalidInputException {
1015 // consume next character
1016 unicodeAsBackSlash = false;
1017 currentCharacter = source[currentPosition++];
1018 // if (((currentCharacter = source[currentPosition++]) == '\\')
1019 // && (source[currentPosition] == 'u')) {
1020 // getNextUnicodeChar();
1022 // if (withoutUnicodePtr != 0) {
1023 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1024 // currentCharacter;
// the closing backtick ends the string
1027 while (currentCharacter != '`') {
1028 /** ** in PHP \r and \n are valid in string literals *** */
1029 // if ((currentCharacter == '\n')
1030 // || (currentCharacter == '\r')) {
1031 // // relocate if finding another quote fairly close: thus unicode
1032 // '/u000D' will be fully consumed
1033 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1034 // if (currentPosition + lookAhead == source.length)
1036 // if (source[currentPosition + lookAhead] == '\n')
1038 // if (source[currentPosition + lookAhead] == '\"') {
1039 // currentPosition += lookAhead + 1;
1043 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1045 if (currentCharacter == '\\') {
// remember the position before the escape so its length can be computed below
1046 int escapeSize = currentPosition;
1047 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1048 // scanEscapeCharacter make a side effect on this value and we need
1049 // the previous value few lines down this one
1050 scanDoubleQuotedEscapeCharacter();
1051 escapeSize = currentPosition - escapeSize;
// withoutUnicodePtr == 0 means nothing has been copied into the buffer yet
1052 if (withoutUnicodePtr == 0) {
1053 // buffer all the entries that have been left aside....
1054 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1055 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1056 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1057 } else { // overwrite the / in the buffer
1058 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1059 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1060 // where only one is correct
1061 withoutUnicodePtr--;
1064 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1065 if (recordLineSeparator) {
1066 pushLineSeparator();
1069 // consume next character
1070 unicodeAsBackSlash = false;
1071 currentCharacter = source[currentPosition++];
1072 // if (((currentCharacter = source[currentPosition++]) == '\\')
1073 // && (source[currentPosition] == 'u')) {
1074 // getNextUnicodeChar();
// once buffering has started, every consumed character is mirrored into the buffer
1076 if (withoutUnicodePtr != 0) {
1077 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing backtick was never found
1081 } catch (IndexOutOfBoundsException e) {
1082 // reset end position for error reporting
1083 currentPosition -= 2;
1084 throw new InvalidInputException(UNTERMINATED_STRING);
1085 } catch (InvalidInputException e) {
1086 if (e.getMessage().equals(INVALID_ESCAPE)) {
1087 // relocate if finding another quote fairly close: thus unicode
1088 // '/u000D' will be fully consumed
// scan at most 50 characters ahead; a backtick found there moves currentPosition just past it
1089 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1090 if (currentPosition + lookAhead == source.length)
1092 if (source[currentPosition + lookAhead] == '\n')
1094 if (source[currentPosition + lookAhead] == '`') {
1095 currentPosition += lookAhead + 1;
1102 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1103 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line record and register it in lines
1105 if (currentLine == null) {
1106 currentLine = new NLSLine();
1107 lines.add(currentLine);
1109 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans the rest of a single-quoted string.
 * <p>
 * Characters are read from {@code source} at {@code currentPosition} until
 * {@code currentCharacter} is a single quote. A backslash is handed to
 * {@code scanSingleQuotedEscapeCharacter()} and the character it leaves in
 * {@code currentCharacter} is stored in {@code withoutUnicodeBuffer}; a '\r'
 * or '\n' is reported through {@code pushLineSeparator()} when
 * {@code recordLineSeparator} is set. When
 * {@code checkNonExternalizedStringLiterals} is set, the scanned token is
 * added to {@code currentLine} as a {@code StringLiteral}.
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when reading runs past the end of
 *           {@code source}
 */
1113 public void consumeStringConstant() throws InvalidInputException {
1115 // consume next character
1116 unicodeAsBackSlash = false;
1117 currentCharacter = source[currentPosition++];
1118 // if (((currentCharacter = source[currentPosition++]) == '\\')
1119 // && (source[currentPosition] == 'u')) {
1120 // getNextUnicodeChar();
1122 // if (withoutUnicodePtr != 0) {
1123 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1124 // currentCharacter;
// the closing single quote ends the string
1127 while (currentCharacter != '\'') {
1128 /** ** in PHP \r and \n are valid in string literals *** */
1129 // if ((currentCharacter == '\n')
1130 // || (currentCharacter == '\r')) {
1131 // // relocate if finding another quote fairly close: thus unicode
1132 // '/u000D' will be fully consumed
1133 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1134 // if (currentPosition + lookAhead == source.length)
1136 // if (source[currentPosition + lookAhead] == '\n')
1138 // if (source[currentPosition + lookAhead] == '\"') {
1139 // currentPosition += lookAhead + 1;
1143 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1145 if (currentCharacter == '\\') {
// remember the position before the escape so its length can be computed below
1146 int escapeSize = currentPosition;
1147 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1148 // scanEscapeCharacter make a side effect on this value and we need
1149 // the previous value few lines down this one
1150 scanSingleQuotedEscapeCharacter();
1151 escapeSize = currentPosition - escapeSize;
// withoutUnicodePtr == 0 means nothing has been copied into the buffer yet
1152 if (withoutUnicodePtr == 0) {
1153 // buffer all the entries that have been left aside....
1154 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1155 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1156 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1157 } else { // overwrite the / in the buffer
1158 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1159 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1160 // where only one is correct
1161 withoutUnicodePtr--;
1164 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1165 if (recordLineSeparator) {
1166 pushLineSeparator();
1169 // consume next character
1170 unicodeAsBackSlash = false;
1171 currentCharacter = source[currentPosition++];
1172 // if (((currentCharacter = source[currentPosition++]) == '\\')
1173 // && (source[currentPosition] == 'u')) {
1174 // getNextUnicodeChar();
// once buffering has started, every consumed character is mirrored into the buffer
1176 if (withoutUnicodePtr != 0) {
1177 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing quote was never found
1181 } catch (IndexOutOfBoundsException e) {
1182 // reset end position for error reporting
1183 currentPosition -= 2;
1184 throw new InvalidInputException(UNTERMINATED_STRING);
1185 } catch (InvalidInputException e) {
1186 if (e.getMessage().equals(INVALID_ESCAPE)) {
1187 // relocate if finding another quote fairly close: thus unicode
1188 // '/u000D' will be fully consumed
// scan at most 50 characters ahead; a single quote found there moves currentPosition just past it
1189 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1190 if (currentPosition + lookAhead == source.length)
1192 if (source[currentPosition + lookAhead] == '\n')
1194 if (source[currentPosition + lookAhead] == '\'') {
1195 currentPosition += lookAhead + 1;
1202 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1203 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line record and register it in lines
1205 if (currentLine == null) {
1206 currentLine = new NLSLine();
1207 lines.add(currentLine);
1209 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans the rest of a double-quoted string.
 * <p>
 * Characters are read from {@code source} at {@code currentPosition} until
 * {@code currentCharacter} is a double quote that is not inside an open
 * "${" or "{$" section: {@code openDollarBrace} is set when either pair is
 * seen and cleared by the next '}', and the loop keeps running while it is
 * set. A backslash is handed to {@code scanDoubleQuotedEscapeCharacter()}
 * and the character it leaves in {@code currentCharacter} is stored in
 * {@code withoutUnicodeBuffer}; a '\r' or '\n' is reported through
 * {@code pushLineSeparator()} when {@code recordLineSeparator} is set. When
 * {@code checkNonExternalizedStringLiterals} is set, the scanned token is
 * added to {@code currentLine} as a {@code StringLiteral}.
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when reading runs past the end of
 *           {@code source}
 */
1213 public void consumeStringLiteral() throws InvalidInputException {
// true while scanning between "${" / "{$" and the following '}'
1215 boolean openDollarBrace = false;
1216 // consume next character
1217 unicodeAsBackSlash = false;
1218 currentCharacter = source[currentPosition++];
// a double quote only terminates the string when no dollar-brace section is open
1219 while (currentCharacter != '"' || openDollarBrace) {
1220 /** ** in PHP \r and \n are valid in string literals *** */
1221 if (currentCharacter == '\\') {
// remember the position before the escape so its length can be computed below
1222 int escapeSize = currentPosition;
1223 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1224 // scanEscapeCharacter make a side effect on this value and we need
1225 // the previous value few lines down this one
1226 scanDoubleQuotedEscapeCharacter();
1227 escapeSize = currentPosition - escapeSize;
// withoutUnicodePtr == 0 means nothing has been copied into the buffer yet
1228 if (withoutUnicodePtr == 0) {
1229 // buffer all the entries that have been left aside....
1230 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1231 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1232 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1233 } else { // overwrite the / in the buffer
1234 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1235 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1236 // where only one is correct
1237 withoutUnicodePtr--;
// peek at the next character (without consuming it) to detect "${" and "{$"
1240 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1241 openDollarBrace = true;
1242 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1243 openDollarBrace = true;
1244 } else if (currentCharacter == '}') {
1245 openDollarBrace = false;
1246 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1247 if (recordLineSeparator) {
1248 pushLineSeparator();
1251 // consume next character
1252 unicodeAsBackSlash = false;
1253 currentCharacter = source[currentPosition++];
// once buffering has started, every consumed character is mirrored into the buffer
1254 if (withoutUnicodePtr != 0) {
1255 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing quote was never found
1258 } catch (IndexOutOfBoundsException e) {
1259 // reset end position for error reporting
1260 currentPosition -= 2;
1261 throw new InvalidInputException(UNTERMINATED_STRING);
1262 } catch (InvalidInputException e) {
1263 if (e.getMessage().equals(INVALID_ESCAPE)) {
1264 // relocate if finding another quote fairly close: thus unicode
1265 // '/u000D' will be fully consumed
// scan at most 50 characters ahead; a double quote found there moves currentPosition just past it
1266 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1267 if (currentPosition + lookAhead == source.length)
1269 if (source[currentPosition + lookAhead] == '\n')
1271 if (source[currentPosition + lookAhead] == '\"') {
1272 currentPosition += lookAhead + 1;
1279 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1280 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line record and register it in lines
1282 if (currentLine == null) {
1283 currentLine = new NLSLine();
1284 lines.add(currentLine);
1286 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans and returns the type of the next token.
 * <p>
 * The method clears {@code phpExpressionTag} and {@code wasAcr}, resets
 * {@code withoutUnicodePtr} to 0, skips whitespace (returning
 * TokenNameWHITESPACE instead when {@code tokenizeWhiteSpace} is set and
 * whitespace was consumed) and then switches on {@code currentCharacter}:
 * <ul>
 * <li>operator and punctuation characters return their token, using
 * {@code getNextChar(...)} look-ahead for multi-character operators;</li>
 * <li>string delimiters delegate to {@code consumeStringConstant()},
 * {@code consumeStringLiteral()} or {@code consumeStringInterpolated()};</li>
 * <li>a heredoc is consumed up to its end tag and returned as
 * TokenNameHEREDOC;</li>
 * <li>comments are passed to {@code recordComment(...)} and are returned as
 * tokens only when {@code tokenizeComments} is set;</li>
 * <li>identifiers and numbers delegate to {@code scanIdentifierOrKeyword(...)}
 * and {@code scanNumber(...)}; anything else yields TokenNameERROR.</li>
 * </ul>
 * It may also hand off to {@code getInlinedHTMLToken(...)}; the condition
 * guarding the first hand-off is not visible here (presumably it tests
 * {@code phpMode} - TODO confirm).
 *
 * @return the token type; TokenNameEOF once {@code currentPosition} exceeds
 *         {@code eofPosition} or reading runs past the end of {@code source}
 * @throws InvalidInputException
 *           with UNTERMINATED_COMMENT when a block comment is not closed
 *           before the end of {@code source}, with "Ctrl-Z", or as raised
 *           by the string-consuming helpers
 */
1290 public int getNextToken() throws InvalidInputException {
1291 phpExpressionTag = false;
1293 return getInlinedHTMLToken(currentPosition);
1296 this.wasAcr = false;
1298 jumpOverMethodBody();
// report EOF when the jump ran past the end of source, otherwise a closing brace
1300 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
// 0 means nothing has been copied into withoutUnicodeBuffer for this token yet
1304 withoutUnicodePtr = 0;
1305 // start with a new token
1306 char encapsedChar = ' ';
1307 // if (!encapsedStringStack.isEmpty()) {
1308 // encapsedChar = ((Character)
1309 // encapsedStringStack.peek()).charValue();
1311 // if (encapsedChar != '$' && encapsedChar != ' ') {
1312 // currentCharacter = source[currentPosition++];
1313 // if (currentCharacter == encapsedChar) {
1314 // switch (currentCharacter) {
1316 // return TokenNameEncapsedString0;
1318 // return TokenNameEncapsedString1;
1320 // return TokenNameEncapsedString2;
1323 // while (currentCharacter != encapsedChar) {
1324 // /** ** in PHP \r and \n are valid in string literals *** */
1325 // switch (currentCharacter) {
1327 // int escapeSize = currentPosition;
1328 // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1329 // //scanEscapeCharacter make a side effect on this value and
1330 // // we need the previous value few lines down this one
1331 // scanDoubleQuotedEscapeCharacter();
1332 // escapeSize = currentPosition - escapeSize;
1333 // if (withoutUnicodePtr == 0) {
1334 // //buffer all the entries that have been left aside....
1335 // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1337 // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1338 // withoutUnicodePtr);
1339 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1340 // } else { //overwrite the / in the buffer
1341 // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1342 // if (backSlashAsUnicodeInString) { //there are TWO \ in
1343 // withoutUnicodePtr--;
1349 // if (recordLineSeparator) {
1350 // pushLineSeparator();
1354 // if (isPHPIdentifierStart(source[currentPosition]) ||
1355 // source[currentPosition] == '{') {
1356 // currentPosition--;
1357 // encapsedStringStack.push(new Character('$'));
1358 // return TokenNameSTRING;
1362 // if (source[currentPosition] == '$') { // CURLY_OPEN
1363 // currentPosition--;
1364 // encapsedStringStack.push(new Character('$'));
1365 // return TokenNameSTRING;
1368 // // consume next character
1369 // unicodeAsBackSlash = false;
1370 // currentCharacter = source[currentPosition++];
1371 // if (withoutUnicodePtr != 0) {
1372 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1376 // currentPosition--;
1377 // return TokenNameSTRING;
1379 // ---------Consume white space and handles startPosition---------
// whiteStart marks where this scan began; it is compared below to detect consumed whitespace
1380 int whiteStart = currentPosition;
1381 startPosition = currentPosition;
1382 currentCharacter = source[currentPosition++];
1383 // if (encapsedChar == '$') {
1384 // switch (currentCharacter) {
1386 // currentCharacter = source[currentPosition++];
1387 // return TokenNameSTRING;
1389 // if (encapsedChar == '$') {
1390 // if (getNextChar('$'))
1391 // return TokenNameLBRACE_DOLLAR;
1393 // return TokenNameLBRACE;
1395 // return TokenNameRBRACE;
1397 // return TokenNameLBRACKET;
1399 // return TokenNameRBRACKET;
1401 // if (tokenizeStrings) {
1402 // consumeStringConstant();
1403 // return TokenNameStringSingleQuote;
1405 // return TokenNameEncapsedString1;
1407 // return TokenNameEncapsedString2;
1409 // if (tokenizeStrings) {
1410 // consumeStringInterpolated();
1411 // return TokenNameStringInterpolated;
1413 // return TokenNameEncapsedString0;
1415 // if (getNextChar('>'))
1416 // return TokenNameMINUS_GREATER;
1417 // return TokenNameSTRING;
1419 // if (currentCharacter == '$') {
1420 // int oldPosition = currentPosition;
1422 // currentCharacter = source[currentPosition++];
1423 // if (isPHPIdentifierStart(currentCharacter)) {
1424 // return TokenNameDOLLAR_LBRACE;
1426 // if (isPHPIdentifierStart(currentCharacter)) {
1427 // return scanIdentifierOrKeyword(true);
1429 // currentPosition = oldPosition;
1430 // return TokenNameSTRING;
1432 // } catch (IndexOutOfBoundsException e) {
1433 // currentPosition = oldPosition;
1434 // return TokenNameSTRING;
1437 // if (isPHPIdentifierStart(currentCharacter))
1438 // return scanIdentifierOrKeyword(false);
1439 // if (Character.isDigit(currentCharacter))
1440 // return scanNumber(false);
1441 // return TokenNameERROR;
1444 // boolean isWhiteSpace;
// skip whitespace; startPosition is advanced before each read so it ends on the first non-blank character
1446 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1447 startPosition = currentPosition;
1448 currentCharacter = source[currentPosition++];
1449 // if (((currentCharacter = source[currentPosition++]) == '\\')
1450 // && (source[currentPosition] == 'u')) {
1451 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1453 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1454 checkNonExternalizeString();
1455 if (recordLineSeparator) {
1456 pushLineSeparator();
1461 // isWhiteSpace = (currentCharacter == ' ')
1462 // || Character.isWhitespace(currentCharacter);
1465 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1466 // reposition scanner in case we are interested by spaces as tokens
1468 startPosition = whiteStart;
1469 return TokenNameWHITESPACE;
1471 // little trick to get out in the middle of a source computation
1472 if (currentPosition > eofPosition)
1473 return TokenNameEOF;
1474 // ---------Identify the next token-------------
1475 switch (currentCharacter) {
1477 return getCastOrParen();
1479 return TokenNameRPAREN;
1481 return TokenNameLBRACE;
1483 return TokenNameRBRACE;
1485 return TokenNameLBRACKET;
1487 return TokenNameRBRACKET;
1489 return TokenNameSEMICOLON;
1491 return TokenNameCOMMA;
// DOT_EQUAL, a number with a leading dot (scanNumber(true)), or a plain DOT
1493 if (getNextChar('='))
1494 return TokenNameDOT_EQUAL;
1495 if (getNextCharAsDigit())
1496 return scanNumber(true);
1497 return TokenNameDOT;
// getNextChar(a, b) result 0 is tested for the first candidate character
1500 if ((test = getNextChar('+', '=')) == 0)
1501 return TokenNamePLUS_PLUS;
1503 return TokenNamePLUS_EQUAL;
1504 return TokenNamePLUS;
1508 if ((test = getNextChar('-', '=')) == 0)
1509 return TokenNameMINUS_MINUS;
1511 return TokenNameMINUS_EQUAL;
1512 if (getNextChar('>'))
1513 return TokenNameMINUS_GREATER;
1514 return TokenNameMINUS;
1517 if (getNextChar('='))
1518 return TokenNameTWIDDLE_EQUAL;
1519 return TokenNameTWIDDLE;
1521 if (getNextChar('=')) {
1522 if (getNextChar('=')) {
1523 return TokenNameNOT_EQUAL_EQUAL;
1525 return TokenNameNOT_EQUAL;
1527 return TokenNameNOT;
1529 if (getNextChar('='))
1530 return TokenNameMULTIPLY_EQUAL;
1531 return TokenNameMULTIPLY;
1533 if (getNextChar('='))
1534 return TokenNameREMAINDER_EQUAL;
1535 return TokenNameREMAINDER;
// LESS family: read one more character to choose the token; restore the position when it is a plain LESS
1537 int oldPosition = currentPosition;
1539 currentCharacter = source[currentPosition++];
1540 } catch (IndexOutOfBoundsException e) {
1541 currentPosition = oldPosition;
1542 return TokenNameLESS;
1544 switch (currentCharacter) {
1546 return TokenNameLESS_EQUAL;
1548 return TokenNameNOT_EQUAL;
1550 if (getNextChar('='))
1551 return TokenNameLEFT_SHIFT_EQUAL;
// heredoc: skip whitespace, then read the identifier whose text is matched against later lines
1552 if (getNextChar('<')) {
1553 currentCharacter = source[currentPosition++];
1554 while (Character.isWhitespace(currentCharacter)) {
1555 currentCharacter = source[currentPosition++];
// index of the first identifier character (currentPosition is already one past it)
1557 int heredocStart = currentPosition - 1;
1558 int heredocLength = 0;
1559 if (isPHPIdentifierStart(currentCharacter)) {
1560 currentCharacter = source[currentPosition++];
1562 return TokenNameERROR;
1564 while (isPHPIdentifierPart(currentCharacter)) {
1565 currentCharacter = source[currentPosition++];
1567 heredocLength = currentPosition - heredocStart - 1;
1568 // heredoc end-tag determination
1569 boolean endTag = true;
1572 ch = source[currentPosition++];
1573 if (ch == '\r' || ch == '\n') {
1574 if (recordLineSeparator) {
1575 pushLineSeparator();
// compare the characters after the line break with the heredoc identifier
1579 for (int i = 0; i < heredocLength; i++) {
1580 if (source[currentPosition + i] != source[heredocStart + i]) {
// advance so that currentPosition ends just past the identifier-length run compared above
1586 currentPosition += heredocLength - 1;
1587 currentCharacter = source[currentPosition++];
1588 break; // do...while loop
1594 return TokenNameHEREDOC;
1596 return TokenNameLEFT_SHIFT;
1598 currentPosition = oldPosition;
1599 return TokenNameLESS;
1603 if ((test = getNextChar('=', '>')) == 0)
1604 return TokenNameGREATER_EQUAL;
1606 if ((test = getNextChar('=', '>')) == 0)
1607 return TokenNameRIGHT_SHIFT_EQUAL;
1608 return TokenNameRIGHT_SHIFT;
1610 return TokenNameGREATER;
1613 if (getNextChar('=')) {
1614 if (getNextChar('=')) {
1615 return TokenNameEQUAL_EQUAL_EQUAL;
1617 return TokenNameEQUAL_EQUAL;
1619 if (getNextChar('>'))
1620 return TokenNameEQUAL_GREATER;
1621 return TokenNameEQUAL;
1624 if ((test = getNextChar('&', '=')) == 0)
1625 return TokenNameAND_AND;
1627 return TokenNameAND_EQUAL;
1628 return TokenNameAND;
1632 if ((test = getNextChar('|', '=')) == 0)
1633 return TokenNameOR_OR;
1635 return TokenNameOR_EQUAL;
1639 if (getNextChar('='))
1640 return TokenNameXOR_EQUAL;
1641 return TokenNameXOR;
// a following '>' hands over to inline HTML scanning; otherwise this is QUESTION
1643 if (getNextChar('>')) {
1645 if (currentPosition == source.length) {
1647 return TokenNameINLINE_HTML;
1649 return getInlinedHTMLToken(currentPosition - 2);
1651 return TokenNameQUESTION;
1653 if (getNextChar(':'))
1654 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1655 return TokenNameCOLON;
1659 consumeStringConstant();
1660 return TokenNameStringSingleQuote;
1662 // if (tokenizeStrings) {
1663 consumeStringLiteral();
1664 return TokenNameStringDoubleQuote;
1666 // return TokenNameEncapsedString2;
1668 // if (tokenizeStrings) {
1669 consumeStringInterpolated();
1670 return TokenNameStringInterpolated;
1672 // return TokenNameEncapsedString0;
// DIVIDE_EQUAL, a line comment ('#' or getNextChar('/', '*') == 0), a block comment, or DIVIDE
1675 char startChar = currentCharacter;
1676 if (getNextChar('=') && startChar == '/') {
1677 return TokenNameDIVIDE_EQUAL;
1680 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1682 this.lastCommentLinePosition = this.currentPosition;
1683 int endPositionForLineComment = 0;
1684 try { // get the next char
1685 currentCharacter = source[currentPosition++];
1686 // if (((currentCharacter = source[currentPosition++])
1688 // && (source[currentPosition] == 'u')) {
1689 // //-------------unicode traitement ------------
1690 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1691 // currentPosition++;
1692 // while (source[currentPosition] == 'u') {
1693 // currentPosition++;
1696 // Character.getNumericValue(source[currentPosition++]))
1700 // Character.getNumericValue(source[currentPosition++]))
1704 // Character.getNumericValue(source[currentPosition++]))
1708 // Character.getNumericValue(source[currentPosition++]))
1712 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1714 // currentCharacter =
1715 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1718 // handle the \\u case manually into comment
1719 // if (currentCharacter == '\\') {
1720 // if (source[currentPosition] == '\\')
1721 // currentPosition++;
1722 // } //jump over the \\
1723 boolean isUnicode = false;
// a line comment runs to the next line terminator
1724 while (currentCharacter != '\r' && currentCharacter != '\n') {
1725 this.lastCommentLinePosition = this.currentPosition;
1726 if (currentCharacter == '?') {
1727 if (getNextChar('>')) {
1728 // ?> breaks line comments
1729 startPosition = currentPosition - 2;
1731 return TokenNameINLINE_HTML;
1734 // get the next char
1736 currentCharacter = source[currentPosition++];
1737 // if (((currentCharacter = source[currentPosition++])
1739 // && (source[currentPosition] == 'u')) {
1740 // isUnicode = true;
1741 // //-------------unicode traitement ------------
1742 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1743 // currentPosition++;
1744 // while (source[currentPosition] == 'u') {
1745 // currentPosition++;
1748 // Character.getNumericValue(source[currentPosition++]))
1752 // Character.getNumericValue(
1753 // source[currentPosition++]))
1757 // Character.getNumericValue(
1758 // source[currentPosition++]))
1762 // Character.getNumericValue(
1763 // source[currentPosition++]))
1767 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1769 // currentCharacter =
1770 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1773 // handle the \\u case manually into comment
1774 // if (currentCharacter == '\\') {
1775 // if (source[currentPosition] == '\\')
1776 // currentPosition++;
1777 // } //jump over the \\
1780 endPositionForLineComment = currentPosition - 6;
1782 endPositionForLineComment = currentPosition - 1;
1784 // recordComment(false);
1785 recordComment(TokenNameCOMMENT_LINE);
1786 if (this.taskTags != null)
1787 checkTaskTag(this.startPosition, this.currentPosition);
1788 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1789 checkNonExternalizeString();
1790 if (recordLineSeparator) {
1792 pushUnicodeLineSeparator();
1794 pushLineSeparator();
// the comment is only returned as a token when the caller asked for comments
1800 if (tokenizeComments) {
1802 currentPosition = endPositionForLineComment;
1803 // reset one character behind
1805 return TokenNameCOMMENT_LINE;
1807 } catch (IndexOutOfBoundsException e) { // an eof will then
1809 if (tokenizeComments) {
1811 // reset one character behind
1812 return TokenNameCOMMENT_LINE;
1818 // traditional and annotation comment
// star records whether the previously read character was '*', so that "*/" can be recognised
1819 boolean isJavadoc = false, star = false;
1820 // consume next character
1821 unicodeAsBackSlash = false;
1822 currentCharacter = source[currentPosition++];
1823 // if (((currentCharacter = source[currentPosition++]) ==
1825 // && (source[currentPosition] == 'u')) {
1826 // getNextUnicodeChar();
1828 // if (withoutUnicodePtr != 0) {
1829 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1830 // currentCharacter;
1833 if (currentCharacter == '*') {
1837 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1838 checkNonExternalizeString();
1839 if (recordLineSeparator) {
1840 pushLineSeparator();
1845 try { // get the next char
1846 currentCharacter = source[currentPosition++];
1847 // if (((currentCharacter = source[currentPosition++])
1849 // && (source[currentPosition] == 'u')) {
1850 // //-------------unicode traitement ------------
1851 // getNextUnicodeChar();
1853 // handle the \\u case manually into comment
1854 // if (currentCharacter == '\\') {
1855 // if (source[currentPosition] == '\\')
1856 // currentPosition++;
1857 // //jump over the \\
1859 // empty comment is not a javadoc /**/
1860 if (currentCharacter == '/') {
1863 // loop until end of comment */
1864 while ((currentCharacter != '/') || (!star)) {
1865 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1866 checkNonExternalizeString();
1867 if (recordLineSeparator) {
1868 pushLineSeparator();
1873 star = currentCharacter == '*';
1875 currentCharacter = source[currentPosition++];
1876 // if (((currentCharacter = source[currentPosition++])
1878 // && (source[currentPosition] == 'u')) {
1879 // //-------------unicode traitement ------------
1880 // getNextUnicodeChar();
1882 // handle the \\u case manually into comment
1883 // if (currentCharacter == '\\') {
1884 // if (source[currentPosition] == '\\')
1885 // currentPosition++;
1886 // } //jump over the \\
1888 // recordComment(isJavadoc);
1890 recordComment(TokenNameCOMMENT_PHPDOC);
1892 recordComment(TokenNameCOMMENT_BLOCK);
1895 if (tokenizeComments) {
1897 return TokenNameCOMMENT_PHPDOC;
1898 return TokenNameCOMMENT_BLOCK;
1901 if (this.taskTags != null) {
1902 checkTaskTag(this.startPosition, this.currentPosition);
// running off the end of source inside a block comment is reported as an unterminated comment
1904 } catch (IndexOutOfBoundsException e) {
1905 // reset end position for error reporting
1906 currentPosition -= 2;
1907 throw new InvalidInputException(UNTERMINATED_COMMENT);
1911 return TokenNameDIVIDE;
1915 return TokenNameEOF;
1916 // the atEnd may not be <currentPosition == source.length> if
1917 // source is only some part of a real (external) stream
1918 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// '$' followed by an identifier start is scanned with scanIdentifierOrKeyword(true);
// otherwise the position is restored and a bare DOLLAR token is returned
1920 if (currentCharacter == '$') {
1921 int oldPosition = currentPosition;
1923 currentCharacter = source[currentPosition++];
1924 if (isPHPIdentifierStart(currentCharacter)) {
1925 return scanIdentifierOrKeyword(true);
1927 currentPosition = oldPosition;
1928 return TokenNameDOLLAR;
1930 } catch (IndexOutOfBoundsException e) {
1931 currentPosition = oldPosition;
1932 return TokenNameDOLLAR;
1935 if (isPHPIdentifierStart(currentCharacter))
1936 return scanIdentifierOrKeyword(false);
1937 if (Character.isDigit(currentCharacter))
1938 return scanNumber(false);
1939 return TokenNameERROR;
1942 } // -----------------end switch while try--------------------
// reading past the end of source anywhere above ends the token stream
1943 catch (IndexOutOfBoundsException e) {
1946 return TokenNameEOF;
1951 * @throws InvalidInputException
// Scans inline HTML, recording start as the token's startPosition.
//
// If currentPosition is already beyond source.length it is clamped to
// source.length and TokenNameEOF is returned. Otherwise characters are read
// from source; a '<' followed by '?' is examined further (a following 'P'/'p'
// is checked against 'H'/'h' and 'P'/'p' via getNextChar, and '=' is tested
// for the "<?=" form). phpExpressionTag is set within that handling
// (apparently for the "<?=" form - TODO confirm, the branch structure is not
// fully visible here). When ignorePHPOneLiner is set, lookAheadLinePHPTag()
// is consulted and TokenNameINLINE_HTML is returned if it reports
// TokenNameINLINE_HTML. Line terminators are reported through
// pushLineSeparator() when recordLineSeparator is set.
//
// start: index stored into startPosition for the returned token.
// Returns TokenNameEOF or TokenNameINLINE_HTML on the paths shown here.
1953 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// nothing left to scan: clamp the position and signal end of input
1954 if (currentPosition > source.length) {
1955 currentPosition = source.length;
1956 return TokenNameEOF;
1958 startPosition = start;
1961 currentCharacter = source[currentPosition++];
1962 if (currentCharacter == '<') {
1963 if (getNextChar('?')) {
1964 currentCharacter = source[currentPosition++];
1965 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1966 if (currentCharacter != '=') { // <?=
1969 phpExpressionTag = true;
1972 if (ignorePHPOneLiner) { // for CodeFormatter
1973 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1975 return TokenNameINLINE_HTML;
1979 return TokenNameINLINE_HTML;
1982 // boolean phpStart = (currentCharacter == 'P') ||
1983 // (currentCharacter == 'p');
// after 'P'/'p', look for 'H'/'h' and then 'P'/'p' (either letter case is accepted)
1985 int test = getNextChar('H', 'h');
1987 test = getNextChar('P', 'p');
1990 if (ignorePHPOneLiner) {
1991 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1993 return TokenNameINLINE_HTML;
1997 return TokenNameINLINE_HTML;
2005 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2006 if (recordLineSeparator) {
2007 pushLineSeparator();
2012 } // -----------------while--------------------
2014 return TokenNameINLINE_HTML;
2015 } // -----------------try--------------------
// reaching the end of source still yields an inline HTML token starting at start
2016 catch (IndexOutOfBoundsException e) {
2017 startPosition = start;
2021 return TokenNameINLINE_HTML;
// Looks ahead from currentPosition using a private index
// (currentPositionInLine) to decide whether the PHP section stays on this
// line; used for the CodeFormatter.
//
// While scanning it tracks whether a single-quoted or double-quoted string
// is active, skipping the character after a backslash inside double-quoted
// strings. When a character preceded by '?' matches the first visible case
// (presumably '>' - TODO confirm, the case labels are not visible here),
// currentPosition is moved to the look-ahead index and TokenNameEOF is
// returned as a dummy token. The other visible exits return
// TokenNameINLINE_HTML; running past the end of source also sets
// currentPosition to the look-ahead index before returning it.
2027 private int lookAheadLinePHPTag() {
2028 // check if the PHP is only in this line (for CodeFormatter)
2029 int currentPositionInLine = currentPosition;
2030 char previousCharInLine = ' ';
2031 char currentCharInLine = ' ';
2032 boolean singleQuotedStringActive = false;
2033 boolean doubleQuotedStringActive = false;
2036 // look ahead in this line
2038 previousCharInLine = currentCharInLine;
2039 currentCharInLine = source[currentPositionInLine++];
2040 switch (currentCharInLine) {
2042 if (previousCharInLine == '?') {
2043 // update the scanner's current Position in the source
2044 currentPosition = currentPositionInLine;
2045 // use as "dummy" token
2046 return TokenNameEOF;
2050 if (doubleQuotedStringActive) {
2051 // ignore escaped characters in double quoted strings
2052 previousCharInLine = currentCharInLine;
2053 currentCharInLine = source[currentPositionInLine++];
// toggle the double-quoted state; it can only open while no single-quoted string is active
2056 if (doubleQuotedStringActive) {
2057 doubleQuotedStringActive = false;
2059 if (!singleQuotedStringActive) {
2060 doubleQuotedStringActive = true;
// a single-quoted string closes only when the previous character was not a backslash
2065 if (singleQuotedStringActive) {
2066 if (previousCharInLine != '\\') {
2067 singleQuotedStringActive = false;
2070 if (!doubleQuotedStringActive) {
2071 singleQuotedStringActive = true;
2077 return TokenNameINLINE_HTML;
2079 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2081 return TokenNameINLINE_HTML;
// a character preceded by '/' outside of any string also ends the look-ahead
2085 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2087 return TokenNameINLINE_HTML;
2091 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2093 return TokenNameINLINE_HTML;
2098 } catch (IndexOutOfBoundsException e) {
2100 currentPosition = currentPositionInLine;
2101 return TokenNameINLINE_HTML;
2105 // public final void getNextUnicodeChar()
2106 // throws IndexOutOfBoundsException, InvalidInputException {
2108 // //handle the case of unicode.
2109 // //when a unicode appears then we must use a buffer that holds char
2111 // //At the end of this method currentCharacter holds the new visited char
2112 // //and currentPosition points right next after it
2114 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2116 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2117 // currentPosition++;
2118 // while (source[currentPosition] == 'u') {
2119 // currentPosition++;
2123 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2125 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2127 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2129 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2131 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2133 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2134 // //need the unicode buffer
2135 // if (withoutUnicodePtr == 0) {
2136 // //buffer all the entries that have been left aside....
2137 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2138 // System.arraycopy(
2141 // withoutUnicodeBuffer,
2143 // withoutUnicodePtr);
2145 // //fill the buffer with the char
2146 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2148 // unicodeAsBackSlash = currentCharacter == '\\';
2151 * Tokenize a method body, assuming that curly brackets are properly balanced.
2153 public final void jumpOverMethodBody() {
2154 this.wasAcr = false;
2157 while (true) { // loop for jumping over comments
2158 // ---------Consume white space and handles startPosition---------
2159 boolean isWhiteSpace;
2161 startPosition = currentPosition;
2162 currentCharacter = source[currentPosition++];
2163 // if (((currentCharacter = source[currentPosition++]) == '\\')
2164 // && (source[currentPosition] == 'u')) {
2165 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2167 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2168 pushLineSeparator();
2169 isWhiteSpace = Character.isWhitespace(currentCharacter);
2171 } while (isWhiteSpace);
2172 // -------consume token until } is found---------
2173 switch (currentCharacter) {
2184 test = getNextChar('\\');
2187 scanDoubleQuotedEscapeCharacter();
2188 } catch (InvalidInputException ex) {
2192 // try { // consume next character
2193 unicodeAsBackSlash = false;
2194 currentCharacter = source[currentPosition++];
2195 // if (((currentCharacter = source[currentPosition++]) == '\\')
2196 // && (source[currentPosition] == 'u')) {
2197 // getNextUnicodeChar();
2199 if (withoutUnicodePtr != 0) {
2200 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2203 // } catch (InvalidInputException ex) {
2211 // try { // consume next character
2212 unicodeAsBackSlash = false;
2213 currentCharacter = source[currentPosition++];
2214 // if (((currentCharacter = source[currentPosition++]) == '\\')
2215 // && (source[currentPosition] == 'u')) {
2216 // getNextUnicodeChar();
2218 if (withoutUnicodePtr != 0) {
2219 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2222 // } catch (InvalidInputException ex) {
2224 while (currentCharacter != '"') {
2225 if (currentCharacter == '\r') {
2226 if (source[currentPosition] == '\n')
2229 // the string cannot go further that the line
2231 if (currentCharacter == '\n') {
2233 // the string cannot go further that the line
2235 if (currentCharacter == '\\') {
2237 scanDoubleQuotedEscapeCharacter();
2238 } catch (InvalidInputException ex) {
2242 // try { // consume next character
2243 unicodeAsBackSlash = false;
2244 currentCharacter = source[currentPosition++];
2245 // if (((currentCharacter = source[currentPosition++]) == '\\')
2246 // && (source[currentPosition] == 'u')) {
2247 // getNextUnicodeChar();
2249 if (withoutUnicodePtr != 0) {
2250 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2253 // } catch (InvalidInputException ex) {
2256 } catch (IndexOutOfBoundsException e) {
2262 if ((test = getNextChar('/', '*')) == 0) {
2265 // get the next char
2266 currentCharacter = source[currentPosition++];
2267 // if (((currentCharacter = source[currentPosition++]) ==
2269 // && (source[currentPosition] == 'u')) {
2270 // //-------------unicode traitement ------------
2271 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2272 // currentPosition++;
2273 // while (source[currentPosition] == 'u') {
2274 // currentPosition++;
2277 // Character.getNumericValue(source[currentPosition++]))
2281 // Character.getNumericValue(source[currentPosition++]))
2285 // Character.getNumericValue(source[currentPosition++]))
2289 // Character.getNumericValue(source[currentPosition++]))
2292 // //error don't care of the value
2293 // currentCharacter = 'A';
2294 // } //something different from \n and \r
2296 // currentCharacter =
2297 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2300 while (currentCharacter != '\r' && currentCharacter != '\n') {
2301 // get the next char
2302 currentCharacter = source[currentPosition++];
2303 // if (((currentCharacter = source[currentPosition++])
2305 // && (source[currentPosition] == 'u')) {
2306 // //-------------unicode traitement ------------
2307 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2308 // currentPosition++;
2309 // while (source[currentPosition] == 'u') {
2310 // currentPosition++;
2313 // Character.getNumericValue(source[currentPosition++]))
2317 // Character.getNumericValue(source[currentPosition++]))
2321 // Character.getNumericValue(source[currentPosition++]))
2325 // Character.getNumericValue(source[currentPosition++]))
2328 // //error don't care of the value
2329 // currentCharacter = 'A';
2330 // } //something different from \n and \r
2332 // currentCharacter =
2333 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2337 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2338 pushLineSeparator();
2339 } catch (IndexOutOfBoundsException e) {
2340 } // an eof will them be generated
2344 // traditional and annotation comment
2345 boolean star = false;
2346 // try { // consume next character
2347 unicodeAsBackSlash = false;
2348 currentCharacter = source[currentPosition++];
2349 // if (((currentCharacter = source[currentPosition++]) == '\\')
2350 // && (source[currentPosition] == 'u')) {
2351 // getNextUnicodeChar();
2353 if (withoutUnicodePtr != 0) {
2354 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2357 // } catch (InvalidInputException ex) {
2359 if (currentCharacter == '*') {
2362 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2363 pushLineSeparator();
2364 try { // get the next char
2365 currentCharacter = source[currentPosition++];
2366 // if (((currentCharacter = source[currentPosition++]) ==
2368 // && (source[currentPosition] == 'u')) {
2369 // //-------------unicode traitement ------------
2370 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2371 // currentPosition++;
2372 // while (source[currentPosition] == 'u') {
2373 // currentPosition++;
2376 // Character.getNumericValue(source[currentPosition++]))
2380 // Character.getNumericValue(source[currentPosition++]))
2384 // Character.getNumericValue(source[currentPosition++]))
2388 // Character.getNumericValue(source[currentPosition++]))
2391 // //error don't care of the value
2392 // currentCharacter = 'A';
2393 // } //something different from * and /
2395 // currentCharacter =
2396 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2399 // loop until end of comment */
2400 while ((currentCharacter != '/') || (!star)) {
2401 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2402 pushLineSeparator();
2403 star = currentCharacter == '*';
2405 currentCharacter = source[currentPosition++];
2406 // if (((currentCharacter = source[currentPosition++])
2408 // && (source[currentPosition] == 'u')) {
2409 // //-------------unicode traitement ------------
2410 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2411 // currentPosition++;
2412 // while (source[currentPosition] == 'u') {
2413 // currentPosition++;
2416 // Character.getNumericValue(source[currentPosition++]))
2420 // Character.getNumericValue(source[currentPosition++]))
2424 // Character.getNumericValue(source[currentPosition++]))
2428 // Character.getNumericValue(source[currentPosition++]))
2431 // //error don't care of the value
2432 // currentCharacter = 'A';
2433 // } //something different from * and /
2435 // currentCharacter =
2436 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2440 } catch (IndexOutOfBoundsException e) {
2448 if (isPHPIdentOrVarStart(currentCharacter)) {
2450 scanIdentifierOrKeyword((currentCharacter == '$'));
2451 } catch (InvalidInputException ex) {
2456 if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
2457 // if (Character.isDigit(currentCharacter)) {
2460 } catch (InvalidInputException ex) {
2467 // -----------------end switch while try--------------------
2468 } catch (IndexOutOfBoundsException e) {
2469 } catch (InvalidInputException e) {
2474 // public final boolean jumpOverUnicodeWhiteSpace()
2475 // throws InvalidInputException {
2477 // //handle the case of unicode. Jump over the next whiteSpace
2478 // //making startPosition pointing on the next available char
2479 // //On false, the currentCharacter is filled up with a potential
2483 // this.wasAcr = false;
2484 // int c1, c2, c3, c4;
2485 // int unicodeSize = 6;
2486 // currentPosition++;
2487 // while (source[currentPosition] == 'u') {
2488 // currentPosition++;
2492 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2494 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2496 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2498 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2500 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2503 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2504 // if (recordLineSeparator
2505 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2506 // pushLineSeparator();
2507 // if (Character.isWhitespace(currentCharacter))
2510 // //buffer the new char which is not a white space
2511 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2512 // //withoutUnicodePtr == 1 is true here
2514 // } catch (IndexOutOfBoundsException e) {
2515 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Produces a copy of the recorded line-end positions, bounded to the entries in use.
 * The copy holds linePtr + 1 elements taken from the start of lineEnds.
 */
2518 public final int[] getLineEnds() {
2519 // return a bounded copy of this.lineEnds
// the destination array is allocated inline (sized linePtr + 1) and filled from lineEnds[0..linePtr]
2521 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor returning a char[].
 * NOTE(review): presumably this hands back the scanner's source buffer (the source field
 * read by the other methods of this class) - confirm against the method body.
 */
2525 public char[] getSource() {
/**
 * Classifies a token code as identifier-or-keyword.
 *
 * @param token the token code to test
 * @return true when token equals TokenNameIdentifier or is strictly greater than TokenNameKEYWORD
 */
2529 public static boolean isIdentifierOrKeyword(int token) {
// NOTE(review): relies on keyword token codes being numbered above TokenNameKEYWORD - confirm in ITerminalSymbols
2530 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Builds the char[] for a one-character token located at source[startPosition].
 * According to the comments below, the intent is to hand back a shared, pre-built array
 * for the common single-character identifiers instead of allocating each time.
 */
2533 final char[] optimizedCurrentTokenSource1() {
2534 // return always the same char[] build only once
2535 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
// the token's only character
2536 char charOne = source[startPosition];
// fallback: allocate a fresh one-element array holding that character
2591 return new char[] { charOne };
/**
 * Builds the char[] for a two-character token starting at source[startPosition],
 * trying to reuse an already-built array rather than allocating a new one.
 * Two reuse paths are visible: the constant arrays charArray_va ... charArray_vz, and a
 * lookup in the charArray_length[0] cache keyed by a hash of the two characters.
 */
2595 final char[] optimizedCurrentTokenSource2() {
// the two characters of the token
2597 c0 = source[startPosition];
2598 c1 = source[startPosition + 1];
2600 // return always the same char[] build only once
2601 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
// one shared constant array per letter a..z
// NOTE(review): the selecting conditions are presumably on c0/c1 - confirm against the dispatch code
2604 return charArray_va;
2606 return charArray_vb;
2608 return charArray_vc;
2610 return charArray_vd;
2612 return charArray_ve;
2614 return charArray_vf;
2616 return charArray_vg;
2618 return charArray_vh;
2620 return charArray_vi;
2622 return charArray_vj;
2624 return charArray_vk;
2626 return charArray_vl;
2628 return charArray_vm;
2630 return charArray_vn;
2632 return charArray_vo;
2634 return charArray_vp;
2636 return charArray_vq;
2638 return charArray_vr;
2640 return charArray_vs;
2642 return charArray_vt;
2644 return charArray_vu;
2646 return charArray_vv;
2648 return charArray_vw;
2650 return charArray_vx;
2652 return charArray_vy;
2654 return charArray_vz;
2657 // try to return the same char[] build only once
// bucket index: c0 shifted left by 6 plus c1, reduced modulo TableSize
2658 int hash = ((c0 << 6) + c1) % TableSize;
// cache of two-character arrays lives in slot 0 of charArray_length
2659 char[][] table = charArray_length[0][hash];
// first scan: walk the bucket up to InternalTableSize looking for an entry equal to (c0, c1)
2661 while (++i < InternalTableSize) {
2662 char[] charArray = table[i];
2663 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2666 // ---------other side---------
// second scan: walk entries up to and including index newEntry2
2668 int max = newEntry2;
2669 while (++i <= max) {
2670 char[] charArray = table[i];
2671 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2674 // --------add the entry-------
// not cached: advance the insertion index, checking it against InternalTableSize
2675 if (++max >= InternalTableSize)
// store a newly allocated { c0, c1 } array in the bucket; r keeps a reference to it
2678 table[max] = (r = new char[] { c0, c1 });
/**
 * Builds the char[] for a three-character token starting at source[startPosition],
 * reusing an equal array from the charArray_length[1] cache when one is found and
 * otherwise storing a newly allocated array in that cache.
 */
2683 final char[] optimizedCurrentTokenSource3() {
2684 // try to return the same char[] build only once
// reads c0..c2 from the source while combining them into the hash (shifts of 12, 6 and 0 bits)
2686 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
// cache of three-character arrays lives in slot 1 of charArray_length
2688 char[][] table = charArray_length[1][hash];
// first scan: walk the bucket up to InternalTableSize looking for an equal entry
2690 while (++i < InternalTableSize) {
2691 char[] charArray = table[i];
2692 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2695 // ---------other side---------
// second scan: walk entries up to and including index newEntry3
2697 int max = newEntry3;
2698 while (++i <= max) {
2699 char[] charArray = table[i];
2700 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2703 // --------add the entry-------
// not cached: advance the insertion index, checking it against InternalTableSize
2704 if (++max >= InternalTableSize)
// store a newly allocated { c0, c1, c2 } array in the bucket; r keeps a reference to it
2707 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Builds the char[] for a four-character token starting at source[startPosition],
 * reusing an equal array from the charArray_length[2] cache when one is found and
 * otherwise storing a newly allocated array in that cache.
 */
2712 final char[] optimizedCurrentTokenSource4() {
2713 // try to return the same char[] build only once
2714 char c0, c1, c2, c3;
// reads c0..c3 while combining them; c0 is widened to long before its 18-bit shift
2715 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2716 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
// cache of four-character arrays lives in slot 2 of charArray_length; hash is narrowed to int for indexing
2718 char[][] table = charArray_length[2][(int) hash];
// first scan: walk the bucket up to InternalTableSize looking for an equal entry
2720 while (++i < InternalTableSize) {
2721 char[] charArray = table[i];
2722 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2725 // ---------other side---------
// second scan: walk entries up to and including index newEntry4
2727 int max = newEntry4;
2728 while (++i <= max) {
2729 char[] charArray = table[i];
2730 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2733 // --------add the entry-------
// not cached: advance the insertion index, checking it against InternalTableSize
2734 if (++max >= InternalTableSize)
// store a newly allocated { c0, c1, c2, c3 } array in the bucket; r keeps a reference to it
2737 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Builds the char[] for a five-character token starting at source[startPosition],
 * reusing an equal array from the charArray_length[3] cache when one is found and
 * otherwise storing a newly allocated array in that cache.
 */
2742 final char[] optimizedCurrentTokenSource5() {
2743 // try to return the same char[] build only once
2744 char c0, c1, c2, c3, c4;
// reads c0..c4 while combining them; c0 and c1 are widened to long before their 24- and 18-bit shifts
2745 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2746 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
// cache of five-character arrays lives in slot 3 of charArray_length; hash is narrowed to int for indexing
2748 char[][] table = charArray_length[3][(int) hash];
// first scan: walk the bucket up to InternalTableSize looking for an equal entry
2750 while (++i < InternalTableSize) {
2751 char[] charArray = table[i];
2752 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2755 // ---------other side---------
// second scan: walk entries up to and including index newEntry5
2757 int max = newEntry5;
2758 while (++i <= max) {
2759 char[] charArray = table[i];
2760 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2763 // --------add the entry-------
// not cached: advance the insertion index, checking it against InternalTableSize
2764 if (++max >= InternalTableSize)
// store a newly allocated five-element array in the bucket; r keeps a reference to it
2767 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Builds the char[] for a six-character token starting at source[startPosition],
 * reusing an equal array from the charArray_length[4] cache when one is found and
 * otherwise storing a newly allocated array in that cache.
 */
2772 final char[] optimizedCurrentTokenSource6() {
2773 // try to return the same char[] build only once
2774 char c0, c1, c2, c3, c4, c5;
// reads c0..c5 while combining them; c0, c1 and c2 are widened to long before shifting
// NOTE(review): c0 is shifted by 32 whereas the remaining shifts step by 6 (24, 18, 12, 6);
// harmless for a hash, but confirm 32 (rather than 30) is intended
2775 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2776 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2777 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
// cache of six-character arrays lives in slot 4 of charArray_length; hash is narrowed to int for indexing
2779 char[][] table = charArray_length[4][(int) hash];
// first scan: walk the bucket up to InternalTableSize looking for an equal entry
2781 while (++i < InternalTableSize) {
2782 char[] charArray = table[i];
2783 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2784 && (c5 == charArray[5]))
2787 // ---------other side---------
// second scan: walk entries up to and including index newEntry6
2789 int max = newEntry6;
2790 while (++i <= max) {
2791 char[] charArray = table[i];
2792 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2793 && (c5 == charArray[5]))
2796 // --------add the entry-------
// not cached: advance the insertion index, checking it against InternalTableSize
2797 if (++max >= InternalTableSize)
// store a newly allocated six-element array in the bucket; r keeps a reference to it
2800 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of a line separator in lineEnds, based on currentCharacter
 * (the character at currentPosition - 1). Handles '\r' and '\n' separately and merges a
 * CR immediately followed by LF into a single line end. lineEnds is enlarged by INCREMENT
 * entries when the store at ++linePtr runs past the end of the array.
 *
 * @throws InvalidInputException declared by the signature
 */
2805 public final void pushLineSeparator() throws InvalidInputException {
2806 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added to lineEnds each time it has to grow
2807 final int INCREMENT = 250;
2808 if (this.checkNonExternalizedStringLiterals) {
2809 // reinitialize the current line for non externalize strings purpose
2812 // currentCharacter is at position currentPosition-1
// ---- carriage return ----
2814 if (currentCharacter == '\r') {
2815 int separatorPos = currentPosition - 1;
// guard: the last recorded line end is already at or beyond this separator
2816 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2818 // System.out.println("CR-" + separatorPos);
// optimistic store; the catch below handles a full array
2820 lineEnds[++linePtr] = separatorPos;
2821 } catch (IndexOutOfBoundsException e) {
2822 // linePtr value is correct
// grow lineEnds by INCREMENT, copy the old content, then redo the store (linePtr was already advanced)
2823 int oldLength = lineEnds.length;
2824 int[] old = lineEnds;
2825 lineEnds = new int[oldLength + INCREMENT];
2826 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2827 lineEnds[linePtr] = separatorPos;
2829 // look-ahead for merged cr+lf
// if the next source character is '\n', move the recorded line end onto it
2831 if (source[currentPosition] == '\n') {
2832 // System.out.println("look-ahead LF-" + currentPosition);
2833 lineEnds[linePtr] = currentPosition;
// reading source[currentPosition] may run past the end of the source
2839 } catch (IndexOutOfBoundsException e) {
// ---- line feed ----
2844 if (currentCharacter == '\n') {
2845 // must merge eventual cr followed by lf
// previous separator was a CR sitting directly before this LF: extend that entry instead of adding one
2846 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2847 // System.out.println("merge LF-" + (currentPosition - 1));
2848 lineEnds[linePtr] = currentPosition - 1;
2850 int separatorPos = currentPosition - 1;
// guard: the last recorded line end is already at or beyond this separator
2851 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2853 // System.out.println("LF-" + separatorPos);
// optimistic store; the catch below handles a full array
2855 lineEnds[++linePtr] = separatorPos;
2856 } catch (IndexOutOfBoundsException e) {
2857 // linePtr value is correct
// grow lineEnds by INCREMENT, copy the old content, then redo the store
2858 int oldLength = lineEnds.length;
2859 int[] old = lineEnds;
2860 lineEnds = new int[oldLength + INCREMENT];
2861 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2862 lineEnds[linePtr] = separatorPos;
/**
 * Variant of pushLineSeparator for a '\r' or '\n' that was read as a unicode escape.
 * The recorded separator position is currentPosition - 6 instead of currentPosition - 1,
 * and the CR/LF merge test uses currentPosition - 7.
 * NOTE(review): the offsets presumably account for the six characters of a backslash-u
 * escape with four hex digits - confirm.
 */
2870 public final void pushUnicodeLineSeparator() {
2871 // isUnicode means that the \r or \n has been read as a unicode character
2872 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added to lineEnds each time it has to grow
2873 final int INCREMENT = 250;
2874 // currentCharacter is at position currentPosition-1
2875 if (this.checkNonExternalizedStringLiterals) {
2876 // reinitialize the current line for non externalize strings purpose
// ---- carriage return ----
2880 if (currentCharacter == '\r') {
2881 int separatorPos = currentPosition - 6;
// guard: the last recorded line end is already at or beyond this separator
2882 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2884 // System.out.println("CR-" + separatorPos);
// optimistic store; the catch below handles a full array
2886 lineEnds[++linePtr] = separatorPos;
2887 } catch (IndexOutOfBoundsException e) {
2888 // linePtr value is correct
// grow lineEnds by INCREMENT, copy the old content, then redo the store
2889 int oldLength = lineEnds.length;
2890 int[] old = lineEnds;
2891 lineEnds = new int[oldLength + INCREMENT];
2892 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2893 lineEnds[linePtr] = separatorPos;
2895 // look-ahead for merged cr+lf
// if the next source character is a literal '\n', move the recorded line end onto it
2896 if (source[currentPosition] == '\n') {
2897 // System.out.println("look-ahead LF-" + currentPosition);
2898 lineEnds[linePtr] = currentPosition;
// ---- line feed ----
2906 if (currentCharacter == '\n') {
2907 // must merge eventual cr followed by lf
// previous separator was a CR recorded at currentPosition - 7: extend that entry instead of adding one
2908 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2909 // System.out.println("merge LF-" + (currentPosition - 1));
2910 lineEnds[linePtr] = currentPosition - 6;
2912 int separatorPos = currentPosition - 6;
// guard: the last recorded line end is already at or beyond this separator
2913 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2915 // System.out.println("LF-" + separatorPos);
// optimistic store; the catch below handles a full array
2917 lineEnds[++linePtr] = separatorPos;
2918 } catch (IndexOutOfBoundsException e) {
2919 // linePtr value is correct
// grow lineEnds by INCREMENT, copy the old content, then redo the store
2920 int oldLength = lineEnds.length;
2921 int[] old = lineEnds;
2922 lineEnds = new int[oldLength + INCREMENT];
2923 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2924 lineEnds[linePtr] = separatorPos;
/**
 * Pushes the comment that was just scanned onto the commentStarts / commentStops stacks.
 * The start is this.startPosition. The stop defaults to this.currentPosition and is stored
 * negated for line comments (using lastCommentLinePosition) and for block comments.
 *
 * @param token the comment token kind, e.g. TokenNameCOMMENT_LINE or TokenNameCOMMENT_BLOCK
 */
2932 public void recordComment(int token) {
// default stop position: positive currentPosition
2934 int stopPosition = this.currentPosition;
// line comment: stop is the negated lastCommentLinePosition
2936 case TokenNameCOMMENT_LINE:
2937 stopPosition = -this.lastCommentLinePosition;
// block comment: stop is the negated currentPosition
2939 case TokenNameCOMMENT_BLOCK:
2940 stopPosition = -this.currentPosition;
2944 // a new comment is recorded
2945 int length = this.commentStops.length;
// advance the stack pointer; when it reaches the capacity, enlarge both arrays by 30 slots
2946 if (++this.commentPtr >= length) {
2947 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2948 // grows the positions buffers too
2949 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
// record the stop and start positions at the new top of the stacks
2951 this.commentStops[this.commentPtr] = stopPosition;
2952 this.commentStarts[this.commentPtr] = this.startPosition;
2955 // public final void recordComment(boolean isJavadoc) {
2956 // // a new annotation comment is recorded
2958 // commentStops[++commentPtr] = isJavadoc
2959 // ? currentPosition
2960 // : -currentPosition;
2961 // } catch (IndexOutOfBoundsException e) {
2962 // int oldStackLength = commentStops.length;
2963 // int[] oldStack = commentStops;
2964 // commentStops = new int[oldStackLength + 30];
2965 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2966 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2967 // //grows the positions buffers too
2968 // int[] old = commentStarts;
2969 // commentStarts = new int[oldStackLength + 30];
2970 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2972 // //the buffer is of a correct size here
2973 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart from a given range.
 *
 * @param begin position assigned to initialPosition, startPosition and currentPosition
 * @param end   last position of the range; eofPosition becomes end + 1
 */
2975 public void resetTo(int begin, int end) {
2976 // reset the scanner to a given position where it may rescan again
2978 initialPosition = startPosition = currentPosition = begin;
// end + 1 is skipped when end is Integer.MAX_VALUE, which would overflow
2979 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2980 commentPtr = -1; // reset comment stack
/**
 * Consumes the character following a backslash inside a single-quoted string and sets
 * currentCharacter to the character the escape stands for.
 * Only single quote and backslash results are assigned in the switch below.
 *
 * @throws InvalidInputException declared by the signature
 */
2983 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2984 // the string with "\\u" is a legal string of two chars \ and u
2985 // thus we use a direct access to the source (for regular cases).
2986 // if (unicodeAsBackSlash) {
2987 // // consume next character
2988 // unicodeAsBackSlash = false;
2989 // if (((currentCharacter = source[currentPosition++]) == '\\')
2990 // && (source[currentPosition] == 'u')) {
2991 // getNextUnicodeChar();
2993 // if (withoutUnicodePtr != 0) {
2994 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the escaped character directly from the source and advance
2998 currentCharacter = source[currentPosition++];
2999 switch (currentCharacter) {
// NOTE(review): each assignment presumably sits under its own case label - confirm which labels map to '\\'
3001 currentCharacter = '\'';
3004 currentCharacter = '\\';
3007 currentCharacter = '\\';
/**
 * Consumes the character following a backslash inside a double-quoted string and sets
 * currentCharacter to the character the escape stands for. The switch below assigns
 * tab, newline, carriage return, double quote, single quote, backslash and '$';
 * the remaining branch decodes an octal escape of up to three digits.
 *
 * @throws InvalidInputException thrown with INVALID_ESCAPE from the octal branch
 */
3012 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
// read the escaped character directly from the source and advance
3013 currentCharacter = source[currentPosition++];
3014 switch (currentCharacter) {
// NOTE(review): the '\b' and '\f' translations are commented out, so those escapes are not converted here - confirm intended
3016 // currentCharacter = '\b';
3019 currentCharacter = '\t';
3022 currentCharacter = '\n';
3025 // currentCharacter = '\f';
3028 currentCharacter = '\r';
3031 currentCharacter = '\"';
3034 currentCharacter = '\'';
3037 currentCharacter = '\\';
3040 currentCharacter = '$';
3043 // -----------octal escape--------------
3045 // OctalDigit OctalDigit
3046 // ZeroToThree OctalDigit OctalDigit
// numeric value of the first character; only 0..7 starts an octal escape
3047 int number = Character.getNumericValue(currentCharacter);
3048 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7, in which case a third digit is not part of the escape
3049 boolean zeroToThreeNot = number > 3;
// second character: consumed, then tested as a digit
3050 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3051 int digit = Character.getNumericValue(currentCharacter);
3052 if (digit >= 0 && digit <= 7) {
// accumulate the second octal digit
3053 number = (number * 8) + digit;
// third character: consumed, then tested as a digit
3054 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3055 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3056 // Digit --> ignore last character
3059 digit = Character.getNumericValue(currentCharacter);
3060 if (digit >= 0 && digit <= 7) {
3061 // has read \ZeroToThree OctalDigit OctalDigit
// accumulate the third octal digit
3062 number = (number * 8) + digit;
3063 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3064 // --> ignore last character
3068 } else { // has read \OctalDigit NonDigit--> ignore last
3072 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3076 } else { // has read \OctalDigit --> ignore last character
3080 throw new InvalidInputException(INVALID_ESCAPE);
// the decoded octal value becomes the current character
3081 currentCharacter = (char) number;
3084 // throw new InvalidInputException(INVALID_ESCAPE);
3088 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3089 // return scanIdentifierOrKeyword( false );
3091 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3093 // first dispatch on the first char.
3094 // then the length. If there are several
3095 // keywords with the same length AND the same first char, then do another
3096 // dispatch on the second char :-)...cool....but fast !
3097 useAssertAsAnIndentifier = false;
3098 while (getNextCharAsJavaIdentifierPart()) {
3102 // if (new String(getCurrentTokenSource()).equals("$this")) {
3103 // return TokenNamethis;
3105 return TokenNameVariable;
3110 // if (withoutUnicodePtr == 0)
3111 // quick test on length == 1 but not on length > 12 while most identifier
3112 // have a length which is <= 12...but there are lots of identifier with
3113 // only one char....
3115 if ((length = currentPosition - startPosition) == 1)
3116 return TokenNameIdentifier;
3118 data = new char[length];
3119 index = startPosition;
3120 for (int i = 0; i < length; i++) {
3121 data[i] = Character.toLowerCase(source[index + i]);
3125 // if ((length = withoutUnicodePtr) == 1)
3126 // return TokenNameIdentifier;
3127 // // data = withoutUnicodeBuffer;
3128 // data = new char[withoutUnicodeBuffer.length];
3129 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3130 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3134 firstLetter = data[index];
3135 switch (firstLetter) {
3140 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3141 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3142 return TokenNameFILE;
3143 index = 0; // __LINE__
3144 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3145 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3146 return TokenNameLINE;
3150 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3151 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3152 return TokenNameCLASS_C;
3156 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3157 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3158 && (data[++index] == '_'))
3159 return TokenNameMETHOD_C;
3163 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3164 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3165 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3166 return TokenNameFUNC_C;
3169 return TokenNameIdentifier;
3171 // as and array abstract
3175 if ((data[++index] == 's')) {
3178 return TokenNameIdentifier;
3182 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3183 return TokenNameand;
3185 return TokenNameIdentifier;
3189 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3190 return TokenNamearray;
3192 return TokenNameIdentifier;
3194 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3195 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3196 return TokenNameabstract;
3198 return TokenNameIdentifier;
3200 return TokenNameIdentifier;
3206 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3207 return TokenNamebreak;
3209 return TokenNameIdentifier;
3211 return TokenNameIdentifier;
3214 // case catch class clone const continue
3217 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3218 return TokenNamecase;
3220 return TokenNameIdentifier;
3222 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3223 return TokenNamecatch;
3225 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3226 return TokenNameclass;
3228 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3229 return TokenNameclone;
3231 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3232 return TokenNameconst;
3234 return TokenNameIdentifier;
3236 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3237 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3238 return TokenNamecontinue;
3240 return TokenNameIdentifier;
3242 return TokenNameIdentifier;
3245 // declare default do die
3246 // TODO delete define ==> no keyword !
3249 if ((data[++index] == 'o'))
3252 return TokenNameIdentifier;
3254 // if ((data[++index] == 'e')
3255 // && (data[++index] == 'f')
3256 // && (data[++index] == 'i')
3257 // && (data[++index] == 'n')
3258 // && (data[++index] == 'e'))
3259 // return TokenNamedefine;
3261 // return TokenNameIdentifier;
3263 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3264 && (data[++index] == 'r') && (data[++index] == 'e'))
3265 return TokenNamedeclare;
3267 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3268 && (data[++index] == 'l') && (data[++index] == 't'))
3269 return TokenNamedefault;
3271 return TokenNameIdentifier;
3273 return TokenNameIdentifier;
3276 // echo else exit elseif extends eval
3279 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3280 return TokenNameecho;
3281 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3282 return TokenNameelse;
3283 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3284 return TokenNameexit;
3285 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3286 return TokenNameeval;
3288 return TokenNameIdentifier;
3291 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3292 return TokenNameendif;
3293 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3294 return TokenNameempty;
3296 return TokenNameIdentifier;
3299 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3300 && (data[++index] == 'r'))
3301 return TokenNameendfor;
3302 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3303 && (data[++index] == 'f'))
3304 return TokenNameelseif;
3306 return TokenNameIdentifier;
3308 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3309 && (data[++index] == 'd') && (data[++index] == 's'))
3310 return TokenNameextends;
3312 return TokenNameIdentifier;
3315 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3316 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3317 return TokenNameendwhile;
3319 return TokenNameIdentifier;
3322 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3323 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3324 return TokenNameendswitch;
3326 return TokenNameIdentifier;
3329 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3330 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3331 && (data[++index] == 'e'))
3332 return TokenNameenddeclare;
3334 if ((data[++index] == 'n') // endforeach
3335 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3336 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3337 return TokenNameendforeach;
3339 return TokenNameIdentifier;
3341 return TokenNameIdentifier;
3344 // for false final function
3347 if ((data[++index] == 'o') && (data[++index] == 'r'))
3348 return TokenNamefor;
3350 return TokenNameIdentifier;
3352 // if ((data[++index] == 'a') && (data[++index] == 'l')
3353 // && (data[++index] == 's') && (data[++index] == 'e'))
3354 // return TokenNamefalse;
3355 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3356 return TokenNamefinal;
3358 return TokenNameIdentifier;
3361 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3362 && (data[++index] == 'c') && (data[++index] == 'h'))
3363 return TokenNameforeach;
3365 return TokenNameIdentifier;
3368 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3369 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3370 return TokenNamefunction;
3372 return TokenNameIdentifier;
3374 return TokenNameIdentifier;
3379 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3380 && (data[++index] == 'l')) {
3381 return TokenNameglobal;
3384 return TokenNameIdentifier;
3386 // if int isset include include_once instanceof interface implements
3389 if (data[++index] == 'f')
3392 return TokenNameIdentifier;
3394 // if ((data[++index] == 'n') && (data[++index] == 't'))
3395 // return TokenNameint;
3397 // return TokenNameIdentifier;
3399 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3400 return TokenNameisset;
3402 return TokenNameIdentifier;
3404 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3405 && (data[++index] == 'd') && (data[++index] == 'e'))
3406 return TokenNameinclude;
3408 return TokenNameIdentifier;
3411 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3412 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3413 return TokenNameinterface;
3415 return TokenNameIdentifier;
3418 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3419 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3420 && (data[++index] == 'f'))
3421 return TokenNameinstanceof;
3422 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3423 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3424 && (data[++index] == 's'))
3425 return TokenNameimplements;
3427 return TokenNameIdentifier;
3429 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3430 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3431 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3432 return TokenNameinclude_once;
3434 return TokenNameIdentifier;
3436 return TokenNameIdentifier;
3441 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3442 return TokenNamelist;
3445 return TokenNameIdentifier;
3450 if ((data[++index] == 'e') && (data[++index] == 'w'))
3451 return TokenNamenew;
3453 return TokenNameIdentifier;
3455 // if ((data[++index] == 'u') && (data[++index] == 'l')
3456 // && (data[++index] == 'l'))
3457 // return TokenNamenull;
3459 // return TokenNameIdentifier;
3461 return TokenNameIdentifier;
3466 if (data[++index] == 'r') {
3470 // if (length == 12) {
3471 // if ((data[++index] == 'l')
3472 // && (data[++index] == 'd')
3473 // && (data[++index] == '_')
3474 // && (data[++index] == 'f')
3475 // && (data[++index] == 'u')
3476 // && (data[++index] == 'n')
3477 // && (data[++index] == 'c')
3478 // && (data[++index] == 't')
3479 // && (data[++index] == 'i')
3480 // && (data[++index] == 'o')
3481 // && (data[++index] == 'n')) {
3482 // return TokenNameold_function;
3485 return TokenNameIdentifier;
3487 // print public private protected
3490 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3491 return TokenNameprint;
3493 return TokenNameIdentifier;
3495 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3496 && (data[++index] == 'c')) {
3497 return TokenNamepublic;
3499 return TokenNameIdentifier;
3501 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3502 && (data[++index] == 't') && (data[++index] == 'e')) {
3503 return TokenNameprivate;
3505 return TokenNameIdentifier;
3507 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3508 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3509 return TokenNameprotected;
3511 return TokenNameIdentifier;
3513 return TokenNameIdentifier;
3515 // return require require_once
3517 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3518 && (data[++index] == 'n')) {
3519 return TokenNamereturn;
3521 } else if (length == 7) {
3522 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3523 && (data[++index] == 'r') && (data[++index] == 'e')) {
3524 return TokenNamerequire;
3526 } else if (length == 12) {
3527 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3528 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3529 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3530 return TokenNamerequire_once;
3533 return TokenNameIdentifier;
3535 // self static switch
3538 // if ((data[++index] == 'e') && (data[++index] == 'l') && (data[++index] == 'f')) {
3539 // return TokenNameself;
3541 // return TokenNameIdentifier;
3543 if (data[++index] == 't')
3544 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3545 return TokenNamestatic;
3547 return TokenNameIdentifier;
3548 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3549 && (data[++index] == 'h'))
3550 return TokenNameswitch;
3552 return TokenNameIdentifier;
3554 return TokenNameIdentifier;
3560 if ((data[++index] == 'r') && (data[++index] == 'y'))
3561 return TokenNametry;
3563 return TokenNameIdentifier;
3565 // if ((data[++index] == 'r') && (data[++index] == 'u')
3566 // && (data[++index] == 'e'))
3567 // return TokenNametrue;
3569 // return TokenNameIdentifier;
3571 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3572 return TokenNamethrow;
3574 return TokenNameIdentifier;
3576 return TokenNameIdentifier;
3582 if ((data[++index] == 's') && (data[++index] == 'e'))
3583 return TokenNameuse;
3585 return TokenNameIdentifier;
3587 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3588 return TokenNameunset;
3590 return TokenNameIdentifier;
3592 return TokenNameIdentifier;
3598 if ((data[++index] == 'a') && (data[++index] == 'r'))
3599 return TokenNamevar;
3601 return TokenNameIdentifier;
3603 return TokenNameIdentifier;
3609 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3610 return TokenNamewhile;
3612 return TokenNameIdentifier;
3613 // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3614 // (data[++index]=='e') && (data[++index]=='f')&&
3615 // (data[++index]=='p'))
3616 // return TokenNamewidefp ;
3618 // return TokenNameIdentifier;
3620 return TokenNameIdentifier;
3626 if ((data[++index] == 'o') && (data[++index] == 'r'))
3627 return TokenNamexor;
3629 return TokenNameIdentifier;
3631 return TokenNameIdentifier;
3634 return TokenNameIdentifier;
/**
 * Scans the remainder of a numeric literal whose first digit is already held in
 * currentCharacter and reports which kind of literal it is.
 *
 * A leading "0x"/"0X" introduces a hexadecimal integer. Otherwise a '.' fraction, an
 * 'e'/'E' exponent or a trailing 'd'/'D' yields a double literal.
 *
 * @param dotPrefix true when the digits were preceded by a '.'; the literal is then
 *          floating point from the start and no further '.' is looked for
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException INVALID_HEXA when "0x" is not followed by a hexadecimal
 *           digit; INVALID_FLOAT when an exponent marker (and optional sign) is not
 *           followed by a digit
 */
3638 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3639 // when entering this method the currentCharacter is the first
3640 // digit of the number , i.e. it may be preceded by a . when
3641 // dotPrefix is true
3642 boolean floating = dotPrefix;
// A literal starting with 0 is either hexadecimal (0x...) or a run of digits that may
// still turn out to be a floating point number.
3643 if ((!dotPrefix) && (currentCharacter == '0')) {
3644 if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3645 // force the first char of the hexa number do exist...
3646 // consume next character
3647 unicodeAsBackSlash = false;
3648 currentCharacter = source[currentPosition++];
3649 // if (((currentCharacter = source[currentPosition++]) == '\\')
3650 // && (source[currentPosition] == 'u')) {
3651 // getNextUnicodeChar();
3653 // if (withoutUnicodePtr != 0) {
3654 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3657 if (Character.digit(currentCharacter, 16) == -1)
3658 throw new InvalidInputException(INVALID_HEXA);
3660 while (getNextCharAsDigit(16)) {
3663 // if (getNextChar('l', 'L') >= 0)
3664 // return TokenNameLongLiteral;
3666 return TokenNameIntegerLiteral;
3668 // there is no x or X after the leading 0
3669 // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3670 // 00078.0 is true !!!!! crazy language
3671 if (getNextCharAsDigit()) {
3672 // -------------potential octal-----------------
3673 while (getNextCharAsDigit()) {
3676 // if (getNextChar('l', 'L') >= 0) {
3677 // return TokenNameLongLiteral;
3680 // if (getNextChar('f', 'F') >= 0) {
3681 // return TokenNameFloatingPointLiteral;
3683 if (getNextChar('d', 'D') >= 0) {
3684 return TokenNameDoubleLiteral;
3685 } else { // make the distinction between octal and float ....
3686 if (getNextChar('.')) { // bingo ! ....
3687 while (getNextCharAsDigit()) {
3690 if (getNextChar('e', 'E') >= 0) {
3691 // consume next character
3692 unicodeAsBackSlash = false;
3693 currentCharacter = source[currentPosition++];
3694 // if (((currentCharacter = source[currentPosition++]) == '\\')
3695 // && (source[currentPosition] == 'u')) {
3696 // getNextUnicodeChar();
3698 // if (withoutUnicodePtr != 0) {
3699 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3702 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3703 // consume next character
3704 unicodeAsBackSlash = false;
3705 currentCharacter = source[currentPosition++];
3706 // if (((currentCharacter = source[currentPosition++]) == '\\')
3707 // && (source[currentPosition] == 'u')) {
3708 // getNextUnicodeChar();
3710 // if (withoutUnicodePtr != 0) {
3711 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3712 // currentCharacter;
// an exponent needs at least one digit after the optional sign
3716 if (!Character.isDigit(currentCharacter))
3717 throw new InvalidInputException(INVALID_FLOAT);
3718 while (getNextCharAsDigit()) {
3722 // if (getNextChar('f', 'F') >= 0)
3723 // return TokenNameFloatingPointLiteral;
3724 getNextChar('d', 'D'); // jump over potential d or D
3725 return TokenNameDoubleLiteral;
3727 return TokenNameIntegerLiteral;
// Remaining digits of a literal that did not take one of the leading-0 exits above.
3734 while (getNextCharAsDigit()) {
3737 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3738 // return TokenNameLongLiteral;
3739 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3740 while (getNextCharAsDigit()) {
3745 // if floating is true both exponent and suffix may be optional
3746 if (getNextChar('e', 'E') >= 0) {
3748 // consume next character
3749 unicodeAsBackSlash = false;
3750 currentCharacter = source[currentPosition++];
3751 // if (((currentCharacter = source[currentPosition++]) == '\\')
3752 // && (source[currentPosition] == 'u')) {
3753 // getNextUnicodeChar();
3755 // if (withoutUnicodePtr != 0) {
3756 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3759 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3762 unicodeAsBackSlash = false;
3763 currentCharacter = source[currentPosition++];
3764 // if (((currentCharacter = source[currentPosition++]) == '\\')
3765 // && (source[currentPosition] == 'u')) {
3766 // getNextUnicodeChar();
3768 // if (withoutUnicodePtr != 0) {
3769 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// an exponent needs at least one digit after the optional sign
3773 if (!Character.isDigit(currentCharacter))
3774 throw new InvalidInputException(INVALID_FLOAT);
3775 while (getNextCharAsDigit()) {
3779 if (getNextChar('d', 'D') >= 0)
3780 return TokenNameDoubleLiteral;
3781 // if (getNextChar('f', 'F') >= 0)
3782 // return TokenNameFloatingPointLiteral;
3783 // the long flag has been tested before
3784 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3788 * Search the line number corresponding to a specific position
3791 public final int getLineNumber(int position) {
3792 if (lineEnds == null)
3794 int length = linePtr + 1;
3797 int g = 0, d = length - 1;
3801 if (position < lineEnds[m]) {
3803 } else if (position > lineEnds[m]) {
3809 if (position < lineEnds[m]) {
// Setter taking the new PHP-mode value; the method body is not visible in this excerpt.
// NOTE(review): presumably stores mode in the public phpMode flag - confirm against the body.
3815 public void setPHPMode(boolean mode) {
3819 public final void setSource(char[] source) {
3820 setSource(null, source);
/**
 * Installs the source buffer to scan together with the compilation unit it belongs to
 * and rewinds the scanner to the beginning of that buffer.
 *
 * @param compilationUnit the unit the source comes from; stored as given (the
 *          single-argument overload passes null)
 * @param source the characters to scan; null is replaced by an empty array
 */
3823 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3824 // the source-buffer is set to sourceString
3825 this.compilationUnit = compilationUnit;
// never keep a null buffer: the length of this.source is read below
3826 if (source == null) {
3827 this.source = new char[0];
3829 this.source = source;
// NOTE(review): one statement line between here and the position reset is not visible
// in this excerpt - confirm what else is reset.
3832 initialPosition = currentPosition = 0;
3833 containsAssertKeyword = false;
// scratch buffer sized to the new source
3834 withoutUnicodeBuffer = new char[this.source.length];
3835 // encapsedStringStack = new Stack();
3838 public String toString() {
3839 if (startPosition == source.length)
3840 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3841 if (currentPosition > source.length)
3842 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3843 char front[] = new char[startPosition];
3844 System.arraycopy(source, 0, front, 0, startPosition);
3845 int middleLength = (currentPosition - 1) - startPosition + 1;
3847 if (middleLength > -1) {
3848 middle = new char[middleLength];
3849 System.arraycopy(source, startPosition, middle, 0, middleLength);
3851 middle = new char[0];
3853 char end[] = new char[source.length - (currentPosition - 1)];
3854 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3855 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3856 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable name for a token type, intended for debugging output.
 *
 * Keywords and operators map to fixed strings. Tokens that carry text (identifiers,
 * variables, literals, strings, heredocs, comments, whitespace, inline HTML) are
 * rendered as a label followed by the current token source in parentheses. The last
 * return reports anything else as "not-a-token(act)" followed by the current token
 * source.
 *
 * @param act the token type to describe (a TokenName... constant)
 * @return the display string for that token type
 */
3860 public final String toStringAction(int act) {
3862 case TokenNameERROR:
3863 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3865 case TokenNameINLINE_HTML:
3866 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3867 case TokenNameIdentifier:
3868 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3869 case TokenNameVariable:
3870 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords ----
3871 case TokenNameabstract:
3872 return "abstract"; //$NON-NLS-1$
3874 return "AND"; //$NON-NLS-1$
3875 case TokenNamearray:
3876 return "array"; //$NON-NLS-1$
3878 return "as"; //$NON-NLS-1$
3879 case TokenNamebreak:
3880 return "break"; //$NON-NLS-1$
3882 return "case"; //$NON-NLS-1$
3883 case TokenNameclass:
3884 return "class"; //$NON-NLS-1$
3885 case TokenNamecatch:
3886 return "catch"; //$NON-NLS-1$
3887 case TokenNameclone:
3890 case TokenNameconst:
3893 case TokenNamecontinue:
3894 return "continue"; //$NON-NLS-1$
3895 case TokenNamedefault:
3896 return "default"; //$NON-NLS-1$
3897 // case TokenNamedefine :
3898 // return "define"; //$NON-NLS-1$
3900 return "do"; //$NON-NLS-1$
3902 return "echo"; //$NON-NLS-1$
3904 return "else"; //$NON-NLS-1$
3905 case TokenNameelseif:
3906 return "elseif"; //$NON-NLS-1$
3907 case TokenNameendfor:
3908 return "endfor"; //$NON-NLS-1$
3909 case TokenNameendforeach:
3910 return "endforeach"; //$NON-NLS-1$
3911 case TokenNameendif:
3912 return "endif"; //$NON-NLS-1$
3913 case TokenNameendswitch:
3914 return "endswitch"; //$NON-NLS-1$
3915 case TokenNameendwhile:
3916 return "endwhile"; //$NON-NLS-1$
3919 case TokenNameextends:
3920 return "extends"; //$NON-NLS-1$
3921 // case TokenNamefalse :
3922 // return "false"; //$NON-NLS-1$
3923 case TokenNamefinal:
3924 return "final"; //$NON-NLS-1$
3926 return "for"; //$NON-NLS-1$
3927 case TokenNameforeach:
3928 return "foreach"; //$NON-NLS-1$
3929 case TokenNamefunction:
3930 return "function"; //$NON-NLS-1$
3931 case TokenNameglobal:
3932 return "global"; //$NON-NLS-1$
3934 return "if"; //$NON-NLS-1$
3935 case TokenNameimplements:
3936 return "implements"; //$NON-NLS-1$
3937 case TokenNameinclude:
3938 return "include"; //$NON-NLS-1$
3939 case TokenNameinclude_once:
3940 return "include_once"; //$NON-NLS-1$
3941 case TokenNameinstanceof:
3942 return "instanceof"; //$NON-NLS-1$
3943 case TokenNameinterface:
3944 return "interface"; //$NON-NLS-1$
3945 case TokenNameisset:
3946 return "isset"; //$NON-NLS-1$
3948 return "list"; //$NON-NLS-1$
3950 return "new"; //$NON-NLS-1$
3951 // case TokenNamenull :
3952 // return "null"; //$NON-NLS-1$
3954 return "OR"; //$NON-NLS-1$
3955 case TokenNameprint:
3956 return "print"; //$NON-NLS-1$
3957 case TokenNameprivate:
3958 return "private"; //$NON-NLS-1$
3959 case TokenNameprotected:
3960 return "protected"; //$NON-NLS-1$
3961 case TokenNamepublic:
3962 return "public"; //$NON-NLS-1$
3963 case TokenNamerequire:
3964 return "require"; //$NON-NLS-1$
3965 case TokenNamerequire_once:
3966 return "require_once"; //$NON-NLS-1$
3967 case TokenNamereturn:
3968 return "return"; //$NON-NLS-1$
3969 // case TokenNameself:
3970 // return "self"; //$NON-NLS-1$
3971 case TokenNamestatic:
3972 return "static"; //$NON-NLS-1$
3973 case TokenNameswitch:
3974 return "switch"; //$NON-NLS-1$
3975 // case TokenNametrue :
3976 // return "true"; //$NON-NLS-1$
3977 case TokenNameunset:
3978 return "unset"; //$NON-NLS-1$
3980 return "var"; //$NON-NLS-1$
3981 case TokenNamewhile:
3982 return "while"; //$NON-NLS-1$
3984 return "XOR"; //$NON-NLS-1$
3985 // case TokenNamethis :
3986 // return "$this"; //$NON-NLS-1$
// ---- literals and strings: shown with their source text ----
3987 case TokenNameIntegerLiteral:
3988 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3989 case TokenNameDoubleLiteral:
3990 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3991 case TokenNameStringDoubleQuote:
3992 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3993 case TokenNameStringSingleQuote:
3994 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3995 case TokenNameStringInterpolated:
3996 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3997 case TokenNameEncapsedString0:
3998 return "`"; //$NON-NLS-1$
3999 // case TokenNameEncapsedString1:
4000 // return "\'"; //$NON-NLS-1$
4001 // case TokenNameEncapsedString2:
4002 // return "\""; //$NON-NLS-1$
4003 case TokenNameSTRING:
4004 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4005 case TokenNameHEREDOC:
4006 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// ---- operators and punctuation ----
4007 case TokenNamePLUS_PLUS:
4008 return "++"; //$NON-NLS-1$
4009 case TokenNameMINUS_MINUS:
4010 return "--"; //$NON-NLS-1$
4011 case TokenNameEQUAL_EQUAL:
4012 return "=="; //$NON-NLS-1$
4013 case TokenNameEQUAL_EQUAL_EQUAL:
4014 return "==="; //$NON-NLS-1$
4015 case TokenNameEQUAL_GREATER:
4016 return "=>"; //$NON-NLS-1$
4017 case TokenNameLESS_EQUAL:
4018 return "<="; //$NON-NLS-1$
4019 case TokenNameGREATER_EQUAL:
4020 return ">="; //$NON-NLS-1$
4021 case TokenNameNOT_EQUAL:
4022 return "!="; //$NON-NLS-1$
4023 case TokenNameNOT_EQUAL_EQUAL:
4024 return "!=="; //$NON-NLS-1$
4025 case TokenNameLEFT_SHIFT:
4026 return "<<"; //$NON-NLS-1$
4027 case TokenNameRIGHT_SHIFT:
4028 return ">>"; //$NON-NLS-1$
4029 case TokenNamePLUS_EQUAL:
4030 return "+="; //$NON-NLS-1$
4031 case TokenNameMINUS_EQUAL:
4032 return "-="; //$NON-NLS-1$
4033 case TokenNameMULTIPLY_EQUAL:
4034 return "*="; //$NON-NLS-1$
4035 case TokenNameDIVIDE_EQUAL:
4036 return "/="; //$NON-NLS-1$
4037 case TokenNameAND_EQUAL:
4038 return "&="; //$NON-NLS-1$
4039 case TokenNameOR_EQUAL:
4040 return "|="; //$NON-NLS-1$
4041 case TokenNameXOR_EQUAL:
4042 return "^="; //$NON-NLS-1$
4043 case TokenNameREMAINDER_EQUAL:
4044 return "%="; //$NON-NLS-1$
4045 case TokenNameDOT_EQUAL:
4046 return ".="; //$NON-NLS-1$
4047 case TokenNameLEFT_SHIFT_EQUAL:
4048 return "<<="; //$NON-NLS-1$
4049 case TokenNameRIGHT_SHIFT_EQUAL:
4050 return ">>="; //$NON-NLS-1$
4051 case TokenNameOR_OR:
4052 return "||"; //$NON-NLS-1$
4053 case TokenNameAND_AND:
4054 return "&&"; //$NON-NLS-1$
4056 return "+"; //$NON-NLS-1$
4057 case TokenNameMINUS:
4058 return "-"; //$NON-NLS-1$
4059 case TokenNameMINUS_GREATER:
4062 return "!"; //$NON-NLS-1$
4063 case TokenNameREMAINDER:
4064 return "%"; //$NON-NLS-1$
4066 return "^"; //$NON-NLS-1$
4068 return "&"; //$NON-NLS-1$
4069 case TokenNameMULTIPLY:
4070 return "*"; //$NON-NLS-1$
4072 return "|"; //$NON-NLS-1$
4073 case TokenNameTWIDDLE:
4074 return "~"; //$NON-NLS-1$
4075 case TokenNameTWIDDLE_EQUAL:
4076 return "~="; //$NON-NLS-1$
4077 case TokenNameDIVIDE:
4078 return "/"; //$NON-NLS-1$
4079 case TokenNameGREATER:
4080 return ">"; //$NON-NLS-1$
4082 return "<"; //$NON-NLS-1$
4083 case TokenNameLPAREN:
4084 return "("; //$NON-NLS-1$
4085 case TokenNameRPAREN:
4086 return ")"; //$NON-NLS-1$
4087 case TokenNameLBRACE:
4088 return "{"; //$NON-NLS-1$
4089 case TokenNameRBRACE:
4090 return "}"; //$NON-NLS-1$
4091 case TokenNameLBRACKET:
4092 return "["; //$NON-NLS-1$
4093 case TokenNameRBRACKET:
4094 return "]"; //$NON-NLS-1$
4095 case TokenNameSEMICOLON:
4096 return ";"; //$NON-NLS-1$
4097 case TokenNameQUESTION:
4098 return "?"; //$NON-NLS-1$
4099 case TokenNameCOLON:
4100 return ":"; //$NON-NLS-1$
4101 case TokenNameCOMMA:
4102 return ","; //$NON-NLS-1$
4104 return "."; //$NON-NLS-1$
4105 case TokenNameEQUAL:
4106 return "="; //$NON-NLS-1$
4109 case TokenNameDOLLAR:
4111 case TokenNameDOLLAR_LBRACE:
4113 case TokenNameLBRACE_DOLLAR:
4116 return "EOF"; //$NON-NLS-1$
// ---- whitespace and comments: shown with their source text ----
4117 case TokenNameWHITESPACE:
4118 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4119 case TokenNameCOMMENT_LINE:
4120 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4121 case TokenNameCOMMENT_BLOCK:
4122 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4123 case TokenNameCOMMENT_PHPDOC:
4124 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4125 // case TokenNameHTML :
4126 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---- magic constants and cast operators ----
4129 return "__FILE__"; //$NON-NLS-1$
4131 return "__LINE__"; //$NON-NLS-1$
4132 case TokenNameCLASS_C:
4133 return "__CLASS__"; //$NON-NLS-1$
4134 case TokenNameMETHOD_C:
4135 return "__METHOD__"; //$NON-NLS-1$
4136 case TokenNameFUNC_C:
4137 return "__FUNCTION__"; //$NON-NLS-1$
4138 case TokenNameboolCAST:
4139 return "( bool )"; //$NON-NLS-1$
4140 case TokenNameintCAST:
4141 return "( int )"; //$NON-NLS-1$
4142 case TokenNamedoubleCAST:
4143 return "( double )"; //$NON-NLS-1$
4144 case TokenNameobjectCAST:
4145 return "( object )"; //$NON-NLS-1$
4146 case TokenNamestringCAST:
4147 return "( string )"; //$NON-NLS-1$
// fallback: report the raw token number together with the current token source
4149 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner that does not check for non-externalized string literals.
 *
 * @param tokenizeComments passed through to the three-argument constructor
 * @param tokenizeWhiteSpace passed through to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
/**
 * Creates a scanner with assert mode switched off.
 *
 * @param tokenizeComments passed through to the four-argument constructor
 * @param tokenizeWhiteSpace passed through to the four-argument constructor
 * @param checkNonExternalizedStringLiterals passed through to the four-argument
 *          constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}
/**
 * Creates a scanner that does not tokenize strings, has no task tags or task
 * priorities, and matches task tags case-sensitively.
 *
 * @param tokenizeComments passed through to the full constructor
 * @param tokenizeWhiteSpace passed through to the full constructor
 * @param checkNonExternalizedStringLiterals passed through to the full constructor
 * @param assertMode passed through to the full constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
}
/**
 * Creates a fully configured scanner. The end-of-file position starts at
 * Integer.MAX_VALUE, i.e. it is unbounded until set otherwise.
 *
 * @param tokenizeComments stored in the tokenizeComments field
 * @param tokenizeWhiteSpace stored in the tokenizeWhiteSpace field
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode accepted for signature compatibility only; it is not stored
 * @param tokenizeStrings stored in the tokenizeStrings field
 * @param taskTags the task tags to look for in comments, may be null
 * @param taskPriorities the priorities matching taskTags by index, may be null
 * @param isTaskCaseSensitive whether task tags are matched case-sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  // this.assertMode = assertMode;
  // this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // The task-tag search reads this field; without this assignment the argument was
  // silently ignored and the field kept its declared default.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4183 private void checkNonExternalizeString() throws InvalidInputException {
4184 if (currentLine == null)
4186 parseTags(currentLine);
4189 private void parseTags(NLSLine line) throws InvalidInputException {
4190 String s = new String(getCurrentTokenSource());
4191 int pos = s.indexOf(TAG_PREFIX);
4192 int lineLength = line.size();
4194 int start = pos + TAG_PREFIX_LENGTH;
4195 int end = s.indexOf(TAG_POSTFIX, start);
4196 String index = s.substring(start, end);
4199 i = Integer.parseInt(index) - 1;
4200 // Tags are one based not zero based.
4201 } catch (NumberFormatException e) {
4202 i = -1; // we don't want to consider this as a valid NLS tag
4204 if (line.exists(i)) {
4207 pos = s.indexOf(TAG_PREFIX, start);
4209 this.nonNLSStrings = new StringLiteral[lineLength];
4210 int nonNLSCounter = 0;
4211 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4212 StringLiteral literal = (StringLiteral) iterator.next();
4213 if (literal != null) {
4214 this.nonNLSStrings[nonNLSCounter++] = literal;
4217 if (nonNLSCounter == 0) {
4218 this.nonNLSStrings = null;
4222 this.wasNonExternalizedStringLiteral = true;
4223 if (nonNLSCounter != lineLength) {
4224 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Translates the escape sequence at the current position, after the backslash, into
 * the character it denotes and leaves that character in currentCharacter.
 *
 * The switch maps single-letter escapes to '\b', '\t', '\n', '\f', '\r', '\"', '\''
 * and '\\'. Anything else is tried as an octal escape of up to three digits, of which
 * the third is only read when the first digit is 0..3.
 *
 * NOTE(review): the case labels and the statements that push back over-read characters
 * are not visible in this excerpt - confirm the exact set of accepted escapes.
 *
 * @throws InvalidInputException INVALID_ESCAPE when the sequence is not a recognized
 *           escape
 */
4229 public final void scanEscapeCharacter() throws InvalidInputException {
4230 // the string with "\\u" is a legal string of two chars \ and u
4231 // thus we use a direct access to the source (for regular cases).
4232 if (unicodeAsBackSlash) {
4233 // consume next character
4234 unicodeAsBackSlash = false;
4235 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4236 // (source[currentPosition] == 'u')) {
4237 // getNextUnicodeChar();
4239 if (withoutUnicodePtr != 0) {
4240 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// regular case: read the character following the backslash straight from the source
4244 currentCharacter = source[currentPosition++];
4245 switch (currentCharacter) {
4247 currentCharacter = '\b';
4250 currentCharacter = '\t';
4253 currentCharacter = '\n';
4256 currentCharacter = '\f';
4259 currentCharacter = '\r';
4262 currentCharacter = '\"';
4265 currentCharacter = '\'';
4268 currentCharacter = '\\';
4271 // -----------octal escape--------------
4273 // OctalDigit OctalDigit
4274 // ZeroToThree OctalDigit OctalDigit
4275 int number = Character.getNumericValue(currentCharacter);
4276 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third digit (the value would not fit in 8 bits)
4277 boolean zeroToThreeNot = number > 3;
4278 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4279 int digit = Character.getNumericValue(currentCharacter);
4280 if (digit >= 0 && digit <= 7) {
4281 number = (number * 8) + digit;
4282 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4283 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4284 // Digit --> ignore last character
4287 digit = Character.getNumericValue(currentCharacter);
4288 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4289 // OctalDigit OctalDigit
4290 number = (number * 8) + digit;
4291 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4292 // --> ignore last character
4296 } else { // has read \OctalDigit NonDigit--> ignore last
4300 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4304 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): the condition guarding this throw is not visible in this excerpt
4308 throw new InvalidInputException(INVALID_ESCAPE);
4309 currentCharacter = (char) number;
// not an octal digit either: unknown escape
4311 throw new InvalidInputException(INVALID_ESCAPE);
/**
 * Searches a comment for the configured task tags and records every occurrence.
 *
 * Phase 1 walks the comment text and, for each tag match, appends an entry to
 * foundTaskTags, foundTaskPriorities, foundTaskPositions and foundTaskMessages; the
 * arrays start with 5 slots and double when full. A tag that starts with an identifier
 * character is checked against the preceding character, and one that ends with an
 * identifier character against the following character. No tag is looked for directly
 * after '@'.
 *
 * Phase 2 computes, for each task found by this call, the message that follows the
 * tag: it is bounded by the start of the next task or the end of the comment, trimmed
 * of whitespace, and the task's end position is updated to the end of the message.
 *
 * NOTE(review): several statements (labelled continues, the line-break and '*'
 * handling in phase 2) are not visible in this excerpt - confirm details against them.
 *
 * @param commentStart source index of the first character of the comment
 * @param commentEnd source index at which the search stops (exclusive)
 */
4315 // check presence of task: tags
4316 // TODO (frederic) see if we need to take unicode characters into account...
4317 public void checkTaskTag(int commentStart, int commentEnd) {
4318 char[] src = this.source;
4320 // only look for newer task: tags
4321 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// remember where this call's tasks begin so phase 2 only processes the new ones
4324 int foundTaskIndex = this.foundTaskCount;
4325 char previous = src[commentStart + 1]; // should be '*' or '/'
4326 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4328 char[] priority = null;
4329 // check for tag occurrence only if not ambiguous with javadoc tag
4330 if (previous != '@') {
4331 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4332 tag = this.taskTags[itag];
4333 int tagLength = tag.length;
4337 // ensure tag is not preceded by a letter if tag starts with a letter
4338 if (Scanner.isPHPIdentifierStart(tag[0])) {
4339 if (Scanner.isPHPIdentifierPart(previous)) {
// compare the tag character by character against the source at i
4344 for (int t = 0; t < tagLength; t++) {
4347 if (x >= this.eofPosition || x >= commentEnd)
4349 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4350 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
4357 // ensure tag is not followed with letter if tag finishes with a
4359 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4360 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// allocate the result arrays on first use, double them when full
4363 if (this.foundTaskTags == null) {
4364 this.foundTaskTags = new char[5][];
4365 this.foundTaskMessages = new char[5][];
4366 this.foundTaskPriorities = new char[5][];
4367 this.foundTaskPositions = new int[5][];
4368 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4369 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4370 this.foundTaskCount);
4371 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4372 this.foundTaskCount);
4373 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4374 this.foundTaskCount);
4375 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4376 this.foundTaskCount);
// priorities are optional and matched to tags by index
4379 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4381 this.foundTaskTags[this.foundTaskCount] = tag;
4382 this.foundTaskPriorities[this.foundTaskCount] = priority;
// provisional end position: last character of the tag; refined in phase 2
4383 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4384 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4385 this.foundTaskCount++;
4386 i += tagLength - 1; // will be incremented when looping
// phase 2: extract the message of each task found by this call
4392 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4393 // retrieve message start and end positions
4394 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4395 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4396 // at most beginning of next task
4397 if (max_value < msgStart) {
4398 max_value = msgStart; // would only occur if tag is before EOF.
4402 for (int j = msgStart; j < max_value; j++) {
4403 if ((c = src[j]) == '\n' || c == '\r') {
4409 for (int j = max_value; j > msgStart; j--) {
4410 if ((c = src[j]) == '*') {
4418 if (msgStart == end)
// trim whitespace at both ends of the message
4421 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4423 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4425 // update the end position of the task
4426 this.foundTaskPositions[i][1] = end;
4427 // get the message source
4428 final int messageLength = end - msgStart + 1;
4429 char[] message = new char[messageLength];
4430 System.arraycopy(src, msgStart, message, 0, messageLength);
4431 this.foundTaskMessages[i] = message;
4435 // chech presence of task: tags
4436 // public void checkTaskTag(int commentStart, int commentEnd) {
4437 // // only look for newer task: tags
4438 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4439 // - 1][0] >= commentStart) {
4442 // int foundTaskIndex = this.foundTaskCount;
4443 // nextChar: for (int i = commentStart; i < commentEnd && i <
4444 // this.eofPosition; i++) {
4445 // char[] tag = null;
4446 // char[] priority = null;
4447 // // check for tag occurrence
4448 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4449 // tag = this.taskTags[itag];
4450 // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4451 // ? this.taskPriorities[itag] : null;
4452 // int tagLength = tag.length;
4453 // for (int t = 0; t < tagLength; t++) {
4454 // if (this.source[i + t] != tag[t])
4455 // continue nextTag;
4457 // if (this.foundTaskTags == null) {
4458 // this.foundTaskTags = new char[5][];
4459 // this.foundTaskMessages = new char[5][];
4460 // this.foundTaskPriorities = new char[5][];
4461 // this.foundTaskPositions = new int[5][];
4462 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4463 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4464 // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4465 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4466 // char[this.foundTaskCount * 2][], 0,
4467 // this.foundTaskCount);
4468 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4469 // new char[this.foundTaskCount * 2][], 0,
4470 // this.foundTaskCount);
4471 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4472 // int[this.foundTaskCount * 2][], 0,
4473 // this.foundTaskCount);
4475 // this.foundTaskTags[this.foundTaskCount] = tag;
4476 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4477 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4479 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4480 // this.foundTaskCount++;
4481 // i += tagLength - 1; // will be incremented when looping
4484 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4485 // // retrieve message start and end positions
4486 // int msgStart = this.foundTaskPositions[i][0] +
4487 // this.foundTaskTags[i].length;
4488 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4489 // 1][0] - 1 : commentEnd - 1;
4490 // // at most beginning of next task
4491 // if (max_value < msgStart)
4492 // max_value = msgStart; // would only occur if tag is before EOF.
4495 // for (int j = msgStart; j < max_value; j++) {
4496 // if ((c = this.source[j]) == '\n' || c == '\r') {
4502 // for (int j = max_value; j > msgStart; j--) {
4503 // if ((c = this.source[j]) == '*') {
4511 // if (msgStart == end)
4512 // continue; // empty
4513 // // trim the message
4514 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4516 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4518 // // update the end position of the task
4519 // this.foundTaskPositions[i][1] = end;
4520 // // get the message source
4521 // final int messageLength = end - msgStart + 1;
4522 // char[] message = new char[messageLength];
4523 // System.arraycopy(source, msgStart, message, 0, messageLength);
4524 // this.foundTaskMessages[i] = message;