X-Git-Url: http://secure.phpeclipse.com diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java index 36adb95..0826535 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java @@ -156,12 +156,31 @@ public class Scanner implements IScanner, ITerminalSymbols { public static final int SquareBracket = 1; public static final int CurlyBracket = 2; public static final int BracketKinds = 3; + + public static final boolean DEBUG = false; public Scanner() { this(false, false); } public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) { this(tokenizeComments, tokenizeWhiteSpace, false); } + + /** + * Determines if the specified character is + * permissible as the first character in a PHP identifier + */ + public static boolean isPHPIdentifierStart(char ch) { + return Character.isLetter(ch) || (ch == '_'); + } + + /** + * Determines if the specified character may be part of a PHP identifier as + * other than the first character + */ + public static boolean isPHPIdentifierPart(char ch) { + return Character.isLetterOrDigit(ch) || (ch == '_'); + } + public final boolean atEnd() { // This code is not relevant if source is // Only a part of the real stream input @@ -232,7 +251,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } return result; } - + public final char[] getCurrentTokenSource(int startPos) { // Return the token REAL source (aka unicodes are precomputed) @@ -256,7 +275,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } return result; } - + public final char[] getCurrentTokenSourceString() { //return the token REAL source (aka unicodes are precomputed). //REMOVE the two " that are at the beginning and the end. @@ -663,7 +682,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (!Character.isJavaIdentifierPart(currentCharacter)) { + if (!isPHPIdentifierPart(currentCharacter)) { currentPosition = temp; return false; } @@ -684,7 +703,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return true; } //-------------end unicode traitement-------------- else { - if (!Character.isJavaIdentifierPart(currentCharacter)) { + if (!isPHPIdentifierPart(currentCharacter)) { currentPosition = temp; return false; } @@ -860,12 +879,12 @@ public class Scanner implements IScanner, ITerminalSymbols { int heredocStart = currentPosition; int heredocLength = 0; currentCharacter = source[currentPosition++]; - if (Character.isJavaIdentifierStart(currentCharacter)) { + if (isPHPIdentifierStart(currentCharacter)) { currentCharacter = source[currentPosition++]; } else { return TokenNameERROR; } - while (Character.isJavaIdentifierPart(currentCharacter)) { + while (isPHPIdentifierPart(currentCharacter)) { currentCharacter = source[currentPosition++]; } @@ -1047,28 +1066,28 @@ public class Scanner implements IScanner, ITerminalSymbols { } while (currentCharacter != '\'') { - + /**** in PHP \r and \n are valid in string literals ****/ -// if ((currentCharacter == '\n') -// || (currentCharacter == '\r')) { -// // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed -// for (int lookAhead = 0; lookAhead < 50; lookAhead++) { -// if (currentPosition + lookAhead == source.length) -// break; -// if (source[currentPosition + lookAhead] == '\n') -// break; -// if (source[currentPosition + lookAhead] == '\"') { -// currentPosition += lookAhead + 1; -// break; -// } -// } -// throw new InvalidInputException(INVALID_CHAR_IN_STRING); -// } + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } if (currentCharacter == '\\') { int escapeSize = currentPosition; boolean backSlashAsUnicodeInString = unicodeAsBackSlash; //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one - scanEscapeCharacter(); + scanSingleQuotedEscapeCharacter(); escapeSize = currentPosition - escapeSize; if (withoutUnicodePtr == 0) { //buffer all the entries that have been left aside.... @@ -1149,29 +1168,28 @@ public class Scanner implements IScanner, ITerminalSymbols { } while (currentCharacter != '"') { - - + /**** in PHP \r and \n are valid in string literals ****/ -// if ((currentCharacter == '\n') -// || (currentCharacter == '\r')) { -// // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed -// for (int lookAhead = 0; lookAhead < 50; lookAhead++) { -// if (currentPosition + lookAhead == source.length) -// break; -// if (source[currentPosition + lookAhead] == '\n') -// break; -// if (source[currentPosition + lookAhead] == '\"') { -// currentPosition += lookAhead + 1; -// break; -// } -// } -// throw new InvalidInputException(INVALID_CHAR_IN_STRING); -// } + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } if (currentCharacter == '\\') { int escapeSize = currentPosition; boolean backSlashAsUnicodeInString = unicodeAsBackSlash; //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one - scanEscapeCharacter(); + scanDoubleQuotedEscapeCharacter(); escapeSize = currentPosition - escapeSize; if (withoutUnicodePtr == 0) { //buffer all the entries that have been left aside.... @@ -1238,64 +1256,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } return TokenNameStringLiteral; case '`' : - try { - // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } - - while (currentCharacter != '`') { - - - /**** in PHP \r and \n are valid in string literals ****/ -// if ((currentCharacter == '\n') -// || (currentCharacter == '\r')) { -// // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed -// for (int lookAhead = 0; lookAhead < 50; lookAhead++) { -// if (currentPosition + lookAhead == source.length) -// break; -// if (source[currentPosition + lookAhead] == '\n') -// break; -// if (source[currentPosition + lookAhead] == '\"') { -// currentPosition += lookAhead + 1; -// break; -// } -// } -// throw new InvalidInputException(INVALID_CHAR_IN_STRING); -// } - if (currentCharacter == '\\') { - int escapeSize = currentPosition; - boolean backSlashAsUnicodeInString = unicodeAsBackSlash; - //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one - scanEscapeCharacter(); - escapeSize = currentPosition - escapeSize; - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = - currentPosition - escapeSize - 1 - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } else { //overwrite the / in the buffer - withoutUnicodeBuffer[withoutUnicodePtr] = - currentCharacter; - if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct - withoutUnicodePtr--; - } - } - } + try { // consume next character unicodeAsBackSlash = false; if (((currentCharacter = source[currentPosition++]) == '\\') @@ -1308,38 +1269,94 @@ public class Scanner implements IScanner, ITerminalSymbols { } } - } - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(UNTERMINATED_STRING); - } catch (InvalidInputException e) { - if (e.getMessage().equals(INVALID_ESCAPE)) { - // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed - for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - if (currentPosition + lookAhead == source.length) - break; - if (source[currentPosition + lookAhead] == '\n') - break; - if (source[currentPosition + lookAhead] == '`') { - currentPosition += lookAhead + 1; - break; + while (currentCharacter != '`') { + + /**** in PHP \r and \n are valid in string literals ****/ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one + scanDoubleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = + currentPosition - escapeSize - 1 - startPosition; + System.arraycopy( + source, + startPosition, + withoutUnicodeBuffer, + 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = + currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct + withoutUnicodePtr--; + } + } } + // consume next character + unicodeAsBackSlash = false; + if (((currentCharacter = source[currentPosition++]) == '\\') + && (source[currentPosition] == 'u')) { + getNextUnicodeChar(); + } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; + } + } + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { + // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + if (currentPosition + lookAhead == source.length) + break; + if (source[currentPosition + lookAhead] == '\n') + break; + if (source[currentPosition + lookAhead] == '`') { + currentPosition += lookAhead + 1; + break; + } + } + } + throw e; // rethrow } - throw e; // rethrow - } - if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. - if (currentLine == null) { - currentLine = new NLSLine(); - lines.add(currentLine); + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); + } + currentLine.add( + new StringLiteral( + getCurrentTokenSourceString(), + startPosition, + currentPosition - 1)); } - currentLine.add( - new StringLiteral( - getCurrentTokenSourceString(), - startPosition, - currentPosition - 1)); - } - return TokenNameStringInterpolated; + return TokenNameStringInterpolated; case '#' : case '/' : { @@ -1548,7 +1565,7 @@ public class Scanner implements IScanner, ITerminalSymbols { recordComment(isJavadoc); if (tokenizeComments) { if (isJavadoc) - return TokenNameCOMMENT_JAVADOC; + return TokenNameCOMMENT_PHPDOC; return TokenNameCOMMENT_BLOCK; } } catch (IndexOutOfBoundsException e) { @@ -1568,11 +1585,15 @@ public class Scanner implements IScanner, ITerminalSymbols { default : if (currentCharacter == '$') { - if (getNextChar('{')) + while ( (currentCharacter = source[currentPosition++])=='$') { + } + if (currentCharacter == '{') return TokenNameDOLLAR_LBRACE; - return scanIdentifierOrKeyword(true); + if (isPHPIdentifierStart(currentCharacter)) + return scanIdentifierOrKeyword(true); + return TokenNameERROR; } - if (Character.isJavaIdentifierStart(currentCharacter)) + if (isPHPIdentifierStart(currentCharacter)) return scanIdentifierOrKeyword(false); if (Character.isDigit(currentCharacter)) return scanNumber(false); @@ -1669,7 +1690,7 @@ public class Scanner implements IScanner, ITerminalSymbols { test = getNextChar('\\'); if (test) { try { - scanEscapeCharacter(); + scanDoubleQuotedEscapeCharacter(); } catch (InvalidInputException ex) { }; } else { @@ -1718,7 +1739,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } if (currentCharacter == '\\') { try { - scanEscapeCharacter(); + scanDoubleQuotedEscapeCharacter(); } catch (InvalidInputException ex) { }; } @@ -1930,7 +1951,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } default : - if (Character.isJavaIdentifierStart(currentCharacter) + if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') { try { scanIdentifierOrKeyword((currentCharacter == '$')); @@ -2421,7 +2442,8 @@ public class Scanner implements IScanner, ITerminalSymbols { commentPtr = -1; // reset comment stack } - public final void scanEscapeCharacter() throws InvalidInputException { + public final void scanSingleQuotedEscapeCharacter() + throws InvalidInputException { // the string with "\\u" is a legal string of two chars \ and u //thus we use a direct access to the source (for regular cases). @@ -2439,18 +2461,49 @@ public class Scanner implements IScanner, ITerminalSymbols { } else currentCharacter = source[currentPosition++]; switch (currentCharacter) { - case 'b' : - currentCharacter = '\b'; + case '\'' : + currentCharacter = '\''; break; + case '\\' : + currentCharacter = '\\'; + break; + default : + currentCharacter = '\\'; + currentPosition--; + } + } + + public final void scanDoubleQuotedEscapeCharacter() + throws InvalidInputException { + // the string with "\\u" is a legal string of two chars \ and u + //thus we use a direct access to the source (for regular cases). + + if (unicodeAsBackSlash) { + // consume next character + unicodeAsBackSlash = false; + if (((currentCharacter = source[currentPosition++]) == '\\') + && (source[currentPosition] == 'u')) { + getNextUnicodeChar(); + } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + } + } else + currentCharacter = source[currentPosition++]; + switch (currentCharacter) { + // case 'b' : + // currentCharacter = '\b'; + // break; case 't' : currentCharacter = '\t'; break; case 'n' : currentCharacter = '\n'; break; - case 'f' : - currentCharacter = '\f'; - break; + // case 'f' : + // currentCharacter = '\f'; + // break; case 'r' : currentCharacter = '\r'; break; @@ -2463,6 +2516,9 @@ public class Scanner implements IScanner, ITerminalSymbols { case '\\' : currentCharacter = '\\'; break; + case '$' : + currentCharacter = '$'; + break; default : // -----------octal escape-------------- // OctalDigit @@ -2522,6 +2578,7 @@ public class Scanner implements IScanner, ITerminalSymbols { //disptach on the second char :-)...cool....but fast ! useAssertAsAnIndentifier = false; + while (getNextCharAsJavaIdentifierPart()) { };