X-Git-Url: http://secure.phpeclipse.com diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java index 1ae5d16..ea1f9c0 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java @@ -12,6 +12,7 @@ package net.sourceforge.phpdt.internal.compiler.parser; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Stack; import net.sourceforge.phpdt.core.compiler.CharOperation; import net.sourceforge.phpdt.core.compiler.IScanner; import net.sourceforge.phpdt.core.compiler.ITerminalSymbols; @@ -32,6 +33,7 @@ public class Scanner implements IScanner, ITerminalSymbols { public boolean containsAssertKeyword = false; public boolean recordLineSeparator; public boolean phpMode = false; + public Stack encapsedStringStack = null; public char currentCharacter; public int startPosition; public int currentPosition; @@ -40,6 +42,7 @@ public class Scanner implements IScanner, ITerminalSymbols { // source public boolean tokenizeComments; public boolean tokenizeWhiteSpace; + public boolean tokenizeStrings; //source should be viewed as a window (aka a part) //of a entire very large stream public char source[]; @@ -132,12 +135,7 @@ public class Scanner implements IScanner, ITerminalSymbols { public char[][] taskPriorities = null; public static final boolean DEBUG = false; public static final boolean TRACE = false; - public Scanner() { - this(false, false); - } - public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) { - this(tokenizeComments, tokenizeWhiteSpace, false); - } + /** * Determines if the specified character is permissible as the first * character in a PHP identifier @@ -803,6 +801,294 @@ public class Scanner implements IScanner, ITerminalSymbols { currentPosition = tempPosition; return TokenNameLPAREN; } + public void consumeStringInterpolated() throws InvalidInputException { + try { + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } + while (currentCharacter != '`') { + /** ** in PHP \r and \n are valid in string literals *** */ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need + // the previous value few lines down this one + scanDoubleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = currentPosition - escapeSize - 1 + - startPosition; + System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream + // where only one is correct + withoutUnicodePtr--; + } + } + } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { + // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + if (currentPosition + lookAhead == source.length) + break; + if (source[currentPosition + lookAhead] == '\n') + break; + if (source[currentPosition + lookAhead] == '`') { + currentPosition += lookAhead + 1; + break; + } + } + } + throw e; // rethrow + } + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags + // //$NON-NLS-?$ where ? is an + // int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); + } + currentLine.add(new StringLiteral(getCurrentTokenSourceString(), + startPosition, currentPosition - 1)); + } + } + public void consumeStringConstant() throws InvalidInputException { + try { + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } + while (currentCharacter != '\'') { + /** ** in PHP \r and \n are valid in string literals *** */ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need + // the previous value few lines down this one + scanSingleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = currentPosition - escapeSize - 1 + - startPosition; + System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream + // where only one is correct + withoutUnicodePtr--; + } + } + } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { + // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + if (currentPosition + lookAhead == source.length) + break; + if (source[currentPosition + lookAhead] == '\n') + break; + if (source[currentPosition + lookAhead] == '\'') { + currentPosition += lookAhead + 1; + break; + } + } + } + throw e; // rethrow + } + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags + // //$NON-NLS-?$ where ? is an + // int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); + } + currentLine.add(new StringLiteral(getCurrentTokenSourceString(), + startPosition, currentPosition - 1)); + } + } + public void consumeStringLiteral() throws InvalidInputException { + try { + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } + while (currentCharacter != '"') { + /** ** in PHP \r and \n are valid in string literals *** */ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need + // the previous value few lines down this one + scanDoubleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = currentPosition - escapeSize - 1 + - startPosition; + System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream + // where only one is correct + withoutUnicodePtr--; + } + } + } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { + // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + if (currentPosition + lookAhead == source.length) + break; + if (source[currentPosition + lookAhead] == '\n') + break; + if (source[currentPosition + lookAhead] == '\"') { + currentPosition += lookAhead + 1; + break; + } + } + } + throw e; // rethrow + } + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags + // //$NON-NLS-?$ where ? is an + // int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); + } + currentLine.add(new StringLiteral(getCurrentTokenSourceString(), + startPosition, currentPosition - 1)); + } + } public int getNextToken() throws InvalidInputException { if (!phpMode) { return getInlinedHTML(currentPosition); @@ -815,13 +1101,144 @@ public class Scanner implements IScanner, ITerminalSymbols { return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE; } try { - while (true) { //loop for jumping over comments + while (true) { withoutUnicodePtr = 0; - //start with a new token (even comment written with unicode ) + //start with a new token + char encapsedChar = ' '; + if (!encapsedStringStack.isEmpty()) { + encapsedChar = ((Character) encapsedStringStack.peek()).charValue(); + } + if (encapsedChar != '$' && encapsedChar != ' ') { + currentCharacter = source[currentPosition++]; + if (currentCharacter == encapsedChar) { + switch (currentCharacter) { + case '`' : + return TokenNameEncapsedString0; + case '\'' : + return TokenNameEncapsedString1; + case '"' : + return TokenNameEncapsedString2; + } + } + while (currentCharacter != encapsedChar) { + /** ** in PHP \r and \n are valid in string literals *** */ + switch (currentCharacter) { + case '\\' : + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and + // we need the previous value few lines down this one + scanDoubleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = currentPosition - escapeSize - 1 + - startPosition; + System.arraycopy(source, startPosition, + withoutUnicodeBuffer, 1, withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in + withoutUnicodePtr--; + } + } + break; + case '$' : + if (isPHPIdentifierStart(source[currentPosition]) + || source[currentPosition] == '{') { + currentPosition--; + encapsedStringStack.push(new Character('$')); + return TokenNameSTRING; + } + break; + case '{' : + if (source[currentPosition] == '$') { // CURLY_OPEN + currentPosition--; + encapsedStringStack.push(new Character('$')); + return TokenNameSTRING; + } + } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + } // end while + currentPosition--; + return TokenNameSTRING; + } // ---------Consume white space and handles startPosition--------- int whiteStart = currentPosition; - boolean isWhiteSpace; - do { + startPosition = currentPosition; + currentCharacter = source[currentPosition++]; + if (encapsedChar == '$') { + switch (currentCharacter) { + case '\\' : + currentCharacter = source[currentPosition++]; + return TokenNameSTRING; + case '{' : + if (encapsedChar == '$') { + if (getNextChar('$')) + return TokenNameCURLY_OPEN; + } + return TokenNameLBRACE; + case '}' : + return TokenNameRBRACE; + case '[' : + return TokenNameLBRACKET; + case ']' : + return TokenNameRBRACKET; + case '\'' : + if (tokenizeStrings) { + consumeStringConstant(); + return TokenNameStringConstant; + } + return TokenNameEncapsedString1; + case '"' : + return TokenNameEncapsedString2; + case '`' : + if (tokenizeStrings) { + consumeStringInterpolated(); + return TokenNameStringInterpolated; + } + return TokenNameEncapsedString0; + case '-' : + if (getNextChar('>')) + return TokenNameMINUS_GREATER; + return TokenNameSTRING; + default : + if (currentCharacter == '$') { + int oldPosition = currentPosition; + try { + currentCharacter = source[currentPosition++]; + if (currentCharacter == '{') { + return TokenNameDOLLAR_LBRACE; + } + if (isPHPIdentifierStart(currentCharacter)) { + return scanIdentifierOrKeyword(true); + } else { + currentPosition = oldPosition; + return TokenNameSTRING; + } + } catch (IndexOutOfBoundsException e) { + currentPosition = oldPosition; + return TokenNameSTRING; + } + } + if (isPHPIdentifierStart(currentCharacter)) + return scanIdentifierOrKeyword(false); + if (Character.isDigit(currentCharacter)) + return scanNumber(false); + return TokenNameERROR; + } + } + // boolean isWhiteSpace; + + while ((currentCharacter == ' ') + || Character.isWhitespace(currentCharacter)) { startPosition = currentPosition; currentCharacter = source[currentPosition++]; // if (((currentCharacter = source[currentPosition++]) == '\\') @@ -836,10 +1253,10 @@ public class Scanner implements IScanner, ITerminalSymbols { currentLine = null; } } - isWhiteSpace = (currentCharacter == ' ') - || Character.isWhitespace(currentCharacter); + // isWhiteSpace = (currentCharacter == ' ') + // || Character.isWhitespace(currentCharacter); // } - } while (isWhiteSpace); + } if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) { // reposition scanner in case we are interested by spaces as tokens currentPosition--; @@ -931,9 +1348,12 @@ public class Scanner implements IScanner, ITerminalSymbols { if (getNextChar('=')) return TokenNameLEFT_SHIFT_EQUAL; if (getNextChar('<')) { - int heredocStart = currentPosition; - int heredocLength = 0; currentCharacter = source[currentPosition++]; + while (Character.isWhitespace(currentCharacter)) { + currentCharacter = source[currentPosition++]; + } + int heredocStart = currentPosition - 1; + int heredocLength = 0; if (isPHPIdentifierStart(currentCharacter)) { currentCharacter = source[currentPosition++]; } else { @@ -1024,7 +1444,7 @@ public class Scanner implements IScanner, ITerminalSymbols { case '?' : if (getNextChar('>')) { phpMode = false; - if (currentPosition==source.length) { + if (currentPosition == source.length) { phpMode = true; return TokenNameINLINE_HTML; } @@ -1037,383 +1457,21 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameCOLON; case '@' : return TokenNameAT; - // case '\'' : - // { - // int test; - // if ((test = getNextChar('\n', '\r')) == 0) { - // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); - // } - // if (test > 0) { - // // relocate if finding another quote fairly close: thus unicode - // '/u000D' will be fully consumed - // for (int lookAhead = 0; - // lookAhead < 3; - // lookAhead++) { - // if (currentPosition + lookAhead - // == source.length) - // break; - // if (source[currentPosition + lookAhead] - // == '\n') - // break; - // if (source[currentPosition + lookAhead] - // == '\'') { - // currentPosition += lookAhead + 1; - // break; - // } - // } - // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); - // } - // } - // if (getNextChar('\'')) { - // // relocate if finding another quote fairly close: thus unicode - // '/u000D' will be fully consumed - // for (int lookAhead = 0; - // lookAhead < 3; - // lookAhead++) { - // if (currentPosition + lookAhead - // == source.length) - // break; - // if (source[currentPosition + lookAhead] - // == '\n') - // break; - // if (source[currentPosition + lookAhead] - // == '\'') { - // currentPosition += lookAhead + 1; - // break; - // } - // } - // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); - // } - // if (getNextChar('\\')) - // scanEscapeCharacter(); - // else { // consume next character - // unicodeAsBackSlash = false; - // if (((currentCharacter = source[currentPosition++]) - // == '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - // if (withoutUnicodePtr != 0) { - // withoutUnicodeBuffer[++withoutUnicodePtr] = - // currentCharacter; - // } - // } - // } - // // if (getNextChar('\'')) - // // return TokenNameCharacterLiteral; - // // relocate if finding another quote fairly close: thus unicode - // '/u000D' will be fully consumed - // for (int lookAhead = 0; lookAhead < 20; lookAhead++) { - // if (currentPosition + lookAhead == source.length) - // break; - // if (source[currentPosition + lookAhead] == '\n') - // break; - // if (source[currentPosition + lookAhead] == '\'') { - // currentPosition += lookAhead + 1; - // break; - // } - // } - // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); case '\'' : - try { - // consume next character - unicodeAsBackSlash = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) == '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - // if (withoutUnicodePtr != 0) { - // withoutUnicodeBuffer[++withoutUnicodePtr] = - // currentCharacter; - // } - // } - while (currentCharacter != '\'') { - /** ** in PHP \r and \n are valid in string literals *** */ - // if ((currentCharacter == '\n') - // || (currentCharacter == '\r')) { - // // relocate if finding another quote fairly close: thus - // unicode '/u000D' will be fully consumed - // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - // if (currentPosition + lookAhead == source.length) - // break; - // if (source[currentPosition + lookAhead] == '\n') - // break; - // if (source[currentPosition + lookAhead] == '\"') { - // currentPosition += lookAhead + 1; - // break; - // } - // } - // throw new InvalidInputException(INVALID_CHAR_IN_STRING); - // } - if (currentCharacter == '\\') { - int escapeSize = currentPosition; - boolean backSlashAsUnicodeInString = unicodeAsBackSlash; - //scanEscapeCharacter make a side effect on this value and - // we need the previous value few lines down this one - scanSingleQuotedEscapeCharacter(); - escapeSize = currentPosition - escapeSize; - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - escapeSize - 1 - - startPosition; - System.arraycopy(source, startPosition, - withoutUnicodeBuffer, 1, withoutUnicodePtr); - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } else { //overwrite the / in the buffer - withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; - if (backSlashAsUnicodeInString) { //there are TWO \ in - // the stream where - // only one is correct - withoutUnicodePtr--; - } - } - } - // consume next character - unicodeAsBackSlash = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) == - // '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - // } - } - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(UNTERMINATED_STRING); - } catch (InvalidInputException e) { - if (e.getMessage().equals(INVALID_ESCAPE)) { - // relocate if finding another quote fairly close: thus - // unicode '/u000D' will be fully consumed - for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - if (currentPosition + lookAhead == source.length) - break; - if (source[currentPosition + lookAhead] == '\n') - break; - if (source[currentPosition + lookAhead] == '\'') { - currentPosition += lookAhead + 1; - break; - } - } - } - throw e; // rethrow - } - if (checkNonExternalizedStringLiterals) { // check for presence - // of NLS tags - // //$NON-NLS-?$ where - // ? is an int. - if (currentLine == null) { - currentLine = new NLSLine(); - lines.add(currentLine); - } - currentLine.add(new StringLiteral( - getCurrentTokenSourceString(), startPosition, - currentPosition - 1)); - } + consumeStringConstant(); return TokenNameStringConstant; case '"' : - try { - // consume next character - unicodeAsBackSlash = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) == '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - // if (withoutUnicodePtr != 0) { - // withoutUnicodeBuffer[++withoutUnicodePtr] = - // currentCharacter; - // } - // } - while (currentCharacter != '"') { - /** ** in PHP \r and \n are valid in string literals *** */ - // if ((currentCharacter == '\n') - // || (currentCharacter == '\r')) { - // // relocate if finding another quote fairly close: thus - // unicode '/u000D' will be fully consumed - // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - // if (currentPosition + lookAhead == source.length) - // break; - // if (source[currentPosition + lookAhead] == '\n') - // break; - // if (source[currentPosition + lookAhead] == '\"') { - // currentPosition += lookAhead + 1; - // break; - // } - // } - // throw new InvalidInputException(INVALID_CHAR_IN_STRING); - // } - if (currentCharacter == '\\') { - int escapeSize = currentPosition; - boolean backSlashAsUnicodeInString = unicodeAsBackSlash; - //scanEscapeCharacter make a side effect on this value and - // we need the previous value few lines down this one - scanDoubleQuotedEscapeCharacter(); - escapeSize = currentPosition - escapeSize; - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - escapeSize - 1 - - startPosition; - System.arraycopy(source, startPosition, - withoutUnicodeBuffer, 1, withoutUnicodePtr); - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } else { //overwrite the / in the buffer - withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; - if (backSlashAsUnicodeInString) { //there are TWO \ in - // the stream where - // only one is correct - withoutUnicodePtr--; - } - } - } - // consume next character - unicodeAsBackSlash = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) == - // '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - // } - } - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(UNTERMINATED_STRING); - } catch (InvalidInputException e) { - if (e.getMessage().equals(INVALID_ESCAPE)) { - // relocate if finding another quote fairly close: thus - // unicode '/u000D' will be fully consumed - for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - if (currentPosition + lookAhead == source.length) - break; - if (source[currentPosition + lookAhead] == '\n') - break; - if (source[currentPosition + lookAhead] == '\"') { - currentPosition += lookAhead + 1; - break; - } - } - } - throw e; // rethrow + if (tokenizeStrings) { + consumeStringLiteral(); + return TokenNameStringLiteral; } - if (checkNonExternalizedStringLiterals) { // check for presence - // of NLS tags - // //$NON-NLS-?$ where - // ? is an int. - if (currentLine == null) { - currentLine = new NLSLine(); - lines.add(currentLine); - } - currentLine.add(new StringLiteral( - getCurrentTokenSourceString(), startPosition, - currentPosition - 1)); - } - return TokenNameStringLiteral; + return TokenNameEncapsedString2; case '`' : - try { - // consume next character - unicodeAsBackSlash = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) == '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - // if (withoutUnicodePtr != 0) { - // withoutUnicodeBuffer[++withoutUnicodePtr] = - // currentCharacter; - // } - // } - while (currentCharacter != '`') { - /** ** in PHP \r and \n are valid in string literals *** */ - // if ((currentCharacter == '\n') - // || (currentCharacter == '\r')) { - // // relocate if finding another quote fairly close: thus - // unicode '/u000D' will be fully consumed - // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - // if (currentPosition + lookAhead == source.length) - // break; - // if (source[currentPosition + lookAhead] == '\n') - // break; - // if (source[currentPosition + lookAhead] == '\"') { - // currentPosition += lookAhead + 1; - // break; - // } - // } - // throw new InvalidInputException(INVALID_CHAR_IN_STRING); - // } - if (currentCharacter == '\\') { - int escapeSize = currentPosition; - boolean backSlashAsUnicodeInString = unicodeAsBackSlash; - //scanEscapeCharacter make a side effect on this value and - // we need the previous value few lines down this one - scanDoubleQuotedEscapeCharacter(); - escapeSize = currentPosition - escapeSize; - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - escapeSize - 1 - - startPosition; - System.arraycopy(source, startPosition, - withoutUnicodeBuffer, 1, withoutUnicodePtr); - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } else { //overwrite the / in the buffer - withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; - if (backSlashAsUnicodeInString) { //there are TWO \ in - // the stream where - // only one is correct - withoutUnicodePtr--; - } - } - } - // consume next character - unicodeAsBackSlash = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) == - // '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - // } - } - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(UNTERMINATED_STRING); - } catch (InvalidInputException e) { - if (e.getMessage().equals(INVALID_ESCAPE)) { - // relocate if finding another quote fairly close: thus - // unicode '/u000D' will be fully consumed - for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - if (currentPosition + lookAhead == source.length) - break; - if (source[currentPosition + lookAhead] == '\n') - break; - if (source[currentPosition + lookAhead] == '`') { - currentPosition += lookAhead + 1; - break; - } - } - } - throw e; // rethrow - } - if (checkNonExternalizedStringLiterals) { // check for presence - // of NLS tags - // //$NON-NLS-?$ where - // ? is an int. - if (currentLine == null) { - currentLine = new NLSLine(); - lines.add(currentLine); - } - currentLine.add(new StringLiteral( - getCurrentTokenSourceString(), startPosition, - currentPosition - 1)); + if (tokenizeStrings) { + consumeStringInterpolated(); + return TokenNameStringInterpolated; } - return TokenNameStringInterpolated; + return TokenNameEncapsedString0; case '#' : case '/' : { @@ -1452,7 +1510,8 @@ public class Scanner implements IScanner, ITerminalSymbols { // Character.getNumericValue(source[currentPosition++])) // > 15 // || c4 < 0) { - // throw new InvalidInputException(INVALID_UNICODE_ESCAPE); + // throw new + // InvalidInputException(INVALID_UNICODE_ESCAPE); // } else { // currentCharacter = // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); @@ -1523,7 +1582,8 @@ public class Scanner implements IScanner, ITerminalSymbols { endPositionForLineComment = currentPosition - 1; } recordComment(false); - if ((currentCharacter == '\r') + if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition); + if ((currentCharacter == '\r') || (currentCharacter == '\n')) { checkNonExternalizeString(); if (recordLineSeparator) { @@ -1679,9 +1739,9 @@ public class Scanner implements IScanner, ITerminalSymbols { */ private int getInlinedHTML(int start) throws InvalidInputException { // int htmlPosition = start; - if (currentPosition>source.length) { - currentPosition = source.length; - return TokenNameEOF; + if (currentPosition > source.length) { + currentPosition = source.length; + return TokenNameEOF; } startPosition = start; try { @@ -3444,6 +3504,7 @@ public class Scanner implements IScanner, ITerminalSymbols { initialPosition = currentPosition = 0; containsAssertKeyword = false; withoutUnicodeBuffer = new char[this.source.length]; + encapsedStringStack = new Stack(); } public String toString() { if (startPosition == source.length) @@ -3526,6 +3587,8 @@ public class Scanner implements IScanner, ITerminalSymbols { return "endswitch"; //$NON-NLS-1$ case TokenNameendwhile : return "endwhile"; //$NON-NLS-1$ + case TokenNameexit: + return "exit"; case TokenNameextends : return "extends"; //$NON-NLS-1$ // case TokenNamefalse : @@ -3597,12 +3660,20 @@ public class Scanner implements IScanner, ITerminalSymbols { case TokenNameDoubleLiteral : return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameStringLiteral : - return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ + return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameStringConstant : return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameStringInterpolated : return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ + case TokenNameEncapsedString0 : + return "`"; //$NON-NLS-1$ + case TokenNameEncapsedString1 : + return "\'"; //$NON-NLS-1$ + case TokenNameEncapsedString2 : + return "\""; //$NON-NLS-1$ + case TokenNameSTRING : + return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameHEREDOC : return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ case TokenNamePLUS_PLUS : @@ -3709,8 +3780,8 @@ public class Scanner implements IScanner, ITerminalSymbols { return "@"; case TokenNameDOLLAR : return "$"; - // case TokenNameDOLLAR_LBRACE : - // return "${"; + case TokenNameDOLLAR_LBRACE : + return "${"; case TokenNameEOF : return "EOF"; //$NON-NLS-1$ case TokenNameWHITESPACE : @@ -3721,9 +3792,9 @@ public class Scanner implements IScanner, ITerminalSymbols { return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ case TokenNameCOMMENT_PHPDOC : return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ - // case TokenNameHTML : - // return "HTML(" + new String(getCurrentTokenSource()) + ")"; - // //$NON-NLS-1$ + // case TokenNameHTML : + // return "HTML(" + new String(getCurrentTokenSource()) + ")"; + // //$NON-NLS-1$ case TokenNameFILE : return "__FILE__"; //$NON-NLS-1$ case TokenNameLINE : @@ -3749,6 +3820,13 @@ public class Scanner implements IScanner, ITerminalSymbols { + new String(getCurrentTokenSource()); //$NON-NLS-1$ } } + + public Scanner() { + this(false, false); + } + public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) { + this(tokenizeComments, tokenizeWhiteSpace, false); + } public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) { this(tokenizeComments, tokenizeWhiteSpace, @@ -3756,11 +3834,23 @@ public class Scanner implements IScanner, ITerminalSymbols { } public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals, boolean assertMode) { + this(tokenizeComments, tokenizeWhiteSpace, + checkNonExternalizedStringLiterals, assertMode, false, null, null); + } + public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, + boolean checkNonExternalizedStringLiterals, boolean assertMode, + boolean tokenizeStrings, + char[][] taskTags, + char[][] taskPriorities) { this.eofPosition = Integer.MAX_VALUE; this.tokenizeComments = tokenizeComments; this.tokenizeWhiteSpace = tokenizeWhiteSpace; + this.tokenizeStrings = tokenizeStrings; this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals; this.assertMode = assertMode; + this.encapsedStringStack = null; + this.taskTags = taskTags; + this.taskPriorities = taskPriorities; } private void checkNonExternalizeString() throws InvalidInputException { if (currentLine == null)