X-Git-Url: http://secure.phpeclipse.com diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java index 8a7179b..5ff03b1 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java @@ -42,6 +42,7 @@ public class Scanner implements IScanner, ITerminalSymbols { // source public boolean tokenizeComments; public boolean tokenizeWhiteSpace; + public boolean tokenizeStrings; //source should be viewed as a window (aka a part) //of a entire very large stream public char source[]; @@ -131,7 +132,7 @@ public class Scanner implements IScanner, ITerminalSymbols { public int[][] foundTaskPositions; public int foundTaskCount = 0; public char[][] taskTags = null; - public char[][] taskPriorities = null; + public char[][] taskPriorities = null; public static final boolean DEBUG = false; public static final boolean TRACE = false; public Scanner() { @@ -805,6 +806,294 @@ public class Scanner implements IScanner, ITerminalSymbols { currentPosition = tempPosition; return TokenNameLPAREN; } + public void consumeStringInterpolated() throws InvalidInputException { + try { + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } + while (currentCharacter != '`') { + /** ** in PHP \r and \n are valid in string literals *** */ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need + // the previous value few lines down this one + scanDoubleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = currentPosition - escapeSize - 1 + - startPosition; + System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream + // where only one is correct + withoutUnicodePtr--; + } + } + } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { + // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + if (currentPosition + lookAhead == source.length) + break; + if (source[currentPosition + lookAhead] == '\n') + break; + if (source[currentPosition + lookAhead] == '`') { + currentPosition += lookAhead + 1; + break; + } + } + } + throw e; // rethrow + } + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags + // //$NON-NLS-?$ where ? is an + // int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); + } + currentLine.add(new StringLiteral(getCurrentTokenSourceString(), + startPosition, currentPosition - 1)); + } + } + public void consumeStringConstant() throws InvalidInputException { + try { + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } + while (currentCharacter != '\'') { + /** ** in PHP \r and \n are valid in string literals *** */ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need + // the previous value few lines down this one + scanSingleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = currentPosition - escapeSize - 1 + - startPosition; + System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream + // where only one is correct + withoutUnicodePtr--; + } + } + } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { + // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + if (currentPosition + lookAhead == source.length) + break; + if (source[currentPosition + lookAhead] == '\n') + break; + if (source[currentPosition + lookAhead] == '\'') { + currentPosition += lookAhead + 1; + break; + } + } + } + throw e; // rethrow + } + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags + // //$NON-NLS-?$ where ? is an + // int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); + } + currentLine.add(new StringLiteral(getCurrentTokenSourceString(), + startPosition, currentPosition - 1)); + } + } + public void consumeStringLiteral() throws InvalidInputException { + try { + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } + while (currentCharacter != '"') { + /** ** in PHP \r and \n are valid in string literals *** */ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need + // the previous value few lines down this one + scanDoubleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = currentPosition - escapeSize - 1 + - startPosition; + System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream + // where only one is correct + withoutUnicodePtr--; + } + } + } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { + // relocate if finding another quote fairly close: thus unicode + // '/u000D' will be fully consumed + for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + if (currentPosition + lookAhead == source.length) + break; + if (source[currentPosition + lookAhead] == '\n') + break; + if (source[currentPosition + lookAhead] == '\"') { + currentPosition += lookAhead + 1; + break; + } + } + } + throw e; // rethrow + } + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags + // //$NON-NLS-?$ where ? is an + // int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); + } + currentLine.add(new StringLiteral(getCurrentTokenSourceString(), + startPosition, currentPosition - 1)); + } + } public int getNextToken() throws InvalidInputException { if (!phpMode) { return getInlinedHTML(currentPosition); @@ -908,10 +1197,18 @@ public class Scanner implements IScanner, ITerminalSymbols { case ']' : return TokenNameRBRACKET; case '\'' : + if (tokenizeStrings) { + consumeStringConstant(); + return TokenNameStringConstant; + } return TokenNameEncapsedString1; case '"' : return TokenNameEncapsedString2; case '`' : + if (tokenizeStrings) { + consumeStringInterpolated(); + return TokenNameStringInterpolated; + } return TokenNameEncapsedString0; case '-' : if (getNextChar('>')) @@ -943,9 +1240,9 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameERROR; } } -// boolean isWhiteSpace; + // boolean isWhiteSpace; while ((currentCharacter == ' ') - || Character.isWhitespace(currentCharacter)) { + || Character.isWhitespace(currentCharacter)) { startPosition = currentPosition; currentCharacter = source[currentPosition++]; // if (((currentCharacter = source[currentPosition++]) == '\\') @@ -960,10 +1257,10 @@ public class Scanner implements IScanner, ITerminalSymbols { currentLine = null; } } -// isWhiteSpace = (currentCharacter == ' ') -// || Character.isWhitespace(currentCharacter); + // isWhiteSpace = (currentCharacter == ' ') + // || Character.isWhitespace(currentCharacter); // } - } + } if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) { // reposition scanner in case we are interested by spaces as tokens currentPosition--; @@ -973,559 +1270,465 @@ public class Scanner implements IScanner, ITerminalSymbols { //little trick to get out in the middle of a source compuation if (currentPosition > eofPosition) return TokenNameEOF; - - // ---------Identify the next token------------- - switch (currentCharacter) { - case '(' : - return getCastOrParen(); - case ')' : - return TokenNameRPAREN; - case '{' : - return TokenNameLBRACE; - case '}' : - return TokenNameRBRACE; - case '[' : - return TokenNameLBRACKET; - case ']' : - return TokenNameRBRACKET; - case ';' : - return TokenNameSEMICOLON; - case ',' : - return TokenNameCOMMA; - case '.' : - if (getNextChar('=')) - return TokenNameDOT_EQUAL; - if (getNextCharAsDigit()) - return scanNumber(true); - return TokenNameDOT; - case '+' : - { - int test; - if ((test = getNextChar('+', '=')) == 0) - return TokenNamePLUS_PLUS; - if (test > 0) - return TokenNamePLUS_EQUAL; - return TokenNamePLUS; - } - case '-' : + // ---------Identify the next token------------- + switch (currentCharacter) { + case '(' : + return getCastOrParen(); + case ')' : + return TokenNameRPAREN; + case '{' : + return TokenNameLBRACE; + case '}' : + return TokenNameRBRACE; + case '[' : + return TokenNameLBRACKET; + case ']' : + return TokenNameRBRACKET; + case ';' : + return TokenNameSEMICOLON; + case ',' : + return TokenNameCOMMA; + case '.' : + if (getNextChar('=')) + return TokenNameDOT_EQUAL; + if (getNextCharAsDigit()) + return scanNumber(true); + return TokenNameDOT; + case '+' : + { + int test; + if ((test = getNextChar('+', '=')) == 0) + return TokenNamePLUS_PLUS; + if (test > 0) + return TokenNamePLUS_EQUAL; + return TokenNamePLUS; + } + case '-' : { - int test; - if ((test = getNextChar('-', '=')) == 0) - return TokenNameMINUS_MINUS; - if (test > 0) - return TokenNameMINUS_EQUAL; - if (getNextChar('>')) - return TokenNameMINUS_GREATER; - return TokenNameMINUS; - } - case '~' : - if (getNextChar('=')) - return TokenNameTWIDDLE_EQUAL; - return TokenNameTWIDDLE; - case '!' : + int test; + if ((test = getNextChar('-', '=')) == 0) + return TokenNameMINUS_MINUS; + if (test > 0) + return TokenNameMINUS_EQUAL; + if (getNextChar('>')) + return TokenNameMINUS_GREATER; + return TokenNameMINUS; + } + case '~' : + if (getNextChar('=')) + return TokenNameTWIDDLE_EQUAL; + return TokenNameTWIDDLE; + case '!' : + if (getNextChar('=')) { if (getNextChar('=')) { - if (getNextChar('=')) { - return TokenNameNOT_EQUAL_EQUAL; - } - return TokenNameNOT_EQUAL; + return TokenNameNOT_EQUAL_EQUAL; } - return TokenNameNOT; - case '*' : - if (getNextChar('=')) - return TokenNameMULTIPLY_EQUAL; - return TokenNameMULTIPLY; - case '%' : - if (getNextChar('=')) - return TokenNameREMAINDER_EQUAL; - return TokenNameREMAINDER; - case '<' : - { - int oldPosition = currentPosition; - try { - currentCharacter = source[currentPosition++]; - } catch (IndexOutOfBoundsException e) { - currentPosition = oldPosition; - return TokenNameLESS; - } - switch (currentCharacter) { - case '=' : - return TokenNameLESS_EQUAL; - case '>' : - return TokenNameNOT_EQUAL; - case '<' : - if (getNextChar('=')) - return TokenNameLEFT_SHIFT_EQUAL; - if (getNextChar('<')) { + return TokenNameNOT_EQUAL; + } + return TokenNameNOT; + case '*' : + if (getNextChar('=')) + return TokenNameMULTIPLY_EQUAL; + return TokenNameMULTIPLY; + case '%' : + if (getNextChar('=')) + return TokenNameREMAINDER_EQUAL; + return TokenNameREMAINDER; + case '<' : + { + int oldPosition = currentPosition; + try { + currentCharacter = source[currentPosition++]; + } catch (IndexOutOfBoundsException e) { + currentPosition = oldPosition; + return TokenNameLESS; + } + switch (currentCharacter) { + case '=' : + return TokenNameLESS_EQUAL; + case '>' : + return TokenNameNOT_EQUAL; + case '<' : + if (getNextChar('=')) + return TokenNameLEFT_SHIFT_EQUAL; + if (getNextChar('<')) { + currentCharacter = source[currentPosition++]; + while (Character.isWhitespace(currentCharacter)) { currentCharacter = source[currentPosition++]; - while (Character.isWhitespace(currentCharacter)) { - currentCharacter = source[currentPosition++]; - } - int heredocStart = currentPosition - 1; - int heredocLength = 0; - if (isPHPIdentifierStart(currentCharacter)) { - currentCharacter = source[currentPosition++]; - } else { - return TokenNameERROR; - } - while (isPHPIdentifierPart(currentCharacter)) { - currentCharacter = source[currentPosition++]; - } - heredocLength = currentPosition - heredocStart - 1; - // heredoc end-tag determination - boolean endTag = true; - char ch; - do { - ch = source[currentPosition++]; - if (ch == '\r' || ch == '\n') { - if (recordLineSeparator) { - pushLineSeparator(); - } else { - currentLine = null; - } - for (int i = 0; i < heredocLength; i++) { - if (source[currentPosition + i] != source[heredocStart - + i]) { - endTag = false; - break; - } - } - if (endTag) { - currentPosition += heredocLength - 1; - currentCharacter = source[currentPosition++]; - break; // do...while loop - } else { - endTag = true; + } + int heredocStart = currentPosition - 1; + int heredocLength = 0; + if (isPHPIdentifierStart(currentCharacter)) { + currentCharacter = source[currentPosition++]; + } else { + return TokenNameERROR; + } + while (isPHPIdentifierPart(currentCharacter)) { + currentCharacter = source[currentPosition++]; + } + heredocLength = currentPosition - heredocStart - 1; + // heredoc end-tag determination + boolean endTag = true; + char ch; + do { + ch = source[currentPosition++]; + if (ch == '\r' || ch == '\n') { + if (recordLineSeparator) { + pushLineSeparator(); + } else { + currentLine = null; + } + for (int i = 0; i < heredocLength; i++) { + if (source[currentPosition + i] != source[heredocStart + + i]) { + endTag = false; + break; } } - } while (true); - return TokenNameHEREDOC; - } - return TokenNameLEFT_SHIFT; - } - currentPosition = oldPosition; - return TokenNameLESS; + if (endTag) { + currentPosition += heredocLength - 1; + currentCharacter = source[currentPosition++]; + break; // do...while loop + } else { + endTag = true; + } + } + } while (true); + return TokenNameHEREDOC; + } + return TokenNameLEFT_SHIFT; } - case '>' : - { - int test; + currentPosition = oldPosition; + return TokenNameLESS; + } + case '>' : + { + int test; + if ((test = getNextChar('=', '>')) == 0) + return TokenNameGREATER_EQUAL; + if (test > 0) { if ((test = getNextChar('=', '>')) == 0) - return TokenNameGREATER_EQUAL; - if (test > 0) { - if ((test = getNextChar('=', '>')) == 0) - return TokenNameRIGHT_SHIFT_EQUAL; - return TokenNameRIGHT_SHIFT; - } - return TokenNameGREATER; + return TokenNameRIGHT_SHIFT_EQUAL; + return TokenNameRIGHT_SHIFT; } - case '=' : + return TokenNameGREATER; + } + case '=' : + if (getNextChar('=')) { if (getNextChar('=')) { - if (getNextChar('=')) { - return TokenNameEQUAL_EQUAL_EQUAL; - } - return TokenNameEQUAL_EQUAL; + return TokenNameEQUAL_EQUAL_EQUAL; } - if (getNextChar('>')) - return TokenNameEQUAL_GREATER; - return TokenNameEQUAL; - case '&' : - { - int test; - if ((test = getNextChar('&', '=')) == 0) - return TokenNameAND_AND; - if (test > 0) - return TokenNameAND_EQUAL; - return TokenNameAND; - } - case '|' : - { - int test; - if ((test = getNextChar('|', '=')) == 0) - return TokenNameOR_OR; - if (test > 0) - return TokenNameOR_EQUAL; - return TokenNameOR; + return TokenNameEQUAL_EQUAL; + } + if (getNextChar('>')) + return TokenNameEQUAL_GREATER; + return TokenNameEQUAL; + case '&' : + { + int test; + if ((test = getNextChar('&', '=')) == 0) + return TokenNameAND_AND; + if (test > 0) + return TokenNameAND_EQUAL; + return TokenNameAND; + } + case '|' : + { + int test; + if ((test = getNextChar('|', '=')) == 0) + return TokenNameOR_OR; + if (test > 0) + return TokenNameOR_EQUAL; + return TokenNameOR; + } + case '^' : + if (getNextChar('=')) + return TokenNameXOR_EQUAL; + return TokenNameXOR; + case '?' : + if (getNextChar('>')) { + phpMode = false; + if (currentPosition == source.length) { + phpMode = true; + return TokenNameINLINE_HTML; } - case '^' : - if (getNextChar('=')) - return TokenNameXOR_EQUAL; - return TokenNameXOR; - case '?' : - if (getNextChar('>')) { - phpMode = false; - if (currentPosition == source.length) { - phpMode = true; - return TokenNameINLINE_HTML; - } - return getInlinedHTML(currentPosition - 2); + return getInlinedHTML(currentPosition - 2); + } + return TokenNameQUESTION; + case ':' : + if (getNextChar(':')) + return TokenNamePAAMAYIM_NEKUDOTAYIM; + return TokenNameCOLON; + case '@' : + return TokenNameAT; + case '\'' : + consumeStringConstant(); + return TokenNameStringConstant; + case '"' : + if (tokenizeStrings) { + consumeStringLiteral(); + return TokenNameStringLiteral; + } + return TokenNameEncapsedString2; + case '`' : + if (tokenizeStrings) { + consumeStringInterpolated(); + return TokenNameStringInterpolated; + } + return TokenNameEncapsedString0; + case '#' : + case '/' : + { + char startChar = currentCharacter; + if (getNextChar('=')) { + return TokenNameDIVIDE_EQUAL; } - return TokenNameQUESTION; - case ':' : - if (getNextChar(':')) - return TokenNamePAAMAYIM_NEKUDOTAYIM; - return TokenNameCOLON; - case '@' : - return TokenNameAT; - case '\'' : -// return TokenNameEncapsedString1; - try { - // consume next character - unicodeAsBackSlash = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) == '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - // if (withoutUnicodePtr != 0) { - // withoutUnicodeBuffer[++withoutUnicodePtr] = - // currentCharacter; - // } - // } - while (currentCharacter != '\'') { - /** ** in PHP \r and \n are valid in string literals *** */ - // if ((currentCharacter == '\n') - // || (currentCharacter == '\r')) { - // // relocate if finding another quote fairly close: thus - // unicode '/u000D' will be fully consumed - // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - // if (currentPosition + lookAhead == source.length) - // break; - // if (source[currentPosition + lookAhead] == '\n') - // break; - // if (source[currentPosition + lookAhead] == '\"') { - // currentPosition += lookAhead + 1; - // break; + int test; + if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) { + //line comment + int endPositionForLineComment = 0; + try { //get the next char + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // } + // if ((c1 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c1 < 0 + // || (c2 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c2 < 0 + // || (c3 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c3 < 0 + // || (c4 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c4 < 0) { + // throw new + // InvalidInputException(INVALID_UNICODE_ESCAPE); + // } else { + // currentCharacter = + // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); // } // } - // throw new InvalidInputException(INVALID_CHAR_IN_STRING); - // } - if (currentCharacter == '\\') { - int escapeSize = currentPosition; - boolean backSlashAsUnicodeInString = unicodeAsBackSlash; - //scanEscapeCharacter make a side effect on this value and - // we need the previous value few lines down this one - scanSingleQuotedEscapeCharacter(); - escapeSize = currentPosition - escapeSize; - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - escapeSize - 1 - - startPosition; - System.arraycopy(source, startPosition, - withoutUnicodeBuffer, 1, withoutUnicodePtr); - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } else { //overwrite the / in the buffer - withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; - if (backSlashAsUnicodeInString) { //there are TWO \ in - // the stream where - // only one is correct - withoutUnicodePtr--; + //handle the \\u case manually into comment + // if (currentCharacter == '\\') { + // if (source[currentPosition] == '\\') + // currentPosition++; + // } //jump over the \\ + boolean isUnicode = false; + while (currentCharacter != '\r' && currentCharacter != '\n') { + if (currentCharacter == '?') { + if (getNextChar('>')) { + startPosition = currentPosition - 2; + phpMode = false; + return TokenNameINLINE_HTML; } } - } - // consume next character - unicodeAsBackSlash = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) == - // '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - // } - } - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(UNTERMINATED_STRING); - } catch (InvalidInputException e) { - if (e.getMessage().equals(INVALID_ESCAPE)) { - // relocate if finding another quote fairly close: thus - // unicode '/u000D' will be fully consumed - for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - if (currentPosition + lookAhead == source.length) - break; - if (source[currentPosition + lookAhead] == '\n') - break; - if (source[currentPosition + lookAhead] == '\'') { - currentPosition += lookAhead + 1; - break; - } - } - } - throw e; // rethrow - } - if (checkNonExternalizedStringLiterals) { // check for presence - // of NLS tags - // //$NON-NLS-?$ where - // ? is an int. - if (currentLine == null) { - currentLine = new NLSLine(); - lines.add(currentLine); - } - currentLine.add(new StringLiteral( - getCurrentTokenSourceString(), startPosition, - currentPosition - 1)); - } - return TokenNameStringConstant; - case '"' : - return TokenNameEncapsedString2; - case '`' : - return TokenNameEncapsedString0; - case '#' : - case '/' : - { - char startChar = currentCharacter; - if (getNextChar('=')) { - return TokenNameDIVIDE_EQUAL; - } - int test; - if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) { - //line comment - int endPositionForLineComment = 0; - try { //get the next char + //get the next char + isUnicode = false; currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) - // == '\\') - // && (source[currentPosition] == 'u')) { - // //-------------unicode traitement ------------ - // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - // currentPosition++; - // while (source[currentPosition] == 'u') { + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // isUnicode = true; + // //-------------unicode traitement ------------ + // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; // currentPosition++; - // } - // if ((c1 = - // Character.getNumericValue(source[currentPosition++])) - // > 15 - // || c1 < 0 - // || (c2 = - // Character.getNumericValue(source[currentPosition++])) - // > 15 - // || c2 < 0 - // || (c3 = - // Character.getNumericValue(source[currentPosition++])) - // > 15 - // || c3 < 0 - // || (c4 = + // while (source[currentPosition] == 'u') { + // currentPosition++; + // } + // if ((c1 = // Character.getNumericValue(source[currentPosition++])) // > 15 - // || c4 < 0) { - // throw new + // || c1 < 0 + // || (c2 = + // Character.getNumericValue( + // source[currentPosition++])) + // > 15 + // || c2 < 0 + // || (c3 = + // Character.getNumericValue( + // source[currentPosition++])) + // > 15 + // || c3 < 0 + // || (c4 = + // Character.getNumericValue( + // source[currentPosition++])) + // > 15 + // || c4 < 0) { + // throw new // InvalidInputException(INVALID_UNICODE_ESCAPE); - // } else { - // currentCharacter = - // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // } else { + // currentCharacter = + // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // } // } - // } //handle the \\u case manually into comment - // if (currentCharacter == '\\') { - // if (source[currentPosition] == '\\') - // currentPosition++; - // } //jump over the \\ - boolean isUnicode = false; - while (currentCharacter != '\r' - && currentCharacter != '\n') { - if (currentCharacter == '?') { - if (getNextChar('>')) { - startPosition = currentPosition - 2; - phpMode = false; - return TokenNameINLINE_HTML; - } - } - //get the next char - isUnicode = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) - // == '\\') - // && (source[currentPosition] == 'u')) { - // isUnicode = true; - // //-------------unicode traitement ------------ - // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - // currentPosition++; - // while (source[currentPosition] == 'u') { - // currentPosition++; - // } - // if ((c1 = - // Character.getNumericValue(source[currentPosition++])) - // > 15 - // || c1 < 0 - // || (c2 = - // Character.getNumericValue( - // source[currentPosition++])) - // > 15 - // || c2 < 0 - // || (c3 = - // Character.getNumericValue( - // source[currentPosition++])) - // > 15 - // || c3 < 0 - // || (c4 = - // Character.getNumericValue( - // source[currentPosition++])) - // > 15 - // || c4 < 0) { - // throw new - // InvalidInputException(INVALID_UNICODE_ESCAPE); - // } else { - // currentCharacter = - // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - // } - // } - //handle the \\u case manually into comment - // if (currentCharacter == '\\') { - // if (source[currentPosition] == '\\') - // currentPosition++; - // } //jump over the \\ - } - if (isUnicode) { - endPositionForLineComment = currentPosition - 6; - } else { - endPositionForLineComment = currentPosition - 1; - } - recordComment(false); - if ((currentCharacter == '\r') - || (currentCharacter == '\n')) { - checkNonExternalizeString(); - if (recordLineSeparator) { - if (isUnicode) { - pushUnicodeLineSeparator(); - } else { - pushLineSeparator(); - } - } else { - currentLine = null; - } - } - if (tokenizeComments) { - if (!isUnicode) { - currentPosition = endPositionForLineComment; - // reset one character behind - } - return TokenNameCOMMENT_LINE; - } - } catch (IndexOutOfBoundsException e) { //an eof will them - // be generated - if (tokenizeComments) { - currentPosition--; - // reset one character behind - return TokenNameCOMMENT_LINE; - } + // if (currentCharacter == '\\') { + // if (source[currentPosition] == '\\') + // currentPosition++; + // } //jump over the \\ } - break; - } - if (test > 0) { - //traditional and annotation comment - boolean isJavadoc = false, star = false; - // consume next character - unicodeAsBackSlash = false; - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) == - // '\\') - // && (source[currentPosition] == 'u')) { - // getNextUnicodeChar(); - // } else { - // if (withoutUnicodePtr != 0) { - // withoutUnicodeBuffer[++withoutUnicodePtr] = - // currentCharacter; - // } - // } - if (currentCharacter == '*') { - isJavadoc = true; - star = true; + if (isUnicode) { + endPositionForLineComment = currentPosition - 6; + } else { + endPositionForLineComment = currentPosition - 1; } + recordComment(false); if ((currentCharacter == '\r') || (currentCharacter == '\n')) { checkNonExternalizeString(); if (recordLineSeparator) { - pushLineSeparator(); + if (isUnicode) { + pushUnicodeLineSeparator(); + } else { + pushLineSeparator(); + } } else { currentLine = null; } } - try { //get the next char - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) - // == '\\') - // && (source[currentPosition] == 'u')) { - // //-------------unicode traitement ------------ - // getNextUnicodeChar(); - // } - //handle the \\u case manually into comment - // if (currentCharacter == '\\') { - // if (source[currentPosition] == '\\') - // currentPosition++; - // //jump over the \\ - // } - // empty comment is not a javadoc /**/ - if (currentCharacter == '/') { - isJavadoc = false; - } - //loop until end of comment */ - while ((currentCharacter != '/') || (!star)) { - if ((currentCharacter == '\r') - || (currentCharacter == '\n')) { - checkNonExternalizeString(); - if (recordLineSeparator) { - pushLineSeparator(); - } else { - currentLine = null; - } - } - star = currentCharacter == '*'; - //get next char - currentCharacter = source[currentPosition++]; - // if (((currentCharacter = source[currentPosition++]) - // == '\\') - // && (source[currentPosition] == 'u')) { - // //-------------unicode traitement ------------ - // getNextUnicodeChar(); - // } - //handle the \\u case manually into comment - // if (currentCharacter == '\\') { - // if (source[currentPosition] == '\\') - // currentPosition++; - // } //jump over the \\ - } - recordComment(isJavadoc); - if (tokenizeComments) { - if (isJavadoc) - return TokenNameCOMMENT_PHPDOC; - return TokenNameCOMMENT_BLOCK; + if (tokenizeComments) { + if (!isUnicode) { + currentPosition = endPositionForLineComment; + // reset one character behind } - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(UNTERMINATED_COMMENT); + return TokenNameCOMMENT_LINE; + } + } catch (IndexOutOfBoundsException e) { //an eof will them + // be generated + if (tokenizeComments) { + currentPosition--; + // reset one character behind + return TokenNameCOMMENT_LINE; } - break; } - return TokenNameDIVIDE; + break; } - case '\u001a' : - if (atEnd()) - return TokenNameEOF; - //the atEnd may not be if - // source is only some part of a real (external) stream - throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$ - default : - if (currentCharacter == '$') { - int oldPosition = currentPosition; - try { - currentCharacter = source[currentPosition++]; - if (isPHPIdentifierStart(currentCharacter)) { - return scanIdentifierOrKeyword(true); + if (test > 0) { + //traditional and annotation comment + boolean isJavadoc = false, star = false; + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == + // '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } + if (currentCharacter == '*') { + isJavadoc = true; + star = true; + } + if ((currentCharacter == '\r') || (currentCharacter == '\n')) { + checkNonExternalizeString(); + if (recordLineSeparator) { + pushLineSeparator(); } else { - currentPosition = oldPosition; - return TokenNameDOLLAR; + currentLine = null; + } + } + try { //get the next char + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // getNextUnicodeChar(); + // } + //handle the \\u case manually into comment + // if (currentCharacter == '\\') { + // if (source[currentPosition] == '\\') + // currentPosition++; + // //jump over the \\ + // } + // empty comment is not a javadoc /**/ + if (currentCharacter == '/') { + isJavadoc = false; + } + //loop until end of comment */ + while ((currentCharacter != '/') || (!star)) { + if ((currentCharacter == '\r') + || (currentCharacter == '\n')) { + checkNonExternalizeString(); + if (recordLineSeparator) { + pushLineSeparator(); + } else { + currentLine = null; + } + } + star = currentCharacter == '*'; + //get next char + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // getNextUnicodeChar(); + // } + //handle the \\u case manually into comment + // if (currentCharacter == '\\') { + // if (source[currentPosition] == '\\') + // currentPosition++; + // } //jump over the \\ + } + recordComment(isJavadoc); + if (tokenizeComments) { + if (isJavadoc) + return TokenNameCOMMENT_PHPDOC; + return TokenNameCOMMENT_BLOCK; } } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_COMMENT); + } + break; + } + return TokenNameDIVIDE; + } + case '\u001a' : + if (atEnd()) + return TokenNameEOF; + //the atEnd may not be if + // source is only some part of a real (external) stream + throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$ + default : + if (currentCharacter == '$') { + int oldPosition = currentPosition; + try { + currentCharacter = source[currentPosition++]; + if (isPHPIdentifierStart(currentCharacter)) { + return scanIdentifierOrKeyword(true); + } else { currentPosition = oldPosition; return TokenNameDOLLAR; } + } catch (IndexOutOfBoundsException e) { + currentPosition = oldPosition; + return TokenNameDOLLAR; } - if (isPHPIdentifierStart(currentCharacter)) - return scanIdentifierOrKeyword(false); - if (Character.isDigit(currentCharacter)) - return scanNumber(false); - return TokenNameERROR; - } - + } + if (isPHPIdentifierStart(currentCharacter)) + return scanIdentifierOrKeyword(false); + if (Character.isDigit(currentCharacter)) + return scanNumber(false); + return TokenNameERROR; + } } } //-----------------end switch while try-------------------- catch (IndexOutOfBoundsException e) { @@ -3625,9 +3828,16 @@ public class Scanner implements IScanner, ITerminalSymbols { } public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals, boolean assertMode) { + this(tokenizeComments, tokenizeWhiteSpace, + checkNonExternalizedStringLiterals, assertMode, false); + } + public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, + boolean checkNonExternalizedStringLiterals, boolean assertMode, + boolean tokenizeStrings) { this.eofPosition = Integer.MAX_VALUE; this.tokenizeComments = tokenizeComments; this.tokenizeWhiteSpace = tokenizeWhiteSpace; + this.tokenizeStrings = tokenizeStrings; this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals; this.assertMode = assertMode; this.encapsedStringStack = null;