// Bracket-kind codes. NOTE(review): the numbering visible here starts at 1, so a
// kind with value 0 is presumably declared just above this excerpt - confirm.
public static final int SquareBracket = 1;
public static final int CurlyBracket = 2;
// NOTE(review): one past the largest kind code shown here; presumably the number
// of bracket kinds (e.g. for sizing per-kind tables) - confirm against its users.
public static final int BracketKinds = 3;
+
+ public static final boolean DEBUG = false; // switched off; no reader of this flag is visible in this excerpt
/**
 * Creates a scanner by delegating to {@link #Scanner(boolean, boolean)}
 * with both {@code tokenizeComments} and {@code tokenizeWhiteSpace}
 * set to {@code false}.
 */
public Scanner() {
this(false, false);
}
/**
 * Creates a scanner by delegating to the three-argument constructor,
 * forwarding both flags unchanged and passing {@code false} as the
 * third argument.
 * NOTE(review): the three-argument constructor is outside this excerpt,
 * so the meaning of its third parameter cannot be stated here - confirm.
 *
 * @param tokenizeComments forwarded as-is; presumably selects whether
 *        comments are returned as tokens - confirm
 * @param tokenizeWhiteSpace forwarded as-is; presumably selects whether
 *        whitespace is returned as tokens - confirm
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
this(tokenizeComments, tokenizeWhiteSpace, false);
}
+
+ /**
+ * Determines if the specified character is
+ * permissible as the first character in a PHP identifier.
+ * <p>
+ * A character qualifies when {@link Character#isLetter(char)} accepts it
+ * or when it is the underscore. Digits and the dollar sign do not qualify.
+ * <p>
+ * NOTE(review): PHP itself describes identifier characters in terms of
+ * ASCII letters, the underscore and high bytes, which is not the same set
+ * as the Unicode letter test used here - confirm that this approximation
+ * is intended for the way the source buffer is decoded.
+ *
+ * @param ch the character to test
+ * @return {@code true} if {@code ch} is a letter or an underscore
+ */
+ public static boolean isPHPIdentifierStart(char ch) {
+ return Character.isLetter(ch) || (ch == '_');
+ }
+
+ /**
+ * Determines if the specified character may be part of a PHP identifier as
+ * other than the first character.
+ * <p>
+ * A character qualifies when {@link Character#isLetterOrDigit(char)}
+ * accepts it or when it is the underscore. The dollar sign does not qualify.
+ * <p>
+ * NOTE(review): PHP itself describes identifier characters in terms of
+ * ASCII letters, ASCII digits, the underscore and high bytes, which is not
+ * the same set as the Unicode test used here - confirm that this
+ * approximation is intended for the way the source buffer is decoded.
+ *
+ * @param ch the character to test
+ * @return {@code true} if {@code ch} is a letter, a digit or an underscore
+ */
+ public static boolean isPHPIdentifierPart(char ch) {
+ return Character.isLetterOrDigit(ch) || (ch == '_');
+ }
+
public final boolean atEnd() {
// This code is not relevant if source is
// Only a part of the real stream input
}
return result;
}
-
+
public final char[] getCurrentTokenSource(int startPos) {
// Return the token REAL source (aka unicodes are precomputed)
}
return result;
}
-
+
public final char[] getCurrentTokenSourceString() {
//return the token REAL source (aka unicodes are precomputed).
//REMOVE the two " that are at the beginning and the end.
}
currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
- if (!Character.isJavaIdentifierPart(currentCharacter)) {
+ if (!isPHPIdentifierPart(currentCharacter)) {
currentPosition = temp;
return false;
}
return true;
} //-------------end unicode traitement--------------
else {
- if (!Character.isJavaIdentifierPart(currentCharacter)) {
+ if (!isPHPIdentifierPart(currentCharacter)) {
currentPosition = temp;
return false;
}
int heredocStart = currentPosition;
int heredocLength = 0;
currentCharacter = source[currentPosition++];
- if (Character.isJavaIdentifierStart(currentCharacter)) {
+ if (isPHPIdentifierStart(currentCharacter)) {
currentCharacter = source[currentPosition++];
} else {
return TokenNameERROR;
}
- while (Character.isJavaIdentifierPart(currentCharacter)) {
+ while (isPHPIdentifierPart(currentCharacter)) {
currentCharacter = source[currentPosition++];
}
}
while (currentCharacter != '\'') {
-
+
/**** in PHP \r and \n are valid in string literals ****/
-// if ((currentCharacter == '\n')
-// || (currentCharacter == '\r')) {
-// // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
-// for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-// if (currentPosition + lookAhead == source.length)
-// break;
-// if (source[currentPosition + lookAhead] == '\n')
-// break;
-// if (source[currentPosition + lookAhead] == '\"') {
-// currentPosition += lookAhead + 1;
-// break;
-// }
-// }
-// throw new InvalidInputException(INVALID_CHAR_IN_STRING);
-// }
+ // if ((currentCharacter == '\n')
+ // || (currentCharacter == '\r')) {
+ // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
+ // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ // if (currentPosition + lookAhead == source.length)
+ // break;
+ // if (source[currentPosition + lookAhead] == '\n')
+ // break;
+ // if (source[currentPosition + lookAhead] == '\"') {
+ // currentPosition += lookAhead + 1;
+ // break;
+ // }
+ // }
+ // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+ // }
if (currentCharacter == '\\') {
int escapeSize = currentPosition;
boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
//scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
- scanEscapeCharacter();
+ scanSingleQuotedEscapeCharacter();
escapeSize = currentPosition - escapeSize;
if (withoutUnicodePtr == 0) {
//buffer all the entries that have been left aside....
}
while (currentCharacter != '"') {
-
-
+
/**** in PHP \r and \n are valid in string literals ****/
-// if ((currentCharacter == '\n')
-// || (currentCharacter == '\r')) {
-// // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
-// for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-// if (currentPosition + lookAhead == source.length)
-// break;
-// if (source[currentPosition + lookAhead] == '\n')
-// break;
-// if (source[currentPosition + lookAhead] == '\"') {
-// currentPosition += lookAhead + 1;
-// break;
-// }
-// }
-// throw new InvalidInputException(INVALID_CHAR_IN_STRING);
-// }
+ // if ((currentCharacter == '\n')
+ // || (currentCharacter == '\r')) {
+ // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
+ // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ // if (currentPosition + lookAhead == source.length)
+ // break;
+ // if (source[currentPosition + lookAhead] == '\n')
+ // break;
+ // if (source[currentPosition + lookAhead] == '\"') {
+ // currentPosition += lookAhead + 1;
+ // break;
+ // }
+ // }
+ // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+ // }
if (currentCharacter == '\\') {
int escapeSize = currentPosition;
boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
//scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
- scanEscapeCharacter();
+ scanDoubleQuotedEscapeCharacter();
escapeSize = currentPosition - escapeSize;
if (withoutUnicodePtr == 0) {
//buffer all the entries that have been left aside....
}
return TokenNameStringLiteral;
case '`' :
- try {
- // consume next character
- unicodeAsBackSlash = false;
- if (((currentCharacter = source[currentPosition++]) == '\\')
- && (source[currentPosition] == 'u')) {
- getNextUnicodeChar();
- } else {
- if (withoutUnicodePtr != 0) {
- withoutUnicodeBuffer[++withoutUnicodePtr] =
- currentCharacter;
- }
- }
-
- while (currentCharacter != '`') {
-
-
- /**** in PHP \r and \n are valid in string literals ****/
-// if ((currentCharacter == '\n')
-// || (currentCharacter == '\r')) {
-// // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
-// for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-// if (currentPosition + lookAhead == source.length)
-// break;
-// if (source[currentPosition + lookAhead] == '\n')
-// break;
-// if (source[currentPosition + lookAhead] == '\"') {
-// currentPosition += lookAhead + 1;
-// break;
-// }
-// }
-// throw new InvalidInputException(INVALID_CHAR_IN_STRING);
-// }
- if (currentCharacter == '\\') {
- int escapeSize = currentPosition;
- boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
- //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
- scanEscapeCharacter();
- escapeSize = currentPosition - escapeSize;
- if (withoutUnicodePtr == 0) {
- //buffer all the entries that have been left aside....
- withoutUnicodePtr =
- currentPosition - escapeSize - 1 - startPosition;
- System.arraycopy(
- source,
- startPosition,
- withoutUnicodeBuffer,
- 1,
- withoutUnicodePtr);
- withoutUnicodeBuffer[++withoutUnicodePtr] =
- currentCharacter;
- } else { //overwrite the / in the buffer
- withoutUnicodeBuffer[withoutUnicodePtr] =
- currentCharacter;
- if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
- withoutUnicodePtr--;
- }
- }
- }
+ try {
// consume next character
unicodeAsBackSlash = false;
if (((currentCharacter = source[currentPosition++]) == '\\')
}
}
- }
- } catch (IndexOutOfBoundsException e) {
- throw new InvalidInputException(UNTERMINATED_STRING);
- } catch (InvalidInputException e) {
- if (e.getMessage().equals(INVALID_ESCAPE)) {
- // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
- for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- if (currentPosition + lookAhead == source.length)
- break;
- if (source[currentPosition + lookAhead] == '\n')
- break;
- if (source[currentPosition + lookAhead] == '`') {
- currentPosition += lookAhead + 1;
- break;
+ while (currentCharacter != '`') {
+
+ /**** in PHP \r and \n are valid in string literals ****/
+ // if ((currentCharacter == '\n')
+ // || (currentCharacter == '\r')) {
+ // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
+ // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ // if (currentPosition + lookAhead == source.length)
+ // break;
+ // if (source[currentPosition + lookAhead] == '\n')
+ // break;
+ // if (source[currentPosition + lookAhead] == '\"') {
+ // currentPosition += lookAhead + 1;
+ // break;
+ // }
+ // }
+ // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+ // }
+ if (currentCharacter == '\\') {
+ int escapeSize = currentPosition;
+ boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+ //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
+ scanDoubleQuotedEscapeCharacter();
+ escapeSize = currentPosition - escapeSize;
+ if (withoutUnicodePtr == 0) {
+ //buffer all the entries that have been left aside....
+ withoutUnicodePtr =
+ currentPosition - escapeSize - 1 - startPosition;
+ System.arraycopy(
+ source,
+ startPosition,
+ withoutUnicodeBuffer,
+ 1,
+ withoutUnicodePtr);
+ withoutUnicodeBuffer[++withoutUnicodePtr] =
+ currentCharacter;
+ } else { //overwrite the / in the buffer
+ withoutUnicodeBuffer[withoutUnicodePtr] =
+ currentCharacter;
+ if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
+ withoutUnicodePtr--;
+ }
+ }
+ }
+ // consume next character
+ unicodeAsBackSlash = false;
+ if (((currentCharacter = source[currentPosition++]) == '\\')
+ && (source[currentPosition] == 'u')) {
+ getNextUnicodeChar();
+ } else {
+ if (withoutUnicodePtr != 0) {
+ withoutUnicodeBuffer[++withoutUnicodePtr] =
+ currentCharacter;
+ }
}
+
}
+ } catch (IndexOutOfBoundsException e) {
+ throw new InvalidInputException(UNTERMINATED_STRING);
+ } catch (InvalidInputException e) {
+ if (e.getMessage().equals(INVALID_ESCAPE)) {
+ // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
+ for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ if (currentPosition + lookAhead == source.length)
+ break;
+ if (source[currentPosition + lookAhead] == '\n')
+ break;
+ if (source[currentPosition + lookAhead] == '`') {
+ currentPosition += lookAhead + 1;
+ break;
+ }
+ }
+ }
+ throw e; // rethrow
}
- throw e; // rethrow
- }
- if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
- if (currentLine == null) {
- currentLine = new NLSLine();
- lines.add(currentLine);
+ if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
+ if (currentLine == null) {
+ currentLine = new NLSLine();
+ lines.add(currentLine);
+ }
+ currentLine.add(
+ new StringLiteral(
+ getCurrentTokenSourceString(),
+ startPosition,
+ currentPosition - 1));
}
- currentLine.add(
- new StringLiteral(
- getCurrentTokenSourceString(),
- startPosition,
- currentPosition - 1));
- }
- return TokenNameStringInterpolated;
+ return TokenNameStringInterpolated;
case '#' :
case '/' :
{
recordComment(isJavadoc);
if (tokenizeComments) {
if (isJavadoc)
- return TokenNameCOMMENT_JAVADOC;
+ return TokenNameCOMMENT_PHPDOC;
return TokenNameCOMMENT_BLOCK;
}
} catch (IndexOutOfBoundsException e) {
default :
if (currentCharacter == '$') {
- if (getNextChar('{'))
+ while ((currentCharacter = source[currentPosition++]) == '$') {
+ }
+ if (currentCharacter == '{')
return TokenNameDOLLAR_LBRACE;
- return scanIdentifierOrKeyword(true);
+ if (isPHPIdentifierStart(currentCharacter))
+ return scanIdentifierOrKeyword(true);
+ return TokenNameERROR;
}
- if (Character.isJavaIdentifierStart(currentCharacter))
+ if (isPHPIdentifierStart(currentCharacter))
return scanIdentifierOrKeyword(false);
if (Character.isDigit(currentCharacter))
return scanNumber(false);
test = getNextChar('\\');
if (test) {
try {
- scanEscapeCharacter();
+ scanDoubleQuotedEscapeCharacter();
} catch (InvalidInputException ex) {
};
} else {
}
if (currentCharacter == '\\') {
try {
- scanEscapeCharacter();
+ scanDoubleQuotedEscapeCharacter();
} catch (InvalidInputException ex) {
};
}
}
default :
- if (Character.isJavaIdentifierStart(currentCharacter)
+ if (isPHPIdentifierStart(currentCharacter)
|| currentCharacter == '$') {
try {
scanIdentifierOrKeyword((currentCharacter == '$'));
return new char[] { charOne };
}
}
+
final char[] optimizedCurrentTokenSource2() {
//try to return the same char[] build only once
newEntry2 = max;
return r;
}
+
final char[] optimizedCurrentTokenSource3() {
//try to return the same char[] build only once
newEntry3 = max;
return r;
}
+
final char[] optimizedCurrentTokenSource4() {
//try to return the same char[] build only once
return r;
}
+
final char[] optimizedCurrentTokenSource5() {
//try to return the same char[] build only once
return r;
}
+
final char[] optimizedCurrentTokenSource6() {
//try to return the same char[] build only once
newEntry6 = max;
return r;
}
+
public final void pushLineSeparator() throws InvalidInputException {
//see comment on isLineDelimiter(char) for the use of '\n' and '\r'
final int INCREMENT = 250;
commentPtr = -1; // reset comment stack
}
- public final void scanEscapeCharacter() throws InvalidInputException {
+ public final void scanSingleQuotedEscapeCharacter()
+ throws InvalidInputException {
// the string with "\\u" is a legal string of two chars \ and u
//thus we use a direct access to the source (for regular cases).
} else
currentCharacter = source[currentPosition++];
switch (currentCharacter) {
- case 'b' :
- currentCharacter = '\b';
+ case '\'' :
+ currentCharacter = '\'';
break;
+ case '\\' :
+ currentCharacter = '\\';
+ break;
+ default :
+ currentCharacter = '\\';
+ currentPosition--;
+ }
+ }
+
+ public final void scanDoubleQuotedEscapeCharacter()
+ throws InvalidInputException {
+ // the string with "\\u" is a legal string of two chars \ and u
+ //thus we use a direct access to the source (for regular cases).
+
+ if (unicodeAsBackSlash) {
+ // consume next character
+ unicodeAsBackSlash = false;
+ if (((currentCharacter = source[currentPosition++]) == '\\')
+ && (source[currentPosition] == 'u')) {
+ getNextUnicodeChar();
+ } else {
+ if (withoutUnicodePtr != 0) {
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ }
+ }
+ } else
+ currentCharacter = source[currentPosition++];
+ switch (currentCharacter) {
+ // case 'b' :
+ // currentCharacter = '\b';
+ // break;
case 't' :
currentCharacter = '\t';
break;
case 'n' :
currentCharacter = '\n';
break;
- case 'f' :
- currentCharacter = '\f';
- break;
+ // case 'f' :
+ // currentCharacter = '\f';
+ // break;
case 'r' :
currentCharacter = '\r';
break;
case '\\' :
currentCharacter = '\\';
break;
+ case '$' :
+ currentCharacter = '$';
+ break;
default :
// -----------octal escape--------------
// OctalDigit
//disptach on the second char :-)...cool....but fast !
useAssertAsAnIndentifier = false;
+
while (getNextCharAsJavaIdentifierPart()) {
};