* Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
* accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
* and is available at http://www.eclipse.org/legal/cpl-v05.html
- *
+ *
* Contributors: IBM Corporation - initial API and implementation
**********************************************************************************************************************************/
package net.sourceforge.phpdt.internal.compiler.parser;
import net.sourceforge.phpdt.core.compiler.IScanner;
import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
import net.sourceforge.phpdt.core.compiler.InvalidInputException;
+import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
-import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
public class Scanner implements IScanner, ITerminalSymbols {
/*
* stream - currentPosition-1 gives the sourceEnd position into the stream
*/
// 1.4 feature
-// private boolean assertMode;
-
+ // private boolean assertMode;
public boolean useAssertAsAnIndentifier = false;
// flag indicating if processed source contains occurrences of keyword assert
public boolean phpMode = false;
- public boolean phpExpressionTag = false;
-
- // public Stack encapsedStringStack = null;
+ /**
+ * This token is set to TokenNameECHO_INVISIBLE if a short tag block begins (i.e. <?= ... ?>).
+ * Directly after the "=" character the getNextToken() method returns TokenNameINLINE_HTML.
+ * In the next call to the getNextToken() method the value of fFillerToken (==TokenNameECHO_INVISIBLE) is returned.
+ *
+ */
+ int fFillerToken = TokenNameEOF;
public char currentCharacter;
static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
public final static int C_DOLLAR = 8;
-
+
public final static int C_LETTER = 4;
public final static int C_DIGIT = 3;
ObviousIdentCharNatures[i] = C_LETTER;
ObviousIdentCharNatures['$'] = C_DOLLAR;
-
+
ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
ObviousIdentCharNatures['"'] = C_SEPARATOR;
ObviousIdentCharNatures['\''] = C_SEPARATOR;
}
+
static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
static final int TableSize = 30, InternalTableSize = 6;
/**
* Determines if the specified character is permissible as the first character
* in a PHP identifier or variable
- *
+ *
* The '$' character for PHP variables is regarded as a correct first
* character !
- *
+ *
*/
public static boolean isPHPIdentOrVarStart(char ch) {
// ObviousIdentCharNatures is allocated with MAX_OBVIOUS entries, so only
// characters below that bound can be classified through the table.
if (ch < MAX_OBVIOUS) {
- return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DOLLAR;
+ return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DOLLAR;
}
// Any character at or above MAX_OBVIOUS is rejected.
return false;
- //return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+ // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <=
+ // ch && ch <= 0xFF);
}
/**
* Determines if the specified character is permissible as the first character
* in a PHP identifier.
- *
+ *
* The '$' character for PHP variables isn't regarded as the first character !
*/
public static boolean isPHPIdentifierStart(char ch) {
// ObviousIdentCharNatures is allocated with MAX_OBVIOUS entries, so only
// characters below that bound can be classified through the table.
if (ch < MAX_OBVIOUS) {
- return ObviousIdentCharNatures[ch]==C_LETTER;
+ return ObviousIdentCharNatures[ch] == C_LETTER;
}
// Any character at or above MAX_OBVIOUS is rejected.
return false;
-// return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+ // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
+ // 0xFF);
}
/**
 * Determines if the specified character is permissible inside a PHP
 * identifier, i.e. whether the lookup table classifies it as a letter
 * (C_LETTER) or a digit (C_DIGIT).
 *
 * Characters at or above MAX_OBVIOUS are never accepted.
 */
public static boolean isPHPIdentifierPart(char ch) {
// ObviousIdentCharNatures is allocated with MAX_OBVIOUS entries, so only
// characters below that bound can be classified through the table.
if (ch < MAX_OBVIOUS) {
- return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
+ return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
}
return false;
-// return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+ // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch
+ // <= 0xFF);
}
/**
 * Determines if the specified character is permissible inside an SQL
 * identifier. The test is the same table lookup used by
 * isPHPIdentifierPart: the character must be classified as C_LETTER or
 * C_DIGIT, and characters at or above MAX_OBVIOUS are never accepted.
 */
public static boolean isSQLIdentifierPart(char ch) {
// ObviousIdentCharNatures is allocated with MAX_OBVIOUS entries, so only
// characters below that bound can be classified through the table.
if (ch < MAX_OBVIOUS) {
- return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
+ return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
}
return false;
}
-
+
public final boolean atEnd() {
// This code is not relevant if source is
// Only a part of the real stream input
return result;
}
+ public final boolean equalsCurrentTokenSource(char[] word) { // true only if word equals the source characters from startPosition up to (excluding) currentPosition
+ if (word.length != currentPosition - startPosition) { // different lengths can never match
+ return false;
+ }
+ for (int i = 0; i < word.length; i++) { // character-by-character, case-sensitive comparison
+ if (word[i] != source[startPosition + i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
public final char[] getRawTokenSourceEnd() {
int length = this.eofPosition - this.currentPosition - 1;
char[] sourceEnd = new char[length];
return this.startPosition;
}
+ public final String getCurrentStringLiteral() { // String wrapper around getCurrentStringLiteralSource()
+ char[] result = getCurrentStringLiteralSource();
+ return new String(result);
+ }
+
public final char[] getCurrentStringLiteralSource() {
// Return the token REAL source (aka unicodes are precomputed)
if (startPosition + 1 >= currentPosition) {
/*
* Search the source position corresponding to the end of a given line number
- *
+ *
* Line numbers are 1-based, and relative to the scanner initialPosition.
* Character positions are 0-based.
- *
+ *
* In case the given line number is inconsistent, answers -1.
*/
public final int getLineEnd(int lineNumber) {
/**
* Search the source position corresponding to the beginning of a given line
* number
- *
+ *
* Line numbers are 1-based, and relative to the scanner initialPosition.
* Character positions are 0-based.
- *
+ *
* e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
- *
+ *
* In case the given line number is inconsistent, answers -1.
*/
public final int getLineStart(int lineNumber) {
do {
currentCharacter = source[currentPosition++];
} while (currentCharacter == ' ' || currentCharacter == '\t');
- while (ObviousIdentCharNatures[currentCharacter]==C_LETTER) {
- // while((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
+ while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
+ // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
+ // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
buf.append(currentCharacter);
currentCharacter = source[currentPosition++];
}
public void consumeStringLiteral() throws InvalidInputException {
try {
- boolean openDollarBrace = false;
+ int openDollarBrace = 0;
// consume next character
unicodeAsBackSlash = false;
currentCharacter = source[currentPosition++];
- while (currentCharacter != '"' || openDollarBrace) {
+ while (currentCharacter != '"' || openDollarBrace>0) {
/** ** in PHP \r and \n are valid in string literals *** */
if (currentCharacter == '\\') {
int escapeSize = currentPosition;
}
}
} else if (currentCharacter == '$' && source[currentPosition] == '{') {
- openDollarBrace = true;
+ openDollarBrace++;
} else if (currentCharacter == '{' && source[currentPosition] == '$') {
- openDollarBrace = true;
+ openDollarBrace++;
} else if (currentCharacter == '}') {
- openDollarBrace = false;
+ openDollarBrace--;
} else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
if (recordLineSeparator) {
pushLineSeparator();
}
public int getNextToken() throws InvalidInputException {
- phpExpressionTag = false;
if (!phpMode) {
return getInlinedHTMLToken(currentPosition);
- }
- if (phpMode) {
+ } else {
+ if (fFillerToken != TokenNameEOF) {
+ int tempToken;
+ startPosition = currentPosition;
+ tempToken = fFillerToken;
+ fFillerToken = TokenNameEOF;
+ return tempToken;
+ }
this.wasAcr = false;
if (diet) {
jumpOverMethodBody();
try {
while (true) {
withoutUnicodePtr = 0;
- // start with a new token
- char encapsedChar = ' ';
- // if (!encapsedStringStack.isEmpty()) {
- // encapsedChar = ((Character)
- // encapsedStringStack.peek()).charValue();
- // }
- // if (encapsedChar != '$' && encapsedChar != ' ') {
- // currentCharacter = source[currentPosition++];
- // if (currentCharacter == encapsedChar) {
- // switch (currentCharacter) {
- // case '`':
- // return TokenNameEncapsedString0;
- // case '\'':
- // return TokenNameEncapsedString1;
- // case '"':
- // return TokenNameEncapsedString2;
- // }
- // }
- // while (currentCharacter != encapsedChar) {
- // /** ** in PHP \r and \n are valid in string literals *** */
- // switch (currentCharacter) {
- // case '\\':
- // int escapeSize = currentPosition;
- // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
- // //scanEscapeCharacter make a side effect on this value and
- // // we need the previous value few lines down this one
- // scanDoubleQuotedEscapeCharacter();
- // escapeSize = currentPosition - escapeSize;
- // if (withoutUnicodePtr == 0) {
- // //buffer all the entries that have been left aside....
- // withoutUnicodePtr = currentPosition - escapeSize - 1 -
- // startPosition;
- // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
- // withoutUnicodePtr);
- // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- // } else { //overwrite the / in the buffer
- // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
- // if (backSlashAsUnicodeInString) { //there are TWO \ in
- // withoutUnicodePtr--;
- // }
- // }
- // break;
- // case '\r':
- // case '\n':
- // if (recordLineSeparator) {
- // pushLineSeparator();
- // }
- // break;
- // case '$':
- // if (isPHPIdentifierStart(source[currentPosition]) ||
- // source[currentPosition] == '{') {
- // currentPosition--;
- // encapsedStringStack.push(new Character('$'));
- // return TokenNameSTRING;
- // }
- // break;
- // case '{':
- // if (source[currentPosition] == '$') { // CURLY_OPEN
- // currentPosition--;
- // encapsedStringStack.push(new Character('$'));
- // return TokenNameSTRING;
- // }
- // }
- // // consume next character
- // unicodeAsBackSlash = false;
- // currentCharacter = source[currentPosition++];
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- // }
- // // }
- // } // end while
- // currentPosition--;
- // return TokenNameSTRING;
- // }
// ---------Consume white space and handles startPosition---------
int whiteStart = currentPosition;
startPosition = currentPosition;
currentCharacter = source[currentPosition++];
- // if (encapsedChar == '$') {
- // switch (currentCharacter) {
- // case '\\':
- // currentCharacter = source[currentPosition++];
- // return TokenNameSTRING;
- // case '{':
- // if (encapsedChar == '$') {
- // if (getNextChar('$'))
- // return TokenNameLBRACE_DOLLAR;
- // }
- // return TokenNameLBRACE;
- // case '}':
- // return TokenNameRBRACE;
- // case '[':
- // return TokenNameLBRACKET;
- // case ']':
- // return TokenNameRBRACKET;
- // case '\'':
- // if (tokenizeStrings) {
- // consumeStringConstant();
- // return TokenNameStringSingleQuote;
- // }
- // return TokenNameEncapsedString1;
- // case '"':
- // return TokenNameEncapsedString2;
- // case '`':
- // if (tokenizeStrings) {
- // consumeStringInterpolated();
- // return TokenNameStringInterpolated;
- // }
- // return TokenNameEncapsedString0;
- // case '-':
- // if (getNextChar('>'))
- // return TokenNameMINUS_GREATER;
- // return TokenNameSTRING;
- // default:
- // if (currentCharacter == '$') {
- // int oldPosition = currentPosition;
- // try {
- // currentCharacter = source[currentPosition++];
- // if (currentCharacter == '{') {
- // return TokenNameDOLLAR_LBRACE;
- // }
- // if (isPHPIdentifierStart(currentCharacter)) {
- // return scanIdentifierOrKeyword(true);
- // } else {
- // currentPosition = oldPosition;
- // return TokenNameSTRING;
- // }
- // } catch (IndexOutOfBoundsException e) {
- // currentPosition = oldPosition;
- // return TokenNameSTRING;
- // }
- // }
- // if (isPHPIdentifierStart(currentCharacter))
- // return scanIdentifierOrKeyword(false);
- // if (Character.isDigit(currentCharacter))
- // return scanNumber(false);
- // return TokenNameERROR;
- // }
- // }
- // boolean isWhiteSpace;
while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
startPosition = currentPosition;
currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) == '\\')
- // && (source[currentPosition] == 'u')) {
- // isWhiteSpace = jumpOverUnicodeWhiteSpace();
- // } else {
if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
checkNonExternalizeString();
if (recordLineSeparator) {
currentLine = null;
}
}
- // isWhiteSpace = (currentCharacter == ' ')
- // || Character.isWhitespace(currentCharacter);
- // }
}
if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
// reposition scanner in case we are interested by spaces as tokens
* @throws InvalidInputException
*/
private int getInlinedHTMLToken(int start) throws InvalidInputException {
+ boolean phpShortTag = false; // true, if <?= detected
if (currentPosition > source.length) {
currentPosition = source.length;
return TokenNameEOF;
if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
if (currentCharacter != '=') { // <?=
currentPosition--;
+ phpShortTag = false;
} else {
- phpExpressionTag = true;
+ phpShortTag = true;
}
// <?
if (ignorePHPOneLiner) { // for CodeFormatter
if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
phpMode = true;
+ if (phpShortTag) {
+ fFillerToken = TokenNameECHO_INVISIBLE;
+ }
return TokenNameINLINE_HTML;
}
} else {
- phpMode = true;
+ boolean foundXML=false;
+ if (getNextChar('X','x')>=0) {
+ if (getNextChar('M','m')>=0) {
+ if (getNextChar('L','l')>=0) {
+ foundXML=true;
+ }
+ }
+ }
+ if (!foundXML) {
+ phpMode = true;
+ }
+ if (phpShortTag) {
+ fFillerToken = TokenNameECHO_INVISIBLE;
+ }
return TokenNameINLINE_HTML;
}
} else {
- // boolean phpStart = (currentCharacter == 'P') ||
- // (currentCharacter == 'p');
- // if (phpStart) {
- int test = getNextChar('H', 'h');
- if (test >= 0) {
- test = getNextChar('P', 'p');
- if (test >= 0) {
+ if (getNextChar('H', 'h') >= 0) {
+ if (getNextChar('P', 'p') >= 0) {
// <?PHP <?php
if (ignorePHPOneLiner) {
if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
}
/**
+ * Checks whether the PHP code is confined to the current line (for the CodeFormatter).
+ *
* @return
*/
private int lookAheadLinePHPTag() {
- // check if the PHP is only in this line (for CodeFormatter)
int currentPositionInLine = currentPosition;
char previousCharInLine = ' ';
char currentCharInLine = ' ';
break;
}
default:
- if (isPHPIdentOrVarStart(currentCharacter) ) {
+ if (isPHPIdentOrVarStart(currentCharacter)) {
try {
scanIdentifierOrKeyword((currentCharacter == '$'));
} catch (InvalidInputException ex) {
;
break;
}
- if ( ObviousIdentCharNatures[currentCharacter]==C_DIGIT) {
-// if (Character.isDigit(currentCharacter)) {
+ if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
+ // if (Character.isDigit(currentCharacter)) {
try {
scanNumber(false);
} catch (InvalidInputException ex) {
} else
return TokenNameIdentifier;
case 's':
- // static switch
+ // self static switch
switch (length) {
+ // case 4:
+ // if ((data[++index] == 'e') && (data[++index] == 'l') && (data[++index]
+ // == 'f')) {
+ // return TokenNameself;
+ // }
+ // return TokenNameIdentifier;
case 6:
if (data[++index] == 't')
if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
/**
* Search the line number corresponding to a specific position
- *
+ *
*/
public final int getLineNumber(int position) {
if (lineEnds == null)
initialPosition = currentPosition = 0;
containsAssertKeyword = false;
withoutUnicodeBuffer = new char[this.source.length];
+ fFillerToken = TokenNameEOF;
// encapsedStringStack = new Stack();
}
// //$NON-NLS-1$
case TokenNameINLINE_HTML:
return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
+ case TokenNameECHO_INVISIBLE:
+ //0-length token
+ return "";
case TokenNameIdentifier:
return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameVariable:
return "require_once"; //$NON-NLS-1$
case TokenNamereturn:
return "return"; //$NON-NLS-1$
+ // case TokenNameself:
+ // return "self"; //$NON-NLS-1$
case TokenNamestatic:
return "static"; //$NON-NLS-1$
case TokenNameswitch:
case TokenNameStringInterpolated:
return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameEncapsedString0:
- return "`"; //$NON-NLS-1$
- // case TokenNameEncapsedString1:
- // return "\'"; //$NON-NLS-1$
- // case TokenNameEncapsedString2:
- // return "\""; //$NON-NLS-1$
+ return "`"; //$NON-NLS-1$
+ // case TokenNameEncapsedString1:
+ // return "\'"; //$NON-NLS-1$
+ // case TokenNameEncapsedString2:
+ // return "\""; //$NON-NLS-1$
case TokenNameSTRING:
return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameHEREDOC:
this.tokenizeWhiteSpace = tokenizeWhiteSpace;
this.tokenizeStrings = tokenizeStrings;
this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
-// this.assertMode = assertMode;
+ // this.assertMode = assertMode;
// this.encapsedStringStack = null;
this.taskTags = taskTags;
this.taskPriorities = taskPriorities;
continue nextTag;
if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
- // insensitive
- // check
+ // insensitive
+ // check
continue nextTag;
}
}