/*******************************************************************************
* Copyright (c) 2000, 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Common Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/cpl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package net.sourceforge.phpdt.internal.ui.text;
import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
//incastrix
//import org.eclipse.jface.text.Assert;
import org.eclipse.core.runtime.Assert;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.rules.ICharacterScanner;
import org.eclipse.jface.text.rules.IPartitionTokenScanner;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.Token;
/**
* This scanner recognizes PHPDoc comments, PHP multi line comments, PHP
* single line comments (started with // or #) and PHP strings (double quoted,
* single quoted and heredoc).
*/
public class FastJavaPartitionScanner implements IPartitionTokenScanner,
IPHPPartitions {
// Partition states: the kind of partition the scanner is currently inside.
// NOTE: the declaration order matters - ordinal() is used as the index into
// the fTokens array, so both must be kept in the same order.
private static enum PartState {
PHP, // plain PHP code (the token at this index carries no data)
SINGLE_LINE_COMMENT, // comment started with '//' or '#'
MULTI_LINE_COMMENT, // comment started with '/*'
PHPDOC, // comment started with '/**'
STRING_DQ, // double quoted string
STRING_SQ, // single quoted string
STRING_HEREDOC, // heredoc string started with '<<<'
};
// Scan states: the last significant character sequence read (stored in fLast).
private static enum ScanState {
NONE, // nothing significant is pending
BACKSLASH, // postfix for STRING_DQ and STRING_SQ (a following quote does not end the string)
SLASH, // prefix for SINGLE_LINE_COMMENT, MULTI_LINE_COMMENT or PHPDOC
SLASH_STAR, // prefix for MULTI_LINE_COMMENT or PHPDOC
SLASH_STAR_STAR, // prefix for PHPDOC; a directly following '/' ends it as a MULTI_LINE_COMMENT
STAR, // postfix for MULTI_LINE_COMMENT or PHPDOC
CARRIAGE_RETURN, // a '\r' was read (non emulating mode); also set for a '\n' inside STRING_HEREDOC
LESS, // Found a '<'
LESS_LESS, // Found a '<<'
LESS_LESS_LESS, // Found a '<<<'
HEREDOC_ID, // Found a '<<<' and scanning the start ID (a ' ' ends the ID)
HEREDOC, // Found a '<<<' and the start ID
HEREDOC_ID_END, // Scanning a possible heredoc end ID at the start of a line
};
/** The heredoc start ID, collected character by character after a '<<<'. */
private String fHeredocId;
/** The possible heredoc end ID which is read right after a new line. It ends with a ';' and
* terminates the heredoc partition only if it matches the heredoc start ID fHeredocId.
*/
private String fHeredocIdEnd;
/** The scanner the characters are read from. NOTE(review): 1000 is presumably the buffer size - confirm against BufferedDocumentScanner. */
private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner (1000); // faster implementation
/** The offset of the last returned token. */
private int fTokenOffset;
/** The length of the last returned token. */
private int fTokenLength;
/** The state of the scanner, i.e. the kind of partition currently being scanned. */
private PartState fState;
/** The last significant characters read. */
private ScanState fLast;
/** The amount of characters already read on first call to nextToken(). */
private int fPrefixLength;
// emulate JavaPartitionScanner
/** Whether the scanner runs in JavaPartitionScanner emulation mode (set by the constructor). */
private boolean fEmulate = false;
/** Emulation only: the token offset recorded by preFix() for a PHP partition; -1 if nothing is recorded. */
private int fJavaOffset;
/** Emulation only: the token length recorded by preFix() together with fJavaOffset. */
private int fJavaLength;
/** The tokens returned by the scanner, indexed by PartState.ordinal() (same order as PartState). */
private final IToken[] fTokens = new IToken[] {
new Token (null),
new Token (PHP_SINGLELINE_COMMENT),
new Token (PHP_MULTILINE_COMMENT),
new Token (PHP_PHPDOC_COMMENT),
new Token (PHP_STRING_DQ),
new Token (PHP_STRING_SQ),
new Token (PHP_STRING_HEREDOC)};
/**
 * Creates a scanner which does not emulate the JavaPartitionScanner.
 */
public FastJavaPartitionScanner() {
    this(false);
}

/**
 * Creates a scanner.
 *
 * @param emulate <code>true</code> if the scanner should emulate the
 *                JavaPartitionScanner, <code>false</code> otherwise
 */
public FastJavaPartitionScanner(boolean emulate) {
    this.fEmulate = emulate;
}
/**
* Emulate JavaPartitionScanner
*
* @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
*/
public IToken nextToken() {
if (fEmulate) {
if ((fJavaOffset != -1) && (fTokenOffset + fTokenLength != fJavaOffset + fJavaLength)) {
fTokenOffset += fTokenLength;
return fTokens[PartState.PHP.ordinal()];
}
else {
fJavaOffset = -1;
fJavaLength = 0;
}
}
fTokenOffset += fTokenLength; // The new token offset is the offset of the previous partition + length of previous partition
fTokenLength = fPrefixLength; // The new partition is at least the length of the start of the new partition
while (true) {
final int ch = fScanner.read();
switch (ch) {
case ICharacterScanner.EOF:
if (fTokenLength > 0) {
fLast = ScanState.NONE; // ignore last
return preFix (fState, PartState.PHP, ScanState.NONE, 0);
}
else {
fLast = ScanState.NONE;
fPrefixLength = 0;
return Token.EOF;
}
case '\r': // Found a carriage return
// emulate JavaPartitionScanner
if (!fEmulate && (fLast != ScanState.CARRIAGE_RETURN)) {
fLast = ScanState.CARRIAGE_RETURN; // Set to what we currently found
fTokenLength++; // and count the partition length
continue; // Go for the next character to read
}
else {
switch (fState) {
case SINGLE_LINE_COMMENT:
if (fTokenLength > 0) {
IToken token = fTokens[fState.ordinal()];
// emulate JavaPartitionScanner
if (fEmulate) {
fTokenLength++;
fLast = ScanState.NONE;
fPrefixLength = 0;
}
else {
fLast = ScanState.CARRIAGE_RETURN;
fPrefixLength = 1;
}
fState = PartState.PHP;
return token;
}
else {
consume();
continue;
}
default:
consume();
continue;
}
}
case '\n': // Found a line feed
switch (fState) {
case SINGLE_LINE_COMMENT: // If we running within a single line comment,
return postFix (fState); // this is the end my friend
case STRING_HEREDOC: // If we running within a heredoc string
fTokenLength++; // Count the character
fLast = ScanState.CARRIAGE_RETURN; // and state is still new line
continue;
default: // If running anywhere else than on a single line comment
consume(); // count the length of the current partition
continue;
}
case '?':
if (fState == PartState.SINGLE_LINE_COMMENT) {
int nextch = fScanner.read();
if (nextch == '>') {
//
This is an example.
fTokenLength--;
fScanner.unread();
fScanner.unread();
return postFix (fState);
}
else {
// bug #1404228: Crash on
if (nextch != ICharacterScanner.EOF) {
fScanner.unread();
}
}
}
default:
if (!fEmulate && (fLast == ScanState.CARRIAGE_RETURN)) {
switch (fState) {
case SINGLE_LINE_COMMENT:
// case CHARACTER:
// case STRING_DQ:
// case STRING_SQ:
ScanState last;
PartState newState;
switch (ch) {
case '/':
last = ScanState.SLASH;
newState = PartState.PHP;
break;
case '*':
last = ScanState.STAR;
newState = PartState.PHP;
break;
case '\'':
last = ScanState.NONE;
newState = PartState.STRING_SQ;
break;
case '"':
last = ScanState.NONE;
newState = PartState.STRING_DQ;
break;
case '\r':
last = ScanState.CARRIAGE_RETURN;
newState = PartState.PHP;
break;
case '\\':
last = ScanState.BACKSLASH;
newState = PartState.PHP;
break;
default:
last = ScanState.NONE;
newState = PartState.PHP;
break;
}
fLast = ScanState.NONE; // ignore fLast
return preFix (fState, newState, last, 1);
default:
break;
}
}
}
// states
switch (fState) {
case PHP:
switch (ch) {
case '#': // Start of a single line comment
if (fTokenLength > 0) {
return preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 1);
}
else {
preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 1);
fTokenOffset += fTokenLength;
fTokenLength = fPrefixLength;
}
break;
case '<':
if (fLast == ScanState.LESS) {
fTokenLength++;
fLast = ScanState.LESS_LESS;
}
else if (fLast == ScanState.LESS_LESS) {
if (fTokenLength - getLastLength(fLast) > 0) { // this is the start of a single line comment
return preFix (PartState.PHP, PartState.STRING_HEREDOC, ScanState.LESS_LESS_LESS, 3);
}
else {
preFix (PartState.PHP, PartState.STRING_HEREDOC, ScanState.LESS_LESS_LESS, 3);
fTokenOffset += fTokenLength;
fTokenLength = fPrefixLength;
}
}
else {
fTokenLength++;
fLast = ScanState.LESS;
}
break;
case '/': // Start of single line comment?
if (fLast == ScanState.SLASH) { // If previous character was already a slash,
if (fTokenLength - getLastLength(fLast) > 0) { // this is the start of a single line comment
return preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 2);
}
else {
preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 2);
fTokenOffset += fTokenLength;
fTokenLength = fPrefixLength;
}
}
else {
fTokenLength++;
fLast = ScanState.SLASH; // We currently found a slash
}
break;
case '*':
if (fLast == ScanState.SLASH) { // If previous character was a slash
if (fTokenLength - getLastLength (fLast) > 0) { // this is the start of a comment /*
return preFix (PartState.PHP, PartState.MULTI_LINE_COMMENT, ScanState.SLASH_STAR, 2);
}
else {
preFix (PartState.PHP, PartState.MULTI_LINE_COMMENT, ScanState.SLASH_STAR, 2);
fTokenOffset += fTokenLength;
fTokenLength = fPrefixLength;
}
}
else { // No slash before the '*', so it's a normal character
consume ();
}
break;
case '\'': // The start of a single quoted string
fLast = ScanState.NONE; // ignore fLast
if (fTokenLength > 0) {
return preFix (PartState.PHP, PartState.STRING_SQ, ScanState.NONE, 1);
}
else {
preFix (PartState.PHP, PartState.STRING_SQ, ScanState.NONE, 1);
fTokenOffset += fTokenLength;
fTokenLength = fPrefixLength;
}
break;
case '"': // The start of a double quoted string
fLast = ScanState.NONE; // ignore fLast
if (fTokenLength > 0) {
return preFix (PartState.PHP, PartState.STRING_DQ, ScanState.NONE, 1);
}
else {
preFix (PartState.PHP, PartState.STRING_DQ, ScanState.NONE, 1);
fTokenOffset += fTokenLength;
fTokenLength = fPrefixLength;
}
break;
default: // Just a normal character with no special meaning
consume ();
break;
}
break;
case SINGLE_LINE_COMMENT: // We are just running within a single line comment (started with // or #)
consume();
break;
case PHPDOC: // We are just running within a php doc comment
switch (ch) {
case '/':
switch (fLast) {
case SLASH_STAR_STAR:
return postFix (PartState.MULTI_LINE_COMMENT);
case STAR:
return postFix (PartState.PHPDOC); // Found the end of the php doc (multi line) comment
default:
consume();
break;
}
break;
case '*': // Found a '*'
fTokenLength++;
fLast = ScanState.STAR; // Remember that we found a '*'
break;
default:
consume();
break;
}
break;
case MULTI_LINE_COMMENT: // We are currently running through a (possible) multi line comment
switch (ch) {
case '*': // and we found a '*'
if (fLast == ScanState.SLASH_STAR) { // If the previous characters have been a /*
fLast = ScanState.SLASH_STAR_STAR;
fTokenLength++;
fState = PartState.PHPDOC;
}
else {
fTokenLength++;
fLast = ScanState.STAR;
}
break;
case '/':
if (fLast == ScanState.STAR) {
return postFix (PartState.MULTI_LINE_COMMENT);
}
else {
consume();
break;
}
default:
consume();
break;
}
break;
case STRING_DQ:
switch (ch) {
case '\\':
fLast = (fLast == ScanState.BACKSLASH) ? ScanState.NONE : ScanState.BACKSLASH;
fTokenLength++;
break;
case '\"':
if (fLast != ScanState.BACKSLASH) {
return postFix (PartState.STRING_DQ);
}
else {
consume();
}
break;
default:
consume();
break;
}
break;
case STRING_SQ:
switch (ch) {
case '\\':
fLast = (fLast == ScanState.BACKSLASH) ? ScanState.NONE : ScanState.BACKSLASH;
fTokenLength++;
break;
case '\'':
if (fLast != ScanState.BACKSLASH) {
return postFix (PartState.STRING_SQ);
}
else {
consume();
}
break;
default:
consume();
break;
}
break;
case STRING_HEREDOC: // We are just running within a heredoc string
switch (fLast) {
case LESS_LESS_LESS: // The first time after we recognized the '<<<'
fLast = ScanState.HEREDOC_ID; // We do a scan of the heredoc id string
fHeredocId = "";
fHeredocId += (char) ch;
fTokenLength++;
break;
case HEREDOC_ID: // Scan the starting heredoc ID
if (ch == ' ') {
fLast = ScanState.HEREDOC;
fTokenLength++;
}
else {
fHeredocId += (char) ch;
fTokenLength++;
}
break;
case CARRIAGE_RETURN: // We previously found a new line
fTokenLength++;
fHeredocIdEnd = "";
fHeredocIdEnd += (char) ch; // Add the first character to the (possible) end ID
fLast = ScanState.HEREDOC_ID_END; // Go for scanning the (possible) end ID
break;
case HEREDOC_ID_END: // We scan the (possible) end ID
if (ch == ';') { // End ID ends with an ';'
if (fHeredocId.compareTo (fHeredocIdEnd) == 0) { // If start ID and end ID matches.
return postFix (PartState.STRING_HEREDOC); // It's the end of a heredoc partition
}
else {
consume (); // Wrong end ID, so just eat the character
}
}
else {
fTokenLength++; //
fHeredocIdEnd += (char) ch; // Add the characther to the possible heredoc end ID
}
break;
default: // Normally state NONE
consume (); // Eat the character
break;
}
break;
} // end of switch (fState)
}
}
/**
 * Returns the number of characters which belong to the given scan state,
 * i.e. how many of the characters counted last make up the pending sequence.
 *
 * @param last the scan state
 * @return the length of the character sequence of the scan state, or -1 for
 *         the states which are not listed here
 */
private static final int getLastLength (ScanState last) {
    switch (last) {
        case NONE:
            return 0;

        case BACKSLASH:
        case CARRIAGE_RETURN:
        case LESS:
        case SLASH:
        case STAR:
            return 1;

        case LESS_LESS:
        case SLASH_STAR:
            return 2;

        case HEREDOC:
        case SLASH_STAR_STAR:
            return 3;

        default:
            return -1;
    }
}
/**
 * Accepts the current character as an ordinary part of the partition which
 * is being scanned: nothing special is pending any more and the partition
 * grows by one character.
 */
private final void consume() {
    fLast = ScanState.NONE;
    fTokenLength += 1;
}
/**
 * Finishes the current partition with the character just read. The character
 * is counted as part of the partition, and the scanner continues in the PHP
 * state without any prefix for the following partition.
 *
 * @param state the type of the partition which ends here
 * @return the token for the type of the finished partition
 */
private final IToken postFix (PartState state) {
    fTokenLength += 1;          // the closing character belongs to the partition
    fPrefixLength = 0;          // the following partition starts from scratch,
    fState = PartState.PHP;     // it is plain PHP
    fLast = ScanState.NONE;     // and nothing significant is pending
    return fTokens[state.ordinal()];
}
/**
 * Called when the start sequence (prefix) of a new partition was found. The
 * characters of the pending scan state (fLast) are removed from the length of
 * the partition scanned so far, then the scanner is switched to the new
 * partition.
 *
 * When emulating and a non empty PHP partition ends here, its offset and
 * length are additionally recorded in fJavaOffset / fJavaLength and the token
 * length is set to 1.
 *
 * @param oldState     the type of the partition which ends here
 * @param newState     the type of the partition which starts here
 * @param last         the scan state to continue with
 * @param prefixLength the number of characters of the new partition which are already read
 * @return the token for the type of the old partition
 */
private final IToken preFix (PartState oldState, PartState newState, ScanState last, int prefixLength) {
    final int remaining = fTokenLength - getLastLength (fLast);     // length without the pending characters
    final boolean recordForEmulation = fEmulate && (oldState == PartState.PHP) && (remaining > 0);

    if (recordForEmulation) {
        fJavaOffset = fTokenOffset;
        fJavaLength = remaining;
        fTokenLength = 1;
    }
    else {
        fTokenLength = remaining;
    }

    // switch over to the partition which starts here
    fState = newState;
    fPrefixLength = prefixLength;
    fLast = last;

    return fTokens[oldState.ordinal()];
}
/**
 * Maps a partition content type to the corresponding scanner state.
 *
 * @param contentType the content type, may be <code>null</code>
 * @return the matching state, or PartState.PHP if the content type is
 *         <code>null</code> or not one of the known partition types
 */
private static PartState getState (String contentType) {
    if (contentType == null) {
        return PartState.PHP;
    }
    if (contentType.equals (PHP_SINGLELINE_COMMENT)) {
        return PartState.SINGLE_LINE_COMMENT;
    }
    if (contentType.equals (PHP_MULTILINE_COMMENT)) {
        return PartState.MULTI_LINE_COMMENT;
    }
    if (contentType.equals (PHP_PHPDOC_COMMENT)) {
        return PartState.PHPDOC;
    }
    if (contentType.equals (PHP_STRING_DQ)) {
        return PartState.STRING_DQ;
    }
    if (contentType.equals (PHP_STRING_SQ)) {
        return PartState.STRING_SQ;
    }
    if (contentType.equals (PHP_STRING_HEREDOC)) {
        return PartState.STRING_HEREDOC;
    }
    return PartState.PHP;
}
/**
 * Configures the scanner for a range which may start in the middle of a
 * partition.
 *
 * Because of the PHP heredoc syntax we need to parse from the beginning of a
 * heredoc partition, and not from anywhere in the middle: without reading the
 * start of the heredoc (and with it the heredoc start ID) the heredoc end ID
 * can't be recognized. So, whenever the start of the enclosing partition is
 * known, scanning restarts from partitionOffset in the PHP state and the
 * range is enlarged accordingly.
 *
 * If the partition start is unknown (negative partitionOffset), scanning
 * starts at offset in the state given by contentType. The token offset is
 * then offset itself; a negative partitionOffset is never used as token
 * offset or to compute a prefix length, which would report a token that
 * starts before the document.
 *
 * @param document        the document to scan
 * @param offset          the offset of the range to scan
 * @param length          the length of the range to scan
 * @param contentType     the content type of the partition containing offset,
 *                        or <code>null</code> if unknown
 * @param partitionOffset the offset of the partition containing offset, or a
 *                        negative value if unknown
 * @see IPartitionTokenScanner#setPartialRange (IDocument, int, int, String, int)
 */
public void setPartialRange (IDocument document, int offset, int length, String contentType, int partitionOffset) {
    if (partitionOffset >= 0) {
        // restart at the beginning of the partition
        fScanner.setRange (document, partitionOffset, length + (offset - partitionOffset));
        fTokenOffset = partitionOffset;
        fState = PartState.PHP;
    }
    else {
        // partition start unknown: resume right at the given offset
        fScanner.setRange (document, offset, length);
        fTokenOffset = offset;
        fState = getState (contentType);
    }

    fTokenLength = 0;
    fPrefixLength = 0;
    fLast = ScanState.NONE;

    // emulate JavaPartitionScanner
    if (fEmulate) {
        fJavaOffset = -1;
        fJavaLength = 0;
    }
}
/**
 * Configures the scanner to scan the given range of the document from
 * scratch, starting in the PHP state.
 *
 * @param document the document to scan
 * @param offset   the offset of the range to scan
 * @param length   the length of the range to scan
 * @see ITokenScanner#setRange(IDocument, int, int)
 */
public void setRange (IDocument document, int offset, int length) {
    fScanner.setRange (document, offset, length);

    // reset the scanner state
    fState = PartState.PHP;
    fLast = ScanState.NONE;
    fPrefixLength = 0;
    fTokenLength = 0;
    fTokenOffset = offset;

    if (!fEmulate) {
        return;
    }

    // emulate JavaPartitionScanner: nothing recorded yet
    fJavaLength = 0;
    fJavaOffset = -1;
}
/**
 * Returns the length of the last token returned by nextToken().
 *
 * @return the length of the last token
 * @see ITokenScanner#getTokenLength()
 */
public int getTokenLength() {
    return this.fTokenLength;
}
/**
 * Returns the offset of the last token returned by nextToken(). If
 * AbstractPartitioner.DEBUG is set, a negative offset fails an assertion.
 *
 * @return the offset of the last token
 * @see ITokenScanner#getTokenOffset()
 */
public int getTokenOffset() {
    final int offset = fTokenOffset;

    if (AbstractPartitioner.DEBUG) {
        Assert.isTrue(offset >= 0, Integer.toString(offset));
    }

    return offset;
}
}