From: musk Date: Thu, 27 Mar 2003 15:28:11 +0000 (+0000) Subject: PHPPartitionScanner that is not rule based X-Git-Url: http://secure.phpeclipse.com?hp=4f592efca0b99375a74e955131038e6080516ba3 PHPPartitionScanner that is not rule based --- diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/phpeditor/php/PHPPartitionScanner.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/phpeditor/php/PHPPartitionScanner.java index 213ff71..8e2485d 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/phpeditor/php/PHPPartitionScanner.java +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpeclipse/phpeditor/php/PHPPartitionScanner.java @@ -1,582 +1,338 @@ -/********************************************************************** -Copyright (c) 2000, 2002 IBM Corp. and others. -All rights reserved. This program and the accompanying materials -are made available under the terms of the Common Public License v1.0 -which accompanies this distribution, and is available at -http://www.eclipse.org/legal/cpl-v10.html - -Contributors: - IBM Corporation - Initial implementation - Klaus Hartlage - www.eclipseproject.de -**********************************************************************/ +/** + * This program and the accompanying materials + * are made available under the terms of the Common Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/cpl-v10.html + * Created on 05.03.2003 + * + * @author Stefan Langer (musk) + * @version $Revision: 1.10 $ + */ package net.sourceforge.phpeclipse.phpeditor.php; -import java.util.ArrayList; -import java.util.List; +import java.util.*; -import org.eclipse.jface.text.rules.ICharacterScanner; -import org.eclipse.jface.text.rules.IPredicateRule; -import org.eclipse.jface.text.rules.IToken; -import org.eclipse.jface.text.rules.MultiLineRule; -import org.eclipse.jface.text.rules.RuleBasedPartitionScanner; -import org.eclipse.jface.text.rules.Token; +import org.eclipse.jface.text.*; +import org.eclipse.jface.text.rules.*; /** - * This scanner recognizes the JavaDoc comments and Java multi line comments. + * */ -public class PHPPartitionScanner extends RuleBasedPartitionScanner implements IPHPPartitionScannerConstants { - - // private final static String SKIP = "__skip"; //$NON-NLS-1$ - -// public final static String HTML_MULTILINE_COMMENT = "__html_multiline_comment"; //$NON-NLS-1$ -// // public final static String JAVA_DOC= "__java_javadoc"; //$NON-NLS-1$ -// public final static String PHP = "__php"; //$NON-NLS-1$ -// //public final static String HTML = "__html"; //$NON-NLS-1$ - - public final static IToken php = new Token(PHP); - //public final static IToken html = new Token(HTML); - public final static IToken comment = new Token(HTML_MULTILINE_COMMENT); - - protected final static char[] php0EndSequence = { '<', '?' }; - protected final static char[] php1EndSequence = { '<', '?', 'p', 'h', 'p' }; - protected final static char[] php2EndSequence = { '<', '?', 'P', 'H', 'P' }; +public class PHPPartitionScanner implements IPartitionTokenScanner +{ +// private final int HTML = 0; +// private final int PHP = 1; +// private final int JS = 2; +// private final int CSS = 4; +// private final int COMMENT = 5; +// private final int HTMLCOMMENT = 6; + + private IDocument fDocument = null; + private int fOffset = -1; +// private int fLastOffset = -1; + private String fContentType = IPHPPartitionScannerConstants.HTML; + + private boolean partitionBorder = false; + private int fTokenOffset; + private int fEnd = -1; + private int fLength; + //private int fState = HTML; + private Map tokens = new HashMap(); + + public PHPPartitionScanner() + { + this.tokens.put( + IPHPPartitionScannerConstants.PHP, + new Token(IPHPPartitionScannerConstants.PHP)); + this.tokens.put( + IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT, + new Token(IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT)); + this.tokens.put( + IPHPPartitionScannerConstants.HTML, + new Token(IPHPPartitionScannerConstants.HTML)); + this.tokens.put( + IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT, + new Token(IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT)); + } - private StringBuffer test; + private IToken getToken(String type) + { + fLength = fOffset-fTokenOffset; + IToken token = (IToken)this.tokens.get(type); + Assert.isNotNull(token, "Token for type \"" + type + "\" not found!"); + return token; + } - public class PHPMultiLineRule extends MultiLineRule { +/* (non-Javadoc) + * @see org.eclipse.jface.text.rules.IPartitionTokenScanner#setPartialRange(org.eclipse.jface.text.IDocument, int, int, java.lang.String, int) + */ +public void setPartialRange( + IDocument document, + int offset, + int length, + String contentType, + int partitionOffset) +{ + this.setRange(document, offset, length); + if(this.tokens.containsKey(contentType)) + fContentType = contentType; + if (partitionOffset > -1) + { + partitionBorder = false; + fTokenOffset = partitionOffset; + } +} - public PHPMultiLineRule(String startSequence, String endSequence, IToken token) { - super(startSequence, endSequence, token); + /* (non-Javadoc) + * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength() + */ + public int getTokenLength() + { + return fLength; } - public PHPMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) { - super(startSequence, endSequence, token, escapeCharacter); + /* (non-Javadoc) + * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset() + */ + public int getTokenOffset() + { + return fTokenOffset; } - protected boolean endSequenceDetected(ICharacterScanner scanner) { - int c; - int c2; + /* (non-Javadoc) + * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken() + */ + public IToken nextToken() + { + int c; + + // check if we are not allready at the end of the + // file + if ((c = read()) == ICharacterScanner.EOF) + { + partitionBorder = false; + return Token.EOF; + } + else + unread(); - boolean lineCommentMode = false; - boolean multiLineCommentMode = false; - boolean stringMode = false; + if (partitionBorder) + { + fTokenOffset = fOffset; + partitionBorder = false; + } - char[][] delimiters = scanner.getLegalLineDelimiters(); - while ((c = scanner.read()) != ICharacterScanner.EOF) { - if (c == '#') { - // read until end of line - while ((c = scanner.read()) != ICharacterScanner.EOF) { - if (fEndSequence.length > 0 && c == fEndSequence[0]) { - // Check if the specified end sequence has been found. - if (sequenceDetected(scanner, fEndSequence, true)) - return true; - } else if (c == '\n') { - break; - } - } - continue; - } else if (c == '/' && (c = scanner.read()) != ICharacterScanner.EOF) { - if (c == '/') { - // read until end of line - while ((c = scanner.read()) != ICharacterScanner.EOF) { - if (fEndSequence.length > 0 && c == fEndSequence[0]) { - // Check if the specified end sequence has been found. - if (sequenceDetected(scanner, fEndSequence, true)) - return true; - } else if (c == '\n') { - break; - } - } - continue; - } else if (c == '*') { - // multi-line comment - while ((c = scanner.read()) != ICharacterScanner.EOF) { - if (c == '*' && (c = scanner.read()) != ICharacterScanner.EOF) { - if (c == '/') { - break; - } - scanner.unread(); - } + while ((c = read()) != ICharacterScanner.EOF) + { + switch (c) + { + case '<' : + if (fContentType != IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT && checkPattern(new char[] { '?', 'p', 'h', 'p' }, true)) + { + + if (fContentType != IPHPPartitionScannerConstants.PHP + && fOffset - 5 > 0) + { + fOffset -= 5; + IToken token = getToken(fContentType); + fContentType = IPHPPartitionScannerConstants.PHP; + + return token; + } + else + fContentType = IPHPPartitionScannerConstants.PHP; + + // remember offset of this partition + fTokenOffset = fOffset - 5; + } + else if (checkPattern(new char[] { '!', '-', '-' })) + { + // return previouse partition + if (fContentType + != IPHPPartitionScannerConstants + .HTML_MULTILINE_COMMENT + && fOffset - 4 > 0) + { + fOffset -= 4; + IToken token = getToken(fContentType); + fContentType = + IPHPPartitionScannerConstants + .HTML_MULTILINE_COMMENT; + return token; + } + else + fContentType = + IPHPPartitionScannerConstants + .HTML_MULTILINE_COMMENT; + + fTokenOffset = fOffset - 4; + } + break; + case '?' : + if (fContentType == IPHPPartitionScannerConstants.PHP) + { + if ((c = read()) == '>') + { + fContentType = IPHPPartitionScannerConstants.HTML; + partitionBorder = true; + return getToken(IPHPPartitionScannerConstants.PHP); + } + else if(c != ICharacterScanner.EOF) + unread(); + } + break; + case '-' : + if (fContentType + == IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT + && checkPattern(new char[] { '-', '>' })) + { + fContentType = IPHPPartitionScannerConstants.HTML; + partitionBorder = true; + return getToken( + IPHPPartitionScannerConstants + .HTML_MULTILINE_COMMENT); + } + break; + case '/' : + if ((c=read()) == '*') + { // MULTINE COMMENT JAVASCRIPT, CSS, PHP + if (fContentType == IPHPPartitionScannerConstants.PHP + && fOffset - 2 > 0) + { + fOffset -= 2; + IToken token = getToken(fContentType); + fContentType = + IPHPPartitionScannerConstants + .PHP_MULTILINE_COMMENT; + + return token; + } + else if ( + fContentType + == IPHPPartitionScannerConstants + .PHP_MULTILINE_COMMENT) + { + + fTokenOffset = fOffset - 2; + } + + } + else if(c != ICharacterScanner.EOF) + unread(); + break; + case '*' : + if ((c = read()) == '/') + { + if (fContentType + == IPHPPartitionScannerConstants + .PHP_MULTILINE_COMMENT) + { + fContentType = IPHPPartitionScannerConstants.PHP; + partitionBorder = true; + return getToken( + IPHPPartitionScannerConstants + .PHP_MULTILINE_COMMENT); + } + else if ( + fContentType + == IPHPPartitionScannerConstants + .CSS_MULTILINE_COMMENT) + { + } + else if ( + fContentType + == IPHPPartitionScannerConstants + .JS_MULTILINE_COMMENT) + { + } + } + else if(c != ICharacterScanner.EOF) + unread(); + break; } + } - continue; - } else { - scanner.unread(); - } - } else if (c == '"') { - // string mode - while ((c = scanner.read()) != ICharacterScanner.EOF) { - if (c == '\\') { - c = scanner.read(); - } else if (c == '"') { - break; - } - } - continue; - } else if (c == '\'') { - // string mode - while ((c = scanner.read()) != ICharacterScanner.EOF) { - if (c == '\\') { - c = scanner.read(); - } else if (c == '\'') { - break; + // end of file reached but we have to return the + // last partition. + return getToken(fContentType); + } + /* (non-Javadoc) + * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(org.eclipse.jface.text.IDocument, int, int) + */ + public void setRange(IDocument document, int offset, int length) + { + fDocument = document; + fOffset = offset; + fTokenOffset = offset; + fLength = 0; + fEnd = fOffset + length; + //partitionBorder = false; + } + + private int read() + { + try + { + if (fOffset < fEnd) + { + return fDocument.getChar(fOffset++); } - } - continue; + return ICharacterScanner.EOF; } - - if (c == fEscapeCharacter) { - // Skip the escaped character. - scanner.read(); - } else if (fEndSequence.length > 0 && c == fEndSequence[0]) { - // Check if the specified end sequence has been found. - if (sequenceDetected(scanner, fEndSequence, true)) - return true; - } else if (fBreaksOnEOL) { - // Check for end of line since it can be used to terminate the pattern. - for (int i = 0; i < delimiters.length; i++) { - if (c == delimiters[i][0] && sequenceDetected(scanner, delimiters[i], false)) - return true; - } + catch (BadLocationException e) + { + // should never happen + // TODO write stacktrace to log + fOffset = fEnd; + return ICharacterScanner.EOF; } - } - boolean phpMode = false; - if (c == ICharacterScanner.EOF) { - phpMode = true; - } - scanner.unread(); - return phpMode; } - } - - // public class HTMLMultiLineRule extends MultiLineRule { - // - // public HTMLMultiLineRule(String startSequence, String endSequence, IToken token) { - // super(startSequence, endSequence, token); - // } - // - // public HTMLMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) { - // super(startSequence, endSequence, token, escapeCharacter); - // } - // - // protected boolean endSequenceDetected(ICharacterScanner scanner) { - // int c; - // - // char[][] delimiters = scanner.getLegalLineDelimiters(); - // while ((c = scanner.read()) != ICharacterScanner.EOF) { - // if (c == '<') { - // // scanner.unread(); - // if (sequenceDetected(scanner, php2EndSequence, true)) { - // // null is a legal value -// * @param token the token which will be returned on success -// * @param escapeCharacter any character following this one will be ignored -// * @param indicates whether the end of the line also termines the pattern -// */ -// public HTMLPatternRule(IToken token) { -// fToken = token; -// fEscapeCharacter = (char) 0; -// fBreaksOnEOL = false; -// } -// -// /** -// * Sets a column constraint for this rule. If set, the rule's token -// * will only be returned if the pattern is detected starting at the -// * specified column. If the column is smaller then 0, the column -// * constraint is considered removed. -// * -// * @param column the column in which the pattern starts -// */ -// public void setColumnConstraint(int column) { -// if (column < 0) -// column = UNDEFINED; -// fColumn = column; -// } -// -// /** -// * Evaluates this rules without considering any column constraints. -// * -// * @param scanner the character scanner to be used -// * @return the token resulting from this evaluation -// */ -// protected IToken doEvaluate(ICharacterScanner scanner) { -// return doEvaluate(scanner, false); -// } -// -// /** -// * Evaluates this rules without considering any column constraints. Resumes -// * detection, i.e. look sonly for the end sequence required by this rule if the -// * resume flag is set. -// * -// * @param scanner the character scanner to be used -// * @param resume true if detection should be resumed, false otherwise -// * @return the token resulting from this evaluation -// * @since 2.0 -// */ -// protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) { -// -// if (resume) { -// -// if (endSequenceDetected(scanner)) -// return fToken; -// -// } else { -// -// int c = scanner.read(); -// // if (c == fStartSequence[0]) { -// // if (sequenceDetected(scanner, fStartSequence, false)) { -// if (endSequenceDetected(scanner)) -// return fToken; -// // } -// // } -// } -// -// scanner.unread(); -// return Token.UNDEFINED; -// } -// -// /* -// * @see IRule#evaluate -// */ -// public IToken evaluate(ICharacterScanner scanner) { -// return evaluate(scanner, false); -// } -// -// /** -// * Returns whether the end sequence was detected. As the pattern can be considered -// * ended by a line delimiter, the result of this method is true if the -// * rule breaks on the end of the line, or if the EOF character is read. -// * -// * @param scanner the character scanner to be used -// * @return true if the end sequence has been detected -// */ -// protected boolean endSequenceDetected(ICharacterScanner scanner) { -// int c; -// -// char[][] delimiters = scanner.getLegalLineDelimiters(); -// while ((c = scanner.read()) != ICharacterScanner.EOF) { -// if (c == '<') { -// // scanner.unread(); -// if (sequenceDetected(scanner, php2EndSequence, true)) { -// // true if the given sequence has been detected -// */ -// protected boolean sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed) { -// for (int i = 1; i < sequence.length; i++) { -// int c = scanner.read(); -// if (c == ICharacterScanner.EOF && eofAllowed) { -// return true; -// } else if (c != sequence[i]) { -// // Non-matching character detected, rewind the scanner back to the start. -// scanner.unread(); -// for (int j = i - 1; j > 0; j--) -// scanner.unread(); -// return false; -// } -// } -// -// return true; -// } -// -// /* -// * @see IPredicateRule#evaluate(ICharacterScanner, boolean) -// * @since 2.0 -// */ -// public IToken evaluate(ICharacterScanner scanner, boolean resume) { -// if (fColumn == UNDEFINED) -// return doEvaluate(scanner, resume); -// -// int c = scanner.read(); -// scanner.unread(); -// // if (c == fStartSequence[0]) -// return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED); -// // else -// // return Token.UNDEFINED; -// } -// -// /* -// * @see IPredicateRule#getSuccessToken() -// * @since 2.0 -// */ -// public IToken getSuccessToken() { -// return fToken; -// } -// } - /** - * Detector for empty comments. - */ -// static class EmptyCommentDetector implements IWordDetector { -// -// /* (non-Javadoc) -// * Method declared on IWordDetector -// */ -// public boolean isWordStart(char c) { -// return (c == '/'); -// } -// -// /* (non-Javadoc) -// * Method declared on IWordDetector -// */ -// public boolean isWordPart(char c) { -// return (c == '*' || c == '/'); -// } -// }; - - /** - * - */ -// static class WordPredicateRule extends WordRule implements IPredicateRule { -// -// private IToken fSuccessToken; -// -// public WordPredicateRule(IToken successToken) { -// super(new EmptyCommentDetector()); -// fSuccessToken = successToken; -// addWord("/**/", fSuccessToken); -// } -// -// /* -// * @see org.eclipse.jface.text.rules.IPredicateRule#evaluate(ICharacterScanner, boolean) -// */ -// public IToken evaluate(ICharacterScanner scanner, boolean resume) { -// return super.evaluate(scanner); -// } -// -// /* -// * @see org.eclipse.jface.text.rules.IPredicateRule#getSuccessToken() -// */ -// public IToken getSuccessToken() { -// return fSuccessToken; -// } -// }; - - /** - * Creates the partitioner and sets up the appropriate rules. - */ - public PHPPartitionScanner() { - super(); - - // IToken php = new Token(PHP); - // IToken html = new Token(HTML); - // IToken comment = new Token(HTML_MULTILINE_COMMENT); - - List rules = new ArrayList(); - - // Add rule for single line comments. - // rules.add(new EndOfLineRule("//", Token.UNDEFINED)); - - // Add rule for strings and character constants. - // rules.add(new SingleLineRule("\"", "\"", Token.UNDEFINED, '\\')); - // rules.add(new SingleLineRule("'", "'", Token.UNDEFINED, '\\')); - - // Add special case word rule. - // rules.add(new WordPredicateRule(comment)); - // Add rules for multi-line comments and javadoc. - //rules.add(new MultiLineRule("/**", "*/", javaDoc)); - // rules.add(new HTMLMultiLineRule("<", "", comment)); - rules.add(new PHPMultiLineRule("", php)); - rules.add(new PHPMultiLineRule("", php)); - rules.add(new PHPMultiLineRule("", php)); - rules.add(new PHPMultiLineRule("", php)); - - rules.add(new PHPMultiLineRule("", php)); - rules.add(new PHPMultiLineRule("", php)); - - rules.add(new PHPMultiLineRule("", php)); - rules.add(new PHPMultiLineRule("", php)); - rules.add(new PHPMultiLineRule("", php)); + private void unread() + { + --fOffset; + } - rules.add(new PHPMultiLineRule("", php)); - rules.add(new PHPMultiLineRule("", php)); - rules.add(new PHPMultiLineRule("", php)); - // rules.add(new HTMLPatternRule(html)); // "<", "true if pattern is equals else returns false. + */ + private boolean checkPattern(char[] pattern, boolean ignoreCase) + { + int prevOffset = fOffset; + for (int i = 0; i < pattern.length; i++) + { + int c = read(); + + if(c == ICharacterScanner.EOF || !letterEquals(c, pattern[i], ignoreCase)) + { + fOffset = prevOffset; + return false; + } + } - // public IToken nextToken() { - // - // if (fContentType == null || fRules == null) - // return getNextToken(); - // - // fTokenOffset= fOffset; - // fColumn= UNDEFINED; - // boolean resume= (fPartitionOffset < fOffset); - // - // IPredicateRule rule; - // IToken token; - // - // for (int i= 0; i < fRules.length; i++) { - // rule= (IPredicateRule) fRules[i]; - // token= rule.getSuccessToken(); - // if (fContentType.equals(token.getData())) { - // if (resume) - // fTokenOffset= fPartitionOffset; - // token= rule.evaluate(this, resume); - // if (!token.isUndefined()) { - // fContentType= null; - // return token; - // } - // } - // } - // - // fContentType= null; - // return getNextToken(); - // } - // - // public IToken getNextToken() { - // - // IToken token; - // - // while (true) { - // - // fTokenOffset= fOffset; - // fColumn= UNDEFINED; - // - // if (fRules != null) { - // for (int i= 0; i < fRules.length; i++) { - // token= (fRules[i].evaluate(this)); - // if (!token.isUndefined()) - // return token; - // } - // } - // - // if (read() == EOF) - // return Token.EOF; - // else - // return fDefaultReturnToken; - // } - // } + return true; + } + + private boolean letterEquals(int test, char letter, boolean ignoreCase) + { + if(test == letter) + return true; + else if(ignoreCase && Character.isLowerCase(letter) && test == Character.toUpperCase(letter)) + return true; + else if(ignoreCase && Character.isUpperCase(letter) && test == Character.toLowerCase(letter)) + return true; + + return false; + } + }