misc changes
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPPartitionScanner.java
index 00ec730..bebcc1f 100644 (file)
@@ -11,18 +11,16 @@ Contributors:
 **********************************************************************/
 package net.sourceforge.phpeclipse.phpeditor.php;
 
+import java.io.CharArrayWriter;
 import java.util.ArrayList;
 import java.util.List;
-import org.eclipse.jface.text.rules.EndOfLineRule;
+
 import org.eclipse.jface.text.rules.ICharacterScanner;
 import org.eclipse.jface.text.rules.IPredicateRule;
-import org.eclipse.jface.text.rules.IRule;
 import org.eclipse.jface.text.rules.IToken;
 import org.eclipse.jface.text.rules.IWordDetector;
 import org.eclipse.jface.text.rules.MultiLineRule;
 import org.eclipse.jface.text.rules.RuleBasedPartitionScanner;
-import org.eclipse.jface.text.rules.RuleBasedScanner;
-import org.eclipse.jface.text.rules.SingleLineRule;
 import org.eclipse.jface.text.rules.Token;
 import org.eclipse.jface.text.rules.WordRule;
 
@@ -31,175 +29,526 @@ import org.eclipse.jface.text.rules.WordRule;
  */
 public class PHPPartitionScanner extends RuleBasedPartitionScanner {
 
-       private final static String SKIP = "__skip"; //$NON-NLS-1$
-       public final static String JAVA_MULTILINE_COMMENT = "__html_multiline_comment"; //$NON-NLS-1$
-       //      public final static String JAVA_DOC= "__java_javadoc"; //$NON-NLS-1$
-       public final static String PHP = "__php";
-
-       public class PHPMultiLineRule extends MultiLineRule {
-
-               public PHPMultiLineRule(String startSequence, String endSequence, IToken token) {
-                       super(startSequence, endSequence, token);
-               }
-
-               public PHPMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) {
-                       super(startSequence, endSequence, token, escapeCharacter);
-               }
-
-               protected boolean endSequenceDetected(ICharacterScanner scanner) {
-                       int c;
-                       int c2;
-
-                       boolean lineCommentMode = false;
-                       boolean multiLineCommentMode = false;
-                       boolean stringMode = false;
-
-                       char[][] delimiters = scanner.getLegalLineDelimiters();
-                       while ((c = scanner.read()) != ICharacterScanner.EOF) {
-                               if (c == '\n') {
-                                       lineCommentMode = false;
-                                       // read until end of line
-                               } else if (c == '#') {
-                                       // read until end of line
-                                       lineCommentMode = true;
-                                       continue;
-                               } else if (c == '/') {
-                                       c2 = scanner.read();
-                                       if (c2 == '/') {
-                                               lineCommentMode = true;
+  private final static String SKIP = "__skip"; //$NON-NLS-1$
+  public final static String HTML_MULTILINE_COMMENT = "__html_multiline_comment"; //$NON-NLS-1$
+  //   public final static String JAVA_DOC= "__java_javadoc"; //$NON-NLS-1$
+  public final static String PHP = "__php";
+//  public final static String HTML = "__html";
+
+  public final static IToken php = new Token(PHP);
+//  public final static IToken html = new Token(HTML);
+  public final static IToken comment = new Token(HTML_MULTILINE_COMMENT);
+
+  protected final static char[] php0EndSequence = { '<', '?' };
+  protected final static char[] php1EndSequence = { '<', '?', 'p', 'h', 'p' };
+  protected final static char[] php2EndSequence = { '<', '?', 'P', 'H', 'P' };
+  private StringBuffer test;
+
+  public class PHPMultiLineRule extends MultiLineRule {
+
+    public PHPMultiLineRule(String startSequence, String endSequence, IToken token) {
+      super(startSequence, endSequence, token);
+    }
+
+    public PHPMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) {
+      super(startSequence, endSequence, token, escapeCharacter);
+    }
+
+    protected boolean endSequenceDetected(ICharacterScanner scanner) {
+      int c;
+      int c2;
+
+      boolean lineCommentMode = false;
+      boolean multiLineCommentMode = false;
+      boolean stringMode = false;
+
+      char[][] delimiters = scanner.getLegalLineDelimiters();
+      while ((c = scanner.read()) != ICharacterScanner.EOF) {
+        if (lineCommentMode && (c == '\n')) {
+          lineCommentMode = false;
+          // end of line reached, so the line comment is over
+        } else if ((!stringMode) && (c == '#')) {
+          // read until end of line
+          lineCommentMode = true;
+          continue;
+        } else if ((!stringMode) && (!multiLineCommentMode) && (c == '/')) {
+          c2 = scanner.read();
+          if (c2 == '/') {
+            lineCommentMode = true;
+            continue;
+          } else if (c2 == '*') {
+            multiLineCommentMode = true;
+            continue;
+          } else {
+            scanner.unread();
+          }
+        } else if (c == '*' && multiLineCommentMode) {
+          c2 = scanner.read();
+          if (c2 == '/') {
+            multiLineCommentMode = false;
             continue;
-                                       }       else if(c2 == '*') {
-                                               multiLineCommentMode = true;
-                                               continue;
-                                       } else {
-                                               scanner.unread();
-                                       }
-                               } else if (c == '*' && multiLineCommentMode) {
-                                       c2 = scanner.read();
-                                       if (c2 == '/') {
-                                               multiLineCommentMode = false;
-                                               continue;
-                                       } else {
-                                               scanner.unread();
-                                       }
-                               } else if (c == '\\' && stringMode) {
-                                       c2 = scanner.read();
-                                       if (c2 == '"') {
-                                               continue;
-                                       } else {
-                                               scanner.unread();
-                                       }
-                               } else if (c == '"') {
-                                       if (stringMode) {
-                                               stringMode = false;
-                                       } else {
-                                               stringMode = true;
-                                       }
-                                       continue;
-                               }
-                               if (lineCommentMode || multiLineCommentMode || stringMode) {
-                                       continue;
-                               }
-
-                               if (c == fEscapeCharacter) {
-                                       // Skip the escaped character.
-                                       scanner.read();
-                               } else if (fEndSequence.length > 0 && c == fEndSequence[0]) {
-                                       // Check if the specified end sequence has been found.
-                                       if (sequenceDetected(scanner, fEndSequence, true))
-                                               return true;
-                               } else if (fBreaksOnEOL) {
-                                       // Check for end of line since it can be used to terminate the pattern.
-                                       for (int i = 0; i < delimiters.length; i++) {
-                                               if (c == delimiters[i][0] && sequenceDetected(scanner, delimiters[i], false))
-                                                       return true;
-                                       }
-                               }
-                       }
-                       scanner.unread();
-                       return true;
-               }
-       }
-       /**
-        * Detector for empty comments.
-        */
-       static class EmptyCommentDetector implements IWordDetector {
-
-               /* (non-Javadoc)
-               * Method declared on IWordDetector
-                       */
-               public boolean isWordStart(char c) {
-                       return (c == '/');
-               }
-
-               /* (non-Javadoc)
-               * Method declared on IWordDetector
-                       */
-               public boolean isWordPart(char c) {
-                       return (c == '*' || c == '/');
-               }
-       };
-
-       /**
-        * 
-        */
-       static class WordPredicateRule extends WordRule implements IPredicateRule {
-
-               private IToken fSuccessToken;
-
-               public WordPredicateRule(IToken successToken) {
-                       super(new EmptyCommentDetector());
-                       fSuccessToken = successToken;
-                       addWord("/**/", fSuccessToken);
-               }
-
-               /*
-                * @see org.eclipse.jface.text.rules.IPredicateRule#evaluate(ICharacterScanner, boolean)
-                */
-               public IToken evaluate(ICharacterScanner scanner, boolean resume) {
-                       return super.evaluate(scanner);
-               }
-
-               /*
-                * @see org.eclipse.jface.text.rules.IPredicateRule#getSuccessToken()
-                */
-               public IToken getSuccessToken() {
-                       return fSuccessToken;
-               }
-       };
-
-       /**
-        * Creates the partitioner and sets up the appropriate rules.
-        */
-       public PHPPartitionScanner() {
-               super();
-
-               //              IToken javaDoc= new Token(JAVA_DOC);
-               IToken comment = new Token(JAVA_MULTILINE_COMMENT);
-               IToken php = new Token(PHP);
-
-               List rules = new ArrayList();
-
-               // Add rule for single line comments.
-               //      rules.add(new EndOfLineRule("//", Token.UNDEFINED));
-
-               // Add rule for strings and character constants.
-               //              rules.add(new SingleLineRule("\"", "\"", Token.UNDEFINED, '\\'));
-               //      rules.add(new SingleLineRule("'", "'", Token.UNDEFINED, '\\')); 
-
-               // Add special case word rule.
-               rules.add(new WordPredicateRule(comment));
-
-               // Add rules for multi-line comments and javadoc.
-               //rules.add(new MultiLineRule("/**", "*/", javaDoc));
-               rules.add(new MultiLineRule("<!--", "-->", comment));
-               rules.add(new PHPMultiLineRule("<? ", "?>", php));
-               rules.add(new PHPMultiLineRule("<?php", "?>", php));
-               rules.add(new PHPMultiLineRule("<?PHP", "?>", php));
-               //Add rule for processing instructions
-
-               IPredicateRule[] result = new IPredicateRule[rules.size()];
-               rules.toArray(result);
-               setPredicateRules(result);
-       }
+          } else {
+            scanner.unread();
+          }
+        } else if (c == '\\' && stringMode) {
+          c2 = scanner.read();
+          if (c2 == '"') {
+            continue;
+          } else {
+            scanner.unread();
+          }
+        } else if ((!lineCommentMode) && (!multiLineCommentMode) && (c == '"')) {
+          if (stringMode) {
+            stringMode = false;
+          } else {
+            stringMode = true;
+          }
+          continue;
+        }
+        if (lineCommentMode || multiLineCommentMode || stringMode) {
+          continue;
+        }
+
+        if (c == fEscapeCharacter) {
+          // Skip the escaped character.
+          scanner.read();
+        } else if (fEndSequence.length > 0 && c == fEndSequence[0]) {
+          // Check if the specified end sequence has been found.
+          if (sequenceDetected(scanner, fEndSequence, true))
+            return true;
+        } else if (fBreaksOnEOL) {
+          // Check for end of line since it can be used to terminate the pattern.
+          for (int i = 0; i < delimiters.length; i++) {
+            if (c == delimiters[i][0] && sequenceDetected(scanner, delimiters[i], false))
+              return true;
+          }
+        }
+      }
+      scanner.unread();
+      return false;
+    }
+  }
+
+//  public class HTMLMultiLineRule extends MultiLineRule {
+//
+//    public HTMLMultiLineRule(String startSequence, String endSequence, IToken token) {
+//      super(startSequence, endSequence, token);
+//    }
+//
+//    public HTMLMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) {
+//      super(startSequence, endSequence, token, escapeCharacter);
+//    }
+//
+//    protected boolean endSequenceDetected(ICharacterScanner scanner) {
+//      int c;
+//
+//      char[][] delimiters = scanner.getLegalLineDelimiters();
+//      while ((c = scanner.read()) != ICharacterScanner.EOF) {
+//        if (c == '<') {
+//          //       scanner.unread();
+//          if (sequenceDetected(scanner, php2EndSequence, true)) {
+//            // <?PHP
+//            scanner.unread();
+//            scanner.unread();
+//            scanner.unread();
+//            scanner.unread();
+//            scanner.unread();
+//            return true;
+//          }
+//          if (sequenceDetected(scanner, php1EndSequence, true)) {
+//            // <?php
+//            scanner.unread();
+//            scanner.unread();
+//            scanner.unread();
+//            scanner.unread();
+//            scanner.unread();
+//            return true;
+//          }
+//          if (sequenceDetected(scanner, php0EndSequence, true)) {
+//            // <?
+//            scanner.unread();
+//            scanner.unread();
+//            return true;
+//          }
+//          //      scanner.read();
+//        }
+//
+//      }
+//      scanner.unread();
+//      return false;
+//    }
+//
+//    protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
+//
+//      if (resume) {
+//
+//        if (endSequenceDetected(scanner))
+//          return fToken;
+//
+//      } else {
+//
+//        int c = scanner.read();
+//        //     if (c == fStartSequence[0]) {
+//        //       if (sequenceDetected(scanner, fStartSequence, false)) {
+//        if (endSequenceDetected(scanner))
+//          return fToken;
+//        //       }
+//        //     }
+//      }
+//
+//      scanner.unread();
+//      return Token.UNDEFINED;
+//    }
+//
+//    public IToken evaluate(ICharacterScanner scanner, boolean resume) {
+//      if (fColumn == UNDEFINED)
+//        return doEvaluate(scanner, resume);
+//
+//      int c = scanner.read();
+//      scanner.unread();
+//      //    if (c == fStartSequence[0])
+//      return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
+//      //    else
+//      //      return Token.UNDEFINED;
+//    }
+//  }
+
+  public class HTMLPatternRule implements IPredicateRule {
+
+    protected static final int UNDEFINED = -1;
+
+    /** The token to be returned on success */
+    protected IToken fToken;
+
+    /** The pattern's column constraint */
+    protected int fColumn = UNDEFINED;
+    /** The pattern's escape character */
+    protected char fEscapeCharacter;
+    /** Indicates whether end of line terminates the pattern */
+    protected boolean fBreaksOnEOL;
+
+    /**
+     * Creates a rule which returns the specified token on success.
+     *
+     * This rule takes no start or end sequence: its
+     * <code>endSequenceDetected</code> method scans for the PHP open tags
+     * <code>&lt;?</code>, <code>&lt;?php</code> and <code>&lt;?PHP</code> instead.
+     *
+     * The escape character is initialised to <code>(char)0</code> and the rule
+     * does not break on the end of the line (<code>fBreaksOnEOL</code> is <code>false</code>).
+     *
+     * @param token the token which will be returned on success
+     */
+    public HTMLPatternRule(IToken token) {
+      fToken = token;
+      fEscapeCharacter = (char)0;
+      fBreaksOnEOL = false;
+    }
+
+    /**
+     * Sets a column constraint for this rule. If set, the rule's token
+     * will only be returned if the pattern is detected starting at the 
+     * specified column. If the column is smaller than 0, the column
+     * constraint is considered removed.
+     *
+     * @param column the column in which the pattern starts
+     */
+    public void setColumnConstraint(int column) {
+      if (column < 0)
+        column = UNDEFINED;
+      fColumn = column;
+    }
+
+    /**
+     * Evaluates this rule without considering any column constraints.
+     *
+     * @param scanner the character scanner to be used
+     * @return the token resulting from this evaluation
+     */
+    protected IToken doEvaluate(ICharacterScanner scanner) {
+      return doEvaluate(scanner, false);
+    }
+
+    /**
+     * Evaluates this rule without considering any column constraints. Resumes
+     * detection, i.e. looks only for the end sequence required by this rule if the
+     * <code>resume</code> flag is set.
+     *
+     * @param scanner the character scanner to be used
+     * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise
+     * @return the token resulting from this evaluation
+     * @since 2.0
+     */
+    protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
+
+      if (resume) {
+
+        if (endSequenceDetected(scanner))
+          return fToken;
+
+      } else {
+
+        int c = scanner.read();
+        //      if (c == fStartSequence[0]) {
+        //        if (sequenceDetected(scanner, fStartSequence, false)) {
+        if (endSequenceDetected(scanner))
+          return fToken;
+        //        }
+        //      }
+      }
+
+      scanner.unread();
+      return Token.UNDEFINED;
+    }
+
+    /*
+     * @see IRule#evaluate
+     */
+    public IToken evaluate(ICharacterScanner scanner) {
+      return evaluate(scanner, false);
+    }
+
+    /**
+     * Returns whether a PHP open tag was detected. The scanner is read until one of
+     * <code>&lt;?PHP</code>, <code>&lt;?php</code> or <code>&lt;?</code> is matched and is then rewound
+     * to just before that tag; if EOF is read first, the result is <code>false</code>.
+     *
+     * @param scanner the character scanner to be used
+     * @return <code>true</code> if the end sequence has been detected
+     */
+    protected boolean endSequenceDetected(ICharacterScanner scanner) {
+      int c;
+
+      char[][] delimiters = scanner.getLegalLineDelimiters();
+      while ((c = scanner.read()) != ICharacterScanner.EOF) {
+        if (c == '<') {
+          //       scanner.unread();
+          if (sequenceDetected(scanner, php2EndSequence, true)) {
+            // <?PHP
+            scanner.unread();
+            scanner.unread();
+            scanner.unread();
+            scanner.unread();
+            scanner.unread();
+            return true;
+          }
+          if (sequenceDetected(scanner, php1EndSequence, true)) {
+            // <?php
+            scanner.unread();
+            scanner.unread();
+            scanner.unread();
+            scanner.unread();
+            scanner.unread();
+            return true;
+          }
+          if (sequenceDetected(scanner, php0EndSequence, true)) {
+            // <?
+            scanner.unread();
+            scanner.unread();
+            return true;
+          }
+          //      scanner.read();
+        }
+
+      }
+      scanner.unread();
+      return false;
+    }
+
+    /**
+     * Returns whether the next characters to be read by the character scanner
+     * are an exact match with the given sequence. No escape characters are allowed 
+     * within the sequence. If specified the sequence is considered to be found
+     * when reading the EOF character.
+     *
+     * @param scanner the character scanner to be used
+     * @param sequence the sequence to be detected
+     * @param eofAllowed indicates whether EOF terminates the pattern
+     * @return <code>true</code> if the given sequence has been detected
+     */
+    protected boolean sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed) {
+      for (int i = 1; i < sequence.length; i++) {
+        int c = scanner.read();
+        if (c == ICharacterScanner.EOF && eofAllowed) {
+          return true;
+        } else if (c != sequence[i]) {
+          // Non-matching character detected, rewind the scanner back to the start.
+          scanner.unread();
+          for (int j = i - 1; j > 0; j--)
+            scanner.unread();
+          return false;
+        }
+      }
+
+      return true;
+    }
+
+    /*
+     * @see IPredicateRule#evaluate(ICharacterScanner, boolean)
+     * @since 2.0
+     */
+    public IToken evaluate(ICharacterScanner scanner, boolean resume) {
+      if (fColumn == UNDEFINED)
+        return doEvaluate(scanner, resume);
+
+      int c = scanner.read();
+      scanner.unread();
+      //    if (c == fStartSequence[0])
+      return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
+      //    else
+      //      return Token.UNDEFINED;
+    }
+
+    /*
+     * @see IPredicateRule#getSuccessToken()
+     * @since 2.0
+     */
+    public IToken getSuccessToken() {
+      return fToken;
+    }
+  }
+  /**
+   * Detector for empty comments.
+   */
+  static class EmptyCommentDetector implements IWordDetector {
+
+    /* (non-Javadoc)
+    * Method declared on IWordDetector
+       */
+    public boolean isWordStart(char c) {
+      return (c == '/');
+    }
+
+    /* (non-Javadoc)
+    * Method declared on IWordDetector
+       */
+    public boolean isWordPart(char c) {
+      return (c == '*' || c == '/');
+    }
+  };
+
+  /**
+   * Word rule usable as a predicate rule; reports the empty-comment word with the success token.
+   */
+  static class WordPredicateRule extends WordRule implements IPredicateRule {
+
+    private IToken fSuccessToken;
+
+    public WordPredicateRule(IToken successToken) {
+      super(new EmptyCommentDetector());
+      fSuccessToken = successToken;
+      addWord("/**/", fSuccessToken);
+    }
+
+    /*
+     * @see org.eclipse.jface.text.rules.IPredicateRule#evaluate(ICharacterScanner, boolean)
+     */
+    public IToken evaluate(ICharacterScanner scanner, boolean resume) {
+      return super.evaluate(scanner);
+    }
+
+    /*
+     * @see org.eclipse.jface.text.rules.IPredicateRule#getSuccessToken()
+     */
+    public IToken getSuccessToken() {
+      return fSuccessToken;
+    }
+  };
+
+  /**
+   * Creates the partitioner and sets up the appropriate rules.
+   */
+  public PHPPartitionScanner() {
+    super();
+
+    //    IToken php = new Token(PHP);
+    //    IToken html = new Token(HTML);
+    //    IToken comment = new Token(HTML_MULTILINE_COMMENT);
+
+    List rules = new ArrayList();
+
+    // Add rule for single line comments.
+    // rules.add(new EndOfLineRule("//", Token.UNDEFINED));
+
+    // Add rule for strings and character constants.
+    //         rules.add(new SingleLineRule("\"", "\"", Token.UNDEFINED, '\\'));
+    // rules.add(new SingleLineRule("'", "'", Token.UNDEFINED, '\\')); 
+
+    // Add special case word rule.
+//    rules.add(new WordPredicateRule(comment));
+
+    // Add rules for HTML multi-line comments and PHP sections.
+    //rules.add(new MultiLineRule("/**", "*/", javaDoc));
+    //  rules.add(new HTMLMultiLineRule("<", "<?", html));
+
+    rules.add(new MultiLineRule("<!--", "-->", comment));
+    rules.add(new PHPMultiLineRule("<?\r", "?>", php));
+    rules.add(new PHPMultiLineRule("<?\n", "?>", php));
+    rules.add(new PHPMultiLineRule("<?\t", "?>", php));
+    rules.add(new PHPMultiLineRule("<? ", "?>", php));
+    rules.add(new PHPMultiLineRule("<?php", "?>", php));
+    rules.add(new PHPMultiLineRule("<?PHP", "?>", php));
+
+//    rules.add(new HTMLPatternRule(html)); // "<", "<?",
+    //Add rule for processing instructions
+
+    IPredicateRule[] result = new IPredicateRule[rules.size()];
+    rules.toArray(result);
+    setPredicateRules(result);
+//    setDefaultReturnToken(html);
+  }
+
+  //    public IToken nextToken() {
+  //      
+  //      if (fContentType == null || fRules == null)
+  //        return getNextToken();
+  //      
+  //      fTokenOffset= fOffset;
+  //      fColumn= UNDEFINED;
+  //      boolean resume= (fPartitionOffset < fOffset);
+  //          
+  //      IPredicateRule rule;
+  //      IToken token;
+  //      
+  //      for (int i= 0; i < fRules.length; i++) {
+  //        rule= (IPredicateRule) fRules[i];
+  //        token= rule.getSuccessToken();
+  //        if (fContentType.equals(token.getData())) {
+  //          if (resume)
+  //            fTokenOffset= fPartitionOffset;
+  //          token= rule.evaluate(this, resume);
+  //          if (!token.isUndefined()) {
+  //            fContentType= null;
+  //            return token;
+  //          }
+  //        }
+  //      }
+  //      
+  //      fContentType= null;
+  //      return getNextToken();
+  //    }
+  //    
+  //    public IToken getNextToken() {
+  //      
+  //      IToken token;
+  //      
+  //      while (true) {
+  //        
+  //        fTokenOffset= fOffset;
+  //        fColumn= UNDEFINED;
+  //        
+  //        if (fRules != null) {
+  //          for (int i= 0; i < fRules.length; i++) {
+  //            token= (fRules[i].evaluate(this));
+  //            if (!token.isUndefined())
+  //              return token;
+  //          }
+  //        }
+  //        
+  //        if (read() == EOF)
+  //          return Token.EOF;
+  //        else
+  //          return fDefaultReturnToken;
+  //      }
+  //    }
 }