Fixed bug with unterminated strings at end of file
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPPartitionScanner.java
index 8e2485d..69bc1d5 100644 (file)
-/**
- * This program and the accompanying materials
- * are made available under the terms of the Common Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/cpl-v10.html
- * Created on 05.03.2003
- *
- * @author Stefan Langer (musk)
- * @version $Revision: 1.10 $
- */
+/**********************************************************************
+ Copyright (c) 2002  Widespace, OU  and others.
+ All rights reserved.   This program and the accompanying materials
+ are made available under the terms of the Common Public License v1.0
+ which accompanies this distribution, and is available at
+ http://solareclipse.sourceforge.net/legal/cpl-v10.html
+
+ Contributors:
+ Igor Malinin - initial contribution
+
+ $Id: PHPPartitionScanner.java,v 1.26 2005-05-05 14:06:38 axelcl Exp $
+ **********************************************************************/
 package net.sourceforge.phpeclipse.phpeditor.php;
 
-import java.util.*;
+import java.util.HashMap;
+import java.util.Map;
+
+import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
 
-import org.eclipse.jface.text.*;
-import org.eclipse.jface.text.rules.*;
+import org.eclipse.core.internal.indexing.AbstractPagePolicy;
+import org.eclipse.jface.text.Assert;
+import org.eclipse.jface.text.BadLocationException;
+import org.eclipse.jface.text.IDocument;
+import org.eclipse.jface.text.rules.ICharacterScanner;
+import org.eclipse.jface.text.rules.IPartitionTokenScanner;
+import org.eclipse.jface.text.rules.IToken;
+import org.eclipse.jface.text.rules.Token;
 
 /**
  * 
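+ * Partition scanner for PHP documents: every region that starts with
+ * "&lt;?" is reported as a {@link #PHP_SCRIPTING_AREA} token, all other text
+ * is returned as <code>Token.UNDEFINED</code>.
+ * 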
+ * @author Igor Malinin
  */
-public class PHPPartitionScanner implements IPartitionTokenScanner
-{
-//    private final int HTML = 0;
-//    private final int PHP = 1;
-//    private final int JS = 2;
-//    private final int CSS = 4;
-//    private final int COMMENT = 5;
-//    private final int HTMLCOMMENT = 6;
-
-    private IDocument fDocument = null;
-    private int fOffset = -1;
-//    private int fLastOffset = -1;
-    private String fContentType = IPHPPartitionScannerConstants.HTML;
-       
-       private boolean partitionBorder = false;
-    private int fTokenOffset;
-    private int fEnd = -1;
-    private int fLength;
-    //private int fState = HTML;
-    private Map tokens = new HashMap();
-
-    public PHPPartitionScanner()
-    {
-        this.tokens.put(
-            IPHPPartitionScannerConstants.PHP,
-            new Token(IPHPPartitionScannerConstants.PHP));
-        this.tokens.put(
-            IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT,
-            new Token(IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT));
-        this.tokens.put(
-            IPHPPartitionScannerConstants.HTML,
-            new Token(IPHPPartitionScannerConstants.HTML));
-        this.tokens.put(
-            IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT,
-            new Token(IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT));
-    }
+public class PHPPartitionScanner implements IPartitionTokenScanner {
+  //   public static final String JSP_DIRECTIVE = "__jsp_directive";
+  //   public static final String JSP_COMMENT = "__jsp_comment";
+  //// public static final String JSP_TAG = "__jsp_tag";
+  //   public static final String JSP_DECLARATION = "__jsp_declaration";
+  // Content type of the token returned for regions opened by "<?".
+  // NOTE(review): the literal ends with a space; unclear whether that is
+  // intentional - check every comparison site before changing it.
+  public static final String PHP_SCRIPTING_AREA = "__php_scripting_area ";
 
-    private IToken getToken(String type)
-    {
-        fLength = fOffset-fTokenOffset;
-               IToken token = (IToken)this.tokens.get(type);
-               Assert.isNotNull(token, "Token for type \"" + type + "\" not found!");
-               return token;
-    }
+  //   public static final String JSP_EXPRESSION = "__jsp_expression";
 
-/* (non-Javadoc)
- * @see org.eclipse.jface.text.rules.IPartitionTokenScanner#setPartialRange(org.eclipse.jface.text.IDocument, int, int, java.lang.String, int)
- */
-public void setPartialRange(
-    IDocument document,
-    int offset,
-    int length,
-    String contentType,
-    int partitionOffset)
-{
-       this.setRange(document, offset, length);
-       if(this.tokens.containsKey(contentType))
-               fContentType = contentType;
-    if (partitionOffset > -1)
-    {
-       partitionBorder = false;
-        fTokenOffset = partitionOffset;
-    }
-}
+  // The only scanner state in use; every return path resets state to it.
+  public static final int STATE_DEFAULT = 0;
 
-    /* (non-Javadoc)
-     * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
-     */
-    public int getTokenLength()
-    {
-        return fLength;
-    }
+  //   public static final int STATE_TAG = 1;
+  //   public static final int STATE_SCRIPT = 2;
+
+  // Document being scanned; assigned in setRange().
+  private IDocument document;
+
+  // Start of the scan range; written in setRange() and not read anywhere in
+  // this class.
+  private int begin;
+
+  // Exclusive end of the scan range; read() reports EOF once position >= end.
+  private int end;
+
+  // Document offset at which the current token starts.
+  private int offset;
+
+  // Length of the token last produced by getToken().
+  private int length;
 
-    /* (non-Javadoc)
-     * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
+  // Read cursor: document offset of the next character read() will return.
+  private int position;
+
+  // Scanner state; only ever assigned STATE_DEFAULT in this class.
+  private int state;
+
+  // Cache of Token instances keyed by content type, filled lazily by getToken().
+  private Map tokens = new HashMap();
+
+  /**
+   * Creates a scanner. The constructor does nothing; the document and the
+   * range to scan are supplied later through setRange or setPartialRange.
+   */
+  public PHPPartitionScanner() {
+  }
+
+  /*
+   * Returns the next partition token. The token start is first moved past
+   * the previously returned token, then:
+   *  - if the input starts with "<?", scanUntilPHPEndToken consumes the
+   *    scripting area and a PHP_SCRIPTING_AREA token is returned;
+   *  - otherwise everything up to, but not including, the next "<?" (or up
+   *    to the end of the range) is consumed and Token.UNDEFINED is returned;
+   *  - Token.EOF is returned when no character could be consumed.
+   *
+   * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
+   */
+  public IToken nextToken() {
+    offset += length;
+
+    /*
+     * switch (state) { case STATE_TAG: return nextTagToken(); }
      */
-    public int getTokenOffset()
-    {  
-       return fTokenOffset;
+
+    switch (read()) {
+    case ICharacterScanner.EOF:
+      state = STATE_DEFAULT;
+      return getToken(null);
+
+    case '<':
+      switch (read()) {
+      case ICharacterScanner.EOF:
+        state = STATE_DEFAULT;
+        return getToken(null);
+
+      case '?': // "<?" opens a PHP scripting area
+        // NOTE(review): the character right after "<?" is consumed here and
+        // only compared with EOF; scanUntilPHPEndToken never sees it.
+        int ch = read();
+        //                                              if (Character.isWhitespace((char)ch)) {
+        //                                                return nextJSPToken(PHP_SCRIPTING_AREA);
+        //                                              }
+        switch (ch) {
+        case ICharacterScanner.EOF:
+          state = STATE_DEFAULT;
+          return getToken(PHP_SCRIPTING_AREA);
+
+        //                                                     case '-': // <%- <%--COMMENT
+        //                                                             switch (read()) {
+        //                                                                     case ICharacterScanner.EOF:
+        //                                                                     case '-': // <%--
+        //                                                                             return nextCommentToken();
+        //                                                             }
+        //
+        //                                                             break;
+        }
+
+        return scanUntilPHPEndToken(PHP_SCRIPTING_AREA);
+      }
+
+      // '<' was not followed by '?': push the second character back so the
+      // loop below examines it again.
+      unread();
     }
 
-    /* (non-Javadoc)
-     * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
-     */
-    public IToken nextToken()
-    {
-        int c;
-
-        // check if we are not allready at the end of the
-        // file
-        if ((c = read()) == ICharacterScanner.EOF)
-        {
-            partitionBorder = false;
-            return Token.EOF;
+    // Consume non-PHP text until the next "<?" or the end of the range.
+    loop: while (true) {
+      switch (read()) {
+      case ICharacterScanner.EOF:
+        state = STATE_DEFAULT;
+        return getToken(null);
+
+      case '<':
+        switch (read()) {
+        case ICharacterScanner.EOF:
+          state = STATE_DEFAULT;
+          return getToken(null);
+
+        case '?':
+          unread();
+          break;
+
+        case '<':
+          // "<<": push the second '<' back so it can start a "<?" itself,
+          // then fall through to keep scanning.
+          unread();
+
+        default:
+          continue loop;
         }
-        else
-            unread();
 
-        if (partitionBorder)
-        {
-            fTokenOffset = fOffset;
-            partitionBorder = false;
+        // Second unread: the cursor now sits on the '<' of "<?", so the
+        // returned token ends just before it.
+        unread();
+
+        state = STATE_DEFAULT;
+        return getToken(null);
+      }
+    }
+  }
+
+  /**
+   * Consumes characters until "?&gt;" has been read or no more input is
+   * available, then returns getToken(token).
+   * Quoted strings, "//" and "#" line comments and multi-line comments are
+   * skipped through the read* helpers, so a "?&gt;" inside them does not end
+   * the token. An unterminated string ends the token where the helper
+   * stopped.
+   *
+   * @param token content type passed on to getToken
+   * @return the token produced by getToken
+   */
+  private IToken scanUntilPHPEndToken(String token) {
+    int ch = read();
+    while (true) {
+      switch (ch) {
+      case ICharacterScanner.EOF:
+        state = STATE_DEFAULT;
+        return getToken(token);
+      case '"': // double quoted string
+        // read until end of double quoted string
+        if (!readUntilEscapedDQ()) {
+          state = STATE_DEFAULT;
+          return getToken(token);
         }
+        break;
+      case '\'': // single quoted string
+        // read until end of single quoted string
+        if (!readUntilEscapedSQ()) {
+          state = STATE_DEFAULT;
+          return getToken(token);
+        }
+        break;
+      case '/': // comment start?
+        ch = read();
+        switch (ch) {
+        case ICharacterScanner.EOF:
+          break;
+        case '/':
+          // read until end of line
+          // NOTE(review): a "?>" on the same line is skipped as well -
+          // confirm that this is the intended behaviour.
+          readSingleLine();
+          break;
+        case '*':
+          // read until end of comment
+          readMultiLineComment();
+          break;
+        default:
+          // not a comment: dispatch the character just read
+          continue;
+        }
+        break;
+      case '#': // line comment
+        // read until end of line
+        readSingleLine();
+        break;
+      case '?':
+        ch = read();
+        switch (ch) {
+        case ICharacterScanner.EOF:
+        case '>':
+          state = STATE_DEFAULT;
+          return getToken(token);
 
-        while ((c = read()) != ICharacterScanner.EOF)
-        {
-            switch (c)
-            {
-                case '<' :
-                    if (fContentType != IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT && checkPattern(new char[] { '?', 'p', 'h', 'p' }, true))
-                    {
-
-                        if (fContentType != IPHPPartitionScannerConstants.PHP
-                            && fOffset - 5 > 0)
-                        {
-                            fOffset -= 5;
-                            IToken token = getToken(fContentType);
-                            fContentType = IPHPPartitionScannerConstants.PHP;
-
-                            return token;
-                        }
-                        else
-                            fContentType = IPHPPartitionScannerConstants.PHP;
-
-                        // remember offset of this partition
-                        fTokenOffset = fOffset - 5;
-                    }
-                    else if (checkPattern(new char[] { '!', '-', '-' }))
-                    {
-                        // return previouse partition
-                        if (fContentType
-                            != IPHPPartitionScannerConstants
-                                .HTML_MULTILINE_COMMENT
-                            && fOffset - 4 > 0)
-                        {
-                            fOffset -= 4;
-                            IToken token = getToken(fContentType);
-                            fContentType =
-                                IPHPPartitionScannerConstants
-                                    .HTML_MULTILINE_COMMENT;
-                            return token;
-                        }
-                        else
-                            fContentType =
-                                IPHPPartitionScannerConstants
-                                    .HTML_MULTILINE_COMMENT;
-
-                        fTokenOffset = fOffset - 4;
-                    }
-                    break;
-                case '?' :
-                    if (fContentType == IPHPPartitionScannerConstants.PHP)
-                    {
-                        if ((c = read()) == '>')
-                        {
-                            fContentType = IPHPPartitionScannerConstants.HTML;
-                            partitionBorder = true;
-                            return getToken(IPHPPartitionScannerConstants.PHP);
-                        }
-                        else if(c != ICharacterScanner.EOF)
-                            unread();
-                    }
-                    break;
-                case '-' :
-                    if (fContentType
-                        == IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT
-                        && checkPattern(new char[] { '-', '>' }))
-                    {
-                        fContentType = IPHPPartitionScannerConstants.HTML;
-                        partitionBorder = true;
-                        return getToken(
-                            IPHPPartitionScannerConstants
-                                .HTML_MULTILINE_COMMENT);
-                    }
-                    break;
-                case '/' :
-                    if ((c=read()) == '*')
-                    { // MULTINE COMMENT JAVASCRIPT, CSS, PHP
-                        if (fContentType == IPHPPartitionScannerConstants.PHP
-                            && fOffset - 2 > 0)
-                        {
-                            fOffset -= 2;
-                            IToken token = getToken(fContentType);
-                            fContentType =
-                                IPHPPartitionScannerConstants
-                                    .PHP_MULTILINE_COMMENT;
-
-                            return token;
-                        }
-                        else if (
-                            fContentType
-                                == IPHPPartitionScannerConstants
-                                    .PHP_MULTILINE_COMMENT)
-                        {
-
-                            fTokenOffset = fOffset - 2;
-                        }
-
-                    }
-                    else if(c != ICharacterScanner.EOF)
-                        unread();
-                    break;
-                case '*' :
-                    if ((c = read()) == '/')
-                    {
-                        if (fContentType
-                            == IPHPPartitionScannerConstants
-                                .PHP_MULTILINE_COMMENT)
-                        {
-                            fContentType = IPHPPartitionScannerConstants.PHP;
-                            partitionBorder = true;
-                            return getToken(
-                                IPHPPartitionScannerConstants
-                                    .PHP_MULTILINE_COMMENT);
-                        }
-                        else if (
-                            fContentType
-                                == IPHPPartitionScannerConstants
-                                    .CSS_MULTILINE_COMMENT)
-                        {
-                        }
-                        else if (
-                            fContentType
-                                == IPHPPartitionScannerConstants
-                                    .JS_MULTILINE_COMMENT)
-                        {
-                        }
-                    }
-                    else if(c != ICharacterScanner.EOF)
-                        unread();
-                    break;
-            }
+        case '?':
+          // "??": the second '?' may itself start "?>"
+          continue;
         }
+        // NOTE(review): any other character after '?' is dropped without
+        // being dispatched, so a quote or comment start directly after '?'
+        // (e.g. $a ?"x":"y") is not recognised.
+      }
 
-        // end of file reached but we have to return the
-        // last partition.
-        return getToken(fContentType);
+      ch = read();
     }
-    /* (non-Javadoc)
-     * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(org.eclipse.jface.text.IDocument, int, int)
-     */
-    public void setRange(IDocument document, int offset, int length)
-    {
-        fDocument = document;
-        fOffset = offset;
-        fTokenOffset = offset;
-        fLength = 0;    
-        fEnd = fOffset + length;
-               //partitionBorder = false;
+  }
+
+  //   private IToken nextCommentToken() {
+  //           int ch = read();
+  //           loop: while (true) {
+  //                   switch (ch) {
+  //                           case ICharacterScanner.EOF:
+  //                                   break loop;
+  //
+  //                           case '-': // - --%>
+  //                                   ch = read();
+  //                                   switch (ch) {
+  //                                           case ICharacterScanner.EOF:
+  //                                                   break loop;
+  //
+  //                                           case '-': // -- --%>
+  //                                                   ch = read();
+  //                                                   switch (ch) {
+  //                                                           case ICharacterScanner.EOF:
+  //                                                                   break loop;
+  //
+  //                                                           case '%': // --% --%>
+  //                                                                   ch = read();
+  //                                                                   switch (ch) {
+  //                                                                           case ICharacterScanner.EOF:
+  //                                                                           case '>':
+  //                                                                                   break loop;
+  //                                                                   }
+  //
+  //                                                                   continue loop;
+  //
+  //                                                           case '-': // --- ---%>
+  //                                                                   unread();
+  //                                                                   continue loop;
+  //                                                   }
+  //
+  //                                                   ch = read();
+  //                                                   continue loop;
+  //                                   }
+  //                   }
+  //
+  //                   ch = read();
+  //           }
+  //
+  //           return getToken(JSP_COMMENT);
+  //   }
+
+  /**
+   * Closes the current token at the read cursor and returns its IToken.
+   * Sets length to position - offset as a side effect.
+   *
+   * @param type content type of the token, or null for untyped text
+   * @return Token.EOF if nothing was consumed, Token.UNDEFINED if type is
+   *         null, otherwise the cached Token for type (created on first use)
+   */
+  private IToken getToken(String type) {
+    length = position - offset;
+
+    // A zero-length token means no input was left.
+    if (length == 0) {
+      return Token.EOF;
+    }
+
+    if (type == null) {
+      return Token.UNDEFINED;
+    }
+
+    IToken token = (IToken) tokens.get(type);
+    if (token == null) {
+      token = new Token(type);
+      tokens.put(type, token);
+    }
+
+    return token;
+  }
+
+  /**
+   * Returns the character at the read cursor and advances the cursor.
+   *
+   * @return the character, or ICharacterScanner.EOF when the cursor is at or
+   *         past the end of the range or the document rejects the position
+   */
+  private int read() {
+    if (position >= end) {
+      return ICharacterScanner.EOF;
+    }
+
+    try {
+      return document.getChar(position++);
+    } catch (BadLocationException e) {
+      // position++ already ran before getChar threw; undo it.
+      --position;
+      return ICharacterScanner.EOF;
     }
+  }
 
-    private int read()
-    {
-        try
-        {
-                       if (fOffset < fEnd)
-            {
-                               return fDocument.getChar(fOffset++);
-            }
-                       return ICharacterScanner.EOF;
+  /**
+   * Advances the read cursor past the closing double quote of a string whose
+   * opening quote has already been consumed. The character following a
+   * backslash is skipped, so an escaped quote does not end the string.
+   * NOTE(review): the loop is bounded by the document length, not by end, so
+   * it can read beyond the configured range - confirm this is intended.
+   *
+   * @return true if a closing quote was consumed; false if the cursor was
+   *         already at the end of the range or the document ended first
+   */
+  private boolean readUntilEscapedDQ() {
+    // scan forward to the closing, unescaped double quote
+    if (position >= end) {
+      return false;
+    }
+    try {
+      char ch;
+      while (true) {
+        ch = document.getChar(position++);
+        if (ch == '\\') {
+          ch = document.getChar(position++); // ignore escaped character
+        } else if (ch == '"') {
+          return true;
         }
-        catch (BadLocationException e)
-        {
-            // should never happen
-            // TODO write stacktrace to log
-            fOffset = fEnd;
-            return ICharacterScanner.EOF;
+      }
+    } catch (BadLocationException e) {
+      // undo the increment of the read that failed
+      --position;
+    }
+    return false;
+  }
+  
+  /**
+   * Advances the read cursor past the closing single quote of a string whose
+   * opening quote has already been consumed. The character following a
+   * backslash is skipped, so an escaped quote does not end the string.
+   * NOTE(review): the loop is bounded by the document length, not by end, so
+   * it can read beyond the configured range - confirm this is intended.
+   *
+   * @return true if a closing quote was consumed; false if the cursor was
+   *         already at the end of the range or the document ended first
+   */
+  private boolean readUntilEscapedSQ() {
+    // scan forward to the closing, unescaped single quote
+    if (position >= end) {
+      return false;
+    }
+    try {  
+      char ch;
+      while (true) {
+        ch = document.getChar(position++); 
+        if (ch == '\\') {
+          ch = document.getChar(position++); // ignore escaped character
+        } else if (ch == '\'') {
+          return true;
         }
+      }
+    } catch (BadLocationException e) {
+      // undo the increment of the read that failed
+      --position;
     }
+    return false;
+  }
 
-    private void unread()
-    {
-        --fOffset;
+  /**
+   * Advances the read cursor past the next '\n', or to the end of the
+   * document if no line feed follows. Does nothing when the cursor is
+   * already at or past the end of the range.
+   */
+  private void readSingleLine() {
+    if (position >= end) {
+      return;
     }
+    try {
+      while (document.getChar(position++) != '\n') {
 
-    private boolean checkPattern(char[] pattern)
-    {
-        return checkPattern(pattern, false);
+      }
+    } catch (BadLocationException e) {
+      // end of document reached; undo the increment of the failed read
+      --position;
+      return;
     }
+  }
 
-    /**
-     * Check if next character sequence read from document is equals to 
-     * the provided pattern. Pattern is read from left to right until the 
-     * first character read doesn't match. If this happens all read characters are
-     * unread.
-     * @param pattern The pattern to check.
-     * @return <code>true</code> if pattern is equals else returns <code>false</code>.
-     */
-    private boolean checkPattern(char[] pattern, boolean ignoreCase)
-    {
-       int prevOffset = fOffset;
-        for (int i = 0; i < pattern.length; i++)
-        {
-            int c = read();
-                       
-                       if(c == ICharacterScanner.EOF || !letterEquals(c, pattern[i], ignoreCase))
-            {
-               fOffset = prevOffset;
-                return false;
-            }
+  // Advances the read cursor past the next '*' that is directly followed by
+  // '/', i.e. past the end of a multi-line comment whose opening has already
+  // been consumed. Stops at the end of the document if the comment is never
+  // closed; does nothing when the cursor is already at or past the end of
+  // the range.
+  private void readMultiLineComment() {
+    if (position >= end) {
+      return;
+    }
+    try {
+      char ch;
+      while (true) {
+        ch = document.getChar(position++);
+        if (ch == '*') {
+          // peek at the next character without consuming it
+          if (document.getChar(position) == '/') {
+            position++;
+            break;
+          }
         }
-
-        return true;
+      }
+    } catch (BadLocationException e) {
+      // NOTE(review): if the peek above is what failed, this steps back over
+      // the '*' that was read successfully.
+      --position;
+      return;
     }
-    
-    private boolean letterEquals(int test, char letter, boolean ignoreCase)
-    {
-       if(test == letter)
-               return true;
-       else if(ignoreCase && Character.isLowerCase(letter) && test == Character.toUpperCase(letter))
-               return true;
-       else if(ignoreCase && Character.isUpperCase(letter) && test == Character.toLowerCase(letter))
-               return true;
-       
-       return false;
+  }
+
+  /**
+   * Moves the read cursor back by one character. No lower bound is checked.
+   */
+  private void unread() {
+    --position;
+  }
+
+  /*
+   * Returns the document offset at which the current token starts. When
+   * AbstractPartitioner.DEBUG is set, a negative offset fails an assertion.
+   *
+   * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
+   */
+  public int getTokenOffset() {
+    if (AbstractPartitioner.DEBUG) {
+      Assert.isTrue(offset >= 0, Integer.toString(offset));
     }
-    
-}
+    return offset;
+  }
+
+  /*
+   * Returns the length computed by the most recent getToken call
+   * (0 right after setRange).
+   *
+   * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
+   */
+  public int getTokenLength() {
+    return length;
+  }
+
+  /*
+   * Configures the scanner to read [offset, offset + length) of the given
+   * document and resets the token start, the read cursor and the token
+   * length.
+   *
+   * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int, int)
+   */
+  public void setRange(IDocument document, int offset, int length) {
+    this.document = document;
+    this.begin = offset;
+    this.end = offset + length; // exclusive
+
+    this.offset = offset;
+    this.position = offset;
+    this.length = 0;
+  }
+
+  /*
+   * Configures the scanner for a partial rescan. Scanning starts at the
+   * beginning of the partition that contains offset, when that is known, so
+   * the partition is re-tokenised from its real start; it always ends at
+   * offset + length, the end of the range the caller asked for.
+   *
+   * partitionOffset may be -1 when the partition start is unknown; scanning
+   * then starts at offset. contentType is not used.
+   *
+   * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
+   */
+  public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {
+    state = STATE_DEFAULT;
+    // -1 means "no partition start known": never use it as a document offset.
+    int start = partitionOffset > -1 ? partitionOffset : offset;
+    // Grow the length by the distance moved back so the scan still reaches
+    // offset + length instead of stopping short of the requested end.
+    setRange(document, start, offset + length - start);
+  }
+
+  //   private boolean isContinuationPartition(IDocument document, int offset) {
+  //           try {
+  //                   String type = document.getContentType(offset - 1);
+  //
+  //                   if (type != IDocument.DEFAULT_CONTENT_TYPE) {
+  //                           return true;
+  //                   }
+  //           } catch (BadLocationException e) {}
+  //
+  //           return false;
+  //   }
+}
\ No newline at end of file