1) Fixed issue #347: Syntax highlight doesn't like apostrophe in heredoc.
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPPartitionScanner.java
index 81392ce..a3a6e0f 100644 (file)
-/**
- * This program and the accompanying materials
- * are made available under the terms of the Common Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/cpl-v10.html
- * Created on 05.03.2003
- *
- * @author Stefan Langer (musk)
- * @version $Revision: 1.12 $
- */
+/**********************************************************************
+ Copyright (c) 2002  Widespace, OU  and others.
+ All rights reserved.   This program and the accompanying materials
+ are made available under the terms of the Common Public License v1.0
+ which accompanies this distribution, and is available at
+ http://solareclipse.sourceforge.net/legal/cpl-v10.html
+
+ Contributors:
+ Igor Malinin - initial contribution
+
+ $Id: PHPPartitionScanner.java,v 1.35 2007-03-17 14:07:31 axelcl Exp $
+ **********************************************************************/
 package net.sourceforge.phpeclipse.phpeditor.php;
 
-import java.util.*;
+import java.util.HashMap;
+import java.util.Map;
 
-import org.eclipse.jface.text.*;
-import org.eclipse.jface.text.rules.*;
+import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
+import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
+
+//incastrix
+//import org.eclipse.jface.text.Assert;
+import org.eclipse.core.runtime.Assert;
+import org.eclipse.jface.text.BadLocationException;
+import org.eclipse.jface.text.IDocument;
+import org.eclipse.jface.text.rules.ICharacterScanner;
+import org.eclipse.jface.text.rules.IPartitionTokenScanner;
+import org.eclipse.jface.text.rules.IToken;
+import org.eclipse.jface.text.rules.Token;
 
 /**
  * 
+ * 
+ * @author Igor Malinin
  */
-public class PHPPartitionScanner implements IPartitionTokenScanner
-{
-    private static final boolean DEBUG = false;
-    private IDocument fDocument = null;
-    private int fOffset = -1;
-    private String fContentType = IPHPPartitionScannerConstants.HTML;
-    private String fPrevContentType;
-
-    private boolean partitionBorder = false;
-    private int fTokenOffset;
-    private int fEnd = -1;
-    private int fLength;
-    private Map tokens = new HashMap();
-
-    public PHPPartitionScanner()
-    {
-        this.tokens.put(
-            IPHPPartitionScannerConstants.PHP,
-            new Token(IPHPPartitionScannerConstants.PHP));
-        this.tokens.put(
-            IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT,
-            new Token(IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT));
-        this.tokens.put(
-            IPHPPartitionScannerConstants.HTML,
-            new Token(IPHPPartitionScannerConstants.HTML));
-        this.tokens.put(
-            IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT,
-            new Token(IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT));
-        this.tokens.put(
-            IDocument.DEFAULT_CONTENT_TYPE,
-            new Token(IDocument.DEFAULT_CONTENT_TYPE));
-    }
-
-    private IToken getToken(String type)
-    {
-        fLength = fOffset - fTokenOffset;
-        IToken token = (IToken) this.tokens.get(type);
-        Assert.isNotNull(token, "Token for type \"" + type + "\" not found!");
-        if (DEBUG)
-        {
-            System.out.println(
-                "Partition: fTokenOffset="
-                    + fTokenOffset
-                    + " fContentType="
-                    + fContentType
-                    + " fLength="
-                    + fLength);
-        }
-        return token;
-    }
-
-    /* (non-Javadoc)
-     * @see org.eclipse.jface.text.rules.IPartitionTokenScanner#setPartialRange(org.eclipse.jface.text.IDocument, int, int, java.lang.String, int)
-     */
-    public void setPartialRange(
-        IDocument document,
-        int offset,
-        int length,
-        String contentType,
-        int partitionOffset)
-    {
-        this.setRange(document, offset, length);
-        if (DEBUG)
-        {
-            System.out.println(
-                "PartialRange: contentType="
-                    + contentType
-                    + " partitionOffset="
-                    + partitionOffset);
-        }
-
-        if (this.tokens.containsKey(contentType))
-            fContentType = contentType;
-        // TODO Calculate previouse contenttype
-        if (partitionOffset > -1)
-        {
-            partitionBorder = false;
-            fTokenOffset = partitionOffset;
-        }
-    }
-
-    /* (non-Javadoc)
-     * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
-     */
-    public int getTokenLength()
-    {
-        return fLength;
-    }
-
-    /* (non-Javadoc)
-     * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
-     */
-    public int getTokenOffset()
-    {
-        return fTokenOffset;
-    }
-
-    /* (non-Javadoc)
-     * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
-     */
-    public IToken nextToken()
-    {
-        int c;
-
-        // check if we are not allready at the end of the
-        // file
-        if ((c = read()) == ICharacterScanner.EOF)
-        {
-            partitionBorder = false;
-            return Token.EOF;
-        }
-        else
-            unread();
-
-        if (partitionBorder)
-        {
-            fTokenOffset = fOffset;
-            partitionBorder = false;
-        }
-
-        while ((c = read()) != ICharacterScanner.EOF)
-        {
-            switch (c)
-            {
-                case '<' :
-                    if (fContentType
-                        != IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT
-                        && checkPattern(new char[] { '?', 'p', 'h', 'p' }, true))
-                    {
-            if (fContentType != IPHPPartitionScannerConstants.PHP
-                            && fOffset - 5 > 0)
-                        {
-                            fOffset -= 5;
-                            IToken token = getToken(fContentType);
-                            // save previouse contenttype
-              fPrevContentType = fContentType;
-                     
-                            fContentType = IPHPPartitionScannerConstants.PHP;
-
-                            return token;
-                        }
-                        else
-                            fContentType = IPHPPartitionScannerConstants.PHP;
-
-                        // remember offset of this partition
-                        fTokenOffset = fOffset - 5;
-                    }
-                    else if (checkPattern(new char[] { '!', '-', '-' }))
-                    {
-                        // return previouse partition
-                        if (fContentType
-                            != IPHPPartitionScannerConstants
-                                .HTML_MULTILINE_COMMENT
-                            && fOffset - 4 > 0)
-                        {
-                            fOffset -= 4;
-                            IToken token = getToken(fContentType);
-
-                            fContentType =
-                                IPHPPartitionScannerConstants
-                                    .HTML_MULTILINE_COMMENT;
-                            return token;
-                        }
-                        else
-                            fContentType =
-                                IPHPPartitionScannerConstants
-                                    .HTML_MULTILINE_COMMENT;
-
-                        fTokenOffset = fOffset - 4;
-                    }
-                    break;
-                case '?' :
-                    if (fContentType == IPHPPartitionScannerConstants.PHP)
-                    {
-                        if ((c = read()) == '>')
-                        {      // TODO Actually calculate the previouse contenttype from the document
-                          if(fPrevContentType != null)
-                            fContentType = fPrevContentType;
-                          else
-                              fContentType = IPHPPartitionScannerConstants.HTML;
-                            partitionBorder = true;
-                            return getToken(IPHPPartitionScannerConstants.PHP);
-                        }
-                        else if (c != ICharacterScanner.EOF)
-                            unread();
-                    }
-                    break;
-                case '-' :
-                    if (fContentType
-                        == IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT
-                        && checkPattern(new char[] { '-', '>' }))
-                    {
-                        fContentType = IPHPPartitionScannerConstants.HTML;
-                        partitionBorder = true;
-                        return getToken(
-                            IPHPPartitionScannerConstants
-                                .HTML_MULTILINE_COMMENT);
-                    }
-                    break;
-                case '/' :
-                    if ((c = read()) == '*')
-                    { // MULTINE COMMENT JAVASCRIPT, CSS, PHP
-                        if (fContentType == IPHPPartitionScannerConstants.PHP
-                            && fOffset - 2 > 0)
-                        {
-                            fOffset -= 2;
-                            IToken token = getToken(fContentType);
-
-                            fContentType =
-                                IPHPPartitionScannerConstants
-                                    .PHP_MULTILINE_COMMENT;
-
-                            return token;
-                        }
-                        else if (
-                            fContentType
-                                == IPHPPartitionScannerConstants
-                                    .PHP_MULTILINE_COMMENT)
-                        {
-
-                            fTokenOffset = fOffset - 2;
-                        }
-
-                    }
-                    else if (c != ICharacterScanner.EOF)
-                        unread();
-                    break;
-                case '*' :
-                    if ((c = read()) == '/')
-                    {
-                        if (fContentType
-                            == IPHPPartitionScannerConstants
-                                .PHP_MULTILINE_COMMENT)
-                        {
-                            fContentType = IPHPPartitionScannerConstants.PHP;
-                            partitionBorder = true;
-
-                            return getToken(
-                                IPHPPartitionScannerConstants
-                                    .PHP_MULTILINE_COMMENT);
-                        }
-                        else if (
-                            fContentType
-                                == IPHPPartitionScannerConstants
-                                    .CSS_MULTILINE_COMMENT)
-                        {
-                        }
-                        else if (
-                            fContentType
-                                == IPHPPartitionScannerConstants
-                                    .JS_MULTILINE_COMMENT)
-                        {
-                        }
-                    }
-                    else if (c != ICharacterScanner.EOF)
-                        unread();
-                    break;
-            }
-        }
-
-        // end of file reached but we have to return the
-        // last partition.
-        return getToken(fContentType);
-    }
-    /* (non-Javadoc)
-     * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(org.eclipse.jface.text.IDocument, int, int)
-     */
-    public void setRange(IDocument document, int offset, int length)
-    {
-        if (DEBUG)
-        {
-            System.out.println(
-                "SET RANGE: offset=" + offset + " length=" + length);
-        }
-
-        fDocument = document;
-        fOffset = offset;
-        fTokenOffset = offset;
-        fLength = 0;
-        fEnd = fOffset + length;
-        //partitionBorder = false;
-    }
-
-    private int read()
-    {
-        try
-        {
-            if (fOffset < fEnd)
-            {
-                return fDocument.getChar(fOffset++);
-            }
-            return ICharacterScanner.EOF;
-        }
-        catch (BadLocationException e)
-        {
-            // should never happen
-            // TODO write stacktrace to log
-            fOffset = fEnd;
-            return ICharacterScanner.EOF;
-        }
-    }
-
-    private void unread()
-    {
-        --fOffset;
-    }
-
-    private boolean checkPattern(char[] pattern)
-    {
-        return checkPattern(pattern, false);
-    }
-
-    /**
-     * Check if next character sequence read from document is equals to 
-     * the provided pattern. Pattern is read from left to right until the 
-     * first character read doesn't match. If this happens all read characters are
-     * unread.
-     * @param pattern The pattern to check.
-     * @return <code>true</code> if pattern is equals else returns <code>false</code>.
-     */
-    private boolean checkPattern(char[] pattern, boolean ignoreCase)
-    {
-        int prevOffset = fOffset;
-        for (int i = 0; i < pattern.length; i++)
-        {
-            int c = read();
-
-            if (c == ICharacterScanner.EOF
-                || !letterEquals(c, pattern[i], ignoreCase))
-            {
-                fOffset = prevOffset;
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-    private boolean letterEquals(int test, char letter, boolean ignoreCase)
-    {
-        if (test == letter)
-            return true;
-        else if (
-            ignoreCase
-                && Character.isLowerCase(letter)
-                && test == Character.toUpperCase(letter))
-            return true;
-        else if (
-            ignoreCase
-                && Character.isUpperCase(letter)
-                && test == Character.toLowerCase(letter))
-            return true;
-
-        return false;
-    }
-
-}
+public class PHPPartitionScanner implements IPartitionTokenScanner {
+       public static final String PHP_SCRIPTING_AREA = "__php_scripting_area ";
 
+       public static final int STATE_DEFAULT = 0;
+
+       // public static final int STATE_TAG = 1;
+       // public static final int STATE_SCRIPT = 2;
+
+       private IDocument document;
+
+       // private int begin;
+
+       private int end;
+
+       private int offset;
+
+       private int length;
+
+       private int position;
+
+       // private int state;
+
+       private Map tokens = new HashMap();
+
+       /**
+        * Creates a scanner with no document attached. A document and range are
+        * supplied later through {@link #setRange(IDocument, int, int)} or
+        * {@link #setPartialRange(IDocument, int, int, String, int)}.
+        */
+       public PHPPartitionScanner() {
+               super();
+       }
+
+       /**
+        * Returns the next partition token, starting where the previous token
+        * ended.
+        * <p>
+        * A region that begins with <code>&lt;?</code> is handed to
+        * {@link #scanUntilPHPEndToken(String)} and reported as
+        * {@link #PHP_SCRIPTING_AREA}. Any other text is consumed up to (but not
+        * including) the next <code>&lt;?</code>, or up to the end of the range,
+        * and is reported as {@link Token#UNDEFINED}.
+        * 
+        * @return the token for the scanned region, or {@link Token#EOF} when no
+        *         character could be consumed
+        * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
+        */
+       public IToken nextToken() {
+               offset += length;
+
+               int first = read();
+               if (first == ICharacterScanner.EOF) {
+                       return getToken(null);
+               }
+               if (first == '<') {
+                       int second = read();
+                       if (second == ICharacterScanner.EOF) {
+                               return getToken(null);
+                       }
+                       if (second == '?') {
+                               return scanUntilPHPEndToken(PHP_SCRIPTING_AREA);
+                       }
+                       // Not an opening tag: give the second character back.
+                       unread();
+               }
+
+               while (true) {
+                       int current = read();
+                       if (current == ICharacterScanner.EOF) {
+                               return getToken(null);
+                       }
+                       if (current != '<') {
+                               continue;
+                       }
+
+                       int following = read();
+                       if (following == ICharacterScanner.EOF) {
+                               return getToken(null);
+                       }
+                       if (following == '?') {
+                               // Stop in front of "<?" so the next call starts on it.
+                               unread();
+                               unread();
+                               return getToken(null);
+                       }
+                       if (following == '<') {
+                               // This '<' may itself begin "<?"; examine it again.
+                               unread();
+                       }
+               }
+       }
+
+       /**
+        * Scans a PHP scripting area whose opening <code>&lt;?</code> has
+        * already been consumed, and stops after the closing <code>?&gt;</code>
+        * or at the end of the range.
+        * <p>
+        * String literals, heredocs and comments are skipped through the
+        * corresponding <code>read...</code> helpers; if one of them reports that
+        * the range ended, the token is returned immediately.
+        * 
+        * @param token
+        *            the content type to report for the scanned region
+        * @return the token produced by {@link #getToken(String)} for the scanned
+        *         region
+        */
+       private IToken scanUntilPHPEndToken(String token) {
+               int ch = read();
+               while (true) {
+                       switch (ch) {
+                       case ICharacterScanner.EOF:
+                               return getToken(token);
+                       case '"': // double quoted string
+                               if (!readUntilEscapedDQ()) {
+                                       return getToken(token);
+                               }
+                               break;
+                       case '\'': // single quoted string
+                               if (!readUntilEscapedSQ()) {
+                                       return getToken(token);
+                               }
+                               break;
+                       case '<': // heredoc start?
+                               ch = read();
+                               if (ch != '<') {
+                                       // Plain '<' (comparison): the character behind it may
+                                       // open a string or comment, or be EOF, so it has to go
+                                       // through the dispatch again instead of being dropped.
+                                       continue;
+                               }
+                               ch = read();
+                               if (ch != '<') {
+                                       // "<<" (shift operator): same reasoning as above.
+                                       continue;
+                               }
+                               // "<<<": skip the whole heredoc body
+                               if (!readUntilEscapedHEREDOC()) {
+                                       return getToken(token);
+                               }
+                               break;
+                       case '/': // comment start?
+                               ch = read();
+                               if (ch == '/') {
+                                       // line comment
+                                       if (!readSingleLine()) {
+                                               return getToken(token);
+                                       }
+                               } else if (ch == '*') {
+                                       // block comment
+                                       if (!readMultiLineComment()) {
+                                               return getToken(token);
+                                       }
+                               } else {
+                                       // Division operator (or EOF): dispatch the character
+                                       // that followed the slash.
+                                       continue;
+                               }
+                               break;
+                       case '#': // line comment
+                               if (!readSingleLine()) {
+                                       return getToken(token);
+                               }
+                               break;
+                       case '?':
+                               ch = read();
+                               if (ch == ICharacterScanner.EOF || ch == '>') {
+                                       // "?>" closes the scripting area
+                                       return getToken(token);
+                               }
+                               // Dispatch the character that followed the '?'; this also
+                               // makes "??>" end the area at the second '?'.
+                               continue;
+                       }
+
+                       ch = read();
+               }
+       }
+
+       /**
+        * Finishes the current token: stores its length (distance between the
+        * token offset and the read position) and selects the token object to
+        * hand out.
+        * 
+        * @param type
+        *            the content type of the scanned region, or <code>null</code>
+        *            when the region has no dedicated content type
+        * @return {@link Token#EOF} if no character was consumed,
+        *         {@link Token#UNDEFINED} if <code>type</code> is
+        *         <code>null</code>, otherwise the token cached for
+        *         <code>type</code> (created on first use)
+        */
+       private IToken getToken(String type) {
+               length = position - offset;
+               if (length == 0) {
+                       return Token.EOF;
+               }
+               if (type == null) {
+                       return Token.UNDEFINED;
+               }
+
+               Object cached = tokens.get(type);
+               if (cached != null) {
+                       return (IToken) cached;
+               }
+
+               IToken created = new Token(type);
+               tokens.put(type, created);
+               return created;
+       }
+
+       /**
+        * Reads the character at the current position and advances by one.
+        * 
+        * @return the character read, or {@link ICharacterScanner#EOF} when the
+        *         end of the range is reached or the document rejects the
+        *         position (the position is left unchanged in both cases)
+        */
+       private int read() {
+               if (position < end) {
+                       try {
+                               char current = document.getChar(position);
+                               position++;
+                               return current;
+                       } catch (BadLocationException e) {
+                               // treated like the end of the range
+                       }
+               }
+               return ICharacterScanner.EOF;
+       }
+
+       /**
+        * Skips the remainder of a double quoted string whose opening quote has
+        * already been consumed. A backslash hides the character that follows it.
+        * 
+        * @return <code>true</code> if the closing quote was consumed,
+        *         <code>false</code> if the range ended first
+        */
+       private boolean readUntilEscapedDQ() {
+               try {
+                       while (position < end) {
+                               char current = document.getChar(position++);
+                               if (current == '"') {
+                                       return true;
+                               }
+                               if (current == '\\') {
+                                       if (position >= end) {
+                                               return false;
+                                       }
+                                       // Step over the escaped character, whatever it is.
+                                       document.getChar(position++);
+                               }
+                       }
+               } catch (BadLocationException e) {
+                       --position;
+               }
+               return false;
+       }
+
+       /**
+        * Skips the remainder of a single quoted string whose opening quote has
+        * already been consumed. A backslash hides the character that follows it.
+        * 
+        * @return <code>true</code> if the closing quote was consumed,
+        *         <code>false</code> if the range ended first
+        */
+       private boolean readUntilEscapedSQ() {
+               try {
+                       while (position < end) {
+                               char current = document.getChar(position++);
+                               if (current == '\'') {
+                                       return true;
+                               }
+                               if (current == '\\') {
+                                       if (position >= end) {
+                                               return false;
+                                       }
+                                       // Step over the escaped character, whatever it is.
+                                       document.getChar(position++);
+                               }
+                       }
+               } catch (BadLocationException e) {
+                       --position;
+               }
+               return false;
+       }
+
+       /**
+        * Skips a heredoc (or nowdoc) whose opening <code>&lt;&lt;&lt;</code>
+        * has already been consumed.
+        * <p>
+        * The identifier may be preceded by blanks or tabs and may be enclosed in
+        * double quotes (heredoc) or single quotes (nowdoc). The body ends at the
+        * first line that starts with the identifier, provided the identifier is
+        * not merely the prefix of a longer word. The search begins at the line
+        * break that terminates the opening line, so an empty body and blank
+        * lines in front of the closing identifier are handled.
+        * 
+        * @return <code>true</code> if the closing identifier was found (the
+        *         position is then directly behind it), <code>false</code> if no
+        *         identifier follows the operator or the range ended first
+        */
+       private boolean readUntilEscapedHEREDOC() {
+               try {
+                       char ch;
+                       StringBuffer buf = new StringBuffer();
+
+                       if (position >= end) {
+                               return false;
+                       }
+                       ch = document.getChar(position);
+                       position++;
+
+                       // blanks and tabs between "<<<" and the identifier
+                       while (ch == ' ' || ch == '\t') {
+                               if (position >= end) {
+                                       return false;
+                               }
+                               ch = document.getChar(position);
+                               position++;
+                       }
+
+                       // optional quote around the identifier: "ID" or 'ID'
+                       boolean quoted = (ch == '"' || ch == '\'');
+                       char quote = ch;
+                       if (quoted) {
+                               if (position >= end) {
+                                       return false;
+                               }
+                               ch = document.getChar(position);
+                               position++;
+                       }
+
+                       if (!Scanner.isPHPIdentifierStart(ch)) {
+                               return false;
+                       }
+
+                       while (Scanner.isPHPIdentifierPart(ch)) {
+                               buf.append(ch);
+                               if (position >= end) {
+                                       return false;
+                               }
+                               ch = document.getChar(position);
+                               position++;
+                       }
+
+                       if (!quoted || ch != quote) {
+                               // Give back the character behind the identifier: if it is
+                               // the line break, the search below must see it, otherwise
+                               // a closing identifier on the very next line is missed.
+                               position--;
+                       }
+
+                       char[] heredocIdent = buf.toString().toCharArray();
+
+                       while (position < end) {
+                               ch = document.getChar(position);
+                               position++;
+                               if (ch != '\n') {
+                                       continue;
+                               }
+
+                               // Compare the start of the new line with the identifier
+                               // without consuming anything, so that a failed comparison
+                               // cannot swallow a line break.
+                               int probe = position;
+                               int matched = 0;
+                               while (matched < heredocIdent.length && probe < end
+                                               && document.getChar(probe) == heredocIdent[matched]) {
+                                       probe++;
+                                       matched++;
+                               }
+
+                               if (matched == heredocIdent.length
+                                               && (probe >= end || !Scanner
+                                                               .isPHPIdentifierPart(document.getChar(probe)))) {
+                                       // complete identifier, not a prefix of a longer word
+                                       position = probe;
+                                       return true;
+                               }
+                       }
+               } catch (BadLocationException e) {
+                       // Every read advances the position only after it succeeded, so
+                       // there is nothing to roll back here.
+               }
+               return false;
+       }
+
+       /**
+        * Skips a one-line comment (<code>//</code> or <code>#</code>) whose
+        * introducer has already been consumed.
+        * <p>
+        * The comment ends behind the next line feed or directly in front of a
+        * closing <code>?&gt;</code>, whichever comes first. In the latter case
+        * the tag itself is not consumed, so the caller sees it next and can end
+        * the scripting area.
+        * 
+        * @return <code>true</code> if the end of the comment was found,
+        *         <code>false</code> if the range ended first
+        */
+       private boolean readSingleLine() {
+               try {
+                       while (position < end) {
+                               char ch = document.getChar(position);
+                               if (ch == '?' && position + 1 < end
+                                               && document.getChar(position + 1) == '>') {
+                                       // leave "?>" for the caller
+                                       return true;
+                               }
+                               position++;
+                               if (ch == '\n') {
+                                       return true;
+                               }
+                       }
+               } catch (BadLocationException e) {
+                       // The position is only advanced after a successful read, so
+                       // there is nothing to roll back here.
+               }
+               return false;
+       }
+
+       /**
+        * Skips a block comment whose opening <code>/*</code> has already been
+        * consumed.
+        * 
+        * @return <code>true</code> if the closing star-slash pair was consumed,
+        *         <code>false</code> if the range ended first
+        */
+       private boolean readMultiLineComment() {
+               try {
+                       while (position < end) {
+                               if (document.getChar(position++) != '*') {
+                                       continue;
+                               }
+                               if (position >= end) {
+                                       break;
+                               }
+                               // Only peek: a second '*' must stay available as a
+                               // possible start of the closing pair.
+                               if (document.getChar(position) == '/') {
+                                       position++;
+                                       return true;
+                               }
+                       }
+               } catch (BadLocationException e) {
+                       --position;
+               }
+               return false;
+       }
+
+       /**
+        * Moves the read position back by one character.
+        */
+       private void unread() {
+               position--;
+       }
+
+       /**
+        * Returns the document offset at which the last token starts. When
+        * partitioner debugging is enabled, a negative offset trips an assertion.
+        * 
+        * @return the offset of the last token
+        * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
+        */
+       public int getTokenOffset() {
+               final int tokenStart = offset;
+               if (AbstractPartitioner.DEBUG) {
+                       Assert.isTrue(tokenStart >= 0, Integer.toString(tokenStart));
+               }
+               return tokenStart;
+       }
+
+       /**
+        * Returns the length of the last token, as computed when that token was
+        * produced.
+        * 
+        * @return the length of the last token
+        * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
+        */
+       public int getTokenLength() {
+               return this.length;
+       }
+
+       /**
+        * Attaches the document and resets the scanner so that the next token
+        * starts at <code>offset</code> and scanning stops at
+        * <code>offset + length</code>.
+        * 
+        * @param document
+        *            the document to scan
+        * @param offset
+        *            the offset of the first character to scan
+        * @param length
+        *            the number of characters to scan
+        * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int,
+        *      int)
+        */
+       public void setRange(IDocument document, int offset, int length) {
+               this.document = document;
+
+               // token start and read position both begin at the range start
+               this.offset = offset;
+               this.position = offset;
+               this.end = offset + length;
+
+               // no token has been produced yet
+               this.length = 0;
+       }
+
+       /**
+        * Configures the scanner for a range that may begin inside an existing
+        * partition.
+        * <p>
+        * If a partition start is given and lies in front of <code>offset</code>,
+        * scanning restarts at the partition start and the range is extended by
+        * the same amount, so that it still ends at <code>offset + length</code>.
+        * In every other case, including <code>partitionOffset == -1</code>
+        * (no partition known), the requested range is used as is.
+        * 
+        * @param document
+        *            the document to scan
+        * @param offset
+        *            the offset of the requested range
+        * @param length
+        *            the length of the requested range
+        * @param contentType
+        *            the content type at <code>offset</code>; not used by this
+        *            scanner
+        * @param partitionOffset
+        *            the start of the partition containing <code>offset</code>,
+        *            or <code>-1</code> if unknown
+        * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
+        */
+       public void setPartialRange(IDocument document, int offset, int length,
+                       String contentType, int partitionOffset) {
+               if (partitionOffset > -1) {
+                       int delta = offset - partitionOffset;
+                       if (delta > 0) {
+                               setRange(document, partitionOffset, length + delta);
+                               return;
+                       }
+               }
+               // Fall back to the requested offset: partitionOffset may be -1 here
+               // and must not be used as the start of the range.
+               setRange(document, offset, length);
+       }
+
+       // private boolean isContinuationPartition(IDocument document, int offset) {
+       // try {
+       // String type = document.getContentType(offset - 1);
+       //
+       // if (type != IDocument.DEFAULT_CONTENT_TYPE) {
+       // return true;
+       // }
+       // } catch (BadLocationException e) {}
+       //
+       // return false;
+       // }
+}
\ No newline at end of file