Set DEBUG=false; Set DEBUG=true only for developer tests
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPPartitionScanner.java
index b96202a..072f30b 100644 (file)
-/**********************************************************************
-Copyright (c) 2000, 2002 IBM Corp. and others.
-All rights reserved. This program and the accompanying materials
-are made available under the terms of the Common Public License v1.0
-which accompanies this distribution, and is available at
-http://www.eclipse.org/legal/cpl-v10.html
-
-Contributors:
-    IBM Corporation - Initial implementation
-    Klaus Hartlage - www.eclipseproject.de
-**********************************************************************/
+/**
+ * This program and the accompanying materials
+ * are made available under the terms of the Common Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/cpl-v10.html
+ * Created on 05.03.2003
+ *
+ * @author Stefan Langer (musk)
+ * @version $Revision: 1.18 $
+ */
 package net.sourceforge.phpeclipse.phpeditor.php;
 
-import java.util.ArrayList;
-import java.util.List;
-import org.eclipse.jface.text.rules.EndOfLineRule;
-import org.eclipse.jface.text.rules.ICharacterScanner;
-import org.eclipse.jface.text.rules.IPredicateRule;
-import org.eclipse.jface.text.rules.IRule;
-import org.eclipse.jface.text.rules.IToken;
-import org.eclipse.jface.text.rules.IWordDetector;
-import org.eclipse.jface.text.rules.MultiLineRule;
-import org.eclipse.jface.text.rules.RuleBasedPartitionScanner;
-import org.eclipse.jface.text.rules.RuleBasedScanner;
-import org.eclipse.jface.text.rules.SingleLineRule;
-import org.eclipse.jface.text.rules.Token;
-import org.eclipse.jface.text.rules.WordRule;
+import java.util.*;
+
+import org.eclipse.jface.text.*;
+import org.eclipse.jface.text.rules.*;
 
 /**
- * This scanner recognizes the JavaDoc comments and Java multi line comments.
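+ * Partition scanner that splits a document into HTML, HTML multi-line
+ * comment, PHP and PHP multi-line comment partitions.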
  */
-public class PHPPartitionScanner extends RuleBasedPartitionScanner {
-
-       private final static String SKIP= "__skip"; //$NON-NLS-1$
-       public final static String JAVA_MULTILINE_COMMENT= "__html_multiline_comment"; //$NON-NLS-1$
-//     public final static String JAVA_DOC= "__java_javadoc"; //$NON-NLS-1$
-  public final static String PHP= "__php";
-       /**
-        * Detector for empty comments.
-        */
-       static class EmptyCommentDetector implements IWordDetector {
-
-               /* (non-Javadoc)
-               * Method declared on IWordDetector
-               */
-               public boolean isWordStart(char c) {
-                       return (c == '/');
-               }
-
-               /* (non-Javadoc)
-               * Method declared on IWordDetector
-               */
-               public boolean isWordPart(char c) {
-                       return (c == '*' || c == '/');
-               }
-       };
-       
-       /**
-        * 
-        */
-       static class WordPredicateRule extends WordRule implements IPredicateRule {
-               
-               private IToken fSuccessToken;
-               
-               public WordPredicateRule(IToken successToken) {
-                       super(new EmptyCommentDetector());
-                       fSuccessToken= successToken;
-                       addWord("/**/", fSuccessToken);
-               }
-               
-               /*
-                * @see org.eclipse.jface.text.rules.IPredicateRule#evaluate(ICharacterScanner, boolean)
-                */
-               public IToken evaluate(ICharacterScanner scanner, boolean resume) {
-                       return super.evaluate(scanner);
-               }
-
-               /*
-                * @see org.eclipse.jface.text.rules.IPredicateRule#getSuccessToken()
-                */
-               public IToken getSuccessToken() {
-                       return fSuccessToken;
-               }
-       };
-
-       /**
-        * Creates the partitioner and sets up the appropriate rules.
-        */
-       public PHPPartitionScanner() {
-               super();
-
-//             IToken javaDoc= new Token(JAVA_DOC);
-               IToken comment= new Token(JAVA_MULTILINE_COMMENT);
-    IToken php = new Token(PHP);
-               List rules= new ArrayList();
-
-               // Add rule for single line comments.
-       //      rules.add(new EndOfLineRule("//", Token.UNDEFINED));
-
-               // Add rule for strings and character constants.
-  //           rules.add(new SingleLineRule("\"", "\"", Token.UNDEFINED, '\\'));
-       //      rules.add(new SingleLineRule("'", "'", Token.UNDEFINED, '\\')); 
-
-               // Add special case word rule.
-               rules.add(new WordPredicateRule(comment));
-
-               // Add rules for multi-line comments and javadoc.
-               //rules.add(new MultiLineRule("/**", "*/", javaDoc));
-               rules.add(new MultiLineRule("<!--", "-->", comment)); 
-    rules.add(new MultiLineRule("<? ", "?>", php)); 
-    rules.add(new MultiLineRule("<?php", "?>", php)); 
-    rules.add(new MultiLineRule("<?PHP", "?>", php)); 
-    //Add rule for processing instructions
-
-
-               IPredicateRule[] result= new IPredicateRule[rules.size()];
-               rules.toArray(result);
-               setPredicateRules(result);
-       }
+public class PHPPartitionScanner implements IPartitionTokenScanner
+{
+    // Enables the diagnostic output on System.out / System.err.
+    private static final boolean DEBUG = false;
+    // True while the scanner is inside a single-quoted string (toggled on ').
+    private boolean fInString = false;
+    // True while the scanner is inside a double-quoted string (toggled on ").
+    private boolean fInDoubString = false;
+    // Document being scanned; assigned in setRange.
+    private IDocument fDocument = null;
+    // Offset of the next character that read() will return.
+    private int fOffset = -1;
+    // Content type of the partition that is currently being scanned.
+    private String fContentType = IPHPPartitionScannerConstants.HTML;
+    // Content type that was active before a PHP partition was entered;
+    // restored when the closing "?>" is found.
+    private String fPrevContentType = IPHPPartitionScannerConstants.HTML;
+    // Set when a token ending on a closing delimiter was returned, so that
+    // the next call of nextToken starts a new token at the current offset.
+    private boolean partitionBorder = false;
+    // Start offset of the token reported by getTokenOffset.
+    private int fTokenOffset;
+    // Exclusive end offset of the range to scan.
+    private int fEnd = -1;
+    // Length of the token reported by getTokenLength.
+    private int fLength;
+    // Number of characters read so far for the token under construction.
+    private int fCurrentLength;
+    // Maps a content type name to the Token instance returned for it.
+    private Map tokens = new HashMap();
+
+    /**
+     * Creates the scanner and registers one token for every partition type
+     * it can report. Each token carries its content type name as data.
+     */
+    public PHPPartitionScanner()
+    {
+        final String[] partitionTypes =
+            {
+                IPHPPartitionScannerConstants.PHP,
+                IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT,
+                IPHPPartitionScannerConstants.HTML,
+                IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT,
+                IDocument.DEFAULT_CONTENT_TYPE };
+
+        for (int i = 0; i < partitionTypes.length; i++)
+        {
+            String partitionType = partitionTypes[i];
+            this.tokens.put(partitionType, new Token(partitionType));
+        }
+    }
+
+    /**
+     * Closes the partition that is currently being built and returns the
+     * token registered for the given content type.
+     * <p>
+     * The number of characters read so far becomes the token length, the
+     * running length counter is cleared and both string flags are reset.
+     *
+     * @param type The content type whose token is requested.
+     * @return The token registered for <code>type</code>.
+     */
+    private IToken getToken(String type)
+    {
+        fLength = fCurrentLength;
+
+        // In debug mode report where an empty partition was produced before
+        // the assertion below fires.
+        if (DEBUG && fLength <= 0)
+        {
+            try
+            {
+                int line = fDocument.getLineOfOffset(fOffset);
+                int column = fOffset - fDocument.getLineOffset(line);
+                System.err.println(
+                    "Error at " + line + " offset:" + String.valueOf(column));
+            }
+            catch (BadLocationException e)
+            {  // should never happen
+                // TODO Write stacktrace to log
+                e.printStackTrace();
+            }
+        }
+        Assert.isTrue(fLength > 0, "Partition length <= 0!");
+
+        // Start counting afresh for the following partition.
+        fCurrentLength = 0;
+        // A string can never cross a partition border, so string detection
+        // starts over as well.
+        fInDoubString = false;
+        fInString = false;
+
+        IToken result = (IToken) this.tokens.get(type);
+        Assert.isNotNull(result, "Token for type \"" + type + "\" not found!");
+
+        if (DEBUG)
+        {
+            String message =
+                "Partition: fTokenOffset="
+                    + fTokenOffset
+                    + " fContentType="
+                    + type
+                    + " fLength="
+                    + fLength;
+            System.out.println(message);
+        }
+        return result;
+    }
+
+    /* (non-Javadoc)
+     * @see org.eclipse.jface.text.rules.IPartitionTokenScanner#setPartialRange(org.eclipse.jface.text.IDocument, int, int, java.lang.String, int)
+     *
+     * When a partition offset is supplied the scan starts at the beginning of
+     * that partition, because string detection needs the whole partition.
+     * Otherwise only the requested range is scanned.
+     */
+    public void setPartialRange(
+        IDocument document,
+        int offset,
+        int length,
+        String contentType,
+        int partitionOffset)
+    {
+        if (DEBUG)
+        {
+            System.out.println(
+                "PartialRange: contentType="
+                    + contentType
+                    + " partitionOffset="
+                    + partitionOffset);
+        }
+
+        // No enclosing partition known: scan exactly the requested range.
+        if (partitionOffset <= -1)
+        {
+            this.setRange(document, offset, length);
+            return;
+        }
+
+        partitionBorder = false;
+        // Widen the range so that it begins at the partition start.
+        int widenedLength = offset - partitionOffset + length;
+        this.setRange(document, partitionOffset, widenedLength);
+
+        try
+        {
+            // The content type passed in is sometimes wrong, so the type is
+            // taken directly from the document.
+            fContentType = fDocument.getContentType(partitionOffset);
+        }
+        catch (BadLocationException e)
+        {
+            // should never happen
+            // TODO print stack trace to log
+            // Fall back to scanning the whole document again.
+            this.setRange(document, 0, fDocument.getLength());
+        }
+    }
+
+    /* (non-Javadoc)
+     * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
+     *
+     * Returns fLength, which getToken sets to the length of the partition it
+     * hands out and which setRange resets to 0.
+     */
+    public int getTokenLength()
+    {
+        return fLength;
+    }
+
+    /* (non-Javadoc)
+     * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
+     *
+     * Returns fTokenOffset, the start offset recorded for the current token
+     * by setRange and nextToken.
+     */
+    public int getTokenOffset()
+    {
+        return fTokenOffset;
+    }
+
+    /* (non-Javadoc)
+     * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
+     *
+     * Reads characters until a partition delimiter or the end of the range
+     * is reached and returns the token of the partition that was completed.
+     * Returns Token.EOF when nothing is left to read.
+     *
+     * When an opening delimiter is found after other content, the delimiter
+     * is unread, the preceding partition is returned and the delimiter is
+     * read again on the following call.
+     *
+     * NOTE(review): content types are compared by reference (== / !=)
+     * throughout this method. That relies on every value being the same
+     * String instance as the IPHPPartitionScannerConstants constant; confirm
+     * this holds for the value obtained from the document in setPartialRange.
+     */
+    public IToken nextToken()
+    {
+        int c;
+
+        // check if we are not already at the end of the
+        // file
+        if ((c = read()) == ICharacterScanner.EOF)
+        {
+            partitionBorder = false;
+            return Token.EOF;
+        }
+        else
+            unread();
+
+        // the previous token ended on a closing delimiter, so the new token
+        // starts at the current offset
+        if (partitionBorder)
+        {
+            fTokenOffset = fOffset;
+            partitionBorder = false;
+        }
+
+        while ((c = read()) != ICharacterScanner.EOF)
+        {
+            switch (c)
+            {
+                case '<' :
+                    // "<?php" in any letter case opens a PHP partition unless
+                    // we are in a PHP string or a PHP multi-line comment
+                    if (!isInString(IPHPPartitionScannerConstants.PHP)
+                        && fContentType
+                            != IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT
+                        && checkPattern(new char[] { '?', 'p', 'h', 'p' }, true))
+                    {
+                        if (fContentType != IPHPPartitionScannerConstants.PHP
+                            && fCurrentLength > 5)
+                        {
+                            // content precedes the opener: give the 5 opener
+                            // characters back and return that content first
+                            unread(5);
+                            IToken token = getToken(fContentType);
+                            // save previous content type
+                            //TODO build stack for previous content type
+                            fPrevContentType = fContentType;
+
+                            fContentType = IPHPPartitionScannerConstants.PHP;
+
+                            return token;
+                        }
+                        else
+                            fContentType = IPHPPartitionScannerConstants.PHP;
+
+                        // remember offset of this partition
+                        fTokenOffset = fOffset - 5;
+                        fCurrentLength = 5;
+                    }
+                    // the short opener "<?" is handled the same way with a
+                    // delimiter length of 2
+                    else if (
+                        !isInString(IPHPPartitionScannerConstants.PHP)
+                            && fContentType
+                                != IPHPPartitionScannerConstants
+                                    .PHP_MULTILINE_COMMENT
+                            && checkPattern(new char[] { '?' }, false))
+                    {
+                        if (fContentType != IPHPPartitionScannerConstants.PHP
+                            && fCurrentLength > 2)
+                        {
+                            unread(2);
+                            IToken token = getToken(fContentType);
+                            // save previous content type
+                            fPrevContentType = fContentType;
+                            fContentType = IPHPPartitionScannerConstants.PHP;
+                            return token;
+                        }
+                        else
+                            fContentType = IPHPPartitionScannerConstants.PHP;
+                        // remember offset of this partition
+                        fTokenOffset = fOffset - 2;
+                        fCurrentLength = 2;
+                    }
+                    // "<!--" opens an HTML multi-line comment
+                    else if (
+                        !isInString(IPHPPartitionScannerConstants.PHP)
+                            && checkPattern(new char[] { '!', '-', '-' }))
+                    { // return previous partition
+                        if (fContentType
+                            != IPHPPartitionScannerConstants
+                                .HTML_MULTILINE_COMMENT
+                            && fCurrentLength > 4)
+                        {
+                            unread(4);
+                            IToken token = getToken(fContentType);
+                            fContentType =
+                                IPHPPartitionScannerConstants
+                                    .HTML_MULTILINE_COMMENT;
+                            return token;
+                        }
+                        else
+                            fContentType =
+                                IPHPPartitionScannerConstants
+                                    .HTML_MULTILINE_COMMENT;
+                                    
+                        fTokenOffset = fOffset - 4;
+                        fCurrentLength = 4;
+                    }
+                    break;
+                case '?' :
+                    // "?>" outside of a string closes the PHP partition
+                    if (!isInString(IPHPPartitionScannerConstants.PHP)
+                        && fContentType == IPHPPartitionScannerConstants.PHP)
+                    {
+                        if ((c = read()) == '>')
+                        { 
+                            // continue with the content type that was active
+                            // before the PHP partition, HTML if none is known
+                            if (fPrevContentType != null)
+                                fContentType = fPrevContentType;
+                            else
+                                fContentType =
+                                    IPHPPartitionScannerConstants.HTML;
+                            partitionBorder = true;
+                            return getToken(IPHPPartitionScannerConstants.PHP);
+                        }
+                        else if (c != ICharacterScanner.EOF)
+                            unread();
+                    }
+                    break;
+                case '-' :
+                    // "-->" closes an HTML multi-line comment
+                    if (!isInString(IPHPPartitionScannerConstants.PHP)
+                        && fContentType
+                            == IPHPPartitionScannerConstants
+                                .HTML_MULTILINE_COMMENT
+                        && checkPattern(new char[] { '-', '>' }))
+                    {
+                        fContentType = IPHPPartitionScannerConstants.HTML;
+                        partitionBorder = true;
+                        return getToken(
+                            IPHPPartitionScannerConstants
+                                .HTML_MULTILINE_COMMENT);
+                    }
+                    break;
+                case '/' :
+                    if (!isInString(IPHPPartitionScannerConstants.PHP) && (c = read()) == '*')
+                    { // MULTILINE COMMENT JAVASCRIPT, CSS, PHP
+                        if (fContentType == IPHPPartitionScannerConstants.PHP
+                            && fCurrentLength > 2)
+                        {
+                            // return the PHP code in front of the comment;
+                            // the comment opener is read again on the next call
+                            unread(2);
+                            IToken token = getToken(fContentType);
+                            fContentType =
+                                IPHPPartitionScannerConstants
+                                    .PHP_MULTILINE_COMMENT;
+                            return token;
+                        }
+                        else if (
+                            fContentType
+                                == IPHPPartitionScannerConstants
+                                    .PHP_MULTILINE_COMMENT)
+                        {
+
+                            // the comment partition starts at its opener
+                            fTokenOffset = fOffset - 2;
+                            fCurrentLength = 2;
+                        }
+
+                    }
+                    else if (!isInString(IPHPPartitionScannerConstants.PHP) && c != ICharacterScanner.EOF)
+                        unread();
+                    break;
+                case '*' :
+                    if (!isInString(IPHPPartitionScannerConstants.PHP) && (c = read()) == '/')
+                    {
+                        if (fContentType
+                            == IPHPPartitionScannerConstants
+                                .PHP_MULTILINE_COMMENT)
+                        {
+                            // end of a PHP multi-line comment, PHP code follows
+                            fContentType = IPHPPartitionScannerConstants.PHP;
+                            partitionBorder = true;
+                            return getToken(
+                                IPHPPartitionScannerConstants
+                                    .PHP_MULTILINE_COMMENT);
+                        }
+                        else if (
+                            fContentType
+                                == IPHPPartitionScannerConstants
+                                    .CSS_MULTILINE_COMMENT)
+                        {
+                            // no handling for CSS comments here
+                        }
+                        else if (
+                            fContentType
+                                == IPHPPartitionScannerConstants
+                                    .JS_MULTILINE_COMMENT)
+                        {
+                            // no handling for JavaScript comments here
+                        }
+                    }
+                    else if (!isInString(IPHPPartitionScannerConstants.PHP) && c != ICharacterScanner.EOF)
+                        unread();
+                    break;
+                case '\'' :
+                    // toggle single-quoted string mode unless the quote is
+                    // part of a double-quoted string
+                    if (!fInDoubString)
+                        fInString = !fInString;
+                    break;
+                case '"' :
+                    // toggle String mode
+                    if (!fInString)
+                        fInDoubString = !fInDoubString;
+                    break;
+            }
+        } // end of file reached but we have to return the
+        // last partition.
+        return getToken(fContentType);
+    }
+    /* (non-Javadoc)
+     * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(org.eclipse.jface.text.IDocument, int, int)
+     *
+     * Points the scanner at the given document range and resets the token
+     * bookkeeping, the string flags and the content type (to HTML).
+     */
+    public void setRange(IDocument document, int offset, int length)
+    {
+        if (DEBUG)
+        {
+            String message = "SET RANGE: offset=" + offset + " length=" + length;
+            System.out.println(message);
+        }
+
+        // window to scan
+        fDocument = document;
+        fOffset = offset;
+        fEnd = offset + length;
+
+        // token bookkeeping
+        fTokenOffset = offset;
+        fLength = 0;
+        fCurrentLength = 0;
+
+        // scanning always starts outside of strings and in HTML content
+        fInDoubString = false;
+        fInString = false;
+        fContentType = IPHPPartitionScannerConstants.HTML;
+        // TODO restore the previous content types with getPartitionStack(offset)
+    }
+
+    /**
+     * Reads the next character of the scan range and advances the offset and
+     * the running partition length by one.
+     * <p>
+     * Both counters are advanced only after the character was fetched
+     * successfully. Callers treat <code>ICharacterScanner.EOF</code> as
+     * "nothing consumed" and do not unread it, so the running length must not
+     * grow when EOF is reported.
+     *
+     * @return The character read, or <code>ICharacterScanner.EOF</code> if
+     *         the end of the range is reached or the offset is invalid.
+     */
+    private int read()
+    {
+        if (fOffset >= fEnd)
+        {
+            return ICharacterScanner.EOF;
+        }
+
+        try
+        {
+            char current = fDocument.getChar(fOffset);
+            fOffset++;
+            fCurrentLength++;
+            return current;
+        }
+        catch (BadLocationException e)
+        {
+            // should never happen
+            // TODO write stacktrace to log
+            // Stop scanning: every further read reports EOF as well.
+            fOffset = fEnd;
+            return ICharacterScanner.EOF;
+        }
+    }
+
+    /**
+     * Gives the most recently read character back to the scanner.
+     */
+    private void unread()
+    {
+        unread(1);
+    }
+    
+    /**
+     * Gives the last <code>num</code> characters back to the scanner by
+     * moving the offset and the running partition length back.
+     *
+     * @param num The number of characters to unread.
+     */
+    private void unread(int num)
+    {
+        fCurrentLength = fCurrentLength - num;
+        fOffset = fOffset - num;
+    }
+
+    /**
+     * Case-sensitive variant of <code>checkPattern(char[], boolean)</code>.
+     *
+     * @param pattern The pattern to check.
+     * @return <code>true</code> if the next characters match the pattern.
+     */
+    private boolean checkPattern(char[] pattern)
+    {
+        return checkPattern(pattern, false);
+    }
+
+    /**
+     * Tests whether the characters following the current offset spell the
+     * given pattern. The pattern is compared from left to right. On a match
+     * the pattern characters stay consumed. On the first mismatch, or when
+     * the end of the range is hit, the offset and the running length are
+     * restored to their values before the call.
+     *
+     * @param pattern The pattern to check.
+     * @param ignoreCase <code>true</code> to compare letters regardless of
+     *                   their case.
+     * @return <code>true</code> if the pattern matches, else <code>false</code>.
+     */
+    private boolean checkPattern(char[] pattern, boolean ignoreCase)
+    {
+        final int savedOffset = fOffset;
+        final int savedLength = fCurrentLength;
+
+        int index = 0;
+        while (index < pattern.length)
+        {
+            int current = read();
+            boolean matches =
+                current != ICharacterScanner.EOF
+                    && letterEquals(current, pattern[index], ignoreCase);
+
+            if (!matches)
+            {
+                // roll back everything read for this attempt
+                fCurrentLength = savedLength;
+                fOffset = savedOffset;
+                return false;
+            }
+            index++;
+        }
+
+        return true;
+    }
+
+    /**
+     * Compares a character that was read with a pattern letter.
+     *
+     * @param test The character read from the document.
+     * @param letter The pattern letter to compare with.
+     * @param ignoreCase <code>true</code> to also accept the other case of
+     *                   <code>letter</code>.
+     * @return <code>true</code> if both are equal, or differ only in case
+     *         while <code>ignoreCase</code> is set.
+     */
+    private boolean letterEquals(int test, char letter, boolean ignoreCase)
+    {
+        if (test == letter)
+        {
+            return true;
+        }
+        if (!ignoreCase)
+        {
+            return false;
+        }
+
+        boolean matchesUpperVariant =
+            Character.isLowerCase(letter)
+                && test == Character.toUpperCase(letter);
+        boolean matchesLowerVariant =
+            Character.isUpperCase(letter)
+                && test == Character.toLowerCase(letter);
+
+        return matchesUpperVariant || matchesLowerVariant;
+    }
+    
+    /**
+     * Checks whether the current offset is inside a string and the specified
+     * content type is the current content type.
+     * Strings are delimited, mutually exclusive, by a " or by a '.
+     * <p>
+     * The content types are compared with <code>equals</code>, so the result
+     * does not depend on both values being the same String instance.
+     *
+     * @param contentType The content type to check.
+     * @return <code>true</code> if the current content type equals
+     *         <code>contentType</code> and the current offset is in a
+     *         string, else <code>false</code>.
+     */
+    private  boolean isInString(String contentType)
+    {
+        boolean sameContentType =
+            fContentType != null && fContentType.equals(contentType);
+        return sameContentType && (fInString || fInDoubString);
+    }
+    
+    /**
+     * Returns the types of all partitions that precede the partition at the
+     * given offset, in document order.
+     * <p>
+     * The walk continues while the partition under inspection does not start
+     * at offset 0, so a first partition that occupies only offset 0 is
+     * included as well. If the document reports a bad location, the types
+     * collected up to that point are returned.
+     *
+     * @param offset The offset to return the previous partition stack for.
+     *
+     * @return The stack as a string array; empty if the partition at
+     *         <code>offset</code> starts at the beginning of the document.
+     */
+    private String[] getPartitionStack(int offset)
+    {
+        ArrayList types = new ArrayList();
+        try
+        {
+            ITypedRegion region = fDocument.getPartition(offset);
+            int partitionStart = region.getOffset();
+            while (partitionStart > 0)
+            {
+                // step to the partition that ends right before this one
+                region = fDocument.getPartition(partitionStart - 1);
+                partitionStart = region.getOffset();
+                // prepend to keep document order
+                types.add(0, region.getType());
+            }
+        }
+        catch (BadLocationException e)
+        {
+            if (DEBUG)
+            {
+                e.printStackTrace();
+            }
+        }
+
+        return (String[]) types.toArray(new String[types.size()]);
+    }
+    
 }