Fix bug #1385272: Parsing of short open tags not fully compatible with the PHP parser
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
index 0b12fc7..30e835c 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
  * and is available at http://www.eclipse.org/legal/cpl-v05.html
- * 
+ *
  * Contributors: IBM Corporation - initial API and implementation
  **********************************************************************************************************************************/
 package net.sourceforge.phpdt.internal.compiler.parser;
@@ -15,8 +15,8 @@ import net.sourceforge.phpdt.core.compiler.CharOperation;
 import net.sourceforge.phpdt.core.compiler.IScanner;
 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
+import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
-import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
 
 public class Scanner implements IScanner, ITerminalSymbols {
        /*
@@ -27,8 +27,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
         * stream - currentPosition-1 gives the sourceEnd position into the stream
         */
        // 1.4 feature
-//     private boolean assertMode;
-
+       // private boolean assertMode;
        public boolean useAssertAsAnIndentifier = false;
 
        // flag indicating if processed source contains occurrences of keyword assert
@@ -40,8 +39,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
        public boolean phpMode = false;
 
-       public boolean phpExpressionTag = false;
+//     public boolean phpExpressionTag = false;
 
+       int fFillerToken = TokenNameEOF;
        // public Stack encapsedStringStack = null;
 
        public char currentCharacter;
@@ -144,7 +144,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
        static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
 
        public final static int C_DOLLAR = 8;
-       
+
        public final static int C_LETTER = 4;
 
        public final static int C_DIGIT = 3;
@@ -165,7 +165,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                        ObviousIdentCharNatures[i] = C_LETTER;
 
                ObviousIdentCharNatures['$'] = C_DOLLAR;
-               
+
                ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
                ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
                ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
@@ -199,6 +199,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                ObviousIdentCharNatures['"'] = C_SEPARATOR;
                ObviousIdentCharNatures['\''] = C_SEPARATOR;
        }
+
        static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 
        static final int TableSize = 30, InternalTableSize = 6;
@@ -278,31 +279,33 @@ public class Scanner implements IScanner, ITerminalSymbols {
        /**
         * Determines if the specified character is permissible as the first character
         * in a PHP identifier or variable
-        * 
+        *
         * The '$' character for PHP variables is regarded as a correct first
         * character !
-        * 
+        *
         */
        public static boolean isPHPIdentOrVarStart(char ch) {
                if (ch < MAX_OBVIOUS) {
-                       return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DOLLAR;
+                       return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DOLLAR;
                }
                return false;
-               //return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+               // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <=
+               // ch && ch <= 0xFF);
        }
 
        /**
         * Determines if the specified character is permissible as the first character
         * in a PHP identifier.
-        * 
+        *
         * The '$' character for PHP variables isn't regarded as the first character !
         */
        public static boolean isPHPIdentifierStart(char ch) {
                if (ch < MAX_OBVIOUS) {
-                       return ObviousIdentCharNatures[ch]==C_LETTER;
+                       return ObviousIdentCharNatures[ch] == C_LETTER;
                }
                return false;
-//             return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+               // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
+               // 0xFF);
        }
 
        /**
@@ -311,19 +314,20 @@ public class Scanner implements IScanner, ITerminalSymbols {
         */
        public static boolean isPHPIdentifierPart(char ch) {
                if (ch < MAX_OBVIOUS) {
-                       return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
+                       return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
                }
                return false;
-//             return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+               // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch
+               // <= 0xFF);
        }
 
        public static boolean isSQLIdentifierPart(char ch) {
                if (ch < MAX_OBVIOUS) {
-                       return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
+                       return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
                }
                return false;
        }
-       
+
        public final boolean atEnd() {
                // This code is not relevant if source is
                // Only a part of the real stream input
@@ -419,6 +423,18 @@ public class Scanner implements IScanner, ITerminalSymbols {
                return result;
        }
 
+       public final boolean equalsCurrentTokenSource(char[] word) {
+               if (word.length != currentPosition - startPosition) {
+                       return false;
+               }
+               for (int i = 0; i < word.length; i++) {
+                       if (word[i] != source[startPosition + i]) {
+                               return false;
+                       }
+               }
+               return true;
+       }
+
        public final char[] getRawTokenSourceEnd() {
                int length = this.eofPosition - this.currentPosition - 1;
                char[] sourceEnd = new char[length];
@@ -430,6 +446,11 @@ public class Scanner implements IScanner, ITerminalSymbols {
                return this.startPosition;
        }
 
+       public final String getCurrentStringLiteral() {
+               char[] result = getCurrentStringLiteralSource();
+               return new String(result);
+       }
+
        public final char[] getCurrentStringLiteralSource() {
                // Return the token REAL source (aka unicodes are precomputed)
                if (startPosition + 1 >= currentPosition) {
@@ -453,10 +474,10 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
        /*
         * Search the source position corresponding to the end of a given line number
-        * 
+        *
         * Line numbers are 1-based, and relative to the scanner initialPosition.
         * Character positions are 0-based.
-        * 
+        *
         * In case the given line number is inconsistent, answers -1.
         */
        public final int getLineEnd(int lineNumber) {
@@ -475,12 +496,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
        /**
         * Search the source position corresponding to the beginning of a given line
         * number
-        * 
+        *
         * Line numbers are 1-based, and relative to the scanner initialPosition.
         * Character positions are 0-based.
-        * 
+        *
         * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
-        * 
+        *
         * In case the given line number is inconsistent, answers -1.
         */
        public final int getLineStart(int lineNumber) {
@@ -884,8 +905,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
                        do {
                                currentCharacter = source[currentPosition++];
                        } while (currentCharacter == ' ' || currentCharacter == '\t');
-                       while (ObviousIdentCharNatures[currentCharacter]==C_LETTER) {
-                               //      while((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
+                       while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
+                               // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
+                               // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
                                buf.append(currentCharacter);
                                currentCharacter = source[currentPosition++];
                        }
@@ -1267,11 +1289,15 @@ public class Scanner implements IScanner, ITerminalSymbols {
        }
 
        public int getNextToken() throws InvalidInputException {
-               phpExpressionTag = false;
                if (!phpMode) {
                        return getInlinedHTMLToken(currentPosition);
-               }
-               if (phpMode) {
+               } else {
+                       if (fFillerToken!=TokenNameEOF) {
+                               int tempToken;
+                               tempToken = fFillerToken;
+                               fFillerToken=TokenNameEOF;
+                               return tempToken;
+                       }
                        this.wasAcr = false;
                        if (diet) {
                                jumpOverMethodBody();
@@ -1930,6 +1956,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
         * @throws InvalidInputException
         */
        private int getInlinedHTMLToken(int start) throws InvalidInputException {
+               boolean phpShortTag = false;  // true, if <?= detected
                if (currentPosition > source.length) {
                        currentPosition = source.length;
                        return TokenNameEOF;
@@ -1944,23 +1971,27 @@ public class Scanner implements IScanner, ITerminalSymbols {
                                                if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
                                                        if (currentCharacter != '=') { // <?=
                                                                currentPosition--;
+                                                               phpShortTag = false;
                                                        } else {
-                                                               phpExpressionTag = true;
+                                                               phpShortTag = true;
                                                        }
                                                        // <?
                                                        if (ignorePHPOneLiner) { // for CodeFormatter
                                                                if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
                                                                        phpMode = true;
+                                                                       if (phpShortTag) {
+                                                                               fFillerToken = TokenNameecho;
+                                                                       }
                                                                        return TokenNameINLINE_HTML;
                                                                }
                                                        } else {
                                                                phpMode = true;
+                                                               if (phpShortTag) {
+                                                                       fFillerToken = TokenNameecho;
+                                                               }
                                                                return TokenNameINLINE_HTML;
                                                        }
                                                } else {
-                                                       // boolean phpStart = (currentCharacter == 'P') ||
-                                                       // (currentCharacter == 'p');
-                                                       // if (phpStart) {
                                                        int test = getNextChar('H', 'h');
                                                        if (test >= 0) {
                                                                test = getNextChar('P', 'p');
@@ -2001,10 +2032,11 @@ public class Scanner implements IScanner, ITerminalSymbols {
        }
 
        /**
+        * check if the PHP is only in this line (for CodeFormatter)
+        *
         * @return
         */
        private int lookAheadLinePHPTag() {
-               // check if the PHP is only in this line (for CodeFormatter)
                int currentPositionInLine = currentPosition;
                char previousCharInLine = ' ';
                char currentCharInLine = ' ';
@@ -2424,7 +2456,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                                        break;
                                }
                                default:
-                                       if (isPHPIdentOrVarStart(currentCharacter) ) {
+                                       if (isPHPIdentOrVarStart(currentCharacter)) {
                                                try {
                                                        scanIdentifierOrKeyword((currentCharacter == '$'));
                                                } catch (InvalidInputException ex) {
@@ -2432,8 +2464,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
                                                ;
                                                break;
                                        }
-                               if ( ObviousIdentCharNatures[currentCharacter]==C_DIGIT) {
-//                                     if (Character.isDigit(currentCharacter)) {
+                                       if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
+                                               // if (Character.isDigit(currentCharacter)) {
                                                try {
                                                        scanNumber(false);
                                                } catch (InvalidInputException ex) {
@@ -3511,8 +3543,13 @@ public class Scanner implements IScanner, ITerminalSymbols {
                        } else
                                return TokenNameIdentifier;
                case 's':
-                       // static switch
+                       // self static switch
                        switch (length) {
+//                     case 4:
+//                             if ((data[++index] == 'e') && (data[++index] == 'l') && (data[++index] == 'f')) {
+//                                     return TokenNameself;
+//                             }
+//                             return TokenNameIdentifier;
                        case 6:
                                if (data[++index] == 't')
                                        if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
@@ -3760,7 +3797,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
        /**
         * Search the line number corresponding to a specific position
-        * 
+        *
         */
        public final int getLineNumber(int position) {
                if (lineEnds == null)
@@ -3940,6 +3977,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
                        return "require_once"; //$NON-NLS-1$
                case TokenNamereturn:
                        return "return"; //$NON-NLS-1$
+//             case TokenNameself:
+//                     return "self"; //$NON-NLS-1$
                case TokenNamestatic:
                        return "static"; //$NON-NLS-1$
                case TokenNameswitch:
@@ -3967,11 +4006,11 @@ public class Scanner implements IScanner, ITerminalSymbols {
                case TokenNameStringInterpolated:
                        return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
                case TokenNameEncapsedString0:
-                       return "`"; //$NON-NLS-1$  
-                       // case TokenNameEncapsedString1:
-                       // return "\'"; //$NON-NLS-1$
-                       // case TokenNameEncapsedString2:
-                       // return "\""; //$NON-NLS-1$
+                       return "`"; //$NON-NLS-1$
+               // case TokenNameEncapsedString1:
+               // return "\'"; //$NON-NLS-1$
+               // case TokenNameEncapsedString2:
+               // return "\""; //$NON-NLS-1$
                case TokenNameSTRING:
                        return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
                case TokenNameHEREDOC:
@@ -4146,7 +4185,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                this.tokenizeWhiteSpace = tokenizeWhiteSpace;
                this.tokenizeStrings = tokenizeStrings;
                this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
-//             this.assertMode = assertMode;
+               // this.assertMode = assertMode;
                // this.encapsedStringStack = null;
                this.taskTags = taskTags;
                this.taskPriorities = taskPriorities;
@@ -4320,8 +4359,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
                                                        continue nextTag;
                                                if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
                                                        if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
-                                                                                                                                                                                                                                                                                                                                                                                                                                       // insensitive
-                                                                                                                                                                                                                                                                                                                                                                                                                                       // check
+                                                               // insensitive
+                                                               // check
                                                                continue nextTag;
                                                        }
                                                }