Fix bug #1385272: Improved version for "Parsing of short open tags not fully compatib...
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
index 0b12fc7..803f878 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
  * and is available at http://www.eclipse.org/legal/cpl-v05.html
- * 
+ *
  * Contributors: IBM Corporation - initial API and implementation
  **********************************************************************************************************************************/
 package net.sourceforge.phpdt.internal.compiler.parser;
@@ -15,8 +15,8 @@ import net.sourceforge.phpdt.core.compiler.CharOperation;
 import net.sourceforge.phpdt.core.compiler.IScanner;
 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
+import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
-import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
 
 public class Scanner implements IScanner, ITerminalSymbols {
        /*
@@ -27,8 +27,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
         * stream - currentPosition-1 gives the sourceEnd position into the stream
         */
        // 1.4 feature
-//     private boolean assertMode;
-
+       // private boolean assertMode;
        public boolean useAssertAsAnIndentifier = false;
 
        // flag indicating if processed source contains occurrences of keyword assert
@@ -40,9 +39,13 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
        public boolean phpMode = false;
 
-       public boolean phpExpressionTag = false;
-
-       // public Stack encapsedStringStack = null;
+       /**
+        * This token is set to TokenNameECHO_INVISIBLE when a short echo tag block begins (i.e. <?= ... ).
+        * Directly after the "=" character, the getNextToken() method returns TokenNameINLINE_HTML.
+        * On the next call to getNextToken(), the value of fFillerToken (== TokenNameECHO_INVISIBLE) is returned and the field is reset to TokenNameEOF.
+        *
+        */
+       int fFillerToken = TokenNameEOF;
 
        public char currentCharacter;
 
@@ -144,7 +147,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
        static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
 
        public final static int C_DOLLAR = 8;
-       
+
        public final static int C_LETTER = 4;
 
        public final static int C_DIGIT = 3;
@@ -165,7 +168,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                        ObviousIdentCharNatures[i] = C_LETTER;
 
                ObviousIdentCharNatures['$'] = C_DOLLAR;
-               
+
                ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
                ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
                ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
@@ -199,6 +202,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                ObviousIdentCharNatures['"'] = C_SEPARATOR;
                ObviousIdentCharNatures['\''] = C_SEPARATOR;
        }
+
        static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 
        static final int TableSize = 30, InternalTableSize = 6;
@@ -278,31 +282,33 @@ public class Scanner implements IScanner, ITerminalSymbols {
        /**
         * Determines if the specified character is permissible as the first character
         * in a PHP identifier or variable
-        * 
+        *
         * The '$' character for PHP variables is regarded as a correct first
         * character !
-        * 
+        *
         */
        public static boolean isPHPIdentOrVarStart(char ch) {
                if (ch < MAX_OBVIOUS) {
-                       return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DOLLAR;
+                       return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DOLLAR;
                }
                return false;
-               //return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+               // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <=
+               // ch && ch <= 0xFF);
        }
 
        /**
         * Determines if the specified character is permissible as the first character
         * in a PHP identifier.
-        * 
+        *
         * The '$' character for PHP variables isn't regarded as the first character !
         */
        public static boolean isPHPIdentifierStart(char ch) {
                if (ch < MAX_OBVIOUS) {
-                       return ObviousIdentCharNatures[ch]==C_LETTER;
+                       return ObviousIdentCharNatures[ch] == C_LETTER;
                }
                return false;
-//             return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+               // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
+               // 0xFF);
        }
 
        /**
@@ -311,19 +317,20 @@ public class Scanner implements IScanner, ITerminalSymbols {
         */
        public static boolean isPHPIdentifierPart(char ch) {
                if (ch < MAX_OBVIOUS) {
-                       return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
+                       return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
                }
                return false;
-//             return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+               // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch
+               // <= 0xFF);
        }
 
        public static boolean isSQLIdentifierPart(char ch) {
                if (ch < MAX_OBVIOUS) {
-                       return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
+                       return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
                }
                return false;
        }
-       
+
        public final boolean atEnd() {
                // This code is not relevant if source is
                // Only a part of the real stream input
@@ -419,6 +426,18 @@ public class Scanner implements IScanner, ITerminalSymbols {
                return result;
        }
 
+       public final boolean equalsCurrentTokenSource(char[] word) {
+               if (word.length != currentPosition - startPosition) {
+                       return false;
+               }
+               for (int i = 0; i < word.length; i++) {
+                       if (word[i] != source[startPosition + i]) {
+                               return false;
+                       }
+               }
+               return true;
+       }
+
        public final char[] getRawTokenSourceEnd() {
                int length = this.eofPosition - this.currentPosition - 1;
                char[] sourceEnd = new char[length];
@@ -430,6 +449,11 @@ public class Scanner implements IScanner, ITerminalSymbols {
                return this.startPosition;
        }
 
+       public final String getCurrentStringLiteral() {
+               char[] result = getCurrentStringLiteralSource();
+               return new String(result);
+       }
+
        public final char[] getCurrentStringLiteralSource() {
                // Return the token REAL source (aka unicodes are precomputed)
                if (startPosition + 1 >= currentPosition) {
@@ -453,10 +477,10 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
        /*
         * Search the source position corresponding to the end of a given line number
-        * 
+        *
         * Line numbers are 1-based, and relative to the scanner initialPosition.
         * Character positions are 0-based.
-        * 
+        *
         * In case the given line number is inconsistent, answers -1.
         */
        public final int getLineEnd(int lineNumber) {
@@ -475,12 +499,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
        /**
         * Search the source position corresponding to the beginning of a given line
         * number
-        * 
+        *
         * Line numbers are 1-based, and relative to the scanner initialPosition.
         * Character positions are 0-based.
-        * 
+        *
         * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
-        * 
+        *
         * In case the given line number is inconsistent, answers -1.
         */
        public final int getLineStart(int lineNumber) {
@@ -884,8 +908,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
                        do {
                                currentCharacter = source[currentPosition++];
                        } while (currentCharacter == ' ' || currentCharacter == '\t');
-                       while (ObviousIdentCharNatures[currentCharacter]==C_LETTER) {
-                               //      while((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
+                       while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
+                               // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
+                               // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
                                buf.append(currentCharacter);
                                currentCharacter = source[currentPosition++];
                        }
@@ -1267,11 +1292,16 @@ public class Scanner implements IScanner, ITerminalSymbols {
        }
 
        public int getNextToken() throws InvalidInputException {
-               phpExpressionTag = false;
                if (!phpMode) {
                        return getInlinedHTMLToken(currentPosition);
-               }
-               if (phpMode) {
+               } else {
+                       if (fFillerToken != TokenNameEOF) {
+                               int tempToken;
+                               startPosition = currentPosition;
+                               tempToken = fFillerToken;
+                               fFillerToken = TokenNameEOF;
+                               return tempToken;
+                       }
                        this.wasAcr = false;
                        if (diet) {
                                jumpOverMethodBody();
@@ -1281,154 +1311,14 @@ public class Scanner implements IScanner, ITerminalSymbols {
                        try {
                                while (true) {
                                        withoutUnicodePtr = 0;
-                                       // start with a new token
-                                       char encapsedChar = ' ';
-                                       // if (!encapsedStringStack.isEmpty()) {
-                                       // encapsedChar = ((Character)
-                                       // encapsedStringStack.peek()).charValue();
-                                       // }
-                                       // if (encapsedChar != '$' && encapsedChar != ' ') {
-                                       // currentCharacter = source[currentPosition++];
-                                       // if (currentCharacter == encapsedChar) {
-                                       // switch (currentCharacter) {
-                                       // case '`':
-                                       // return TokenNameEncapsedString0;
-                                       // case '\'':
-                                       // return TokenNameEncapsedString1;
-                                       // case '"':
-                                       // return TokenNameEncapsedString2;
-                                       // }
-                                       // }
-                                       // while (currentCharacter != encapsedChar) {
-                                       // /** ** in PHP \r and \n are valid in string literals *** */
-                                       // switch (currentCharacter) {
-                                       // case '\\':
-                                       // int escapeSize = currentPosition;
-                                       // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
-                                       // //scanEscapeCharacter make a side effect on this value and
-                                       // // we need the previous value few lines down this one
-                                       // scanDoubleQuotedEscapeCharacter();
-                                       // escapeSize = currentPosition - escapeSize;
-                                       // if (withoutUnicodePtr == 0) {
-                                       // //buffer all the entries that have been left aside....
-                                       // withoutUnicodePtr = currentPosition - escapeSize - 1 -
-                                       // startPosition;
-                                       // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
-                                       // withoutUnicodePtr);
-                                       // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                                       // } else { //overwrite the / in the buffer
-                                       // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
-                                       // if (backSlashAsUnicodeInString) { //there are TWO \ in
-                                       // withoutUnicodePtr--;
-                                       // }
-                                       // }
-                                       // break;
-                                       // case '\r':
-                                       // case '\n':
-                                       // if (recordLineSeparator) {
-                                       // pushLineSeparator();
-                                       // }
-                                       // break;
-                                       // case '$':
-                                       // if (isPHPIdentifierStart(source[currentPosition]) ||
-                                       // source[currentPosition] == '{') {
-                                       // currentPosition--;
-                                       // encapsedStringStack.push(new Character('$'));
-                                       // return TokenNameSTRING;
-                                       // }
-                                       // break;
-                                       // case '{':
-                                       // if (source[currentPosition] == '$') { // CURLY_OPEN
-                                       // currentPosition--;
-                                       // encapsedStringStack.push(new Character('$'));
-                                       // return TokenNameSTRING;
-                                       // }
-                                       // }
-                                       // // consume next character
-                                       // unicodeAsBackSlash = false;
-                                       // currentCharacter = source[currentPosition++];
-                                       // if (withoutUnicodePtr != 0) {
-                                       // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                                       // }
-                                       // // }
-                                       // } // end while
-                                       // currentPosition--;
-                                       // return TokenNameSTRING;
-                                       // }
                                        // ---------Consume white space and handles startPosition---------
                                        int whiteStart = currentPosition;
                                        startPosition = currentPosition;
                                        currentCharacter = source[currentPosition++];
-                                       // if (encapsedChar == '$') {
-                                       // switch (currentCharacter) {
-                                       // case '\\':
-                                       // currentCharacter = source[currentPosition++];
-                                       // return TokenNameSTRING;
-                                       // case '{':
-                                       // if (encapsedChar == '$') {
-                                       // if (getNextChar('$'))
-                                       // return TokenNameLBRACE_DOLLAR;
-                                       // }
-                                       // return TokenNameLBRACE;
-                                       // case '}':
-                                       // return TokenNameRBRACE;
-                                       // case '[':
-                                       // return TokenNameLBRACKET;
-                                       // case ']':
-                                       // return TokenNameRBRACKET;
-                                       // case '\'':
-                                       // if (tokenizeStrings) {
-                                       // consumeStringConstant();
-                                       // return TokenNameStringSingleQuote;
-                                       // }
-                                       // return TokenNameEncapsedString1;
-                                       // case '"':
-                                       // return TokenNameEncapsedString2;
-                                       // case '`':
-                                       // if (tokenizeStrings) {
-                                       // consumeStringInterpolated();
-                                       // return TokenNameStringInterpolated;
-                                       // }
-                                       // return TokenNameEncapsedString0;
-                                       // case '-':
-                                       // if (getNextChar('>'))
-                                       // return TokenNameMINUS_GREATER;
-                                       // return TokenNameSTRING;
-                                       // default:
-                                       // if (currentCharacter == '$') {
-                                       // int oldPosition = currentPosition;
-                                       // try {
-                                       // currentCharacter = source[currentPosition++];
-                                       // if (currentCharacter == '{') {
-                                       // return TokenNameDOLLAR_LBRACE;
-                                       // }
-                                       // if (isPHPIdentifierStart(currentCharacter)) {
-                                       // return scanIdentifierOrKeyword(true);
-                                       // } else {
-                                       // currentPosition = oldPosition;
-                                       // return TokenNameSTRING;
-                                       // }
-                                       // } catch (IndexOutOfBoundsException e) {
-                                       // currentPosition = oldPosition;
-                                       // return TokenNameSTRING;
-                                       // }
-                                       // }
-                                       // if (isPHPIdentifierStart(currentCharacter))
-                                       // return scanIdentifierOrKeyword(false);
-                                       // if (Character.isDigit(currentCharacter))
-                                       // return scanNumber(false);
-                                       // return TokenNameERROR;
-                                       // }
-                                       // }
-                                       // boolean isWhiteSpace;
 
                                        while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
                                                startPosition = currentPosition;
                                                currentCharacter = source[currentPosition++];
-                                               // if (((currentCharacter = source[currentPosition++]) == '\\')
-                                               // && (source[currentPosition] == 'u')) {
-                                               // isWhiteSpace = jumpOverUnicodeWhiteSpace();
-                                               // } else {
                                                if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
                                                        checkNonExternalizeString();
                                                        if (recordLineSeparator) {
@@ -1437,9 +1327,6 @@ public class Scanner implements IScanner, ITerminalSymbols {
                                                                currentLine = null;
                                                        }
                                                }
-                                               // isWhiteSpace = (currentCharacter == ' ')
-                                               // || Character.isWhitespace(currentCharacter);
-                                               // }
                                        }
                                        if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
                                                // reposition scanner in case we are interested by spaces as tokens
@@ -1930,6 +1817,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
         * @throws InvalidInputException
         */
        private int getInlinedHTMLToken(int start) throws InvalidInputException {
+               boolean phpShortTag = false; // true, if <?= detected
                if (currentPosition > source.length) {
                        currentPosition = source.length;
                        return TokenNameEOF;
@@ -1944,23 +1832,27 @@ public class Scanner implements IScanner, ITerminalSymbols {
                                                if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
                                                        if (currentCharacter != '=') { // <?=
                                                                currentPosition--;
+                                                               phpShortTag = false;
                                                        } else {
-                                                               phpExpressionTag = true;
+                                                               phpShortTag = true;
                                                        }
                                                        // <?
                                                        if (ignorePHPOneLiner) { // for CodeFormatter
                                                                if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
                                                                        phpMode = true;
+                                                                       if (phpShortTag) {
+                                                                               fFillerToken = TokenNameECHO_INVISIBLE;
+                                                                       }
                                                                        return TokenNameINLINE_HTML;
                                                                }
                                                        } else {
                                                                phpMode = true;
+                                                               if (phpShortTag) {
+                                                                       fFillerToken = TokenNameECHO_INVISIBLE;
+                                                               }
                                                                return TokenNameINLINE_HTML;
                                                        }
                                                } else {
-                                                       // boolean phpStart = (currentCharacter == 'P') ||
-                                                       // (currentCharacter == 'p');
-                                                       // if (phpStart) {
                                                        int test = getNextChar('H', 'h');
                                                        if (test >= 0) {
                                                                test = getNextChar('P', 'p');
@@ -2001,10 +1893,11 @@ public class Scanner implements IScanner, ITerminalSymbols {
        }
 
        /**
+        * Checks whether the PHP code is confined to the current line (used by the CodeFormatter).
+        *
         * @return
         */
        private int lookAheadLinePHPTag() {
-               // check if the PHP is only in this line (for CodeFormatter)
                int currentPositionInLine = currentPosition;
                char previousCharInLine = ' ';
                char currentCharInLine = ' ';
@@ -2424,7 +2317,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                                        break;
                                }
                                default:
-                                       if (isPHPIdentOrVarStart(currentCharacter) ) {
+                                       if (isPHPIdentOrVarStart(currentCharacter)) {
                                                try {
                                                        scanIdentifierOrKeyword((currentCharacter == '$'));
                                                } catch (InvalidInputException ex) {
@@ -2432,8 +2325,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
                                                ;
                                                break;
                                        }
-                               if ( ObviousIdentCharNatures[currentCharacter]==C_DIGIT) {
-//                                     if (Character.isDigit(currentCharacter)) {
+                                       if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
+                                               // if (Character.isDigit(currentCharacter)) {
                                                try {
                                                        scanNumber(false);
                                                } catch (InvalidInputException ex) {
@@ -3511,8 +3404,14 @@ public class Scanner implements IScanner, ITerminalSymbols {
                        } else
                                return TokenNameIdentifier;
                case 's':
-                       // static switch
+                       // self static switch
                        switch (length) {
+                       // case 4:
+                       // if ((data[++index] == 'e') && (data[++index] == 'l') && (data[++index]
+                       // == 'f')) {
+                       // return TokenNameself;
+                       // }
+                       // return TokenNameIdentifier;
                        case 6:
                                if (data[++index] == 't')
                                        if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
@@ -3760,7 +3659,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
        /**
         * Search the line number corresponding to a specific position
-        * 
+        *
         */
        public final int getLineNumber(int position) {
                if (lineEnds == null)
@@ -3806,6 +3705,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                initialPosition = currentPosition = 0;
                containsAssertKeyword = false;
                withoutUnicodeBuffer = new char[this.source.length];
+               fFillerToken = TokenNameEOF;
                // encapsedStringStack = new Stack();
        }
 
@@ -3838,6 +3738,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
                // //$NON-NLS-1$
                case TokenNameINLINE_HTML:
                        return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
+               case    TokenNameECHO_INVISIBLE:
+                       //0-length token
+                       return "";
                case TokenNameIdentifier:
                        return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
                case TokenNameVariable:
@@ -3940,6 +3843,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
                        return "require_once"; //$NON-NLS-1$
                case TokenNamereturn:
                        return "return"; //$NON-NLS-1$
+                       // case TokenNameself:
+                       // return "self"; //$NON-NLS-1$
                case TokenNamestatic:
                        return "static"; //$NON-NLS-1$
                case TokenNameswitch:
@@ -3967,11 +3872,11 @@ public class Scanner implements IScanner, ITerminalSymbols {
                case TokenNameStringInterpolated:
                        return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
                case TokenNameEncapsedString0:
-                       return "`"; //$NON-NLS-1$  
-                       // case TokenNameEncapsedString1:
-                       // return "\'"; //$NON-NLS-1$
-                       // case TokenNameEncapsedString2:
-                       // return "\""; //$NON-NLS-1$
+                       return "`"; //$NON-NLS-1$
+               // case TokenNameEncapsedString1:
+               // return "\'"; //$NON-NLS-1$
+               // case TokenNameEncapsedString2:
+               // return "\""; //$NON-NLS-1$
                case TokenNameSTRING:
                        return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
                case TokenNameHEREDOC:
@@ -4146,7 +4051,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                this.tokenizeWhiteSpace = tokenizeWhiteSpace;
                this.tokenizeStrings = tokenizeStrings;
                this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
-//             this.assertMode = assertMode;
+               // this.assertMode = assertMode;
                // this.encapsedStringStack = null;
                this.taskTags = taskTags;
                this.taskPriorities = taskPriorities;
@@ -4320,8 +4225,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
                                                        continue nextTag;
                                                if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
                                                        if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
-                                                                                                                                                                                                                                                                                                                                                                                                                                       // insensitive
-                                                                                                                                                                                                                                                                                                                                                                                                                                       // check
+                                                               // insensitive
+                                                               // check
                                                                continue nextTag;
                                                        }
                                                }