misc
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
index 4ed6bf2..6b1e495 100644 (file)
@@ -39,7 +39,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
   public boolean phpMode = false;
 
-  public Stack encapsedStringStack = null;
+  public boolean phpExpressionTag = false;
+
+//  public Stack encapsedStringStack = null;
 
   public char currentCharacter;
 
@@ -213,9 +215,19 @@ public class Scanner implements IScanner, ITerminalSymbols {
   public ICompilationUnit compilationUnit = null;
 
   /**
+   * Determines whether the specified character may start a PHP identifier or a PHP variable.
+   *
+   * Unlike isPHPIdentifierStart(char), the '$' that introduces a PHP variable is accepted as a valid first character.
+   *
+   */
+  public static boolean isPHPIdentOrVarStart(char ch) {
+    return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
+  }
+
+  /**
    * Determines if the specified character is permissible as the first character in a PHP identifier.
    * 
-   * The '$' character for HP variables isn't regarded as the first character !
+   * The '$' character for PHP variables isn't regarded as the first character !
    */
   public static boolean isPHPIdentifierStart(char ch) {
     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
@@ -323,6 +335,13 @@ public class Scanner implements IScanner, ITerminalSymbols {
     return result;
   }
 
+  public final char[] getRawTokenSourceEnd() {
+    int length = this.eofPosition - this.currentPosition - 1;
+    char[] sourceEnd = new char[length];
+    System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
+    return sourceEnd;
+  }
+
   public int getCurrentTokenStartPosition() {
     return this.startPosition;
   }
@@ -933,6 +952,10 @@ public class Scanner implements IScanner, ITerminalSymbols {
               withoutUnicodePtr--;
             }
           }
+        } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
+          if (recordLineSeparator) {
+            pushLineSeparator();
+          }
         }
         // consume next character
         unicodeAsBackSlash = false;
@@ -1029,6 +1052,10 @@ public class Scanner implements IScanner, ITerminalSymbols {
               withoutUnicodePtr--;
             }
           }
+        } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
+          if (recordLineSeparator) {
+            pushLineSeparator();
+          }
         }
         // consume next character
         unicodeAsBackSlash = false;
@@ -1076,36 +1103,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
   public void consumeStringLiteral() throws InvalidInputException {
     try {
+      boolean openDollarBrace = false;
       // consume next character
       unicodeAsBackSlash = false;
       currentCharacter = source[currentPosition++];
-      //                if (((currentCharacter = source[currentPosition++]) == '\\')
-      //                  && (source[currentPosition] == 'u')) {
-      //                  getNextUnicodeChar();
-      //                } else {
-      //                  if (withoutUnicodePtr != 0) {
-      //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
-      //                      currentCharacter;
-      //                  }
-      //                }
-      while (currentCharacter != '"') {
+      while (currentCharacter != '"' || openDollarBrace) {
         /** ** in PHP \r and \n are valid in string literals *** */
-        //                  if ((currentCharacter == '\n')
-        //                    || (currentCharacter == '\r')) {
-        //                    // relocate if finding another quote fairly close: thus unicode
-        // '/u000D' will be fully consumed
-        //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-        //                      if (currentPosition + lookAhead == source.length)
-        //                        break;
-        //                      if (source[currentPosition + lookAhead] == '\n')
-        //                        break;
-        //                      if (source[currentPosition + lookAhead] == '\"') {
-        //                        currentPosition += lookAhead + 1;
-        //                        break;
-        //                      }
-        //                    }
-        //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
-        //                  }
         if (currentCharacter == '\\') {
           int escapeSize = currentPosition;
           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
@@ -1125,18 +1128,23 @@ public class Scanner implements IScanner, ITerminalSymbols {
               withoutUnicodePtr--;
             }
           }
+        } else if (currentCharacter == '$' && source[currentPosition] == '{') {
+          openDollarBrace = true;
+        } else if (currentCharacter == '{' && source[currentPosition] == '$') {
+          openDollarBrace = true;
+        } else if (currentCharacter == '}') {
+          openDollarBrace = false;
+        } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
+          if (recordLineSeparator) {
+            pushLineSeparator();
+          }
         }
         // consume next character
         unicodeAsBackSlash = false;
         currentCharacter = source[currentPosition++];
-        //                  if (((currentCharacter = source[currentPosition++]) == '\\')
-        //                    && (source[currentPosition] == 'u')) {
-        //                    getNextUnicodeChar();
-        //                  } else {
         if (withoutUnicodePtr != 0) {
           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
         }
-        //                  }
       }
     } catch (IndexOutOfBoundsException e) {
       //    reset end position for error reporting
@@ -1171,8 +1179,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
   }
 
   public int getNextToken() throws InvalidInputException {
+    phpExpressionTag = false;
     if (!phpMode) {
-      return getInlinedHTML(currentPosition);
+      return getInlinedHTMLToken(currentPosition);
     }
     if (phpMode) {
       this.wasAcr = false;
@@ -1186,133 +1195,139 @@ public class Scanner implements IScanner, ITerminalSymbols {
           withoutUnicodePtr = 0;
           //start with a new token
           char encapsedChar = ' ';
-          if (!encapsedStringStack.isEmpty()) {
-            encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
-          }
-          if (encapsedChar != '$' && encapsedChar != ' ') {
-            currentCharacter = source[currentPosition++];
-            if (currentCharacter == encapsedChar) {
-              switch (currentCharacter) {
-              case '`':
-                return TokenNameEncapsedString0;
-              case '\'':
-                return TokenNameEncapsedString1;
-              case '"':
-                return TokenNameEncapsedString2;
-              }
-            }
-            while (currentCharacter != encapsedChar) {
-              /** ** in PHP \r and \n are valid in string literals *** */
-              switch (currentCharacter) {
-              case '\\':
-                int escapeSize = currentPosition;
-                boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
-                //scanEscapeCharacter make a side effect on this value and
-                // we need the previous value few lines down this one
-                scanDoubleQuotedEscapeCharacter();
-                escapeSize = currentPosition - escapeSize;
-                if (withoutUnicodePtr == 0) {
-                  //buffer all the entries that have been left aside....
-                  withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
-                  System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
-                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                } else { //overwrite the / in the buffer
-                  withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
-                  if (backSlashAsUnicodeInString) { //there are TWO \ in
-                    withoutUnicodePtr--;
-                  }
-                }
-                break;
-              case '$':
-                if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
-                  currentPosition--;
-                  encapsedStringStack.push(new Character('$'));
-                  return TokenNameSTRING;
-                }
-                break;
-              case '{':
-                if (source[currentPosition] == '$') { // CURLY_OPEN
-                  currentPosition--;
-                  encapsedStringStack.push(new Character('$'));
-                  return TokenNameSTRING;
-                }
-              }
-              // consume next character
-              unicodeAsBackSlash = false;
-              currentCharacter = source[currentPosition++];
-              if (withoutUnicodePtr != 0) {
-                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-              }
-              //                  }
-            } // end while
-            currentPosition--;
-            return TokenNameSTRING;
-          }
+          //          if (!encapsedStringStack.isEmpty()) {
+          //            encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
+          //          }
+          //          if (encapsedChar != '$' && encapsedChar != ' ') {
+          //            currentCharacter = source[currentPosition++];
+          //            if (currentCharacter == encapsedChar) {
+          //              switch (currentCharacter) {
+          //              case '`':
+          //                return TokenNameEncapsedString0;
+          //              case '\'':
+          //                return TokenNameEncapsedString1;
+          //              case '"':
+          //                return TokenNameEncapsedString2;
+          //              }
+          //            }
+          //            while (currentCharacter != encapsedChar) {
+          //              /** ** in PHP \r and \n are valid in string literals *** */
+          //              switch (currentCharacter) {
+          //              case '\\':
+          //                int escapeSize = currentPosition;
+          //                boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+          //                //scanEscapeCharacter make a side effect on this value and
+          //                // we need the previous value few lines down this one
+          //                scanDoubleQuotedEscapeCharacter();
+          //                escapeSize = currentPosition - escapeSize;
+          //                if (withoutUnicodePtr == 0) {
+          //                  //buffer all the entries that have been left aside....
+          //                  withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
+          //                  System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
+          //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+          //                } else { //overwrite the / in the buffer
+          //                  withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+          //                  if (backSlashAsUnicodeInString) { //there are TWO \ in
+          //                    withoutUnicodePtr--;
+          //                  }
+          //                }
+          //                break;
+          //              case '\r':
+          //              case '\n':
+          //                if (recordLineSeparator) {
+          //                  pushLineSeparator();
+          //                }
+          //                break;
+          //              case '$':
+          //                if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
+          //                  currentPosition--;
+          //                  encapsedStringStack.push(new Character('$'));
+          //                  return TokenNameSTRING;
+          //                }
+          //                break;
+          //              case '{':
+          //                if (source[currentPosition] == '$') { // CURLY_OPEN
+          //                  currentPosition--;
+          //                  encapsedStringStack.push(new Character('$'));
+          //                  return TokenNameSTRING;
+          //                }
+          //              }
+          //              // consume next character
+          //              unicodeAsBackSlash = false;
+          //              currentCharacter = source[currentPosition++];
+          //              if (withoutUnicodePtr != 0) {
+          //                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+          //              }
+          //              // }
+          //            } // end while
+          //            currentPosition--;
+          //            return TokenNameSTRING;
+          //          }
           // ---------Consume white space and handles startPosition---------
           int whiteStart = currentPosition;
           startPosition = currentPosition;
           currentCharacter = source[currentPosition++];
-          if (encapsedChar == '$') {
-            switch (currentCharacter) {
-            case '\\':
-              currentCharacter = source[currentPosition++];
-              return TokenNameSTRING;
-            case '{':
-              if (encapsedChar == '$') {
-                if (getNextChar('$'))
-                  return TokenNameLBRACE_DOLLAR;
-              }
-              return TokenNameLBRACE;
-            case '}':
-              return TokenNameRBRACE;
-            case '[':
-              return TokenNameLBRACKET;
-            case ']':
-              return TokenNameRBRACKET;
-            case '\'':
-              if (tokenizeStrings) {
-                consumeStringConstant();
-                return TokenNameStringSingleQuote;
-              }
-              return TokenNameEncapsedString1;
-            case '"':
-              return TokenNameEncapsedString2;
-            case '`':
-              if (tokenizeStrings) {
-                consumeStringInterpolated();
-                return TokenNameStringInterpolated;
-              }
-              return TokenNameEncapsedString0;
-            case '-':
-              if (getNextChar('>'))
-                return TokenNameMINUS_GREATER;
-              return TokenNameSTRING;
-            default:
-              if (currentCharacter == '$') {
-                int oldPosition = currentPosition;
-                try {
-                  currentCharacter = source[currentPosition++];
-                  if (currentCharacter == '{') {
-                    return TokenNameDOLLAR_LBRACE;
-                  }
-                  if (isPHPIdentifierStart(currentCharacter)) {
-                    return scanIdentifierOrKeyword(true);
-                  } else {
-                    currentPosition = oldPosition;
-                    return TokenNameSTRING;
-                  }
-                } catch (IndexOutOfBoundsException e) {
-                  currentPosition = oldPosition;
-                  return TokenNameSTRING;
-                }
-              }
-              if (isPHPIdentifierStart(currentCharacter))
-                return scanIdentifierOrKeyword(false);
-              if (Character.isDigit(currentCharacter))
-                return scanNumber(false);
-              return TokenNameERROR;
-            }
-          }
+          //          if (encapsedChar == '$') {
+          //            switch (currentCharacter) {
+          //            case '\\':
+          //              currentCharacter = source[currentPosition++];
+          //              return TokenNameSTRING;
+          //            case '{':
+          //              if (encapsedChar == '$') {
+          //                if (getNextChar('$'))
+          //                  return TokenNameLBRACE_DOLLAR;
+          //              }
+          //              return TokenNameLBRACE;
+          //            case '}':
+          //              return TokenNameRBRACE;
+          //            case '[':
+          //              return TokenNameLBRACKET;
+          //            case ']':
+          //              return TokenNameRBRACKET;
+          //            case '\'':
+          //              if (tokenizeStrings) {
+          //                consumeStringConstant();
+          //                return TokenNameStringSingleQuote;
+          //              }
+          //              return TokenNameEncapsedString1;
+          //            case '"':
+          //              return TokenNameEncapsedString2;
+          //            case '`':
+          //              if (tokenizeStrings) {
+          //                consumeStringInterpolated();
+          //                return TokenNameStringInterpolated;
+          //              }
+          //              return TokenNameEncapsedString0;
+          //            case '-':
+          //              if (getNextChar('>'))
+          //                return TokenNameMINUS_GREATER;
+          //              return TokenNameSTRING;
+          //            default:
+          //              if (currentCharacter == '$') {
+          //                int oldPosition = currentPosition;
+          //                try {
+          //                  currentCharacter = source[currentPosition++];
+          //                  if (currentCharacter == '{') {
+          //                    return TokenNameDOLLAR_LBRACE;
+          //                  }
+          //                  if (isPHPIdentifierStart(currentCharacter)) {
+          //                    return scanIdentifierOrKeyword(true);
+          //                  } else {
+          //                    currentPosition = oldPosition;
+          //                    return TokenNameSTRING;
+          //                  }
+          //                } catch (IndexOutOfBoundsException e) {
+          //                  currentPosition = oldPosition;
+          //                  return TokenNameSTRING;
+          //                }
+          //              }
+          //              if (isPHPIdentifierStart(currentCharacter))
+          //                return scanIdentifierOrKeyword(false);
+          //              if (Character.isDigit(currentCharacter))
+          //                return scanNumber(false);
+          //              return TokenNameERROR;
+          //            }
+          //          }
           //          boolean isWhiteSpace;
 
           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
@@ -1518,7 +1533,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                 phpMode = true;
                 return TokenNameINLINE_HTML;
               }
-              return getInlinedHTML(currentPosition - 2);
+              return getInlinedHTMLToken(currentPosition - 2);
             }
             return TokenNameQUESTION;
           case ':':
@@ -1531,17 +1546,17 @@ public class Scanner implements IScanner, ITerminalSymbols {
             consumeStringConstant();
             return TokenNameStringSingleQuote;
           case '"':
-            if (tokenizeStrings) {
+//            if (tokenizeStrings) {
               consumeStringLiteral();
               return TokenNameStringDoubleQuote;
-            }
-            return TokenNameEncapsedString2;
+//            }
+//            return TokenNameEncapsedString2;
           case '`':
-            if (tokenizeStrings) {
+//            if (tokenizeStrings) {
               consumeStringInterpolated();
               return TokenNameStringInterpolated;
-            }
-            return TokenNameEncapsedString0;
+//            }
+//            return TokenNameEncapsedString0;
           case '#':
           case '/': {
             char startChar = currentCharacter;
@@ -1597,6 +1612,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                   this.lastCommentLinePosition = this.currentPosition;
                   if (currentCharacter == '?') {
                     if (getNextChar('>')) {
+                      // "?>" terminates a line comment: switch out of PHP mode and return INLINE_HTML
                       startPosition = currentPosition - 2;
                       phpMode = false;
                       return TokenNameINLINE_HTML;
@@ -1817,39 +1833,6 @@ public class Scanner implements IScanner, ITerminalSymbols {
     return TokenNameEOF;
   }
 
-  private int getInlinedHTML(int start) throws InvalidInputException {
-    int token = getInlinedHTMLToken(start);
-    if (token == TokenNameINLINE_HTML) {
-      //               Stack stack = new Stack();
-      //               // scan html for errors
-      //               Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
-      //               int lastPHPEndPos=0;
-      //               for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
-      //                   Tag tag=(Tag)i.next();
-      //                   
-      //                   if (tag instanceof StartTag) {
-      //                       StartTag startTag=(StartTag)tag;
-      //                     // System.out.println("startTag: "+tag);
-      //                       if (startTag.isServerTag()) {
-      //                         // TODO : what to do with a server tag ?
-      //                       } else {
-      //                           // do whatever with HTML start tag
-      //                           // use startTag.getElement() to find the element corresponding
-      //                           // to this start tag which may be useful if you implement code
-      //                           // folding etc
-      //                               stack.push(startTag);
-      //                       }
-      //                   } else {
-      //                       EndTag endTag=(EndTag)tag;
-      //                       StartTag stag = (StartTag) stack.peek();
-      //// System.out.println("endTag: "+tag);
-      //                       // do whatever with HTML end tag.
-      //                   }
-      //               }
-    }
-    return token;
-  }
-
   /**
    * @return
    * @throws InvalidInputException
@@ -1867,10 +1850,13 @@ public class Scanner implements IScanner, ITerminalSymbols {
           if (getNextChar('?')) {
             currentCharacter = source[currentPosition++];
             if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
-              currentPosition--;
-              // (currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
+              if (currentCharacter != '=') { // <?=
+                currentPosition--;
+              } else {
+                phpExpressionTag = true;
+              }
               // <?
-              if (ignorePHPOneLiner) {
+              if (ignorePHPOneLiner) { // for CodeFormatter
                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
                   phpMode = true;
                   return TokenNameINLINE_HTML;
@@ -1946,11 +1932,15 @@ public class Scanner implements IScanner, ITerminalSymbols {
             return TokenNameEOF;
           }
           break;
+        case '\\':
+          if (doubleQuotedStringActive) {
+            // skip the escaped character; NOTE(review): there is no break before case '\"', so this falls through - confirm that is intended
+            previousCharInLine = currentCharInLine;
+            currentCharInLine = source[currentPositionInLine++];
+          }
         case '\"':
           if (doubleQuotedStringActive) {
-            if (previousCharInLine != '\\') {
-              doubleQuotedStringActive = false;
-            }
+            doubleQuotedStringActive = false;
           } else {
             if (!singleQuotedStringActive) {
               doubleQuotedStringActive = true;
@@ -2905,20 +2895,6 @@ public class Scanner implements IScanner, ITerminalSymbols {
   }
 
   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
-    // the string with "\\u" is a legal string of two chars \ and u
-    //thus we use a direct access to the source (for regular cases).
-    //    if (unicodeAsBackSlash) {
-    //      // consume next character
-    //      unicodeAsBackSlash = false;
-    //      if (((currentCharacter = source[currentPosition++]) == '\\')
-    //        && (source[currentPosition] == 'u')) {
-    //        getNextUnicodeChar();
-    //      } else {
-    //        if (withoutUnicodePtr != 0) {
-    //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-    //        }
-    //      }
-    //    } else
     currentCharacter = source[currentPosition++];
     switch (currentCharacter) {
     //      case 'b' :
@@ -3736,7 +3712,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
     initialPosition = currentPosition = 0;
     containsAssertKeyword = false;
     withoutUnicodeBuffer = new char[this.source.length];
-    encapsedStringStack = new Stack();
+//    encapsedStringStack = new Stack();
   }
 
   public String toString() {
@@ -3898,12 +3874,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
     case TokenNameEncapsedString0:
       return "`"; //$NON-NLS-1$  
-    case TokenNameEncapsedString1:
-      return "\'"; //$NON-NLS-1$  
-    case TokenNameEncapsedString2:
-      return "\""; //$NON-NLS-1$  
+//    case TokenNameEncapsedString1:
+//      return "\'"; //$NON-NLS-1$  
+//    case TokenNameEncapsedString2:
+//      return "\""; //$NON-NLS-1$  
     case TokenNameSTRING:
-      return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
+      return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
     case TokenNameHEREDOC:
       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
     case TokenNamePLUS_PLUS:
@@ -4077,7 +4053,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
     this.tokenizeStrings = tokenizeStrings;
     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
     this.assertMode = assertMode;
-    this.encapsedStringStack = null;
+//    this.encapsedStringStack = null;
     this.taskTags = taskTags;
     this.taskPriorities = taskPriorities;
   }
@@ -4237,8 +4213,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
             continue nextTag;
 
           // ensure tag is not leaded with letter if tag starts with a letter
-          if (Character.isJavaIdentifierStart(tag[0])) {
-            if (Character.isJavaIdentifierPart(previous)) {
+          if (Scanner.isPHPIdentifierStart(tag[0])) {
+            if (Scanner.isPHPIdentifierPart(previous)) {
               continue nextTag;
             }
           }
@@ -4255,8 +4231,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
             }
           }
           // ensure tag is not followed with letter if tag finishes with a letter
-          if (i + tagLength < commentEnd && Character.isJavaIdentifierPart(src[i + tagLength - 1])) {
-            if (Character.isJavaIdentifierPart(src[i + tagLength]))
+          if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
+            if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
               continue nextTag;
           }
           if (this.foundTaskTags == null) {