misc
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
index 6d3e4a8..6b1e495 100644 (file)
@@ -39,7 +39,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
   public boolean phpMode = false;
 
-  public Stack encapsedStringStack = null;
+  public boolean phpExpressionTag = false;
+
+//  public Stack encapsedStringStack = null;
 
   public char currentCharacter;
 
@@ -1101,36 +1103,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
   public void consumeStringLiteral() throws InvalidInputException {
     try {
+      boolean openDollarBrace = false;
       // consume next character
       unicodeAsBackSlash = false;
       currentCharacter = source[currentPosition++];
-      //                if (((currentCharacter = source[currentPosition++]) == '\\')
-      //                  && (source[currentPosition] == 'u')) {
-      //                  getNextUnicodeChar();
-      //                } else {
-      //                  if (withoutUnicodePtr != 0) {
-      //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
-      //                      currentCharacter;
-      //                  }
-      //                }
-      while (currentCharacter != '"') {
+      while (currentCharacter != '"' || openDollarBrace) {
         /** ** in PHP \r and \n are valid in string literals *** */
-        //                  if ((currentCharacter == '\n')
-        //                    || (currentCharacter == '\r')) {
-        //                    // relocate if finding another quote fairly close: thus unicode
-        // '/u000D' will be fully consumed
-        //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-        //                      if (currentPosition + lookAhead == source.length)
-        //                        break;
-        //                      if (source[currentPosition + lookAhead] == '\n')
-        //                        break;
-        //                      if (source[currentPosition + lookAhead] == '\"') {
-        //                        currentPosition += lookAhead + 1;
-        //                        break;
-        //                      }
-        //                    }
-        //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
-        //                  }
         if (currentCharacter == '\\') {
           int escapeSize = currentPosition;
           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
@@ -1150,6 +1128,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
               withoutUnicodePtr--;
             }
           }
+        } else if (currentCharacter == '$' && source[currentPosition] == '{') {
+          openDollarBrace = true;
+        } else if (currentCharacter == '{' && source[currentPosition] == '$') {
+          openDollarBrace = true;
+        } else if (currentCharacter == '}') {
+          openDollarBrace = false;
         } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
           if (recordLineSeparator) {
             pushLineSeparator();
@@ -1158,14 +1142,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
         // consume next character
         unicodeAsBackSlash = false;
         currentCharacter = source[currentPosition++];
-        //                  if (((currentCharacter = source[currentPosition++]) == '\\')
-        //                    && (source[currentPosition] == 'u')) {
-        //                    getNextUnicodeChar();
-        //                  } else {
         if (withoutUnicodePtr != 0) {
           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
         }
-        //                  }
       }
     } catch (IndexOutOfBoundsException e) {
       //    reset end position for error reporting
@@ -1200,8 +1179,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
   }
 
   public int getNextToken() throws InvalidInputException {
+    phpExpressionTag = false;
     if (!phpMode) {
-      return getInlinedHTML(currentPosition);
+      return getInlinedHTMLToken(currentPosition);
     }
     if (phpMode) {
       this.wasAcr = false;
@@ -1215,139 +1195,139 @@ public class Scanner implements IScanner, ITerminalSymbols {
           withoutUnicodePtr = 0;
           //start with a new token
           char encapsedChar = ' ';
-          if (!encapsedStringStack.isEmpty()) {
-            encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
-          }
-          if (encapsedChar != '$' && encapsedChar != ' ') {
-            currentCharacter = source[currentPosition++];
-            if (currentCharacter == encapsedChar) {
-              switch (currentCharacter) {
-              case '`':
-                return TokenNameEncapsedString0;
-              case '\'':
-                return TokenNameEncapsedString1;
-              case '"':
-                return TokenNameEncapsedString2;
-              }
-            }
-            while (currentCharacter != encapsedChar) {
-              /** ** in PHP \r and \n are valid in string literals *** */
-              switch (currentCharacter) {
-              case '\\':
-                int escapeSize = currentPosition;
-                boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
-                //scanEscapeCharacter make a side effect on this value and
-                // we need the previous value few lines down this one
-                scanDoubleQuotedEscapeCharacter();
-                escapeSize = currentPosition - escapeSize;
-                if (withoutUnicodePtr == 0) {
-                  //buffer all the entries that have been left aside....
-                  withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
-                  System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
-                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                } else { //overwrite the / in the buffer
-                  withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
-                  if (backSlashAsUnicodeInString) { //there are TWO \ in
-                    withoutUnicodePtr--;
-                  }
-                }
-                break;
-              case '\r':
-              case '\n':
-                if (recordLineSeparator) {
-                  pushLineSeparator();
-                }
-                break;
-              case '$':
-                if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
-                  currentPosition--;
-                  encapsedStringStack.push(new Character('$'));
-                  return TokenNameSTRING;
-                }
-                break;
-              case '{':
-                if (source[currentPosition] == '$') { // CURLY_OPEN
-                  currentPosition--;
-                  encapsedStringStack.push(new Character('$'));
-                  return TokenNameSTRING;
-                }
-              }
-              // consume next character
-              unicodeAsBackSlash = false;
-              currentCharacter = source[currentPosition++];
-              if (withoutUnicodePtr != 0) {
-                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-              }
-              //                  }
-            } // end while
-            currentPosition--;
-            return TokenNameSTRING;
-          }
+          //          if (!encapsedStringStack.isEmpty()) {
+          //            encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
+          //          }
+          //          if (encapsedChar != '$' && encapsedChar != ' ') {
+          //            currentCharacter = source[currentPosition++];
+          //            if (currentCharacter == encapsedChar) {
+          //              switch (currentCharacter) {
+          //              case '`':
+          //                return TokenNameEncapsedString0;
+          //              case '\'':
+          //                return TokenNameEncapsedString1;
+          //              case '"':
+          //                return TokenNameEncapsedString2;
+          //              }
+          //            }
+          //            while (currentCharacter != encapsedChar) {
+          //              /** ** in PHP \r and \n are valid in string literals *** */
+          //              switch (currentCharacter) {
+          //              case '\\':
+          //                int escapeSize = currentPosition;
+          //                boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+          //                //scanEscapeCharacter make a side effect on this value and
+          //                // we need the previous value few lines down this one
+          //                scanDoubleQuotedEscapeCharacter();
+          //                escapeSize = currentPosition - escapeSize;
+          //                if (withoutUnicodePtr == 0) {
+          //                  //buffer all the entries that have been left aside....
+          //                  withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
+          //                  System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
+          //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+          //                } else { //overwrite the / in the buffer
+          //                  withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+          //                  if (backSlashAsUnicodeInString) { //there are TWO \ in
+          //                    withoutUnicodePtr--;
+          //                  }
+          //                }
+          //                break;
+          //              case '\r':
+          //              case '\n':
+          //                if (recordLineSeparator) {
+          //                  pushLineSeparator();
+          //                }
+          //                break;
+          //              case '$':
+          //                if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
+          //                  currentPosition--;
+          //                  encapsedStringStack.push(new Character('$'));
+          //                  return TokenNameSTRING;
+          //                }
+          //                break;
+          //              case '{':
+          //                if (source[currentPosition] == '$') { // CURLY_OPEN
+          //                  currentPosition--;
+          //                  encapsedStringStack.push(new Character('$'));
+          //                  return TokenNameSTRING;
+          //                }
+          //              }
+          //              // consume next character
+          //              unicodeAsBackSlash = false;
+          //              currentCharacter = source[currentPosition++];
+          //              if (withoutUnicodePtr != 0) {
+          //                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+          //              }
+          //              // }
+          //            } // end while
+          //            currentPosition--;
+          //            return TokenNameSTRING;
+          //          }
           // ---------Consume white space and handles startPosition---------
           int whiteStart = currentPosition;
           startPosition = currentPosition;
           currentCharacter = source[currentPosition++];
-          if (encapsedChar == '$') {
-            switch (currentCharacter) {
-            case '\\':
-              currentCharacter = source[currentPosition++];
-              return TokenNameSTRING;
-            case '{':
-              if (encapsedChar == '$') {
-                if (getNextChar('$'))
-                  return TokenNameLBRACE_DOLLAR;
-              }
-              return TokenNameLBRACE;
-            case '}':
-              return TokenNameRBRACE;
-            case '[':
-              return TokenNameLBRACKET;
-            case ']':
-              return TokenNameRBRACKET;
-            case '\'':
-              if (tokenizeStrings) {
-                consumeStringConstant();
-                return TokenNameStringSingleQuote;
-              }
-              return TokenNameEncapsedString1;
-            case '"':
-              return TokenNameEncapsedString2;
-            case '`':
-              if (tokenizeStrings) {
-                consumeStringInterpolated();
-                return TokenNameStringInterpolated;
-              }
-              return TokenNameEncapsedString0;
-            case '-':
-              if (getNextChar('>'))
-                return TokenNameMINUS_GREATER;
-              return TokenNameSTRING;
-            default:
-              if (currentCharacter == '$') {
-                int oldPosition = currentPosition;
-                try {
-                  currentCharacter = source[currentPosition++];
-                  if (currentCharacter == '{') {
-                    return TokenNameDOLLAR_LBRACE;
-                  }
-                  if (isPHPIdentifierStart(currentCharacter)) {
-                    return scanIdentifierOrKeyword(true);
-                  } else {
-                    currentPosition = oldPosition;
-                    return TokenNameSTRING;
-                  }
-                } catch (IndexOutOfBoundsException e) {
-                  currentPosition = oldPosition;
-                  return TokenNameSTRING;
-                }
-              }
-              if (isPHPIdentifierStart(currentCharacter))
-                return scanIdentifierOrKeyword(false);
-              if (Character.isDigit(currentCharacter))
-                return scanNumber(false);
-              return TokenNameERROR;
-            }
-          }
+          //          if (encapsedChar == '$') {
+          //            switch (currentCharacter) {
+          //            case '\\':
+          //              currentCharacter = source[currentPosition++];
+          //              return TokenNameSTRING;
+          //            case '{':
+          //              if (encapsedChar == '$') {
+          //                if (getNextChar('$'))
+          //                  return TokenNameLBRACE_DOLLAR;
+          //              }
+          //              return TokenNameLBRACE;
+          //            case '}':
+          //              return TokenNameRBRACE;
+          //            case '[':
+          //              return TokenNameLBRACKET;
+          //            case ']':
+          //              return TokenNameRBRACKET;
+          //            case '\'':
+          //              if (tokenizeStrings) {
+          //                consumeStringConstant();
+          //                return TokenNameStringSingleQuote;
+          //              }
+          //              return TokenNameEncapsedString1;
+          //            case '"':
+          //              return TokenNameEncapsedString2;
+          //            case '`':
+          //              if (tokenizeStrings) {
+          //                consumeStringInterpolated();
+          //                return TokenNameStringInterpolated;
+          //              }
+          //              return TokenNameEncapsedString0;
+          //            case '-':
+          //              if (getNextChar('>'))
+          //                return TokenNameMINUS_GREATER;
+          //              return TokenNameSTRING;
+          //            default:
+          //              if (currentCharacter == '$') {
+          //                int oldPosition = currentPosition;
+          //                try {
+          //                  currentCharacter = source[currentPosition++];
+          //                  if (currentCharacter == '{') {
+          //                    return TokenNameDOLLAR_LBRACE;
+          //                  }
+          //                  if (isPHPIdentifierStart(currentCharacter)) {
+          //                    return scanIdentifierOrKeyword(true);
+          //                  } else {
+          //                    currentPosition = oldPosition;
+          //                    return TokenNameSTRING;
+          //                  }
+          //                } catch (IndexOutOfBoundsException e) {
+          //                  currentPosition = oldPosition;
+          //                  return TokenNameSTRING;
+          //                }
+          //              }
+          //              if (isPHPIdentifierStart(currentCharacter))
+          //                return scanIdentifierOrKeyword(false);
+          //              if (Character.isDigit(currentCharacter))
+          //                return scanNumber(false);
+          //              return TokenNameERROR;
+          //            }
+          //          }
           //          boolean isWhiteSpace;
 
           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
@@ -1553,7 +1533,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                 phpMode = true;
                 return TokenNameINLINE_HTML;
               }
-              return getInlinedHTML(currentPosition - 2);
+              return getInlinedHTMLToken(currentPosition - 2);
             }
             return TokenNameQUESTION;
           case ':':
@@ -1566,17 +1546,17 @@ public class Scanner implements IScanner, ITerminalSymbols {
             consumeStringConstant();
             return TokenNameStringSingleQuote;
           case '"':
-            if (tokenizeStrings) {
+//            if (tokenizeStrings) {
               consumeStringLiteral();
               return TokenNameStringDoubleQuote;
-            }
-            return TokenNameEncapsedString2;
+//            }
+//            return TokenNameEncapsedString2;
           case '`':
-            if (tokenizeStrings) {
+//            if (tokenizeStrings) {
               consumeStringInterpolated();
               return TokenNameStringInterpolated;
-            }
-            return TokenNameEncapsedString0;
+//            }
+//            return TokenNameEncapsedString0;
           case '#':
           case '/': {
             char startChar = currentCharacter;
@@ -1632,6 +1612,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
                   this.lastCommentLinePosition = this.currentPosition;
                   if (currentCharacter == '?') {
                     if (getNextChar('>')) {
+                      // "?>" ends a line comment: switch out of PHP mode and return the inline-HTML token
                       startPosition = currentPosition - 2;
                       phpMode = false;
                       return TokenNameINLINE_HTML;
@@ -1852,39 +1833,6 @@ public class Scanner implements IScanner, ITerminalSymbols {
     return TokenNameEOF;
   }
 
-  private int getInlinedHTML(int start) throws InvalidInputException {
-    int token = getInlinedHTMLToken(start);
-    if (token == TokenNameINLINE_HTML) {
-      //               Stack stack = new Stack();
-      //               // scan html for errors
-      //               Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
-      //               int lastPHPEndPos=0;
-      //               for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
-      //                   Tag tag=(Tag)i.next();
-      //                   
-      //                   if (tag instanceof StartTag) {
-      //                       StartTag startTag=(StartTag)tag;
-      //                     // System.out.println("startTag: "+tag);
-      //                       if (startTag.isServerTag()) {
-      //                         // TODO : what to do with a server tag ?
-      //                       } else {
-      //                           // do whatever with HTML start tag
-      //                           // use startTag.getElement() to find the element corresponding
-      //                           // to this start tag which may be useful if you implement code
-      //                           // folding etc
-      //                               stack.push(startTag);
-      //                       }
-      //                   } else {
-      //                       EndTag endTag=(EndTag)tag;
-      //                       StartTag stag = (StartTag) stack.peek();
-      //// System.out.println("endTag: "+tag);
-      //                       // do whatever with HTML end tag.
-      //                   }
-      //               }
-    }
-    return token;
-  }
-
   /**
    * @return
    * @throws InvalidInputException
@@ -1902,10 +1850,13 @@ public class Scanner implements IScanner, ITerminalSymbols {
           if (getNextChar('?')) {
             currentCharacter = source[currentPosition++];
             if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
-              currentPosition--;
-              // (currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
+              if (currentCharacter != '=') { // not "<?=": un-read the character just consumed
+                currentPosition--;
+              } else {
+                phpExpressionTag = true;
+              }
               // <?
-              if (ignorePHPOneLiner) {
+              if (ignorePHPOneLiner) { // for CodeFormatter
                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
                   phpMode = true;
                   return TokenNameINLINE_HTML;
@@ -1981,11 +1932,15 @@ public class Scanner implements IScanner, ITerminalSymbols {
             return TokenNameEOF;
           }
           break;
+        case '\\':
+          if (doubleQuotedStringActive) {
+            // skip the character escaped by this backslash. NOTE(review): no break follows, so control falls through into case '\"' - confirm intended
+            previousCharInLine = currentCharInLine;
+            currentCharInLine = source[currentPositionInLine++];
+          }
         case '\"':
           if (doubleQuotedStringActive) {
-            if (previousCharInLine != '\\') {
-              doubleQuotedStringActive = false;
-            }
+            doubleQuotedStringActive = false;
           } else {
             if (!singleQuotedStringActive) {
               doubleQuotedStringActive = true;
@@ -2940,20 +2895,6 @@ public class Scanner implements IScanner, ITerminalSymbols {
   }
 
   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
-    // the string with "\\u" is a legal string of two chars \ and u
-    //thus we use a direct access to the source (for regular cases).
-    //    if (unicodeAsBackSlash) {
-    //      // consume next character
-    //      unicodeAsBackSlash = false;
-    //      if (((currentCharacter = source[currentPosition++]) == '\\')
-    //        && (source[currentPosition] == 'u')) {
-    //        getNextUnicodeChar();
-    //      } else {
-    //        if (withoutUnicodePtr != 0) {
-    //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-    //        }
-    //      }
-    //    } else
     currentCharacter = source[currentPosition++];
     switch (currentCharacter) {
     //      case 'b' :
@@ -3771,7 +3712,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
     initialPosition = currentPosition = 0;
     containsAssertKeyword = false;
     withoutUnicodeBuffer = new char[this.source.length];
-    encapsedStringStack = new Stack();
+//    encapsedStringStack = new Stack();
   }
 
   public String toString() {
@@ -3933,12 +3874,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
     case TokenNameEncapsedString0:
       return "`"; //$NON-NLS-1$  
-    case TokenNameEncapsedString1:
-      return "\'"; //$NON-NLS-1$  
-    case TokenNameEncapsedString2:
-      return "\""; //$NON-NLS-1$  
+//    case TokenNameEncapsedString1:
+//      return "\'"; //$NON-NLS-1$  
+//    case TokenNameEncapsedString2:
+//      return "\""; //$NON-NLS-1$  
     case TokenNameSTRING:
-      return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
+      return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
     case TokenNameHEREDOC:
       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
     case TokenNamePLUS_PLUS:
@@ -4112,7 +4053,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
     this.tokenizeStrings = tokenizeStrings;
     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
     this.assertMode = assertMode;
-    this.encapsedStringStack = null;
+//    this.encapsedStringStack = null;
     this.taskTags = taskTags;
     this.taskPriorities = taskPriorities;
   }