Simplified parsing for double-quoted strings
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
index 2965347..751e8c0 100644 (file)
@@ -38,10 +38,10 @@ public class Scanner implements IScanner, ITerminalSymbols {
   public boolean ignorePHPOneLiner = false;
 
   public boolean phpMode = false;
-  
+
   public boolean phpExpressionTag = false;
-  
-  public Stack encapsedStringStack = null;
+
+//  public Stack encapsedStringStack = null;
 
   public char currentCharacter;
 
@@ -1103,36 +1103,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
 
   public void consumeStringLiteral() throws InvalidInputException {
     try {
+      boolean openDollarBrace = false;
       // consume next character
       unicodeAsBackSlash = false;
       currentCharacter = source[currentPosition++];
-      //                if (((currentCharacter = source[currentPosition++]) == '\\')
-      //                  && (source[currentPosition] == 'u')) {
-      //                  getNextUnicodeChar();
-      //                } else {
-      //                  if (withoutUnicodePtr != 0) {
-      //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
-      //                      currentCharacter;
-      //                  }
-      //                }
-      while (currentCharacter != '"') {
+      while (currentCharacter != '"' || openDollarBrace) {
         /** ** in PHP \r and \n are valid in string literals *** */
-        //                  if ((currentCharacter == '\n')
-        //                    || (currentCharacter == '\r')) {
-        //                    // relocate if finding another quote fairly close: thus unicode
-        // '/u000D' will be fully consumed
-        //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-        //                      if (currentPosition + lookAhead == source.length)
-        //                        break;
-        //                      if (source[currentPosition + lookAhead] == '\n')
-        //                        break;
-        //                      if (source[currentPosition + lookAhead] == '\"') {
-        //                        currentPosition += lookAhead + 1;
-        //                        break;
-        //                      }
-        //                    }
-        //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
-        //                  }
         if (currentCharacter == '\\') {
           int escapeSize = currentPosition;
           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
@@ -1152,6 +1128,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
               withoutUnicodePtr--;
             }
           }
+        } else if (currentCharacter == '$' && source[currentPosition] == '{') {
+          openDollarBrace = true;
+        } else if (currentCharacter == '{' && source[currentPosition] == '$') {
+          openDollarBrace = true;
+        } else if (currentCharacter == '}') {
+          openDollarBrace = false;
         } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
           if (recordLineSeparator) {
             pushLineSeparator();
@@ -1160,14 +1142,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
         // consume next character
         unicodeAsBackSlash = false;
         currentCharacter = source[currentPosition++];
-        //                  if (((currentCharacter = source[currentPosition++]) == '\\')
-        //                    && (source[currentPosition] == 'u')) {
-        //                    getNextUnicodeChar();
-        //                  } else {
         if (withoutUnicodePtr != 0) {
           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
         }
-        //                  }
       }
     } catch (IndexOutOfBoundsException e) {
       //    reset end position for error reporting
@@ -1218,139 +1195,139 @@ public class Scanner implements IScanner, ITerminalSymbols {
           withoutUnicodePtr = 0;
           //start with a new token
           char encapsedChar = ' ';
-          if (!encapsedStringStack.isEmpty()) {
-            encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
-          }
-          if (encapsedChar != '$' && encapsedChar != ' ') {
-            currentCharacter = source[currentPosition++];
-            if (currentCharacter == encapsedChar) {
-              switch (currentCharacter) {
-              case '`':
-                return TokenNameEncapsedString0;
-              case '\'':
-                return TokenNameEncapsedString1;
-              case '"':
-                return TokenNameEncapsedString2;
-              }
-            }
-            while (currentCharacter != encapsedChar) {
-              /** ** in PHP \r and \n are valid in string literals *** */
-              switch (currentCharacter) {
-              case '\\':
-                int escapeSize = currentPosition;
-                boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
-                //scanEscapeCharacter make a side effect on this value and
-                // we need the previous value few lines down this one
-                scanDoubleQuotedEscapeCharacter();
-                escapeSize = currentPosition - escapeSize;
-                if (withoutUnicodePtr == 0) {
-                  //buffer all the entries that have been left aside....
-                  withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
-                  System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
-                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                } else { //overwrite the / in the buffer
-                  withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
-                  if (backSlashAsUnicodeInString) { //there are TWO \ in
-                    withoutUnicodePtr--;
-                  }
-                }
-                break;
-              case '\r':
-              case '\n':
-                if (recordLineSeparator) {
-                  pushLineSeparator();
-                }
-                break;
-              case '$':
-                if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
-                  currentPosition--;
-                  encapsedStringStack.push(new Character('$'));
-                  return TokenNameSTRING;
-                }
-                break;
-              case '{':
-                if (source[currentPosition] == '$') { // CURLY_OPEN
-                  currentPosition--;
-                  encapsedStringStack.push(new Character('$'));
-                  return TokenNameSTRING;
-                }
-              }
-              // consume next character
-              unicodeAsBackSlash = false;
-              currentCharacter = source[currentPosition++];
-              if (withoutUnicodePtr != 0) {
-                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-              }
-              //                  }
-            } // end while
-            currentPosition--;
-            return TokenNameSTRING;
-          }
+          //          if (!encapsedStringStack.isEmpty()) {
+          //            encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
+          //          }
+          //          if (encapsedChar != '$' && encapsedChar != ' ') {
+          //            currentCharacter = source[currentPosition++];
+          //            if (currentCharacter == encapsedChar) {
+          //              switch (currentCharacter) {
+          //              case '`':
+          //                return TokenNameEncapsedString0;
+          //              case '\'':
+          //                return TokenNameEncapsedString1;
+          //              case '"':
+          //                return TokenNameEncapsedString2;
+          //              }
+          //            }
+          //            while (currentCharacter != encapsedChar) {
+          //              /** ** in PHP \r and \n are valid in string literals *** */
+          //              switch (currentCharacter) {
+          //              case '\\':
+          //                int escapeSize = currentPosition;
+          //                boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+          //                //scanEscapeCharacter make a side effect on this value and
+          //                // we need the previous value few lines down this one
+          //                scanDoubleQuotedEscapeCharacter();
+          //                escapeSize = currentPosition - escapeSize;
+          //                if (withoutUnicodePtr == 0) {
+          //                  //buffer all the entries that have been left aside....
+          //                  withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
+          //                  System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
+          //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+          //                } else { //overwrite the / in the buffer
+          //                  withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+          //                  if (backSlashAsUnicodeInString) { //there are TWO \ in
+          //                    withoutUnicodePtr--;
+          //                  }
+          //                }
+          //                break;
+          //              case '\r':
+          //              case '\n':
+          //                if (recordLineSeparator) {
+          //                  pushLineSeparator();
+          //                }
+          //                break;
+          //              case '$':
+          //                if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
+          //                  currentPosition--;
+          //                  encapsedStringStack.push(new Character('$'));
+          //                  return TokenNameSTRING;
+          //                }
+          //                break;
+          //              case '{':
+          //                if (source[currentPosition] == '$') { // CURLY_OPEN
+          //                  currentPosition--;
+          //                  encapsedStringStack.push(new Character('$'));
+          //                  return TokenNameSTRING;
+          //                }
+          //              }
+          //              // consume next character
+          //              unicodeAsBackSlash = false;
+          //              currentCharacter = source[currentPosition++];
+          //              if (withoutUnicodePtr != 0) {
+          //                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+          //              }
+          //              // }
+          //            } // end while
+          //            currentPosition--;
+          //            return TokenNameSTRING;
+          //          }
           // ---------Consume white space and handles startPosition---------
           int whiteStart = currentPosition;
           startPosition = currentPosition;
           currentCharacter = source[currentPosition++];
-          if (encapsedChar == '$') {
-            switch (currentCharacter) {
-            case '\\':
-              currentCharacter = source[currentPosition++];
-              return TokenNameSTRING;
-            case '{':
-              if (encapsedChar == '$') {
-                if (getNextChar('$'))
-                  return TokenNameLBRACE_DOLLAR;
-              }
-              return TokenNameLBRACE;
-            case '}':
-              return TokenNameRBRACE;
-            case '[':
-              return TokenNameLBRACKET;
-            case ']':
-              return TokenNameRBRACKET;
-            case '\'':
-              if (tokenizeStrings) {
-                consumeStringConstant();
-                return TokenNameStringSingleQuote;
-              }
-              return TokenNameEncapsedString1;
-            case '"':
-              return TokenNameEncapsedString2;
-            case '`':
-              if (tokenizeStrings) {
-                consumeStringInterpolated();
-                return TokenNameStringInterpolated;
-              }
-              return TokenNameEncapsedString0;
-            case '-':
-              if (getNextChar('>'))
-                return TokenNameMINUS_GREATER;
-              return TokenNameSTRING;
-            default:
-              if (currentCharacter == '$') {
-                int oldPosition = currentPosition;
-                try {
-                  currentCharacter = source[currentPosition++];
-                  if (currentCharacter == '{') {
-                    return TokenNameDOLLAR_LBRACE;
-                  }
-                  if (isPHPIdentifierStart(currentCharacter)) {
-                    return scanIdentifierOrKeyword(true);
-                  } else {
-                    currentPosition = oldPosition;
-                    return TokenNameSTRING;
-                  }
-                } catch (IndexOutOfBoundsException e) {
-                  currentPosition = oldPosition;
-                  return TokenNameSTRING;
-                }
-              }
-              if (isPHPIdentifierStart(currentCharacter))
-                return scanIdentifierOrKeyword(false);
-              if (Character.isDigit(currentCharacter))
-                return scanNumber(false);
-              return TokenNameERROR;
-            }
-          }
+          //          if (encapsedChar == '$') {
+          //            switch (currentCharacter) {
+          //            case '\\':
+          //              currentCharacter = source[currentPosition++];
+          //              return TokenNameSTRING;
+          //            case '{':
+          //              if (encapsedChar == '$') {
+          //                if (getNextChar('$'))
+          //                  return TokenNameLBRACE_DOLLAR;
+          //              }
+          //              return TokenNameLBRACE;
+          //            case '}':
+          //              return TokenNameRBRACE;
+          //            case '[':
+          //              return TokenNameLBRACKET;
+          //            case ']':
+          //              return TokenNameRBRACKET;
+          //            case '\'':
+          //              if (tokenizeStrings) {
+          //                consumeStringConstant();
+          //                return TokenNameStringSingleQuote;
+          //              }
+          //              return TokenNameEncapsedString1;
+          //            case '"':
+          //              return TokenNameEncapsedString2;
+          //            case '`':
+          //              if (tokenizeStrings) {
+          //                consumeStringInterpolated();
+          //                return TokenNameStringInterpolated;
+          //              }
+          //              return TokenNameEncapsedString0;
+          //            case '-':
+          //              if (getNextChar('>'))
+          //                return TokenNameMINUS_GREATER;
+          //              return TokenNameSTRING;
+          //            default:
+          //              if (currentCharacter == '$') {
+          //                int oldPosition = currentPosition;
+          //                try {
+          //                  currentCharacter = source[currentPosition++];
+          //                  if (currentCharacter == '{') {
+          //                    return TokenNameDOLLAR_LBRACE;
+          //                  }
+          //                  if (isPHPIdentifierStart(currentCharacter)) {
+          //                    return scanIdentifierOrKeyword(true);
+          //                  } else {
+          //                    currentPosition = oldPosition;
+          //                    return TokenNameSTRING;
+          //                  }
+          //                } catch (IndexOutOfBoundsException e) {
+          //                  currentPosition = oldPosition;
+          //                  return TokenNameSTRING;
+          //                }
+          //              }
+          //              if (isPHPIdentifierStart(currentCharacter))
+          //                return scanIdentifierOrKeyword(false);
+          //              if (Character.isDigit(currentCharacter))
+          //                return scanNumber(false);
+          //              return TokenNameERROR;
+          //            }
+          //          }
           //          boolean isWhiteSpace;
 
           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
@@ -1569,17 +1546,17 @@ public class Scanner implements IScanner, ITerminalSymbols {
             consumeStringConstant();
             return TokenNameStringSingleQuote;
           case '"':
-            if (tokenizeStrings) {
+//            if (tokenizeStrings) {
               consumeStringLiteral();
               return TokenNameStringDoubleQuote;
-            }
-            return TokenNameEncapsedString2;
+//            }
+//            return TokenNameEncapsedString2;
           case '`':
-            if (tokenizeStrings) {
+//            if (tokenizeStrings) {
               consumeStringInterpolated();
               return TokenNameStringInterpolated;
-            }
-            return TokenNameEncapsedString0;
+//            }
+//            return TokenNameEncapsedString0;
           case '#':
           case '/': {
             char startChar = currentCharacter;
@@ -1875,7 +1852,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
               if (currentCharacter != '=') { // <?=
                 currentPosition--;
               } else {
-                phpExpressionTag = true; 
+                phpExpressionTag = true;
               }
               // <?
               if (ignorePHPOneLiner) { // for CodeFormatter
@@ -2917,20 +2894,6 @@ public class Scanner implements IScanner, ITerminalSymbols {
   }
 
   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
-    // the string with "\\u" is a legal string of two chars \ and u
-    //thus we use a direct access to the source (for regular cases).
-    //    if (unicodeAsBackSlash) {
-    //      // consume next character
-    //      unicodeAsBackSlash = false;
-    //      if (((currentCharacter = source[currentPosition++]) == '\\')
-    //        && (source[currentPosition] == 'u')) {
-    //        getNextUnicodeChar();
-    //      } else {
-    //        if (withoutUnicodePtr != 0) {
-    //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-    //        }
-    //      }
-    //    } else
     currentCharacter = source[currentPosition++];
     switch (currentCharacter) {
     //      case 'b' :
@@ -3748,7 +3711,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
     initialPosition = currentPosition = 0;
     containsAssertKeyword = false;
     withoutUnicodeBuffer = new char[this.source.length];
-    encapsedStringStack = new Stack();
+//    encapsedStringStack = new Stack();
   }
 
   public String toString() {
@@ -3910,12 +3873,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
     case TokenNameEncapsedString0:
       return "`"; //$NON-NLS-1$  
-    case TokenNameEncapsedString1:
-      return "\'"; //$NON-NLS-1$  
-    case TokenNameEncapsedString2:
-      return "\""; //$NON-NLS-1$  
+//    case TokenNameEncapsedString1:
+//      return "\'"; //$NON-NLS-1$  
+//    case TokenNameEncapsedString2:
+//      return "\""; //$NON-NLS-1$  
     case TokenNameSTRING:
-      return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
+      return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
     case TokenNameHEREDOC:
       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
     case TokenNamePLUS_PLUS:
@@ -4089,7 +4052,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
     this.tokenizeStrings = tokenizeStrings;
     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
     this.assertMode = assertMode;
-    this.encapsedStringStack = null;
+//    this.encapsedStringStack = null;
     this.taskTags = taskTags;
     this.taskPriorities = taskPriorities;
   }