Added "Task Tags" functionality (TODO,...)
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
index ff11fa1..ea1f9c0 100644 (file)
@@ -12,6 +12,7 @@ package net.sourceforge.phpdt.internal.compiler.parser;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Stack;
 import net.sourceforge.phpdt.core.compiler.CharOperation;
 import net.sourceforge.phpdt.core.compiler.IScanner;
 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
@@ -32,6 +33,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
   public boolean containsAssertKeyword = false;
   public boolean recordLineSeparator;
   public boolean phpMode = false;
+  public Stack encapsedStringStack = null;
   public char currentCharacter;
   public int startPosition;
   public int currentPosition;
@@ -40,6 +42,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
   // source
   public boolean tokenizeComments;
   public boolean tokenizeWhiteSpace;
+  public boolean tokenizeStrings;
   //source should be viewed as a window (aka a part)
   //of a entire very large stream
   public char source[];
@@ -132,12 +135,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
   public char[][] taskPriorities = null;
   public static final boolean DEBUG = false;
   public static final boolean TRACE = false;
-  public Scanner() {
-    this(false, false);
-  }
-  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
-    this(tokenizeComments, tokenizeWhiteSpace, false);
-  }
+
   /**
    * Determines if the specified character is permissible as the first
    * character in a PHP identifier
@@ -803,6 +801,294 @@ public class Scanner implements IScanner, ITerminalSymbols {
     currentPosition = tempPosition;
     return TokenNameLPAREN;
   }
+  public void consumeStringInterpolated() throws InvalidInputException {
+    try {
+      // consume next character
+      unicodeAsBackSlash = false;
+      currentCharacter = source[currentPosition++];
+      //                if (((currentCharacter = source[currentPosition++]) == '\\')
+      //                  && (source[currentPosition] == 'u')) {
+      //                  getNextUnicodeChar();
+      //                } else {
+      //                  if (withoutUnicodePtr != 0) {
+      //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
+      //                      currentCharacter;
+      //                  }
+      //                }
+      while (currentCharacter != '`') {
+        /** ** in PHP \r and \n are valid in string literals *** */
+        //                if ((currentCharacter == '\n')
+        //                  || (currentCharacter == '\r')) {
+        //                  // relocate if finding another quote fairly close: thus unicode
+        // '/u000D' will be fully consumed
+        //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+        //                    if (currentPosition + lookAhead == source.length)
+        //                      break;
+        //                    if (source[currentPosition + lookAhead] == '\n')
+        //                      break;
+        //                    if (source[currentPosition + lookAhead] == '\"') {
+        //                      currentPosition += lookAhead + 1;
+        //                      break;
+        //                    }
+        //                  }
+        //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+        //                }
+        if (currentCharacter == '\\') {
+          int escapeSize = currentPosition;
+          boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+          //scanEscapeCharacter make a side effect on this value and we need
+          // the previous value few lines down this one
+          scanDoubleQuotedEscapeCharacter();
+          escapeSize = currentPosition - escapeSize;
+          if (withoutUnicodePtr == 0) {
+            //buffer all the entries that have been left aside....
+            withoutUnicodePtr = currentPosition - escapeSize - 1
+                - startPosition;
+            System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+                withoutUnicodePtr);
+            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+          } else { //overwrite the / in the buffer
+            withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+            if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
+                                              // where only one is correct
+              withoutUnicodePtr--;
+            }
+          }
+        }
+        // consume next character
+        unicodeAsBackSlash = false;
+        currentCharacter = source[currentPosition++];
+        //                  if (((currentCharacter = source[currentPosition++]) == '\\')
+        //                    && (source[currentPosition] == 'u')) {
+        //                    getNextUnicodeChar();
+        //                  } else {
+        if (withoutUnicodePtr != 0) {
+          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+        }
+        //                  }
+      }
+    } catch (IndexOutOfBoundsException e) {
+      throw new InvalidInputException(UNTERMINATED_STRING);
+    } catch (InvalidInputException e) {
+      if (e.getMessage().equals(INVALID_ESCAPE)) {
+        // relocate if finding another quote fairly close: thus unicode
+        // '/u000D' will be fully consumed
+        for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+          if (currentPosition + lookAhead == source.length)
+            break;
+          if (source[currentPosition + lookAhead] == '\n')
+            break;
+          if (source[currentPosition + lookAhead] == '`') {
+            currentPosition += lookAhead + 1;
+            break;
+          }
+        }
+      }
+      throw e; // rethrow
+    }
+    if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
+                                              // //$NON-NLS-?$ where ? is an
+                                              // int.
+      if (currentLine == null) {
+        currentLine = new NLSLine();
+        lines.add(currentLine);
+      }
+      currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+          startPosition, currentPosition - 1));
+    }
+  }
+  public void consumeStringConstant() throws InvalidInputException {
+    try {
+      // consume next character
+      unicodeAsBackSlash = false;
+      currentCharacter = source[currentPosition++];
+      //                if (((currentCharacter = source[currentPosition++]) == '\\')
+      //                  && (source[currentPosition] == 'u')) {
+      //                  getNextUnicodeChar();
+      //                } else {
+      //                  if (withoutUnicodePtr != 0) {
+      //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
+      //                      currentCharacter;
+      //                  }
+      //                }
+      while (currentCharacter != '\'') {
+        /** ** in PHP \r and \n are valid in string literals *** */
+        //                  if ((currentCharacter == '\n')
+        //                    || (currentCharacter == '\r')) {
+        //                    // relocate if finding another quote fairly close: thus unicode
+        // '/u000D' will be fully consumed
+        //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+        //                      if (currentPosition + lookAhead == source.length)
+        //                        break;
+        //                      if (source[currentPosition + lookAhead] == '\n')
+        //                        break;
+        //                      if (source[currentPosition + lookAhead] == '\"') {
+        //                        currentPosition += lookAhead + 1;
+        //                        break;
+        //                      }
+        //                    }
+        //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+        //                  }
+        if (currentCharacter == '\\') {
+          int escapeSize = currentPosition;
+          boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+          //scanEscapeCharacter make a side effect on this value and we need
+          // the previous value few lines down this one
+          scanSingleQuotedEscapeCharacter();
+          escapeSize = currentPosition - escapeSize;
+          if (withoutUnicodePtr == 0) {
+            //buffer all the entries that have been left aside....
+            withoutUnicodePtr = currentPosition - escapeSize - 1
+                - startPosition;
+            System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+                withoutUnicodePtr);
+            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+          } else { //overwrite the / in the buffer
+            withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+            if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
+                                              // where only one is correct
+              withoutUnicodePtr--;
+            }
+          }
+        }
+        // consume next character
+        unicodeAsBackSlash = false;
+        currentCharacter = source[currentPosition++];
+        //                  if (((currentCharacter = source[currentPosition++]) == '\\')
+        //                    && (source[currentPosition] == 'u')) {
+        //                    getNextUnicodeChar();
+        //                  } else {
+        if (withoutUnicodePtr != 0) {
+          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+        }
+        //                  }
+      }
+    } catch (IndexOutOfBoundsException e) {
+      throw new InvalidInputException(UNTERMINATED_STRING);
+    } catch (InvalidInputException e) {
+      if (e.getMessage().equals(INVALID_ESCAPE)) {
+        // relocate if finding another quote fairly close: thus unicode
+        // '/u000D' will be fully consumed
+        for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+          if (currentPosition + lookAhead == source.length)
+            break;
+          if (source[currentPosition + lookAhead] == '\n')
+            break;
+          if (source[currentPosition + lookAhead] == '\'') {
+            currentPosition += lookAhead + 1;
+            break;
+          }
+        }
+      }
+      throw e; // rethrow
+    }
+    if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
+                                              // //$NON-NLS-?$ where ? is an
+                                              // int.
+      if (currentLine == null) {
+        currentLine = new NLSLine();
+        lines.add(currentLine);
+      }
+      currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+          startPosition, currentPosition - 1));
+    }
+  }
+  public void consumeStringLiteral() throws InvalidInputException {
+    try {
+      // consume next character
+      unicodeAsBackSlash = false;
+      currentCharacter = source[currentPosition++];
+      //                if (((currentCharacter = source[currentPosition++]) == '\\')
+      //                  && (source[currentPosition] == 'u')) {
+      //                  getNextUnicodeChar();
+      //                } else {
+      //                  if (withoutUnicodePtr != 0) {
+      //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
+      //                      currentCharacter;
+      //                  }
+      //                }
+      while (currentCharacter != '"') {
+        /** ** in PHP \r and \n are valid in string literals *** */
+        //                  if ((currentCharacter == '\n')
+        //                    || (currentCharacter == '\r')) {
+        //                    // relocate if finding another quote fairly close: thus unicode
+        // '/u000D' will be fully consumed
+        //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+        //                      if (currentPosition + lookAhead == source.length)
+        //                        break;
+        //                      if (source[currentPosition + lookAhead] == '\n')
+        //                        break;
+        //                      if (source[currentPosition + lookAhead] == '\"') {
+        //                        currentPosition += lookAhead + 1;
+        //                        break;
+        //                      }
+        //                    }
+        //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+        //                  }
+        if (currentCharacter == '\\') {
+          int escapeSize = currentPosition;
+          boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+          //scanEscapeCharacter make a side effect on this value and we need
+          // the previous value few lines down this one
+          scanDoubleQuotedEscapeCharacter();
+          escapeSize = currentPosition - escapeSize;
+          if (withoutUnicodePtr == 0) {
+            //buffer all the entries that have been left aside....
+            withoutUnicodePtr = currentPosition - escapeSize - 1
+                - startPosition;
+            System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+                withoutUnicodePtr);
+            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+          } else { //overwrite the / in the buffer
+            withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+            if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
+                                              // where only one is correct
+              withoutUnicodePtr--;
+            }
+          }
+        }
+        // consume next character
+        unicodeAsBackSlash = false;
+        currentCharacter = source[currentPosition++];
+        //                  if (((currentCharacter = source[currentPosition++]) == '\\')
+        //                    && (source[currentPosition] == 'u')) {
+        //                    getNextUnicodeChar();
+        //                  } else {
+        if (withoutUnicodePtr != 0) {
+          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+        }
+        //                  }
+      }
+    } catch (IndexOutOfBoundsException e) {
+      throw new InvalidInputException(UNTERMINATED_STRING);
+    } catch (InvalidInputException e) {
+      if (e.getMessage().equals(INVALID_ESCAPE)) {
+        // relocate if finding another quote fairly close: thus unicode
+        // '/u000D' will be fully consumed
+        for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+          if (currentPosition + lookAhead == source.length)
+            break;
+          if (source[currentPosition + lookAhead] == '\n')
+            break;
+          if (source[currentPosition + lookAhead] == '\"') {
+            currentPosition += lookAhead + 1;
+            break;
+          }
+        }
+      }
+      throw e; // rethrow
+    }
+    if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
+                                              // //$NON-NLS-?$ where ? is an
+                                              // int.
+      if (currentLine == null) {
+        currentLine = new NLSLine();
+        lines.add(currentLine);
+      }
+      currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+          startPosition, currentPosition - 1));
+    }
+  }
   public int getNextToken() throws InvalidInputException {
     if (!phpMode) {
       return getInlinedHTML(currentPosition);
@@ -815,13 +1101,144 @@ public class Scanner implements IScanner, ITerminalSymbols {
         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
       }
       try {
-        while (true) { //loop for jumping over comments
+        while (true) {
           withoutUnicodePtr = 0;
-          //start with a new token (even comment written with unicode )
+          //start with a new token
+          char encapsedChar = ' ';
+          if (!encapsedStringStack.isEmpty()) {
+            encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
+          }
+          if (encapsedChar != '$' && encapsedChar != ' ') {
+            currentCharacter = source[currentPosition++];
+            if (currentCharacter == encapsedChar) {
+              switch (currentCharacter) {
+                case '`' :
+                  return TokenNameEncapsedString0;
+                case '\'' :
+                  return TokenNameEncapsedString1;
+                case '"' :
+                  return TokenNameEncapsedString2;
+              }
+            }
+            while (currentCharacter != encapsedChar) {
+              /** ** in PHP \r and \n are valid in string literals *** */
+              switch (currentCharacter) {
+                case '\\' :
+                  int escapeSize = currentPosition;
+                  boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+                  //scanEscapeCharacter make a side effect on this value and
+                  // we need the previous value few lines down this one
+                  scanDoubleQuotedEscapeCharacter();
+                  escapeSize = currentPosition - escapeSize;
+                  if (withoutUnicodePtr == 0) {
+                    //buffer all the entries that have been left aside....
+                    withoutUnicodePtr = currentPosition - escapeSize - 1
+                        - startPosition;
+                    System.arraycopy(source, startPosition,
+                        withoutUnicodeBuffer, 1, withoutUnicodePtr);
+                    withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+                  } else { //overwrite the / in the buffer
+                    withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+                    if (backSlashAsUnicodeInString) { //there are TWO \ in
+                      withoutUnicodePtr--;
+                    }
+                  }
+                  break;
+                case '$' :
+                  if (isPHPIdentifierStart(source[currentPosition])
+                      || source[currentPosition] == '{') {
+                    currentPosition--;
+                    encapsedStringStack.push(new Character('$'));
+                    return TokenNameSTRING;
+                  }
+                  break;
+                case '{' :
+                  if (source[currentPosition] == '$') { // CURLY_OPEN
+                    currentPosition--;
+                    encapsedStringStack.push(new Character('$'));
+                    return TokenNameSTRING;
+                  }
+              }
+              // consume next character
+              unicodeAsBackSlash = false;
+              currentCharacter = source[currentPosition++];
+              if (withoutUnicodePtr != 0) {
+                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+              }
+              //                  }
+            } // end while
+            currentPosition--;
+            return TokenNameSTRING;
+          }
           // ---------Consume white space and handles startPosition---------
           int whiteStart = currentPosition;
-          boolean isWhiteSpace;
-          do {
+          startPosition = currentPosition;
+          currentCharacter = source[currentPosition++];
+          if (encapsedChar == '$') {
+            switch (currentCharacter) {
+              case '\\' :
+                currentCharacter = source[currentPosition++];
+                return TokenNameSTRING;
+              case '{' :
+                if (encapsedChar == '$') {
+                  if (getNextChar('$'))
+                    return TokenNameCURLY_OPEN;
+                }
+                return TokenNameLBRACE;
+              case '}' :
+                return TokenNameRBRACE;
+              case '[' :
+                return TokenNameLBRACKET;
+              case ']' :
+                return TokenNameRBRACKET;
+              case '\'' :
+                if (tokenizeStrings) {
+                  consumeStringConstant();
+                  return TokenNameStringConstant;
+                }
+                return TokenNameEncapsedString1;
+              case '"' :
+                return TokenNameEncapsedString2;
+              case '`' :
+                if (tokenizeStrings) {
+                  consumeStringInterpolated();
+                  return TokenNameStringInterpolated;
+                }
+                return TokenNameEncapsedString0;
+              case '-' :
+                if (getNextChar('>'))
+                  return TokenNameMINUS_GREATER;
+                return TokenNameSTRING;
+              default :
+                if (currentCharacter == '$') {
+                  int oldPosition = currentPosition;
+                  try {
+                    currentCharacter = source[currentPosition++];
+                    if (currentCharacter == '{') {
+                      return TokenNameDOLLAR_LBRACE;
+                    }
+                    if (isPHPIdentifierStart(currentCharacter)) {
+                      return scanIdentifierOrKeyword(true);
+                    } else {
+                      currentPosition = oldPosition;
+                      return TokenNameSTRING;
+                    }
+                  } catch (IndexOutOfBoundsException e) {
+                    currentPosition = oldPosition;
+                    return TokenNameSTRING;
+                  }
+                }
+                if (isPHPIdentifierStart(currentCharacter))
+                  return scanIdentifierOrKeyword(false);
+                if (Character.isDigit(currentCharacter))
+                  return scanNumber(false);
+                return TokenNameERROR;
+            }
+          }
+          //          boolean isWhiteSpace;
+          
+          while ((currentCharacter == ' ')
+              || Character.isWhitespace(currentCharacter)) {
             startPosition = currentPosition;
             currentCharacter = source[currentPosition++];
             //            if (((currentCharacter = source[currentPosition++]) == '\\')
@@ -836,10 +1253,10 @@ public class Scanner implements IScanner, ITerminalSymbols {
                 currentLine = null;
               }
             }
-            isWhiteSpace = (currentCharacter == ' ')
-                || Character.isWhitespace(currentCharacter);
+            //            isWhiteSpace = (currentCharacter == ' ')
+            //                || Character.isWhitespace(currentCharacter);
             //            }
-          } while (isWhiteSpace);
+          }
           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
             // reposition scanner in case we are interested by spaces as tokens
             currentPosition--;
@@ -931,9 +1348,12 @@ public class Scanner implements IScanner, ITerminalSymbols {
                     if (getNextChar('='))
                       return TokenNameLEFT_SHIFT_EQUAL;
                     if (getNextChar('<')) {
-                      int heredocStart = currentPosition;
-                      int heredocLength = 0;
                       currentCharacter = source[currentPosition++];
+                      while (Character.isWhitespace(currentCharacter)) {
+                        currentCharacter = source[currentPosition++];
+                      }
+                      int heredocStart = currentPosition - 1;
+                      int heredocLength = 0;
                       if (isPHPIdentifierStart(currentCharacter)) {
                         currentCharacter = source[currentPosition++];
                       } else {
@@ -1024,7 +1444,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
             case '?' :
               if (getNextChar('>')) {
                 phpMode = false;
-                if (currentPosition==source.length) {
+                if (currentPosition == source.length) {
                   phpMode = true;
                   return TokenNameINLINE_HTML;
                 }
@@ -1037,383 +1457,21 @@ public class Scanner implements IScanner, ITerminalSymbols {
               return TokenNameCOLON;
             case '@' :
               return TokenNameAT;
-            //                                 case '\'' :
-            //                                         {
-            //                                                 int test;
-            //                                                 if ((test = getNextChar('\n', '\r')) == 0) {
-            //                                                         throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
-            //                                                 }
-            //                                                 if (test > 0) {
-            //                                                         // relocate if finding another quote fairly close: thus unicode
-            // '/u000D' will be fully consumed
-            //                                                         for (int lookAhead = 0;
-            //                                                                 lookAhead < 3;
-            //                                                                 lookAhead++) {
-            //                                                                 if (currentPosition + lookAhead
-            //                                                                         == source.length)
-            //                                                                         break;
-            //                                                                 if (source[currentPosition + lookAhead]
-            //                                                                         == '\n')
-            //                                                                         break;
-            //                                                                 if (source[currentPosition + lookAhead]
-            //                                                                         == '\'') {
-            //                                                                         currentPosition += lookAhead + 1;
-            //                                                                         break;
-            //                                                                 }
-            //                                                         }
-            //                                                         throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
-            //                                                 }
-            //                                         }
-            //                                         if (getNextChar('\'')) {
-            //                                                 // relocate if finding another quote fairly close: thus unicode
-            // '/u000D' will be fully consumed
-            //                                                 for (int lookAhead = 0;
-            //                                                         lookAhead < 3;
-            //                                                         lookAhead++) {
-            //                                                         if (currentPosition + lookAhead
-            //                                                                 == source.length)
-            //                                                                 break;
-            //                                                         if (source[currentPosition + lookAhead]
-            //                                                                 == '\n')
-            //                                                                 break;
-            //                                                         if (source[currentPosition + lookAhead]
-            //                                                                 == '\'') {
-            //                                                                 currentPosition += lookAhead + 1;
-            //                                                                 break;
-            //                                                         }
-            //                                                 }
-            //                                                 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
-            //                                         }
-            //                                         if (getNextChar('\\'))
-            //                                                 scanEscapeCharacter();
-            //                                         else { // consume next character
-            //                                                 unicodeAsBackSlash = false;
-            //                                                 if (((currentCharacter = source[currentPosition++])
-            //                                                         == '\\')
-            //                                                         && (source[currentPosition] == 'u')) {
-            //                                                         getNextUnicodeChar();
-            //                                                 } else {
-            //                                                         if (withoutUnicodePtr != 0) {
-            //                                                                 withoutUnicodeBuffer[++withoutUnicodePtr] =
-            //                                                                         currentCharacter;
-            //                                                         }
-            //                                                 }
-            //                                         }
-            //                                         // if (getNextChar('\''))
-            //                                         // return TokenNameCharacterLiteral;
-            //                                         // relocate if finding another quote fairly close: thus unicode
-            // '/u000D' will be fully consumed
-            //                                         for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
-            //                                                 if (currentPosition + lookAhead == source.length)
-            //                                                         break;
-            //                                                 if (source[currentPosition + lookAhead] == '\n')
-            //                                                         break;
-            //                                                 if (source[currentPosition + lookAhead] == '\'') {
-            //                                                         currentPosition += lookAhead + 1;
-            //                                                         break;
-            //                                                 }
-            //                                         }
-            //                                         throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
             case '\'' :
-              try {
-                // consume next character
-                unicodeAsBackSlash = false;
-                currentCharacter = source[currentPosition++];
-                //                if (((currentCharacter = source[currentPosition++]) == '\\')
-                //                  && (source[currentPosition] == 'u')) {
-                //                  getNextUnicodeChar();
-                //                } else {
-                //                  if (withoutUnicodePtr != 0) {
-                //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
-                //                      currentCharacter;
-                //                  }
-                //                }
-                while (currentCharacter != '\'') {
-                  /** ** in PHP \r and \n are valid in string literals *** */
-                  //                  if ((currentCharacter == '\n')
-                  //                    || (currentCharacter == '\r')) {
-                  //                    // relocate if finding another quote fairly close: thus
-                  // unicode '/u000D' will be fully consumed
-                  //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-                  //                      if (currentPosition + lookAhead == source.length)
-                  //                        break;
-                  //                      if (source[currentPosition + lookAhead] == '\n')
-                  //                        break;
-                  //                      if (source[currentPosition + lookAhead] == '\"') {
-                  //                        currentPosition += lookAhead + 1;
-                  //                        break;
-                  //                      }
-                  //                    }
-                  //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
-                  //                  }
-                  if (currentCharacter == '\\') {
-                    int escapeSize = currentPosition;
-                    boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
-                    //scanEscapeCharacter make a side effect on this value and
-                    // we need the previous value few lines down this one
-                    scanSingleQuotedEscapeCharacter();
-                    escapeSize = currentPosition - escapeSize;
-                    if (withoutUnicodePtr == 0) {
-                      //buffer all the entries that have been left aside....
-                      withoutUnicodePtr = currentPosition - escapeSize - 1
-                          - startPosition;
-                      System.arraycopy(source, startPosition,
-                          withoutUnicodeBuffer, 1, withoutUnicodePtr);
-                      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                    } else { //overwrite the / in the buffer
-                      withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
-                      if (backSlashAsUnicodeInString) { //there are TWO \ in
-                        // the stream where
-                        // only one is correct
-                        withoutUnicodePtr--;
-                      }
-                    }
-                  }
-                  // consume next character
-                  unicodeAsBackSlash = false;
-                  currentCharacter = source[currentPosition++];
-                  //                  if (((currentCharacter = source[currentPosition++]) ==
-                  // '\\')
-                  //                    && (source[currentPosition] == 'u')) {
-                  //                    getNextUnicodeChar();
-                  //                  } else {
-                  if (withoutUnicodePtr != 0) {
-                    withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                  }
-                  //                  }
-                }
-              } catch (IndexOutOfBoundsException e) {
-                throw new InvalidInputException(UNTERMINATED_STRING);
-              } catch (InvalidInputException e) {
-                if (e.getMessage().equals(INVALID_ESCAPE)) {
-                  // relocate if finding another quote fairly close: thus
-                  // unicode '/u000D' will be fully consumed
-                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-                    if (currentPosition + lookAhead == source.length)
-                      break;
-                    if (source[currentPosition + lookAhead] == '\n')
-                      break;
-                    if (source[currentPosition + lookAhead] == '\'') {
-                      currentPosition += lookAhead + 1;
-                      break;
-                    }
-                  }
-                }
-                throw e; // rethrow
-              }
-              if (checkNonExternalizedStringLiterals) { // check for presence
-                // of NLS tags
-                // //$NON-NLS-?$ where
-                // ? is an int.
-                if (currentLine == null) {
-                  currentLine = new NLSLine();
-                  lines.add(currentLine);
-                }
-                currentLine.add(new StringLiteral(
-                    getCurrentTokenSourceString(), startPosition,
-                    currentPosition - 1));
-              }
+              consumeStringConstant();
               return TokenNameStringConstant;
             case '"' :
-              try {
-                // consume next character
-                unicodeAsBackSlash = false;
-                currentCharacter = source[currentPosition++];
-                //                if (((currentCharacter = source[currentPosition++]) == '\\')
-                //                  && (source[currentPosition] == 'u')) {
-                //                  getNextUnicodeChar();
-                //                } else {
-                //                  if (withoutUnicodePtr != 0) {
-                //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
-                //                      currentCharacter;
-                //                  }
-                //                }
-                while (currentCharacter != '"') {
-                  /** ** in PHP \r and \n are valid in string literals *** */
-                  //                  if ((currentCharacter == '\n')
-                  //                    || (currentCharacter == '\r')) {
-                  //                    // relocate if finding another quote fairly close: thus
-                  // unicode '/u000D' will be fully consumed
-                  //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-                  //                      if (currentPosition + lookAhead == source.length)
-                  //                        break;
-                  //                      if (source[currentPosition + lookAhead] == '\n')
-                  //                        break;
-                  //                      if (source[currentPosition + lookAhead] == '\"') {
-                  //                        currentPosition += lookAhead + 1;
-                  //                        break;
-                  //                      }
-                  //                    }
-                  //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
-                  //                  }
-                  if (currentCharacter == '\\') {
-                    int escapeSize = currentPosition;
-                    boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
-                    //scanEscapeCharacter make a side effect on this value and
-                    // we need the previous value few lines down this one
-                    scanDoubleQuotedEscapeCharacter();
-                    escapeSize = currentPosition - escapeSize;
-                    if (withoutUnicodePtr == 0) {
-                      //buffer all the entries that have been left aside....
-                      withoutUnicodePtr = currentPosition - escapeSize - 1
-                          - startPosition;
-                      System.arraycopy(source, startPosition,
-                          withoutUnicodeBuffer, 1, withoutUnicodePtr);
-                      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                    } else { //overwrite the / in the buffer
-                      withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
-                      if (backSlashAsUnicodeInString) { //there are TWO \ in
-                        // the stream where
-                        // only one is correct
-                        withoutUnicodePtr--;
-                      }
-                    }
-                  }
-                  // consume next character
-                  unicodeAsBackSlash = false;
-                  currentCharacter = source[currentPosition++];
-                  //                  if (((currentCharacter = source[currentPosition++]) ==
-                  // '\\')
-                  //                    && (source[currentPosition] == 'u')) {
-                  //                    getNextUnicodeChar();
-                  //                  } else {
-                  if (withoutUnicodePtr != 0) {
-                    withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                  }
-                  //                  }
-                }
-              } catch (IndexOutOfBoundsException e) {
-                throw new InvalidInputException(UNTERMINATED_STRING);
-              } catch (InvalidInputException e) {
-                if (e.getMessage().equals(INVALID_ESCAPE)) {
-                  // relocate if finding another quote fairly close: thus
-                  // unicode '/u000D' will be fully consumed
-                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-                    if (currentPosition + lookAhead == source.length)
-                      break;
-                    if (source[currentPosition + lookAhead] == '\n')
-                      break;
-                    if (source[currentPosition + lookAhead] == '\"') {
-                      currentPosition += lookAhead + 1;
-                      break;
-                    }
-                  }
-                }
-                throw e; // rethrow
+              if (tokenizeStrings) {
+                consumeStringLiteral();
+                return TokenNameStringLiteral;
               }
-              if (checkNonExternalizedStringLiterals) { // check for presence
-                // of NLS tags
-                // //$NON-NLS-?$ where
-                // ? is an int.
-                if (currentLine == null) {
-                  currentLine = new NLSLine();
-                  lines.add(currentLine);
-                }
-                currentLine.add(new StringLiteral(
-                    getCurrentTokenSourceString(), startPosition,
-                    currentPosition - 1));
-              }
-              return TokenNameStringLiteral;
+              return TokenNameEncapsedString2;
             case '`' :
-              try {
-                // consume next character
-                unicodeAsBackSlash = false;
-                currentCharacter = source[currentPosition++];
-                //                if (((currentCharacter = source[currentPosition++]) == '\\')
-                //                  && (source[currentPosition] == 'u')) {
-                //                  getNextUnicodeChar();
-                //                } else {
-                //                  if (withoutUnicodePtr != 0) {
-                //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
-                //                      currentCharacter;
-                //                  }
-                //                }
-                while (currentCharacter != '`') {
-                  /** ** in PHP \r and \n are valid in string literals *** */
-                  //                if ((currentCharacter == '\n')
-                  //                  || (currentCharacter == '\r')) {
-                  //                  // relocate if finding another quote fairly close: thus
-                  // unicode '/u000D' will be fully consumed
-                  //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-                  //                    if (currentPosition + lookAhead == source.length)
-                  //                      break;
-                  //                    if (source[currentPosition + lookAhead] == '\n')
-                  //                      break;
-                  //                    if (source[currentPosition + lookAhead] == '\"') {
-                  //                      currentPosition += lookAhead + 1;
-                  //                      break;
-                  //                    }
-                  //                  }
-                  //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
-                  //                }
-                  if (currentCharacter == '\\') {
-                    int escapeSize = currentPosition;
-                    boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
-                    //scanEscapeCharacter make a side effect on this value and
-                    // we need the previous value few lines down this one
-                    scanDoubleQuotedEscapeCharacter();
-                    escapeSize = currentPosition - escapeSize;
-                    if (withoutUnicodePtr == 0) {
-                      //buffer all the entries that have been left aside....
-                      withoutUnicodePtr = currentPosition - escapeSize - 1
-                          - startPosition;
-                      System.arraycopy(source, startPosition,
-                          withoutUnicodeBuffer, 1, withoutUnicodePtr);
-                      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                    } else { //overwrite the / in the buffer
-                      withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
-                      if (backSlashAsUnicodeInString) { //there are TWO \ in
-                        // the stream where
-                        // only one is correct
-                        withoutUnicodePtr--;
-                      }
-                    }
-                  }
-                  // consume next character
-                  unicodeAsBackSlash = false;
-                  currentCharacter = source[currentPosition++];
-                  //                  if (((currentCharacter = source[currentPosition++]) ==
-                  // '\\')
-                  //                    && (source[currentPosition] == 'u')) {
-                  //                    getNextUnicodeChar();
-                  //                  } else {
-                  if (withoutUnicodePtr != 0) {
-                    withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
-                  }
-                  //                  }
-                }
-              } catch (IndexOutOfBoundsException e) {
-                throw new InvalidInputException(UNTERMINATED_STRING);
-              } catch (InvalidInputException e) {
-                if (e.getMessage().equals(INVALID_ESCAPE)) {
-                  // relocate if finding another quote fairly close: thus
-                  // unicode '/u000D' will be fully consumed
-                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
-                    if (currentPosition + lookAhead == source.length)
-                      break;
-                    if (source[currentPosition + lookAhead] == '\n')
-                      break;
-                    if (source[currentPosition + lookAhead] == '`') {
-                      currentPosition += lookAhead + 1;
-                      break;
-                    }
-                  }
-                }
-                throw e; // rethrow
-              }
-              if (checkNonExternalizedStringLiterals) { // check for presence
-                // of NLS tags
-                // //$NON-NLS-?$ where
-                // ? is an int.
-                if (currentLine == null) {
-                  currentLine = new NLSLine();
-                  lines.add(currentLine);
-                }
-                currentLine.add(new StringLiteral(
-                    getCurrentTokenSourceString(), startPosition,
-                    currentPosition - 1));
+              if (tokenizeStrings) {
+                consumeStringInterpolated();
+                return TokenNameStringInterpolated;
               }
-              return TokenNameStringInterpolated;
+              return TokenNameEncapsedString0;
             case '#' :
             case '/' :
               {
@@ -1452,7 +1510,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
                     //                          Character.getNumericValue(source[currentPosition++]))
                     //                          > 15
                     //                        || c4 < 0) {
-                    //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
+                    //                        throw new
+                    // InvalidInputException(INVALID_UNICODE_ESCAPE);
                     //                      } else {
                     //                        currentCharacter =
                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
@@ -1523,7 +1582,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
                       endPositionForLineComment = currentPosition - 1;
                     }
                     recordComment(false);
-                    if ((currentCharacter == '\r')
+                    if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
+                                       if ((currentCharacter == '\r')
                         || (currentCharacter == '\n')) {
                       checkNonExternalizeString();
                       if (recordLineSeparator) {
@@ -1679,9 +1739,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
    */
   private int getInlinedHTML(int start) throws InvalidInputException {
     //    int htmlPosition = start;
-    if (currentPosition>source.length) {
-        currentPosition = source.length;
-        return TokenNameEOF;
+    if (currentPosition > source.length) {
+      currentPosition = source.length;
+      return TokenNameEOF;
     }
     startPosition = start;
     try {
@@ -3444,6 +3504,7 @@ public class Scanner implements IScanner, ITerminalSymbols {
     initialPosition = currentPosition = 0;
     containsAssertKeyword = false;
     withoutUnicodeBuffer = new char[this.source.length];
+    encapsedStringStack = new Stack();
   }
   public String toString() {
     if (startPosition == source.length)
@@ -3494,6 +3555,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
         return "case"; //$NON-NLS-1$
       case TokenNameclass :
         return "class"; //$NON-NLS-1$
+      case TokenNamecatch :
+        return "catch"; //$NON-NLS-1$
       case TokenNameclone :
         //$NON-NLS-1$
         return "clone";
@@ -3524,6 +3587,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
         return "endswitch"; //$NON-NLS-1$
       case TokenNameendwhile :
         return "endwhile"; //$NON-NLS-1$
+      case TokenNameexit:
+        return "exit";
       case TokenNameextends :
         return "extends"; //$NON-NLS-1$
       //      case TokenNamefalse :
@@ -3546,6 +3611,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
         return "include"; //$NON-NLS-1$
       case TokenNameinclude_once :
         return "include_once"; //$NON-NLS-1$
+      case TokenNameinstanceof :
+        return "instanceof"; //$NON-NLS-1$
       case TokenNameinterface :
         return "interface"; //$NON-NLS-1$
       case TokenNameisset :
@@ -3593,12 +3660,20 @@ public class Scanner implements IScanner, ITerminalSymbols {
       case TokenNameDoubleLiteral :
         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
       case TokenNameStringLiteral :
-        return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
+        return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
       case TokenNameStringConstant :
         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
       case TokenNameStringInterpolated :
         return "StringInterpolated(" + new String(getCurrentTokenSource())
             + ")"; //$NON-NLS-1$ //$NON-NLS-2$
+      case TokenNameEncapsedString0 :
+        return "`"; //$NON-NLS-1$  
+      case TokenNameEncapsedString1 :
+        return "\'"; //$NON-NLS-1$  
+      case TokenNameEncapsedString2 :
+        return "\""; //$NON-NLS-1$  
+      case TokenNameSTRING :
+        return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
       case TokenNameHEREDOC :
         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
       case TokenNamePLUS_PLUS :
@@ -3705,8 +3780,8 @@ public class Scanner implements IScanner, ITerminalSymbols {
         return "@";
       case TokenNameDOLLAR :
         return "$";
-      //      case TokenNameDOLLAR_LBRACE :
-      //        return "${";
+      case TokenNameDOLLAR_LBRACE :
+        return "${";
       case TokenNameEOF :
         return "EOF"; //$NON-NLS-1$
       case TokenNameWHITESPACE :
@@ -3717,9 +3792,9 @@ public class Scanner implements IScanner, ITerminalSymbols {
         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
       case TokenNameCOMMENT_PHPDOC :
         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
-        //      case TokenNameHTML :
-        //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
-        // //$NON-NLS-1$
+      //      case TokenNameHTML :
+      //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
+      // //$NON-NLS-1$
       case TokenNameFILE :
         return "__FILE__"; //$NON-NLS-1$
       case TokenNameLINE :
@@ -3745,6 +3820,13 @@ public class Scanner implements IScanner, ITerminalSymbols {
             + new String(getCurrentTokenSource()); //$NON-NLS-1$
     }
   }
+  
+  public Scanner() {
+    this(false, false);
+  }
+  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
+    this(tokenizeComments, tokenizeWhiteSpace, false);
+  }
   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
       boolean checkNonExternalizedStringLiterals) {
     this(tokenizeComments, tokenizeWhiteSpace,
@@ -3752,11 +3834,23 @@ public class Scanner implements IScanner, ITerminalSymbols {
   }
   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
       boolean checkNonExternalizedStringLiterals, boolean assertMode) {
+    this(tokenizeComments, tokenizeWhiteSpace,
+        checkNonExternalizedStringLiterals, assertMode, false, null, null);
+  }
+  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
+      boolean checkNonExternalizedStringLiterals, boolean assertMode,
+      boolean tokenizeStrings,
+      char[][] taskTags,
+         char[][] taskPriorities) {
     this.eofPosition = Integer.MAX_VALUE;
     this.tokenizeComments = tokenizeComments;
     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
+    this.tokenizeStrings = tokenizeStrings;
     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
     this.assertMode = assertMode;
+    this.encapsedStringStack = null;
+    this.taskTags = taskTags;
+       this.taskPriorities = taskPriorities;
   }
   private void checkNonExternalizeString() throws InvalidInputException {
     if (currentLine == null)