archive/net.sourceforge.phpeclipse.quantum.sql/src/com/quantum/sql/parser/SQLLexx.java

   1 package com.quantum.sql.parser;
   2
   3 import java.util.Vector;
   4
   5 /**
   6  * <p>An SQL Lexer.  From
   7  * <a href="http://www.dictionary.com/">dictionary.com</a>:
   8  *
   9  * <blockquote>
  10  * <p><b>lexer</b>
  11  *
  12  * <p>/lek'sr/ n. Common hacker shorthand for 'lexical
  13  * analyzer', the input-tokenizing stage in the parser for a language
  14  * (the part that breaks it into word-like pieces).
  15  * </blockquote>
  16  *
  17  * <p>Note that this class has nothing to do with the Sci-fi channel's
  18  * <a href="http://www.scifi.com/lexx/">Lexx</a> TV series.
  19  */
  20 public class SQLLexx {
  21         private static String endline = ";"; //$NON-NLS-1$
  22         private static String dash = "-"; //$NON-NLS-1$
  23         private static String group = "/"; //$NON-NLS-1$
  24         /**
  25          * Parses a SQL text into tokens.
  26          * @param text
  27          * @return a vector of Token objects.
  28          */
  29         public static Vector parse(String text) {
  30                 Vector tokens = new Vector();
  31                 StringPointer p = new StringPointer(text);
  32                 try {
  33                         while (!p.isDone()) {
  34                                 int offset = p.getOffset();
  35                                 char c = p.getNext();
  36                                 // Adds END_OF_LINE token
  37                                 if (c == '\n') {
  38                                         tokens.addElement(new Token(Token.END_OF_LINE, "\n", offset, offset + 1));
  39                                 }
  40                                 // Adds WHITESPACE token;
  41                                 else if (Character.isWhitespace(c)) {
  42                                         StringBuffer value = new StringBuffer();
  43                                         while (Character.isWhitespace(c) && !p.isDone()) {
  44                                                 value.append(c);
  45                                                 c = p.getNext();
  46                                         }
  47                                         // done because of is done
  48                                         if (Character.isWhitespace(c)) {
  49                                                 value.append(c);
  50                                         } else if (!p.isDone()){
  51                                                 p.back();
  52                                         }
  53                                         tokens.addElement(new Token(Token.WHITESPACE, value.toString(), offset, offset + value.length()));
  54                                 // Adds IDENTIFIER token (can be reserved SQL word or not);
  55                                 } else if (Character.isLetter(c) || c == '_' || c == '$') {
  56                                         StringBuffer value = new StringBuffer();
  57                                         while ((Character.isLetterOrDigit(c) || c == '_'  || c == '$') && !p.isDone()) {
  58                                                 value.append(c);
  59                                                 c = p.getNext();
  60                                         }
  61                                         if ((Character.isLetterOrDigit(c) || c == '_')) {
  62                                                 value.append(c);
  63                                         } else if (!p.isDone()){
  64                                                 p.back();
  65                                         }
  66                                         tokens.addElement(new Token(Token.IDENTIFIER, value.toString(), offset, offset + value.length()));
  67                                 // Adds LITERAL token;
  68                                 } else if (c == '\'') {
  69                                         StringBuffer value = new StringBuffer();
  70                                         value.append(c);
  71                                         if (!p.isDone()) {
  72                                                 c = p.getNext();
  73                                                 while (c != '\'' && c != '\n' && !p.isDone()) {
  74                                                         value.append(c);
  75                                                         c = p.getNext();
  76                                                 }
  77                                                 if (c == '\'' || p.isDone()) {
  78                                                         value.append(c);
  79                                                 } else if (!p.isDone()){
  80                                                         p.back();
  81                                                 }
  82                                         }
  83                                         tokens.addElement(new Token(Token.LITERAL, value.toString(), offset, offset + value.length()));
  84                                 // Adds COMMENT token (or SYMBOL (dash) if only one dash);
  85                                 } else if (c == '-') {
  86                                         p.mark();
  87                                         if (p.isDone()) {
  88                                                 tokens.addElement(new Token(Token.SYMBOL, dash, offset, offset + 1));
  89                                         } else {
  90                                                 char next = p.getNext();
  91                                                 if (next == '-') {
  92                                                         StringBuffer value = new StringBuffer("--"); //$NON-NLS-1$
  93                                                         if (!p.isDone()) {
  94                                                                 c = p.getNext();
  95                                                                 while (c != '\n' && !p.isDone()) {
  96                                                                         value.append(c);
  97                                                                         c = p.getNext();
  98                                                                 }
  99                                                                 if (p.isDone()) {
 100                                                                         value.append(c);
 101                                                                 } else {
 102                                                                         p.back();
 103                                                                 }
 104                                                         }
 105                                                         tokens.addElement(new Token(Token.COMMENT, value.toString(), offset, offset + value.length()));
 106                                                 } else {
 107                                                         tokens.addElement(new Token(Token.SYMBOL, dash, offset, offset + 1));
 108                                                         p.reset();
 109                                                 }
 110                                         }
 111                                 // Adds SEPARATOR token (;),  considers the rest of the line as COMMENT token;
 112                                 } else if (c == ';') {
 113                                         tokens.addElement(new Token(Token.SEPARATOR, endline, offset, offset + 1));
 114                                         StringBuffer value = new StringBuffer();
 115                                         if (!p.isDone()) {
 116                                                 c = p.getNext();
 117                                                 while (c != '\n' && !p.isDone()) {
 118                                                         value.append(c);
 119                                                         c = p.getNext();
 120                                                 }
 121                                                 if (p.isDone()) {
 122                                                         value.append(c);
 123                                                 } else {
 124                                                         p.back();
 125                                                 }
 126                                                 // We add to the offset so as to skip the initial ';'
 127                                                 offset++;
 128                                                 tokens.addElement(new Token(Token.COMMENT, value.toString(), offset, offset + value.length()));
 129                                         }
 130                                 // Adds NUMERIC token;
 131                                 } else if (Character.isDigit(c)) {
 132                                         StringBuffer value = new StringBuffer();
 133                                         while ((Character.isDigit(c) || c == '.') && !p.isDone()) {
 134                                                 value.append(c);
 135                                                 c = p.getNext();
 136                                         }
 137                                         if ((Character.isDigit(c) || c == '.')) {
 138                                                 value.append(c);
 139                                         } else {
 140                                                 p.back();
 141                                         }
 142                                         tokens.addElement(new Token(Token.NUMERIC, value.toString(), offset, offset + value.length()));
 143                                 // Adds COMMENT token (or GROUP (slash) if only one slash);
 144                                 } else if (c == '/') {
 145                                         p.mark();
 146                                         // If we have '/*', it's a comment till '*/' found or eof
 147                                         if (p.peek() == '*') {
 148                                                 tokens.addElement(tokenizeComment(p, offset));
 149                                         } else {
 150                                                 // It's not '/*' , so it's a group token
 151                                                 // BCH ??? what's this business about groups?
 152                                                 // Shouldn't '/' be a divide operator?
 153                                                 tokens.addElement(new Token(Token.SYMBOL, new String(new char[] {c}) /*group*/, offset, offset + 1));
 154                                                 p.reset();
 155                                         }
 156                                 // Adds SYMBOL token;
 157                                 } else {
 158                                         tokens.addElement(new Token(Token.SYMBOL, new String(new char[] {c}), offset, offset + 1));
 159                                 }
 160                         }
 161                 } catch (RuntimeException e) {
 162                         e.printStackTrace();
 163                 }
 164
 165 //              System.out.println("-------------------");
 166 //              for (int i = 0; i < tokens.size(); i++) {
 167 //                      System.out.println((Token) tokens.elementAt(i));
 168 //              }
 169                 return tokens;
 170         }
 171         /**
 172          * @param tokens
 173          * @param p
 174          * @param offset
 175          */
 176         private static Token tokenizeComment(StringPointer p, int offset) {
 177                 char c;
 178                 StringBuffer value = new StringBuffer();
 179                 c = p.getNext();
 180                 value.append('/');
 181                 while (!( c == '*' && p.peek() == '/' ) && !p.isDone()) {
 182                         value.append(c);
 183                         c = p.getNext();
 184                 }
 185                 if (!p.isDone()){
 186                         value.append(c);
 187                         c = p.getNext();
 188                         value.append(c);
 189                 }
 190                 return new Token(Token.COMMENT, value.toString(), offset, offset + value.length());
 191         }
 192 }