1 package com.quantum.sql.parser;
3 import java.util.Vector;
6 * <p>An SQL Lexer. From
7 * <a href="http://www.dictionary.com/">dictionary.com</a>:
12 * <p>/lek'sr/ n. Common hacker shorthand for 'lexical
13 * analyzer', the input-tokenizing stage in the parser for a language
14 * (the part that breaks it into word-like pieces).
17 * <p>Note that this class has nothing to do with the Sci-fi channel's
18 * <a href="http://www.scifi.com/lexx/">Lexx</a> TV series.
20 public class SQLLexx {
21 private static String endline = ";"; //$NON-NLS-1$
22 private static String dash = "-"; //$NON-NLS-1$
23 private static String group = "/"; //$NON-NLS-1$
25 * Parses a SQL text into tokens.
27 * @return a vector of Token objects.
29 public static Vector parse(String text) {
30 Vector tokens = new Vector();
31 StringPointer p = new StringPointer(text);
34 int offset = p.getOffset();
36 // Adds END_OF_LINE token
38 tokens.addElement(new Token(Token.END_OF_LINE, "\n", offset, offset + 1));
40 // Adds WHITESPACE token;
41 else if (Character.isWhitespace(c)) {
42 StringBuffer value = new StringBuffer();
43 while (Character.isWhitespace(c) && !p.isDone()) {
47 // the loop may have stopped because p.isDone(), not because c stopped being whitespace
48 if (Character.isWhitespace(c)) {
50 } else if (!p.isDone()){
53 tokens.addElement(new Token(Token.WHITESPACE, value.toString(), offset, offset + value.length()));
54 // Adds IDENTIFIER token (can be reserved SQL word or not);
55 } else if (Character.isLetter(c) || c == '_' || c == '$') {
56 StringBuffer value = new StringBuffer();
57 while ((Character.isLetterOrDigit(c) || c == '_' || c == '$') && !p.isDone()) {
61 if ((Character.isLetterOrDigit(c) || c == '_')) {
63 } else if (!p.isDone()){
66 tokens.addElement(new Token(Token.IDENTIFIER, value.toString(), offset, offset + value.length()));
67 // Adds LITERAL token;
68 } else if (c == '\'') {
69 StringBuffer value = new StringBuffer();
73 while (c != '\'' && c != '\n' && !p.isDone()) {
77 if (c == '\'' || p.isDone()) {
79 } else if (!p.isDone()){
83 tokens.addElement(new Token(Token.LITERAL, value.toString(), offset, offset + value.length()));
84 // Adds COMMENT token (or SYMBOL (dash) if only one dash);
85 } else if (c == '-') {
88 tokens.addElement(new Token(Token.SYMBOL, dash, offset, offset + 1));
90 char next = p.getNext();
92 StringBuffer value = new StringBuffer("--"); //$NON-NLS-1$
95 while (c != '\n' && !p.isDone()) {
105 tokens.addElement(new Token(Token.COMMENT, value.toString(), offset, offset + value.length()));
107 tokens.addElement(new Token(Token.SYMBOL, dash, offset, offset + 1));
111 // Adds SEPARATOR token (;), considers the rest of the line as COMMENT token;
112 } else if (c == ';') {
113 tokens.addElement(new Token(Token.SEPARATOR, endline, offset, offset + 1));
114 StringBuffer value = new StringBuffer();
117 while (c != '\n' && !p.isDone()) {
126 // We add to the offset so as to skip the initial ';'
128 tokens.addElement(new Token(Token.COMMENT, value.toString(), offset, offset + value.length()));
130 // Adds NUMERIC token;
131 } else if (Character.isDigit(c)) {
132 StringBuffer value = new StringBuffer();
133 while ((Character.isDigit(c) || c == '.') && !p.isDone()) {
137 if ((Character.isDigit(c) || c == '.')) {
142 tokens.addElement(new Token(Token.NUMERIC, value.toString(), offset, offset + value.length()));
143 // Adds COMMENT token if followed by '*' (or SYMBOL (slash) otherwise);
144 } else if (c == '/') {
146 // If we have '/*', it's a comment till '*/' found or eof
147 if (p.peek() == '*') {
148 tokens.addElement(tokenizeComment(p, offset));
150 // It's not '/*', so it is emitted as a SYMBOL token (the 'group' constant is not used here)
151 // BCH ??? what's this business about groups?
152 // Shouldn't '/' be a divide operator?
153 tokens.addElement(new Token(Token.SYMBOL, new String(new char[] {c}) /*group*/, offset, offset + 1));
156 // Adds SYMBOL token;
158 tokens.addElement(new Token(Token.SYMBOL, new String(new char[] {c}), offset, offset + 1));
161 } catch (RuntimeException e) {
165 // System.out.println("-------------------");
166 // for (int i = 0; i < tokens.size(); i++) {
167 // System.out.println((Token) tokens.elementAt(i));
176 private static Token tokenizeComment(StringPointer p, int offset) {
178 StringBuffer value = new StringBuffer();
181 while (!( c == '*' && p.peek() == '/' ) && !p.isDone()) {
190 return new Token(Token.COMMENT, value.toString(), offset, offset + value.length());