improved syntax highlighting scanners and preferences
[phpeclipse.git] / archive / net.sourceforge.phpeclipse.quantum.sql / src / com / quantum / sql / parser / SQLLexx.java
1 package com.quantum.sql.parser;
2
3 import java.util.Vector;
4
5 /**
6  * <p>An SQL Lexer.  From 
7  * <a href="http://www.dictionary.com/">dictionary.com</a>:
8  * 
9  * <blockquote>
10  * <p><b>lexer</b>
11  *
12  * <p>/lek'sr/ n. Common hacker shorthand for 'lexical
13  * analyzer', the input-tokenizing stage in the parser for a language
14  * (the part that breaks it into word-like pieces).
15  * </blockquote>
16  * 
17  * <p>Note that this class has nothing to do with the Sci-fi channel's
18  * <a href="http://www.scifi.com/lexx/">Lexx</a> TV series.
19  */
20 public class SQLLexx {
21         private static String endline = ";"; //$NON-NLS-1$
22         private static String dash = "-"; //$NON-NLS-1$
23         private static String group = "/"; //$NON-NLS-1$
24         /**
25          * Parses a SQL text into tokens. 
26          * @param text
27          * @return a vector of Token objects.
28          */
29         public static Vector parse(String text) {
30                 Vector tokens = new Vector();
31                 StringPointer p = new StringPointer(text);
32                 try {
33                         while (!p.isDone()) {
34                                 int offset = p.getOffset();
35                                 char c = p.getNext();
36                                 // Adds END_OF_LINE token
37                                 if (c == '\n') {
38                                         tokens.addElement(new Token(Token.END_OF_LINE, "\n", offset, offset + 1));      
39                                 }
40                                 // Adds WHITESPACE token;
41                                 else if (Character.isWhitespace(c)) {
42                                         StringBuffer value = new StringBuffer();
43                                         while (Character.isWhitespace(c) && !p.isDone()) {
44                                                 value.append(c);
45                                                 c = p.getNext();
46                                         }
47                                         // done because of is done
48                                         if (Character.isWhitespace(c)) {
49                                                 value.append(c);
50                                         } else if (!p.isDone()){
51                                                 p.back();
52                                         }
53                                         tokens.addElement(new Token(Token.WHITESPACE, value.toString(), offset, offset + value.length()));
54                                 // Adds IDENTIFIER token (can be reserved SQL word or not);
55                                 } else if (Character.isLetter(c) || c == '_' || c == '$') {
56                                         StringBuffer value = new StringBuffer();
57                                         while ((Character.isLetterOrDigit(c) || c == '_'  || c == '$') && !p.isDone()) {
58                                                 value.append(c);
59                                                 c = p.getNext();
60                                         }
61                                         if ((Character.isLetterOrDigit(c) || c == '_')) {
62                                                 value.append(c);
63                                         } else if (!p.isDone()){
64                                                 p.back();
65                                         }
66                                         tokens.addElement(new Token(Token.IDENTIFIER, value.toString(), offset, offset + value.length()));
67                                 // Adds LITERAL token;
68                                 } else if (c == '\'') {
69                                         StringBuffer value = new StringBuffer();
70                                         value.append(c);
71                                         if (!p.isDone()) {
72                                                 c = p.getNext();
73                                                 while (c != '\'' && c != '\n' && !p.isDone()) {
74                                                         value.append(c);
75                                                         c = p.getNext();
76                                                 }
77                                                 if (c == '\'' || p.isDone()) {
78                                                         value.append(c);
79                                                 } else if (!p.isDone()){
80                                                         p.back();
81                                                 }
82                                         }
83                                         tokens.addElement(new Token(Token.LITERAL, value.toString(), offset, offset + value.length()));
84                                 // Adds COMMENT token (or SYMBOL (dash) if only one dash);
85                                 } else if (c == '-') {
86                                         p.mark();
87                                         if (p.isDone()) {
88                                                 tokens.addElement(new Token(Token.SYMBOL, dash, offset, offset + 1));
89                                         } else {
90                                                 char next = p.getNext();
91                                                 if (next == '-') {
92                                                         StringBuffer value = new StringBuffer("--"); //$NON-NLS-1$
93                                                         if (!p.isDone()) {
94                                                                 c = p.getNext();
95                                                                 while (c != '\n' && !p.isDone()) {
96                                                                         value.append(c);
97                                                                         c = p.getNext();
98                                                                 }
99                                                                 if (p.isDone()) {
100                                                                         value.append(c);
101                                                                 } else {
102                                                                         p.back();
103                                                                 }
104                                                         }
105                                                         tokens.addElement(new Token(Token.COMMENT, value.toString(), offset, offset + value.length()));
106                                                 } else {
107                                                         tokens.addElement(new Token(Token.SYMBOL, dash, offset, offset + 1));
108                                                         p.reset();
109                                                 }
110                                         }
111                                 // Adds SEPARATOR token (;),  considers the rest of the line as COMMENT token;
112                                 } else if (c == ';') {
113                                         tokens.addElement(new Token(Token.SEPARATOR, endline, offset, offset + 1));
114                                         StringBuffer value = new StringBuffer();
115                                         if (!p.isDone()) {
116                                                 c = p.getNext();
117                                                 while (c != '\n' && !p.isDone()) {
118                                                         value.append(c);
119                                                         c = p.getNext();
120                                                 }
121                                                 if (p.isDone()) {
122                                                         value.append(c);
123                                                 } else {
124                                                         p.back();
125                                                 }
126                                                 // We add to the offset so as to skip the initial ';'
127                                                 offset++;
128                                                 tokens.addElement(new Token(Token.COMMENT, value.toString(), offset, offset + value.length()));
129                                         }
130                                 // Adds NUMERIC token;
131                                 } else if (Character.isDigit(c)) {
132                                         StringBuffer value = new StringBuffer();
133                                         while ((Character.isDigit(c) || c == '.') && !p.isDone()) {
134                                                 value.append(c);
135                                                 c = p.getNext();
136                                         }
137                                         if ((Character.isDigit(c) || c == '.')) {
138                                                 value.append(c);
139                                         } else {
140                                                 p.back();
141                                         }
142                                         tokens.addElement(new Token(Token.NUMERIC, value.toString(), offset, offset + value.length()));
143                                 // Adds COMMENT token (or GROUP (slash) if only one slash);
144                                 } else if (c == '/') {
145                                         p.mark();
146                                         // If we have '/*', it's a comment till '*/' found or eof
147                                         if (p.peek() == '*') {
148                                                 tokens.addElement(tokenizeComment(p, offset));
149                                         } else {
150                                                 // It's not '/*' , so it's a group token
151                                                 // BCH ??? what's this business about groups?  
152                                                 // Shouldn't '/' be a divide operator?
153                                                 tokens.addElement(new Token(Token.SYMBOL, new String(new char[] {c}) /*group*/, offset, offset + 1));
154                                                 p.reset();
155                                         }
156                                 // Adds SYMBOL token;
157                                 } else {
158                                         tokens.addElement(new Token(Token.SYMBOL, new String(new char[] {c}), offset, offset + 1));
159                                 }
160                         }
161                 } catch (RuntimeException e) {
162                         e.printStackTrace();
163                 }
164                 
165 //              System.out.println("-------------------");
166 //              for (int i = 0; i < tokens.size(); i++) {
167 //                      System.out.println((Token) tokens.elementAt(i));
168 //              }
169                 return tokens;
170         }
171         /**
172          * @param tokens
173          * @param p
174          * @param offset
175          */
176         private static Token tokenizeComment(StringPointer p, int offset) {
177                 char c;
178                 StringBuffer value = new StringBuffer();
179                 c = p.getNext();
180                 value.append('/');
181                 while (!( c == '*' && p.peek() == '/' ) && !p.isDone()) {
182                         value.append(c);
183                         c = p.getNext();
184                 }
185                 if (!p.isDone()){
186                         value.append(c);
187                         c = p.getNext();
188                         value.append(c);        
189                 }
190                 return new Token(Token.COMMENT, value.toString(), offset, offset + value.length());
191         }
192 }