Fixed bug: ArrayIndexOutOfBoundsException
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPCodeScanner.java
1 /**********************************************************************
2  Copyright (c) 2000, 2002 IBM Corp. and others.
3  All rights reserved. This program and the accompanying materials
4  are made available under the terms of the Common Public License v1.0
5  which accompanies this distribution, and is available at
6  http://www.eclipse.org/legal/cpl-v10.html
7
8  Contributors:
9  IBM Corporation - Initial implementation
10  www.phpeclipse.de
11  **********************************************************************/
12 package net.sourceforge.phpeclipse.phpeditor.php;
13
14 import java.util.ArrayList;
15 import java.util.HashMap;
16 import java.util.List;
17 import java.util.Map;
18
19 import net.sourceforge.phpdt.internal.ui.text.AbstractJavaScanner;
20 import net.sourceforge.phpdt.ui.text.IColorManager;
21 import net.sourceforge.phpeclipse.IPreferenceConstants;
22 import net.sourceforge.phpeclipse.phpeditor.PHPSyntaxRdr;
23 import net.sourceforge.phpeclipse.phpeditor.util.PHPWhitespaceDetector;
24 import net.sourceforge.phpeclipse.phpeditor.util.PHPWordDetector;
25
26 import org.eclipse.jface.preference.IPreferenceStore;
27 import org.eclipse.jface.text.Assert;
28 import org.eclipse.jface.text.rules.ICharacterScanner;
29 import org.eclipse.jface.text.rules.IRule;
30 import org.eclipse.jface.text.rules.IToken;
31 import org.eclipse.jface.text.rules.IWordDetector;
32 import org.eclipse.jface.text.rules.MultiLineRule;
33 import org.eclipse.jface.text.rules.Token;
34 import org.eclipse.jface.text.rules.WhitespaceRule;
35 import org.eclipse.jface.text.rules.WordRule;
36
37 /**
38  * PHP Code Scanner
39  */
40 public class PHPCodeScanner extends AbstractJavaScanner {
41
42         /**
43          * Rule to detect java operators.
44          *
45          * @since 3.0
46          */
47         protected class OperatorRule implements IRule {
48
49                 /** Java operators */
50                 private final char[] PHP_OPERATORS = { ';', '(', ')', '.', '=', '/', '\\', '+', '-', '*', '[', ']', '<', '>', ':', '?', '!',
51                                 ',', '|', '&', '^', '%', '~', '@' };
52
53                 /** Token to return for this rule */
54                 private final IToken fToken;
55
56                 /** Token to return for braces */
57                 private final IToken fTokenBraces;
58
59                 /** Token to return for heredocs */
60                 private final IToken fTokenHeredoc;
61
62                 /**
63                  * Creates a new operator rule.
64                  *
65                  * @param token
66                  *          Token to use for this rule
67                  * @param tokenHeredoc
68                  *          TODO
69                  */
70                 public OperatorRule(IToken token, IToken tokenBraces, IToken tokenHeredoc) {
71                         fToken = token;
72                         fTokenBraces = tokenBraces;
73                         fTokenHeredoc = tokenHeredoc;
74                 }
75
76                 /**
77                  * Is this character an operator character?
78                  *
79                  * @param character
80                  *          Character to determine whether it is an operator character
81                  * @return <code>true</code> iff the character is an operator,
82                  *         <code>false</code> otherwise.
83                  */
84                 public boolean isOperator(char character) {
85                         for (int index = 0; index < PHP_OPERATORS.length; index++) {
86                                 if (PHP_OPERATORS[index] == character)
87                                         return true;
88                         }
89                         return false;
90                 }
91
92                 /*
93                  * @see org.eclipse.jface.text.rules.IRule#evaluate(org.eclipse.jface.text.rules.ICharacterScanner)
94                  */
95                 public IToken evaluate(ICharacterScanner scanner) {
96
97                         int character = scanner.read();
98                         if (character == '{' || character == '}') {
99                                 return fTokenBraces;
100                         }
101                         if (isOperator((char) character)) {
102                                 int lastCharacter = character;
103                                 character = scanner.read();
104                                 // the readHEREDOC(scanner) call doesn't work, if we have our own partitions for single quoted
105                                 // or double quoted strings:
106                                 //
107                                 // if (lastCharacter == '<' && character == '<') {
108                                 // int heredocCharacter = scanner.read();
109                                 // if (heredocCharacter == '<') {
110                                 // // start of heredoc comment;
111                                 // if (readHEREDOC(scanner)) {
112                                 // return fTokenHeredoc;
113                                 // }
114                                 // } else {
115                                 // scanner.unread();
116                                 // }
117                                 // }
118                                 if (!isOperator((char) character)) {
119                                         scanner.unread();
120                                         return fToken;
121                                 }
122                                 if (checkPHPTag(scanner, lastCharacter, character)) {
123                                         return Token.UNDEFINED;
124                                 }
125                                 do {
126                                         lastCharacter = character;
127                                         character = scanner.read();
128                                         if (checkPHPTag(scanner, lastCharacter, character)) {
129                                                 return fToken;
130                                         }
131                                         if (character == ICharacterScanner.EOF) {
132                                                 return fToken;
133                                         }
134                                 } while (isOperator((char) character));
135                                 scanner.unread();
136                                 return fToken;
137                         } else {
138                                 scanner.unread();
139                                 return Token.UNDEFINED;
140                         }
141                 }
142
143                 // private boolean readHEREDOC(ICharacterScanner scanner) {
144                 // // search until heredoc ends
145                 // int ch;
146                 // StringBuffer buf = new StringBuffer();
147                 // char[] heredocIdent;
148                 //
149                 // ch = scanner.read();
150                 // if (!Scanner.isPHPIdentifierStart((char)ch)) {
151                 // scanner.unread();
152                 // scanner.unread();
153                 // return false;
154                 // }
155                 // while (Scanner.isPHPIdentifierPart((char)ch)) {
156                 // buf.append((char)ch);
157                 // ch = scanner.read();
158                 // }
159                 // if (ch==ICharacterScanner.EOF) {
160                 // return true;
161                 // }
162                 // heredocIdent = buf.toString().toCharArray();
163                 // while (true) {
164                 // ch = scanner.read();
165                 // if (ch==ICharacterScanner.EOF) {
166                 // return true;
167                 // }
168                 // if (ch == '\n') { // heredoc could end after a newline
169                 // int pos = 0;
170                 // while (true) {
171                 // if (pos == heredocIdent.length) {
172                 // return true;
173                 // }
174                 // ch = scanner.read(); // ignore escaped character
175                 // if (ch != heredocIdent[pos]) {
176                 // break;
177                 // }
178                 // if (ch==ICharacterScanner.EOF) {
179                 // return true;
180                 // }
181                 // pos++;
182                 // }
183                 // }
184                 // }
185                 // }
186
187                 /**
188                  * Check if lastCharacter/character are a PHP start or end token ( &lt;? ...
189                  * ?&gt; )
190                  *
191                  * @param scanner
192                  * @param lastCharacter
193                  * @param character
194                  * @return
195                  */
196                 private boolean checkPHPTag(ICharacterScanner scanner, int lastCharacter, int character) {
197                         if (lastCharacter == '<' && character == '?') {
198                                 scanner.unread();
199                                 scanner.unread();
200                                 return true;
201                         } else if (lastCharacter == '?' && character == '>') {
202                                 scanner.unread();
203                                 scanner.unread();
204                                 return true;
205                         }
206                         return false;
207                 }
208         }
209
210         protected class AccentStringRule implements IRule {
211
212                 /** Token to return for this rule */
213                 private final IToken fToken;
214
215                 public AccentStringRule(IToken token) {
216                         fToken = token;
217
218                 }
219
220                 /*
221                  * @see org.eclipse.jface.text.rules.IRule#evaluate(org.eclipse.jface.text.rules.ICharacterScanner)
222                  */
223                 public IToken evaluate(ICharacterScanner scanner) {
224
225                         int character = scanner.read();
226
227                         if (character == '`') {
228
229                                 while (character != ICharacterScanner.EOF) {
230                                         character = scanner.read();
231                                         if (character == '\\') {
232                                                 character = scanner.read();
233                                         } else if (character == '`') {
234                                                 return fToken;
235                                         }
236                                 }
237                                 scanner.unread();
238                                 return Token.UNDEFINED;
239                         } else {
240                                 scanner.unread();
241                                 return Token.UNDEFINED;
242                         }
243                 }
244
245         }
246
247         private class PHPWordRule extends WordRule {
248                 private StringBuffer fBuffer = new StringBuffer();
249
250                 protected Map fWordsIgnoreCase = new HashMap();
251
252                 public PHPWordRule(IWordDetector detector) {
253                         super(detector, Token.UNDEFINED);
254                 }
255
256                 public PHPWordRule(IWordDetector detector, IToken defaultToken) {
257                         super(detector, defaultToken);
258                 }
259
260                 /**
261                  * Adds a word and the token to be returned if it is detected.
262                  *
263                  * @param word
264                  *          the word this rule will search for, may not be <code>null</code>
265                  * @param token
266                  *          the token to be returned if the word has been found, may not be
267                  *          <code>null</code>
268                  */
269                 public void addWordIgnoreCase(String word, IToken token) {
270                         Assert.isNotNull(word);
271                         Assert.isNotNull(token);
272
273                         fWordsIgnoreCase.put(word, token);
274                 }
275
276                 public IToken evaluate(ICharacterScanner scanner) {
277                         int c = scanner.read();
278                         boolean isVariable = false;
279                         boolean isUnderscore = false;
280                         String word;
281                         if (c == '<') {
282                                 c = scanner.read();
283                                 if (c != '?') {
284                                         scanner.unread();
285                                         scanner.unread();
286                                         return Token.UNDEFINED;
287                                 } else {
288                                         c = scanner.read();
289                                         if (c == '=') { // <?=
290                                                 return getToken(IPreferenceConstants.PHP_TAG);
291                                         }
292                                         if (c != 'p' && c != 'P') {
293                                                 scanner.unread();
294                                                 return getToken(IPreferenceConstants.PHP_TAG);
295                                         } else {
296                                                 c = scanner.read();
297                                                 if (c != 'h' && c != 'H') {
298                                                         scanner.unread();
299                                                         scanner.unread();
300                                                         return getToken(IPreferenceConstants.PHP_TAG);
301                                                 } else {
302                                                         c = scanner.read();
303                                                         if (c != 'p' && c != 'P') {
304                                                                 scanner.unread();
305                                                                 scanner.unread();
306                                                                 scanner.unread();
307                                                                 return getToken(IPreferenceConstants.PHP_TAG);
308                                                         } else {
309                                                                 return getToken(IPreferenceConstants.PHP_TAG);
310                                                         }
311                                                 }
312                                         }
313                                 }
314                         }
315                         if (c == '?') {
316                                 c = scanner.read();
317                                 if (c == '>') {
318                                         return getToken(IPreferenceConstants.PHP_TAG);
319                                 }
320                                 scanner.unread();
321                                 scanner.unread();
322                                 return Token.UNDEFINED;
323                         }
324                         if (fDetector.isWordStart((char) c)) {
325                                 if (c == '$') {
326                                         isVariable = true;
327                                 }
328                                 if (fColumn == UNDEFINED || (fColumn == scanner.getColumn() - 1)) {
329
330                                         fBuffer.setLength(0);
331                                         fBuffer.append((char) c);
332                                         c = scanner.read();
333                                         if (c == '_') {
334                                                 isUnderscore = true;
335                                         }
336                                         while (c != ICharacterScanner.EOF && fDetector.isWordPart((char) c)) {
337                                                 fBuffer.append((char) c);
338                                                 c = scanner.read();
339                                         }
340                                         scanner.unread();
341
342                                         if (isVariable) {
343                                                 if (isUnderscore) {
344                                                         return getToken(IPreferenceConstants.PHP_VARIABLE_DOLLAR);
345                                                 }
346                                                 return getToken(IPreferenceConstants.PHP_VARIABLE);
347                                         }
348                                         word = fBuffer.toString();
349                                         IToken token = (IToken) fWords.get(word);
350                                         if (token != null)
351                                                 return token;
352
353                                         token = (IToken) fWordsIgnoreCase.get(word.toLowerCase());
354                                         if (token != null)
355                                                 return token;
356
357                                         if (fDefaultToken.isUndefined())
358                                                 unreadBuffer(scanner);
359
360                                         return fDefaultToken;
361                                 }
362                         }
363
364                         scanner.unread();
365                         return Token.UNDEFINED;
366                 }
367         }
368
369         // private PHPColorProvider fColorProvider;
370
371         private static String[] fgTokenProperties = { IPreferenceConstants.PHP_MULTILINE_COMMENT,
372                         IPreferenceConstants.PHP_SINGLELINE_COMMENT, IPreferenceConstants.PHP_TAG, IPreferenceConstants.PHP_KEYWORD,
373                         IPreferenceConstants.PHP_FUNCTIONNAME, IPreferenceConstants.PHP_VARIABLE, IPreferenceConstants.PHP_VARIABLE_DOLLAR,
374                         IPreferenceConstants.PHP_STRING_DQ, IPreferenceConstants.PHP_STRING_SQ, IPreferenceConstants.PHP_TYPE,
375                         IPreferenceConstants.PHP_CONSTANT, IPreferenceConstants.PHP_DEFAULT, IPreferenceConstants.PHP_OPERATOR,
376                         IPreferenceConstants.PHP_BRACE_OPERATOR, IPreferenceConstants.PHP_KEYWORD_RETURN };
377
378         /**
379          * Creates a PHP code scanner
380          */
381         // public PHPCodeScanner(JavaColorManager provider, IPreferenceStore store) {
382         public PHPCodeScanner(IColorManager manager, IPreferenceStore store) {
383                 super(manager, store);
384                 initialize();
385         }
386
387         /*
388          * @see AbstractJavaScanner#getTokenProperties()
389          */
390         protected String[] getTokenProperties() {
391                 return fgTokenProperties;
392         }
393
394         /*
395          * @see AbstractJavaScanner#createRules()
396          */
397         protected List createRules() {
398                 List rules = new ArrayList();
399                 Token token = getToken(IPreferenceConstants.PHP_SINGLELINE_COMMENT);
400                 // Add rule for single line comments.
401                 // rules.add(new EndOfLineRule("//", token)); //$NON-NLS-1$
402                 // rules.add(new EndOfLineRule("#", token)); //$NON-NLS-1$
403                 // Add rule for strings and character constants.
404                 // token = getToken(IPreferenceConstants.PHP_STRING_SQ);
405                 // rules.add(new SingleQuoteStringRule(token));
406                 // token = getToken(IPreferenceConstants.PHP_STRING_DQ);
407                 // rules.add(new DoubleQuoteStringRule(token));
408                 rules.add(new AccentStringRule(token));
409
410                 token = getToken(IPreferenceConstants.PHP_MULTILINE_COMMENT);
411                 rules.add(new MultiLineRule("/*", "*/", token)); //$NON-NLS-2$ //$NON-NLS-1$
412                 // Add generic whitespace rule.
413                 rules.add(new WhitespaceRule(new PHPWhitespaceDetector()));
414                 // Add word rule for keywords, types, and constants.
415                 token = getToken(IPreferenceConstants.PHP_DEFAULT);
416                 PHPWordRule wordRule = new PHPWordRule(new PHPWordDetector(), token);
417
418                 Token keyword = getToken(IPreferenceConstants.PHP_KEYWORD);
419                 Token functionName = getToken(IPreferenceConstants.PHP_FUNCTIONNAME);
420                 Token type = getToken(IPreferenceConstants.PHP_TYPE);
421                 Token constant = getToken(IPreferenceConstants.PHP_CONSTANT);
422
423                 ArrayList buffer = PHPSyntaxRdr.getSyntaxData();
424                 // String strbuffer = null; unused
425                 PHPElement elbuffer = null;
426                 String name;
427                 for (int i = 0; i < buffer.size(); i++) {
428                         // while ((buffer != null)
429                         // && (!buffer.isEmpty()
430                         // && ((elbuffer = (PHPElement) buffer.remove(0)) != null))) {
431                         elbuffer = (PHPElement) buffer.get(i);
432                         if (elbuffer instanceof PHPKeyword) {
433                                 name = ((PHPKeyword) elbuffer).getName();
434                                 if (!name.equals("return")) {
435                                         wordRule.addWord(name, keyword);
436                                 }
437                         } else if (elbuffer instanceof PHPFunction) {
438                                 wordRule.addWordIgnoreCase(((PHPFunction) elbuffer).getName(), functionName);
439                         } else if (elbuffer instanceof PHPType) {
440                                 wordRule.addWord(elbuffer.getName(), type);
441                         } else if (elbuffer instanceof PHPConstant) {
442                                 wordRule.addWord(elbuffer.getName(), constant);
443                         }
444                 }
445
446                 // Add word rule for keyword 'return'.
447                 token = getToken(IPreferenceConstants.PHP_KEYWORD_RETURN);
448                 wordRule.addWord("return", token);
449
450                 // Add rule for operators and brackets (at the end !)
451                 rules.add(new OperatorRule(getToken(IPreferenceConstants.PHP_OPERATOR), getToken(IPreferenceConstants.PHP_BRACE_OPERATOR),
452                                 getToken(IPreferenceConstants.PHP_STRING_DQ)));
453
454                 rules.add(wordRule);
455
456                 setDefaultReturnToken(getToken(IPreferenceConstants.PHP_DEFAULT));
457                 return rules;
458         }
459 }