410e0c113951226afa2c1fb40b14b102889b08c9
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPCodeScanner.java
1 /**********************************************************************
2  Copyright (c) 2000, 2002 IBM Corp. and others.
3  All rights reserved. This program and the accompanying materials
4  are made available under the terms of the Common Public License v1.0
5  which accompanies this distribution, and is available at
6  http://www.eclipse.org/legal/cpl-v10.html
7
8  Contributors:
9  IBM Corporation - Initial implementation
10  www.phpeclipse.de
11  **********************************************************************/
12 package net.sourceforge.phpeclipse.phpeditor.php;
13
14 import java.util.ArrayList;
15 import java.util.HashMap;
16 import java.util.List;
17 import java.util.Map;
18
19 import net.sourceforge.phpdt.internal.ui.text.AbstractJavaScanner;
20 import net.sourceforge.phpdt.ui.text.IColorManager;
21 import net.sourceforge.phpeclipse.IPreferenceConstants;
22 import net.sourceforge.phpeclipse.phpeditor.PHPSyntaxRdr;
23 import net.sourceforge.phpeclipse.phpeditor.util.PHPWhitespaceDetector;
24 import net.sourceforge.phpeclipse.phpeditor.util.PHPWordDetector;
25
26 import org.eclipse.jface.preference.IPreferenceStore;
27 import org.eclipse.jface.text.Assert;
28 import org.eclipse.jface.text.rules.ICharacterScanner;
29 import org.eclipse.jface.text.rules.IRule;
30 import org.eclipse.jface.text.rules.IToken;
31 import org.eclipse.jface.text.rules.IWordDetector;
32 import org.eclipse.jface.text.rules.MultiLineRule;
33 import org.eclipse.jface.text.rules.Token;
34 import org.eclipse.jface.text.rules.WhitespaceRule;
35 import org.eclipse.jface.text.rules.WordRule;
36
37 /**
38  * PHP Code Scanner
39  */
40 public class PHPCodeScanner extends AbstractJavaScanner {
41
42         /**
43          * Rule to detect java operators.
44          * 
45          * @since 3.0
46          */
47         protected class OperatorRule implements IRule {
48
49                 /** Java operators */
50                 private final char[] PHP_OPERATORS = { ';', '(', ')', '.', '=', '/',
51                                 '\\', '+', '-', '*', '[', ']', '<', '>', ':', '?', '!', ',',
52                                 '|', '&', '^', '%', '~', '@' };
53
54                 /** Token to return for this rule */
55                 private final IToken fToken;
56
57                 /** Token to return for braces */
58                 private final IToken fTokenBraces;
59
60                 /** Token to return for heredocs */
61                 private final IToken fTokenHeredoc;
62
63                 /**
64                  * Creates a new operator rule.
65                  * 
66                  * @param token
67                  *            Token to use for this rule
68                  * @param tokenHeredoc
69                  *            TODO
70                  */
71                 public OperatorRule(IToken token, IToken tokenBraces,
72                                 IToken tokenHeredoc) {
73                         fToken = token;
74                         fTokenBraces = tokenBraces;
75                         fTokenHeredoc = tokenHeredoc;
76                 }
77
78                 /**
79                  * Is this character an operator character?
80                  * 
81                  * @param character
82                  *            Character to determine whether it is an operator character
83                  * @return <code>true</code> iff the character is an operator,
84                  *         <code>false</code> otherwise.
85                  */
86                 public boolean isOperator(char character) {
87                         for (int index = 0; index < PHP_OPERATORS.length; index++) {
88                                 if (PHP_OPERATORS[index] == character)
89                                         return true;
90                         }
91                         return false;
92                 }
93
94                 /*
95                  * @see org.eclipse.jface.text.rules.IRule#evaluate(org.eclipse.jface.text.rules.ICharacterScanner)
96                  */
97                 public IToken evaluate(ICharacterScanner scanner) {
98
99                         int character = scanner.read();
100                         if (character == '{' || character == '}') {
101                                 return fTokenBraces;
102                         }
103                         if (isOperator((char) character)) {
104                                 int lastCharacter = character;
105                                 character = scanner.read();
106                                 // the readHEREDOC(scanner) call doesn't work, if we have our
107                                 // own partitions for single quoted
108                                 // or double quoted strings:
109                                 //
110                                 // if (lastCharacter == '<' && character == '<') {
111                                 // int heredocCharacter = scanner.read();
112                                 // if (heredocCharacter == '<') {
113                                 // // start of heredoc comment;
114                                 // if (readHEREDOC(scanner)) {
115                                 // return fTokenHeredoc;
116                                 // }
117                                 // } else {
118                                 // scanner.unread();
119                                 // }
120                                 // }
121                                 if (!isOperator((char) character)) {
122                                         scanner.unread();
123                                         return fToken;
124                                 }
125                                 if (checkPHPTag(scanner, lastCharacter, character)) {
126                                         return Token.UNDEFINED;
127                                 }
128                                 do {
129                                         lastCharacter = character;
130                                         character = scanner.read();
131                                         if (checkPHPTag(scanner, lastCharacter, character)) {
132                                                 return fToken;
133                                         }
134                                         if (character == ICharacterScanner.EOF) {
135                                                 return fToken;
136                                         }
137                                 } while (isOperator((char) character));
138                                 scanner.unread();
139                                 return fToken;
140                         } else {
141                                 scanner.unread();
142                                 return Token.UNDEFINED;
143                         }
144                 }
145
146                 // private boolean readHEREDOC(ICharacterScanner scanner) {
147                 // // search until heredoc ends
148                 // int ch;
149                 // StringBuffer buf = new StringBuffer();
150                 // char[] heredocIdent;
151                 //
152                 // ch = scanner.read();
153                 // if (!Scanner.isPHPIdentifierStart((char)ch)) {
154                 // scanner.unread();
155                 // scanner.unread();
156                 // return false;
157                 // }
158                 // while (Scanner.isPHPIdentifierPart((char)ch)) {
159                 // buf.append((char)ch);
160                 // ch = scanner.read();
161                 // }
162                 // if (ch==ICharacterScanner.EOF) {
163                 // return true;
164                 // }
165                 // heredocIdent = buf.toString().toCharArray();
166                 // while (true) {
167                 // ch = scanner.read();
168                 // if (ch==ICharacterScanner.EOF) {
169                 // return true;
170                 // }
171                 // if (ch == '\n') { // heredoc could end after a newline
172                 // int pos = 0;
173                 // while (true) {
174                 // if (pos == heredocIdent.length) {
175                 // return true;
176                 // }
177                 // ch = scanner.read(); // ignore escaped character
178                 // if (ch != heredocIdent[pos]) {
179                 // break;
180                 // }
181                 // if (ch==ICharacterScanner.EOF) {
182                 // return true;
183                 // }
184                 // pos++;
185                 // }
186                 // }
187                 // }
188                 // }
189
190                 /**
191                  * Check if lastCharacter/character are a PHP start or end token ( &lt;?
192                  * ... ?&gt; )
193                  * 
194                  * @param scanner
195                  * @param lastCharacter
196                  * @param character
197                  * @return
198                  */
199                 private boolean checkPHPTag(ICharacterScanner scanner,
200                                 int lastCharacter, int character) {
201                         if (lastCharacter == '<' && character == '?') {
202                                 scanner.unread();
203                                 scanner.unread();
204                                 return true;
205                         } else if (lastCharacter == '?' && character == '>') {
206                                 scanner.unread();
207                                 scanner.unread();
208                                 return true;
209                         }
210                         return false;
211                 }
212         }
213
214         protected class AccentStringRule implements IRule {
215
216                 /** Token to return for this rule */
217                 private final IToken fToken;
218
219                 public AccentStringRule(IToken token) {
220                         fToken = token;
221
222                 }
223
224                 /*
225                  * @see org.eclipse.jface.text.rules.IRule#evaluate(org.eclipse.jface.text.rules.ICharacterScanner)
226                  */
227                 public IToken evaluate(ICharacterScanner scanner) {
228
229                         int character = scanner.read();
230
231                         if (character == '`') {
232
233                                 while (character != ICharacterScanner.EOF) {
234                                         character = scanner.read();
235                                         if (character == '\\') {
236                                                 character = scanner.read();
237                                         } else if (character == '`') {
238                                                 return fToken;
239                                         }
240                                 }
241                                 scanner.unread();
242                                 return Token.UNDEFINED;
243                         } else {
244                                 scanner.unread();
245                                 return Token.UNDEFINED;
246                         }
247                 }
248
249         }
250
251         private class PHPWordRule extends WordRule {
252                 private StringBuffer fBuffer = new StringBuffer();
253
254                 protected Map fWordsIgnoreCase = new HashMap();
255
256                 public PHPWordRule(IWordDetector detector) {
257                         super(detector, Token.UNDEFINED);
258                 }
259
260                 public PHPWordRule(IWordDetector detector, IToken defaultToken) {
261                         super(detector, defaultToken);
262                 }
263
264                 /**
265                  * Adds a word and the token to be returned if it is detected.
266                  * 
267                  * @param word
268                  *            the word this rule will search for, may not be
269                  *            <code>null</code>
270                  * @param token
271                  *            the token to be returned if the word has been found, may
272                  *            not be <code>null</code>
273                  */
274                 public void addWordIgnoreCase(String word, IToken token) {
275                         Assert.isNotNull(word);
276                         Assert.isNotNull(token);
277
278                         fWordsIgnoreCase.put(word, token);
279                 }
280
281                 public IToken evaluate(ICharacterScanner scanner) {
282                         int c = scanner.read();
283                         boolean isVariable = false;
284                         boolean isUnderscore = false;
285                         String word;
286                         if (c == '<') {
287                                 c = scanner.read();
288                                 if (c != '?') {
289                                         scanner.unread();
290                                         scanner.unread();
291                                         return Token.UNDEFINED;
292                                 } else {
293                                         c = scanner.read();
294                                         if (c == '=') { // <?=
295                                                 return getToken(IPreferenceConstants.PHP_TAG);
296                                         }
297                                         if (c != 'p' && c != 'P') {
298                                                 scanner.unread();
299                                                 return getToken(IPreferenceConstants.PHP_TAG);
300                                         } else {
301                                                 c = scanner.read();
302                                                 if (c != 'h' && c != 'H') {
303                                                         scanner.unread();
304                                                         scanner.unread();
305                                                         return getToken(IPreferenceConstants.PHP_TAG);
306                                                 } else {
307                                                         c = scanner.read();
308                                                         if (c != 'p' && c != 'P') {
309                                                                 scanner.unread();
310                                                                 scanner.unread();
311                                                                 scanner.unread();
312                                                                 return getToken(IPreferenceConstants.PHP_TAG);
313                                                         } else {
314                                                                 return getToken(IPreferenceConstants.PHP_TAG);
315                                                         }
316                                                 }
317                                         }
318                                 }
319                         }
320                         if (c == '?') {
321                                 c = scanner.read();
322                                 if (c == '>') {
323                                         return getToken(IPreferenceConstants.PHP_TAG);
324                                 }
325                                 scanner.unread();
326                                 scanner.unread();
327                                 return Token.UNDEFINED;
328                         }
329                         if (fDetector.isWordStart((char) c)) {
330                                 if (c == '$') {
331                                         isVariable = true;
332                                 }
333                                 if (fColumn == UNDEFINED
334                                                 || (fColumn == scanner.getColumn() - 1)) {
335
336                                         fBuffer.setLength(0);
337                                         fBuffer.append((char) c);
338                                         c = scanner.read();
339                                         if (c == '_') {
340                                                 isUnderscore = true;
341                                         }
342                                         while (c != ICharacterScanner.EOF
343                                                         && fDetector.isWordPart((char) c)) {
344                                                 fBuffer.append((char) c);
345                                                 c = scanner.read();
346                                         }
347                                         scanner.unread();
348
349                                         if (isVariable) {
350                                                 if (isUnderscore) {
351                                                         return getToken(IPreferenceConstants.PHP_VARIABLE_DOLLAR);
352                                                 }
353                                                 return getToken(IPreferenceConstants.PHP_VARIABLE);
354                                         }
355                                         word = fBuffer.toString();
356                                         IToken token = (IToken) fWords.get(word);
357                                         if (token != null)
358                                                 return token;
359
360                                         token = (IToken) fWordsIgnoreCase.get(word.toLowerCase());
361                                         if (token != null)
362                                                 return token;
363
364                                         if (fDefaultToken.isUndefined())
365                                                 unreadBuffer(scanner);
366
367                                         return fDefaultToken;
368                                 }
369                         }
370
371                         scanner.unread();
372                         return Token.UNDEFINED;
373                 }
374         }
375
376         // private PHPColorProvider fColorProvider;
377
378         private static String[] fgTokenProperties = {
379                         IPreferenceConstants.PHP_MULTILINE_COMMENT,
380                         IPreferenceConstants.PHP_SINGLELINE_COMMENT,
381                         IPreferenceConstants.PHP_TAG, IPreferenceConstants.PHP_KEYWORD,
382                         IPreferenceConstants.PHP_FUNCTIONNAME,
383                         IPreferenceConstants.PHP_VARIABLE,
384                         IPreferenceConstants.PHP_VARIABLE_DOLLAR,
385                         IPreferenceConstants.PHP_STRING_DQ,
386                         IPreferenceConstants.PHP_STRING_SQ, IPreferenceConstants.PHP_TYPE,
387                         IPreferenceConstants.PHP_CONSTANT,
388                         IPreferenceConstants.PHP_DEFAULT,
389                         IPreferenceConstants.PHP_OPERATOR,
390                         IPreferenceConstants.PHP_BRACE_OPERATOR,
391                         IPreferenceConstants.PHP_KEYWORD_RETURN };
392
/**
 * Creates a PHP code scanner.
 * 
 * @param manager
 *            the color manager, passed on to the superclass
 * @param store
 *            the preference store, passed on to the superclass
 */
public PHPCodeScanner(IColorManager manager, IPreferenceStore store) {
    super(manager, store);
    // Inherited set-up step; presumably builds the tokens and rules
    // (see the superclass to confirm).
    initialize();
}
402
403         /*
404          * @see AbstractJavaScanner#getTokenProperties()
405          */
406         protected String[] getTokenProperties() {
407                 return fgTokenProperties;
408         }
409
410         /*
411          * @see AbstractJavaScanner#createRules()
412          */
413         protected List createRules() {
414                 List rules = new ArrayList();
415                 Token token = getToken(IPreferenceConstants.PHP_SINGLELINE_COMMENT);
416                 // Add rule for single line comments.
417                 // rules.add(new EndOfLineRule("//", token)); //$NON-NLS-1$
418                 // rules.add(new EndOfLineRule("#", token)); //$NON-NLS-1$
419                 // Add rule for strings and character constants.
420                 // token = getToken(IPreferenceConstants.PHP_STRING_SQ);
421                 // rules.add(new SingleQuoteStringRule(token));
422                 // token = getToken(IPreferenceConstants.PHP_STRING_DQ);
423                 // rules.add(new DoubleQuoteStringRule(token));
424                 rules.add(new AccentStringRule(token));
425
426                 token = getToken(IPreferenceConstants.PHP_MULTILINE_COMMENT);
427                 rules.add(new MultiLineRule("/*", "*/", token)); //$NON-NLS-2$ //$NON-NLS-1$
428                 // Add generic whitespace rule.
429                 rules.add(new WhitespaceRule(new PHPWhitespaceDetector()));
430                 // Add word rule for keywords, types, and constants.
431                 token = getToken(IPreferenceConstants.PHP_DEFAULT);
432                 PHPWordRule wordRule = new PHPWordRule(new PHPWordDetector(), token);
433
434                 Token keyword = getToken(IPreferenceConstants.PHP_KEYWORD);
435                 Token functionName = getToken(IPreferenceConstants.PHP_FUNCTIONNAME);
436                 Token type = getToken(IPreferenceConstants.PHP_TYPE);
437                 Token constant = getToken(IPreferenceConstants.PHP_CONSTANT);
438
439                 ArrayList buffer = PHPSyntaxRdr.getSyntaxData();
440                 // String strbuffer = null; unused
441                 PHPElement elbuffer = null;
442                 String name;
443                 for (int i = 0; i < buffer.size(); i++) {
444                         // while ((buffer != null)
445                         // && (!buffer.isEmpty()
446                         // && ((elbuffer = (PHPElement) buffer.remove(0)) != null))) {
447                         elbuffer = (PHPElement) buffer.get(i);
448                         if (elbuffer instanceof PHPKeyword) {
449                                 name = ((PHPKeyword) elbuffer).getName();
450                                 if (!name.equals("return")) {
451                                         wordRule.addWord(name, keyword);
452                                 }
453                         } else if (elbuffer instanceof PHPFunction) {
454                                 wordRule.addWordIgnoreCase(((PHPFunction) elbuffer).getName(),
455                                                 functionName);
456                         } else if (elbuffer instanceof PHPType) {
457                                 wordRule.addWord(elbuffer.getName(), type);
458                         } else if (elbuffer instanceof PHPConstant) {
459                                 wordRule.addWord(elbuffer.getName(), constant);
460                         }
461                 }
462
463                 // Add word rule for keyword 'return'.
464                 token = getToken(IPreferenceConstants.PHP_KEYWORD_RETURN);
465                 wordRule.addWord("return", token);
466
467                 // Add rule for operators and brackets (at the end !)
468                 rules.add(new OperatorRule(getToken(IPreferenceConstants.PHP_OPERATOR),
469                                 getToken(IPreferenceConstants.PHP_BRACE_OPERATOR),
470                                 getToken(IPreferenceConstants.PHP_STRING_DQ)));
471
472                 rules.add(wordRule);
473
474                 setDefaultReturnToken(getToken(IPreferenceConstants.PHP_DEFAULT));
475                 return rules;
476         }
477 }