Fix bug #1385272: Parsing of short open tags not fully compatible to PHP parse
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPCodeScanner.java
1 /**********************************************************************
2  Copyright (c) 2000, 2002 IBM Corp. and others.
3  All rights reserved. This program and the accompanying materials
4  are made available under the terms of the Common Public License v1.0
5  which accompanies this distribution, and is available at
6  http://www.eclipse.org/legal/cpl-v10.html
7
8  Contributors:
9  IBM Corporation - Initial implementation
10  www.phpeclipse.de
11  **********************************************************************/
12 package net.sourceforge.phpeclipse.phpeditor.php;
13
14 import java.util.ArrayList;
15 import java.util.HashMap;
16 import java.util.List;
17 import java.util.Map;
18
19 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
20 import net.sourceforge.phpdt.internal.ui.text.AbstractJavaScanner;
21 import net.sourceforge.phpdt.ui.text.IColorManager;
22 import net.sourceforge.phpeclipse.IPreferenceConstants;
23 import net.sourceforge.phpeclipse.phpeditor.PHPSyntaxRdr;
24 import net.sourceforge.phpeclipse.phpeditor.util.PHPWhitespaceDetector;
25 import net.sourceforge.phpeclipse.phpeditor.util.PHPWordDetector;
26
27 import org.eclipse.jface.preference.IPreferenceStore;
28 import org.eclipse.jface.text.Assert;
29 import org.eclipse.jface.text.BadLocationException;
30 import org.eclipse.jface.text.rules.ICharacterScanner;
31 import org.eclipse.jface.text.rules.IRule;
32 import org.eclipse.jface.text.rules.IToken;
33 import org.eclipse.jface.text.rules.IWordDetector;
34 import org.eclipse.jface.text.rules.MultiLineRule;
35 import org.eclipse.jface.text.rules.Token;
36 import org.eclipse.jface.text.rules.WhitespaceRule;
37 import org.eclipse.jface.text.rules.WordRule;
38
39 /**
40  * PHP Code Scanner
41  */
42 public class PHPCodeScanner extends AbstractJavaScanner {
43
44         /**
45          * Rule to detect java operators.
46          *
47          * @since 3.0
48          */
49         protected class OperatorRule implements IRule {
50
51                 /** Java operators */
52                 private final char[] PHP_OPERATORS = { ';', '(', ')', '.', '=', '/', '\\', '+', '-', '*', '[', ']', '<', '>', ':', '?', '!',
53                                 ',', '|', '&', '^', '%', '~', '@' };
54
55                 /** Token to return for this rule */
56                 private final IToken fToken;
57
58                 /** Token to return for braces */
59                 private final IToken fTokenBraces;
60
61                 /** Token to return for heredocs */
62                 private final IToken fTokenHeredoc;
63
64                 /**
65                  * Creates a new operator rule.
66                  *
67                  * @param token
68                  *          Token to use for this rule
69                  * @param tokenHeredoc
70                  *          TODO
71                  */
72                 public OperatorRule(IToken token, IToken tokenBraces, IToken tokenHeredoc) {
73                         fToken = token;
74                         fTokenBraces = tokenBraces;
75                         fTokenHeredoc = tokenHeredoc;
76                 }
77
78                 /**
79                  * Is this character an operator character?
80                  *
81                  * @param character
82                  *          Character to determine whether it is an operator character
83                  * @return <code>true</code> iff the character is an operator,
84                  *         <code>false</code> otherwise.
85                  */
86                 public boolean isOperator(char character) {
87                         for (int index = 0; index < PHP_OPERATORS.length; index++) {
88                                 if (PHP_OPERATORS[index] == character)
89                                         return true;
90                         }
91                         return false;
92                 }
93
94                 /*
95                  * @see org.eclipse.jface.text.rules.IRule#evaluate(org.eclipse.jface.text.rules.ICharacterScanner)
96                  */
97                 public IToken evaluate(ICharacterScanner scanner) {
98
99                         int character = scanner.read();
100                         if (character == '{' || character == '}') {
101                                 return fTokenBraces;
102                         }
103                         if (isOperator((char) character)) {
104                                 int lastCharacter = character;
105                                 character = scanner.read();
106                                 // the readHEREDOC(scanner) call doesn't work, if we have our own partitions for single quoted
107                                 // or double quoted strings:
108                                 //
109                                 // if (lastCharacter == '<' && character == '<') {
110                                 // int heredocCharacter = scanner.read();
111                                 // if (heredocCharacter == '<') {
112                                 // // start of heredoc comment;
113                                 // if (readHEREDOC(scanner)) {
114                                 // return fTokenHeredoc;
115                                 // }
116                                 // } else {
117                                 // scanner.unread();
118                                 // }
119                                 // }
120                                 if (!isOperator((char) character)) {
121                                         scanner.unread();
122                                         return fToken;
123                                 }
124                                 if (checkPHPTag(scanner, lastCharacter, character)) {
125                                         return Token.UNDEFINED;
126                                 }
127                                 do {
128                                         lastCharacter = character;
129                                         character = scanner.read();
130                                         if (checkPHPTag(scanner, lastCharacter, character)) {
131                                                 return fToken;
132                                         }
133                                         if (character == ICharacterScanner.EOF) {
134                                                 return fToken;
135                                         }
136                                 } while (isOperator((char) character));
137                                 scanner.unread();
138                                 return fToken;
139                         } else {
140                                 scanner.unread();
141                                 return Token.UNDEFINED;
142                         }
143                 }
144
145                 // private boolean readHEREDOC(ICharacterScanner scanner) {
146                 // // search until heredoc ends
147                 // int ch;
148                 // StringBuffer buf = new StringBuffer();
149                 // char[] heredocIdent;
150                 //
151                 // ch = scanner.read();
152                 // if (!Scanner.isPHPIdentifierStart((char)ch)) {
153                 // scanner.unread();
154                 // scanner.unread();
155                 // return false;
156                 // }
157                 // while (Scanner.isPHPIdentifierPart((char)ch)) {
158                 // buf.append((char)ch);
159                 // ch = scanner.read();
160                 // }
161                 // if (ch==ICharacterScanner.EOF) {
162                 // return true;
163                 // }
164                 // heredocIdent = buf.toString().toCharArray();
165                 // while (true) {
166                 // ch = scanner.read();
167                 // if (ch==ICharacterScanner.EOF) {
168                 // return true;
169                 // }
170                 // if (ch == '\n') { // heredoc could end after a newline
171                 // int pos = 0;
172                 // while (true) {
173                 // if (pos == heredocIdent.length) {
174                 // return true;
175                 // }
176                 // ch = scanner.read(); // ignore escaped character
177                 // if (ch != heredocIdent[pos]) {
178                 // break;
179                 // }
180                 // if (ch==ICharacterScanner.EOF) {
181                 // return true;
182                 // }
183                 // pos++;
184                 // }
185                 // }
186                 // }
187                 // }
188
189                 /**
190                  * Check if lastCharacter/character are a PHP start or end token ( &lt;? ...
191                  * ?&gt; )
192                  *
193                  * @param scanner
194                  * @param lastCharacter
195                  * @param character
196                  * @return
197                  */
198                 private boolean checkPHPTag(ICharacterScanner scanner, int lastCharacter, int character) {
199                         if (lastCharacter == '<' && character == '?') {
200                                 scanner.unread();
201                                 scanner.unread();
202                                 return true;
203                         } else if (lastCharacter == '?' && character == '>') {
204                                 scanner.unread();
205                                 scanner.unread();
206                                 return true;
207                         }
208                         return false;
209                 }
210         }
211
212         protected class AccentStringRule implements IRule {
213
214                 /** Token to return for this rule */
215                 private final IToken fToken;
216
217                 public AccentStringRule(IToken token) {
218                         fToken = token;
219
220                 }
221
222                 /*
223                  * @see org.eclipse.jface.text.rules.IRule#evaluate(org.eclipse.jface.text.rules.ICharacterScanner)
224                  */
225                 public IToken evaluate(ICharacterScanner scanner) {
226
227                         int character = scanner.read();
228
229                         if (character == '`') {
230
231                                 while (character != ICharacterScanner.EOF) {
232                                         character = scanner.read();
233                                         if (character == '\\') {
234                                                 character = scanner.read();
235                                         } else if (character == '`') {
236                                                 return fToken;
237                                         }
238                                 }
239                                 scanner.unread();
240                                 return Token.UNDEFINED;
241                         } else {
242                                 scanner.unread();
243                                 return Token.UNDEFINED;
244                         }
245                 }
246
247         }
248
249         private class PHPWordRule extends WordRule {
250                 private StringBuffer fBuffer = new StringBuffer();
251
252                 protected Map fWordsIgnoreCase = new HashMap();
253
254                 public PHPWordRule(IWordDetector detector) {
255                         super(detector, Token.UNDEFINED);
256                 }
257
258                 public PHPWordRule(IWordDetector detector, IToken defaultToken) {
259                         super(detector, defaultToken);
260                 }
261
262                 /**
263                  * Adds a word and the token to be returned if it is detected.
264                  *
265                  * @param word
266                  *          the word this rule will search for, may not be <code>null</code>
267                  * @param token
268                  *          the token to be returned if the word has been found, may not be
269                  *          <code>null</code>
270                  */
271                 public void addWordIgnoreCase(String word, IToken token) {
272                         Assert.isNotNull(word);
273                         Assert.isNotNull(token);
274
275                         fWordsIgnoreCase.put(word, token);
276                 }
277
278                 public IToken evaluate(ICharacterScanner scanner) {
279                         int c = scanner.read();
280                         boolean isVariable = false;
281                         boolean isUnderscore = false;
282                         String word;
283                         if (c == '<') {
284                                 c = scanner.read();
285                                 if (c != '?') {
286                                         scanner.unread();
287                                         scanner.unread();
288                                         return Token.UNDEFINED;
289                                 } else {
290                                         c = scanner.read();
291                                         if (c == '=') { // <?=
292                                                 return getToken(IPreferenceConstants.PHP_TAG);
293                                         }
294                                         if (c != 'p' && c != 'P') {
295                                                 scanner.unread();
296                                                 return getToken(IPreferenceConstants.PHP_TAG);
297                                         } else {
298                                                 c = scanner.read();
299                                                 if (c != 'h' && c != 'H') {
300                                                         scanner.unread();
301                                                         scanner.unread();
302                                                         return getToken(IPreferenceConstants.PHP_TAG);
303                                                 } else {
304                                                         c = scanner.read();
305                                                         if (c != 'p' && c != 'P') {
306                                                                 scanner.unread();
307                                                                 scanner.unread();
308                                                                 scanner.unread();
309                                                                 return getToken(IPreferenceConstants.PHP_TAG);
310                                                         } else {
311                                                                 return getToken(IPreferenceConstants.PHP_TAG);
312                                                         }
313                                                 }
314                                         }
315                                 }
316                         }
317                         if (c == '?') {
318                                 c = scanner.read();
319                                 if (c == '>') {
320                                         return getToken(IPreferenceConstants.PHP_TAG);
321                                 }
322                                 scanner.unread();
323                                 scanner.unread();
324                                 return Token.UNDEFINED;
325                         }
326                         if (fDetector.isWordStart((char) c)) {
327                                 if (c == '$') {
328                                         isVariable = true;
329                                 }
330                                 if (fColumn == UNDEFINED || (fColumn == scanner.getColumn() - 1)) {
331
332                                         fBuffer.setLength(0);
333                                         fBuffer.append((char) c);
334                                         c = scanner.read();
335                                         if (c == '_') {
336                                                 isUnderscore = true;
337                                         }
338                                         while (c != ICharacterScanner.EOF && fDetector.isWordPart((char) c)) {
339                                                 fBuffer.append((char) c);
340                                                 c = scanner.read();
341                                         }
342                                         scanner.unread();
343
344                                         if (isVariable) {
345                                                 if (isUnderscore) {
346                                                         return getToken(IPreferenceConstants.PHP_VARIABLE_DOLLAR);
347                                                 }
348                                                 return getToken(IPreferenceConstants.PHP_VARIABLE);
349                                         }
350                                         word = fBuffer.toString();
351                                         IToken token = (IToken) fWords.get(word);
352                                         if (token != null)
353                                                 return token;
354
355                                         token = (IToken) fWordsIgnoreCase.get(word.toLowerCase());
356                                         if (token != null)
357                                                 return token;
358
359                                         if (fDefaultToken.isUndefined())
360                                                 unreadBuffer(scanner);
361
362                                         return fDefaultToken;
363                                 }
364                         }
365
366                         scanner.unread();
367                         return Token.UNDEFINED;
368                 }
369         }
370
371         // private PHPColorProvider fColorProvider;
372
373         private static String[] fgTokenProperties = { IPreferenceConstants.PHP_MULTILINE_COMMENT,
374                         IPreferenceConstants.PHP_SINGLELINE_COMMENT, IPreferenceConstants.PHP_TAG, IPreferenceConstants.PHP_KEYWORD,
375                         IPreferenceConstants.PHP_FUNCTIONNAME, IPreferenceConstants.PHP_VARIABLE, IPreferenceConstants.PHP_VARIABLE_DOLLAR,
376                         IPreferenceConstants.PHP_STRING_DQ, IPreferenceConstants.PHP_STRING_SQ, IPreferenceConstants.PHP_TYPE,
377                         IPreferenceConstants.PHP_CONSTANT, IPreferenceConstants.PHP_DEFAULT, IPreferenceConstants.PHP_OPERATOR,
378                         IPreferenceConstants.PHP_BRACE_OPERATOR, IPreferenceConstants.PHP_KEYWORD_RETURN };
379
380         /**
381          * Creates a PHP code scanner
382          */
383         // public PHPCodeScanner(JavaColorManager provider, IPreferenceStore store) {
384         public PHPCodeScanner(IColorManager manager, IPreferenceStore store) {
385                 super(manager, store);
386                 initialize();
387         }
388
389         /*
390          * @see AbstractJavaScanner#getTokenProperties()
391          */
392         protected String[] getTokenProperties() {
393                 return fgTokenProperties;
394         }
395
396         /*
397          * @see AbstractJavaScanner#createRules()
398          */
399         protected List createRules() {
400                 List rules = new ArrayList();
401                 Token token = getToken(IPreferenceConstants.PHP_SINGLELINE_COMMENT);
402                 // Add rule for single line comments.
403                 // rules.add(new EndOfLineRule("//", token)); //$NON-NLS-1$
404                 // rules.add(new EndOfLineRule("#", token)); //$NON-NLS-1$
405                 // Add rule for strings and character constants.
406                 // token = getToken(IPreferenceConstants.PHP_STRING_SQ);
407                 // rules.add(new SingleQuoteStringRule(token));
408                 // token = getToken(IPreferenceConstants.PHP_STRING_DQ);
409                 // rules.add(new DoubleQuoteStringRule(token));
410                 rules.add(new AccentStringRule(token));
411
412                 token = getToken(IPreferenceConstants.PHP_MULTILINE_COMMENT);
413                 rules.add(new MultiLineRule("/*", "*/", token)); //$NON-NLS-2$ //$NON-NLS-1$
414                 // Add generic whitespace rule.
415                 rules.add(new WhitespaceRule(new PHPWhitespaceDetector()));
416                 // Add word rule for keywords, types, and constants.
417                 token = getToken(IPreferenceConstants.PHP_DEFAULT);
418                 PHPWordRule wordRule = new PHPWordRule(new PHPWordDetector(), token);
419
420                 Token keyword = getToken(IPreferenceConstants.PHP_KEYWORD);
421                 Token functionName = getToken(IPreferenceConstants.PHP_FUNCTIONNAME);
422                 Token type = getToken(IPreferenceConstants.PHP_TYPE);
423                 Token constant = getToken(IPreferenceConstants.PHP_CONSTANT);
424
425                 ArrayList buffer = PHPSyntaxRdr.getSyntaxData();
426                 // String strbuffer = null; unused
427                 PHPElement elbuffer = null;
428                 String name;
429                 for (int i = 0; i < buffer.size(); i++) {
430                         // while ((buffer != null)
431                         // && (!buffer.isEmpty()
432                         // && ((elbuffer = (PHPElement) buffer.remove(0)) != null))) {
433                         elbuffer = (PHPElement) buffer.get(i);
434                         if (elbuffer instanceof PHPKeyword) {
435                                 name = ((PHPKeyword) elbuffer).getName();
436                                 if (!name.equals("return")) {
437                                         wordRule.addWord(name, keyword);
438                                 }
439                         } else if (elbuffer instanceof PHPFunction) {
440                                 wordRule.addWordIgnoreCase(((PHPFunction) elbuffer).getName(), functionName);
441                         } else if (elbuffer instanceof PHPType) {
442                                 wordRule.addWord(elbuffer.getName(), type);
443                         } else if (elbuffer instanceof PHPConstant) {
444                                 wordRule.addWord(elbuffer.getName(), constant);
445                         }
446                 }
447
448                 // Add word rule for keyword 'return'.
449                 token = getToken(IPreferenceConstants.PHP_KEYWORD_RETURN);
450                 wordRule.addWord("return", token);
451
452                 // Add rule for operators and brackets (at the end !)
453                 rules.add(new OperatorRule(getToken(IPreferenceConstants.PHP_OPERATOR), getToken(IPreferenceConstants.PHP_BRACE_OPERATOR),
454                                 getToken(IPreferenceConstants.PHP_STRING_DQ)));
455
456                 rules.add(wordRule);
457
458                 setDefaultReturnToken(getToken(IPreferenceConstants.PHP_DEFAULT));
459                 return rules;
460         }
461 }