Fixed partitioning bug causing an assertion exception and failing
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpeclipse / phpeditor / php / PHPPartitionScanner.java
1 /**
2  * This program and the accompanying materials
3  * are made available under the terms of the Common Public License v1.0
4  * which accompanies this distribution, and is available at
5  * http://www.eclipse.org/legal/cpl-v10.html
6  * Created on 05.03.2003
7  *
8  * @author Stefan Langer (musk)
9  * @version $Revision: 1.16 $
10  */
11 package net.sourceforge.phpeclipse.phpeditor.php;
12
13 import java.util.*;
14
15 import org.eclipse.jface.text.*;
16 import org.eclipse.jface.text.rules.*;
17
18 /**
19  * 
20  */
21 public class PHPPartitionScanner implements IPartitionTokenScanner
22 {
23     private static final boolean DEBUG = true;
24     private boolean fInString = false;
25     private boolean fInDoubString = false;
26     private IDocument fDocument = null;
27     private int fOffset = -1;
28     private String fContentType = IPHPPartitionScannerConstants.HTML;
29     private String fPrevContentType = IPHPPartitionScannerConstants.HTML;
30
31     private boolean partitionBorder = false;
32     private int fTokenOffset;
33     private int fEnd = -1;
34     private int fLength;
35     private int fCurrentLength;
36     private Map tokens = new HashMap();
37
38     public PHPPartitionScanner()
39     {
40         this.tokens.put(
41             IPHPPartitionScannerConstants.PHP,
42             new Token(IPHPPartitionScannerConstants.PHP));
43         this.tokens.put(
44             IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT,
45             new Token(IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT));
46         this.tokens.put(
47             IPHPPartitionScannerConstants.HTML,
48             new Token(IPHPPartitionScannerConstants.HTML));
49         this.tokens.put(
50             IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT,
51             new Token(IPHPPartitionScannerConstants.HTML_MULTILINE_COMMENT));
52         this.tokens.put(
53             IDocument.DEFAULT_CONTENT_TYPE,
54             new Token(IDocument.DEFAULT_CONTENT_TYPE));
55     }
56
57     private IToken getToken(String type)
58     {
59         fLength = fCurrentLength;
60         if (DEBUG)
61         {
62
63             try
64             {
65                 if (fLength <= 0)
66                 {
67                     int line = fDocument.getLineOfOffset(fOffset);
68                     System.err.println(
69                         "Error at "
70                             + line
71                             + " offset:"
72                             + String.valueOf(
73                                 fOffset - fDocument.getLineOffset(line)));
74                 }
75             }
76             catch (BadLocationException e)
77             {
78                 // TODO Auto-generated catch block
79                 e.printStackTrace();
80             }
81         }
82         Assert.isTrue(fLength > 0, "Partition length <= 0!");
83         fCurrentLength = 0;
84         // String can never cross partition borders so reset string detection
85         fInString = false;
86         fInDoubString = false;
87         IToken token = (IToken) this.tokens.get(type);
88         Assert.isNotNull(token, "Token for type \"" + type + "\" not found!");
89         if (DEBUG)
90         {
91             System.out.println(
92                 "Partition: fTokenOffset="
93                     + fTokenOffset
94                     + " fContentType="
95                     + type
96                     + " fLength="
97                     + fLength);
98         }
99         return token;
100     }
101
102     /* (non-Javadoc)
103      * @see org.eclipse.jface.text.rules.IPartitionTokenScanner#setPartialRange(org.eclipse.jface.text.IDocument, int, int, java.lang.String, int)
104      */
105     public void setPartialRange(
106         IDocument document,
107         int offset,
108         int length,
109         String contentType,
110         int partitionOffset)
111     {
112         if (DEBUG)
113         {
114             System.out.println(
115                 "PartialRange: contentType="
116                     + contentType
117                     + " partitionOffset="
118                     + partitionOffset);
119         }
120
121         try
122         {
123             if (partitionOffset > -1)
124             {
125                 partitionBorder = false;
126                 // because of strings we have to parse the whole partition
127                 this.setRange(
128                     document,
129                     partitionOffset,
130                     offset - partitionOffset + length);
131                 // sometimes we get a wrong partition so we retrieve the partition
132                 // directly from the document
133                 fContentType = fDocument.getContentType(partitionOffset);
134                 //TODO determine the previouse contenttypes as a stack
135                 //if(partitionOffset > 1)
136                 //      fPrevContentType = fDocument.getContentType(partitionOffset-1);
137             }
138             else
139                 this.setRange(document, offset, length);
140
141         }
142         catch (BadLocationException e)
143         {
144             // should never happen
145             // TODO print stack trace to log
146             // fall back just scan the whole document again
147             this.setRange(document, 0, fDocument.getLength());
148         }
149
150     }
151
152     /* (non-Javadoc)
153      * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
154      */
155     public int getTokenLength()
156     {
157         return fLength;
158     }
159
160     /* (non-Javadoc)
161      * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
162      */
163     public int getTokenOffset()
164     {
165         return fTokenOffset;
166     }
167
168     /* (non-Javadoc)
169      * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
170      */
171     public IToken nextToken()
172     {
173         int c;
174
175         // check if we are not allready at the end of the
176         // file
177         if ((c = read()) == ICharacterScanner.EOF)
178         {
179             partitionBorder = false;
180             return Token.EOF;
181         }
182         else
183             unread();
184
185         if (partitionBorder)
186         {
187             fTokenOffset = fOffset;
188             partitionBorder = false;
189         }
190
191         while ((c = read()) != ICharacterScanner.EOF)
192         {
193             switch (c)
194             {
195                 case '<' :
196                     if (!isInString(IPHPPartitionScannerConstants.PHP)
197                         && fContentType
198                             != IPHPPartitionScannerConstants.PHP_MULTILINE_COMMENT
199                         && checkPattern(new char[] { '?', 'p', 'h', 'p' }, true))
200                     {
201                         if (fContentType != IPHPPartitionScannerConstants.PHP
202                             && fCurrentLength > 5)
203                         {
204                             unread(5);
205                             IToken token = getToken(fContentType);
206                             // save previouse contenttype
207                             //TODO build stack for previouse contenttype 
208                             fPrevContentType = fContentType;
209
210                             fContentType = IPHPPartitionScannerConstants.PHP;
211
212                             return token;
213                         }
214                         else
215                             fContentType = IPHPPartitionScannerConstants.PHP;
216
217                         // remember offset of this partition
218                         fTokenOffset = fOffset - 5;
219                         fCurrentLength = 5;
220                     }
221                     else if (
222                         !isInString(IPHPPartitionScannerConstants.PHP)
223                             && fContentType
224                                 != IPHPPartitionScannerConstants
225                                     .PHP_MULTILINE_COMMENT
226                             && checkPattern(new char[] { '?' }, false))
227                     {
228                         if (fContentType != IPHPPartitionScannerConstants.PHP
229                             && fCurrentLength > 2)
230                         {
231                             unread(2);
232                             IToken token = getToken(fContentType);
233                             // save previouse contenttype
234                             fPrevContentType = fContentType;
235                             fContentType = IPHPPartitionScannerConstants.PHP;
236                             return token;
237                         }
238                         else
239                             fContentType = IPHPPartitionScannerConstants.PHP;
240                         // remember offset of this partition
241                         fTokenOffset = fOffset - 2;
242                         fCurrentLength = 2;
243                     }
244                     else if (
245                         !isInString(IPHPPartitionScannerConstants.PHP)
246                             && checkPattern(new char[] { '!', '-', '-' }))
247                     { // return previouse partition
248                         if (fContentType
249                             != IPHPPartitionScannerConstants
250                                 .HTML_MULTILINE_COMMENT
251                             && fCurrentLength > 4)
252                         {
253                             unread(4);
254                             IToken token = getToken(fContentType);
255                             fContentType =
256                                 IPHPPartitionScannerConstants
257                                     .HTML_MULTILINE_COMMENT;
258                             return token;
259                         }
260                         else
261                             fContentType =
262                                 IPHPPartitionScannerConstants
263                                     .HTML_MULTILINE_COMMENT;
264                                     
265                         fTokenOffset = fOffset - 4;
266                         fCurrentLength = 4;
267                     }
268                     break;
269                 case '?' :
270                     if (!isInString(IPHPPartitionScannerConstants.PHP)
271                         && fContentType == IPHPPartitionScannerConstants.PHP)
272                     {
273                         if ((c = read()) == '>')
274                         { 
275                             if (fPrevContentType != null)
276                                 fContentType = fPrevContentType;
277                             else
278                                 fContentType =
279                                     IPHPPartitionScannerConstants.HTML;
280                             partitionBorder = true;
281                             return getToken(IPHPPartitionScannerConstants.PHP);
282                         }
283                         else if (c != ICharacterScanner.EOF)
284                             unread();
285                     }
286                     break;
287                 case '-' :
288                     if (!isInString(IPHPPartitionScannerConstants.PHP)
289                         && fContentType
290                             == IPHPPartitionScannerConstants
291                                 .HTML_MULTILINE_COMMENT
292                         && checkPattern(new char[] { '-', '>' }))
293                     {
294                         fContentType = IPHPPartitionScannerConstants.HTML;
295                         partitionBorder = true;
296                         return getToken(
297                             IPHPPartitionScannerConstants
298                                 .HTML_MULTILINE_COMMENT);
299                     }
300                     break;
301                 case '/' :
302                     if (!isInString(IPHPPartitionScannerConstants.PHP) && (c = read()) == '*')
303                     { // MULTINE COMMENT JAVASCRIPT, CSS, PHP
304                         if (fContentType == IPHPPartitionScannerConstants.PHP
305                             && fCurrentLength > 2)
306                         {
307                             unread(2);
308                             IToken token = getToken(fContentType);
309                             fContentType =
310                                 IPHPPartitionScannerConstants
311                                     .PHP_MULTILINE_COMMENT;
312                             return token;
313                         }
314                         else if (
315                             fContentType
316                                 == IPHPPartitionScannerConstants
317                                     .PHP_MULTILINE_COMMENT)
318                         {
319
320                             fTokenOffset = fOffset - 2;
321                             fCurrentLength = 2;
322                         }
323
324                     }
325                     else if (!isInString(IPHPPartitionScannerConstants.PHP) && c != ICharacterScanner.EOF)
326                         unread();
327                     break;
328                 case '*' :
329                     if (!isInString(IPHPPartitionScannerConstants.PHP) && (c = read()) == '/')
330                     {
331                         if (fContentType
332                             == IPHPPartitionScannerConstants
333                                 .PHP_MULTILINE_COMMENT)
334                         {
335                             fContentType = IPHPPartitionScannerConstants.PHP;
336                             partitionBorder = true;
337                             return getToken(
338                                 IPHPPartitionScannerConstants
339                                     .PHP_MULTILINE_COMMENT);
340                         }
341                         else if (
342                             fContentType
343                                 == IPHPPartitionScannerConstants
344                                     .CSS_MULTILINE_COMMENT)
345                         {
346                         }
347                         else if (
348                             fContentType
349                                 == IPHPPartitionScannerConstants
350                                     .JS_MULTILINE_COMMENT)
351                         {
352                         }
353                     }
354                     else if (!isInString(IPHPPartitionScannerConstants.PHP) && c != ICharacterScanner.EOF)
355                         unread();
356                     break;
357                 case '\'' :
358                     if (!fInDoubString)
359                         fInString = !fInString;
360                     break;
361                 case '"' :
362                     // toggle String mode
363                     if (!fInString)
364                         fInDoubString = !fInDoubString;
365                     break;
366             }
367         } // end of file reached but we have to return the
368         // last partition.
369         return getToken(fContentType);
370     }
371     /* (non-Javadoc)
372      * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(org.eclipse.jface.text.IDocument, int, int)
373      */
374     public void setRange(IDocument document, int offset, int length)
375     {
376         if (DEBUG)
377         {
378             System.out.println(
379                 "SET RANGE: offset=" + offset + " length=" + length);
380         }
381
382         fDocument = document;
383         fOffset = offset;
384         fTokenOffset = offset;
385         fCurrentLength = 0;
386         fLength = 0;
387         fEnd = fOffset + length;
388         fInString = false;
389         fInDoubString = false;
390         //partitionBorder = false;
391     }
392
393     private int read()
394     {
395         try
396         {
397             if (fOffset < fEnd)
398             {
399                 fCurrentLength++;
400                 return fDocument.getChar(fOffset++);
401             }
402             return ICharacterScanner.EOF;
403         }
404         catch (BadLocationException e)
405         {
406             // should never happen
407             // TODO write stacktrace to log
408             fOffset = fEnd;
409             return ICharacterScanner.EOF;
410         }
411     }
412
413     private void unread()
414     {
415         --fOffset;
416         --fCurrentLength;
417     }
418     
419     private void unread(int num)
420     {
421         fOffset -= num;
422         fCurrentLength -= num;
423     }
424
425     private boolean checkPattern(char[] pattern)
426     {
427         return checkPattern(pattern, false);
428     }
429
430     /**
431      * Check if next character sequence read from document is equals to 
432      * the provided pattern. Pattern is read from left to right until the 
433      * first character read doesn't match. If this happens all read characters are
434      * unread.
435      * @param pattern The pattern to check.
436      * @return <code>true</code> if pattern is equals else returns <code>false</code>.
437      */
438     private boolean checkPattern(char[] pattern, boolean ignoreCase)
439     {
440         int prevOffset = fOffset;
441         int prevLength = fCurrentLength;
442         for (int i = 0; i < pattern.length; i++)
443         {
444             int c = read();
445
446             if (c == ICharacterScanner.EOF
447                 || !letterEquals(c, pattern[i], ignoreCase))
448             {
449                 fOffset = prevOffset;
450                 fCurrentLength = prevLength;
451                 return false;
452             }
453         }
454
455         return true;
456     }
457
458     private boolean letterEquals(int test, char letter, boolean ignoreCase)
459     {
460         if (test == letter)
461             return true;
462         else if (
463             ignoreCase
464                 && Character.isLowerCase(letter)
465                 && test == Character.toUpperCase(letter))
466             return true;
467         else if (
468             ignoreCase
469                 && Character.isUpperCase(letter)
470                 && test == Character.toLowerCase(letter))
471             return true;
472
473         return false;
474     }
475     
476     /**
477      * Checks wether the offset is in a <code>String</code> and the specified 
478      * contenttype is the current content type.
479      * Strings are delimited, mutual exclusive, by a " or by a '.
480      * 
481      * @param contentType The contenttype to check.
482      * @return <code>true</code> if the current offset is in a string else 
483      *                  returns false.
484      */
485     private  boolean isInString(String contentType)
486     {
487         if(fContentType == contentType)
488                 return (fInString || fInDoubString);
489         else
490                 return false;           
491     }
492
493 }