bug fix 1403993, scanner wrongly detected XML comment end, when closing > was still...
[phpeclipse.git] / net.sourceforge.phpeclipse.xml.ui / src / net / sourceforge / phpeclipse / xml / ui / internal / text / PHPXMLPartitionScanner.java
1 /*
2  * Copyright (c) 2002-2004 Widespace, OU and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     Igor Malinin - initial contribution
10  *
11  * $Id: PHPXMLPartitionScanner.java,v 1.2 2006-01-15 12:24:16 bananeweizen Exp $
12  */
13
14 package net.sourceforge.phpeclipse.xml.ui.internal.text;
15
16 import java.util.HashMap;
17 import java.util.Map;
18
19 import org.eclipse.jface.text.Assert;
20 import org.eclipse.jface.text.BadLocationException;
21 import org.eclipse.jface.text.IDocument;
22 import org.eclipse.jface.text.rules.ICharacterScanner;
23 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
24 import org.eclipse.jface.text.rules.IToken;
25 import org.eclipse.jface.text.rules.Token;
26
27
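/**
 * Partition token scanner for XML text. It splits the scanned range into
 * comment, declaration, tag, quoted attribute value and CDATA partitions,
 * uses separate partition types inside the internal DTD subset, and can
 * recognize DTD conditional sections when constructed for DTD parsing.
 *
 * @author Igor Malinin
 */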
33 public class PHPXMLPartitionScanner implements IPartitionTokenScanner {
34 //      public static final String XML_PI         = "__xml_processing_instruction";
35         public static final String XML_COMMENT    = "__xml_comment";
36         public static final String XML_DECL       = "__xml_declaration";
37         public static final String XML_TAG        = "__xml_tag";
38         public static final String XML_ATTRIBUTE  = "__xml_attribute";
39         public static final String XML_CDATA      = "__xml_cdata";
40
41         public static final String DTD_INTERNAL         = "__dtd_internal";
42 //      public static final String DTD_INTERNAL_PI      = "__dtd_internal_pi";
43         public static final String DTD_INTERNAL_COMMENT = "__dtd_internal_comment";
44         public static final String DTD_INTERNAL_DECL    = "__dtd_internal_declaration";
45         public static final String DTD_CONDITIONAL      = "__dtd_conditional";
46
47         public static final int STATE_DEFAULT     = 0;
48         public static final int STATE_TAG         = 1;
49         public static final int STATE_DECL        = 2;
50         public static final int STATE_CDATA       = 4;
51
52         public static final int STATE_INTERNAL    = 8;
53
54         protected IDocument document;
55         protected int end;
56
57         protected int offset;
58         protected int length;
59
60         protected int position;
61         protected int state;
62
63         protected boolean parsedtd;
64
65         protected Map tokens = new HashMap();
66
67         public PHPXMLPartitionScanner(boolean parsedtd) {
68                 this.parsedtd = parsedtd;
69         }
70
71         /*
72          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
73          */
74         public IToken nextToken() {
75                 offset += length;
76
77                 switch (state) {
78                         case STATE_TAG:
79                                 return nextTagToken();
80
81                         case STATE_DECL:
82                                 return nextDeclToken();
83
84                         case STATE_CDATA:
85                                 return nextCDATAToken();
86                 }
87
88                 switch (read()) {
89                         case ICharacterScanner.EOF:
90                                 state = STATE_DEFAULT;
91                                 return getToken(null);
92
93                         case '<':
94                                 switch (read()) {
95                                         case ICharacterScanner.EOF:
96                                                 if (parsedtd || isInternal()) {
97                                                         break;
98                                                 }
99
100                                                 state = STATE_DEFAULT;
101                                                 return getToken(XML_TAG);
102
103                                         case '?': // <?  <?PI
104                                             unread();
105                                             break;
106 //                                              return nextPIToken();
107
108                                         case '!': // <!  <!DEFINITION or <![CDATA[ or <!--COMMENT
109                                                 switch (read()) {
110                                                         case ICharacterScanner.EOF:
111                                                                 state = STATE_DEFAULT;
112                                                                 return getToken(XML_TAG);
113
114                                                         case '-': // <!-  <!--COMMENT
115                                                                 switch (read()) {
116                                                                         case ICharacterScanner.EOF:
117                                                                                 return nextDeclToken();
118
119                                                                         case '-': // <!--
120                                                                                 return nextCommentToken();
121                                                                 }
122
123                                                         case '[': // <![  <![CDATA[ or <![%cond;[
124                                                                 if (parsedtd) {
125                                                                         return nextConditionalToken();
126                                                                 }
127
128                                                                 if (!isInternal()) {
129                                                                         return nextCDATAToken();
130                                                                 }
131                                                 }
132
133                                                 return nextDeclToken();
134                                 }
135
136                                 if (parsedtd || isInternal()) {
137                                         break;
138                                 }
139
140                                 unread();
141
142                                 return nextTagToken();
143
144                         case ']':
145                                 if (isInternal()) {
146                                         unread();
147
148                                         state = STATE_DECL;
149                                         length = 0;
150                                         return nextToken();
151                                 }
152                                 break;
153                     default:
154                         unread();
155                 }
156
157 loop:
158                 while (true) {
159                         switch (read()) {
160                                 case ICharacterScanner.EOF:
161                                         state = STATE_DEFAULT;
162                                         return getToken(null);
163
164                                 case '<':
165                                         if (parsedtd || isInternal()) {
166                                                 switch (read()) {
167                                                         case ICharacterScanner.EOF:
168                                                                 state = STATE_DEFAULT;
169                                                                 return getToken(null);
170
171                                                         case '!':
172                                                         case '?':
173                                                                 unread();
174                                                                 break;
175
176                                                         default:
177                                                                 continue loop;
178                                                 }
179                                         }
180
181                                         unread();
182
183                                         state &= STATE_INTERNAL;
184                                         return getToken(isInternal() ? DTD_INTERNAL : null);
185
186                                 case ']':
187                                         if (isInternal()) {
188                                                 unread();
189
190                                                 state = STATE_DECL;
191                                                 if (position == offset) {
192                                                         // nothing between
193                                                         length = 0;
194                                                         return nextToken();
195                                                 }
196
197                                                 return getToken(DTD_INTERNAL);
198                                         }
199                         }
200                 }
201         }
202
203         private IToken nextTagToken() {
204                 int quot = read();
205
206                 switch (quot) {
207                         case ICharacterScanner.EOF:
208                         case '>':
209                                 state = STATE_DEFAULT;
210                                 return getToken(XML_TAG);
211
212                         case '"': case '\'':
213                                 while (true) {
214                                         int ch = read();
215
216                                         if (ch == quot) {
217                                                 state = STATE_TAG;
218                                                 return getToken(XML_ATTRIBUTE);
219                                         }
220
221                                         switch (ch) {
222                                                 case '<':
223                                                         unread();
224
225                                                 case ICharacterScanner.EOF:
226                                                         state = STATE_DEFAULT;
227                                                         return getToken(XML_ATTRIBUTE);
228                                         }
229                                 }
230                         default:
231                           unread();
232                 }
233
234                 while (true) {
235                         switch (read()) {
236                                 case '<':
237                                         unread();
238
239                                 case ICharacterScanner.EOF:
240                                 case '>':
241                                         state = STATE_DEFAULT;
242                                         return getToken(XML_TAG);
243
244                                 case '"': case '\'':
245                                         unread();
246
247                                         state = STATE_TAG;
248                                         return getToken(XML_TAG);
249                         }
250                 }
251         }
252
253         private IToken nextDeclToken() {
254                 loop: while (true) {
255                         switch (read()) {
256                                 case ICharacterScanner.EOF:
257                                         state = STATE_DEFAULT;
258                                         return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
259
260                                 case '<':
261                                         if (parsedtd || isInternal()) {
262                                                 switch (read()) {
263                                                         case ICharacterScanner.EOF:
264                                                                 state = STATE_DEFAULT;
265                                                                 return getToken(isInternal() ? DTD_INTERNAL : null);
266
267                                                         case '!':
268                                                         case '?':
269                                                                 unread();
270                                                                 break;
271
272                                                         default:
273                                                                 continue loop;
274                                                 }
275                                         }
276
277                                         unread();
278
279                                 case '>':
280                                         state &= STATE_INTERNAL;
281                                         return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
282
283                                 case '[': // <!DOCTYPE xxx [dtd]>
284                                         if (!isInternal()) {
285                                                 state = STATE_INTERNAL;
286                                                 return getToken(XML_DECL);
287                                         }
288                         }
289                 }
290         }
291
292         private IToken nextCommentToken() {
293                 state &= STATE_INTERNAL;
294
295                 loop: while (true) {
296                         switch (read()) {
297                                 case ICharacterScanner.EOF:
298                                         break loop;
299
300                                 case '-': // -  -->
301                                         switch (read()) {
302                                                 case ICharacterScanner.EOF:
303                                                         break loop;
304
305                                                 case '-': // --  -->
306                                                         switch (read()) {
307                                                                 case ICharacterScanner.EOF:
308                                                                 case '>':
309                                                                         break loop;
310                                                         }
311
312                                                         unread();
313                                                         continue loop;
314                                         }
315                         }
316                 }
317
318                 return getToken(isInternal() ? DTD_INTERNAL_COMMENT : XML_COMMENT);
319         }
320
321         private IToken nextCDATAToken() {
322                 state = STATE_DEFAULT;
323
324 loop:
325         while (true) {
326                         switch (read()) {
327                                 case ICharacterScanner.EOF:
328                                         break loop;
329
330                                 case ']': // ]  ]]>
331                                         switch (read()) {
332                                                 case ICharacterScanner.EOF:
333                                                         break loop;
334
335                                                 case ']': // ]]  ]]>
336                                                         switch (read()) {
337                                                                 case ICharacterScanner.EOF:
338                                                                 case '>': // ]]>
339                                                                         break loop;
340                                                         }
341
342                                                         unread();
343                                                         unread();
344                                                         continue loop;
345                                         }
346                         }
347                 }
348
349                 return getToken(XML_CDATA);
350         }
351
352         private IToken nextConditionalToken() {
353                 state = STATE_DEFAULT;
354
355                 int level = 1;
356
357 loop:
358         while (true) {
359                         switch (read()) {
360                                 case ICharacterScanner.EOF:
361                                         break loop;
362
363                                 case '<': // -  -->
364                                         switch (read()) {
365                                                 case ICharacterScanner.EOF:
366                                                         break loop;
367
368                                                 case '!': // --  -->
369                                                         switch (read()) {
370                                                                 case ICharacterScanner.EOF:
371                                                                         break loop;
372
373                                                                 case '[':
374                                                                         ++level;
375                                                                         continue loop;
376                                                         }
377
378                                                         unread();
379                                                         continue loop;
380                                         }
381
382                                         unread();
383                                         continue loop;
384
385                                 case ']': // -  -->
386                                         switch (read()) {
387                                                 case ICharacterScanner.EOF:
388                                                         break loop;
389
390                                                 case ']': // --  -->
391                                                         switch (read()) {
392                                                                 case ICharacterScanner.EOF:
393                                                                 case '>':
394                                                                         if (--level == 0) {
395                                                                                 break loop;
396                                                                         }
397
398                                                                         continue loop;
399                                                         }
400
401                                                         unread();
402                                                         unread();
403                                                         continue loop;
404                                         }
405                         }
406                 }
407
408                 return getToken(DTD_CONDITIONAL);
409         }
410
411         private IToken getToken(String type) {
412                 length = position - offset;
413
414                 if (length == 0) {
415                         return Token.EOF;
416                 }
417
418                 if (type == null) {
419                         return Token.UNDEFINED;
420                 }
421
422                 IToken token = (IToken) tokens.get(type);
423                 if (token == null) {
424                         token = new Token(type);
425                         tokens.put(type, token);
426                 }
427
428                 return token;
429         }
430
431         private boolean isInternal() {
432                 return (state & STATE_INTERNAL) != 0;
433         }
434
435         private int read() {
436                 if (position >= end) {
437                         return ICharacterScanner.EOF;
438                 }
439
440                 try {
441                         return document.getChar(position++);
442                 } catch (BadLocationException e) {
443                         --position;
444                         return ICharacterScanner.EOF;
445                 }
446         }
447
448         private void unread() {
449                 --position;
450         }
451
452         /*
453          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
454          */
455         public int getTokenOffset() {
456           Assert.isTrue(offset>=0, Integer.toString(offset));
457                 return offset;
458         }
459
460         /*
461          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
462          */
463         public int getTokenLength() {
464                 return length;
465         }
466
467         /*
468          * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int, int)
469          */
470         public void setRange(IDocument document, int offset, int length) {
471                 this.document = document;
472                 this.end = offset + length;
473
474                 this.offset = offset;
475                 this.position = offset;
476                 this.length = 0;
477
478                 this.state = STATE_DEFAULT;
479         }
480
481         /*
482            * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
483            */
484 //        public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {
485 //          state = STATE_DEFAULT;
486 //          if (partitionOffset > -1) {
487 //            int delta = offset - partitionOffset;
488 //            if (delta > 0) {
489 //              setRange(document, partitionOffset, length + delta);
490 //              return;
491 //            }
492 //          }
493 //          setRange(document, partitionOffset, length);
494 //        }
495         /*
496          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
497          */
498         public void setPartialRange(
499                 IDocument document, int offset, int length,
500                 String contentType, int partitionOffset
501         ) {
502 //        boolean flag = false;
503                 this.document = document;
504                 this.end = offset + length;
505
506                 // NB! Undocumented value: -1
507                 if (partitionOffset >= 0) {
508                         offset = partitionOffset;
509 //                      flag = true;
510                 }
511
512                 this.offset = offset;
513                 this.position = offset;
514                 this.length = 0;
515
516 //              if (flag) {
517 //                state = STATE_DEFAULT;
518 //                return;
519 //              }
520                 if (contentType == XML_ATTRIBUTE) {
521                         state = STATE_TAG;
522                         return;
523                 }
524
525                 if (contentType == XML_TAG) {
526                         state = isContinuationPartition() ? STATE_TAG : STATE_DEFAULT;
527                         return;
528                 }
529
530                 if (contentType == XML_DECL) {
531                         state = isContinuationPartition() ? STATE_DECL : STATE_DEFAULT;
532                         return;
533                 }
534
535                 if (contentType == DTD_INTERNAL ||
536                         contentType == DTD_INTERNAL_DECL ||
537                         contentType == DTD_INTERNAL_COMMENT
538                 ) {
539                         state = STATE_INTERNAL;
540                         return;
541                 }
542
543                 state = STATE_DEFAULT;
544         }
545
546         private boolean isContinuationPartition() {
547                 try {
548                         String type = document.getContentType(offset - 1);
549
550                         if (type != IDocument.DEFAULT_CONTENT_TYPE) {
551                                 return true;
552                         }
553                 } catch (BadLocationException e) {}
554
555                 return false;
556         }
557 }