Improved XML scanner for this bug
[phpeclipse.git] / net.sourceforge.phpeclipse.xml.ui / src / net / sourceforge / phpeclipse / xml / ui / internal / text / XMLPartitionScanner.java
1 /*
2  * Copyright (c) 2002-2004 Widespace, OU and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     Igor Malinin - initial contribution
10  *
11  * $Id: XMLPartitionScanner.java,v 1.2 2005-05-15 23:09:09 axelcl Exp $
12  */
13
14 package net.sourceforge.phpeclipse.xml.ui.internal.text;
15
16 import java.util.HashMap;
17 import java.util.Map;
18
19 import org.eclipse.jface.text.Assert;
20 import org.eclipse.jface.text.BadLocationException;
21 import org.eclipse.jface.text.IDocument;
22 import org.eclipse.jface.text.rules.ICharacterScanner;
23 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
24 import org.eclipse.jface.text.rules.IToken;
25 import org.eclipse.jface.text.rules.Token;
26
27
28 /**
29  * 
30  * 
31  * @author Igor Malinin
32  */
33 public class XMLPartitionScanner implements IPartitionTokenScanner {
34         public static final String XML_PI         = "__xml_processing_instruction";
35         public static final String XML_COMMENT    = "__xml_comment";
36         public static final String XML_DECL       = "__xml_declaration";
37         public static final String XML_TAG        = "__xml_tag";
38         public static final String XML_ATTRIBUTE  = "__xml_attribute";
39         public static final String XML_CDATA      = "__xml_cdata";
40
41         public static final String DTD_INTERNAL         = "__dtd_internal";
42         public static final String DTD_INTERNAL_PI      = "__dtd_internal_pi";
43         public static final String DTD_INTERNAL_COMMENT = "__dtd_internal_comment";
44         public static final String DTD_INTERNAL_DECL    = "__dtd_internal_declaration";
45         public static final String DTD_CONDITIONAL      = "__dtd_conditional";
46
47         public static final int STATE_DEFAULT     = 0;
48         public static final int STATE_TAG         = 1;
49         public static final int STATE_DECL        = 2;
50         public static final int STATE_CDATA       = 4;
51
52         public static final int STATE_INTERNAL    = 8;
53
54         protected IDocument document;
55         protected int end;
56
57         protected int offset;
58         protected int length;
59
60         protected int position;
61         protected int state;
62
63         protected boolean parsedtd;
64
65         protected Map tokens = new HashMap();
66
67         public XMLPartitionScanner(boolean parsedtd) {
68                 this.parsedtd = parsedtd;
69         }
70
71         /*
72          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
73          */
74         public IToken nextToken() {
75                 offset += length;
76
77                 switch (state) {
78                         case STATE_TAG:
79                                 return nextTagToken();
80
81                         case STATE_DECL:
82                                 return nextDeclToken();
83
84                         case STATE_CDATA:
85                                 return nextCDATAToken();
86                 }
87
88                 switch (read()) {
89                         case ICharacterScanner.EOF:
90                                 state = STATE_DEFAULT;
91                                 return getToken(null);
92
93                         case '<':
94                                 switch (read()) {
95                                         case ICharacterScanner.EOF:
96                                                 if (parsedtd || isInternal()) {
97                                                         break;
98                                                 }
99
100                                                 state = STATE_DEFAULT;
101                                                 return getToken(XML_TAG);
102
103                                         case '?': // <?  <?PI 
104                                             unread();
105                                             break;
106 //                                              return nextPIToken(); 
107
108                                         case '!': // <!  <!DEFINITION or <![CDATA[ or <!--COMMENT
109                                                 switch (read()) {
110                                                         case ICharacterScanner.EOF:
111                                                                 state = STATE_DEFAULT;
112                                                                 return getToken(XML_TAG);
113
114                                                         case '-': // <!-  <!--COMMENT
115                                                                 switch (read()) {
116                                                                         case ICharacterScanner.EOF:
117                                                                                 return nextDeclToken();
118
119                                                                         case '-': // <!--
120                                                                                 return nextCommentToken();
121                                                                 }
122
123                                                         case '[': // <![  <![CDATA[ or <![%cond;[
124                                                                 if (parsedtd) {
125                                                                         return nextConditionalToken();
126                                                                 }
127
128                                                                 if (!isInternal()) {
129                                                                         return nextCDATAToken();
130                                                                 }
131                                                 }
132
133                                                 return nextDeclToken();
134                                 }
135
136                                 if (parsedtd || isInternal()) {
137                                         break;
138                                 }
139
140                                 unread();
141
142                                 return nextTagToken();
143
144                         case ']':
145                                 if (isInternal()) {
146                                         unread();
147
148                                         state = STATE_DECL;
149                                         length = 0;
150                                         return nextToken();
151                                 }
152                                 break;
153                     default:
154                         unread();
155                 }
156
157 loop:
158                 while (true) {
159                         switch (read()) {
160                                 case ICharacterScanner.EOF:
161                                         state = STATE_DEFAULT;
162                                         return getToken(null);
163
164                                 case '<':
165                                         if (parsedtd || isInternal()) {
166                                                 switch (read()) {
167                                                         case ICharacterScanner.EOF:
168                                                                 state = STATE_DEFAULT;
169                                                                 return getToken(null);
170
171                                                         case '!':
172                                                         case '?':
173                                                                 unread();
174                                                                 break;
175
176                                                         default:
177                                                                 continue loop;
178                                                 }
179                                         }
180
181                                         unread();
182
183                                         state &= STATE_INTERNAL;
184                                         return getToken(isInternal() ? DTD_INTERNAL : null);
185
186                                 case ']':
187                                         if (isInternal()) {
188                                                 unread();
189
190                                                 state = STATE_DECL;
191                                                 if (position == offset) {
192                                                         // nothing between
193                                                         length = 0;
194                                                         return nextToken();
195                                                 }
196
197                                                 return getToken(DTD_INTERNAL);
198                                         }
199                         }
200                 }
201         }
202
203         private IToken nextTagToken() {
204                 int quot = read();
205
206                 switch (quot) {
207                         case ICharacterScanner.EOF:
208                         case '>':
209                                 state = STATE_DEFAULT;
210                                 return getToken(XML_TAG);
211
212                         case '"': case '\'':
213                                 while (true) {
214                                         int ch = read();
215
216                                         if (ch == quot) {
217                                                 state = STATE_TAG;
218                                                 return getToken(XML_ATTRIBUTE);
219                                         }
220
221                                         switch (ch) {
222                                                 case '<':
223                                                         unread();
224
225                                                 case ICharacterScanner.EOF:
226                                                         state = STATE_DEFAULT;
227                                                         return getToken(XML_ATTRIBUTE);
228                                         }
229                                 }
230                         default:
231                           unread();
232                 }
233
234                 while (true) {
235                         switch (read()) {
236                                 case '<':
237                                         unread();
238
239                                 case ICharacterScanner.EOF:
240                                 case '>':
241                                         state = STATE_DEFAULT;
242                                         return getToken(XML_TAG);
243
244                                 case '"': case '\'':
245                                         unread();
246
247                                         state = STATE_TAG;
248                                         return getToken(XML_TAG);
249                         }
250                 }
251         }
252
253         private IToken nextDeclToken() {
254                 loop: while (true) {
255                         switch (read()) {
256                                 case ICharacterScanner.EOF:
257                                         state = STATE_DEFAULT;
258                                         return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
259
260                                 case '<':
261                                         if (parsedtd || isInternal()) {
262                                                 switch (read()) {
263                                                         case ICharacterScanner.EOF:
264                                                                 state = STATE_DEFAULT;
265                                                                 return getToken(isInternal() ? DTD_INTERNAL : null);
266
267                                                         case '!':
268                                                         case '?':
269                                                                 unread();
270                                                                 break;
271
272                                                         default:
273                                                                 continue loop;
274                                                 }
275                                         }
276
277                                         unread();
278
279                                 case '>':
280                                         state &= STATE_INTERNAL;
281                                         return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
282
283                                 case '[': // <!DOCTYPE xxx [dtd]>
284                                         if (!isInternal()) {
285                                                 state = STATE_INTERNAL;
286                                                 return getToken(XML_DECL);
287                                         }
288                         }
289                 }
290         }
291
292         private IToken nextCommentToken() {
293                 state &= STATE_INTERNAL;
294
295                 loop: while (true) {
296                         switch (read()) {
297                                 case ICharacterScanner.EOF:
298                                         break loop;
299
300                                 case '-': // -  -->
301                                         switch (read()) {
302                                                 case ICharacterScanner.EOF:
303                                                         break loop;
304
305                                                 case '-': // --  -->
306                                                         switch (read()) {
307                                                                 case ICharacterScanner.EOF:
308                                                                 case '>':
309                                                                         break loop;
310                                                         }
311
312                                                         unread();
313                                                         break loop;
314                                         }
315                         }
316                 }
317
318                 return getToken(isInternal() ? DTD_INTERNAL_COMMENT : XML_COMMENT);
319         }
320
321         private IToken nextPIToken() {
322                 state &= STATE_INTERNAL;
323
324                 loop: while (true) {
325                         switch (read()) {
326                                 case ICharacterScanner.EOF:
327                                         break loop;
328
329                                 case '?': // ?  ?>
330                                         switch (read()) {
331                                                 case ICharacterScanner.EOF:
332                                                 case '>':
333                                                         break loop;
334                                         }
335
336                                         unread();
337                         }
338                 }
339
340                 return getToken(isInternal() ? DTD_INTERNAL_PI : XML_PI);
341         }
342
343         private IToken nextCDATAToken() {
344                 state = STATE_DEFAULT;
345
346 loop:
347         while (true) {
348                         switch (read()) {
349                                 case ICharacterScanner.EOF:
350                                         break loop;
351
352                                 case ']': // ]  ]]>
353                                         switch (read()) {
354                                                 case ICharacterScanner.EOF:
355                                                         break loop;
356
357                                                 case ']': // ]]  ]]>
358                                                         switch (read()) {
359                                                                 case ICharacterScanner.EOF:
360                                                                 case '>': // ]]>
361                                                                         break loop;
362                                                         }
363
364                                                         unread();
365                                                         unread();
366                                                         continue loop;
367                                         }
368                         }
369                 }
370
371                 return getToken(XML_CDATA);
372         }
373
374         private IToken nextConditionalToken() {
375                 state = STATE_DEFAULT;
376
377                 int level = 1;
378
379 loop:
380         while (true) {
381                         switch (read()) {
382                                 case ICharacterScanner.EOF:
383                                         break loop;
384
385                                 case '<': // -  -->
386                                         switch (read()) {
387                                                 case ICharacterScanner.EOF:
388                                                         break loop;
389
390                                                 case '!': // --  -->
391                                                         switch (read()) {
392                                                                 case ICharacterScanner.EOF:
393                                                                         break loop;
394
395                                                                 case '[':
396                                                                         ++level;
397                                                                         continue loop;
398                                                         }
399
400                                                         unread();
401                                                         continue loop;
402                                         }
403
404                                         unread();
405                                         continue loop;
406
407                                 case ']': // -  -->
408                                         switch (read()) {
409                                                 case ICharacterScanner.EOF:
410                                                         break loop;
411
412                                                 case ']': // --  -->
413                                                         switch (read()) {
414                                                                 case ICharacterScanner.EOF:
415                                                                 case '>':
416                                                                         if (--level == 0) {
417                                                                                 break loop;
418                                                                         }
419
420                                                                         continue loop;
421                                                         }
422
423                                                         unread();
424                                                         unread();
425                                                         continue loop;
426                                         }
427                         }
428                 }
429
430                 return getToken(DTD_CONDITIONAL);
431         }
432
433         private IToken getToken(String type) {
434                 length = position - offset;
435
436                 if (length == 0) {
437                         return Token.EOF;
438                 }
439
440                 if (type == null) {
441                         return Token.UNDEFINED;
442                 }
443
444                 IToken token = (IToken) tokens.get(type);
445                 if (token == null) {
446                         token = new Token(type);
447                         tokens.put(type, token);
448                 }
449
450                 return token;
451         }
452
453         private boolean isInternal() {
454                 return (state & STATE_INTERNAL) != 0;
455         }
456
457         private int read() {
458                 if (position >= end) {
459                         return ICharacterScanner.EOF;
460                 }
461
462                 try {
463                         return document.getChar(position++);
464                 } catch (BadLocationException e) {
465                         --position;
466                         return ICharacterScanner.EOF;
467                 }
468         }
469
470         private void unread() {
471                 --position;
472         }
473
474         /*
475          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
476          */
477         public int getTokenOffset() {
478           Assert.isTrue(offset>=0, Integer.toString(offset));
479                 return offset;
480         }
481
482         /*
483          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
484          */
485         public int getTokenLength() {
486                 return length;
487         }
488
489         /*
490          * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int, int)
491          */
492         public void setRange(IDocument document, int offset, int length) {
493                 this.document = document;
494                 this.end = offset + length;
495
496                 this.offset = offset;
497                 this.position = offset;
498                 this.length = 0;
499
500                 this.state = STATE_DEFAULT;
501         }
502
503         /*
504            * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
505            */
506 //        public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {
507 //          state = STATE_DEFAULT;
508 //          if (partitionOffset > -1) {
509 //            int delta = offset - partitionOffset;
510 //            if (delta > 0) {
511 //              setRange(document, partitionOffset, length + delta);
512 //              return;
513 //            }
514 //          }
515 //          setRange(document, partitionOffset, length);
516 //        }
517         /*
518          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
519          */
520         public void setPartialRange(
521                 IDocument document, int offset, int length,
522                 String contentType, int partitionOffset
523         ) {
524 //        boolean flag = false;
525                 this.document = document;
526                 this.end = offset + length;
527
528                 // NB! Undocumented value: -1
529                 if (partitionOffset >= 0) {
530                         offset = partitionOffset;
531 //                      flag = true;
532                 }
533
534                 this.offset = offset;
535                 this.position = offset;
536                 this.length = 0;
537
538 //              if (flag) {
539 //                state = STATE_DEFAULT;
540 //                return;
541 //              }
542                 if (contentType == XML_ATTRIBUTE) {
543                         state = STATE_TAG;
544                         return;
545                 }
546
547                 if (contentType == XML_TAG) {
548                         state = isContinuationPartition() ? STATE_TAG : STATE_DEFAULT;
549                         return;
550                 }
551
552                 if (contentType == XML_DECL) {
553                         state = isContinuationPartition() ? STATE_DECL : STATE_DEFAULT;
554                         return;
555                 }
556
557                 if (contentType == DTD_INTERNAL ||
558                         contentType == DTD_INTERNAL_PI ||
559                         contentType == DTD_INTERNAL_DECL ||
560                         contentType == DTD_INTERNAL_COMMENT
561                 ) {
562                         state = STATE_INTERNAL;
563                         return;
564                 }
565
566                 state = STATE_DEFAULT;
567         }
568
569         private boolean isContinuationPartition() {
570                 try {
571                         String type = document.getContentType(offset - 1);
572
573                         if (type != IDocument.DEFAULT_CONTENT_TYPE) {
574                                 return true;
575                         }
576                 } catch (BadLocationException e) {}
577
578                 return false;
579         }
580 }