Improved xml scanner for this bug
[phpeclipse.git] / net.sourceforge.phpeclipse.xml.ui / src / net / sourceforge / phpeclipse / xml / ui / internal / text / XMLPartitionScanner.java
1 /*
2  * Copyright (c) 2002-2004 Widespace, OU and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     Igor Malinin - initial contribution
10  *
11  * $Id: XMLPartitionScanner.java,v 1.3 2005-05-15 23:23:02 axelcl Exp $
12  */
13
14 package net.sourceforge.phpeclipse.xml.ui.internal.text;
15
16 import java.util.HashMap;
17 import java.util.Map;
18
19 import org.eclipse.jface.text.Assert;
20 import org.eclipse.jface.text.BadLocationException;
21 import org.eclipse.jface.text.IDocument;
22 import org.eclipse.jface.text.rules.ICharacterScanner;
23 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
24 import org.eclipse.jface.text.rules.IToken;
25 import org.eclipse.jface.text.rules.Token;
26
27
28 /**
29  * 
30  * 
31  * @author Igor Malinin
32  */
33 public class XMLPartitionScanner implements IPartitionTokenScanner {
34         public static final String XML_PI         = "__xml_processing_instruction";
35         public static final String XML_COMMENT    = "__xml_comment";
36         public static final String XML_DECL       = "__xml_declaration";
37         public static final String XML_TAG        = "__xml_tag";
38         public static final String XML_ATTRIBUTE  = "__xml_attribute";
39         public static final String XML_CDATA      = "__xml_cdata";
40
41         public static final String DTD_INTERNAL         = "__dtd_internal";
42         public static final String DTD_INTERNAL_PI      = "__dtd_internal_pi";
43         public static final String DTD_INTERNAL_COMMENT = "__dtd_internal_comment";
44         public static final String DTD_INTERNAL_DECL    = "__dtd_internal_declaration";
45         public static final String DTD_CONDITIONAL      = "__dtd_conditional";
46
47         public static final int STATE_DEFAULT     = 0;
48         public static final int STATE_TAG         = 1;
49         public static final int STATE_DECL        = 2;
50         public static final int STATE_CDATA       = 4;
51
52         public static final int STATE_INTERNAL    = 8;
53
54         protected IDocument document;
55         protected int end;
56
57         protected int offset;
58         protected int length;
59
60         protected int position;
61         protected int state;
62
63         protected boolean parsedtd;
64
65         protected Map tokens = new HashMap();
66
67         public XMLPartitionScanner(boolean parsedtd) {
68                 this.parsedtd = parsedtd;
69         }
70
71         /*
72          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
73          */
74         public IToken nextToken() {
75                 offset += length;
76
77                 switch (state) {
78                         case STATE_TAG:
79                                 return nextTagToken();
80
81                         case STATE_DECL:
82                                 return nextDeclToken();
83
84                         case STATE_CDATA:
85                                 return nextCDATAToken();
86                 }
87
88                 switch (read()) {
89                         case ICharacterScanner.EOF:
90                                 state = STATE_DEFAULT;
91                                 return getToken(null);
92
93                         case '<':
94                                 switch (read()) {
95                                         case ICharacterScanner.EOF:
96                                                 if (parsedtd || isInternal()) {
97                                                         break;
98                                                 }
99
100                                                 state = STATE_DEFAULT;
101                                                 return getToken(XML_TAG);
102
103                                         case '?': // <?  <?PI 
104                                                 return nextPIToken(); 
105
106                                         case '!': // <!  <!DEFINITION or <![CDATA[ or <!--COMMENT
107                                                 switch (read()) {
108                                                         case ICharacterScanner.EOF:
109                                                                 state = STATE_DEFAULT;
110                                                                 return getToken(XML_TAG);
111
112                                                         case '-': // <!-  <!--COMMENT
113                                                                 switch (read()) {
114                                                                         case ICharacterScanner.EOF:
115                                                                                 return nextDeclToken();
116
117                                                                         case '-': // <!--
118                                                                                 return nextCommentToken();
119                                                                 }
120
121                                                         case '[': // <![  <![CDATA[ or <![%cond;[
122                                                                 if (parsedtd) {
123                                                                         return nextConditionalToken();
124                                                                 }
125
126                                                                 if (!isInternal()) {
127                                                                         return nextCDATAToken();
128                                                                 }
129                                                 }
130
131                                                 return nextDeclToken();
132                                 }
133
134                                 if (parsedtd || isInternal()) {
135                                         break;
136                                 }
137
138                                 unread();
139
140                                 return nextTagToken();
141
142                         case ']':
143                                 if (isInternal()) {
144                                         unread();
145
146                                         state = STATE_DECL;
147                                         length = 0;
148                                         return nextToken();
149                                 }
150                                 break;
151                     default:
152                         unread();
153                 }
154
155 loop:
156                 while (true) {
157                         switch (read()) {
158                                 case ICharacterScanner.EOF:
159                                         state = STATE_DEFAULT;
160                                         return getToken(null);
161
162                                 case '<':
163                                         if (parsedtd || isInternal()) {
164                                                 switch (read()) {
165                                                         case ICharacterScanner.EOF:
166                                                                 state = STATE_DEFAULT;
167                                                                 return getToken(null);
168
169                                                         case '!':
170                                                         case '?':
171                                                                 unread();
172                                                                 break;
173
174                                                         default:
175                                                                 continue loop;
176                                                 }
177                                         }
178
179                                         unread();
180
181                                         state &= STATE_INTERNAL;
182                                         return getToken(isInternal() ? DTD_INTERNAL : null);
183
184                                 case ']':
185                                         if (isInternal()) {
186                                                 unread();
187
188                                                 state = STATE_DECL;
189                                                 if (position == offset) {
190                                                         // nothing between
191                                                         length = 0;
192                                                         return nextToken();
193                                                 }
194
195                                                 return getToken(DTD_INTERNAL);
196                                         }
197                         }
198                 }
199         }
200
201         private IToken nextTagToken() {
202                 int quot = read();
203
204                 switch (quot) {
205                         case ICharacterScanner.EOF:
206                         case '>':
207                                 state = STATE_DEFAULT;
208                                 return getToken(XML_TAG);
209
210                         case '"': case '\'':
211                                 while (true) {
212                                         int ch = read();
213
214                                         if (ch == quot) {
215                                                 state = STATE_TAG;
216                                                 return getToken(XML_ATTRIBUTE);
217                                         }
218
219                                         switch (ch) {
220                                                 case '<':
221                                                         unread();
222
223                                                 case ICharacterScanner.EOF:
224                                                         state = STATE_DEFAULT;
225                                                         return getToken(XML_ATTRIBUTE);
226                                         }
227                                 }
228                         default:
229                           unread();
230                 }
231
232                 while (true) {
233                         switch (read()) {
234                                 case '<':
235                                         unread();
236
237                                 case ICharacterScanner.EOF:
238                                 case '>':
239                                         state = STATE_DEFAULT;
240                                         return getToken(XML_TAG);
241
242                                 case '"': case '\'':
243                                         unread();
244
245                                         state = STATE_TAG;
246                                         return getToken(XML_TAG);
247                         }
248                 }
249         }
250
251         private IToken nextDeclToken() {
252                 loop: while (true) {
253                         switch (read()) {
254                                 case ICharacterScanner.EOF:
255                                         state = STATE_DEFAULT;
256                                         return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
257
258                                 case '<':
259                                         if (parsedtd || isInternal()) {
260                                                 switch (read()) {
261                                                         case ICharacterScanner.EOF:
262                                                                 state = STATE_DEFAULT;
263                                                                 return getToken(isInternal() ? DTD_INTERNAL : null);
264
265                                                         case '!':
266                                                         case '?':
267                                                                 unread();
268                                                                 break;
269
270                                                         default:
271                                                                 continue loop;
272                                                 }
273                                         }
274
275                                         unread();
276
277                                 case '>':
278                                         state &= STATE_INTERNAL;
279                                         return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
280
281                                 case '[': // <!DOCTYPE xxx [dtd]>
282                                         if (!isInternal()) {
283                                                 state = STATE_INTERNAL;
284                                                 return getToken(XML_DECL);
285                                         }
286                         }
287                 }
288         }
289
290         private IToken nextCommentToken() {
291                 state &= STATE_INTERNAL;
292
293                 loop: while (true) {
294                         switch (read()) {
295                                 case ICharacterScanner.EOF:
296                                         break loop;
297
298                                 case '-': // -  -->
299                                         switch (read()) {
300                                                 case ICharacterScanner.EOF:
301                                                         break loop;
302
303                                                 case '-': // --  -->
304                                                         switch (read()) {
305                                                                 case ICharacterScanner.EOF:
306                                                                 case '>':
307                                                                         break loop;
308                                                         }
309
310                                                         unread();
311                                                         break loop;
312                                         }
313                         }
314                 }
315
316                 return getToken(isInternal() ? DTD_INTERNAL_COMMENT : XML_COMMENT);
317         }
318
319         private IToken nextPIToken() {
320                 state &= STATE_INTERNAL;
321
322                 loop: while (true) {
323                         switch (read()) {
324                                 case ICharacterScanner.EOF:
325                                         break loop;
326
327                                 case '?': // ?  ?>
328                                         switch (read()) {
329                                                 case ICharacterScanner.EOF:
330                                                 case '>':
331                                                         break loop;
332                                         }
333
334                                         unread();
335                         }
336                 }
337
338                 return getToken(isInternal() ? DTD_INTERNAL_PI : XML_PI);
339         }
340
341         private IToken nextCDATAToken() {
342                 state = STATE_DEFAULT;
343
344 loop:
345         while (true) {
346                         switch (read()) {
347                                 case ICharacterScanner.EOF:
348                                         break loop;
349
350                                 case ']': // ]  ]]>
351                                         switch (read()) {
352                                                 case ICharacterScanner.EOF:
353                                                         break loop;
354
355                                                 case ']': // ]]  ]]>
356                                                         switch (read()) {
357                                                                 case ICharacterScanner.EOF:
358                                                                 case '>': // ]]>
359                                                                         break loop;
360                                                         }
361
362                                                         unread();
363                                                         unread();
364                                                         continue loop;
365                                         }
366                         }
367                 }
368
369                 return getToken(XML_CDATA);
370         }
371
372         private IToken nextConditionalToken() {
373                 state = STATE_DEFAULT;
374
375                 int level = 1;
376
377 loop:
378         while (true) {
379                         switch (read()) {
380                                 case ICharacterScanner.EOF:
381                                         break loop;
382
383                                 case '<': // -  -->
384                                         switch (read()) {
385                                                 case ICharacterScanner.EOF:
386                                                         break loop;
387
388                                                 case '!': // --  -->
389                                                         switch (read()) {
390                                                                 case ICharacterScanner.EOF:
391                                                                         break loop;
392
393                                                                 case '[':
394                                                                         ++level;
395                                                                         continue loop;
396                                                         }
397
398                                                         unread();
399                                                         continue loop;
400                                         }
401
402                                         unread();
403                                         continue loop;
404
405                                 case ']': // -  -->
406                                         switch (read()) {
407                                                 case ICharacterScanner.EOF:
408                                                         break loop;
409
410                                                 case ']': // --  -->
411                                                         switch (read()) {
412                                                                 case ICharacterScanner.EOF:
413                                                                 case '>':
414                                                                         if (--level == 0) {
415                                                                                 break loop;
416                                                                         }
417
418                                                                         continue loop;
419                                                         }
420
421                                                         unread();
422                                                         unread();
423                                                         continue loop;
424                                         }
425                         }
426                 }
427
428                 return getToken(DTD_CONDITIONAL);
429         }
430
431         private IToken getToken(String type) {
432                 length = position - offset;
433
434                 if (length == 0) {
435                         return Token.EOF;
436                 }
437
438                 if (type == null) {
439                         return Token.UNDEFINED;
440                 }
441
442                 IToken token = (IToken) tokens.get(type);
443                 if (token == null) {
444                         token = new Token(type);
445                         tokens.put(type, token);
446                 }
447
448                 return token;
449         }
450
451         private boolean isInternal() {
452                 return (state & STATE_INTERNAL) != 0;
453         }
454
455         private int read() {
456                 if (position >= end) {
457                         return ICharacterScanner.EOF;
458                 }
459
460                 try {
461                         return document.getChar(position++);
462                 } catch (BadLocationException e) {
463                         --position;
464                         return ICharacterScanner.EOF;
465                 }
466         }
467
468         private void unread() {
469                 --position;
470         }
471
472         /*
473          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
474          */
475         public int getTokenOffset() {
476           Assert.isTrue(offset>=0, Integer.toString(offset));
477                 return offset;
478         }
479
480         /*
481          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
482          */
483         public int getTokenLength() {
484                 return length;
485         }
486
487         /*
488          * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int, int)
489          */
490         public void setRange(IDocument document, int offset, int length) {
491                 this.document = document;
492                 this.end = offset + length;
493
494                 this.offset = offset;
495                 this.position = offset;
496                 this.length = 0;
497
498                 this.state = STATE_DEFAULT;
499         }
500
501         /*
502            * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
503            */
504 //        public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {
505 //          state = STATE_DEFAULT;
506 //          if (partitionOffset > -1) {
507 //            int delta = offset - partitionOffset;
508 //            if (delta > 0) {
509 //              setRange(document, partitionOffset, length + delta);
510 //              return;
511 //            }
512 //          }
513 //          setRange(document, partitionOffset, length);
514 //        }
515         /*
516          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
517          */
518         public void setPartialRange(
519                 IDocument document, int offset, int length,
520                 String contentType, int partitionOffset
521         ) {
522 //        boolean flag = false;
523                 this.document = document;
524                 this.end = offset + length;
525
526                 // NB! Undocumented value: -1
527                 if (partitionOffset >= 0) {
528                         offset = partitionOffset;
529 //                      flag = true;
530                 }
531
532                 this.offset = offset;
533                 this.position = offset;
534                 this.length = 0;
535
536 //              if (flag) {
537 //                state = STATE_DEFAULT;
538 //                return;
539 //              }
540                 if (contentType == XML_ATTRIBUTE) {
541                         state = STATE_TAG;
542                         return;
543                 }
544
545                 if (contentType == XML_TAG) {
546                         state = isContinuationPartition() ? STATE_TAG : STATE_DEFAULT;
547                         return;
548                 }
549
550                 if (contentType == XML_DECL) {
551                         state = isContinuationPartition() ? STATE_DECL : STATE_DEFAULT;
552                         return;
553                 }
554
555                 if (contentType == DTD_INTERNAL ||
556                         contentType == DTD_INTERNAL_PI ||
557                         contentType == DTD_INTERNAL_DECL ||
558                         contentType == DTD_INTERNAL_COMMENT
559                 ) {
560                         state = STATE_INTERNAL;
561                         return;
562                 }
563
564                 state = STATE_DEFAULT;
565         }
566
567         private boolean isContinuationPartition() {
568                 try {
569                         String type = document.getContentType(offset - 1);
570
571                         if (type != IDocument.DEFAULT_CONTENT_TYPE) {
572                                 return true;
573                         }
574                 } catch (BadLocationException e) {}
575
576                 return false;
577         }
578 }