eca1657c13fb31553d3bc649159efac1ff63ff62
[phpeclipse.git] /
1 /*
2  * Copyright (c) 2002-2004 Widespace, OU and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     Igor Malinin - initial contribution
10  *
11  * $Id: XMLPartitionScanner.java,v 1.5 2006-10-21 23:14:13 pombredanne Exp $
12  */
13
14 package net.sourceforge.phpeclipse.xml.ui.internal.text;
15
16 import java.util.HashMap;
17 import java.util.Map;
18
19 import org.eclipse.jface.text.Assert;
20 import org.eclipse.jface.text.BadLocationException;
21 import org.eclipse.jface.text.IDocument;
22 import org.eclipse.jface.text.rules.ICharacterScanner;
23 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
24 import org.eclipse.jface.text.rules.IToken;
25 import org.eclipse.jface.text.rules.Token;
26
27 /**
28  * 
29  * 
30  * @author Igor Malinin
31  */
32 public class XMLPartitionScanner implements IPartitionTokenScanner {
33         public static final String XML_PI = "__xml_processing_instruction";
34
35         public static final String XML_COMMENT = "__xml_comment";
36
37         public static final String XML_DECL = "__xml_declaration";
38
39         public static final String XML_TAG = "__xml_tag";
40
41         public static final String XML_ATTRIBUTE = "__xml_attribute";
42
43         public static final String XML_CDATA = "__xml_cdata";
44
45         public static final String DTD_INTERNAL = "__dtd_internal";
46
47         public static final String DTD_INTERNAL_PI = "__dtd_internal_pi";
48
49         public static final String DTD_INTERNAL_COMMENT = "__dtd_internal_comment";
50
51         public static final String DTD_INTERNAL_DECL = "__dtd_internal_declaration";
52
53         public static final String DTD_CONDITIONAL = "__dtd_conditional";
54
55         public static final int STATE_DEFAULT = 0;
56
57         public static final int STATE_TAG = 1;
58
59         public static final int STATE_DECL = 2;
60
61         public static final int STATE_CDATA = 4;
62
63         public static final int STATE_INTERNAL = 8;
64
65         protected IDocument document;
66
67         protected int end;
68
69         protected int offset;
70
71         protected int length;
72
73         protected int position;
74
75         protected int state;
76
77         protected boolean parsedtd;
78
79         protected Map tokens = new HashMap();
80
81         public XMLPartitionScanner(boolean parsedtd) {
82                 this.parsedtd = parsedtd;
83         }
84
85         /*
86          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
87          */
88         public IToken nextToken() {
89                 offset += length;
90
91                 switch (state) {
92                 case STATE_TAG:
93                         return nextTagToken();
94
95                 case STATE_DECL:
96                         return nextDeclToken();
97
98                 case STATE_CDATA:
99                         return nextCDATAToken();
100                 }
101
102                 switch (read()) {
103                 case ICharacterScanner.EOF:
104                         state = STATE_DEFAULT;
105                         return getToken(null);
106
107                 case '<':
108                         switch (read()) {
109                         case ICharacterScanner.EOF:
110                                 if (parsedtd || isInternal()) {
111                                         break;
112                                 }
113
114                                 state = STATE_DEFAULT;
115                                 return getToken(XML_TAG);
116
117                         case '?': // <? <?PI
118                                 return nextPIToken();
119
120                         case '!': // <! <!DEFINITION or <![CDATA[ or <!--COMMENT
121                                 switch (read()) {
122                                 case ICharacterScanner.EOF:
123                                         state = STATE_DEFAULT;
124                                         return getToken(XML_TAG);
125
126                                 case '-': // <!- <!--COMMENT
127                                         switch (read()) {
128                                         case ICharacterScanner.EOF:
129                                                 return nextDeclToken();
130
131                                         case '-': // <!--
132                                                 return nextCommentToken();
133                                         }
134
135                                 case '[': // <![ <![CDATA[ or <![%cond;[
136                                         if (parsedtd) {
137                                                 return nextConditionalToken();
138                                         }
139
140                                         if (!isInternal()) {
141                                                 return nextCDATAToken();
142                                         }
143                                 }
144
145                                 return nextDeclToken();
146                         }
147
148                         if (parsedtd || isInternal()) {
149                                 break;
150                         }
151
152                         unread();
153
154                         return nextTagToken();
155
156                 case ']':
157                         if (isInternal()) {
158                                 unread();
159
160                                 state = STATE_DECL;
161                                 length = 0;
162                                 return nextToken();
163                         }
164                         break;
165                 default:
166                         unread();
167                 }
168
169                 loop: while (true) {
170                         switch (read()) {
171                         case ICharacterScanner.EOF:
172                                 state = STATE_DEFAULT;
173                                 return getToken(null);
174
175                         case '<':
176                                 if (parsedtd || isInternal()) {
177                                         switch (read()) {
178                                         case ICharacterScanner.EOF:
179                                                 state = STATE_DEFAULT;
180                                                 return getToken(null);
181
182                                         case '!':
183                                         case '?':
184                                                 unread();
185                                                 break;
186
187                                         default:
188                                                 continue loop;
189                                         }
190                                 }
191
192                                 unread();
193
194                                 state &= STATE_INTERNAL;
195                                 return getToken(isInternal() ? DTD_INTERNAL : null);
196
197                         case ']':
198                                 if (isInternal()) {
199                                         unread();
200
201                                         state = STATE_DECL;
202                                         if (position == offset) {
203                                                 // nothing between
204                                                 length = 0;
205                                                 return nextToken();
206                                         }
207
208                                         return getToken(DTD_INTERNAL);
209                                 }
210                         }
211                 }
212         }
213
214         private IToken nextTagToken() {
215                 int quot = read();
216
217                 switch (quot) {
218                 case ICharacterScanner.EOF:
219                 case '>':
220                         state = STATE_DEFAULT;
221                         return getToken(XML_TAG);
222
223                 case '"':
224                 case '\'':
225                         while (true) {
226                                 int ch = read();
227
228                                 if (ch == quot) {
229                                         state = STATE_TAG;
230                                         return getToken(XML_ATTRIBUTE);
231                                 }
232
233                                 switch (ch) {
234                                 case '<':
235                                         unread();
236
237                                 case ICharacterScanner.EOF:
238                                         state = STATE_DEFAULT;
239                                         return getToken(XML_ATTRIBUTE);
240                                 }
241                         }
242                 default:
243                         unread();
244                 }
245
246                 while (true) {
247                         switch (read()) {
248                         case '<':
249                                 unread();
250
251                         case ICharacterScanner.EOF:
252                         case '>':
253                                 state = STATE_DEFAULT;
254                                 return getToken(XML_TAG);
255
256                         case '"':
257                         case '\'':
258                                 unread();
259
260                                 state = STATE_TAG;
261                                 return getToken(XML_TAG);
262                         }
263                 }
264         }
265
266         private IToken nextDeclToken() {
267                 loop: while (true) {
268                         switch (read()) {
269                         case ICharacterScanner.EOF:
270                                 state = STATE_DEFAULT;
271                                 return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
272
273                         case '<':
274                                 if (parsedtd || isInternal()) {
275                                         switch (read()) {
276                                         case ICharacterScanner.EOF:
277                                                 state = STATE_DEFAULT;
278                                                 return getToken(isInternal() ? DTD_INTERNAL : null);
279
280                                         case '!':
281                                         case '?':
282                                                 unread();
283                                                 break;
284
285                                         default:
286                                                 continue loop;
287                                         }
288                                 }
289
290                                 unread();
291
292                         case '>':
293                                 state &= STATE_INTERNAL;
294                                 return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
295
296                         case '[': // <!DOCTYPE xxx [dtd]>
297                                 if (!isInternal()) {
298                                         state = STATE_INTERNAL;
299                                         return getToken(XML_DECL);
300                                 }
301                         }
302                 }
303         }
304
305         private IToken nextCommentToken() {
306                 state &= STATE_INTERNAL;
307
308                 loop: while (true) {
309                         switch (read()) {
310                         case ICharacterScanner.EOF:
311                                 break loop;
312
313                         case '-': // - -->
314                                 switch (read()) {
315                                 case ICharacterScanner.EOF:
316                                         break loop;
317
318                                 case '-': // -- -->
319                                         switch (read()) {
320                                         case ICharacterScanner.EOF:
321                                         case '>':
322                                                 break loop;
323                                         }
324
325                                         unread();
326                                         continue loop;
327                                 }
328                         }
329                 }
330
331                 return getToken(isInternal() ? DTD_INTERNAL_COMMENT : XML_COMMENT);
332         }
333
334         private IToken nextPIToken() {
335                 state &= STATE_INTERNAL;
336
337                 loop: while (true) {
338                         switch (read()) {
339                         case ICharacterScanner.EOF:
340                                 break loop;
341
342                         case '?': // ? ?>
343                                 switch (read()) {
344                                 case ICharacterScanner.EOF:
345                                 case '>':
346                                         break loop;
347                                 }
348
349                                 unread();
350                         }
351                 }
352
353                 return getToken(isInternal() ? DTD_INTERNAL_PI : XML_PI);
354         }
355
356         private IToken nextCDATAToken() {
357                 state = STATE_DEFAULT;
358
359                 loop: while (true) {
360                         switch (read()) {
361                         case ICharacterScanner.EOF:
362                                 break loop;
363
364                         case ']': // ] ]]>
365                                 switch (read()) {
366                                 case ICharacterScanner.EOF:
367                                         break loop;
368
369                                 case ']': // ]] ]]>
370                                         switch (read()) {
371                                         case ICharacterScanner.EOF:
372                                         case '>': // ]]>
373                                                 break loop;
374                                         }
375
376                                         unread();
377                                         unread();
378                                         continue loop;
379                                 }
380                         }
381                 }
382
383                 return getToken(XML_CDATA);
384         }
385
386         private IToken nextConditionalToken() {
387                 state = STATE_DEFAULT;
388
389                 int level = 1;
390
391                 loop: while (true) {
392                         switch (read()) {
393                         case ICharacterScanner.EOF:
394                                 break loop;
395
396                         case '<': // - -->
397                                 switch (read()) {
398                                 case ICharacterScanner.EOF:
399                                         break loop;
400
401                                 case '!': // -- -->
402                                         switch (read()) {
403                                         case ICharacterScanner.EOF:
404                                                 break loop;
405
406                                         case '[':
407                                                 ++level;
408                                                 continue loop;
409                                         }
410
411                                         unread();
412                                         continue loop;
413                                 }
414
415                                 unread();
416                                 continue loop;
417
418                         case ']': // - -->
419                                 switch (read()) {
420                                 case ICharacterScanner.EOF:
421                                         break loop;
422
423                                 case ']': // -- -->
424                                         switch (read()) {
425                                         case ICharacterScanner.EOF:
426                                         case '>':
427                                                 if (--level == 0) {
428                                                         break loop;
429                                                 }
430
431                                                 continue loop;
432                                         }
433
434                                         unread();
435                                         unread();
436                                         continue loop;
437                                 }
438                         }
439                 }
440
441                 return getToken(DTD_CONDITIONAL);
442         }
443
444         private IToken getToken(String type) {
445                 length = position - offset;
446
447                 if (length == 0) {
448                         return Token.EOF;
449                 }
450
451                 if (type == null) {
452                         return Token.UNDEFINED;
453                 }
454
455                 IToken token = (IToken) tokens.get(type);
456                 if (token == null) {
457                         token = new Token(type);
458                         tokens.put(type, token);
459                 }
460
461                 return token;
462         }
463
464         private boolean isInternal() {
465                 return (state & STATE_INTERNAL) != 0;
466         }
467
468         private int read() {
469                 if (position >= end) {
470                         return ICharacterScanner.EOF;
471                 }
472
473                 try {
474                         return document.getChar(position++);
475                 } catch (BadLocationException e) {
476                         --position;
477                         return ICharacterScanner.EOF;
478                 }
479         }
480
481         private void unread() {
482                 --position;
483         }
484
485         /*
486          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
487          */
488         public int getTokenOffset() {
489                 Assert.isTrue(offset >= 0, Integer.toString(offset));
490                 return offset;
491         }
492
493         /*
494          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
495          */
496         public int getTokenLength() {
497                 return length;
498         }
499
500         /*
501          * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int,
502          *      int)
503          */
504         public void setRange(IDocument document, int offset, int length) {
505                 this.document = document;
506                 this.end = offset + length;
507
508                 this.offset = offset;
509                 this.position = offset;
510                 this.length = 0;
511
512                 this.state = STATE_DEFAULT;
513         }
514
515         /*
516          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
517          */
518         // public void setPartialRange(IDocument document, int offset, int length,
519         // String contentType, int partitionOffset) {
520         // state = STATE_DEFAULT;
521         // if (partitionOffset > -1) {
522         // int delta = offset - partitionOffset;
523         // if (delta > 0) {
524         // setRange(document, partitionOffset, length + delta);
525         // return;
526         // }
527         // }
528         // setRange(document, partitionOffset, length);
529         // }
530         /*
531          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
532          */
533         public void setPartialRange(IDocument document, int offset, int length,
534                         String contentType, int partitionOffset) {
535                 // boolean flag = false;
536                 this.document = document;
537                 this.end = offset + length;
538
539                 // NB! Undocumented value: -1
540                 if (partitionOffset >= 0) {
541                         offset = partitionOffset;
542                         // flag = true;
543                 }
544
545                 this.offset = offset;
546                 this.position = offset;
547                 this.length = 0;
548
549                 // if (flag) {
550                 // state = STATE_DEFAULT;
551                 // return;
552                 // }
553                 if (contentType == XML_ATTRIBUTE) {
554                         state = STATE_TAG;
555                         return;
556                 }
557
558                 if (contentType == XML_TAG) {
559                         state = isContinuationPartition() ? STATE_TAG : STATE_DEFAULT;
560                         return;
561                 }
562
563                 if (contentType == XML_DECL) {
564                         state = isContinuationPartition() ? STATE_DECL : STATE_DEFAULT;
565                         return;
566                 }
567
568                 if (contentType == DTD_INTERNAL || contentType == DTD_INTERNAL_PI
569                                 || contentType == DTD_INTERNAL_DECL
570                                 || contentType == DTD_INTERNAL_COMMENT) {
571                         state = STATE_INTERNAL;
572                         return;
573                 }
574
575                 state = STATE_DEFAULT;
576         }
577
578         private boolean isContinuationPartition() {
579                 try {
580                         String type = document.getContentType(offset - 1);
581
582                         if (type != IDocument.DEFAULT_CONTENT_TYPE) {
583                                 return true;
584                         }
585                 } catch (BadLocationException e) {
586                 }
587
588                 return false;
589         }
590 }