0339ba586fc2c3220135d117e580903d7b49b723
[phpeclipse.git] /
1 /*
2  * Copyright (c) 2002-2004 Widespace, OU and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     Igor Malinin - initial contribution
10  *
11  * $Id: PHPXMLPartitionScanner.java,v 1.3 2006-10-21 23:14:13 pombredanne Exp $
12  */
13
14 package net.sourceforge.phpeclipse.xml.ui.internal.text;
15
16 import java.util.HashMap;
17 import java.util.Map;
18
19 import org.eclipse.jface.text.Assert;
20 import org.eclipse.jface.text.BadLocationException;
21 import org.eclipse.jface.text.IDocument;
22 import org.eclipse.jface.text.rules.ICharacterScanner;
23 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
24 import org.eclipse.jface.text.rules.IToken;
25 import org.eclipse.jface.text.rules.Token;
26
27 /**
28  * 
29  * 
30  * @author Igor Malinin
31  */
32 public class PHPXMLPartitionScanner implements IPartitionTokenScanner {
33         // public static final String XML_PI = "__xml_processing_instruction";
34         public static final String XML_COMMENT = "__xml_comment";
35
36         public static final String XML_DECL = "__xml_declaration";
37
38         public static final String XML_TAG = "__xml_tag";
39
40         public static final String XML_ATTRIBUTE = "__xml_attribute";
41
42         public static final String XML_CDATA = "__xml_cdata";
43
44         public static final String DTD_INTERNAL = "__dtd_internal";
45
46         // public static final String DTD_INTERNAL_PI = "__dtd_internal_pi";
47         public static final String DTD_INTERNAL_COMMENT = "__dtd_internal_comment";
48
49         public static final String DTD_INTERNAL_DECL = "__dtd_internal_declaration";
50
51         public static final String DTD_CONDITIONAL = "__dtd_conditional";
52
53         public static final int STATE_DEFAULT = 0;
54
55         public static final int STATE_TAG = 1;
56
57         public static final int STATE_DECL = 2;
58
59         public static final int STATE_CDATA = 4;
60
61         public static final int STATE_INTERNAL = 8;
62
63         protected IDocument document;
64
65         protected int end;
66
67         protected int offset;
68
69         protected int length;
70
71         protected int position;
72
73         protected int state;
74
75         protected boolean parsedtd;
76
77         protected Map tokens = new HashMap();
78
79         public PHPXMLPartitionScanner(boolean parsedtd) {
80                 this.parsedtd = parsedtd;
81         }
82
83         /*
84          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
85          */
86         public IToken nextToken() {
87                 offset += length;
88
89                 switch (state) {
90                 case STATE_TAG:
91                         return nextTagToken();
92
93                 case STATE_DECL:
94                         return nextDeclToken();
95
96                 case STATE_CDATA:
97                         return nextCDATAToken();
98                 }
99
100                 switch (read()) {
101                 case ICharacterScanner.EOF:
102                         state = STATE_DEFAULT;
103                         return getToken(null);
104
105                 case '<':
106                         switch (read()) {
107                         case ICharacterScanner.EOF:
108                                 if (parsedtd || isInternal()) {
109                                         break;
110                                 }
111
112                                 state = STATE_DEFAULT;
113                                 return getToken(XML_TAG);
114
115                         case '?': // <? <?PI
116                                 unread();
117                                 break;
118                         // return nextPIToken();
119
120                         case '!': // <! <!DEFINITION or <![CDATA[ or <!--COMMENT
121                                 switch (read()) {
122                                 case ICharacterScanner.EOF:
123                                         state = STATE_DEFAULT;
124                                         return getToken(XML_TAG);
125
126                                 case '-': // <!- <!--COMMENT
127                                         switch (read()) {
128                                         case ICharacterScanner.EOF:
129                                                 return nextDeclToken();
130
131                                         case '-': // <!--
132                                                 return nextCommentToken();
133                                         }
134
135                                 case '[': // <![ <![CDATA[ or <![%cond;[
136                                         if (parsedtd) {
137                                                 return nextConditionalToken();
138                                         }
139
140                                         if (!isInternal()) {
141                                                 return nextCDATAToken();
142                                         }
143                                 }
144
145                                 return nextDeclToken();
146                         }
147
148                         if (parsedtd || isInternal()) {
149                                 break;
150                         }
151
152                         unread();
153
154                         return nextTagToken();
155
156                 case ']':
157                         if (isInternal()) {
158                                 unread();
159
160                                 state = STATE_DECL;
161                                 length = 0;
162                                 return nextToken();
163                         }
164                         break;
165                 default:
166                         unread();
167                 }
168
169                 loop: while (true) {
170                         switch (read()) {
171                         case ICharacterScanner.EOF:
172                                 state = STATE_DEFAULT;
173                                 return getToken(null);
174
175                         case '<':
176                                 if (parsedtd || isInternal()) {
177                                         switch (read()) {
178                                         case ICharacterScanner.EOF:
179                                                 state = STATE_DEFAULT;
180                                                 return getToken(null);
181
182                                         case '!':
183                                         case '?':
184                                                 unread();
185                                                 break;
186
187                                         default:
188                                                 continue loop;
189                                         }
190                                 }
191
192                                 unread();
193
194                                 state &= STATE_INTERNAL;
195                                 return getToken(isInternal() ? DTD_INTERNAL : null);
196
197                         case ']':
198                                 if (isInternal()) {
199                                         unread();
200
201                                         state = STATE_DECL;
202                                         if (position == offset) {
203                                                 // nothing between
204                                                 length = 0;
205                                                 return nextToken();
206                                         }
207
208                                         return getToken(DTD_INTERNAL);
209                                 }
210                         }
211                 }
212         }
213
214         private IToken nextTagToken() {
215                 int quot = read();
216
217                 switch (quot) {
218                 case ICharacterScanner.EOF:
219                 case '>':
220                         state = STATE_DEFAULT;
221                         return getToken(XML_TAG);
222
223                 case '"':
224                 case '\'':
225                         while (true) {
226                                 int ch = read();
227
228                                 if (ch == quot) {
229                                         state = STATE_TAG;
230                                         return getToken(XML_ATTRIBUTE);
231                                 }
232
233                                 switch (ch) {
234                                 case '<':
235                                         unread();
236
237                                 case ICharacterScanner.EOF:
238                                         state = STATE_DEFAULT;
239                                         return getToken(XML_ATTRIBUTE);
240                                 }
241                         }
242                 default:
243                         unread();
244                 }
245
246                 while (true) {
247                         switch (read()) {
248                         case '<':
249                                 unread();
250
251                         case ICharacterScanner.EOF:
252                         case '>':
253                                 state = STATE_DEFAULT;
254                                 return getToken(XML_TAG);
255
256                         case '"':
257                         case '\'':
258                                 unread();
259
260                                 state = STATE_TAG;
261                                 return getToken(XML_TAG);
262                         }
263                 }
264         }
265
266         private IToken nextDeclToken() {
267                 loop: while (true) {
268                         switch (read()) {
269                         case ICharacterScanner.EOF:
270                                 state = STATE_DEFAULT;
271                                 return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
272
273                         case '<':
274                                 if (parsedtd || isInternal()) {
275                                         switch (read()) {
276                                         case ICharacterScanner.EOF:
277                                                 state = STATE_DEFAULT;
278                                                 return getToken(isInternal() ? DTD_INTERNAL : null);
279
280                                         case '!':
281                                         case '?':
282                                                 unread();
283                                                 break;
284
285                                         default:
286                                                 continue loop;
287                                         }
288                                 }
289
290                                 unread();
291
292                         case '>':
293                                 state &= STATE_INTERNAL;
294                                 return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
295
296                         case '[': // <!DOCTYPE xxx [dtd]>
297                                 if (!isInternal()) {
298                                         state = STATE_INTERNAL;
299                                         return getToken(XML_DECL);
300                                 }
301                         }
302                 }
303         }
304
305         private IToken nextCommentToken() {
306                 state &= STATE_INTERNAL;
307
308                 loop: while (true) {
309                         switch (read()) {
310                         case ICharacterScanner.EOF:
311                                 break loop;
312
313                         case '-': // - -->
314                                 switch (read()) {
315                                 case ICharacterScanner.EOF:
316                                         break loop;
317
318                                 case '-': // -- -->
319                                         switch (read()) {
320                                         case ICharacterScanner.EOF:
321                                         case '>':
322                                                 break loop;
323                                         }
324
325                                         unread();
326                                         continue loop;
327                                 }
328                         }
329                 }
330
331                 return getToken(isInternal() ? DTD_INTERNAL_COMMENT : XML_COMMENT);
332         }
333
334         private IToken nextCDATAToken() {
335                 state = STATE_DEFAULT;
336
337                 loop: while (true) {
338                         switch (read()) {
339                         case ICharacterScanner.EOF:
340                                 break loop;
341
342                         case ']': // ] ]]>
343                                 switch (read()) {
344                                 case ICharacterScanner.EOF:
345                                         break loop;
346
347                                 case ']': // ]] ]]>
348                                         switch (read()) {
349                                         case ICharacterScanner.EOF:
350                                         case '>': // ]]>
351                                                 break loop;
352                                         }
353
354                                         unread();
355                                         unread();
356                                         continue loop;
357                                 }
358                         }
359                 }
360
361                 return getToken(XML_CDATA);
362         }
363
364         private IToken nextConditionalToken() {
365                 state = STATE_DEFAULT;
366
367                 int level = 1;
368
369                 loop: while (true) {
370                         switch (read()) {
371                         case ICharacterScanner.EOF:
372                                 break loop;
373
374                         case '<': // - -->
375                                 switch (read()) {
376                                 case ICharacterScanner.EOF:
377                                         break loop;
378
379                                 case '!': // -- -->
380                                         switch (read()) {
381                                         case ICharacterScanner.EOF:
382                                                 break loop;
383
384                                         case '[':
385                                                 ++level;
386                                                 continue loop;
387                                         }
388
389                                         unread();
390                                         continue loop;
391                                 }
392
393                                 unread();
394                                 continue loop;
395
396                         case ']': // - -->
397                                 switch (read()) {
398                                 case ICharacterScanner.EOF:
399                                         break loop;
400
401                                 case ']': // -- -->
402                                         switch (read()) {
403                                         case ICharacterScanner.EOF:
404                                         case '>':
405                                                 if (--level == 0) {
406                                                         break loop;
407                                                 }
408
409                                                 continue loop;
410                                         }
411
412                                         unread();
413                                         unread();
414                                         continue loop;
415                                 }
416                         }
417                 }
418
419                 return getToken(DTD_CONDITIONAL);
420         }
421
422         private IToken getToken(String type) {
423                 length = position - offset;
424
425                 if (length == 0) {
426                         return Token.EOF;
427                 }
428
429                 if (type == null) {
430                         return Token.UNDEFINED;
431                 }
432
433                 IToken token = (IToken) tokens.get(type);
434                 if (token == null) {
435                         token = new Token(type);
436                         tokens.put(type, token);
437                 }
438
439                 return token;
440         }
441
442         private boolean isInternal() {
443                 return (state & STATE_INTERNAL) != 0;
444         }
445
446         private int read() {
447                 if (position >= end) {
448                         return ICharacterScanner.EOF;
449                 }
450
451                 try {
452                         return document.getChar(position++);
453                 } catch (BadLocationException e) {
454                         --position;
455                         return ICharacterScanner.EOF;
456                 }
457         }
458
459         private void unread() {
460                 --position;
461         }
462
463         /*
464          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
465          */
466         public int getTokenOffset() {
467                 Assert.isTrue(offset >= 0, Integer.toString(offset));
468                 return offset;
469         }
470
471         /*
472          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
473          */
474         public int getTokenLength() {
475                 return length;
476         }
477
478         /*
479          * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int,
480          *      int)
481          */
482         public void setRange(IDocument document, int offset, int length) {
483                 this.document = document;
484                 this.end = offset + length;
485
486                 this.offset = offset;
487                 this.position = offset;
488                 this.length = 0;
489
490                 this.state = STATE_DEFAULT;
491         }
492
493         /*
494          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
495          */
496         // public void setPartialRange(IDocument document, int offset, int length,
497         // String contentType, int partitionOffset) {
498         // state = STATE_DEFAULT;
499         // if (partitionOffset > -1) {
500         // int delta = offset - partitionOffset;
501         // if (delta > 0) {
502         // setRange(document, partitionOffset, length + delta);
503         // return;
504         // }
505         // }
506         // setRange(document, partitionOffset, length);
507         // }
508         /*
509          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
510          */
511         public void setPartialRange(IDocument document, int offset, int length,
512                         String contentType, int partitionOffset) {
513                 // boolean flag = false;
514                 this.document = document;
515                 this.end = offset + length;
516
517                 // NB! Undocumented value: -1
518                 if (partitionOffset >= 0) {
519                         offset = partitionOffset;
520                         // flag = true;
521                 }
522
523                 this.offset = offset;
524                 this.position = offset;
525                 this.length = 0;
526
527                 // if (flag) {
528                 // state = STATE_DEFAULT;
529                 // return;
530                 // }
531                 if (contentType == XML_ATTRIBUTE) {
532                         state = STATE_TAG;
533                         return;
534                 }
535
536                 if (contentType == XML_TAG) {
537                         state = isContinuationPartition() ? STATE_TAG : STATE_DEFAULT;
538                         return;
539                 }
540
541                 if (contentType == XML_DECL) {
542                         state = isContinuationPartition() ? STATE_DECL : STATE_DEFAULT;
543                         return;
544                 }
545
546                 if (contentType == DTD_INTERNAL || contentType == DTD_INTERNAL_DECL
547                                 || contentType == DTD_INTERNAL_COMMENT) {
548                         state = STATE_INTERNAL;
549                         return;
550                 }
551
552                 state = STATE_DEFAULT;
553         }
554
555         private boolean isContinuationPartition() {
556                 try {
557                         String type = document.getContentType(offset - 1);
558
559                         if (type != IDocument.DEFAULT_CONTENT_TYPE) {
560                                 return true;
561                         }
562                 } catch (BadLocationException e) {
563                 }
564
565                 return false;
566         }
567 }