c9fbaaf123d07bf00905877e028fbd31babcfccf
[phpeclipse.git] /
1 /*
2  * Copyright (c) 2002-2004 Widespace, OU and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  *     Igor Malinin - initial contribution
10  *
11  * $Id: PHPXMLPartitionScanner.java,v 1.3 2006-10-21 23:14:13 pombredanne Exp $
12  */
13
14 package net.sourceforge.phpeclipse.xml.ui.internal.text;
15
16 import java.util.HashMap;
17 import java.util.Map;
18
19 //incastrix
20 //import org.eclipse.jface.text.Assert;
21 import org.eclipse.core.runtime.Assert;
22 import org.eclipse.jface.text.BadLocationException;
23 import org.eclipse.jface.text.IDocument;
24 import org.eclipse.jface.text.rules.ICharacterScanner;
25 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
26 import org.eclipse.jface.text.rules.IToken;
27 import org.eclipse.jface.text.rules.Token;
28
29 /**
30  * 
31  * 
32  * @author Igor Malinin
33  */
34 public class PHPXMLPartitionScanner implements IPartitionTokenScanner {
35         // public static final String XML_PI = "__xml_processing_instruction";
36         public static final String XML_COMMENT = "__xml_comment";
37
38         public static final String XML_DECL = "__xml_declaration";
39
40         public static final String XML_TAG = "__xml_tag";
41
42         public static final String XML_ATTRIBUTE = "__xml_attribute";
43
44         public static final String XML_CDATA = "__xml_cdata";
45
46         public static final String DTD_INTERNAL = "__dtd_internal";
47
48         // public static final String DTD_INTERNAL_PI = "__dtd_internal_pi";
49         public static final String DTD_INTERNAL_COMMENT = "__dtd_internal_comment";
50
51         public static final String DTD_INTERNAL_DECL = "__dtd_internal_declaration";
52
53         public static final String DTD_CONDITIONAL = "__dtd_conditional";
54
55         public static final int STATE_DEFAULT = 0;
56
57         public static final int STATE_TAG = 1;
58
59         public static final int STATE_DECL = 2;
60
61         public static final int STATE_CDATA = 4;
62
63         public static final int STATE_INTERNAL = 8;
64
65         protected IDocument document;
66
67         protected int end;
68
69         protected int offset;
70
71         protected int length;
72
73         protected int position;
74
75         protected int state;
76
77         protected boolean parsedtd;
78
79         protected Map tokens = new HashMap();
80
81         public PHPXMLPartitionScanner(boolean parsedtd) {
82                 this.parsedtd = parsedtd;
83         }
84
85         /*
86          * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
87          */
88         public IToken nextToken() {
89                 offset += length;
90
91                 switch (state) {
92                 case STATE_TAG:
93                         return nextTagToken();
94
95                 case STATE_DECL:
96                         return nextDeclToken();
97
98                 case STATE_CDATA:
99                         return nextCDATAToken();
100                 }
101
102                 switch (read()) {
103                 case ICharacterScanner.EOF:
104                         state = STATE_DEFAULT;
105                         return getToken(null);
106
107                 case '<':
108                         switch (read()) {
109                         case ICharacterScanner.EOF:
110                                 if (parsedtd || isInternal()) {
111                                         break;
112                                 }
113
114                                 state = STATE_DEFAULT;
115                                 return getToken(XML_TAG);
116
117                         case '?': // <? <?PI
118                                 unread();
119                                 break;
120                         // return nextPIToken();
121
122                         case '!': // <! <!DEFINITION or <![CDATA[ or <!--COMMENT
123                                 switch (read()) {
124                                 case ICharacterScanner.EOF:
125                                         state = STATE_DEFAULT;
126                                         return getToken(XML_TAG);
127
128                                 case '-': // <!- <!--COMMENT
129                                         switch (read()) {
130                                         case ICharacterScanner.EOF:
131                                                 return nextDeclToken();
132
133                                         case '-': // <!--
134                                                 return nextCommentToken();
135                                         }
136
137                                 case '[': // <![ <![CDATA[ or <![%cond;[
138                                         if (parsedtd) {
139                                                 return nextConditionalToken();
140                                         }
141
142                                         if (!isInternal()) {
143                                                 return nextCDATAToken();
144                                         }
145                                 }
146
147                                 return nextDeclToken();
148                         }
149
150                         if (parsedtd || isInternal()) {
151                                 break;
152                         }
153
154                         unread();
155
156                         return nextTagToken();
157
158                 case ']':
159                         if (isInternal()) {
160                                 unread();
161
162                                 state = STATE_DECL;
163                                 length = 0;
164                                 return nextToken();
165                         }
166                         break;
167                 default:
168                         unread();
169                 }
170
171                 loop: while (true) {
172                         switch (read()) {
173                         case ICharacterScanner.EOF:
174                                 state = STATE_DEFAULT;
175                                 return getToken(null);
176
177                         case '<':
178                                 if (parsedtd || isInternal()) {
179                                         switch (read()) {
180                                         case ICharacterScanner.EOF:
181                                                 state = STATE_DEFAULT;
182                                                 return getToken(null);
183
184                                         case '!':
185                                         case '?':
186                                                 unread();
187                                                 break;
188
189                                         default:
190                                                 continue loop;
191                                         }
192                                 }
193
194                                 unread();
195
196                                 state &= STATE_INTERNAL;
197                                 return getToken(isInternal() ? DTD_INTERNAL : null);
198
199                         case ']':
200                                 if (isInternal()) {
201                                         unread();
202
203                                         state = STATE_DECL;
204                                         if (position == offset) {
205                                                 // nothing between
206                                                 length = 0;
207                                                 return nextToken();
208                                         }
209
210                                         return getToken(DTD_INTERNAL);
211                                 }
212                         }
213                 }
214         }
215
216         private IToken nextTagToken() {
217                 int quot = read();
218
219                 switch (quot) {
220                 case ICharacterScanner.EOF:
221                 case '>':
222                         state = STATE_DEFAULT;
223                         return getToken(XML_TAG);
224
225                 case '"':
226                 case '\'':
227                         while (true) {
228                                 int ch = read();
229
230                                 if (ch == quot) {
231                                         state = STATE_TAG;
232                                         return getToken(XML_ATTRIBUTE);
233                                 }
234
235                                 switch (ch) {
236                                 case '<':
237                                         unread();
238
239                                 case ICharacterScanner.EOF:
240                                         state = STATE_DEFAULT;
241                                         return getToken(XML_ATTRIBUTE);
242                                 }
243                         }
244                 default:
245                         unread();
246                 }
247
248                 while (true) {
249                         switch (read()) {
250                         case '<':
251                                 unread();
252
253                         case ICharacterScanner.EOF:
254                         case '>':
255                                 state = STATE_DEFAULT;
256                                 return getToken(XML_TAG);
257
258                         case '"':
259                         case '\'':
260                                 unread();
261
262                                 state = STATE_TAG;
263                                 return getToken(XML_TAG);
264                         }
265                 }
266         }
267
268         private IToken nextDeclToken() {
269                 loop: while (true) {
270                         switch (read()) {
271                         case ICharacterScanner.EOF:
272                                 state = STATE_DEFAULT;
273                                 return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
274
275                         case '<':
276                                 if (parsedtd || isInternal()) {
277                                         switch (read()) {
278                                         case ICharacterScanner.EOF:
279                                                 state = STATE_DEFAULT;
280                                                 return getToken(isInternal() ? DTD_INTERNAL : null);
281
282                                         case '!':
283                                         case '?':
284                                                 unread();
285                                                 break;
286
287                                         default:
288                                                 continue loop;
289                                         }
290                                 }
291
292                                 unread();
293
294                         case '>':
295                                 state &= STATE_INTERNAL;
296                                 return getToken(isInternal() ? DTD_INTERNAL_DECL : XML_DECL);
297
298                         case '[': // <!DOCTYPE xxx [dtd]>
299                                 if (!isInternal()) {
300                                         state = STATE_INTERNAL;
301                                         return getToken(XML_DECL);
302                                 }
303                         }
304                 }
305         }
306
307         private IToken nextCommentToken() {
308                 state &= STATE_INTERNAL;
309
310                 loop: while (true) {
311                         switch (read()) {
312                         case ICharacterScanner.EOF:
313                                 break loop;
314
315                         case '-': // - -->
316                                 switch (read()) {
317                                 case ICharacterScanner.EOF:
318                                         break loop;
319
320                                 case '-': // -- -->
321                                         switch (read()) {
322                                         case ICharacterScanner.EOF:
323                                         case '>':
324                                                 break loop;
325                                         }
326
327                                         unread();
328                                         continue loop;
329                                 }
330                         }
331                 }
332
333                 return getToken(isInternal() ? DTD_INTERNAL_COMMENT : XML_COMMENT);
334         }
335
336         private IToken nextCDATAToken() {
337                 state = STATE_DEFAULT;
338
339                 loop: while (true) {
340                         switch (read()) {
341                         case ICharacterScanner.EOF:
342                                 break loop;
343
344                         case ']': // ] ]]>
345                                 switch (read()) {
346                                 case ICharacterScanner.EOF:
347                                         break loop;
348
349                                 case ']': // ]] ]]>
350                                         switch (read()) {
351                                         case ICharacterScanner.EOF:
352                                         case '>': // ]]>
353                                                 break loop;
354                                         }
355
356                                         unread();
357                                         unread();
358                                         continue loop;
359                                 }
360                         }
361                 }
362
363                 return getToken(XML_CDATA);
364         }
365
366         private IToken nextConditionalToken() {
367                 state = STATE_DEFAULT;
368
369                 int level = 1;
370
371                 loop: while (true) {
372                         switch (read()) {
373                         case ICharacterScanner.EOF:
374                                 break loop;
375
376                         case '<': // - -->
377                                 switch (read()) {
378                                 case ICharacterScanner.EOF:
379                                         break loop;
380
381                                 case '!': // -- -->
382                                         switch (read()) {
383                                         case ICharacterScanner.EOF:
384                                                 break loop;
385
386                                         case '[':
387                                                 ++level;
388                                                 continue loop;
389                                         }
390
391                                         unread();
392                                         continue loop;
393                                 }
394
395                                 unread();
396                                 continue loop;
397
398                         case ']': // - -->
399                                 switch (read()) {
400                                 case ICharacterScanner.EOF:
401                                         break loop;
402
403                                 case ']': // -- -->
404                                         switch (read()) {
405                                         case ICharacterScanner.EOF:
406                                         case '>':
407                                                 if (--level == 0) {
408                                                         break loop;
409                                                 }
410
411                                                 continue loop;
412                                         }
413
414                                         unread();
415                                         unread();
416                                         continue loop;
417                                 }
418                         }
419                 }
420
421                 return getToken(DTD_CONDITIONAL);
422         }
423
424         private IToken getToken(String type) {
425                 length = position - offset;
426
427                 if (length == 0) {
428                         return Token.EOF;
429                 }
430
431                 if (type == null) {
432                         return Token.UNDEFINED;
433                 }
434
435                 IToken token = (IToken) tokens.get(type);
436                 if (token == null) {
437                         token = new Token(type);
438                         tokens.put(type, token);
439                 }
440
441                 return token;
442         }
443
444         private boolean isInternal() {
445                 return (state & STATE_INTERNAL) != 0;
446         }
447
448         private int read() {
449                 if (position >= end) {
450                         return ICharacterScanner.EOF;
451                 }
452
453                 try {
454                         return document.getChar(position++);
455                 } catch (BadLocationException e) {
456                         --position;
457                         return ICharacterScanner.EOF;
458                 }
459         }
460
461         private void unread() {
462                 --position;
463         }
464
465         /*
466          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenOffset()
467          */
468         public int getTokenOffset() {
469                 Assert.isTrue(offset >= 0, Integer.toString(offset));
470                 return offset;
471         }
472
473         /*
474          * @see org.eclipse.jface.text.rules.ITokenScanner#getTokenLength()
475          */
476         public int getTokenLength() {
477                 return length;
478         }
479
480         /*
481          * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(IDocument, int,
482          *      int)
483          */
484         public void setRange(IDocument document, int offset, int length) {
485                 this.document = document;
486                 this.end = offset + length;
487
488                 this.offset = offset;
489                 this.position = offset;
490                 this.length = 0;
491
492                 this.state = STATE_DEFAULT;
493         }
494
495         /*
496          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
497          */
498         // public void setPartialRange(IDocument document, int offset, int length,
499         // String contentType, int partitionOffset) {
500         // state = STATE_DEFAULT;
501         // if (partitionOffset > -1) {
502         // int delta = offset - partitionOffset;
503         // if (delta > 0) {
504         // setRange(document, partitionOffset, length + delta);
505         // return;
506         // }
507         // }
508         // setRange(document, partitionOffset, length);
509         // }
510         /*
511          * @see org.eclipse.jface.text.rules.IPartitionTokenScanner
512          */
513         public void setPartialRange(IDocument document, int offset, int length,
514                         String contentType, int partitionOffset) {
515                 // boolean flag = false;
516                 this.document = document;
517                 this.end = offset + length;
518
519                 // NB! Undocumented value: -1
520                 if (partitionOffset >= 0) {
521                         offset = partitionOffset;
522                         // flag = true;
523                 }
524
525                 this.offset = offset;
526                 this.position = offset;
527                 this.length = 0;
528
529                 // if (flag) {
530                 // state = STATE_DEFAULT;
531                 // return;
532                 // }
533                 if (contentType == XML_ATTRIBUTE) {
534                         state = STATE_TAG;
535                         return;
536                 }
537
538                 if (contentType == XML_TAG) {
539                         state = isContinuationPartition() ? STATE_TAG : STATE_DEFAULT;
540                         return;
541                 }
542
543                 if (contentType == XML_DECL) {
544                         state = isContinuationPartition() ? STATE_DECL : STATE_DEFAULT;
545                         return;
546                 }
547
548                 if (contentType == DTD_INTERNAL || contentType == DTD_INTERNAL_DECL
549                                 || contentType == DTD_INTERNAL_COMMENT) {
550                         state = STATE_INTERNAL;
551                         return;
552                 }
553
554                 state = STATE_DEFAULT;
555         }
556
557         private boolean isContinuationPartition() {
558                 try {
559                         String type = document.getContentType(offset - 1);
560
561                         if (type != IDocument.DEFAULT_CONTENT_TYPE) {
562                                 return true;
563                         }
564                 } catch (BadLocationException e) {
565                 }
566
567                 return false;
568         }
569 }