2 * @(#)ParserImpl.java 1.11 2000/08/16
6 package net.sourceforge.phpdt.tidy.w3c;
10 * HTML Parser implementation
12 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13 * See Tidy.java for the copyright notice.
14 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
15 * HTML Tidy Release 4 Aug 2000</a>
17 * @author Dave Raggett <dsr@w3.org>
18 * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19 * @version 1.0, 1999/05/22
20 * @version 1.0.1, 1999/05/29
21 * @version 1.1, 1999/06/18 Java Bean
22 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24 * @version 1.4, 1999/09/04 DOM support
25 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
34 public class ParserImpl {
36 //private static int SeenBodyEndTag; /* AQ: moved into lexer structure */
// Runs the parser attached to the tag of a node, after resetting lexer
// whitespace state that depends on the content model of that tag:
//   - lexer.insertspace is cleared when the model lacks Dict.CM_INLINE;
//   - lexer.waswhite is cleared when the model has Dict.CM_EMPTY.
// lexer: lexer whose flags are reset and which is handed on to the parser.
// node:  element to parse; node.tag is dereferenced without a null check.
// mode:  lexer mode, passed through unchanged to node.tag.parser.parse.
38 private static void parseTag(Lexer lexer, Node node, short mode)
40 // Local fix by GLP 2000-12-21. Need to reset insertspace if this
41 // is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
42 // Remove this code once the fix is made in Tidy.
44 /****** (Original code follows)
45 if ((node.tag.model & Dict.CM_EMPTY) != 0)
47 lexer.waswhite = false;
50 else if (!((node.tag.model & Dict.CM_INLINE) != 0))
51 lexer.insertspace = false;
// The two tests below appear to be the local replacement for the disabled
// original above: the flags are checked independently rather than through
// an if / else-if chain, so a tag can have both flags reset.
54 if (!((node.tag.model & Dict.CM_INLINE) != 0))
55 lexer.insertspace = false;
57 if ((node.tag.model & Dict.CM_EMPTY) != 0)
59 lexer.waswhite = false;
// NOTE(review): the statement guarded by this test is not visible in this
// view; presumably nothing is parsed for a tag without a parser or for a
// Node.StartEndTag node - confirm.
63 if (node.tag.parser == null || node.type == Node.StartEndTag)
66 node.tag.parser.parse(lexer, node, mode);
// Relocates a node that was found outside the head into the head element.
// lexer:   supplies the tag table (lexer.configuration.tt) and report context.
// element: the element in which the node was encountered; used for the
//          warning and as the starting point of the walk up to html.
// node:    the misplaced node.
69 private static void moveToHead(Lexer lexer, Node element, Node node)
72 TagTable tt = lexer.configuration.tt;
// Only start tags and start-end tags are moved.
75 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
77 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
// Climb the parent chain until the html element is reached. There is no
// null check on element here, so an html ancestor is assumed to exist.
79 while (element.tag != tt.tagHtml)
80 element = element.parent;
// Scan the children of html for the head element and append the node to it.
82 for (head = element.content; head != null; head = head.next)
84 if (head.tag == tt.tagHead)
86 Node.insertNodeAtEnd(head, node);
// Parse the moved element in place when its tag has a parser.
91 if (node.tag.parser != null)
92 parseTag(lexer, node, Lexer.IgnoreWhitespace);
// NOTE(review): presumably the alternative branch for nodes that are not
// start tags (the branch keyword is not visible in this view) - confirm.
96 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
// Parser for the root html element.
// parse() resets per-document state (configuration.XmlTags = false,
// lexer.seenBodyEndTag = 0), obtains a head element (from the token stream
// or via lexer.inferredTag) and parses it with getParseHead(), then reads
// further tokens in Lexer.IgnoreWhitespace mode to settle on a body,
// frameset or noframes element. In a frameset document, body content is
// routed into a noframes child of the frameset; otherwise noframes is
// replaced by an inferred body. The chosen node is appended to html and
// handed to parseTag.
100 public static class ParseHTML implements Parser {
// lexer: token source and report context.
// html:  the html element being populated.
// mode:  passed through to the head parser and to parseTag.
102 public void parse( Lexer lexer, Node html, short mode )
// First frameset seen, and the noframes element found or inferred inside it.
105 Node frameset = null;
106 Node noframes = null;
108 lexer.configuration.XmlTags = false;
109 lexer.seenBodyEndTag = 0;
110 TagTable tt = lexer.configuration.tt;
// Phase 1: look for the head element.
114 node = lexer.getToken(Lexer.IgnoreWhitespace);
// NOTE(review): the condition guarding this inference is not visible here;
// presumably it applies when the token stream is exhausted - confirm.
118 node = lexer.inferredTag("head");
122 if (node.tag == tt.tagHead)
// A stray end tag for html itself is reported.
125 if (node.tag == html.tag && node.type == Node.EndTag)
127 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
131 /* deal with comments etc. */
132 if (Node.insertMisc(html, node))
// NOTE(review): presumably reached for any other token, which is treated as
// the start of content and causes a head to be inferred - confirm.
136 node = lexer.inferredTag("head");
// Attach the head to html and parse its contents.
141 Node.insertNodeAtEnd(html, head);
142 getParseHead().parse(lexer, head, mode);
// Phase 2: look for body, frameset or noframes after the head.
146 node = lexer.getToken(Lexer.IgnoreWhitespace);
150 if (frameset == null) /* create an empty body */
151 node = lexer.inferredTag("body");
156 /* robustly handle html tags */
157 if (node.tag == html.tag)
159 if (node.type != Node.StartTag && frameset == null)
160 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
165 /* deal with comments etc. */
166 if (Node.insertMisc(html, node))
169 /* if frameset document coerce <body> to <noframes> */
170 if (node.tag == tt.tagBody)
// A body token that is not a start tag is reported.
172 if (node.type != Node.StartTag)
174 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
178 if (frameset != null)
// Create the noframes container on demand, as the last child of frameset.
182 if (noframes == null)
184 noframes = lexer.inferredTag("noframes");
185 Node.insertNodeAtEnd(frameset, noframes);
186 Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
189 parseTag(lexer, noframes, mode);
193 break; /* to parse body */
196 /* flag an error if we see more than one frameset */
197 if (node.tag == tt.tagFrameset)
199 if (node.type != Node.StartTag)
201 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
205 if (frameset != null)
206 Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
// Append the frameset to html and parse it.
210 Node.insertNodeAtEnd(html, node);
211 parseTag(lexer, node, mode);
214 see if it includes a noframes element so
215 that we can merge subsequent noframes elements
218 for (node = frameset.content; node != null; node = node.next)
220 if (node.tag == tt.tagNoframes)
226 /* if not a frameset document coerce <noframes> to <body> */
227 if (node.tag == tt.tagNoframes)
229 if (node.type != Node.StartTag)
231 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
// Without a frameset the noframes tag is reported and a body is inferred
// in its place.
235 if (frameset == null)
237 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
238 node = lexer.inferredTag("body");
// With a frameset, the noframes element is attached to it and parsed.
242 if (noframes == null)
245 Node.insertNodeAtEnd(frameset, noframes);
248 parseTag(lexer, noframes, mode);
// Elements whose model has Dict.CM_HEAD are relocated into the head.
252 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
254 if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
256 moveToHead(lexer, html, node);
263 /* insert other content into noframes element */
265 if (frameset != null)
267 if (noframes == null)
269 noframes = lexer.inferredTag("noframes");
270 Node.insertNodeAtEnd(frameset, noframes);
273 Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
275 parseTag(lexer, noframes, mode);
// NOTE(review): presumably the non-frameset path, where remaining content
// causes a body to be inferred - confirm.
279 node = lexer.inferredTag("body");
283 /* node must be body */
285 Node.insertNodeAtEnd(html, node);
286 parseTag(lexer, node, mode);
// Parser for the contents of the head element.
// Tokens are read in Lexer.IgnoreWhitespace mode until the lexer returns
// null. Comments and similar nodes go through Node.insertMisc, a doctype
// goes through Node.insertDocType, and tags with no dictionary entry
// (node.tag == null) are reported as DISCARDING_UNEXPECTED. Start tags and
// start-end tags are appended to head and parsed with parseTag; title and
// base can raise TOO_MANY_ELEMENTS and noscript raises TAG_NOT_ALLOWED_IN.
291 public static class ParseHead implements Parser {
// lexer: token source and report context.
// head:  the head element being populated.
// mode:  not referenced in the lines visible here.
293 public void parse( Lexer lexer, Node head, short mode )
298 TagTable tt = lexer.configuration.tt;
302 node = lexer.getToken(Lexer.IgnoreWhitespace);
303 if (node == null) break;
// End tag of the head element itself.
304 if (node.tag == head.tag && node.type == Node.EndTag)
// NOTE(review): the handling of text nodes is not visible in this view;
// presumably text ends head parsing - confirm.
310 if (node.type == Node.TextNode)
316 /* deal with comments etc. */
317 if (Node.insertMisc(head, node))
320 if (node.type == Node.DocTypeTag)
322 Node.insertDocType(lexer, head, node);
326 /* discard unknown tags */
327 if (node.tag == null)
329 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
// NOTE(review): the handling of tags whose model lacks Dict.CM_HEAD is not
// visible in this view; presumably such a tag ends head parsing - confirm.
333 if (!((node.tag.model & Dict.CM_HEAD) != 0))
339 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
// The counters that decide when TOO_MANY_ELEMENTS fires for title and base
// are not visible in this view.
341 if (node.tag == tt.tagTitle)
346 Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
348 else if (node.tag == tt.tagBase)
353 Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
355 else if (node.tag == tt.tagNoscript)
356 Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
// Attach the element to head and parse its contents.
358 Node.insertNodeAtEnd(head, node);
359 parseTag(lexer, node, Lexer.IgnoreWhitespace);
363 /* discard unexpected text nodes and end tags */
364 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
// A title element is inferred and appended, with a warning.
// NOTE(review): the condition guarding these two statements is not visible
// here; presumably they run only when no title was seen - confirm.
369 Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
370 Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
// Parser for the title element.
// Reads tokens in Lexer.MixedContent mode until the lexer returns null.
// Text nodes are appended to the title (the first one after
// Node.trimInitialSpace), comments and similar nodes go through
// Node.insertMisc, unknown tags are reported as DISCARDING_UNEXPECTED, and
// any other token is reported as MISSING_ENDTAG_BEFORE followed by
// Node.trimSpaces on the title.
376 public static class ParseTitle implements Parser {
// lexer: token source and report context.
// title: the title element being populated.
// mode:  not referenced in the lines visible here.
378 public void parse( Lexer lexer, Node title, short mode )
384 node = lexer.getToken(Lexer.MixedContent);
385 if (node == null) break;
// Matching end tag: trailing spaces are trimmed from the title content.
386 if (node.tag == title.tag && node.type == Node.EndTag)
389 Node.trimSpaces(lexer, title);
393 if (node.type == Node.TextNode)
395 /* only called for 1st child */
396 if (title.content == null)
397 Node.trimInitialSpace(lexer, title, node);
// NOTE(review): start >= end means the text node has no characters left;
// the statement guarded by this test is not visible, presumably the node
// is dropped - confirm.
399 if (node.start >= node.end)
404 Node.insertNodeAtEnd(title, node);
408 /* deal with comments etc. */
409 if (Node.insertMisc(title, node))
412 /* discard unknown tags */
413 if (node.tag == null)
415 Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
419 /* pushback unexpected tokens */
420 Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
422 Node.trimSpaces(lexer, title);
// NOTE(review): presumably reached when the token stream ends before the
// title end tag was seen - confirm.
426 Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
// Parser for elements whose content is taken as CDATA.
// Fetches the content with lexer.getCDATA(script) and appends the
// resulting node to the script element.
431 public static class ParseScript implements Parser {
// lexer:  source of the CDATA node.
// script: the element that receives the content node.
// mode:   not referenced in the lines visible here.
433 public void parse( Lexer lexer, Node script, short mode )
436 This isn't quite right for CDATA content as it recognises
437 tags within the content and parses them accordingly.
438 This will unfortunately screw up scripts which include
439 < + letter, < + !, < + ? or < + / + letter
444 node = lexer.getCDATA( script);
// NOTE(review): any null check around this insertion is not visible in
// this view - confirm before relying on it.
447 Node.insertNodeAtEnd(script, node);
// Parser for the body element.
// The incoming mode is overwritten with Lexer.IgnoreWhitespace, and the
// mode switches to Lexer.MixedContent once text or inline content has been
// added. The loop handles: the body end tag (sets lexer.seenBodyEndTag),
// noframes / frame / frameset tokens when the body sits inside noframes,
// stray html tags, text (wrapped in an inferred p when
// configuration.EncloseBodyText is set), doctype, unknown and param tags,
// head-only elements (via moveToHead), and list, definition-list and table
// parts that appear directly in body (an enclosing ul, dl or table is
// inferred). Remaining start tags are appended and parsed with parseTag;
// other tokens are reported as DISCARDING_UNEXPECTED.
452 public static class ParseBody implements Parser {
// lexer: token source, configuration and report context.
// body:  the body element being populated.
// mode:  ignored on entry; reassigned below.
454 public void parse( Lexer lexer, Node body, short mode )
457 boolean checkstack, iswhitenode;
459 mode = Lexer.IgnoreWhitespace;
461 TagTable tt = lexer.configuration.tt;
465 node = lexer.getToken(mode);
466 if (node == null) break;
// Body end tag: trim trailing space, remember that it was seen, and go
// back to ignoring whitespace.
467 if (node.tag == body.tag && node.type == Node.EndTag)
470 Node.trimSpaces(lexer, body);
471 lexer.seenBodyEndTag = 1;
472 mode = Lexer.IgnoreWhitespace;
// NOTE(review): the statement guarded by this test is not visible here;
// presumably a body nested in noframes stops at its end tag - confirm.
474 if (body.parent.tag == tt.tagNoframes)
// A noframes start tag is appended to body and parsed as a block.
480 if (node.tag == tt.tagNoframes)
482 if (node.type == Node.StartTag)
484 Node.insertNodeAtEnd(body, node);
485 getParseBlock().parse(lexer, node, mode);
// A noframes end tag while this body is inside noframes.
489 if (node.type == Node.EndTag &&
490 body.parent.tag == tt.tagNoframes)
492 Node.trimSpaces(lexer, body);
// frame or frameset tokens while this body is inside noframes.
498 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)
499 && body.parent.tag == tt.tagNoframes)
501 Node.trimSpaces(lexer, body);
// Stray html start tags are reported.
506 if (node.tag == tt.tagHtml)
508 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
509 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
// Test for a text node of at most one character whose first byte is a
// space; the assignment to iswhitenode is not visible in this view.
516 if (node.type == Node.TextNode &&
517 node.end <= node.start + 1 &&
518 node.textarray[node.start] == (byte)' ')
521 /* deal with comments etc. */
522 if (Node.insertMisc(body, node))
// First non-whitespace token after the body end tag: bump the marker so
// that CONTENT_AFTER_BODY is reported only once.
525 if (lexer.seenBodyEndTag == 1 && !iswhitenode)
527 ++lexer.seenBodyEndTag;
528 Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
531 /* mixed content model permits text */
532 if (node.type == Node.TextNode)
534 if (iswhitenode && mode == Lexer.IgnoreWhitespace)
// With EncloseBodyText set, non-whitespace text is wrapped in an inferred
// p element, which is appended to body and parsed.
539 if (lexer.configuration.EncloseBodyText && !iswhitenode)
544 para = lexer.inferredTag("p");
545 Node.insertNodeAtEnd(body, para);
546 parseTag(lexer, para, mode);
547 mode = Lexer.MixedContent;
550 else /* strict doesn't allow text here */
551 lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
557 if (lexer.inlineDup( node) > 0)
// The text node is appended directly to body.
561 Node.insertNodeAtEnd(body, node);
562 mode = Lexer.MixedContent;
566 if (node.type == Node.DocTypeTag)
568 Node.insertDocType(lexer, body, node);
571 /* discard unknown and PARAM tags */
572 if (node.tag == null || node.tag == tt.tagParam)
574 //TODO: message Fix...
575 //Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
580 Netscape allows LI and DD directly in BODY
581 We infer UL or DL respectively and use this
582 boolean to exclude block-level elements so as
583 to match Netscape's observed behaviour.
585 lexer.excludeBlocks = false;
587 if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
588 !((node.tag.model & Dict.CM_INLINE) != 0))
590 /* avoid this error message being issued twice */
591 if (!((node.tag.model & Dict.CM_HEAD) != 0))
592 Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
594 if ((node.tag.model & Dict.CM_HTML) != 0)
596 /* copy body attributes if current body was inferred */
597 if (node.tag == tt.tagBody && body.implicit
598 && body.attributes == null)
600 body.attributes = node.attributes;
601 node.attributes = null;
607 if ((node.tag.model & Dict.CM_HEAD) != 0)
609 moveToHead(lexer, body, node);
613 if ((node.tag.model & Dict.CM_LIST) != 0)
616 node = lexer.inferredTag( "ul");
617 Node.addClass(node, "noindent");
618 lexer.excludeBlocks = true;
620 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
623 node = lexer.inferredTag( "dl");
624 lexer.excludeBlocks = true;
626 else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
629 node = lexer.inferredTag( "table");
630 lexer.excludeBlocks = true;
634 /* AQ: The following line is from the official C
635 version of tidy. It doesn't make sense to me
636 because the '!' operator has higher precedence
637 than the '&' operator. It seems to me that the
638 expression always evaluates to 0.
640 if (!node->tag->model & (CM_ROW | CM_FIELD))
642 AQ: 13Jan2000 fixed in C tidy
644 if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
650 /* ignore </td> </th> <option> etc. */
655 if (node.type == Node.EndTag)
657 if (node.tag == tt.tagBr)
658 node.type = Node.StartTag;
659 else if (node.tag == tt.tagP)
661 Node.coerceNode(lexer, node, tt.tagBr);
662 Node.insertNodeAtEnd(body, node);
663 node = lexer.inferredTag("br");
665 else if ((node.tag.model & Dict.CM_INLINE) != 0)
666 lexer.popInline(node);
669 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
671 if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
673 /* HTML4 strict doesn't allow inline content here */
674 /* but HTML2 does allow img elements as children of body */
675 if (node.tag == tt.tagImg)
676 lexer.versions &= ~Dict.VERS_HTML40_STRICT;
678 lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
680 if (checkstack && !node.implicit)
684 if (lexer.inlineDup( node) > 0)
688 mode = Lexer.MixedContent;
693 mode = Lexer.IgnoreWhitespace;
697 Report.warning(lexer, body, node, Report.INSERTING_TAG);
699 Node.insertNodeAtEnd(body, node);
700 parseTag(lexer, node, mode);
704 /* discard unexpected tags */
705 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
// Parser for the frameset element.
// Records frame usage in lexer.badAccess, then reads tokens in
// Lexer.IgnoreWhitespace mode until the lexer returns null. Head-only
// elements are relocated with moveToHead, a body tag is replaced by an
// inferred noframes, elements whose model has Dict.CM_FRAMES are appended
// (start tags are also parsed), and everything else is reported as
// DISCARDING_UNEXPECTED.
711 public static class ParseFrameSet implements Parser {
// lexer:    token source and report context.
// frameset: the frameset element being populated.
// mode:     not referenced in the lines visible here.
713 public void parse( Lexer lexer, Node frameset, short mode )
716 TagTable tt = lexer.configuration.tt;
718 lexer.badAccess |= Report.USING_FRAMES;
722 node = lexer.getToken(Lexer.IgnoreWhitespace);
723 if (node == null) break;
// Matching end tag: mark the frameset closed and trim trailing space.
724 if (node.tag == frameset.tag && node.type == Node.EndTag)
726 frameset.closed = true;
727 Node.trimSpaces(lexer, frameset);
731 /* deal with comments etc. */
732 if (Node.insertMisc(frameset, node))
// Tags with no dictionary entry are reported.
735 if (node.tag == null)
737 Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
741 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
743 if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
745 moveToHead(lexer, frameset, node);
// A body tag inside a frameset is replaced by an inferred noframes.
750 if (node.tag == tt.tagBody)
753 node = lexer.inferredTag("noframes");
754 Report.warning(lexer, frameset, node, Report.INSERTING_TAG);
// Frame-model start tags are appended and parsed with block exclusion off.
757 if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0)
759 Node.insertNodeAtEnd(frameset, node);
760 lexer.excludeBlocks = false;
761 parseTag(lexer, node, Lexer.MixedContent);
// Frame-model start-end tags are appended without parsing.
764 else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0)
766 Node.insertNodeAtEnd(frameset, node);
770 /* discard unexpected tags */
771 Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
// NOTE(review): presumably reached when the token stream ends before the
// frameset end tag was seen - confirm.
774 Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
// Parser for inline elements, also used for some block-level elements
// (the comment further down names H1 to H6).
// Before the token loop it discards attribute-less anchors, replays or
// pushes inline-stack state (lexer.inlineDup / lexer.pushInline), and
// records nobr / font usage in lexer.badLayout. The loop then reads tokens
// in Lexer.MixedContent mode (or keeps Lexer.Preformatted) and handles:
// the matching end tag, repeated emphasis start tags, text, comments,
// stray html tags, p inside dt or pre, unknown and param tags, unmatched
// end tags, nested anchors, center / div / hr inside headings, hr inside
// dt, end tags of ancestors, block-level tags, and finally nested inline
// start tags, which are appended and parsed with parseTag.
779 public static class ParseInline implements Parser {
// lexer:   token source, inline stack and report context.
// element: the element being populated; reassigned to a clone when a
//          heading or dt is split.
// mode:    kept only if it equals Lexer.Preformatted, otherwise replaced
//          by Lexer.MixedContent.
781 public void parse( Lexer lexer, Node element, short mode )
784 TagTable tt = lexer.configuration.tt;
// NOTE(review): the statement guarded by this test is not visible here;
// presumably empty-model elements are not parsed further - confirm.
786 if ((element.tag.model & Dict.CM_EMPTY) != 0)
// An anchor with no attributes is reported and removed from the tree.
789 if (element.tag == tt.tagA)
791 if (element.attributes == null)
793 Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED);
794 Node.discardElement(element);
800 ParseInline is used for some block level elements like H1 to H6
801 For such elements we need to insert inline emphasis tags currently
802 on the inline stack. For Inline elements, we normally push them
803 onto the inline stack provided they aren't implicit or OBJECT/APPLET.
804 This test is carried out in PushInline and PopInline, see istack.c
805 We don't push A or SPAN to replicate current browser behavior
807 if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt))
808 lexer.inlineDup( null);
809 else if ((element.tag.model & Dict.CM_INLINE) != 0 &&
810 element.tag != tt.tagA && element.tag != tt.tagSpan)
811 lexer.pushInline( element);
// Record use of presentational elements for the layout report.
813 if (element.tag == tt.tagNobr)
814 lexer.badLayout |= Report.USING_NOBR;
815 else if (element.tag == tt.tagFont)
816 lexer.badLayout |= Report.USING_FONT;
818 /* Inline elements may or may not be within a preformatted element */
819 if (mode != Lexer.Preformatted)
820 mode = Lexer.MixedContent;
// Main token loop; ends when the lexer returns null.
824 node = lexer.getToken(mode);
825 if (node == null) break;
826 /* end tag for current element */
827 if (node.tag == element.tag && node.type == Node.EndTag)
829 if ((element.tag.model & Dict.CM_INLINE) != 0 &&
830 element.tag != tt.tagA)
831 lexer.popInline( node);
833 if (!((mode & Lexer.Preformatted) != 0))
834 Node.trimSpaces(lexer, element);
836 if a font element wraps an anchor and nothing else
837 then move the font element inside the anchor since
838 otherwise it won't alter the anchor text color
// content == last means the font element has exactly one child.
840 if (element.tag == tt.tagFont &&
841 element.content != null &&
842 element.content == element.last)
844 Node child = element.content;
846 if (child.tag == tt.tagA)
// Step 1: put the anchor where the font element was, relinking parent,
// siblings and the first / last child pointers of the parent.
848 child.parent = element.parent;
849 child.next = element.next;
850 child.prev = element.prev;
852 if (child.prev != null)
853 child.prev.next = child;
855 child.parent.content = child;
857 if (child.next != null)
858 child.next.prev = child;
860 child.parent.last = child;
// Step 2: make the font element the only child of the anchor and give it
// the former children of the anchor, updating their parent pointers.
864 element.parent = child;
865 element.content = child.content;
866 element.last = child.last;
867 child.content = element;
868 child.last = element;
869 for (child = element.content; child != null; child = child.next)
870 child.parent = element;
873 element.closed = true;
874 Node.trimSpaces(lexer, element);
875 Node.trimEmptyElement(lexer, element);
879 /* <u>...<u> map 2nd <u> to </u> if 1st is explicit */
880 /* otherwise emphasis nesting is probably unintentional */
881 /* big and small have cumulative effect to leave them alone */
882 if (node.type == Node.StartTag
883 && node.tag == element.tag
884 && lexer.isPushed(node)
887 && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0)
888 && node.tag != tt.tagA
889 && node.tag != tt.tagFont
890 && node.tag != tt.tagBig
891 && node.tag != tt.tagSmall)
// With existing content and no attributes on the repeat, the second start
// tag is turned into an end tag; otherwise only a warning is issued.
893 if (element.content != null && node.attributes == null)
895 Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
896 node.type = Node.EndTag;
901 Report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
904 if (node.type == Node.TextNode)
906 /* only called for 1st child */
907 if (element.content == null &&
908 !((mode & Lexer.Preformatted) != 0))
909 Node.trimSpaces(lexer, element);
// NOTE(review): start >= end means the text node has no characters left;
// the statement guarded by this test is not visible, presumably the node
// is dropped - confirm.
911 if (node.start >= node.end)
916 Node.insertNodeAtEnd(element, node);
920 /* mixed content model so allow text */
921 if (Node.insertMisc(element, node))
924 /* deal with HTML tags */
925 if (node.tag == tt.tagHtml)
927 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
929 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
933 /* otherwise infer end of inline element */
935 if (!((mode & Lexer.Preformatted) != 0))
936 Node.trimSpaces(lexer, element);
937 Node.trimEmptyElement(lexer, element);
941 /* within <dt> or <pre> map <p> to <br> */
942 if (node.tag == tt.tagP &&
943 node.type == Node.StartTag &&
944 ((mode & Lexer.Preformatted) != 0 ||
945 element.tag == tt.tagDt ||
946 element.isDescendantOf(tt.tagDt)))
950 Node.trimSpaces(lexer, element);
951 Node.insertNodeAtEnd(element, node);
955 /* ignore unknown and PARAM tags */
956 if (node.tag == null || node.tag == tt.tagParam)
958 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
// An end tag for br is treated as a br start tag.
962 if (node.tag == tt.tagBr && node.type == Node.EndTag)
963 node.type = Node.StartTag;
// Remaining end tags that do not match the current element.
965 if (node.type == Node.EndTag)
967 /* coerce </br> to <br> */
968 if (node.tag == tt.tagBr)
969 node.type = Node.StartTag;
970 else if (node.tag == tt.tagP)
972 /* coerce unmatched </p> to <br><br> */
973 if (!element.isDescendantOf(tt.tagP))
975 Node.coerceNode(lexer, node, tt.tagBr);
976 Node.trimSpaces(lexer, element);
977 Node.insertNodeAtEnd(element, node);
978 node = lexer.inferredTag("br");
982 else if ((node.tag.model & Dict.CM_INLINE) != 0
983 && node.tag != tt.tagA
984 && !((node.tag.model & Dict.CM_OBJECT) != 0)
985 && (element.tag.model & Dict.CM_INLINE) != 0)
987 /* allow any inline end tag to end current element */
988 lexer.popInline( element);
990 if (element.tag != tt.tagA)
992 if (node.tag == tt.tagA && node.tag != element.tag)
994 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
999 Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1002 if (!((mode & Lexer.Preformatted) != 0))
1003 Node.trimSpaces(lexer, element);
1004 Node.trimEmptyElement(lexer, element);
1008 /* if parent is <a> then discard unexpected inline end tag */
1009 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1011 } /* special case </tr> etc. for stuff moved in front of table */
1012 else if (lexer.exiled
1013 && node.tag.model != 0
1014 && (node.tag.model & Dict.CM_TABLE) != 0)
1017 Node.trimSpaces(lexer, element);
1018 Node.trimEmptyElement(lexer, element);
1023 /* allow any header tag to end current header */
1024 if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
1026 if (node.tag == element.tag)
1028 Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1032 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1035 if (!((mode & Lexer.Preformatted) != 0))
1036 Node.trimSpaces(lexer, element);
1037 Node.trimEmptyElement(lexer, element);
1042 an <A> tag to ends any open <A> element
1043 but <A href=...> is mapped to </A><A href=...>
1045 if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node))
1047 /* coerce <a> to </a> unless it has some attributes */
1048 if (node.attributes == null)
1050 node.type = Node.EndTag;
1051 Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
1052 lexer.popInline( node);
1058 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1059 lexer.popInline( element);
1060 if (!((mode & Lexer.Preformatted) != 0))
1061 Node.trimSpaces(lexer, element);
1062 Node.trimEmptyElement(lexer, element);
// Headings: center, div and hr are not allowed inside, so the heading is
// either wrapped / preceded by the new node (when still empty) or split,
// with a clone of the heading continuing after the inserted node.
1066 if ((element.tag.model & Dict.CM_HEADING) != 0)
1068 if (node.tag == tt.tagCenter ||
1069 node.tag == tt.tagDiv)
1071 if (node.type != Node.StartTag &&
1072 node.type != Node.StartEndTag)
1074 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1078 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1080 /* insert center as parent if heading is empty */
1081 if (element.content == null)
1083 Node.insertNodeAsParent(element, node);
1087 /* split heading and make center parent of 2nd part */
1088 Node.insertNodeAfterElement(element, node);
1090 if (!((mode & Lexer.Preformatted) != 0))
1091 Node.trimSpaces(lexer, element);
// The clone gets an empty text range at the current end of the lexer
// buffer and becomes the element that parsing continues in.
1093 element = lexer.cloneNode(element);
1094 element.start = lexer.lexsize;
1095 element.end = lexer.lexsize;
1096 Node.insertNodeAtEnd(node, element);
1100 if (node.tag == tt.tagHr)
1102 if (node.type != Node.StartTag &&
1103 node.type != Node.StartEndTag)
1105 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1109 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1111 /* insert hr before heading if heading is empty */
1112 if (element.content == null)
1114 Node.insertNodeBeforeElement(element, node);
1118 /* split heading and insert hr before 2nd part */
1119 Node.insertNodeAfterElement(element, node);
1121 if (!((mode & Lexer.Preformatted) != 0))
1122 Node.trimSpaces(lexer, element);
1124 element = lexer.cloneNode(element);
1125 element.start = lexer.lexsize;
1126 element.end = lexer.lexsize;
1127 Node.insertNodeAfterElement(node, element);
// dt: an hr is moved into an inferred dd placed before or after the dt.
1132 if (element.tag == tt.tagDt)
1134 if (node.tag == tt.tagHr)
1138 if (node.type != Node.StartTag &&
1139 node.type != Node.StartEndTag)
1141 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1145 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1146 dd = lexer.inferredTag("dd");
1148 /* insert hr within dd before dt if dt is empty */
1149 if (element.content == null)
1151 Node.insertNodeBeforeElement(element, dd);
1152 Node.insertNodeAtEnd(dd, node);
1156 /* split dt and insert hr within dd before 2nd part */
1157 Node.insertNodeAfterElement(element, dd);
1158 Node.insertNodeAtEnd(dd, node);
1160 if (!((mode & Lexer.Preformatted) != 0))
1161 Node.trimSpaces(lexer, element);
1163 element = lexer.cloneNode(element);
1164 element.start = lexer.lexsize;
1165 element.end = lexer.lexsize;
1166 Node.insertNodeAfterElement(dd, element);
1173 if this is the end tag for an ancestor element
1174 then infer end tag for this element
1176 if (node.type == Node.EndTag)
1178 for (parent = element.parent;
1179 parent != null; parent = parent.parent)
1181 if (node.tag == parent.tag)
1183 if (!((element.tag.model & Dict.CM_OPT) != 0) &&
1185 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1187 if (element.tag == tt.tagA)
1188 lexer.popInline(element);
1192 if (!((mode & Lexer.Preformatted) != 0))
1193 Node.trimSpaces(lexer, element);
1195 Node.trimEmptyElement(lexer, element);
1201 /* block level tags end this element */
1202 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1204 if (node.type != Node.StartTag)
1206 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1210 if (!((element.tag.model & Dict.CM_OPT) != 0))
1211 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
// Head-only, non-block elements are relocated into the head.
1213 if ((node.tag.model & Dict.CM_HEAD) != 0 &&
1214 !((node.tag.model & Dict.CM_BLOCK) != 0))
1216 moveToHead(lexer, element, node);
1221 prevent anchors from propagating into block tags
1222 except for headings h1 to h6
1224 if (element.tag == tt.tagA)
1226 if (node.tag != null &&
1227 !((node.tag.model & Dict.CM_HEADING) != 0))
1228 lexer.popInline(element);
1229 else if (!(element.content != null))
1231 Node.discardElement(element);
1239 if (!((mode & Lexer.Preformatted) != 0))
1240 Node.trimSpaces(lexer, element);
1242 Node.trimEmptyElement(lexer, element);
1246 /* parse inline element */
1247 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1250 Report.warning(lexer, element, node, Report.INSERTING_TAG);
1252 /* trim white space before <br> */
1253 if (node.tag == tt.tagBr)
1254 Node.trimSpaces(lexer, element);
1256 Node.insertNodeAtEnd(element, node);
1257 parseTag(lexer, node, mode);
1261 /* discard unexpected tags */
1262 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
// NOTE(review): presumably reached when the token stream ends before the
// end tag was seen; elements whose model has Dict.CM_OPT are not
// reported - confirm.
1265 if (!((element.tag.model & Dict.CM_OPT) != 0))
1266 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
1268 Node.trimEmptyElement(lexer, element);
// Parser for list elements.
// Reads tokens in Lexer.IgnoreWhitespace mode until the lexer returns
// null. A list whose tag model has Dict.CM_OBSOLETE is coerced to
// tt.tagUl when it ends. Tokens other than li either end the list (block
// elements while lexer.excludeBlocks is set) or are wrapped in an inferred
// li carrying the style attribute list-style: none. Each li is appended
// to the list and parsed with parseTag.
1272 public static class ParseList implements Parser {
// lexer: token source and report context.
// list:  the list element being populated.
// mode:  not referenced in the lines visible here.
1274 public void parse( Lexer lexer, Node list, short mode )
1278 TagTable tt = lexer.configuration.tt;
// NOTE(review): the statement guarded by this test is not visible here;
// presumably empty-model elements are not parsed further - confirm.
1280 if ((list.tag.model & Dict.CM_EMPTY) != 0)
1283 lexer.insert = -1; /* defer implicit inline start tags */
1287 node = lexer.getToken(Lexer.IgnoreWhitespace);
1288 if (node == null) break;
// Matching end tag: normalise obsolete list types and drop the list if it
// ended up empty.
1290 if (node.tag == list.tag && node.type == Node.EndTag)
1292 if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1293 Node.coerceNode(lexer, list, tt.tagUl);
1296 Node.trimEmptyElement(lexer, list);
1300 /* deal with comments etc. */
1301 if (Node.insertMisc(list, node))
// Non-text tokens with no dictionary entry are reported.
1304 if (node.type != Node.TextNode && node.tag == null)
1306 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1311 if this is the end tag for an ancestor element
1312 then infer end tag for this element
1314 if (node.type == Node.EndTag)
// An end tag for form is reported.
1316 if (node.tag == tt.tagForm)
1319 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
// Inline end tags are reported and popped from the inline stack.
1323 if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
1325 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1326 lexer.popInline(node);
// Walk up the ancestors looking for the element this end tag belongs to.
1330 for (parent = list.parent;
1331 parent != null; parent = parent.parent)
1333 if (node.tag == parent.tag)
1335 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1338 if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1339 Node.coerceNode(lexer, list, tt.tagUl);
1341 Node.trimEmptyElement(lexer, list);
1346 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
// Anything that is not an li.
1350 if (node.tag != tt.tagLi)
// Block elements end the list while excludeBlocks is set.
1354 if (node.tag != null &&
1355 (node.tag.model & Dict.CM_BLOCK) != 0 &&
1356 lexer.excludeBlocks)
1358 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1359 Node.trimEmptyElement(lexer, list);
// Otherwise an li without a bullet is inferred to hold the content.
1363 node = lexer.inferredTag("li");
1364 node.addAttribute("style", "list-style: none");
1365 Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1368 /* node should be <LI> */
1369 Node.insertNodeAtEnd(list, node);
1370 parseTag(lexer, node, Lexer.IgnoreWhitespace);
// NOTE(review): presumably reached when the token stream ends before the
// list end tag was seen - confirm.
1373 if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1374 Node.coerceNode(lexer, list, tt.tagUl);
1376 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1377 Node.trimEmptyElement(lexer, list);
// Parser for definition lists.
// Reads tokens in Lexer.IgnoreWhitespace mode until the lexer returns
// null. Text gets an inferred dt, a center element splits the list in two
// (a new inferred dl continues after it), and other elements that are
// neither dt nor dd either end the list or get an inferred dd. Each dt or
// dd is appended to the list and parsed with parseTag.
1382 public static class ParseDefList implements Parser {
// lexer: token source and report context.
// list:  the dl element being populated; reassigned to a new inferred dl
//        after a center element.
// mode:  passed to parseTag when parsing a center element.
1384 public void parse( Lexer lexer, Node list, short mode )
1387 TagTable tt = lexer.configuration.tt;
// NOTE(review): the statement guarded by this test is not visible here;
// presumably empty-model elements are not parsed further - confirm.
1389 if ((list.tag.model & Dict.CM_EMPTY) != 0)
1392 lexer.insert = -1; /* defer implicit inline start tags */
1396 node = lexer.getToken(Lexer.IgnoreWhitespace);
1397 if (node == null) break;
// Matching end tag: drop the list if it ended up empty.
1398 if (node.tag == list.tag && node.type == Node.EndTag)
1401 Node.trimEmptyElement(lexer, list);
1405 /* deal with comments etc. */
1406 if (Node.insertMisc(list, node))
// Text directly inside the list gets an inferred dt.
1409 if (node.type == Node.TextNode)
1412 node = lexer.inferredTag( "dt");
1413 Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
// Tags with no dictionary entry are reported.
1416 if (node.tag == null)
1418 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1423 if this is the end tag for an ancestor element
1424 then infer end tag for this element
1426 if (node.type == Node.EndTag)
1428 if (node.tag == tt.tagForm)
1431 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
// Walk up the ancestors looking for the element this end tag belongs to.
1435 for (parent = list.parent;
1436 parent != null; parent = parent.parent)
1438 if (node.tag == parent.tag)
1440 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1443 Node.trimEmptyElement(lexer, list);
1449 /* center in a dt or a dl breaks the dl list in two */
1450 if (node.tag == tt.tagCenter)
1452 if (list.content != null)
1453 Node.insertNodeAfterElement(list, node);
1454 else /* trim empty dl list */
1456 Node.insertNodeBeforeElement(list, node);
1457 Node.discardElement(list);
1460 /* and parse contents of center */
1461 parseTag(lexer, node, mode);
1463 /* now create a new dl element */
1464 list = lexer.inferredTag("dl");
1465 Node.insertNodeAfterElement(node, list);
// Anything that is neither dt nor dd.
1469 if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
// Elements that are neither block nor inline are not allowed here.
1473 if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
1475 Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
1476 Node.trimEmptyElement(lexer, list);
1480 /* if DD appeared directly in BODY then exclude blocks */
1481 if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
1483 Node.trimEmptyElement(lexer, list);
// Otherwise a dd is inferred to hold the content.
1487 node = lexer.inferredTag( "dd");
1488 Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
// Remaining end tags are reported.
1491 if (node.type == Node.EndTag)
1493 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1497 /* node should be <DT> or <DD>*/
1498 Node.insertNodeAtEnd(list, node);
1499 parseTag(lexer, node, Lexer.IgnoreWhitespace);
// NOTE(review): presumably reached when the token stream ends before the
// dl end tag was seen - confirm.
1502 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1503 Node.trimEmptyElement(lexer, list);
/*
 * Parser for the content of a preformatted element ("pre").
 * parse() pulls tokens from the lexer in Lexer.Preformatted mode and either
 * appends them to "pre", discards them with a warning, or relocates them.
 * The token loop is left when the lexer returns null; in that case a
 * MISSING_ENDTAG_FOR warning is reported and the element is trimmed.
 *
 * Parameters of parse():
 *   lexer - token source and holder of parser state (excludeBlocks, configuration)
 *   pre   - the element whose content is being parsed
 *   mode  - not read in the visible lines; tokens are always requested
 *           with Lexer.Preformatted
 */
1508 public static class ParsePre implements Parser {
1510 public void parse( Lexer lexer, Node pre, short mode )
1513 TagTable tt = lexer.configuration.tt;
/* test for a tag whose content model is flagged as empty */
1515 if ((pre.tag.model & Dict.CM_EMPTY) != 0)
/* obsolete tags routed to this parser are rewritten as a plain <pre> */
1518 if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
1519 Node.coerceNode(lexer, pre, tt.tagPre);
1521 lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
1525 node = lexer.getToken(Lexer.Preformatted);
1526 if (node == null) break;
/* end tag for this element: trim trailing space and drop the element if empty */
1527 if (node.tag == pre.tag && node.type == Node.EndTag)
1529 Node.trimSpaces(lexer, pre);
1531 Node.trimEmptyElement(lexer, pre);
/* <html> tags inside pre: only start tags are reported */
1535 if (node.tag == tt.tagHtml)
1537 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1538 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1543 if (node.type == Node.TextNode)
1545 /* if first check for initial newline */
1546 if (pre.content == null)
1548 if (node.textarray[node.start] == (byte)'\n')
/* tests whether the text node has no characters left */
1551 if (node.start >= node.end)
1557 Node.insertNodeAtEnd(pre, node);
1561 /* deal with comments etc. */
1562 if (Node.insertMisc(pre, node))
1565 /* discard unknown and PARAM tags */
1566 if (node.tag == null || node.tag == tt.tagParam)
1568 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
/* <p> is not kept inside pre: a start tag is replaced by <br> */
1572 if (node.tag == tt.tagP)
1574 if (node.type == Node.StartTag)
1576 Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
1578 /* trim white space before <p> in <pre>*/
1579 Node.trimSpaces(lexer, pre);
1581 /* coerce both <p> and </p> to <br> */
1582 Node.coerceNode(lexer, node, tt.tagBr);
1583 Node.insertNodeAtEnd(pre, node);
1587 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
/* head-only content (CM_HEAD but not CM_BLOCK) is handed to moveToHead */
1592 if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
1594 moveToHead(lexer, pre, node);
1599 if this is the end tag for an ancestor element
1600 then infer end tag for this element
1602 if (node.type == Node.EndTag)
1604 if (node.tag == tt.tagForm)
1607 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
/* walk up the ancestor chain looking for an element with the same tag */
1611 for (parent = pre.parent;
1612 parent != null; parent = parent.parent)
1614 if (node.tag == parent.tag)
1616 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1619 Node.trimSpaces(lexer, pre);
1620 Node.trimEmptyElement(lexer, pre);
1626 /* what about head content, HEAD, BODY tags etc? */
1627 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1629 if (node.type != Node.StartTag)
1631 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
/*
 * A non-inline start tag: the token is placed after the current pre, a new
 * inferred pre is created after it, and parsing continues in that new pre.
 * excludeBlocks is set while the token's content is parsed.
 */
1635 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1636 lexer.excludeBlocks = true;
1638 /* check if we need to infer a container */
1639 if ((node.tag.model & Dict.CM_LIST) != 0)
1642 node = lexer.inferredTag( "ul");
1643 Node.addClass(node, "noindent");
1645 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1648 node = lexer.inferredTag( "dl");
1650 else if ((node.tag.model & Dict.CM_TABLE) != 0)
1653 node = lexer.inferredTag( "table");
1656 Node.insertNodeAfterElement(pre, node);
1657 pre = lexer.inferredTag( "pre");
1658 Node.insertNodeAfterElement(node, pre);
1659 parseTag(lexer, node, Lexer.IgnoreWhitespace);
1660 lexer.excludeBlocks = false;
1664 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1666 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
/* remaining start tags become children and are parsed in Preformatted mode */
1671 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1673 /* trim white space before <br> */
1674 if (node.tag == tt.tagBr)
1675 Node.trimSpaces(lexer, pre);
1677 Node.insertNodeAtEnd(pre, node);
1678 parseTag(lexer, node, Lexer.Preformatted);
1682 /* discard unexpected tags */
1683 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
/* reports the missing end tag for pre and trims the element */
1686 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
1687 Node.trimEmptyElement(lexer, pre);
/*
 * Parser for the content of block-level elements.
 * parse() reads tokens from the lexer and, depending on the token and on the
 * content model bits of "element" and of the token's tag, appends the token,
 * wraps it in an inferred container, moves it elsewhere, discards it with a
 * warning, or treats it as an implied end of "element".
 *
 * Parameters of parse():
 *   lexer   - token source; its istack/istackbase, excludeBlocks and versions
 *             fields are read and updated here
 *   element - the block element being filled
 *   mode    - lexer mode; overwritten with Lexer.IgnoreWhitespace before the
 *             first token is read and switched as content is seen
 */
1692 public static class ParseBlock implements Parser {
1694 public void parse( Lexer lexer, Node element, short mode )
1696 element is node created by the lexer
1697 upon seeing the start tag, or by the
1698 parser when the start tag is inferred
1704 TagTable tt = lexer.configuration.tt;
1708 if ((element.tag.model & Dict.CM_EMPTY) != 0)
/* a form nested inside another form is only reported, not rejected here */
1711 if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
1712 Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
1715 InlineDup() asks the lexer to insert inline emphasis tags
1716 currently pushed on the istack, but take care to avoid
1717 propagating inline emphasis inside OBJECT or APPLET.
1718 For these elements a fresh inline stack context is created
1719 and disposed of upon reaching the end of the element.
1720 They thus behave like table cells in this respect.
/* remember the caller's inline stack base and start a new one at the current top */
1722 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1724 istackbase = lexer.istackbase;
1725 lexer.istackbase = lexer.istack.size();
1728 if (!((element.tag.model & Dict.CM_MIXED) != 0))
1729 lexer.inlineDup( null);
1731 mode = Lexer.IgnoreWhitespace;
1735 node = lexer.getToken(mode /*Lexer.MixedContent*/);
1736 if (node == null) break;
1737 /* end tag for this element */
/* element.was is also compared, so the end tag of a pre-coercion tag matches too (presumably set by coerceNode; confirm) */
1738 if (node.type == Node.EndTag && node.tag != null &&
1739 (node.tag == element.tag || element.was == node.tag))
1742 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1744 /* pop inline stack */
1745 while (lexer.istack.size() > lexer.istackbase)
1746 lexer.popInline( null);
1747 lexer.istackbase = istackbase;
1750 element.closed = true;
1751 Node.trimSpaces(lexer, element);
1752 Node.trimEmptyElement(lexer, element);
/* html, head and body tags are not content of a block; start tags are reported */
1756 if (node.tag == tt.tagHtml ||
1757 node.tag == tt.tagHead ||
1758 node.tag == tt.tagBody)
1760 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1761 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1766 if (node.type == Node.EndTag)
1768 if (node.tag == null)
1770 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
/* </br> is treated as <br>; </p> is turned into <br> and followed by a second inferred <br> */
1774 else if (node.tag == tt.tagBr)
1775 node.type = Node.StartTag;
1776 else if (node.tag == tt.tagP)
1778 Node.coerceNode(lexer, node, tt.tagBr);
1779 Node.insertNodeAtEnd(element, node);
1780 node = lexer.inferredTag("br");
1785 if this is the end tag for an ancestor element
1786 then infer end tag for this element
1788 for (parent = element.parent;
1789 parent != null; parent = parent.parent)
1791 if (node.tag == parent.tag)
/* elements with an optional end tag (CM_OPT) are closed silently */
1793 if (!((element.tag.model & Dict.CM_OPT) != 0))
1794 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1798 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1800 /* pop inline stack */
1801 while (lexer.istack.size() > lexer.istackbase)
1802 lexer.popInline( null);
1803 lexer.istackbase = istackbase;
1806 Node.trimSpaces(lexer, element);
1807 Node.trimEmptyElement(lexer, element);
1811 /* special case </tr> etc. for stuff moved in front of table */
1813 && node.tag.model != 0
1814 && (node.tag.model & Dict.CM_TABLE) != 0)
1817 Node.trimSpaces(lexer, element);
1818 Node.trimEmptyElement(lexer, element);
1824 /* mixed content model permits text */
1825 if (node.type == Node.TextNode)
1827 boolean iswhitenode = false;
/*
 * Detects a text node of at most one character that is a single space.
 * NOTE(review): the type test repeats the enclosing condition, and the
 * character is read from lexer.lexbuf while other code in this file reads
 * node.textarray - confirm both refer to the same buffer.
 */
1829 if (node.type == Node.TextNode &&
1830 node.end <= node.start + 1 &&
1831 lexer.lexbuf[node.start] == (byte)' ')
/* with EncloseBlockText set, non-blank text is wrapped in an inferred <p> */
1834 if (lexer.configuration.EncloseBlockText && !iswhitenode)
1837 node = lexer.inferredTag("p");
1838 Node.insertNodeAtEnd(element, node);
1839 parseTag(lexer, node, Lexer.MixedContent);
1847 if (!((element.tag.model & Dict.CM_MIXED) != 0))
1849 if (lexer.inlineDup( node) > 0)
1854 Node.insertNodeAtEnd(element, node);
1855 mode = Lexer.MixedContent;
1857 HTML4 strict doesn't allow mixed content for
1858 elements with %block; as their content model
1860 lexer.versions &= ~Dict.VERS_HTML40_STRICT;
/* comments, processing instructions and similar are handled by insertMisc */
1864 if (Node.insertMisc(element, node))
1867 /* allow PARAM elements? */
1868 if (node.tag == tt.tagParam)
1870 if (((element.tag.model & Dict.CM_PARAM) != 0) &&
1871 (node.type == Node.StartTag || node.type == Node.StartEndTag))
1873 Node.insertNodeAtEnd(element, node);
1877 /* otherwise discard it */
1878 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1882 /* allow AREA elements? */
1883 if (node.tag == tt.tagArea)
1885 if ((element.tag == tt.tagMap) &&
1886 (node.type == Node.StartTag || node.type == Node.StartEndTag))
1888 Node.insertNodeAtEnd(element, node);
1892 /* otherwise discard it */
1893 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1897 /* ignore unknown start/end tags */
1898 if (node.tag == null)
1900 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1905 Allow Dict.CM_INLINE elements here.
1907 Allow Dict.CM_BLOCK elements here unless
1908 lexer.excludeBlocks is yes.
1910 LI and DD are special cased.
1912 Otherwise infer end tag for this element.
1915 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1917 if (node.type != Node.StartTag && node.type != Node.StartEndTag)
1919 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1923 if (element.tag == tt.tagTd || element.tag == tt.tagTh)
1925 /* if parent is a table cell, avoid inferring the end of the cell */
1927 if ((node.tag.model & Dict.CM_HEAD) != 0)
1929 moveToHead(lexer, element, node);
/* list and definition-list items inside a cell get an inferred ul or dl container */
1933 if ((node.tag.model & Dict.CM_LIST) != 0)
1936 node = lexer.inferredTag( "ul");
1937 Node.addClass(node, "noindent");
1938 lexer.excludeBlocks = true;
1940 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1943 node = lexer.inferredTag( "dl");
1944 lexer.excludeBlocks = true;
1947 /* infer end of current table cell */
1948 if (!((node.tag.model & Dict.CM_BLOCK) != 0))
1951 Node.trimSpaces(lexer, element);
1952 Node.trimEmptyElement(lexer, element);
1956 else if ((node.tag.model & Dict.CM_BLOCK) != 0)
1958 if (lexer.excludeBlocks)
1960 if (!((element.tag.model & Dict.CM_OPT) != 0))
1961 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
/*
 * NOTE(review): unlike the other exits for CM_OBJECT elements, this path
 * restores istackbase without first popping the inline stack - confirm
 * that this is intended.
 */
1965 if ((element.tag.model & Dict.CM_OBJECT) != 0)
1966 lexer.istackbase = istackbase;
1968 Node.trimSpaces(lexer, element);
1969 Node.trimEmptyElement(lexer, element);
1973 else /* things like list items */
1975 if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
1976 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1978 if ((node.tag.model & Dict.CM_HEAD) != 0)
1980 moveToHead(lexer, element, node);
1986 if ((node.tag.model & Dict.CM_LIST) != 0)
/* when the parent is itself parsed as a list, the item ends this element instead of being wrapped */
1988 if (element.parent != null && element.parent.tag != null &&
1989 element.parent.tag.parser == getParseList())
1991 Node.trimSpaces(lexer, element);
1992 Node.trimEmptyElement(lexer, element);
1996 node = lexer.inferredTag("ul");
1997 Node.addClass(node, "noindent");
1999 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
/*
 * NOTE(review): element.parent is dereferenced here without the null
 * check used in the CM_LIST branch above - confirm parent cannot be null.
 */
2001 if (element.parent.tag == tt.tagDl)
2003 Node.trimSpaces(lexer, element);
2004 Node.trimEmptyElement(lexer, element);
2008 node = lexer.inferredTag("dl");
2010 else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
2011 (node.tag.model & Dict.CM_ROW) != 0)
2013 node = lexer.inferredTag("table");
2015 else if ((element.tag.model & Dict.CM_OBJECT) != 0)
2017 /* pop inline stack */
2018 while (lexer.istack.size() > lexer.istackbase)
2019 lexer.popInline( null);
2020 lexer.istackbase = istackbase;
2021 Node.trimSpaces(lexer, element);
2022 Node.trimEmptyElement(lexer, element);
2028 Node.trimSpaces(lexer, element);
2029 Node.trimEmptyElement(lexer, element);
2035 /* parse known element */
2036 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
/* inline children switch the lexer to MixedContent, others back to IgnoreWhitespace */
2038 if ((node.tag.model & Dict.CM_INLINE) != 0)
2040 if (checkstack && !node.implicit)
2044 if (lexer.inlineDup( node) > 0)
2048 mode = Lexer.MixedContent;
2053 mode = Lexer.IgnoreWhitespace;
2056 /* trim white space before <br> */
2057 if (node.tag == tt.tagBr)
2058 Node.trimSpaces(lexer, element);
2060 Node.insertNodeAtEnd(element, node);
2063 Report.warning(lexer, element, node, Report.INSERTING_TAG);
2065 parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
2069 /* discard unexpected tags */
2070 if (node.type == Node.EndTag)
2071 lexer.popInline( node); /* if inline end tag */
2073 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
/* end-of-input handling: report the missing end tag unless it is optional, then clean up */
2076 if (!((element.tag.model & Dict.CM_OPT) != 0))
2077 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
2079 if ((element.tag.model & Dict.CM_OBJECT) != 0)
2081 /* pop inline stack */
2082 while (lexer.istack.size() > lexer.istackbase)
2083 lexer.popInline( null);
2084 lexer.istackbase = istackbase;
2087 Node.trimSpaces(lexer, element);
2088 Node.trimEmptyElement(lexer, element);
/*
 * Parser for the content of a table element.
 * parse() gives the table its own inline stack base for the duration of the
 * call (saved on entry, restored on every visible exit path), reads tokens in
 * Lexer.IgnoreWhitespace mode and keeps only table-model children. Cells
 * and nested tables get an inferred <tr>; text, block and inline content is
 * inserted before the table; head content goes to moveToHead.
 *
 * Parameters of parse():
 *   lexer - token source; istack/istackbase and exiled are updated here
 *   table - the table element being filled
 *   mode  - not read in the visible lines
 */
2093 public static class ParseTableTag implements Parser {
2095 public void parse( Lexer lexer, Node table, short mode )
2099 TagTable tt = lexer.configuration.tt;
/* save the caller's inline stack base and start a fresh one at the current top */
2102 istackbase = lexer.istackbase;
2103 lexer.istackbase = lexer.istack.size();
2107 node = lexer.getToken(Lexer.IgnoreWhitespace);
2108 if (node == null) break;
/* end tag of the table itself */
2109 if (node.tag == table.tag && node.type == Node.EndTag)
2111 lexer.istackbase = istackbase;
2112 table.closed = true;
2113 Node.trimEmptyElement(lexer, table);
2117 /* deal with comments etc. */
2118 if (Node.insertMisc(table, node))
2121 /* discard unknown tags */
2122 if (node.tag == null && node.type != Node.TextNode)
2124 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2128 /* if TD or TH or text or inline or block then infer <TR> */
2130 if (node.type != Node.EndTag)
2132 if (node.tag == tt.tagTd ||
2133 node.tag == tt.tagTh ||
2134 node.tag == tt.tagTable)
2137 node = lexer.inferredTag( "tr");
2138 Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
2140 else if (node.type == Node.TextNode
2141 || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
/* content that cannot live in a table is placed before it; exiled is set while that happens */
2143 Node.insertNodeBeforeElement(table, node);
2144 Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2145 lexer.exiled = true;
2148 Line 2040 of parser.c (13 Jan 2000) reads as follows:
2149 if (!node->type == TextNode)
2150 This will always evaluate to false.
2151 This has been reported to Dave Raggett <dsr@w3.org>
2153 //Should be?: if (!(node.type == Node.TextNode))
2155 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2157 lexer.exiled = false;
2160 else if ((node.tag.model & Dict.CM_HEAD) != 0)
2162 moveToHead(lexer, table, node);
2168 if this is the end tag for an ancestor element
2169 then infer end tag for this element
2171 if (node.type == Node.EndTag)
2173 if (node.tag == tt.tagForm)
2176 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
/* stray end tags of table or row model elements are dropped */
2180 if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0)
2182 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2186 for (parent = table.parent;
2187 parent != null; parent = parent.parent)
2189 if (node.tag == parent.tag)
2191 Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
2193 lexer.istackbase = istackbase;
2194 Node.trimEmptyElement(lexer, table);
/* a tag outside the table content model: report it, restore the inline stack base and trim */
2200 if (!((node.tag.model & Dict.CM_TABLE) != 0))
2203 Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2204 lexer.istackbase = istackbase;
2205 Node.trimEmptyElement(lexer, table);
2209 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2211 Node.insertNodeAtEnd(table, node);;
2212 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2216 /* discard unexpected text nodes and end tags */
2217 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
/* reports the missing </table>, trims the element and restores the inline stack base */
2220 Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
2221 Node.trimEmptyElement(lexer, table);
2222 lexer.istackbase = istackbase;
/*
 * Parser for the content of a colgroup element.
 * parse() reads tokens in Lexer.IgnoreWhitespace mode. The visible lines
 * show that the matching end tag marks the colgroup closed, that unknown
 * tags and stray end tags are discarded with a warning, and that accepted
 * nodes (expected to be <col>) are appended and parsed.
 *
 * Parameters of parse():
 *   lexer    - token source
 *   colgroup - the element being filled
 *   mode     - not read in the visible lines
 */
2227 public static class ParseColGroup implements Parser {
2229 public void parse( Lexer lexer, Node colgroup, short mode )
2232 TagTable tt = lexer.configuration.tt;
2234 if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
2239 node = lexer.getToken(Lexer.IgnoreWhitespace);
2240 if (node == null) break;
/* end tag of the colgroup itself */
2241 if (node.tag == colgroup.tag && node.type == Node.EndTag)
2243 colgroup.closed = true;
2248 if this is the end tag for an ancestor element
2249 then infer end tag for this element
2251 if (node.type == Node.EndTag)
2253 if (node.tag == tt.tagForm)
2256 Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
/* search the ancestors for an element with the same tag as this end tag */
2260 for (parent = colgroup.parent;
2261 parent != null; parent = parent.parent)
2264 if (node.tag == parent.tag)
2272 if (node.type == Node.TextNode)
2278 /* deal with comments etc. */
2279 if (Node.insertMisc(colgroup, node))
2282 /* discard unknown tags */
2283 if (node.tag == null)
2285 Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
/* anything other than <col> is tested for here */
2289 if (node.tag != tt.tagCol)
2295 if (node.type == Node.EndTag)
2297 Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2301 /* node should be <COL> */
2302 Node.insertNodeAtEnd(colgroup, node);
2303 parseTag(lexer, node, Lexer.IgnoreWhitespace);
/*
 * Parser for the content of a row group (the code comments name THEAD,
 * TFOOT and TBODY).
 * parse() reads tokens in Lexer.IgnoreWhitespace mode. Cells get an inferred
 * <tr>; text, block and inline content is moved before the table; head
 * content is moved to the head; another row group or </table> ends this one.
 * Accepted rows are appended and parsed.
 *
 * Parameters of parse():
 *   lexer    - token source; its exiled flag is toggled while content is moved out
 *   rowgroup - the element being filled
 *   mode     - not read in the visible lines
 */
2309 public static class ParseRowGroup implements Parser {
2311 public void parse( Lexer lexer, Node rowgroup, short mode )
2314 TagTable tt = lexer.configuration.tt;
2316 if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
2321 node = lexer.getToken(Lexer.IgnoreWhitespace);
2322 if (node == null) break;
/* a tag with the same name as this row group; only the end tag marks it closed */
2323 if (node.tag == rowgroup.tag)
2325 if (node.type == Node.EndTag)
2327 rowgroup.closed = true;
2328 Node.trimEmptyElement(lexer, rowgroup);
2336 /* if </table> infer end tag */
2337 if (node.tag == tt.tagTable && node.type == Node.EndTag)
2340 Node.trimEmptyElement(lexer, rowgroup);
2344 /* deal with comments etc. */
2345 if (Node.insertMisc(rowgroup, node))
2348 /* discard unknown tags */
2349 if (node.tag == null && node.type != Node.TextNode)
2351 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2356 if TD or TH then infer <TR>
2357 if text or inline or block move before table
2358 if head content move to head
2361 if (node.type != Node.EndTag)
2363 if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2366 node = lexer.inferredTag("tr");
2367 Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2369 else if (node.type == Node.TextNode
2370 || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2372 Node.moveBeforeTable(rowgroup, node, tt);
2373 Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2374 lexer.exiled = true;
/* only element nodes have content to parse; text nodes are just moved */
2376 if (node.type != Node.TextNode)
2377 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2379 lexer.exiled = false;
2382 else if ((node.tag.model & Dict.CM_HEAD) != 0)
2384 Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2385 moveToHead(lexer, rowgroup, node);
2391 if this is the end tag for ancestor element
2392 then infer end tag for this element
2394 if (node.type == Node.EndTag)
2396 if (node.tag == tt.tagForm)
2399 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
/* stray </tr>, </td> and </th> are dropped */
2403 if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
2405 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2409 for (parent = rowgroup.parent;
2410 parent != null; parent = parent.parent)
2412 if (node.tag == parent.tag)
2415 Node.trimEmptyElement(lexer, rowgroup);
2422 if THEAD, TFOOT or TBODY then implied end tag
2425 if ((node.tag.model & Dict.CM_ROWGRP) != 0)
2427 if (node.type != Node.EndTag)
2430 Node.trimEmptyElement(lexer, rowgroup);
2434 if (node.type == Node.EndTag)
2436 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
/* anything that is not a <tr> is replaced by an inferred row */
2440 if (!(node.tag == tt.tagTr))
2442 node = lexer.inferredTag( "tr");
2443 Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2447 /* node should be <TR> */
2448 Node.insertNodeAtEnd(rowgroup, node);
2449 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2452 Node.trimEmptyElement(lexer, rowgroup);
/*
 * Parser for the content of a table row.
 * parse() reads tokens in Lexer.IgnoreWhitespace mode. Forms get an inferred
 * <td>; text, block and inline content is moved before the table; head
 * content is moved to the head; nested <table> tags and stray end tags are
 * discarded with a warning. Each accepted cell is appended and parsed with
 * lexer.excludeBlocks temporarily cleared, after which the inline stack is
 * popped back to lexer.istackbase.
 *
 * Parameters of parse():
 *   lexer - token source; excludeBlocks, exiled and istack are updated here
 *   row   - the row element being filled
 *   mode  - not read in the visible lines
 */
2457 public static class ParseRow implements Parser {
2459 public void parse( Lexer lexer, Node row, short mode )
2462 boolean exclude_state;
2463 TagTable tt = lexer.configuration.tt;
2465 if ((row.tag.model & Dict.CM_EMPTY) != 0)
2470 node = lexer.getToken(Lexer.IgnoreWhitespace);
2471 if (node == null) break;
/* a tag with the same name as this row: fixEmptyRow is applied on both visible paths */
2472 if (node.tag == row.tag)
2474 if (node.type == Node.EndTag)
2477 Node.fixEmptyRow(lexer, row);
2482 Node.fixEmptyRow(lexer, row);
2487 if this is the end tag for an ancestor element
2488 then infer end tag for this element
2490 if (node.type == Node.EndTag)
2492 if (node.tag == tt.tagForm)
2495 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
/* stray </td> and </th> are dropped */
2499 if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2501 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2505 for (parent = row.parent;
2506 parent != null; parent = parent.parent)
2508 if (node.tag == parent.tag)
2511 Node.trimEmptyElement(lexer, row);
2517 /* deal with comments etc. */
2518 if (Node.insertMisc(row, node))
2521 /* discard unknown tags */
2522 if (node.tag == null && node.type != Node.TextNode)
2524 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2528 /* discard unexpected <table> element */
2529 if (node.tag == tt.tagTable)
2531 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2535 /* THEAD, TFOOT or TBODY */
2536 if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
2539 Node.trimEmptyElement(lexer, row);
2543 if (node.type == Node.EndTag)
2545 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2550 if text or inline or block move before table
2551 if head content move to head
2554 if (node.type != Node.EndTag)
/* a form directly inside a row is given an inferred cell */
2556 if (node.tag == tt.tagForm)
2559 node = lexer.inferredTag("td");
2560 Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
2562 else if (node.type == Node.TextNode
2563 || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2565 Node.moveBeforeTable(row, node, tt);
2566 Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2567 lexer.exiled = true;
2569 if (node.type != Node.TextNode)
2570 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2572 lexer.exiled = false;
2575 else if ((node.tag.model & Dict.CM_HEAD) != 0)
2577 Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2578 moveToHead(lexer, row, node);
2583 if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
2585 Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2589 /* node should be <TD> or <TH> */
2590 Node.insertNodeAtEnd(row, node);
/* blocks are allowed inside the cell regardless of the caller's setting; the old value is restored afterwards */
2591 exclude_state = lexer.excludeBlocks;
2592 lexer.excludeBlocks = false;
2593 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2594 lexer.excludeBlocks = exclude_state;
2596 /* pop inline stack */
2598 while (lexer.istack.size() > lexer.istackbase)
2599 lexer.popInline( null);
2602 Node.trimEmptyElement(lexer, row);
/*
 * Parser for the content of a noframes element.
 * parse() records Report.USING_NOFRAMES in lexer.badAccess, then reads
 * tokens in Lexer.IgnoreWhitespace mode. A <body> start tag is appended and
 * parsed; text or any other known tag causes a body element to be inferred;
 * frame/frameset tags are reported as MISSING_ENDTAG_BEFORE (an implied
 * end of noframes); anything left over is discarded with a warning.
 *
 * Parameters of parse():
 *   lexer    - token source
 *   noframes - the element being filled
 *   mode     - overwritten with Lexer.IgnoreWhitespace before use
 */
2607 public static class ParseNoFrames implements Parser {
2609 public void parse( Lexer lexer, Node noframes, short mode )
2613 TagTable tt = lexer.configuration.tt;
2615 lexer.badAccess |= Report.USING_NOFRAMES;
2616 mode = Lexer.IgnoreWhitespace;
2621 node = lexer.getToken(mode);
2622 if (node == null) break;
/* end tag of the noframes element itself */
2623 if (node.tag == noframes.tag && node.type == Node.EndTag)
2625 noframes.closed = true;
2626 Node.trimSpaces(lexer, noframes);
2630 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset))
2632 Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE);
2633 Node.trimSpaces(lexer, noframes);
2638 if (node.tag == tt.tagHtml)
2640 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2641 Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2646 /* deal with comments etc. */
2647 if (Node.insertMisc(noframes, node))
/* an explicit <body> becomes a child and is parsed */
2650 if (node.tag == tt.tagBody && node.type == Node.StartTag)
2652 Node.insertNodeAtEnd(noframes, node);
2653 parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2657 /* implicit body element inferred */
2658 if (node.type == Node.TextNode || node.tag != null)
2661 node = lexer.inferredTag("body");
/* the insertion is only reported when XML output is configured */
2662 if (lexer.configuration.XmlOut)
2663 Report.warning(lexer, noframes, node, Report.INSERTING_TAG);
2664 Node.insertNodeAtEnd(noframes, node);
2665 parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2668 /* discard unexpected end tags */
2669 Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2672 Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR);
/*
 * Parser for the content of a select field.
 * parse() sets lexer.insert to -1, reads tokens in Lexer.IgnoreWhitespace
 * mode, and keeps only start tags for option, optgroup and script. Every
 * other token is discarded with a warning. If the loop ends without the
 * matching end tag, MISSING_ENDTAG_FOR is reported.
 *
 * Parameters of parse():
 *   lexer - token source
 *   field - the select element being filled
 *   mode  - not read in the visible lines
 */
2677 public static class ParseSelect implements Parser {
2679 public void parse( Lexer lexer, Node field, short mode )
2682 TagTable tt = lexer.configuration.tt;
2684 lexer.insert = -1; /* defer implicit inline start tags */
2688 node = lexer.getToken(Lexer.IgnoreWhitespace);
2689 if (node == null) break;
/* end tag of the field itself */
2690 if (node.tag == field.tag && node.type == Node.EndTag)
2692 field.closed = true;
2693 Node.trimSpaces(lexer, field);
2697 /* deal with comments etc. */
2698 if (Node.insertMisc(field, node))
/* only these three start tags are accepted as children */
2701 if (node.type == Node.StartTag &&
2702 (node.tag == tt.tagOption ||
2703 node.tag == tt.tagOptgroup ||
2704 node.tag == tt.tagScript))
2706 Node.insertNodeAtEnd(field, node);
2707 parseTag(lexer, node, Lexer.IgnoreWhitespace);
2711 /* discard unexpected tags */
2712 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2715 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
/*
 * Parser for elements whose content is text only.
 * parse() sets lexer.insert to -1 and, for textarea, switches the lexer mode
 * to Lexer.Preformatted. Text nodes are appended; <font> tags are discarded
 * with a warning; any other tag is treated as an implied end of the field,
 * reported as MISSING_ENDTAG_BEFORE unless the field's end tag is optional
 * (CM_OPT).
 *
 * Parameters of parse():
 *   lexer - token source
 *   field - the element being filled
 *   mode  - lexer mode used for getToken; replaced by Preformatted for textarea
 */
2720 public static class ParseText implements Parser {
2722 public void parse( Lexer lexer, Node field, short mode )
2725 TagTable tt = lexer.configuration.tt;
2727 lexer.insert = -1; /* defer implicit inline start tags */
2729 if (field.tag == tt.tagTextarea)
2730 mode = Lexer.Preformatted;
2734 node = lexer.getToken(mode);
2735 if (node == null) break;
/* end tag of the field itself */
2736 if (node.tag == field.tag && node.type == Node.EndTag)
2738 field.closed = true;
2739 Node.trimSpaces(lexer, field);
2743 /* deal with comments etc. */
2744 if (Node.insertMisc(field, node))
2747 if (node.type == Node.TextNode)
2749 /* only called for 1st child */
2750 if (field.content == null && !((mode & Lexer.Preformatted) != 0))
2751 Node.trimSpaces(lexer, field);
/* tests whether the text node is empty */
2753 if (node.start >= node.end)
2758 Node.insertNodeAtEnd(field, node);
2762 if (node.tag == tt.tagFont)
2764 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2768 /* terminate element on other tags */
2769 if (!((field.tag.model & Dict.CM_OPT) != 0))
2770 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE);
2773 Node.trimSpaces(lexer, field);
/* the missing end tag is reported only when it is not optional */
2777 if (!((field.tag.model & Dict.CM_OPT) != 0))
2778 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
/*
 * Parser for the content of an optgroup element.
 * parse() sets lexer.insert to -1, reads tokens in Lexer.IgnoreWhitespace
 * mode, and keeps start tags for option and optgroup. A nested optgroup is
 * still inserted but reported with CANT_BE_NESTED. Every other token is
 * discarded with a warning.
 *
 * Parameters of parse():
 *   lexer - token source
 *   field - the optgroup element being filled
 *   mode  - not read in the visible lines
 */
2783 public static class ParseOptGroup implements Parser {
2785 public void parse( Lexer lexer, Node field, short mode )
2788 TagTable tt = lexer.configuration.tt;
2790 lexer.insert = -1; /* defer implicit inline start tags */
2794 node = lexer.getToken(Lexer.IgnoreWhitespace);
2795 if (node == null) break;
/* end tag of the field itself */
2796 if (node.tag == field.tag && node.type == Node.EndTag)
2798 field.closed = true;
2799 Node.trimSpaces(lexer, field);
2803 /* deal with comments etc. */
2804 if (Node.insertMisc(field, node))
2807 if (node.type == Node.StartTag &&
2808 (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
2810 if (node.tag == tt.tagOptgroup)
2811 Report.warning(lexer, field, node, Report.CANT_BE_NESTED);
/* children are parsed in MixedContent mode */
2813 Node.insertNodeAtEnd(field, node);
2814 parseTag(lexer, node, Lexer.MixedContent);
2818 /* discard unexpected tags */
2819 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
/*
 * Static accessors for the parser instances of this class, one per element
 * family. Each body visible here returns the private static field with the
 * matching name (for example getParseScript() returns _parseScript).
 * NOTE(review): several bodies are not visible in this listing; they
 * presumably follow the same pattern - confirm against the full file.
 */
2825 public static Parser getParseHTML()
2830 public static Parser getParseHead()
2835 public static Parser getParseTitle()
2840 public static Parser getParseScript()
2842 return _parseScript;
2845 public static Parser getParseBody()
2850 public static Parser getParseFrameSet()
2852 return _parseFrameSet;
2855 public static Parser getParseInline()
2857 return _parseInline;
2860 public static Parser getParseList()
2865 public static Parser getParseDefList()
2867 return _parseDefList;
2870 public static Parser getParsePre()
2875 public static Parser getParseBlock()
2880 public static Parser getParseTableTag()
2882 return _parseTableTag;
2885 public static Parser getParseColGroup()
2887 return _parseColGroup;
2890 public static Parser getParseRowGroup()
2892 return _parseRowGroup;
2895 public static Parser getParseRow()
2900 public static Parser getParseNoFrames()
2902 return _parseNoFrames;
2905 public static Parser getParseSelect()
2907 return _parseSelect;
2910 public static Parser getParseText()
2915 public static Parser getParseOptGroup()
2917 return _parseOptGroup;
/*
 * One instance of each nested Parser implementation, created by the static
 * initialisers below and handed out by the getParseXxx() accessors.
 * Other code compares against these instances by identity (for example
 * "tag.parser == getParseList()").
 */
2921 private static Parser _parseHTML = new ParseHTML();
2922 private static Parser _parseHead = new ParseHead();
2923 private static Parser _parseTitle = new ParseTitle();
2924 private static Parser _parseScript = new ParseScript();
2925 private static Parser _parseBody = new ParseBody();
2926 private static Parser _parseFrameSet = new ParseFrameSet();
2927 private static Parser _parseInline = new ParseInline();
2928 private static Parser _parseList = new ParseList();
2929 private static Parser _parseDefList = new ParseDefList();
2930 private static Parser _parsePre = new ParsePre();
2931 private static Parser _parseBlock = new ParseBlock();
2932 private static Parser _parseTableTag = new ParseTableTag();
2933 private static Parser _parseColGroup = new ParseColGroup();
2934 private static Parser _parseRowGroup = new ParseRowGroup();
2935 private static Parser _parseRow = new ParseRow();
2936 private static Parser _parseNoFrames = new ParseNoFrames();
2937 private static Parser _parseSelect = new ParseSelect();
2938 private static Parser _parseText = new ParseText();
2939 private static Parser _parseOptGroup = new ParseOptGroup();
2942 HTML is the top level element
/*
 * Entry point for parsing an HTML document.
 * Creates a root node of type Node.RootNode, consumes leading tokens
 * (comments and similar via insertMisc, a doctype, stray end tags) and then
 * hands an html element - inferred when the token is not an <html> start
 * tag - to the HTML parser.
 *
 * lexer - token source for the document
 * Declared to return a Node; the return statement is not visible in this
 * listing (presumably the root node - confirm).
 */
2944 public static Node parseDocument(Lexer lexer)
2946 Node node, document, html;
2947 Node doctype = null;
2948 TagTable tt = lexer.configuration.tt;
2950 document = lexer.newNode();
2951 document.type = Node.RootNode;
2955 node = lexer.getToken(Lexer.IgnoreWhitespace);
2956 if (node == null) break;
2958 /* deal with comments etc. */
2959 if (Node.insertMisc(document, node))
/* a doctype is inserted only while none has been recorded; otherwise it is discarded with a warning */
2962 if (node.type == Node.DocTypeTag)
2964 if (doctype == null)
2966 Node.insertNodeAtEnd(document, node);
2970 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
2974 if (node.type == Node.EndTag)
2976 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); //TODO?
/* anything other than an <html> start tag leads to an inferred html element */
2980 if (node.type != Node.StartTag || node.tag != tt.tagHtml)
2983 html = lexer.inferredTag("html");
2988 Node.insertNodeAtEnd(document, html);
2989 getParseHTML().parse(lexer, html, (short)0); // TODO?
2997 * Indicates whether or not whitespace should be preserved for this element.
2998 * If an <code>xml:space</code> attribute is found, then if the attribute value is
2999 * <code>preserve</code>, returns <code>true</code>. For any other value, returns
3000 * <code>false</code>. If an <code>xml:space</code> attribute was <em>not</em>
3001 * found, then the following element names result in a return value of <code>true:
3002 * pre, script, style,</code> and <code>xsl:text</code>. Finally, if a
3003 * <code>TagTable</code> was passed in and the element appears as the "pre" element
3004 * in the <code>TagTable</code>, then <code>true</code> will be returned.
3005 * Otherwise, <code>false</code> is returned.
3006 * @param element The <code>Node</code> to test to see if whitespace should be preserved.
3008 * @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code>
3009 * function. This may be <code>null</code>, in which case this test is bypassed.
3011 * @return <code>true</code> or <code>false</code>, as explained above.
/*
 * Checks, in order: an xml:space attribute on the element, the element
 * names pre/script/style (case-insensitive), whether the tag table maps the
 * element to the pre parser, and finally the element name xsl:text.
 */
3014 public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
3018 /* search attributes for xml:space */
3019 for (attribute = element.attributes; attribute != null; attribute = attribute.next)
3021 if (attribute.attribute.equals("xml:space"))
/* NOTE(review): attribute.value is dereferenced without a null check - confirm it cannot be null here */
3023 if (attribute.value.equals("preserve"))
3030 /* kludge for html docs without explicit xml:space attribute */
3031 if (Lexer.wstrcasecmp(element.element, "pre") == 0
3032 || Lexer.wstrcasecmp(element.element, "script") == 0
3033 || Lexer.wstrcasecmp(element.element, "style") == 0)
/* the tag table lookup is skipped when no table was supplied */
3036 if ( (tt != null) && (tt.findParser(element) == getParsePre()) )
3039 /* kludge for XSL docs */
3040 if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
/*
 * Recursively parses the content of a generic XML element.
 * Tokens are read until the end tag whose name equals element.element;
 * other end tags are reported as errors; start tags are parsed recursively
 * with the current mode; nodes are appended to the element. When the mode
 * is not Preformatted, leading and trailing text children are checked for
 * a single space and discarded if they end up empty.
 *
 * lexer   - token source
 * element - the XML element being filled
 * mode    - lexer mode; replaced by Lexer.Preformatted when
 *           XMLPreserveWhiteSpace() reports true for the element
 */
3049 public static void parseXMLElement(Lexer lexer, Node element, short mode)
3053 /* Jeff Young's kludge for XSL docs */
3055 if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
3058 /* if node is pre or has xml:space="preserve" then do so */
3060 if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
3061 mode = Lexer.Preformatted;
3065 node = lexer.getToken(mode);
3066 if (node == null) break;
/* XML names are compared with equals(), i.e. case-sensitively */
3067 if (node.type == Node.EndTag && node.element.equals(element.element))
3069 element.closed = true;
3073 /* discard unexpected end tags */
3074 if (node.type == Node.EndTag)
3076 Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG);
3080 /* parse content on seeing start tag */
3081 if (node.type == Node.StartTag)
3082 parseXMLElement(lexer, node, mode);
3084 Node.insertNodeAtEnd(element, node);
3088 if first child is text then trim initial space and
3089 delete text node if it is empty.
3092 node = element.content;
3094 if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3096 if (node.textarray[node.start] == (byte)' ')
3100 if (node.start >= node.end)
3101 Node.discardElement(node);
3106 if last child is text then trim final space and
3107 delete the text node if it is empty
3110 node = element.last;
3112 if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3114 if (node.textarray[node.end - 1] == (byte)' ')
3118 if (node.start >= node.end)
3119 Node.discardElement(node);
3124 public static Node parseXMLDocument(Lexer lexer)
3126 Node node, document, doctype;
3128 document = lexer.newNode();
3129 document.type = Node.RootNode;
3131 lexer.configuration.XmlTags = true;
3135 node = lexer.getToken(Lexer.IgnoreWhitespace);
3136 if (node == null) break;
3137 /* discard unexpected end tags */
3138 if (node.type == Node.EndTag)
3140 Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
3144 /* deal with comments etc. */
3145 if (Node.insertMisc(document, node))
3148 if (node.type == Node.DocTypeTag)
3150 if (doctype == null)
3152 Node.insertNodeAtEnd(document, node);
3156 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
3160 /* if start tag then parse element's content */
3161 if (node.type == Node.StartTag)
3163 Node.insertNodeAtEnd(document, node);
3164 parseXMLElement(lexer, node, Lexer.IgnoreWhitespace);
3169 if (false) { //#if 0
3170 /* discard the document type */
3171 node = document.findDocType();
3174 Node.discardElement(node);
3177 if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
3178 Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
3180 /* ensure presence of initial <?XML version="1.0"?> */
3181 if (lexer.configuration.XmlPi)
3182 lexer.fixXMLPI(document);
3187 public static boolean isJavaScript(Node node)
3189 boolean result = false;
3192 if (node.attributes == null)
3195 for (attr = node.attributes; attr != null; attr = attr.next)
3197 if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0
3198 || Lexer.wstrcasecmp(attr.attribute, "type") == 0)
3199 && Lexer.wsubstr(attr.value, "javascript"))