*/
+ Node.insertNodeAtEnd(list, node);
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ }
+
+ Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
+ Node.trimEmptyElement(lexer, list);
+ }
+
+ };
+
+ public static class ParsePre implements Parser {
+ /*
+  Parses the content of a preformatted element. Tokens are read in
+  Lexer.Preformatted mode and appended to pre until its own end tag,
+  the end tag of an ancestor, or a null token is seen. A non-inline
+  start tag ends the current pre, is inserted after it, and is followed
+  by a new inferred pre that receives the remaining content.
+  The mode argument is not read by this method.
+ */
+ public void parse( Lexer lexer, Node pre, short mode )
+ {
+ Node node, parent;
+ TagTable tt = lexer.configuration.tt;
+ /* nothing to parse for an element whose content model is CM_EMPTY */
+ if ((pre.tag.model & Dict.CM_EMPTY) != 0)
+ return;
+ /* elements flagged CM_OBSOLETE are coerced to the PRE tag */
+ if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
+ Node.coerceNode(lexer, pre, tt.tagPre);
+
+ lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.Preformatted);
+ if (node == null) break; /* null token: presumably end of input */
+ if (node.tag == pre.tag && node.type == Node.EndTag)
+ {
+ Node.trimSpaces(lexer, pre);
+ pre.closed = true;
+ Node.trimEmptyElement(lexer, pre);
+ return;
+ }
+ /* HTML tags are dropped; only start tags are reported */
+ if (node.tag == tt.tagHtml)
+ {
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+
+ continue;
+ }
+
+ if (node.type == Node.TextNode)
+ {
+ /* for the first child, skip a leading newline */
+ if (pre.content == null)
+ {
+ if (node.textarray[node.start] == (byte)'\n')
+ ++node.start;
+ /* drop the text node entirely if nothing is left */
+ if (node.start >= node.end)
+ {
+ continue;
+ }
+ }
+
+ Node.insertNodeAtEnd(pre, node);
+ continue;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(pre, node))
+ continue;
+
+ /* discard unknown and PARAM tags */
+ if (node.tag == null || node.tag == tt.tagParam)
+ {
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag == tt.tagP)
+ {
+ if (node.type == Node.StartTag)
+ {
+ Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
+
+ /* trim white space before <p> in <pre> */
+ Node.trimSpaces(lexer, pre);
+
+ /* coerce the <p> start tag to <br>; any other P token is discarded below */
+ Node.coerceNode(lexer, node, tt.tagBr);
+ Node.insertNodeAtEnd(pre, node);
+ }
+ else
+ {
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ }
+ continue;
+ }
+ /* head-only content (CM_HEAD without CM_BLOCK) is handed to moveToHead */
+ if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
+ {
+ moveToHead(lexer, pre, node);
+ continue;
+ }
+
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ if (node.type == Node.EndTag)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ lexer.badForm = 1;
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ for (parent = pre.parent;
+ parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
+
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, pre);
+ Node.trimEmptyElement(lexer, pre);
+ return;
+ }
+ }
+ }
+
+ /* what about head content, HEAD, BODY tags etc? */
+ if (!((node.tag.model & Dict.CM_INLINE) != 0))
+ {
+ if (node.type != Node.StartTag)
+ {
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
+ lexer.excludeBlocks = true;
+
+ /* check if we need to infer a container */
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "ul");
+ Node.addClass(node, "noindent");
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "dl");
+ }
+ else if ((node.tag.model & Dict.CM_TABLE) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "table");
+ }
+ /* node goes after the current pre; a new inferred pre goes after node and replaces the pre variable for the rest of the loop */
+ Node.insertNodeAfterElement(pre, node);
+ pre = lexer.inferredTag( "pre");
+ Node.insertNodeAfterElement(node, pre);
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ lexer.excludeBlocks = false;
+ continue;
+ }
+ /*
+ if (!((node.tag.model & Dict.CM_INLINE) != 0))
+ {
+ Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
+ lexer.ungetToken();
+ return;
+ }
+ */
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ {
+ /* trim white space before <br> */
+ if (node.tag == tt.tagBr)
+ Node.trimSpaces(lexer, pre);
+
+ Node.insertNodeAtEnd(pre, node);
+ parseTag(lexer, node, Lexer.Preformatted);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+ }
+ /* only reached through the break above, so node is null here */
+ Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
+ Node.trimEmptyElement(lexer, pre);
+ }
+
+ };
+
+ public static class ParseBlock implements Parser {
+ /*
+  Parses the content of a block-level element, appending children to
+  element until its end tag, an ancestor's end tag, a token that forces
+  the element to end, or a null token is seen.
+  The incoming mode argument is overwritten before it is read.
+ */
+ public void parse( Lexer lexer, Node element, short mode )
+ /*
+ element is node created by the lexer
+ upon seeing the start tag, or by the
+ parser when the start tag is inferred
+ */
+ {
+ Node node, parent;
+ boolean checkstack;
+ int istackbase = 0;
+ TagTable tt = lexer.configuration.tt;
+ /* checkstack: while true, the next text node or explicit inline start tag triggers one lexer.inlineDup() attempt */
+ checkstack = true;
+
+ if ((element.tag.model & Dict.CM_EMPTY) != 0)
+ return;
+
+ if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
+ Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
+
+ /*
+ InlineDup() asks the lexer to insert inline emphasis tags
+ currently pushed on the istack, but take care to avoid
+ propagating inline emphasis inside OBJECT or APPLET.
+ For these elements a fresh inline stack context is created
+ and disposed of upon reaching the end of the element.
+ They thus behave like table cells in this respect.
+ */
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ istackbase = lexer.istackbase;
+ lexer.istackbase = lexer.istack.size();
+ }
+
+ if (!((element.tag.model & Dict.CM_MIXED) != 0))
+ lexer.inlineDup( null);
+ /* start in IgnoreWhitespace; switched to MixedContent after text or inline content and back after non-inline content */
+ mode = Lexer.IgnoreWhitespace;
+
+ while (true)
+ {
+ node = lexer.getToken(mode /*Lexer.MixedContent*/);
+ if (node == null) break; /* null token: presumably end of input */
+ /* end tag for this element */
+ if (node.type == Node.EndTag && node.tag != null &&
+ (node.tag == element.tag || element.was == node.tag))
+ {
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ /* pop inline stack */
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ lexer.istackbase = istackbase;
+ }
+
+ element.closed = true;
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ /* HTML, HEAD and BODY tags are dropped; only start tags are reported */
+ if (node.tag == tt.tagHtml ||
+ node.tag == tt.tagHead ||
+ node.tag == tt.tagBody)
+ {
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+
+ continue;
+ }
+
+ if (node.type == Node.EndTag)
+ {
+ if (node.tag == null)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+
+ continue;
+ }
+ else if (node.tag == tt.tagBr)
+ node.type = Node.StartTag; /* treat a BR end tag as a BR start tag */
+ else if (node.tag == tt.tagP)
+ {
+ /* a stray P end tag is coerced to BR and inserted; a second, inferred BR then continues through the handling below */
+ Node.coerceNode(lexer, node, tt.tagBr);
+ Node.insertNodeAtEnd(element, node);
+ node = lexer.inferredTag("br");
+ }
+ else
+ {
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ for (parent = element.parent;
+ parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+ lexer.ungetToken();
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ /* pop inline stack */
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ lexer.istackbase = istackbase;
+ }
+
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ /* special case: a CM_TABLE end tag seen while lexer.exiled is set (content moved in front of a table) ends this element */
+ if (lexer.exiled
+ && node.tag.model != 0
+ && (node.tag.model & Dict.CM_TABLE) != 0)
+ {
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ }
+
+ /* mixed content model permits text */
+ if (node.type == Node.TextNode)
+ {
+ boolean iswhitenode = false;
+ /* a text node of at most one byte whose byte at lexer.lexbuf[node.start] is a space */
+ if (node.type == Node.TextNode &&
+ node.end <= node.start + 1 &&
+ lexer.lexbuf[node.start] == (byte)' ')
+ iswhitenode = true;
+ /* with EncloseBlockText set, non-blank text is wrapped in an inferred P */
+ if (lexer.configuration.EncloseBlockText && !iswhitenode)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("p");
+ Node.insertNodeAtEnd(element, node);
+ parseTag(lexer, node, Lexer.MixedContent);
+ continue;
+ }
+
+ if (checkstack)
+ {
+ checkstack = false;
+
+ if (!((element.tag.model & Dict.CM_MIXED) != 0))
+ {
+ if (lexer.inlineDup( node) > 0)
+ continue;
+ }
+ }
+
+ Node.insertNodeAtEnd(element, node);
+ mode = Lexer.MixedContent;
+ /*
+ HTML4 strict doesn't allow mixed content for
+ elements with %block; as their content model
+ */
+ lexer.versions &= ~Dict.VERS_HTML40_STRICT;
+ continue;
+ }
+
+ if (Node.insertMisc(element, node))
+ continue;
+
+ /* allow PARAM elements? */
+ if (node.tag == tt.tagParam)
+ {
+ if (((element.tag.model & Dict.CM_PARAM) != 0) &&
+ (node.type == Node.StartTag || node.type == Node.StartEndTag))
+ {
+ Node.insertNodeAtEnd(element, node);
+ continue;
+ }
+
+ /* otherwise discard it */
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* allow AREA elements? */
+ if (node.tag == tt.tagArea)
+ {
+ if ((element.tag == tt.tagMap) &&
+ (node.type == Node.StartTag || node.type == Node.StartEndTag))
+ {
+ Node.insertNodeAtEnd(element, node);
+ continue;
+ }
+
+ /* otherwise discard it */
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* ignore unknown start/end tags */
+ if (node.tag == null)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /*
+ Allow Dict.CM_INLINE elements here.
+
+ Allow Dict.CM_BLOCK elements here unless
+ lexer.excludeBlocks is yes.
+
+ LI and DD are special cased.
+
+ Otherwise infer end tag for this element.
+ */
+
+ if (!((node.tag.model & Dict.CM_INLINE) != 0))
+ {
+ if (node.type != Node.StartTag && node.type != Node.StartEndTag)
+ {
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (element.tag == tt.tagTd || element.tag == tt.tagTh)
+ {
+ /* if parent is a table cell, avoid inferring the end of the cell */
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, element, node);
+ continue;
+ }
+
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "ul");
+ Node.addClass(node, "noindent");
+ lexer.excludeBlocks = true;
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag( "dl");
+ lexer.excludeBlocks = true;
+ }
+
+ /* infer end of current table cell */
+ if (!((node.tag.model & Dict.CM_BLOCK) != 0))
+ {
+ lexer.ungetToken();
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ else if ((node.tag.model & Dict.CM_BLOCK) != 0)
+ {
+ if (lexer.excludeBlocks)
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+ lexer.ungetToken();
+ /* NOTE(review): this exit restores istackbase without popping the inline stack first, unlike the other CM_OBJECT exits - confirm intended */
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ lexer.istackbase = istackbase;
+
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ else /* things like list items */
+ {
+ if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+ if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ moveToHead(lexer, element, node);
+ continue;
+ }
+
+ lexer.ungetToken();
+
+ if ((node.tag.model & Dict.CM_LIST) != 0)
+ {
+ if (element.parent != null && element.parent.tag != null &&
+ element.parent.tag.parser == getParseList())
+ {
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ node = lexer.inferredTag("ul");
+ Node.addClass(node, "noindent");
+ }
+ else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+ {
+ if (element.parent.tag == tt.tagDl) /* NOTE(review): element.parent is not null-checked here, unlike the CM_LIST branch above */
+ {
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+
+ node = lexer.inferredTag("dl");
+ }
+ else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
+ (node.tag.model & Dict.CM_ROW) != 0)
+ {
+ node = lexer.inferredTag("table");
+ }
+ else if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ /* pop inline stack */
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ lexer.istackbase = istackbase;
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+
+ }
+ else
+ {
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ return;
+ }
+ }
+ }
+
+ /* parse known element */
+ if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+ {
+ if ((node.tag.model & Dict.CM_INLINE) != 0)
+ {
+ if (checkstack && !node.implicit)
+ {
+ checkstack = false;
+
+ if (lexer.inlineDup( node) > 0)
+ continue;
+ }
+
+ mode = Lexer.MixedContent;
+ }
+ else
+ {
+ checkstack = true;
+ mode = Lexer.IgnoreWhitespace;
+ }
+
+ /* trim white space before <br> */
+ if (node.tag == tt.tagBr)
+ Node.trimSpaces(lexer, element);
+
+ Node.insertNodeAtEnd(element, node);
+
+ if (node.implicit)
+ Report.warning(lexer, element, node, Report.INSERTING_TAG);
+ /* the child is always parsed with IgnoreWhitespace, independent of the local mode */
+ parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ if (node.type == Node.EndTag)
+ lexer.popInline( node); /* if inline end tag */
+
+ Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+ }
+ /* only reached through the break above, so node is null here */
+ if (!((element.tag.model & Dict.CM_OPT) != 0))
+ Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
+
+ if ((element.tag.model & Dict.CM_OBJECT) != 0)
+ {
+ /* pop inline stack */
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ lexer.istackbase = istackbase;
+ }
+
+ Node.trimSpaces(lexer, element);
+ Node.trimEmptyElement(lexer, element);
+ }
+
+ };
+
+ public static class ParseTableTag implements Parser {
+
+     /**
+      * Parses the content of a TABLE element.
+      *
+      * A fresh inline-stack context is opened for the table:
+      * lexer.istackbase is saved on entry and restored on every exit path.
+      * TD, TH and nested TABLE start tags get an inferred TR; text, block
+      * and inline content is inserted in front of the table; CM_HEAD content
+      * is handed to moveToHead.
+      *
+      * @param lexer token source and owner of the inline stack
+      * @param table the TABLE node whose children are parsed
+      * @param mode  not read by this parser
+      */
+     public void parse( Lexer lexer, Node table, short mode )
+     {
+         Node node, parent;
+         int istackbase;
+         TagTable tt = lexer.configuration.tt;
+
+         lexer.deferDup();
+         istackbase = lexer.istackbase;
+         lexer.istackbase = lexer.istack.size();
+
+         while (true)
+         {
+             node = lexer.getToken(Lexer.IgnoreWhitespace);
+             if (node == null) break;
+             if (node.tag == table.tag && node.type == Node.EndTag)
+             {
+                 lexer.istackbase = istackbase;
+                 table.closed = true;
+                 Node.trimEmptyElement(lexer, table);
+                 return;
+             }
+
+             /* deal with comments etc. */
+             if (Node.insertMisc(table, node))
+                 continue;
+
+             /* discard unknown tags */
+             if (node.tag == null && node.type != Node.TextNode)
+             {
+                 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /* if TD or TH or text or inline or block then infer <TR> */
+
+             if (node.type != Node.EndTag)
+             {
+                 if (node.tag == tt.tagTd ||
+                     node.tag == tt.tagTh ||
+                     node.tag == tt.tagTable)
+                 {
+                     lexer.ungetToken();
+                     node = lexer.inferredTag( "tr");
+                     Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
+                 }
+                 else if (node.type == Node.TextNode
+                          || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+                 {
+                     Node.insertNodeBeforeElement(table, node);
+                     Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
+                     lexer.exiled = true;
+
+                     /*
+                      This used to be "if (false)", a literal port of the C test
+                      "!node->type == TextNode", which never holds. As a result the
+                      content of an element moved in front of the table was never
+                      parsed as that element's content and was then treated as
+                      table content. Parse it here, as ParseRowGroup and ParseRow do.
+                     */
+                     if (node.type != Node.TextNode)
+                         parseTag(lexer, node, Lexer.IgnoreWhitespace);
+
+                     lexer.exiled = false;
+                     continue;
+                 }
+                 else if ((node.tag.model & Dict.CM_HEAD) != 0)
+                 {
+                     moveToHead(lexer, table, node);
+                     continue;
+                 }
+             }
+
+             /*
+              if this is the end tag for an ancestor element
+              then infer end tag for this element
+             */
+             if (node.type == Node.EndTag)
+             {
+                 if (node.tag == tt.tagForm)
+                 {
+                     lexer.badForm = 1;
+                     Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 /* stray end tags of table parts are dropped */
+                 if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0)
+                 {
+                     Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 for (parent = table.parent;
+                      parent != null; parent = parent.parent)
+                 {
+                     if (node.tag == parent.tag)
+                     {
+                         Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
+                         lexer.ungetToken();
+                         lexer.istackbase = istackbase;
+                         Node.trimEmptyElement(lexer, table);
+                         return;
+                     }
+                 }
+             }
+
+             /* anything that is not table content ends the table */
+             if (!((node.tag.model & Dict.CM_TABLE) != 0))
+             {
+                 lexer.ungetToken();
+                 Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
+                 lexer.istackbase = istackbase;
+                 Node.trimEmptyElement(lexer, table);
+                 return;
+             }
+
+             if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+             {
+                 Node.insertNodeAtEnd(table, node);
+                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
+                 continue;
+             }
+
+             /* discard unexpected text nodes and end tags */
+             Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+         }
+
+         /* only reached through the break above, so node is null here */
+         Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
+         Node.trimEmptyElement(lexer, table);
+         lexer.istackbase = istackbase;
+     }
+
+ };
+
+ public static class ParseColGroup implements Parser {
+
+     /**
+      * Parses the content of a COLGROUP element. Only COL start tags are
+      * kept as children. Text, any other known tag, or the end tag of an
+      * ancestor is pushed back to the lexer and ends the group without a
+      * warning.
+      *
+      * @param lexer    token source
+      * @param colgroup the COLGROUP node being filled
+      * @param mode     not read by this parser
+      */
+     public void parse(Lexer lexer, Node colgroup, short mode) {
+         final TagTable tags = lexer.configuration.tt;
+
+         if ((colgroup.tag.model & Dict.CM_EMPTY) != 0) {
+             return;
+         }
+
+         Node token;
+         while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
+             /* explicit end tag of this colgroup */
+             if (token.type == Node.EndTag && token.tag == colgroup.tag) {
+                 colgroup.closed = true;
+                 return;
+             }
+
+             if (token.type == Node.EndTag) {
+                 if (token.tag == tags.tagForm) {
+                     lexer.badForm = 1;
+                     Report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 /* an ancestor's end tag implies the end of this element */
+                 Node ancestor = colgroup.parent;
+                 while (ancestor != null) {
+                     if (token.tag == ancestor.tag) {
+                         lexer.ungetToken();
+                         return;
+                     }
+                     ancestor = ancestor.parent;
+                 }
+             }
+
+             if (token.type == Node.TextNode) {
+                 lexer.ungetToken();
+                 return;
+             }
+
+             /* comments and the like */
+             if (Node.insertMisc(colgroup, token)) {
+                 continue;
+             }
+
+             /* unknown tags are dropped */
+             if (token.tag == null) {
+                 Report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             if (token.tag != tags.tagCol) {
+                 lexer.ungetToken();
+                 return;
+             }
+
+             /* from here on the token is a COL tag */
+             if (token.type == Node.EndTag) {
+                 Report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
+             } else {
+                 Node.insertNodeAtEnd(colgroup, token);
+                 parseTag(lexer, token, Lexer.IgnoreWhitespace);
+             }
+         }
+     }
+
+ };
+
+ public static class ParseRowGroup implements Parser {
+
+     /**
+      * Parses the content of a row group (the comments in this parser name
+      * THEAD, TFOOT and TBODY). Rows are appended as children; TD and TH
+      * start tags, and any other start tag that reaches the end of the
+      * loop, get an inferred TR. Text, block and inline content is moved
+      * in front of the table, and CM_HEAD content is moved to the head.
+      *
+      * @param lexer    token source
+      * @param rowgroup the row group node being filled
+      * @param mode     not read by this parser
+      */
+     public void parse(Lexer lexer, Node rowgroup, short mode) {
+         final TagTable tags = lexer.configuration.tt;
+
+         if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0) {
+             return;
+         }
+
+         Node token;
+         while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
+             /* a tag with this group's own name always ends the group */
+             if (token.tag == rowgroup.tag) {
+                 if (token.type != Node.EndTag) {
+                     lexer.ungetToken();
+                     return;
+                 }
+                 rowgroup.closed = true;
+                 Node.trimEmptyElement(lexer, rowgroup);
+                 return;
+             }
+
+             /* if </table> infer end tag */
+             if (token.type == Node.EndTag && token.tag == tags.tagTable) {
+                 lexer.ungetToken();
+                 Node.trimEmptyElement(lexer, rowgroup);
+                 return;
+             }
+
+             /* comments and the like */
+             if (Node.insertMisc(rowgroup, token)) {
+                 continue;
+             }
+
+             /* unknown tags are dropped */
+             if (token.type != Node.TextNode && token.tag == null) {
+                 Report.warning(lexer, rowgroup, token, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /*
+              TD or TH: infer a TR
+              text, inline or block: move before the table
+              head content: move to head
+             */
+             if (token.type != Node.EndTag) {
+                 if (token.tag == tags.tagTd || token.tag == tags.tagTh) {
+                     lexer.ungetToken();
+                     token = lexer.inferredTag("tr");
+                     Report.warning(lexer, rowgroup, token, Report.MISSING_STARTTAG);
+                 } else if (token.type == Node.TextNode
+                            || (token.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) {
+                     Node.moveBeforeTable(rowgroup, token, tags);
+                     Report.warning(lexer, rowgroup, token, Report.TAG_NOT_ALLOWED_IN);
+                     lexer.exiled = true;
+
+                     if (token.type != Node.TextNode) {
+                         parseTag(lexer, token, Lexer.IgnoreWhitespace);
+                     }
+
+                     lexer.exiled = false;
+                     continue;
+                 } else if ((token.tag.model & Dict.CM_HEAD) != 0) {
+                     Report.warning(lexer, rowgroup, token, Report.TAG_NOT_ALLOWED_IN);
+                     moveToHead(lexer, rowgroup, token);
+                     continue;
+                 }
+             }
+
+             /* the end tag of an ancestor implies the end of this group */
+             if (token.type == Node.EndTag) {
+                 if (token.tag == tags.tagForm) {
+                     lexer.badForm = 1;
+                     Report.warning(lexer, rowgroup, token, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 if (token.tag == tags.tagTr
+                     || token.tag == tags.tagTd
+                     || token.tag == tags.tagTh) {
+                     Report.warning(lexer, rowgroup, token, Report.DISCARDING_UNEXPECTED);
+                     continue;
+                 }
+
+                 Node ancestor = rowgroup.parent;
+                 while (ancestor != null) {
+                     if (token.tag == ancestor.tag) {
+                         lexer.ungetToken();
+                         Node.trimEmptyElement(lexer, rowgroup);
+                         return;
+                     }
+                     ancestor = ancestor.parent;
+                 }
+             }
+
+             /* another row group (CM_ROWGRP) implies the end of this one */
+             if ((token.tag.model & Dict.CM_ROWGRP) != 0) {
+                 if (token.type != Node.EndTag) {
+                     lexer.ungetToken();
+                 }
+
+                 Node.trimEmptyElement(lexer, rowgroup);
+                 return;
+             }
+
+             if (token.type == Node.EndTag) {
+                 Report.warning(lexer, rowgroup, token, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             if (token.tag != tags.tagTr) {
+                 token = lexer.inferredTag("tr");
+                 Report.warning(lexer, rowgroup, token, Report.MISSING_STARTTAG);
+                 lexer.ungetToken();
+             }
+
+             /* token is a TR at this point */
+             Node.insertNodeAtEnd(rowgroup, token);
+             parseTag(lexer, token, Lexer.IgnoreWhitespace);
+         }
+
+         Node.trimEmptyElement(lexer, rowgroup);
+     }
+
+ };
+
+ public static class ParseRow implements Parser {
+
+ public void parse( Lexer lexer, Node row, short mode )
+ {
+ Node node, parent;
+ boolean exclude_state;
+ TagTable tt = lexer.configuration.tt;
+
+ if ((row.tag.model & Dict.CM_EMPTY) != 0)
+ return;
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+ if (node.tag == row.tag)
+ {
+ if (node.type == Node.EndTag)
+ {
+ row.closed = true;
+ Node.fixEmptyRow(lexer, row);
+ return;
+ }
+
+ lexer.ungetToken();
+ Node.fixEmptyRow(lexer, row);
+ return;
+ }
+
+ /*
+ if this is the end tag for an ancestor element
+ then infer end tag for this element
+ */
+ if (node.type == Node.EndTag)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ lexer.badForm = 1;
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ if (node.tag == tt.tagTd || node.tag == tt.tagTh)
+ {
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ for (parent = row.parent;
+ parent != null; parent = parent.parent)
+ {
+ if (node.tag == parent.tag)
+ {
+ lexer.ungetToken();
+ Node.trimEmptyElement(lexer, row);
+ return;
+ }
+ }
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(row, node))
+ continue;
+
+ /* discard unknown tags */
+ if (node.tag == null && node.type != Node.TextNode)
+ {
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* discard unexpected element */
+ if (node.tag == tt.tagTable)
+ {
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /* THEAD, TFOOT or TBODY */
+ if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
+ {
+ lexer.ungetToken();
+ Node.trimEmptyElement(lexer, row);
+ return;
+ }
+
+ if (node.type == Node.EndTag)
+ {
+ Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+ continue;
+ }
+
+ /*
+ if text or inline or block move before table
+ if head content move to head
+ */
+
+ if (node.type != Node.EndTag)
+ {
+ if (node.tag == tt.tagForm)
+ {
+ lexer.ungetToken();
+ node = lexer.inferredTag("td");
+ Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
+ }
+ else if (node.type == Node.TextNode
+ || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+ {
+ Node.moveBeforeTable(row, node, tt);
+ Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+ lexer.exiled = true;
+
+ if (node.type != Node.TextNode)
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+
+ lexer.exiled = false;
+ continue;
+ }
+ else if ((node.tag.model & Dict.CM_HEAD) != 0)
+ {
+ Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+ moveToHead(lexer, row, node);
+ continue;
+ }
+ }
+
+ if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
+ {
+ Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+ continue;
+ }
+
+ /* node should be or | */
+ Node.insertNodeAtEnd(row, node);
+ exclude_state = lexer.excludeBlocks;
+ lexer.excludeBlocks = false;
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ lexer.excludeBlocks = exclude_state;
+
+ /* pop inline stack */
+
+ while (lexer.istack.size() > lexer.istackbase)
+ lexer.popInline( null);
+ }
+
+ Node.trimEmptyElement(lexer, row);
+ }
+
+ };
+
+ public static class ParseNoFrames implements Parser {
+
+     /**
+      * Parses the content of a NOFRAMES element. A BODY start tag is
+      * inserted and parsed directly; any text or other known tag triggers
+      * an inferred BODY. FRAME and FRAMESET tags end the element. The
+      * USING_NOFRAMES flag is recorded in lexer.badAccess.
+      *
+      * @param lexer    token source
+      * @param noframes the NOFRAMES node being filled
+      * @param mode     ignored; tokens are always read with IgnoreWhitespace
+      */
+     public void parse(Lexer lexer, Node noframes, short mode) {
+         final TagTable tags = lexer.configuration.tt;
+
+         lexer.badAccess |= Report.USING_NOFRAMES;
+
+         Node token;
+         while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
+             if (token.type == Node.EndTag && token.tag == noframes.tag) {
+                 noframes.closed = true;
+                 Node.trimSpaces(lexer, noframes);
+                 return;
+             }
+
+             if (token.tag == tags.tagFrame || token.tag == tags.tagFrameset) {
+                 Report.warning(lexer, noframes, token, Report.MISSING_ENDTAG_BEFORE);
+                 Node.trimSpaces(lexer, noframes);
+                 lexer.ungetToken();
+                 return;
+             }
+
+             /* HTML tags are dropped; only start tags are reported */
+             if (token.tag == tags.tagHtml) {
+                 boolean opening = token.type == Node.StartTag
+                     || token.type == Node.StartEndTag;
+                 if (opening) {
+                     Report.warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
+                 }
+                 continue;
+             }
+
+             /* comments and the like */
+             if (Node.insertMisc(noframes, token)) {
+                 continue;
+             }
+
+             if (token.type == Node.StartTag && token.tag == tags.tagBody) {
+                 Node.insertNodeAtEnd(noframes, token);
+                 parseTag(lexer, token, Lexer.IgnoreWhitespace /*MixedContent*/);
+                 continue;
+             }
+
+             /* implicit body element inferred */
+             if (token.type == Node.TextNode || token.tag != null) {
+                 lexer.ungetToken();
+                 token = lexer.inferredTag("body");
+                 if (lexer.configuration.XmlOut) {
+                     Report.warning(lexer, noframes, token, Report.INSERTING_TAG);
+                 }
+                 Node.insertNodeAtEnd(noframes, token);
+                 parseTag(lexer, token, Lexer.IgnoreWhitespace /*MixedContent*/);
+                 continue;
+             }
+
+             /* discard unexpected end tags */
+             Report.warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
+         }
+
+         /* token is null here: the loop only ends when getToken returns null */
+         Report.warning(lexer, noframes, token, Report.MISSING_ENDTAG_FOR);
+     }
+
+ };
+
+ public static class ParseSelect implements Parser {
+
+ public void parse( Lexer lexer, Node field, short mode )
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.insert = -1; /* defer implicit inline start tags */
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+ if (node.tag == field.tag && node.type == Node.EndTag)
+ {
+ field.closed = true;
+ Node.trimSpaces(lexer, field);
+ return;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(field, node))
+ continue;
+
+ if (node.type == Node.StartTag &&
+ (node.tag == tt.tagOption ||
+ node.tag == tt.tagOptgroup ||
+ node.tag == tt.tagScript))
+ {
+ Node.insertNodeAtEnd(field, node);
+ parseTag(lexer, node, Lexer.IgnoreWhitespace);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
+ }
+
+ Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
+ }
+
+ };
+
+ public static class ParseText implements Parser {
+
+     /**
+      * Parses an element whose content is text only. Text nodes are
+      * appended, FONT tags are dropped, and any other tag ends the
+      * element. For TEXTAREA the tokens are read in Preformatted mode;
+      * otherwise the caller's mode is used.
+      *
+      * @param lexer token source
+      * @param field the element being filled
+      * @param mode  lexer mode used for reading tokens unless the element
+      *              is a TEXTAREA
+      */
+     public void parse(Lexer lexer, Node field, short mode) {
+         final TagTable tags = lexer.configuration.tt;
+
+         /* defer implicit inline start tags */
+         lexer.insert = -1;
+
+         if (field.tag == tags.tagTextarea) {
+             mode = Lexer.Preformatted;
+         }
+
+         Node token;
+         while ((token = lexer.getToken(mode)) != null) {
+             if (token.type == Node.EndTag && token.tag == field.tag) {
+                 field.closed = true;
+                 Node.trimSpaces(lexer, field);
+                 return;
+             }
+
+             /* comments and the like */
+             if (Node.insertMisc(field, token)) {
+                 continue;
+             }
+
+             if (token.type == Node.TextNode) {
+                 /* only called for 1st child */
+                 if (field.content == null && (mode & Lexer.Preformatted) == 0) {
+                     Node.trimSpaces(lexer, field);
+                 }
+
+                 /* empty text nodes are skipped */
+                 if (token.start < token.end) {
+                     Node.insertNodeAtEnd(field, token);
+                 }
+                 continue;
+             }
+
+             if (token.tag == tags.tagFont) {
+                 Report.warning(lexer, field, token, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             /* terminate element on other tags */
+             if ((field.tag.model & Dict.CM_OPT) == 0) {
+                 Report.warning(lexer, field, token, Report.MISSING_ENDTAG_BEFORE);
+             }
+
+             lexer.ungetToken();
+             Node.trimSpaces(lexer, field);
+             return;
+         }
+
+         /* token is null here: the loop only ends when getToken returns null */
+         if ((field.tag.model & Dict.CM_OPT) == 0) {
+             Report.warning(lexer, field, token, Report.MISSING_ENDTAG_FOR);
+         }
+     }
+
+ };
+
+ public static class ParseOptGroup implements Parser {
+
+ public void parse( Lexer lexer, Node field, short mode )
+ {
+ Node node;
+ TagTable tt = lexer.configuration.tt;
+
+ lexer.insert = -1; /* defer implicit inline start tags */
+
+ while (true)
+ {
+ node = lexer.getToken(Lexer.IgnoreWhitespace);
+ if (node == null) break;
+ if (node.tag == field.tag && node.type == Node.EndTag)
+ {
+ field.closed = true;
+ Node.trimSpaces(lexer, field);
+ return;
+ }
+
+ /* deal with comments etc. */
+ if (Node.insertMisc(field, node))
+ continue;
+
+ if (node.type == Node.StartTag &&
+ (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
+ {
+ if (node.tag == tt.tagOptgroup)
+ Report.warning(lexer, field, node, Report.CANT_BE_NESTED);
+
+ Node.insertNodeAtEnd(field, node);
+ parseTag(lexer, node, Lexer.MixedContent);
+ continue;
+ }
+
+ /* discard unexpected tags */
+ Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
+ }
+ }
+
+ };
+
+ /** Returns the shared ParseHTML instance held in _parseHTML. */
+ public static Parser getParseHTML() {
+     return _parseHTML;
+ }
+
+ /** Returns the shared ParseHead instance held in _parseHead. */
+ public static Parser getParseHead() {
+     return _parseHead;
+ }
+
+ /** Returns the shared ParseTitle instance held in _parseTitle. */
+ public static Parser getParseTitle() {
+     return _parseTitle;
+ }
+
+ /** Returns the shared ParseScript instance held in _parseScript. */
+ public static Parser getParseScript() {
+     return _parseScript;
+ }
+
+ /** Returns the shared ParseBody instance held in _parseBody. */
+ public static Parser getParseBody() {
+     return _parseBody;
+ }
+
+ /** Returns the shared ParseFrameSet instance held in _parseFrameSet. */
+ public static Parser getParseFrameSet() {
+     return _parseFrameSet;
+ }
+
+ /** Returns the shared ParseInline instance held in _parseInline. */
+ public static Parser getParseInline() {
+     return _parseInline;
+ }
+
+ /** Returns the shared ParseList instance held in _parseList. */
+ public static Parser getParseList() {
+     return _parseList;
+ }
+
+ /** Returns the shared ParseDefList instance held in _parseDefList. */
+ public static Parser getParseDefList() {
+     return _parseDefList;
+ }
+
+ /** Returns the shared ParsePre instance held in _parsePre. */
+ public static Parser getParsePre() {
+     return _parsePre;
+ }
+
+ /** Returns the shared ParseBlock instance held in _parseBlock. */
+ public static Parser getParseBlock() {
+     return _parseBlock;
+ }
+
+ /** Returns the shared ParseTableTag instance held in _parseTableTag. */
+ public static Parser getParseTableTag() {
+     return _parseTableTag;
+ }
+
+ /** Returns the shared ParseColGroup instance held in _parseColGroup. */
+ public static Parser getParseColGroup() {
+     return _parseColGroup;
+ }
+
+ /** Returns the shared ParseRowGroup instance held in _parseRowGroup. */
+ public static Parser getParseRowGroup() {
+     return _parseRowGroup;
+ }
+
+ /** Returns the shared ParseRow instance held in _parseRow. */
+ public static Parser getParseRow() {
+     return _parseRow;
+ }
+
+ /** Returns the shared ParseNoFrames instance held in _parseNoFrames. */
+ public static Parser getParseNoFrames() {
+     return _parseNoFrames;
+ }
+
+ /** Returns the shared ParseSelect instance held in _parseSelect. */
+ public static Parser getParseSelect() {
+     return _parseSelect;
+ }
+
+ /** Returns the shared ParseText instance held in _parseText. */
+ public static Parser getParseText() {
+     return _parseText;
+ }
+
+ /** Returns the shared ParseOptGroup instance held in _parseOptGroup. */
+ public static Parser getParseOptGroup() {
+     return _parseOptGroup;
+ }
+
+
+ /*
+ One instance of each Parser implementation, created when this class is
+ initialised. The getParseXxx() accessors return these fields; the
+ accessors for _parseHTML, _parseHead and _parseTitle are presumably
+ defined earlier in the file (getParseHTML() is called by parseDocument).
+ */
+ private static Parser _parseHTML = new ParseHTML();
+ private static Parser _parseHead = new ParseHead();
+ private static Parser _parseTitle = new ParseTitle();
+ private static Parser _parseScript = new ParseScript();
+ private static Parser _parseBody = new ParseBody();
+ private static Parser _parseFrameSet = new ParseFrameSet();
+ private static Parser _parseInline = new ParseInline();
+ private static Parser _parseList = new ParseList();
+ private static Parser _parseDefList = new ParseDefList();
+ private static Parser _parsePre = new ParsePre();
+ private static Parser _parseBlock = new ParseBlock();
+ private static Parser _parseTableTag = new ParseTableTag();
+ private static Parser _parseColGroup = new ParseColGroup();
+ private static Parser _parseRowGroup = new ParseRowGroup();
+ private static Parser _parseRow = new ParseRow();
+ private static Parser _parseNoFrames = new ParseNoFrames();
+ private static Parser _parseSelect = new ParseSelect();
+ private static Parser _parseText = new ParseText();
+ private static Parser _parseOptGroup = new ParseOptGroup();
+
+ /**
+  * Parses an HTML document; HTML is the top level element.
+  * Tokens are read from the lexer until the html element has been handled
+  * or the input ends. Nodes accepted by Node.insertMisc() (comments etc.)
+  * and the first doctype are attached to the root; later doctypes and any
+  * end tags are reported as DISCARDING_UNEXPECTED and dropped. The first
+  * remaining token either is the html start tag or is handed back to the
+  * lexer so that an inferred html element can take its place; that element
+  * is then parsed with the ParseHTML parser.
+  * @param lexer supplies the tokens and, via its configuration, the tag table
+  * @return the newly created root node
+  */
+ public static Node parseDocument(Lexer lexer)
+ {
+     TagTable tags = lexer.configuration.tt;
+     Node seenDoctype = null;
+
+     Node root = lexer.newNode();
+     root.type = Node.RootNode;
+
+     Node token;
+     while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null)
+     {
+         /* deal with comments etc. */
+         if (Node.insertMisc(root, token))
+             continue;
+
+         if (token.type == Node.DocTypeTag)
+         {
+             /* only the first doctype is kept */
+             if (seenDoctype != null)
+             {
+                 Report.warning(lexer, root, token, Report.DISCARDING_UNEXPECTED);
+                 continue;
+             }
+
+             Node.insertNodeAtEnd(root, token);
+             seenDoctype = token;
+             continue;
+         }
+
+         if (token.type == Node.EndTag)
+         {
+             Report.warning(lexer, root, token, Report.DISCARDING_UNEXPECTED); //TODO?
+             continue;
+         }
+
+         Node html;
+         if (token.type == Node.StartTag && token.tag == tags.tagHtml)
+         {
+             html = token;
+         }
+         else
+         {
+             /* not <html>: give the token back and use an inferred html element */
+             lexer.ungetToken();
+             html = lexer.inferredTag("html");
+         }
+
+         Node.insertNodeAtEnd(root, html);
+         getParseHTML().parse(lexer, html, (short)0); // TODO?
+         break;
+     }
+
+     return root;
+ }
+
+ /**
+  * Indicates whether or not whitespace should be preserved for this element.
+  * If an <code>xml:space</code> attribute is found, then if the attribute
+  * value is <code>preserve</code>, returns <code>true</code>. For any other
+  * value (including an attribute without a value), returns
+  * <code>false</code>. If an <code>xml:space</code> attribute was not
+  * found, then the following element names result in a return value of
+  * <code>true</code>: <code>pre</code>, <code>script</code>,
+  * <code>style</code>, and <code>xsl:text</code>. Finally, if a
+  * <code>TagTable</code> was passed in and its <code>findParser()</code>
+  * returns the <code>getParsePre()</code> parser for the element, then
+  * <code>true</code> will be returned. Otherwise, <code>false</code> is
+  * returned.
+  * @param element The <code>Node</code> to test to see if whitespace should
+  *        be preserved.
+  * @param tt The <code>TagTable</code> used for the
+  *        <code>getParsePre()</code> comparison. This may be
+  *        <code>null</code>, in which case this test is bypassed.
+  * @return <code>true</code> or <code>false</code>, as explained above.
+  */
+
+ public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
+ {
+     /* search attributes for xml:space */
+     for (AttVal attribute = element.attributes; attribute != null; attribute = attribute.next)
+     {
+         /*
+            constant-first equals() so that an attribute with a null name
+            or a null value (xml:space given without a value) cannot raise
+            a NullPointerException; a null value simply means "not preserve"
+         */
+         if ("xml:space".equals(attribute.attribute))
+             return "preserve".equals(attribute.value);
+     }
+
+     /* kludge for html docs without explicit xml:space attribute */
+     if (Lexer.wstrcasecmp(element.element, "pre") == 0
+         || Lexer.wstrcasecmp(element.element, "script") == 0
+         || Lexer.wstrcasecmp(element.element, "style") == 0)
+         return true;
+
+     if ( (tt != null) && (tt.findParser(element) == getParsePre()) )
+         return true;
+
+     /* kludge for XSL docs */
+     if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
+         return true;
+
+     return false;
+ }
+
+ /**
+  * Parses the content of one element of an XML document.
+  * Tokens are read until the matching end tag (same element name) or the
+  * end of input. Other end tags are reported as UNEXPECTED_ENDTAG and
+  * discarded, start tags are parsed recursively, and every remaining token
+  * is appended to <code>element</code>. Unless whitespace is being
+  * preserved, one leading space of a first text child and one trailing
+  * space of a last text child are trimmed, and a text node emptied by that
+  * is discarded.
+  * @param lexer   supplies the tokens
+  * @param element the element whose content is parsed; its start tag has
+  *                already been read
+  * @param mode    lexer mode for reading content; replaced by
+  *                Lexer.Preformatted when XMLPreserveWhiteSpace() says so
+  */
+ public static void parseXMLElement(Lexer lexer, Node element, short mode)
+ {
+     /* Jeff Young's kludge for XSL docs: nothing is read for xsl:text here */
+     if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
+         return;
+
+     /* if node is pre or has xml:space="preserve" then do so */
+     if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
+         mode = Lexer.Preformatted;
+
+     Node child;
+     while ((child = lexer.getToken(mode)) != null)
+     {
+         if (child.type == Node.EndTag)
+         {
+             if (child.element.equals(element.element))
+             {
+                 element.closed = true;
+                 break;
+             }
+
+             /* discard unexpected end tags */
+             Report.error(lexer, element, child, Report.UNEXPECTED_ENDTAG);
+             continue;
+         }
+
+         /* parse content on seeing start tag */
+         if (child.type == Node.StartTag)
+             parseXMLElement(lexer, child, mode);
+
+         Node.insertNodeAtEnd(element, child);
+     }
+
+     /* no trimming at all when whitespace is preserved */
+     if (mode == Lexer.Preformatted)
+         return;
+
+     /*
+        if first child is text then trim initial space and
+        delete text node if it is empty.
+     */
+     Node first = element.content;
+     if (first != null
+         && first.type == Node.TextNode
+         && first.textarray[first.start] == (byte)' ')
+     {
+         first.start++;
+
+         if (first.start >= first.end)
+             Node.discardElement(first);
+     }
+
+     /*
+        if last child is text then trim final space and
+        delete the text node if it is empty
+        (element.last is read only after the first child was handled)
+     */
+     Node last = element.last;
+     if (last != null
+         && last.type == Node.TextNode
+         && last.textarray[last.end - 1] == (byte)' ')
+     {
+         last.end--;
+
+         if (last.start >= last.end)
+             Node.discardElement(last);
+     }
+ }
+
+ /**
+  * Parses an XML document into a tree hanging off a new root node.
+  * Sets <code>lexer.configuration.XmlTags</code> to <code>true</code>, then
+  * reads tokens until the input ends. End tags at this level are reported
+  * as UNEXPECTED_ENDTAG and dropped. Nodes accepted by Node.insertMisc()
+  * (comments etc.) and the first doctype are attached to the root; further
+  * doctypes are reported as DISCARDING_UNEXPECTED. Start tags are attached
+  * and their content parsed with parseXMLElement(); empty-element tags
+  * (StartEndTag) are attached as they are. Any other token is ignored.
+  * @param lexer supplies the tokens and the configuration
+  * @return the newly created root node
+  */
+ public static Node parseXMLDocument(Lexer lexer)
+ {
+     Node node, document, doctype;
+
+     document = lexer.newNode();
+     document.type = Node.RootNode;
+     doctype = null;
+     lexer.configuration.XmlTags = true;
+
+     while (true)
+     {
+         node = lexer.getToken(Lexer.IgnoreWhitespace);
+         if (node == null) break;
+
+         /* discard unexpected end tags */
+         if (node.type == Node.EndTag)
+         {
+             Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
+             continue;
+         }
+
+         /* deal with comments etc. */
+         if (Node.insertMisc(document, node))
+             continue;
+
+         if (node.type == Node.DocTypeTag)
+         {
+             if (doctype == null)
+             {
+                 Node.insertNodeAtEnd(document, node);
+                 doctype = node;
+             }
+             else
+                 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
+             continue;
+         }
+
+         /*
+            an empty element such as <root/> has no content to parse, but
+            it must still be kept in the tree (parseXMLElement keeps such
+            nodes for nested elements in the same way)
+         */
+         if (node.type == Node.StartEndTag)
+         {
+             Node.insertNodeAtEnd(document, node);
+             continue;
+         }
+
+         /* if start tag then parse element's content */
+         if (node.type == Node.StartTag)
+         {
+             Node.insertNodeAtEnd(document, node);
+             parseXMLElement(lexer, node, Lexer.IgnoreWhitespace);
+         }
+     }
+
+     /* the document type node is deliberately left in the tree */
+
+     if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
+         Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
+
+     /*
+        ensure presence of initial <?xml version="1.0"?>
+        (presumably what fixXMLPI() adds; only done when XmlPi is set)
+     */
+     if (lexer.configuration.XmlPi)
+         lexer.fixXMLPI(document);
+
+     return document;
+ }
+
+ /**
+  * Decides whether a node (in practice a script element) is to be treated
+  * as JavaScript.
+  * A node without any attributes is treated as JavaScript. Otherwise the
+  * result is <code>true</code> when at least one attribute whose name
+  * matches "language" or "type" (compared with Lexer.wstrcasecmp) has a
+  * value for which <code>Lexer.wsubstr(value, "javascript")</code> holds.
+  * Attributes without a name or without a value never match.
+  * @param node the node whose attributes are inspected
+  * @return <code>true</code> if the node counts as JavaScript
+  */
+ public static boolean isJavaScript(Node node)
+ {
+     /* no attributes at all: assume JavaScript */
+     if (node.attributes == null)
+         return true;
+
+     for (AttVal attr = node.attributes; attr != null; attr = attr.next)
+     {
+         /* a valueless attribute (e.g. a bare "type") must not reach wsubstr() as null */
+         if (attr.attribute == null || attr.value == null)
+             continue;
+
+         if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0
+               || Lexer.wstrcasecmp(attr.attribute, "type") == 0)
+              && Lexer.wsubstr(attr.value, "javascript"))
+             return true;   /* one match is enough */
+     }
+
+     return false;
+ }
+
+}
|