2 * @(#)Node.java 1.11 2000/08/16
6 package net.sourceforge.phpdt.tidy.w3c;
13 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
14 * See Tidy.java for the copyright notice.
15 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
16 * HTML Tidy Release 4 Aug 2000</a>
18 * @author Dave Raggett <dsr@w3.org>
19 * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
20 * @version 1.0, 1999/05/22
21 * @version 1.0.1, 1999/05/29
22 * @version 1.1, 1999/06/18 Java Bean
23 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
24 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
25 * @version 1.4, 1999/09/04 DOM support
26 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
27 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
28 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
29 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
30 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
31 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
32 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
36 Used for elements and text nodes
37 element name is null for text nodes
38 start and end are offsets into lexbuf
39 which contains the textual content of
40 all elements in the parse tree.
42 parent and content allow traversal
43 of the parse tree in any direction.
44 attributes are represented as a linked
45 list of AttVal nodes which hold the
46 strings for attribute/value pairs.
/* Node type codes; one of these values is stored in a node's `type` field. */
51 public static final short RootNode = 0;
52 public static final short DocTypeTag = 1;
53 public static final short CommentTag = 2;
54 public static final short ProcInsTag = 3;
55 public static final short TextNode = 4;
56 public static final short StartTag = 5;
57 public static final short EndTag = 6;
58 public static final short StartEndTag = 7;
59 public static final short CDATATag = 8;
60 public static final short SectionTag = 9;
61 public static final short AspTag = 10;
62 public static final short JsteTag = 11;
63 public static final short PhpTag = 12;
/* enclosing node; ancestors are reached by following this link upwards */
65 protected Node parent;
69 protected int start; /* start of span onto text array */
70 protected int end; /* end of span onto text array */
71 protected byte[] textarray; /* the text array */
72 protected short type; /* TextNode, StartTag, EndTag etc. */
73 protected boolean closed; /* true if closed by explicit end tag */
74 protected boolean implicit; /* true if inferred */
75 protected boolean linebreak; /* true if followed by a line break */
76 protected Dict was; /* old tag when it was changed */
77 protected Dict tag; /* tag's dictionary definition */
78 protected String element; /* name (null for text nodes) */
/* head of the linked list of attribute/value pairs (chained through AttVal.next) */
79 protected AttVal attributes;
/* first child; the remaining children are chained through `next` */
80 protected Node content;
/* delegate to the four-argument constructor: a TextNode with no text array and an empty span */
84 this(TextNode, null, 0, 0);
/**
 * Creates a node of the given type over a span of a text array.
 *
 * @param type      node type code (TextNode, StartTag, ...)
 * @param textarray array holding the node's text; stored by reference, not copied
 * @param start     start of the span onto textarray
 * @param end       end of the span onto textarray
 */
87 public Node(short type, byte[] textarray, int start, int end)
95 this.textarray = textarray;
/* a fresh node is neither inferred nor followed by a line break, and has no attributes */
98 this.implicit = false;
99 this.linebreak = false;
103 this.attributes = null;
/**
 * Creates a named node of the given type over a span of a text array.
 *
 * @param type      node type code (TextNode, StartTag, ...)
 * @param textarray array holding the node's text; stored by reference, not copied
 * @param start     start of the span onto textarray
 * @param end       end of the span onto textarray
 * @param element   element name stored on the node
 * @param tt        tag table; NOTE(review): presumably used to resolve the
 *                  dictionary entry for tag nodes - confirm
 */
107 public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt)
115 this.textarray = textarray;
/* a fresh node is neither inferred nor followed by a line break */
118 this.implicit = false;
119 this.linebreak = false;
122 this.element = element;
123 this.attributes = null;
/* the step that follows applies only to start, start-end and end tags */
125 if (type == StartTag || type == StartEndTag || type == EndTag)
/*
 * Builds a copy of this node: the parent link, type, flags and element name
 * are copied, the text span is copied into a private array, and the
 * attribute list is cloned through AttVal.clone().
 */
129 /* used to clone heading nodes when split by an <HR> */
130 protected Object clone()
132 Node node = new Node();
134 node.parent = this.parent;
135 if (this.textarray != null)
/* the copy gets its own array sized to exactly this node's span */
137 node.textarray = new byte[this.end - this.start];
/* so the copy's end is the span length (assumes the copy's start is 0 - TODO confirm) */
139 node.end = this.end - this.start;
/* copy node.end bytes beginning at this.start of the original array */
141 System.arraycopy(this.textarray, this.start,
142 node.textarray, node.start, node.end);
144 node.type = this.type;
145 node.closed = this.closed;
146 node.implicit = this.implicit;
147 node.linebreak = this.linebreak;
/* the element name is shared with the original, not duplicated */
150 if (this.element != null)
151 node.element = this.element;
152 if (this.attributes != null)
153 node.attributes = (AttVal)this.attributes.clone();
157 public AttVal getAttrByName(String name)
161 for (attr = this.attributes; attr != null; attr = attr.next)
164 attr.attribute != null &&
165 attr.attribute.equals(name))
172 /* default method for checking an element's attributes */
173 public void checkAttributes( Lexer lexer )
177 for (attval = this.attributes; attval != null; attval = attval.next)
178 attval.checkAttribute( lexer, this );
181 public void checkUniqueAttributes(Lexer lexer)
185 for (attval = this.attributes; attval != null; attval = attval.next) {
186 if (attval.asp == null && attval.php == null)
187 attval.checkUniqueAttribute(lexer, this);
/**
 * Adds a new attribute to this node. The new entry becomes the head of the
 * attribute list when the node has no attributes yet; otherwise the list is
 * walked to its last entry so the new one can go at the end.
 *
 * @param name  attribute name
 * @param value attribute value
 */
191 public void addAttribute(String name, String value)
193 AttVal av = new AttVal(null, null, null, null,
/* the default attribute table is consulted for the new attribute */
196 AttributeTable.getDefaultAttributeTable().findAttribute(av);
198 if (this.attributes == null)
199 this.attributes = av;
200 else /* append to end of attributes */
202 AttVal here = this.attributes;
/* advance to the last entry of the list */
204 while (here.next != null)
/*
 * Walks this node's attribute list looking for attr.
 * NOTE(review): the head of the list is reassigned below, presumably for the
 * case where attr is the first entry - confirm against the full body.
 */
211 /* remove attribute from node then free it */
212 public void removeAttribute(AttVal attr)
218 for (av = this.attributes; av != null; av = next)
227 this.attributes = next;
234 /* find doctype element */
235 public Node findDocType()
239 for (node = this.content;
240 node != null && node.type != DocTypeTag; node = node.next);
245 public void discardDocType()
249 node = findDocType();
252 if (node.prev != null)
253 node.prev.next = node.next;
255 node.parent.content = node.next;
257 if (node.next != null)
258 node.next.prev = node.prev;
/*
 * @param element node to take out of the markup tree
 * @return a Node; NOTE(review): presumably the node that followed the
 *         discarded element - confirm against the body
 */
264 /* remove node from markup tree and discard it */
265 public static Node discardElement(Node element)
278 /* insert node into markup tree */
279 public static void insertNodeAtStart(Node element, Node node)
281 node.parent = element;
283 if (element.content == null)
286 element.content.prev = node; // AQ added 13 Apr 2000
288 node.next = element.content;
290 element.content = node;
293 /* insert node into markup tree */
294 public static void insertNodeAtEnd(Node element, Node node)
296 node.parent = element;
297 node.prev = element.last;
299 if (element.last != null)
300 element.last.next = node;
302 element.content = node;
308 insert node into markup tree in place of element
309 which is moved to become the child of the node
311 public static void insertNodeAsParent(Node element, Node node)
313 node.content = element;
315 node.parent = element.parent;
316 element.parent = node;
318 if (node.parent.content == element)
319 node.parent.content = node;
321 if (node.parent.last == element)
322 node.parent.last = node;
324 node.prev = element.prev;
327 if (node.prev != null)
328 node.prev.next = node;
330 node.next = element.next;
333 if (node.next != null)
334 node.next.prev = node;
337 /* insert node into markup tree before element */
338 public static void insertNodeBeforeElement(Node element, Node node)
342 parent = element.parent;
343 node.parent = parent;
345 node.prev = element.prev;
348 if (node.prev != null)
349 node.prev.next = node;
351 if (parent.content == element)
352 parent.content = node;
355 /* insert node into markup tree after element */
356 public static void insertNodeAfterElement(Node element, Node node)
360 parent = element.parent;
361 node.parent = parent;
363 // AQ - 13Jan2000 fix for parent == null
364 if (parent != null && parent.last == element)
368 node.next = element.next;
369 // AQ - 13Jan2000 fix for node.next == null
370 if (node.next != null)
371 node.next.prev = node;
378 public static void trimEmptyElement(Lexer lexer, Node element)
380 TagTable tt = lexer.configuration.tt;
382 if (lexer.canPrune(element))
384 if (element.type != TextNode)
385 Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
387 discardElement(element);
389 else if (element.tag == tt.tagP && element.content == null)
391 /* replace <p></p> by <br><br> to preserve formatting */
392 Node node = lexer.inferredTag("br");
393 Node.coerceNode(lexer, element, tt.tagBr);
394 Node.insertNodeAfterElement(element, node);
400 <em>hello </em><strong>world</strong>
402 <em>hello</em> <strong>world</strong>
404 If last child of element is a text node
405 then trim trailing white space character
406 moving it to after element's end tag.
408 public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
411 TagTable tt = lexer.configuration.tt;
413 if (last != null && last.type == Node.TextNode &&
414 last.end > last.start)
416 c = lexer.lexbuf[last.end - 1];
418 if (c == 160 || c == (byte)' ')
420 /* take care with <td> </td> */
421 if (element.tag == tt.tagTd ||
422 element.tag == tt.tagTh)
424 if (last.end > last.start + 1)
431 if (((element.tag.model & Dict.CM_INLINE) != 0) &&
432 !((element.tag.model & Dict.CM_FIELD) != 0))
433 lexer.insertspace = true;
435 /* if empty string then delete from parse tree */
436 if (last.start == last.end)
437 trimEmptyElement(lexer, last);
445 <p>hello<em> world</em>
447 <p>hello <em>world</em>
449 Trims initial space, by moving it before the
450 start tag, or if this element is the first in
451 parent's content, then by discarding the space
453 public static void trimInitialSpace(Lexer lexer, Node element, Node text)
457 // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
459 if (text.type == TextNode && text.textarray[text.start] == (byte)' '
460 && (text.start < text.end))
462 if (((element.tag.model & Dict.CM_INLINE) != 0) &&
463 !((element.tag.model & Dict.CM_FIELD) != 0) &&
464 element.parent.content != element)
468 if (prev != null && prev.type == TextNode)
470 if (prev.textarray[prev.end - 1] != (byte)' ')
471 prev.textarray[prev.end++] = (byte)' ';
475 else /* create new node */
477 node = lexer.newNode();
478 // Local fix for bug 228486 (GLP). This handles the case
479 // where we need to create a preceeding text node but there are
480 // no "slots" in textarray that we can steal from the current
481 // element. Therefore, we create a new textarray containing
482 // just the blank. When Tidy is fixed, this should be removed.
483 if (element.start >= element.end)
487 node.textarray = new byte[1];
491 node.start = element.start++;
492 node.end = element.start;
493 node.textarray = element.textarray;
495 node.textarray[node.start] = (byte)' ';
501 node.parent = element.parent;
505 /* discard the space in current node */
511 Move initial and trailing space out.
518 <em>hello </em><strong>world</strong>
520 <em>hello</em> <strong>world</strong>
522 public static void trimSpaces(Lexer lexer, Node element)
524 Node text = element.content;
525 TagTable tt = lexer.configuration.tt;
527 if (text != null && text.type == Node.TextNode &&
528 element.tag != tt.tagPre)
529 trimInitialSpace(lexer, element, text);
533 if (text != null && text.type == Node.TextNode)
534 trimTrailingSpace(lexer, element, text);
537 public boolean isDescendantOf(Dict tag)
541 for (parent = this.parent;
542 parent != null; parent = parent.parent)
544 if (parent.tag == tag)
552 the doctype has been found after other tags,
553 and needs moving to before the html element
555 public static void insertDocType(Lexer lexer, Node element, Node doctype)
557 TagTable tt = lexer.configuration.tt;
559 Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
561 while (element.tag != tt.tagHtml)
562 element = element.parent;
564 insertNodeBeforeElement(element, doctype);
567 public Node findBody(TagTable tt)
573 while (node != null && node.tag != tt.tagHtml)
581 while (node != null && node.tag != tt.tagBody)
587 public boolean isElement()
589 return (this.type == StartTag || this.type == StartEndTag ? true : false);
593 unexpected content in table row is moved to just before
594 the table in accordance with Netscape and IE. This code
595 assumes that node hasn't been inserted into the row.
597 public static void moveBeforeTable(Node row, Node node, TagTable tt)
601 /* first find the table element */
602 for (table = row.parent; table != null; table = table.parent)
604 if (table.tag == tt.tagTable)
606 if (table.parent.content == table)
607 table.parent.content = node;
609 node.prev = table.prev;
612 node.parent = table.parent;
614 if (node.prev != null)
615 node.prev.next = node;
623 if a table row is empty then insert an empty cell
624 this practice is consistent with browser behavior
625 and avoids potential problems with row spanning cells
627 public static void fixEmptyRow(Lexer lexer, Node row)
631 if (row.content == null)
633 cell = lexer.inferredTag("td");
634 insertNodeAtEnd(row, cell);
635 Report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
/**
 * Re-labels a node as an implicit start tag named after the given
 * dictionary entry, reporting OBSOLETE_ELEMENT for the change.
 *
 * @param lexer lexer used for the report
 * @param node  node being re-labelled
 * @param tag   dictionary entry of the replacement tag
 */
639 public static void coerceNode(Lexer lexer, Node node, Dict tag)
/* an inferred tag of the new kind is created only to be named in the warning */
641 Node tmp = lexer.inferredTag(tag.name);
642 Report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT);
645 node.type = StartTag;
646 node.implicit = true;
647 node.element = tag.name;
650 /* extract a node and its children from a markup tree */
651 public static void removeNode(Node node)
653 if (node.prev != null)
654 node.prev.next = node.next;
656 if (node.next != null)
657 node.next.prev = node.prev;
659 if (node.parent != null)
661 if (node.parent.content == node)
662 node.parent.content = node.next;
664 if (node.parent.last == node)
665 node.parent.last = node.prev;
668 node.parent = node.prev = node.next = null;
671 public static boolean insertMisc(Node element, Node node)
673 if (node.type == CommentTag ||
674 node.type == ProcInsTag ||
675 node.type == CDATATag ||
676 node.type == SectionTag ||
677 node.type == AspTag ||
678 node.type == JsteTag ||
681 insertNodeAtEnd(element, node);
689 used to determine how attributes
690 without values should be printed
691 this was introduced to deal with
692 user defined tags e.g. Cold Fusion
/*
 * For a node that has a dictionary tag, reports whether the CM_NEW bit is
 * set in the tag's content model.
 */
694 public static boolean isNewNode(Node node)
696 if (node != null && node.tag != null)
698 return ((node.tag.model & Dict.CM_NEW) != 0);
704 public boolean hasOneChild()
706 return (this.content != null && this.content.next == null);
709 /* find html element */
710 public Node findHTML(TagTable tt)
714 for (node = this.content;
715 node != null && node.tag != tt.tagHtml; node = node.next);
720 public Node findHEAD(TagTable tt)
724 node = this.findHTML(tt);
728 for (node = node.content;
729 node != null && node.tag != tt.tagHead;
/*
 * Consistency check of the links around this node and, recursively, of
 * every child subtree.
 */
736 public boolean checkNodeIntegrity()
739 boolean found = false;
/* the previous sibling must point forward to this node */
741 if (this.prev != null)
743 if (this.prev.next != this)
/* the next sibling must point back to this node */
747 if (this.next != null)
749 if (this.next.prev != this)
753 if (this.parent != null)
/* a node without a previous sibling must be the parent's first child */
755 if (this.prev == null && this.parent.content != this)
/* a node without a next sibling must be the parent's last child */
758 if (this.next == null && this.parent.last != this)
/* scan the parent's child list (the `found` flag presumably records whether this node is on it) */
761 for (child = this.parent.content; child != null; child = child.next)
/* apply the same check to each child */
772 for (child = this.content; child != null; child = child.next)
773 if (!child.checkNodeIntegrity())
780 Add class="foo" to node
782 public static void addClass(Node node, String classname)
784 AttVal classattr = node.getAttrByName("class");
787 if there already is a class attribute
788 then append class name after a space
790 if (classattr != null)
792 classattr.value = classattr.value + " " + classname;
794 else /* create new class attribute */
795 node.addAttribute("class", classname);
798 /* --------------------- DEBUG -------------------------- */
/* display names for toString(), indexed by a node's type code */
800 private static final String[] nodeTypeString =
/* Debug rendering of a node; the description is accumulated in s. */
815 public String toString()
/* name of the node's type */
822 s += nodeTypeString[n.type];
824 if (n.element != null)
/* text, comment and processing-instruction nodes also show their text span */
828 if (n.type == TextNode ||
829 n.type == CommentTag ||
830 n.type == ProcInsTag) {
832 if (n.textarray != null && n.start <= n.end) {
834 s += Lexer.getString(n.textarray, n.start, n.end - n.start);
/* append the rendering of the first child */
841 if (n.content != null)
842 s += n.content.toString();
852 /* --------------------- END DEBUG ---------------------- */
855 /* --------------------- DOM ---------------------------- */
/* DOM wrapper for this node; null until one has been assigned */
857 protected org.w3c.dom.Node adapter = null;

/*
 * Assigns `adapter` one of the DOM wrapper implementations below, each
 * constructed around this node.
 * NOTE(review): presumably selected by node type and created only on first
 * use - confirm against the full body.
 */
859 protected org.w3c.dom.Node getAdapter()
866 adapter = new DOMDocumentImpl(this);
870 adapter = new DOMElementImpl(this);
873 adapter = new DOMDocumentTypeImpl(this);
876 adapter = new DOMCommentImpl(this);
879 adapter = new DOMTextImpl(this);
882 adapter = new DOMCDATASectionImpl(this);
885 adapter = new DOMProcessingInstructionImpl(this);
888 adapter = new DOMNodeImpl(this);
894 protected Node cloneNode(boolean deep)
896 Node node = (Node)this.clone();
901 for (child = this.content; child != null; child = child.next)
903 newChild = child.cloneNode(deep);
904 insertNodeAtEnd(node, newChild);
/* @param newType node type code; presumably one of the type constants of this class - confirm */
911 protected void setType(short newType)
916 /* --------------------- END DOM ------------------------ */