2 * @(#)Node.java 1.11 2000/08/16
6 package net.sourceforge.phpdt.tidy.w3c;
8 import sun.security.krb5.internal.n;
14 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
15 * See Tidy.java for the copyright notice.
16 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
17 * HTML Tidy Release 4 Aug 2000</a>
19 * @author Dave Raggett <dsr@w3.org>
20 * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
21 * @version 1.0, 1999/05/22
22 * @version 1.0.1, 1999/05/29
23 * @version 1.1, 1999/06/18 Java Bean
24 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
25 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
26 * @version 1.4, 1999/09/04 DOM support
27 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
28 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
29 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
30 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
31 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
32 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
33 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
37 Used for elements and text nodes
38 element name is null for text nodes
39 start and end are offsets into lexbuf
40 which contains the textual content of
41 all elements in the parse tree.
43 parent and content allow traversal
44 of the parse tree in any direction.
45 attributes are represented as a linked
46 list of AttVal nodes which hold the
47 strings for attribute/value pairs.
/*
 * Node type codes. One of these values is stored in a node's "type"
 * field. isElement() treats StartTag and StartEndTag as element nodes.
 */
52 public static final short RootNode = 0;
53 public static final short DocTypeTag = 1;
54 public static final short CommentTag = 2;
55 public static final short ProcInsTag = 3;
56 public static final short TextNode = 4;
57 public static final short StartTag = 5;
58 public static final short EndTag = 6;
59 public static final short StartEndTag = 7;
60 public static final short CDATATag = 8;
61 public static final short SectionTag = 9;
62 public static final short AspTag = 10;
63 public static final short JsteTag = 11;
64 public static final short PhpTag = 12;
/* node that contains this one; the insertNode* helpers set it to the target element */
66 protected Node parent;
70 protected int start; /* start of span onto text array */
71 protected int end; /* end of span onto text array */
72 protected byte[] textarray; /* the text array */
73 protected short type; /* TextNode, StartTag, EndTag etc. */
74 protected boolean closed; /* true if closed by explicit end tag */
75 protected boolean implicit; /* true if inferred */
76 protected boolean linebreak; /* true if followed by a line break */
77 protected Dict was; /* old tag when it was changed */
78 protected Dict tag; /* tag's dictionary definition */
79 protected String element; /* name (null for text nodes) */
/* head of the linked list of AttVal attribute/value pairs (null when there are none) */
80 protected AttVal attributes;
/* first child of this node; siblings are reached from it through "next" */
81 protected Node content;
85 this(TextNode, null, 0, 0);
88 public Node(short type, byte[] textarray, int start, int end)
96 this.textarray = textarray;
99 this.implicit = false;
100 this.linebreak = false;
104 this.attributes = null;
108 public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt)
116 this.textarray = textarray;
119 this.implicit = false;
120 this.linebreak = false;
123 this.element = element;
124 this.attributes = null;
126 if (type == StartTag || type == StartEndTag || type == EndTag)
130 /* used to clone heading nodes when split by an <HR> */
131 protected Object clone()
133 Node node = new Node();
135 node.parent = this.parent;
136 if (this.textarray != null)
138 node.textarray = new byte[this.end - this.start];
140 node.end = this.end - this.start;
142 System.arraycopy(this.textarray, this.start,
143 node.textarray, node.start, node.end);
145 node.type = this.type;
146 node.closed = this.closed;
147 node.implicit = this.implicit;
148 node.linebreak = this.linebreak;
151 if (this.element != null)
152 node.element = this.element;
153 if (this.attributes != null)
154 node.attributes = (AttVal)this.attributes.clone();
158 public AttVal getAttrByName(String name)
162 for (attr = this.attributes; attr != null; attr = attr.next)
165 attr.attribute != null &&
166 attr.attribute.equals(name))
173 /* default method for checking an element's attributes */
174 public void checkAttributes( Lexer lexer )
178 for (attval = this.attributes; attval != null; attval = attval.next)
179 attval.checkAttribute( lexer, this );
182 public void checkUniqueAttributes(Lexer lexer)
186 for (attval = this.attributes; attval != null; attval = attval.next) {
187 if (attval.asp == null && attval.php == null)
188 attval.checkUniqueAttribute(lexer, this);
192 public void addAttribute(String name, String value)
194 AttVal av = new AttVal(null, null, null, null,
197 AttributeTable.getDefaultAttributeTable().findAttribute(av);
199 if (this.attributes == null)
200 this.attributes = av;
201 else /* append to end of attributes */
203 AttVal here = this.attributes;
205 while (here.next != null)
212 /* remove attribute from node then free it */
213 public void removeAttribute(AttVal attr)
219 for (av = this.attributes; av != null; av = next)
228 this.attributes = next;
235 /* find doctype element */
236 public Node findDocType()
240 for (node = this.content;
241 node != null && node.type != DocTypeTag; node = node.next);
246 public void discardDocType()
250 node = findDocType();
253 if (node.prev != null)
254 node.prev.next = node.next;
256 node.parent.content = node.next;
258 if (node.next != null)
259 node.next.prev = node.prev;
265 /* remove node from markup tree and discard it */
266 public static Node discardElement(Node element)
279 /* insert node into markup tree */
280 public static void insertNodeAtStart(Node element, Node node)
282 node.parent = element;
284 if (element.content == null)
287 element.content.prev = node; // AQ added 13 Apr 2000
289 node.next = element.content;
291 element.content = node;
294 /* insert node into markup tree */
295 public static void insertNodeAtEnd(Node element, Node node)
297 node.parent = element;
298 node.prev = element.last;
300 if (element.last != null)
301 element.last.next = node;
303 element.content = node;
309 insert node into markup tree in place of element
310 which is moved to become the child of the node
312 public static void insertNodeAsParent(Node element, Node node)
314 node.content = element;
316 node.parent = element.parent;
317 element.parent = node;
319 if (node.parent.content == element)
320 node.parent.content = node;
322 if (node.parent.last == element)
323 node.parent.last = node;
325 node.prev = element.prev;
328 if (node.prev != null)
329 node.prev.next = node;
331 node.next = element.next;
334 if (node.next != null)
335 node.next.prev = node;
338 /* insert node into markup tree before element */
339 public static void insertNodeBeforeElement(Node element, Node node)
343 parent = element.parent;
344 node.parent = parent;
346 node.prev = element.prev;
349 if (node.prev != null)
350 node.prev.next = node;
352 if (parent.content == element)
353 parent.content = node;
356 /* insert node into markup tree after element */
357 public static void insertNodeAfterElement(Node element, Node node)
361 parent = element.parent;
362 node.parent = parent;
364 // AQ - 13Jan2000 fix for parent == null
365 if (parent != null && parent.last == element)
369 node.next = element.next;
370 // AQ - 13Jan2000 fix for node.next == null
371 if (node.next != null)
372 node.next.prev = node;
379 public static void trimEmptyElement(Lexer lexer, Node element)
381 TagTable tt = lexer.configuration.tt;
383 if (lexer.canPrune(element))
385 if (element.type != TextNode)
386 Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
388 discardElement(element);
390 else if (element.tag == tt.tagP && element.content == null)
392 /* replace <p></p> by <br><br> to preserve formatting */
393 Node node = lexer.inferredTag("br");
394 Node.coerceNode(lexer, element, tt.tagBr);
395 Node.insertNodeAfterElement(element, node);
401 <em>hello </em><strong>world</strong>
403 <em>hello</em> <strong>world</strong>
405 If last child of element is a text node
406 then trim trailing white space character
407 moving it to after element's end tag.
409 public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
412 TagTable tt = lexer.configuration.tt;
414 if (last != null && last.type == Node.TextNode &&
415 last.end > last.start)
417 c = lexer.lexbuf[last.end - 1];
419 if (c == 160 || c == (byte)' ')
421 /* take care with <td> </td> */
422 if (element.tag == tt.tagTd ||
423 element.tag == tt.tagTh)
425 if (last.end > last.start + 1)
432 if (((element.tag.model & Dict.CM_INLINE) != 0) &&
433 !((element.tag.model & Dict.CM_FIELD) != 0))
434 lexer.insertspace = true;
436 /* if empty string then delete from parse tree */
437 if (last.start == last.end)
438 trimEmptyElement(lexer, last);
446 <p>hello<em> world</em>
448 <p>hello <em>world</em>
450 Trims initial space, by moving it before the
451 start tag, or if this element is the first in
452 parent's content, then by discarding the space
454 public static void trimInitialSpace(Lexer lexer, Node element, Node text)
458 // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
460 if (text.type == TextNode && text.textarray[text.start] == (byte)' '
461 && (text.start < text.end))
463 if (((element.tag.model & Dict.CM_INLINE) != 0) &&
464 !((element.tag.model & Dict.CM_FIELD) != 0) &&
465 element.parent.content != element)
469 if (prev != null && prev.type == TextNode)
471 if (prev.textarray[prev.end - 1] != (byte)' ')
472 prev.textarray[prev.end++] = (byte)' ';
476 else /* create new node */
478 node = lexer.newNode();
479 // Local fix for bug 228486 (GLP). This handles the case
480 // where we need to create a preceding text node but there are
481 // no "slots" in textarray that we can steal from the current
482 // element. Therefore, we create a new textarray containing
483 // just the blank. When Tidy is fixed, this should be removed.
484 if (element.start >= element.end)
488 node.textarray = new byte[1];
492 node.start = element.start++;
493 node.end = element.start;
494 node.textarray = element.textarray;
496 node.textarray[node.start] = (byte)' ';
502 node.parent = element.parent;
506 /* discard the space in current node */
512 Move initial and trailing space out.
519 <em>hello </em><strong>world</strong>
521 <em>hello</em> <strong>world</strong>
523 public static void trimSpaces(Lexer lexer, Node element)
525 Node text = element.content;
526 TagTable tt = lexer.configuration.tt;
528 if (text != null && text.type == Node.TextNode &&
529 element.tag != tt.tagPre)
530 trimInitialSpace(lexer, element, text);
534 if (text != null && text.type == Node.TextNode)
535 trimTrailingSpace(lexer, element, text);
538 public boolean isDescendantOf(Dict tag)
542 for (parent = this.parent;
543 parent != null; parent = parent.parent)
545 if (parent.tag == tag)
553 the doctype has been found after other tags,
554 and needs moving to before the html element
556 public static void insertDocType(Lexer lexer, Node element, Node doctype)
558 TagTable tt = lexer.configuration.tt;
560 Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
562 while (element.tag != tt.tagHtml)
563 element = element.parent;
565 insertNodeBeforeElement(element, doctype);
568 public Node findBody(TagTable tt)
574 while (node != null && node.tag != tt.tagHtml)
582 while (node != null && node.tag != tt.tagBody)
588 public boolean isElement()
590 return (this.type == StartTag || this.type == StartEndTag ? true : false);
594 unexpected content in table row is moved to just before
595 the table in accordance with Netscape and IE. This code
596 assumes that node hasn't been inserted into the row.
598 public static void moveBeforeTable(Node row, Node node, TagTable tt)
602 /* first find the table element */
603 for (table = row.parent; table != null; table = table.parent)
605 if (table.tag == tt.tagTable)
607 if (table.parent.content == table)
608 table.parent.content = node;
610 node.prev = table.prev;
613 node.parent = table.parent;
615 if (node.prev != null)
616 node.prev.next = node;
624 if a table row is empty then insert an empty cell
625 this practice is consistent with browser behavior
626 and avoids potential problems with row spanning cells
628 public static void fixEmptyRow(Lexer lexer, Node row)
632 if (row.content == null)
634 cell = lexer.inferredTag("td");
635 insertNodeAtEnd(row, cell);
636 Report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
640 public static void coerceNode(Lexer lexer, Node node, Dict tag)
642 Node tmp = lexer.inferredTag(tag.name);
643 Report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT);
646 node.type = StartTag;
647 node.implicit = true;
648 node.element = tag.name;
651 /* extract a node and its children from a markup tree */
652 public static void removeNode(Node node)
654 if (node.prev != null)
655 node.prev.next = node.next;
657 if (node.next != null)
658 node.next.prev = node.prev;
660 if (node.parent != null)
662 if (node.parent.content == node)
663 node.parent.content = node.next;
665 if (node.parent.last == node)
666 node.parent.last = node.prev;
669 node.parent = node.prev = node.next = null;
672 public static boolean insertMisc(Node element, Node node)
674 if (node.type == CommentTag ||
675 node.type == ProcInsTag ||
676 node.type == CDATATag ||
677 node.type == SectionTag ||
678 node.type == AspTag ||
679 node.type == JsteTag ||
682 insertNodeAtEnd(element, node);
690 used to determine how attributes
691 without values should be printed
692 this was introduced to deal with
693 user defined tags e.g. Cold Fusion
695 public static boolean isNewNode(Node node)
697 if (node != null && node.tag != null)
699 return ((node.tag.model & Dict.CM_NEW) != 0);
705 public boolean hasOneChild()
707 return (this.content != null && this.content.next == null);
710 /* find html element */
711 public Node findHTML(TagTable tt)
715 for (node = this.content;
716 node != null && node.tag != tt.tagHtml; node = node.next);
721 public Node findHEAD(TagTable tt)
725 node = this.findHTML(tt);
729 for (node = node.content;
730 node != null && node.tag != tt.tagHead;
737 public boolean checkNodeIntegrity()
740 boolean found = false;
742 if (this.prev != null)
744 if (this.prev.next != this)
748 if (this.next != null)
750 if (this.next.prev != this)
754 if (this.parent != null)
756 if (this.prev == null && this.parent.content != this)
759 if (this.next == null && this.parent.last != this)
762 for (child = this.parent.content; child != null; child = child.next)
773 for (child = this.content; child != null; child = child.next)
774 if (!child.checkNodeIntegrity())
781 Add class="foo" to node
783 public static void addClass(Node node, String classname)
785 AttVal classattr = node.getAttrByName("class");
788 if there already is a class attribute
789 then append class name after a space
791 if (classattr != null)
793 classattr.value = classattr.value + " " + classname;
795 else /* create new class attribute */
796 node.addAttribute("class", classname);
799 /* --------------------- DEBUG -------------------------- */
801 private static final String[] nodeTypeString =
816 public String toString()
823 s += nodeTypeString[n.type];
825 if (n.element != null)
829 if (n.type == TextNode ||
830 n.type == CommentTag ||
831 n.type == ProcInsTag) {
833 if (n.textarray != null && n.start <= n.end) {
835 s += Lexer.getString(n.textarray, n.start, n.end - n.start);
842 if (n.content != null)
843 s += n.content.toString();
853 /* --------------------- END DEBUG ---------------------- */
856 /* --------------------- DOM ---------------------------- */
858 protected org.w3c.dom.Node adapter = null;
860 protected org.w3c.dom.Node getAdapter()
867 adapter = new DOMDocumentImpl(this);
871 adapter = new DOMElementImpl(this);
874 adapter = new DOMDocumentTypeImpl(this);
877 adapter = new DOMCommentImpl(this);
880 adapter = new DOMTextImpl(this);
883 adapter = new DOMCDATASectionImpl(this);
886 adapter = new DOMProcessingInstructionImpl(this);
889 adapter = new DOMNodeImpl(this);
895 protected Node cloneNode(boolean deep)
897 Node node = (Node)this.clone();
902 for (child = this.content; child != null; child = child.next)
904 newChild = child.cloneNode(deep);
905 insertNodeAtEnd(node, newChild);
912 protected void setType(short newType)
917 /* --------------------- END DOM ------------------------ */