experimental jtidy version adapted for phpeclipse
author khartlage <khartlage>
Fri, 17 Jan 2003 20:44:59 +0000 (20:44 +0000)
committer khartlage <khartlage>
Fri, 17 Jan 2003 20:44:59 +0000 (20:44 +0000)
47 files changed:
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttVal.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttrCheck.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttrCheckImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Attribute.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttributeTable.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/CheckAttribs.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/CheckAttribsImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Clean.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Configuration.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMAttrImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMAttrMapImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMCDATASectionImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMCharacterDataImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMCommentImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMDocumentImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMDocumentTypeImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMElementImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMExceptionImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMNodeImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMNodeListByTagNameImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMNodeListImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMProcessingInstructionImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMTextImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Dict.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Entity.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/EntityTable.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/IStack.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Lexer.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/MutableBoolean.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/MutableInteger.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/MutableObject.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Node.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Out.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/OutImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/PPrint.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Parser.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/ParserImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Report.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/StreamIn.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/StreamInImpl.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Style.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/StyleProp.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/TagTable.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Tidy.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/TidyBeanInfo.java [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/TidyMessages.properties [new file with mode: 0644]
net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/config.txt [new file with mode: 0644]

diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttVal.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttVal.java
new file mode 100644 (file)
index 0000000..6da908a
--- /dev/null
@@ -0,0 +1,182 @@
+/*
+ * @(#)AttVal.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Attribute/Value linked list node
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class AttVal extends Object implements Cloneable {
+
+    public AttVal    next;
+    public Attribute dict;
+    public Node      asp;
+    public Node      php;
+    public int       delim;
+    public String    attribute;
+    public String    value;
+
+    public AttVal()
+    {
+        this.next = null;
+        this.dict = null;
+        this.asp = null;
+        this.php = null;
+        this.delim = 0;
+        this.attribute = null;
+        this.value = null;
+    }
+
+    public AttVal(AttVal next, Attribute dict, int delim,
+                  String attribute, String value)
+    {
+        this.next = next;
+        this.dict = dict;
+        this.asp = null;
+        this.php = null;
+        this.delim = delim;
+        this.attribute = attribute;
+        this.value = value;
+    }
+
+    public AttVal(AttVal next, Attribute dict, Node asp, Node php,
+                  int delim, String attribute, String value)
+    {
+        this.next = next;
+        this.dict = dict;
+        this.asp = asp;
+        this.php = php;
+        this.delim = delim;
+        this.attribute = attribute;
+        this.value = value;
+    }
+
+    protected Object clone()
+    {
+        AttVal av = new AttVal();
+        if (next != null) {
+            av.next = (AttVal)next.clone();
+        }
+        if (attribute != null)
+            av.attribute = attribute;
+        if (value != null)
+            av.value = value;
+        av.delim = delim;
+        if (asp != null) {
+            av.asp = (Node)asp.clone();
+        }
+        if (php != null) {
+            av.php = (Node)php.clone();
+        }
+        av.dict =
+          AttributeTable.getDefaultAttributeTable().findAttribute(this);
+        return av;
+    }
+
+    public boolean isBoolAttribute()
+    {
+        Attribute attribute = this.dict;
+        if ( attribute != null ) {
+            if (attribute.attrchk == AttrCheckImpl.getCheckBool() ) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /* ignore unknown attributes for proprietary elements */
+    public Attribute checkAttribute( Lexer lexer, Node node )
+    {
+        TagTable tt = lexer.configuration.tt;
+
+        if (this.asp == null && this.php == null)
+            this.checkUniqueAttribute(lexer, node);
+
+        Attribute attribute = this.dict;
+        if ( attribute != null ) {
+            /* title is vers 2.0 for A and LINK otherwise vers 4.0 */
+            if (attribute == AttributeTable.attrTitle &&
+                    (node.tag == tt.tagA || node.tag == tt.tagLink))
+                    lexer.versions &= Dict.VERS_ALL;
+            else if ((attribute.versions & Dict.VERS_XML) != 0)
+            {
+                if (!(lexer.configuration.XmlTags || lexer.configuration.XmlOut))
+                    Report.attrError(lexer, node, this.attribute, Report.XML_ATTRIBUTE_VALUE);
+            }
+            else
+                lexer.versions &= attribute.versions;
+        
+            if (attribute.attrchk != null)
+                attribute.attrchk.check(lexer, node, this);
+        }
+        else if (!lexer.configuration.XmlTags && !(node.tag == null) && this.asp == null &&
+                 !(node.tag != null && ((node.tag.versions & Dict.VERS_PROPRIETARY) != 0)))
+            Report.attrError(lexer, node, this.attribute, Report.UNKNOWN_ATTRIBUTE);
+
+        return attribute;
+    }
+
+    /*
+     the same attribute name can't be used
+     more than once in each element
+    */
+    public void checkUniqueAttribute(Lexer lexer, Node node)
+    {
+        AttVal attr;
+        int count = 0;
+
+        for (attr = this.next; attr != null; attr = attr.next)
+        {
+            if (this.attribute != null &&
+                attr.attribute != null &&
+                attr.asp == null &&
+                attr.php == null &&
+                Lexer.wstrcasecmp(this.attribute, attr.attribute) == 0)
+                ++count;
+        }
+
+        if (count > 0)
+            Report.attrError(lexer, node, this.attribute, Report.REPEATED_ATTRIBUTE);
+    }
+
+    /* --------------------- DOM ---------------------------- */
+
+    protected org.w3c.dom.Attr adapter = null;
+
+    protected org.w3c.dom.Attr getAdapter()
+    {
+        if (adapter == null)
+        {
+            adapter = new DOMAttrImpl(this);
+        }
+        return adapter;
+    }
+    /* --------------------- END DOM ------------------------ */
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttrCheck.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttrCheck.java
new file mode 100644 (file)
index 0000000..4262556
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * @(#)AttrCheck.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Check attribute values
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public interface AttrCheck {
+
+    public void check( Lexer lexer, Node node, AttVal attval);
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttrCheckImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttrCheckImpl.java
new file mode 100644 (file)
index 0000000..0fb72a1
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ * @(#)AttrCheckImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Check attribute values implementations
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class AttrCheckImpl {
+
+    public static class CheckUrl implements AttrCheck {
+
+        public void check( Lexer lexer, Node node, AttVal attval)
+        {
+            if (attval.value == null)
+                Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE);
+            else if (lexer.configuration.FixBackslash)
+            {
+                attval.value = attval.value.replace('\\','/');
+            }
+        }
+
+    };
+
+    public static class CheckScript implements AttrCheck {
+
+        public void check( Lexer lexer, Node node, AttVal attval)
+        {
+        }
+
+    };
+
+    public static class CheckAlign implements AttrCheck {
+
+        public void check( Lexer lexer, Node node, AttVal attval)
+        {
+            String value;
+
+            /* IMG, OBJECT, APPLET and EMBED use align for vertical position */
+            if (node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0))
+            {
+                getCheckValign().check(lexer, node, attval);
+                return;
+            }
+
+            value = attval.value;
+
+            if (value == null)
+                Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE);
+            else if (! (Lexer.wstrcasecmp(value, "left") == 0 ||
+                        Lexer.wstrcasecmp(value, "center") == 0 ||
+                        Lexer.wstrcasecmp(value, "right") == 0 ||
+                        Lexer.wstrcasecmp(value, "justify") == 0))
+                  Report.attrError(lexer, node, attval.value, Report.BAD_ATTRIBUTE_VALUE);
+        }
+
+    };
+
+    public static class CheckValign implements AttrCheck {
+
+        public void check( Lexer lexer, Node node, AttVal attval)
+        {
+            String value;
+
+            value = attval.value;
+
+            if (value == null)
+                Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE);
+            else if (Lexer.wstrcasecmp(value, "top") == 0 ||
+                   Lexer.wstrcasecmp(value, "middle") == 0 ||
+                   Lexer.wstrcasecmp(value, "bottom") == 0 ||
+                   Lexer.wstrcasecmp(value, "baseline") == 0)
+            {
+                /* all is fine */
+            }
+            else if (Lexer.wstrcasecmp(value, "left") == 0 ||
+                      Lexer.wstrcasecmp(value, "right") == 0)
+            {
+                if (!(node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0)))
+                    Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE);
+            }
+            else if (Lexer.wstrcasecmp(value, "texttop") == 0 ||
+                   Lexer.wstrcasecmp(value, "absmiddle") == 0 ||
+                   Lexer.wstrcasecmp(value, "absbottom") == 0 ||
+                   Lexer.wstrcasecmp(value, "textbottom") == 0)
+            {
+                lexer.versions &= Dict.VERS_PROPRIETARY;
+                Report.attrError(lexer, node, value, Report.PROPRIETARY_ATTR_VALUE);
+            }
+            else
+                  Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE);
+        }
+
+    };
+
+    public static class CheckBool implements AttrCheck {
+
+        public void check( Lexer lexer, Node node, AttVal attval)
+        {
+        }
+
+    };
+
+    public static class CheckId implements AttrCheck {
+
+        public void check( Lexer lexer, Node node, AttVal attval)
+        {
+        }
+
+    };
+
+    public static class CheckName implements AttrCheck {
+
+        public void check( Lexer lexer, Node node, AttVal attval)
+        {
+        }
+
+    };
+
+    public static AttrCheck getCheckUrl()
+    {
+        return _checkUrl;
+    }
+
+    public static AttrCheck getCheckScript()
+    {
+        return _checkScript;
+    }
+
+    public static AttrCheck getCheckAlign()
+    {
+        return _checkAlign;
+    }
+
+    public static AttrCheck getCheckValign()
+    {
+        return _checkValign;
+    }
+
+    public static AttrCheck getCheckBool()
+    {
+        return _checkBool;
+    }
+
+    public static AttrCheck getCheckId()
+    {
+        return _checkId;
+    }
+
+    public static AttrCheck getCheckName()
+    {
+        return _checkName;
+    }
+
+
+    private static AttrCheck _checkUrl = new CheckUrl();
+    private static AttrCheck _checkScript = new CheckScript();
+    private static AttrCheck _checkAlign = new CheckAlign();
+    private static AttrCheck _checkValign = new CheckValign();
+    private static AttrCheck _checkBool = new CheckBool();
+    private static AttrCheck _checkId = new CheckId();
+    private static AttrCheck _checkName = new CheckName();
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Attribute.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Attribute.java
new file mode 100644 (file)
index 0000000..43845bc
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * @(#)Attribute.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * HTML attribute
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class Attribute {
+
+    public Attribute( String name,
+                      boolean nowrap,
+                      short versions,
+                      AttrCheck attrchk )
+    {
+        this.name = name;
+        this.nowrap = nowrap;
+        this.literal = false;
+        this.versions = versions;
+        this.attrchk = attrchk;
+    }
+
+    public Attribute( String name,
+                      short versions,
+                      AttrCheck attrchk )
+    {
+        this.name = name;
+        this.nowrap = false;
+        this.literal = false;
+        this.versions = versions;
+        this.attrchk = attrchk;
+    }
+
+    public String name;
+    public boolean nowrap;
+    public boolean literal;
+    public short versions;
+    public AttrCheck attrchk;
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttributeTable.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/AttributeTable.java
new file mode 100644 (file)
index 0000000..4eb312b
--- /dev/null
@@ -0,0 +1,317 @@
+/*
+ * @(#)AttributeTable.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import java.util.Hashtable;
+import java.util.Enumeration;
+
+/**
+ *
+ * HTML attribute hash table
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class AttributeTable {
+
+    /** Creates a table with no attribute definitions installed. */
+    public AttributeTable()
+    {
+    }
+
+    public Attribute lookup( String name )
+    {
+        return (Attribute)attributeHashtable.get( name );
+    }
+
+    public Attribute install( Attribute attr )
+    {
+        return (Attribute)attributeHashtable.put( attr.name, attr );
+    }
+
+    /* public method for finding attribute definition by name */
+    public Attribute findAttribute( AttVal attval )
+    {
+        Attribute np;
+
+        if ( attval.attribute != null ) {
+            np = lookup( attval.attribute );
+            return np;
+        }
+
+        return null;
+    }
+
+    public boolean isUrl( String attrname )
+    {
+        Attribute np;
+
+        np = lookup( attrname );
+        return ( np != null && np.attrchk == AttrCheckImpl.getCheckUrl() );
+    }
+
+    public boolean isScript( String attrname )
+    {
+        Attribute np;
+
+        np = lookup( attrname );
+        return ( np != null && np.attrchk == AttrCheckImpl.getCheckScript() );
+    }
+
+    public boolean isLiteralAttribute( String attrname )
+    {
+        Attribute np;
+
+        np = lookup( attrname );
+        return ( np != null && np.literal );
+    }
+
+    /*
+    Henry Zrepa reports that some folk are
+    using embed with script attributes where
+    newlines are signficant. These need to be
+    declared and handled specially!
+    */
+    public void declareLiteralAttrib(String name)
+    {
+        Attribute attrib = lookup(name);
+
+        if (attrib == null)
+            attrib = install(new Attribute(name, Dict.VERS_PROPRIETARY, null));
+
+        attrib.literal = true;
+    }
+
+    /* attribute definitions keyed by attribute name; read by lookup(), written by install() */
+    private Hashtable attributeHashtable = new Hashtable();
+
+    /* shared table instance, initially null -- NOTE(review): presumably created on demand by getDefaultAttributeTable(); confirm */
+    private static AttributeTable defaultAttributeTable = null;
+
+    private static Attribute[] attrs = {
+
+    new Attribute( "abbr",             Dict.VERS_HTML40,            null ),
+    new Attribute( "accept-charset",   Dict.VERS_HTML40,            null ),
+    new Attribute( "accept",           Dict.VERS_ALL,               null ),
+    new Attribute( "accesskey",        Dict.VERS_HTML40,            null ),
+    new Attribute( "action",           Dict.VERS_ALL,               AttrCheckImpl.getCheckUrl() ),
+    new Attribute( "add_date",         Dict.VERS_NETSCAPE,          null ),     /* A */
+    new Attribute( "align",            Dict.VERS_ALL,               AttrCheckImpl.getCheckAlign() ),    /* set varies with element */
+    new Attribute( "alink",            Dict.VERS_LOOSE,             null ),
+    new Attribute( "alt",              Dict.VERS_ALL,               null ),
+    new Attribute( "archive",          Dict.VERS_HTML40,            null ),     /* space or comma separated list */
+    new Attribute( "axis",             Dict.VERS_HTML40,            null ),
+    new Attribute( "background",       Dict.VERS_LOOSE,             AttrCheckImpl.getCheckUrl() ),
+    new Attribute( "bgcolor",          Dict.VERS_LOOSE,             null ),
+    new Attribute( "bgproperties",     Dict.VERS_PROPRIETARY,       null ),     /* BODY "fixed" fixes background */
+    new Attribute( "border",           Dict.VERS_ALL,               AttrCheckImpl.getCheckBool() ),   /* like LENGTH + "border" */
+    new Attribute( "bordercolor",      Dict.VERS_MICROSOFT,         null ),    /* used on TABLE */
+    new Attribute( "bottommargin",     Dict.VERS_MICROSOFT,         null ),   /* used on BODY */
+    new Attribute( "cellpadding",      Dict.VERS_FROM32,            null ),   /* % or pixel values */
+    new Attribute( "cellspacing",      Dict.VERS_FROM32,            null ),
+    new Attribute( "char",             Dict.VERS_HTML40,            null ),
+    new Attribute( "charoff",          Dict.VERS_HTML40,            null ),
+    new Attribute( "charset",          Dict.VERS_HTML40,            null ),
+    new Attribute( "checked",          Dict.VERS_ALL,               AttrCheckImpl.getCheckBool() ),     /* i.e. "checked" or absent */
+    new Attribute( "cite",             Dict.VERS_HTML40,            AttrCheckImpl.getCheckUrl() ),
+    new Attribute( "class",            Dict.VERS_HTML40,            null ),
+    new Attribute( "classid",          Dict.VERS_HTML40,            AttrCheckImpl.getCheckUrl() ),
+    new Attribute( "clear",            Dict.VERS_LOOSE,             null ),    /* BR: left, right, all */
+    new Attribute( "code",             Dict.VERS_LOOSE,             null ),     /* APPLET */
+    new Attribute( "codebase",         Dict.VERS_HTML40,            AttrCheckImpl.getCheckUrl() ),      /* OBJECT */
+    new Attribute( "codetype",         Dict.VERS_HTML40,            null ),     /* OBJECT */
+    new Attribute( "color",            Dict.VERS_LOOSE,             null ),    /* BASEFONT, FONT */
+    new Attribute( "cols",             Dict.VERS_IFRAMES,           null ),     /* TABLE & FRAMESET */
+    new Attribute( "colspan",          Dict.VERS_FROM32,            null ),
+    new Attribute( "compact",          Dict.VERS_ALL,               AttrCheckImpl.getCheckBool() ),     /* lists */
+    new Attribute( "content",          Dict.VERS_ALL,               null ),     /* META */
+    new Attribute( "coords",           Dict.VERS_FROM32,            null ),   /* AREA, A */    
+    new Attribute( "data",             Dict.VERS_HTML40,            AttrCheckImpl.getCheckUrl() ),      /* OBJECT */
+    new Attribute( "datafld",          Dict.VERS_MICROSOFT,         null ),     /* used on DIV, IMG */
+    new Attribute( "dataformatas",    Dict.VERS_MICROSOFT,         null ),     /* used on DIV, IMG */
+    new Attribute( "datapagesize",     Dict.VERS_MICROSOFT,         null ),   /* used on DIV, IMG */
+    new Attribute( "datasrc",          Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckUrl() ),      /* used on TABLE */
+    new Attribute( "datetime",         Dict.VERS_HTML40,            null ),     /* INS, DEL */
+    new Attribute( "declare",          Dict.VERS_HTML40,            AttrCheckImpl.getCheckBool() ),     /* OBJECT */
+    new Attribute( "defer",            Dict.VERS_HTML40,            AttrCheckImpl.getCheckBool() ),     /* SCRIPT */
+    new Attribute( "dir",              Dict.VERS_HTML40,            null ),  /* ltr or rtl */
+    new Attribute( "disabled",         Dict.VERS_HTML40,            AttrCheckImpl.getCheckBool() ),     /* form fields */
+    new Attribute( "enctype",          Dict.VERS_ALL,               null ),     /* FORM */
+    new Attribute( "face",             Dict.VERS_LOOSE,             null ),     /* BASEFONT, FONT */
+    new Attribute( "for",              Dict.VERS_HTML40,            null ),    /* LABEL */
+    new Attribute( "frame",            Dict.VERS_HTML40,            null ),   /* TABLE */
+    new Attribute( "frameborder",      Dict.VERS_FRAMES,            null ),  /* 0 or 1 */
+    new Attribute( "framespacing",     Dict.VERS_PROPRIETARY,       null ),   /* pixel value */
+    new Attribute( "gridx",            Dict.VERS_PROPRIETARY,       null ),   /* TABLE Adobe golive*/
+    new Attribute( "gridy",            Dict.VERS_PROPRIETARY,       null ),   /* TABLE Adobe golive */
+    new Attribute( "headers",          Dict.VERS_HTML40,            null ),   /* table cells */
+    new Attribute( "height",           Dict.VERS_ALL,               null ),   /* pixels only for TH/TD */
+    new Attribute( "href",             Dict.VERS_ALL,               AttrCheckImpl.getCheckUrl() ),      /* A, AREA, LINK and BASE */
+    new Attribute( "hreflang",         Dict.VERS_HTML40,            null ),     /* A, LINK */
+    new Attribute( "hspace",           Dict.VERS_ALL,               null ),   /* APPLET, IMG, OBJECT */
+    new Attribute( "http-equiv",       Dict.VERS_ALL,               null ),     /* META */
+    new Attribute( "id",               Dict.VERS_HTML40,            AttrCheckImpl.getCheckId() ),
+    new Attribute( "ismap",            Dict.VERS_ALL,               AttrCheckImpl.getCheckBool() ),     /* IMG */
+    new Attribute( "label",            Dict.VERS_HTML40,            null ),     /* OPT, OPTGROUP */
+    new Attribute( "lang",             Dict.VERS_HTML40,            null ),
+    new Attribute( "language",         Dict.VERS_LOOSE,             null ),     /* SCRIPT */
+    new Attribute( "last_modified",    Dict.VERS_NETSCAPE,          null ),     /* A */
+    new Attribute( "last_visit",       Dict.VERS_NETSCAPE,          null ),     /* A */
+    new Attribute( "leftmargin",       Dict.VERS_MICROSOFT,         null ),   /* used on BODY */
+    new Attribute( "link",             Dict.VERS_LOOSE,             null ),    /* BODY */
+    new Attribute( "longdesc",         Dict.VERS_HTML40,            AttrCheckImpl.getCheckUrl() ),      /* IMG */
+    new Attribute( "lowsrc",           Dict.VERS_PROPRIETARY,       AttrCheckImpl.getCheckUrl() ),      /* IMG */
+    new Attribute( "marginheight",     Dict.VERS_IFRAMES,           null ),   /* FRAME, IFRAME, BODY */
+    new Attribute( "marginwidth",      Dict.VERS_IFRAMES,           null ),   /* ditto */
+    new Attribute( "maxlength",        Dict.VERS_ALL,               null ),   /* INPUT */
+    new Attribute( "media",            Dict.VERS_HTML40,            null ),    /* STYLE, LINK */
+    new Attribute( "method",           Dict.VERS_ALL,               null ),  /* FORM: get or post */
+    new Attribute( "multiple",         Dict.VERS_ALL,               AttrCheckImpl.getCheckBool() ),     /* SELECT */
+    new Attribute( "name",             Dict.VERS_ALL,               AttrCheckImpl.getCheckName() ),
+    new Attribute( "nohref",           Dict.VERS_FROM32,            AttrCheckImpl.getCheckBool() ),     /* AREA */
+    new Attribute( "noresize",         Dict.VERS_FRAMES,            AttrCheckImpl.getCheckBool() ),     /* FRAME */
+    new Attribute( "noshade",          Dict.VERS_LOOSE,             AttrCheckImpl.getCheckBool() ),     /* HR */
+    new Attribute( "nowrap",           Dict.VERS_LOOSE,             AttrCheckImpl.getCheckBool() ),     /* table cells */
+    new Attribute( "object",           Dict.VERS_HTML40_LOOSE,      null ),     /* APPLET */
+    new Attribute( "onblur",           Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onchange",         Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onclick",          Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "ondblclick",       Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onkeydown",        Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onkeypress",       Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onkeyup",          Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onload",           Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onmousedown",      Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onmousemove",      Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onmouseout",       Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onmouseover",      Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onmouseup",        Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onsubmit",         Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onreset",          Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onselect",         Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onunload",         Dict.VERS_HTML40,            AttrCheckImpl.getCheckScript() ),   /* event */
+    new Attribute( "onafterupdate",    Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckScript() ),   /* form fields */
+    new Attribute( "onbeforeupdate",   Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckScript() ),   /* form fields */
+    new Attribute( "onerrorupdate",    Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckScript() ),   /* form fields */
+    new Attribute( "onrowenter",       Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckScript() ),   /* form fields */
+    new Attribute( "onrowexit",        Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckScript() ),   /* form fields */
+    new Attribute( "onbeforeunload",   Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckScript() ),   /* form fields */
+    new Attribute( "ondatasetchanged", Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckScript() ),   /* object, applet */
+    new Attribute( "ondataavailable",  Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckScript() ),   /* object, applet */
+    new Attribute( "ondatasetcomplete",Dict.VERS_MICROSOFT,         AttrCheckImpl.getCheckScript() ),   /* object, applet */
+    new Attribute( "profile",          Dict.VERS_HTML40,            AttrCheckImpl.getCheckUrl() ),      /* HEAD */
+    new Attribute( "prompt",           Dict.VERS_LOOSE,             null ),     /* ISINDEX */
+    new Attribute( "readonly",         Dict.VERS_HTML40,            AttrCheckImpl.getCheckBool() ),     /* form fields */
+    new Attribute( "rel",              Dict.VERS_ALL,               null ), /* A, LINK */
+    new Attribute( "rev",              Dict.VERS_ALL,               null ), /* A, LINK */
+    new Attribute( "rightmargin",      Dict.VERS_MICROSOFT,         null ),   /* used on BODY */
+    new Attribute( "rows",             Dict.VERS_ALL,               null ),   /* TEXTAREA */
+    new Attribute( "rowspan",          Dict.VERS_ALL,               null ),   /* table cells */
+    new Attribute( "rules",            Dict.VERS_HTML40,            null ),   /* TABLE */
+    new Attribute( "scheme",           Dict.VERS_HTML40,            null ),     /* META */
+    new Attribute( "scope",            Dict.VERS_HTML40,            null ),    /* table cells */
+    new Attribute( "scrolling",        Dict.VERS_IFRAMES,           null ),   /* yes, no or auto */
+    new Attribute( "selected",         Dict.VERS_ALL,               AttrCheckImpl.getCheckBool() ),     /* OPTION */
+    new Attribute( "shape",            Dict.VERS_FROM32,            null ),    /* AREA, A */
+    new Attribute( "showgrid",         Dict.VERS_PROPRIETARY,       AttrCheckImpl.getCheckBool() ),     /* TABLE Adobe golive */
+    new Attribute( "showgridx",        Dict.VERS_PROPRIETARY,       AttrCheckImpl.getCheckBool() ),     /* TABLE Adobe golive*/
+    new Attribute( "showgridy",        Dict.VERS_PROPRIETARY,       AttrCheckImpl.getCheckBool() ),     /* TABLE Adobe golive*/
+    new Attribute( "size",             Dict.VERS_LOOSE,             null ),   /* HR, FONT, BASEFONT, SELECT */
+    new Attribute( "span",             Dict.VERS_HTML40,            null ),   /* COL, COLGROUP */
+    new Attribute( "src",              (short)(Dict.VERS_ALL | Dict.VERS_FRAMES), AttrCheckImpl.getCheckUrl() ),      /* IMG, FRAME, IFRAME */
+    new Attribute( "standby",          Dict.VERS_HTML40,            null ),     /* OBJECT */
+    new Attribute( "start",            Dict.VERS_ALL,               null ),   /* OL */
+    new Attribute( "style",            Dict.VERS_HTML40,            null ),
+    new Attribute( "summary",          Dict.VERS_HTML40,            null ),     /* TABLE */
+    new Attribute( "tabindex",         Dict.VERS_HTML40,            null ),   /* fields, OBJECT  and A */
+    new Attribute( "target",           Dict.VERS_HTML40,            null ),   /* names a frame/window */
+    new Attribute( "text",             Dict.VERS_LOOSE,             null ),    /* BODY */
+    new Attribute( "title",            Dict.VERS_HTML40,            null ),     /* text tool tip */
+    new Attribute( "topmargin",        Dict.VERS_MICROSOFT,         null ),   /* used on BODY */
+    new Attribute( "type",             Dict.VERS_FROM32,            null ), /* also used by SPACER */
+    new Attribute( "usemap",           Dict.VERS_ALL,               AttrCheckImpl.getCheckBool() ),     /* things with images */
+    new Attribute( "valign",           Dict.VERS_FROM32,            AttrCheckImpl.getCheckValign() ),
+    new Attribute( "value",            Dict.VERS_ALL,               null ),     /* OPTION, PARAM */
+    new Attribute( "valuetype",        Dict.VERS_HTML40,            null ),    /* PARAM: data, ref, object */
+    new Attribute( "version",          Dict.VERS_ALL,               null ),     /* HTML */
+    new Attribute( "vlink",            Dict.VERS_LOOSE,             null ),    /* BODY */
+    new Attribute( "vspace",           Dict.VERS_LOOSE,             null ),   /* IMG, OBJECT, APPLET */
+    new Attribute( "width",            Dict.VERS_ALL,               null ),   /* pixels only for TD/TH */
+    new Attribute( "wrap",             Dict.VERS_NETSCAPE,          null ),     /* textarea */
+    new Attribute( "xml:lang",         Dict.VERS_XML,               null ),     /* XML language */
+    new Attribute( "xmlns",            Dict.VERS_ALL,               null ),     /* name space */
+
+    };
+
+    /*
+     * Shortcuts to individual entries of the default attribute table. The
+     * attribute checkers (see CheckAttribsImpl) compare against them by
+     * identity. They are all null until getDefaultAttributeTable() has been
+     * called once; that method assigns them from lookup().
+     */
+    public static Attribute attrHref = null;
+    public static Attribute attrSrc = null;
+    public static Attribute attrId = null;
+    public static Attribute attrName = null;
+    public static Attribute attrSummary = null;
+    public static Attribute attrAlt = null;
+    public static Attribute attrLongdesc = null;
+    public static Attribute attrUsemap = null;
+    public static Attribute attrIsmap = null;
+    public static Attribute attrLanguage = null;
+    public static Attribute attrType = null;
+    public static Attribute attrTitle = null;
+    public static Attribute attrXmlns = null;
+    public static Attribute attrValue = null;
+    public static Attribute attrContent = null;
+    public static Attribute attrDatafld = null;
+    public static Attribute attrWidth = null;
+    public static Attribute attrHeight = null;
+
+    public static AttributeTable getDefaultAttributeTable()
+    {
+        if ( defaultAttributeTable == null ) {
+            defaultAttributeTable = new AttributeTable();
+            for ( int i = 0; i < attrs.length; i++ ) {
+                defaultAttributeTable.install( attrs[i] );
+            }
+            attrHref = defaultAttributeTable.lookup("href");
+            attrSrc = defaultAttributeTable.lookup("src");
+            attrId = defaultAttributeTable.lookup("id");
+            attrName = defaultAttributeTable.lookup("name");
+            attrSummary = defaultAttributeTable.lookup("summary");
+            attrAlt = defaultAttributeTable.lookup("alt");
+            attrLongdesc = defaultAttributeTable.lookup("longdesc");
+            attrUsemap = defaultAttributeTable.lookup("usemap");
+            attrIsmap = defaultAttributeTable.lookup("ismap");
+            attrLanguage = defaultAttributeTable.lookup("language");
+            attrType = defaultAttributeTable.lookup("type");
+            attrTitle = defaultAttributeTable.lookup("title");
+            attrXmlns = defaultAttributeTable.lookup("xmlns");
+            attrValue = defaultAttributeTable.lookup("value");
+            attrContent = defaultAttributeTable.lookup("content");
+            attrDatafld = defaultAttributeTable.lookup("datafld");;
+            attrWidth = defaultAttributeTable.lookup("width");;
+            attrHeight = defaultAttributeTable.lookup("height");;
+
+            attrAlt.nowrap = true;
+            attrValue.nowrap = true;
+            attrContent.nowrap = true;
+        }
+        return defaultAttributeTable;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/CheckAttribs.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/CheckAttribs.java
new file mode 100644 (file)
index 0000000..ba69d5b
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * @(#)CheckAttribs.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Check HTML attributes
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public interface CheckAttribs {
+
+    public void check( Lexer lexer, Node node );
+
+}
+
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/CheckAttribsImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/CheckAttribsImpl.java
new file mode 100644 (file)
index 0000000..a5bcbc8
--- /dev/null
@@ -0,0 +1,403 @@
+/*
+ * @(#)CheckAttribsImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Check HTML attributes implementation
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class CheckAttribsImpl {
+
+    public static class CheckHTML implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            AttVal attval;
+            Attribute attribute;
+
+            node.checkUniqueAttributes(lexer);
+
+            for (attval = node.attributes; attval != null; attval = attval.next)
+            {
+                attribute = attval.checkAttribute(lexer, node );
+
+                if (attribute == AttributeTable.attrXmlns)
+                    lexer.isvoyager = true;
+            }
+        }
+
+    };
+
+    public static class CheckSCRIPT implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            Attribute attribute;
+            AttVal lang, type;
+
+            node.checkUniqueAttributes(lexer);
+
+            lang = node.getAttrByName("language");
+            type = node.getAttrByName("type");
+
+            if (type == null)
+            {
+                Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
+
+                /* check for javascript */
+
+                if (lang != null)
+                {
+                    String str = lang.value;
+                    if (str.length() > 10)
+                        str = str.substring(0, 10);
+                    if ( (Lexer.wstrcasecmp(str, "javascript") == 0) ||
+                         (Lexer.wstrcasecmp(str, "jscript") == 0) )
+                    {
+                        node.addAttribute("type", "text/javascript");
+                    }
+                }
+                else
+                    node.addAttribute("type", "text/javascript");
+            }
+        }
+
+    };
+
+    public static class CheckTABLE implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            AttVal attval;
+            Attribute attribute;
+            boolean hasSummary = false;
+
+            node.checkUniqueAttributes(lexer);
+
+            for (attval = node.attributes; attval != null; attval = attval.next)
+            {
+                attribute = attval.checkAttribute(lexer, node);
+
+                if (attribute == AttributeTable.attrSummary)
+                    hasSummary = true;
+            }
+
+            /* suppress warning for missing summary for HTML 2.0 and HTML 3.2 */
+            if (!hasSummary && lexer.doctype != Dict.VERS_HTML20 && lexer.doctype != Dict.VERS_HTML32)
+            {
+                lexer.badAccess |= Report.MISSING_SUMMARY;
+                Report.attrError(lexer, node, "summary", Report.MISSING_ATTRIBUTE);
+            }
+
+            /* convert <table border> to <table border="1"> */
+            if (lexer.configuration.XmlOut)
+            {
+                attval = node.getAttrByName("border");
+                if (attval != null)
+                {
+                    if (attval.value == null)
+                        attval.value = "1";
+                }
+            }
+        }
+
+    };
+
+    public static class CheckCaption implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            AttVal attval;
+            String value = null;
+
+            node.checkUniqueAttributes(lexer);
+
+            for (attval = node.attributes; attval != null; attval = attval.next)
+            {
+                if ( Lexer.wstrcasecmp(attval.attribute, "align") == 0 )
+                {
+                    value = attval.value;
+                    break;
+                }
+            }
+
+            if (value != null)
+            {
+                if (Lexer.wstrcasecmp(value, "left") == 0 || Lexer.wstrcasecmp(value, "right") == 0)
+                    lexer.versions &= (short)(Dict.VERS_HTML40_LOOSE|Dict.VERS_FRAMES);
+                else if (Lexer.wstrcasecmp(value, "top") == 0 || Lexer.wstrcasecmp(value, "bottom") == 0)
+                    lexer.versions &= Dict.VERS_FROM32;
+                else
+                    Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE);
+            }
+        }
+
+    };
+
+    public static class CheckHR implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            if (node.getAttrByName("src") != null)
+                Report.attrError(lexer, node, "src", Report.PROPRIETARY_ATTR_VALUE);
+        }
+    };
+
+    public static class CheckIMG implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            AttVal attval;
+            Attribute attribute;
+            boolean hasAlt = false;
+            boolean hasSrc = false;
+            boolean hasUseMap = false;
+            boolean hasIsMap = false;
+            boolean hasDataFld = false;
+
+            node.checkUniqueAttributes(lexer);
+
+            for (attval = node.attributes; attval != null; attval = attval.next)
+            {
+                attribute = attval.checkAttribute( lexer, node );
+
+                if (attribute == AttributeTable.attrAlt)
+                    hasAlt = true;
+                else if (attribute == AttributeTable.attrSrc)
+                    hasSrc = true;
+                else if (attribute == AttributeTable.attrUsemap)
+                    hasUseMap = true;
+                else if (attribute == AttributeTable.attrIsmap)
+                    hasIsMap = true;
+                else if (attribute == AttributeTable.attrDatafld)
+                    hasDataFld = true;
+                else if (attribute == AttributeTable.attrWidth ||
+                         attribute == AttributeTable.attrHeight)
+                    lexer.versions &= ~Dict.VERS_HTML20;
+            }
+
+            if (!hasAlt)
+            {
+                lexer.badAccess |= Report.MISSING_IMAGE_ALT;
+                Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE);
+                if (lexer.configuration.altText != null)
+                    node.addAttribute("alt", lexer.configuration.altText);
+            }
+
+            if (!hasSrc && !hasDataFld)
+                Report.attrError(lexer, node, "src", Report.MISSING_ATTRIBUTE);
+
+            if (hasIsMap && !hasUseMap)
+                Report.attrError(lexer, node, "ismap", Report.MISSING_IMAGEMAP);
+        }
+
+    };
+
+    public static class CheckAREA implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            AttVal attval;
+            Attribute attribute;
+            boolean hasAlt = false;
+            boolean hasHref = false;
+
+            node.checkUniqueAttributes(lexer);
+
+            for (attval = node.attributes; attval != null; attval = attval.next)
+            {
+                attribute = attval.checkAttribute( lexer, node );
+
+                if (attribute == AttributeTable.attrAlt)
+                    hasAlt = true;
+                else if (attribute == AttributeTable.attrHref)
+                    hasHref = true;
+            }
+
+            if (!hasAlt)
+            {
+                lexer.badAccess |= Report.MISSING_LINK_ALT;
+                Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE);
+            }
+            if (!hasHref)
+                Report.attrError(lexer, node, "href", Report.MISSING_ATTRIBUTE);
+        }
+
+    };
+
+    public static class CheckAnchor implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            node.checkUniqueAttributes(lexer);
+
+            lexer.fixId(node);
+        }
+    };
+
+    public static class CheckMap implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            node.checkUniqueAttributes(lexer);
+
+            lexer.fixId(node);
+        }
+    }
+
+    public static class CheckSTYLE implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            AttVal type = node.getAttrByName("type");
+
+            node.checkUniqueAttributes(lexer);
+
+            if (type == null)
+            {
+                Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
+
+                node.addAttribute("type", "text/css");
+            }
+        }
+    }
+
+    public static class CheckTableCell implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            node.checkUniqueAttributes(lexer);
+
+            /*
+              HTML4 strict doesn't allow mixed content for
+              elements with %block; as their content model
+            */
+            if (node.getAttrByName("width") != null || node.getAttrByName("height") != null)
+                lexer.versions &= ~Dict.VERS_HTML40_STRICT;
+        }
+    }
+
+    /* add missing type attribute when appropriate */
+    public static class CheckLINK implements CheckAttribs {
+
+        public void check( Lexer lexer, Node node )
+        {
+            AttVal rel = node.getAttrByName("rel");
+
+            node.checkUniqueAttributes(lexer);
+
+            if (rel != null && rel.value != null &&
+                  rel.value.equals("stylesheet"))
+            {
+                AttVal type = node.getAttrByName("type");
+
+                if (type == null)
+                {
+                    Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
+
+                    node.addAttribute("type", "text/css");
+                }
+            }
+        }
+    }
+
+    /*
+     * Accessors for the shared checker instances. Each one returns the
+     * corresponding private static field of this class; no new object is
+     * created per call.
+     */
+    public static CheckAttribs getCheckHTML()
+    {
+        return _checkHTML;
+    }
+
+    public static CheckAttribs getCheckSCRIPT()
+    {
+        return _checkSCRIPT;
+    }
+
+    public static CheckAttribs getCheckTABLE()
+    {
+        return _checkTABLE;
+    }
+
+    public static CheckAttribs getCheckCaption()
+    {
+        return _checkCaption;
+    }
+
+    public static CheckAttribs getCheckIMG()
+    {
+        return _checkIMG;
+    }
+
+    public static CheckAttribs getCheckAREA()
+    {
+        return _checkAREA;
+    }
+
+    public static CheckAttribs getCheckAnchor()
+    {
+        return _checkAnchor;
+    }
+
+    public static CheckAttribs getCheckMap()
+    {
+        return _checkMap;
+    }
+
+    public static CheckAttribs getCheckSTYLE()
+    {
+        return _checkStyle;
+    }
+
+    public static CheckAttribs getCheckTableCell()
+    {
+        return _checkTableCell;
+    }
+
+    public static CheckAttribs getCheckLINK()
+    {
+        return _checkLINK;
+    }
+
+    public static CheckAttribs getCheckHR()
+    {
+        return _checkHR;
+    }
+
+
+    private static CheckAttribs _checkHTML = new CheckHTML();
+    private static CheckAttribs _checkSCRIPT = new CheckSCRIPT();
+    private static CheckAttribs _checkTABLE = new CheckTABLE();
+    private static CheckAttribs _checkCaption = new CheckCaption();
+    private static CheckAttribs _checkIMG = new CheckIMG();
+    private static CheckAttribs _checkAREA = new CheckAREA();
+    private static CheckAttribs _checkAnchor = new CheckAnchor();
+    private static CheckAttribs _checkMap = new CheckMap();
+    private static CheckAttribs _checkStyle = new CheckSTYLE();
+    private static CheckAttribs _checkTableCell = new CheckTableCell();
+    private static CheckAttribs _checkLINK = new CheckLINK();
+    private static CheckAttribs _checkHR = new CheckHR();
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Clean.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Clean.java
new file mode 100644 (file)
index 0000000..66d2331
--- /dev/null
@@ -0,0 +1,1779 @@
+/*
+ * @(#)Clean.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Clean up misuse of presentation markup
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+  Filters from other formats such as Microsoft Word
+  often make excessive use of presentation markup such
+  as font tags, B, I, and the align attribute. By applying
+  a set of production rules, it is straightforward to
+  transform this to use CSS.
+
+  Some rules replace some of the children of an element by
+  style properties on the element, e.g.
+
+  <p><b>...</b></p> -> <p style="font-weight: bold">...</p>
+
+  Such rules are applied to the element's content and then
+  to the element itself until no more of the rules apply.
+  Having applied all the rules to an element, it will have
+  a style attribute with one or more properties. 
+
+  Other rules strip the element they apply to, replacing
+  it by style properties on the contents, e.g.
+  
+  <dir><li><p>...</li></dir> -> <p style="margin-left: 1em">...
+      
+  These rules are applied to an element before processing
+  its content and replace the current element by the first
+  element in the exposed content.
+
+  After applying both sets of rules, you can replace the
+  style attribute by a class value and style rule in the
+  document head. To support this, an association of styles
+  and class names is built.
+
+  A naive approach is to rely on string matching to test
+  when two property lists are the same. A better approach
+  would be to first sort the properties before matching.
+*/
+
+public class Clean {
+
+    /* running number used by gensymClass() to build the generated
+       class names "c1", "c2", ... */
+    private int classNum = 1;
+
+    /* tag table handed in through the constructor */
+    private TagTable tt;
+
+    /* Creates a Clean instance that keeps a reference to the given tag table. */
+    public Clean(TagTable tt)
+    {
+      this.tt = tt;
+    }
+
+    /*
+     Insert a name/value pair into a property list that is kept sorted
+     by name (String.compareTo order). If a property with the same name
+     is already in the list, the list is left as it is and the new value
+     is dropped. Returns the head of the resulting list.
+    */
+    private StyleProp insertProperty(StyleProp props, String name,
+                                            String value)
+    {
+        StyleProp before = null;
+        StyleProp cursor = props;
+
+        /* walk to the first entry whose name sorts after the new one */
+        for (; cursor != null; before = cursor, cursor = cursor.next)
+        {
+            int order = cursor.name.compareTo(name);
+
+            if (order == 0)
+            {
+                /* already defined, keep the existing value */
+                return props;
+            }
+
+            if (order > 0)
+            {
+                break;
+            }
+        }
+
+        StyleProp created;
+
+        if (cursor != null)
+            created = new StyleProp(name, value, cursor);   /* goes in front of cursor */
+        else
+            created = new StyleProp(name, value);           /* goes at the end */
+
+        if (before == null)
+        {
+            /* the new entry is the new head */
+            return created;
+        }
+
+        before.next = created;
+        return props;
+    }
+
+    /*
+     Create sorted linked list of properties from style string
+     It temporarily places nulls in place of ':' and ';' to
+     delimit the strings for the property name and value.
+     Some systems don't allow you to null literal strings,
+     so to avoid this, a copy is made first.
+    */
+    private StyleProp createProps(StyleProp prop, String style)
+    {
+        int name_end;
+        int value_end;
+        int value_start = 0;
+        int name_start = 0;
+        boolean more;
+
+        name_start = 0;
+        while (name_start < style.length())
+        {
+            while (name_start < style.length() &&
+                       style.charAt(name_start) == ' ')
+                ++name_start;
+
+            name_end = name_start;
+
+            while (name_end < style.length())
+            {
+                if (style.charAt(name_end) == ':')
+                {
+                    value_start = name_end + 1;
+                    break;
+                }
+
+                ++name_end;
+            }
+
+            if (name_end >= style.length() || style.charAt(name_end) != ':')
+                break;
+
+            while (value_start < style.length() &&
+                       style.charAt(value_start) == ' ')
+                ++value_start;
+
+            value_end = value_start;
+            more = false;
+
+            while (value_end < style.length())
+            {
+                if (style.charAt(value_end) == ';')
+                {
+                    more = true;
+                    break;
+                }
+
+                ++value_end;
+            }
+
+            prop = insertProperty(prop,
+                                  style.substring(name_start, name_end),
+                                  style.substring(value_start, value_end));
+
+            if (more)
+            {
+                name_start = value_end + 1;
+                continue;
+            }
+
+            break;
+        }
+
+        return prop;
+    }
+
+    /*
+     Build the style string for a property list: each property is
+     written as "name: value" and consecutive properties are separated
+     by "; ". An empty (null) list gives the empty string.
+    */
+    private String createPropString(StyleProp props)
+    {
+        /* StringBuffer avoids creating a new String for every piece */
+        StringBuffer style = new StringBuffer();
+
+        for (StyleProp prop = props; prop != null; prop = prop.next)
+        {
+            style.append(prop.name);
+            style.append(": ");
+            style.append(prop.value);
+
+            /* no separator after the last property */
+            if (prop.next != null)
+                style.append("; ");
+        }
+
+        return style.toString();
+    }
+
+    /*
+      Merge the properties of both strings into one style string.
+      style is parsed first, so for a name that occurs in both
+      strings the value from style is the one that is kept.
+    */
+    private String addProperty(String style, String property)
+    {
+        StyleProp merged = createProps(createProps(null, style), property);
+
+        return createPropString(merged);
+    }
+
+    /*
+      Generate the next class name: "c" followed by the current value
+      of classNum, which is then incremented. The tag argument is not used.
+    */
+    private String gensymClass(String tag)
+    {
+        int number = classNum;
+
+        classNum = number + 1;
+        return "c" + number;
+    }
+
+    /*
+      Return the class name recorded in lexer.styles for this tag and
+      property string. If there is no such entry yet, a new one with a
+      generated class name is put at the front of lexer.styles.
+    */
+    private String findStyle(Lexer lexer, String tag, String properties)
+    {
+        Style known = lexer.styles;
+
+        while (known != null)
+        {
+            boolean sameTag = known.tag.equals(tag);
+
+            if (sameTag && known.properties.equals(properties))
+            {
+                return known.tagClass;
+            }
+
+            known = known.next;
+        }
+
+        Style created = new Style(tag, gensymClass(tag), properties, lexer.styles);
+
+        lexer.styles = created;
+        return created.tagClass;
+    }
+
+    /*
+     Find style attribute in node, and replace it
+     by corresponding class attribute. Search for
+     class in style dictionary otherwise gensym
+     new class and add to dictionary.
+
+     If the node already has a class attribute, the
+     class name is appended to its value and the style
+     attribute is removed; otherwise the style attribute
+     itself is turned into the class attribute.
+    */
+    private void style2Rule(Lexer lexer, Node node)
+    {
+        AttVal styleattr, classattr;
+        String classname;
+
+        styleattr = node.getAttrByName("style");
+
+        if (styleattr == null)
+            return;
+
+        classname = findStyle(lexer, node.element, styleattr.value);
+        classattr = node.getAttrByName("class");
+
+        if (classattr != null)
+        {
+            /*
+             a class attribute without a value (or with an empty one)
+             just takes the class name; concatenating would otherwise
+             produce the text "null" or a leading blank
+            */
+            if (classattr.value == null || classattr.value.length() == 0)
+                classattr.value = classname;
+            else
+                classattr.value = classattr.value + " " + classname;
+
+            node.removeAttribute(styleattr);
+        }
+        else /* reuse style attribute for class attribute */
+        {
+            styleattr.attribute = "class";
+            styleattr.value = classname;
+        }
+    }
+
+    /*
+      Append the rule "selector { color: color }" plus a newline
+      to the lexer buffer. Nothing is written when color is null.
+    */
+    private void addColorRule(Lexer lexer, String selector, String color)
+    {
+        if (color == null)
+            return;
+
+        lexer.addStringLiteral(selector);
+        lexer.addStringLiteral(" { color: ");
+        lexer.addStringLiteral(color);
+        lexer.addStringLiteral(" }\n");
+    }
+
+    /*
+     Strip the presentational attributes from the body element
+     and write equivalent CSS rules to the lexer buffer:
+
+     background="foo" ->  body { background-image: url(foo) }
+     bgcolor="foo"    ->  body { background-color: foo }
+     text="foo"       ->  body { color: foo }
+     link="foo"       ->  :link { color: foo }
+     vlink="foo"      ->  :visited { color: foo }
+     alink="foo"      ->  :active { color: foo }
+
+     The first three are combined into a single body rule,
+     the link colours each get a rule of their own.
+    */
+    private void cleanBodyAttrs(Lexer lexer, Node body)
+    {
+        String image = null;
+        String background = null;
+        String foreground = null;
+        AttVal found;
+
+        found = body.getAttrByName("background");
+        if (found != null)
+        {
+            image = found.value;
+            found.value = null;
+            body.removeAttribute(found);
+        }
+
+        found = body.getAttrByName("bgcolor");
+        if (found != null)
+        {
+            background = found.value;
+            found.value = null;
+            body.removeAttribute(found);
+        }
+
+        found = body.getAttrByName("text");
+        if (found != null)
+        {
+            foreground = found.value;
+            found.value = null;
+            body.removeAttribute(found);
+        }
+
+        boolean needBodyRule =
+            !(image == null && background == null && foreground == null);
+
+        if (needBodyRule)
+        {
+            lexer.addStringLiteral(" body {\n");
+
+            if (image != null)
+            {
+                lexer.addStringLiteral("  background-image: url(");
+                lexer.addStringLiteral(image);
+                lexer.addStringLiteral(");\n");
+            }
+
+            if (background != null)
+            {
+                lexer.addStringLiteral("  background-color: ");
+                lexer.addStringLiteral(background);
+                lexer.addStringLiteral(";\n");
+            }
+
+            if (foreground != null)
+            {
+                lexer.addStringLiteral("  color: ");
+                lexer.addStringLiteral(foreground);
+                lexer.addStringLiteral(";\n");
+            }
+
+            lexer.addStringLiteral(" }\n");
+        }
+
+        /* link colours: attribute name and the selector it maps to */
+        String[] linkAttrs = { "link", "vlink", "alink" };
+        String[] selectors = { " :link", " :visited", " :active" };
+
+        for (int i = 0; i < linkAttrs.length; i++)
+        {
+            found = body.getAttrByName(linkAttrs[i]);
+
+            if (found != null)
+            {
+                addColorRule(lexer, selectors[i], found.value);
+                body.removeAttribute(found);
+            }
+        }
+    }
+
+    /*
+      Check the body element for presentational attributes
+      (background, bgcolor, text, link, vlink, alink).
+
+      Returns false and sets the Report.USING_BODY bit in
+      lexer.badLayout as soon as one of them is present.
+      Returns true otherwise, also when no body is found.
+    */
+    private boolean niceBody(Lexer lexer, Node doc)
+    {
+        Node body = doc.findBody(lexer.configuration.tt);
+
+        if (body == null)
+            return true;
+
+        String[] presentational =
+        {
+            "background", "bgcolor", "text", "link", "vlink", "alink"
+        };
+
+        for (int i = 0; i < presentational.length; i++)
+        {
+            if (body.getAttrByName(presentational[i]) != null)
+            {
+                lexer.badLayout |= Report.USING_BODY;
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /*
+      create style element using rules from dictionary
+
+      Does nothing when lexer.styles is null and niceBody()
+      returns true. Otherwise an implicit style node with a
+      type="text/css" attribute is built. Its text is appended
+      to the lexer buffer: first whatever cleanBodyAttrs() emits
+      for the body element (if one is found), then one line of
+      the form " tag.class {properties}" for each entry of
+      lexer.styles. The finished node is added at the end of
+      the head element; if findHEAD() returns null the node is
+      not inserted anywhere.
+    */
+    private void createStyleElement(Lexer lexer, Node doc)
+    {
+        Node node, head, body;
+        Style style;
+        AttVal av;
+
+        if (lexer.styles == null && niceBody(lexer, doc))
+            return;
+
+        node = lexer.newNode(Node.StartTag, null, 0, 0, "style");
+        node.implicit = true;
+
+        /* insert type attribute */
+        av = new AttVal(null, null, '"', "type", "text/css");
+        av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
+        node.attributes = av;
+
+        body = doc.findBody(lexer.configuration.tt);
+
+        lexer.txtstart = lexer.lexsize; /* the generated CSS text starts here */
+
+        if (body != null)
+            cleanBodyAttrs(lexer, body);
+
+        for (style = lexer.styles; style != null; style = style.next)
+        {
+            lexer.addCharToLexer(' ');
+            lexer.addStringLiteral(style.tag);
+            lexer.addCharToLexer('.');
+            lexer.addStringLiteral(style.tagClass);
+            lexer.addCharToLexer(' ');
+            lexer.addCharToLexer('{');
+            lexer.addStringLiteral(style.properties);
+            lexer.addCharToLexer('}');
+            lexer.addCharToLexer('\n');
+        }
+
+        lexer.txtend = lexer.lexsize; /* ... and ends here */
+
+        Node.insertNodeAtEnd(node,
+                             lexer.newNode(Node.TextNode,
+                                      lexer.lexbuf,
+                                      lexer.txtstart,
+                                      lexer.txtend));
+
+        /*
+         now insert style element into document head
+
+         doc is root node. search its children for html node
+         the head node should be first child of html node
+        */
+
+        head = doc.findHEAD(lexer.configuration.tt);
+    
+        if (head != null)
+            Node.insertNodeAtEnd(head, node);
+    }
+
+    /*
+      Make the neighbours of node agree with node's own prev,
+      next and parent fields: the previous sibling (or the
+      parent's content pointer) and the next sibling (or the
+      parent's last pointer) are pointed back at node, and all
+      children of node get node as their parent.
+    */
+    private void fixNodeLinks(Node node)
+    {
+        if (node.prev == null)
+            node.parent.content = node;
+        else
+            node.prev.next = node;
+
+        if (node.next == null)
+            node.parent.last = node;
+        else
+            node.next.prev = node;
+
+        Node kid = node.content;
+
+        while (kid != null)
+        {
+            kid.parent = node;
+            kid = kid.next;
+        }
+    }
+
+    /*
+     Remove the child of node, where node has one and only
+     one child: the child's own children are hoisted up to
+     become the children of node.
+    */
+    private void stripOnlyChild(Node node)
+    {
+        Node only = node.content;
+
+        /* adopt the grandchildren and detach them from the child */
+        node.content = only.content;
+        node.last = only.last;
+        only.content = null;
+
+        Node adopted = node.content;
+
+        while (adopted != null)
+        {
+            adopted.parent = node;
+            adopted = adopted.next;
+        }
+    }
+
+    /*
+      used to strip font start and end tags
+
+      Unlinks element from the child list of its parent. If the
+      element has content, its children are spliced into the
+      place it occupied, re-parented to element's parent, and
+      pnode is set to the first of them. If it has no content,
+      its neighbours are joined up and pnode is set to
+      element.next (which may be null).
+
+      On return element.next and element.content are null;
+      element.prev and element.parent are left unchanged.
+    */
+    private void discardContainer(Node element, MutableObject pnode)
+    {
+        Node node;
+        Node parent = element.parent;
+
+        if (element.content != null)
+        {
+            element.last.next = element.next;
+
+            if (element.next != null)
+            {
+                element.next.prev = element.last;
+                element.last.next = element.next; /* repeats the assignment made above */
+            }
+            else
+                parent.last = element.last;
+
+            if (element.prev != null)
+            {
+                element.content.prev = element.prev;
+                element.prev.next = element.content;
+            }
+            else
+                parent.content = element.content;
+
+            for (node = element.content; node != null; node = node.next)
+                node.parent = parent;
+
+            pnode.setObject(element.content);
+        }
+        else
+        {
+            if (element.next != null)
+                element.next.prev = element.prev;
+            else
+                parent.last = element.prev;
+
+            if (element.prev != null)
+                element.prev.next = element.next;
+            else
+                parent.content = element.next;
+
+            pnode.setObject(element.next);
+        }
+
+        element.next = null;
+        element.content = null;
+    }
+
+    /*
+     Add a style property to node. If the node already has
+     a style attribute the property is merged into its value
+     by addProperty(); otherwise a new style attribute holding
+     just this property is put at the front of the node's
+     attribute list.
+    */
+    private void addStyleProperty(Node node, String property)
+    {
+        AttVal styleAttr = node.attributes;
+
+        /* locate an existing style attribute, if any */
+        while (styleAttr != null && !styleAttr.attribute.equals("style"))
+        {
+            styleAttr = styleAttr.next;
+        }
+
+        if (styleAttr == null)
+        {
+            /* none yet, so create one */
+            styleAttr = new AttVal(node.attributes, null, '"', "style", property);
+            styleAttr.dict = AttributeTable.getDefaultAttributeTable().findAttribute(styleAttr);
+            node.attributes = styleAttr;
+        }
+        else
+        {
+            styleAttr.value = addProperty(styleAttr.value, property);
+        }
+    }
+
+    /*
+      Build a new style string holding the combined
+      properties of s1 and s2.
+
+      The properties of s1 and then those of s2 are entered
+      into one property list by createProps(); the list is
+      then written out again by createPropString().
+    */
+    private String mergeProperties(String s1, String s2)
+    {
+        StyleProp combined = createProps(null, s1);
+
+        combined = createProps(combined, s2);
+        return createPropString(combined);
+    }
+
+    /*
+      Fold the style of child into the style of node.
+
+      If both have a style value, node's style attribute gets
+      the merged properties. If only the child has one, node
+      gets a new style attribute with the child's value at the
+      front of its attribute list. Otherwise nothing changes.
+      The child itself is never modified.
+    */
+    private void mergeStyles(Node node, Node child)
+    {
+        String childStyle = null;
+        String ownStyle = null;
+        AttVal cursor;
+
+        /* style value of the child, if it has a style attribute */
+        cursor = child.attributes;
+        while (cursor != null && !cursor.attribute.equals("style"))
+            cursor = cursor.next;
+
+        if (cursor != null)
+            childStyle = cursor.value;
+
+        /* same for node; cursor is left on node's style attribute */
+        cursor = node.attributes;
+        while (cursor != null && !cursor.attribute.equals("style"))
+            cursor = cursor.next;
+
+        if (cursor != null)
+            ownStyle = cursor.value;
+
+        if (ownStyle == null)
+        {
+            if (childStyle != null)  /* copy style of child */
+            {
+                AttVal copy = new AttVal(node.attributes, null, '"', "style", childStyle);
+                copy.dict = AttributeTable.getDefaultAttributeTable().findAttribute(copy);
+                node.attributes = copy;
+            }
+        }
+        else if (childStyle != null)  /* merge styles from both */
+        {
+            cursor.value = mergeProperties(ownStyle, childStyle);
+        }
+    }
+
+    /*
+      Map the value of a font size attribute to a CSS font-size.
+
+      "0".."6"    entry of a fixed percentage table; "3" maps
+                  to null
+      "-n"        0.8 to the power n as a percentage, for n in
+                  0..6; any other "-..." value gives "smaller"
+      "?n"        (any other first character, e.g. '+') 1.2 to
+                  the power n as a percentage, for n in 0..6
+      otherwise   "larger"
+
+      Only the first one or two characters of size are looked
+      at, and a computed percentage is truncated to an integer.
+    */
+    private String fontSize2Name(String size)
+    {
+        String[] absolute = { "60%", "70%", "80%", null, "120%", "150%", "200%" };
+
+        if (size.length() == 0)
+            return "larger";
+
+        char lead = size.charAt(0);
+
+        if (lead >= '0' && lead <= '6')
+            return absolute[lead - '0'];
+
+        boolean shrink = (lead == '-');
+        boolean hasStep = size.length() > 1
+                          && size.charAt(1) >= '0' && size.charAt(1) <= '6';
+
+        if (!hasStep)
+            return shrink ? "smaller" : "larger";
+
+        /* relative size: one factor of 0.8 or 1.2 per step */
+        double factor = shrink ? 0.8 : 1.2;
+        double scale = 1.0;
+
+        for (int steps = size.charAt(1) - '0'; steps > 0; --steps)
+            scale *= factor;
+
+        scale *= 100.0;
+        return "" + (int)scale + "%";
+    }
+
+    /* add a font-family property with the given face to node */
+    private void addFontFace(Node node, String face)
+    {
+        String declaration = "font-family: " + face;
+
+        addStyleProperty(node, declaration);
+    }
+
+    /*
+      Apply a font size to node. A <p> with size 6, 5 or 4 is
+      turned into an h1, h2 or h3 element respectively. In all
+      other cases the size is translated by fontSize2Name() and,
+      unless that yields null, added as a font-size property.
+    */
+    private void addFontSize(Node node, String size)
+    {
+        if (node.tag == tt.tagP)
+        {
+            String heading = null;
+
+            if (size.equals("6"))
+                heading = "h1";
+            else if (size.equals("5"))
+                heading = "h2";
+            else if (size.equals("4"))
+                heading = "h3";
+
+            if (heading != null)
+            {
+                node.element = heading;
+                tt.findTag(node);
+                return;
+            }
+        }
+
+        String cssSize = fontSize2Name(size);
+
+        if (cssSize == null)
+            return;
+
+        addStyleProperty(node, "font-size: " + cssSize);
+    }
+
+    /* add a color property with the given colour to node */
+    private void addFontColor(Node node, String color)
+    {
+        String declaration = "color: " + color;
+
+        addStyleProperty(node, declaration);
+    }
+
+    /*
+      Add a text-align property to node, with the alignment
+      value forced to lower case.
+
+      The conversion uses a fixed locale: with the default
+      locale a value such as "RIGHT" would come out as "r\u0131ght"
+      (dotless i) on a Turkish system, which is not a valid
+      CSS keyword.
+    */
+    private void addAlign(Node node, String align)
+    {
+        /* force alignment value to lower case, independent of the default locale */
+        addStyleProperty(node, "text-align: " + align.toLowerCase(java.util.Locale.ENGLISH));
+    }
+
+    /*
+     Add style properties to node corresponding to the font
+     face, size and color attributes found in the attribute
+     list starting at av. Other attributes are ignored.
+
+     Attributes without a value (e.g. <font size>) are skipped:
+     addFontSize() would throw a NullPointerException on a null
+     size, and a null face or color would end up in the style
+     as the literal text "null".
+    */
+    private void addFontStyles(Node node, AttVal av)
+    {
+        while (av != null)
+        {
+            if (av.value != null)
+            {
+                if (av.attribute.equals("face"))
+                    addFontFace(node, av.value);
+                else if (av.attribute.equals("size"))
+                    addFontSize(node, av.value);
+                else if (av.attribute.equals("color"))
+                    addFontColor(node, av.value);
+            }
+
+            av = av.next;
+        }
+    }
+
+    /*
+        Symptom: <p align=center>
+        Action: <p style="text-align: center">
+
+        The first align attribute is unlinked from the node's
+        attribute list; if it has a value, that value is added
+        as a text-align style property.
+    */
+    private void textAlign(Lexer lexer, Node node)
+    {
+        AttVal before = null;
+        AttVal current = node.attributes;
+
+        while (current != null && !current.attribute.equals("align"))
+        {
+            before = current;
+            current = current.next;
+        }
+
+        if (current == null)
+            return;
+
+        /* unlink the align attribute before touching the style */
+        if (before == null)
+            node.attributes = current.next;
+        else
+            before.next = current.next;
+
+        if (current.value != null)
+            addAlign(node, current.value);
+    }
+
+    /*
+       The clean up rules use the pnode argument to return the
+       next node when the original node has been deleted
+    */
+
+    /*
+        Symptom: <dir>, <ul> or <ol> whose only child is an
+        implicit <li>
+        Action: coerce the list to a <div> with a left margin
+        of 2em and strip the <li>, keeping its content.
+
+        Returns true when the node was converted. pnode is
+        not modified.
+    */
+
+    private boolean dir2Div(Lexer lexer, Node node, MutableObject pnode)
+    {
+        boolean isList = node.tag == tt.tagDir
+                         || node.tag == tt.tagUl
+                         || node.tag == tt.tagOl;
+
+        if (!isList)
+            return false;
+
+        Node item = node.content;
+
+        /* there must be exactly one child ... */
+        if (item == null || item.next != null)
+            return false;
+
+        /* ... and it must be an implicit <li> */
+        if (item.tag != tt.tagLi || !item.implicit)
+            return false;
+
+        /* coerce dir to div */
+        node.tag = tt.tagDiv;
+        node.element = "div";
+        addStyleProperty(node, "margin-left: 2em");
+        stripOnlyChild(node);
+        return true;
+    }
+
+    /*
+        Symptom: <center>
+        Action: replace <center> by <div style="text-align: center">
+
+        When the DropFontTags option is set the center element
+        is discarded instead (via discardContainer, which also
+        sets pnode) and an inferred <br> is linked in: after
+        the last of its former children, or, if it had no
+        content, in the place the element occupied. pnode is
+        not changed again after the <br> has been inserted.
+
+        Returns true whenever node is a center element.
+    */
+
+    private boolean center2Div(Lexer lexer, Node node, MutableObject pnode)
+    {
+        if (node.tag == tt.tagCenter)
+        {
+            if (lexer.configuration.DropFontTags)
+            {
+                if (node.content != null)
+                {
+                    Node last = node.last;
+                    Node parent = node.parent;
+
+                    discardContainer(node, pnode);
+
+                    node = lexer.inferredTag("br");
+
+                    if (last.next != null)
+                        last.next.prev = node;
+
+                    node.next = last.next;
+                    last.next = node;
+                    node.prev = last;
+
+                    if (parent.last == last)
+                        parent.last = node;
+
+                    node.parent = parent;
+                }
+                else
+                {
+                    Node prev = node.prev;
+                    Node next = node.next;
+                    Node parent = node.parent;
+                    discardContainer(node, pnode);
+
+                    node = lexer.inferredTag("br");
+                    node.next = next;
+                    node.prev = prev;
+                    node.parent = parent;
+
+                    if (next != null)
+                        next.prev = node;
+                    else
+                        parent.last = node;
+
+                    if (prev != null)
+                        prev.next = node;
+                    else
+                        parent.content = node;
+                }
+
+                return true;
+            }
+            node.tag = tt.tagDiv;
+            node.element = "div";
+            addStyleProperty(node, "text-align: center");
+            return true;
+        }
+
+        return false;
+    }
+
+    /*
+        Symptom <div><div>...</div></div>
+        Action: merge the two divs
+
+      The style of the inner div is merged into the outer one
+      and the inner div is stripped, leaving its children.
+      This is useful after nested <dir>s used by Word for
+      indenting have been converted to <div>s.
+
+      Returns true when a merge took place. pnode is not
+      modified.
+    */
+    private boolean mergeDivs(Lexer lexer, Node node, MutableObject pnode)
+    {
+        if (node.tag != tt.tagDiv)
+            return false;
+
+        Node inner = node.content;
+
+        /* the outer div must hold exactly one child, itself a div */
+        if (inner == null || inner.tag != tt.tagDiv || inner.next != null)
+            return false;
+
+        mergeStyles(node, inner);
+        stripOnlyChild(node);
+        return true;
+    }
+
+    /*
+        Symptom: <ul><li><ul>...</ul></li></ul>
+        Action: discard outer list
+
+        Applies when node is a ul or ol with a single child
+        whose first child is a list with the same tag as node.
+        The inner list takes over the position of the outer
+        one; if the sibling before it is a ul or ol as well,
+        the inner list is moved to the end of that sibling's
+        last child instead. pnode is set to the node that
+        followed the outer list (possibly null) and the
+        relocated list is passed to cleanNode().
+
+        NOTE(review): the single child is not checked to be an
+        <li>, and list.last is used without a null check, so
+        this presumably relies on the previous list never being
+        empty - confirm.
+    */
+
+    private boolean nestedList(Lexer lexer, Node node, MutableObject pnode)
+    {
+        Node child, list;
+
+        if (node.tag == tt.tagUl || node.tag == tt.tagOl)
+        {
+            child = node.content;
+
+            if (child == null)
+                return false;
+
+            /* check child has no peers */
+
+            if (child.next != null)
+                return false;
+
+            list = child.content;
+
+            if (list == null)
+                return false;
+
+            if (list.tag != node.tag)
+                return false;
+
+            pnode.setObject(node.next);
+
+            /* move inner list node into position of outer node */
+            list.prev = node.prev;
+            list.next = node.next;
+            list.parent = node.parent;
+            fixNodeLinks(list);
+
+            /* get rid of outer ul and its li */
+            child.content = null;
+            node.content = null;
+            node.next = null;
+
+            /*
+              If prev node was a list the chances are this node
+              should be appended to that list. Word has no way of
+              recognizing nested lists and just uses indents
+            */
+
+            if (list.prev != null)
+            {
+                node = list;
+                list = node.prev;
+
+                if (list.tag == tt.tagUl || list.tag == tt.tagOl)
+                {
+                    list.next = node.next;
+
+                    if (list.next != null)
+                        list.next.prev = list;
+
+                    child = list.last;  /* <li> */
+
+                    node.parent = child;
+                    node.next = null;
+                    node.prev = child.last;
+                    fixNodeLinks(node);
+                }
+            }
+
+            cleanNode(lexer, node);
+            return true;
+        }
+
+        return false;
+    }
+
+    /*
+        Symptom: the only child of a block-level element is a
+        presentation element such as B, I or FONT
+
+        Action: move the presentation onto the block as style
+        properties and strip the child element, leaving its
+        children.
+
+      example:
+
+        <p><b>Draft Recommended Practice</b></p>
+
+      becomes:
+
+        <p style="font-weight: bold">Draft Recommended Practice</p>
+
+      This code also replaces the align attribute by a style
+      attribute, even when no child is stripped afterwards.
+      However, to avoid CSS problems with Navigator 4, this
+      isn't done for caption, and table, tr and li elements
+      are left alone altogether.
+
+      Nodes whose tag is null are skipped: they have no content
+      model to test, and dereferencing node.tag would otherwise
+      throw a NullPointerException.
+
+      Returns true when a child element was stripped. pnode is
+      not modified.
+    */
+    private boolean blockStyle(Lexer lexer, Node node, MutableObject pnode)
+    {
+        Node child;
+
+        if (node.tag == null)
+            return false;
+
+        if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) == 0)
+            return false;
+
+        if (node.tag == tt.tagTable
+                || node.tag == tt.tagTr
+                || node.tag == tt.tagLi)
+            return false;
+
+        /* check for align attribute */
+        if (node.tag != tt.tagCaption)
+            textAlign(lexer, node);
+
+        child = node.content;
+
+        /* the block must have exactly one child */
+        if (child == null || child.next != null)
+            return false;
+
+        if (child.tag == tt.tagB)
+        {
+            mergeStyles(node, child);
+            addStyleProperty(node, "font-weight: bold");
+            stripOnlyChild(node);
+            return true;
+        }
+
+        if (child.tag == tt.tagI)
+        {
+            mergeStyles(node, child);
+            addStyleProperty(node, "font-style: italic");
+            stripOnlyChild(node);
+            return true;
+        }
+
+        if (child.tag == tt.tagFont)
+        {
+            mergeStyles(node, child);
+            addFontStyles(node, child.attributes);
+            stripOnlyChild(node);
+            return true;
+        }
+
+        return false;
+    }
+
+    /*
+      Same idea as blockStyle(), for the only child of a table
+      cell or an inline element such as em: a lone B, I or FONT
+      child is replaced by style properties on its parent.
+      B and I are only handled when the LogicalEmphasis option
+      is set. FONT elements themselves are never used as the
+      parent here.
+
+      Nodes whose tag is null are skipped, since there is no
+      content model to test and dereferencing node.tag would
+      throw a NullPointerException.
+
+      Returns true when a child element was stripped. pnode is
+      not modified.
+    */
+    private boolean inlineStyle(Lexer lexer, Node node, MutableObject pnode)
+    {
+        Node child;
+
+        if (node.tag == null || node.tag == tt.tagFont)
+            return false;
+
+        if ((node.tag.model & (Dict.CM_INLINE|Dict.CM_ROW)) == 0)
+            return false;
+
+        child = node.content;
+
+        /* the element must have exactly one child */
+        if (child == null || child.next != null)
+            return false;
+
+        if (child.tag == tt.tagB && lexer.configuration.LogicalEmphasis)
+        {
+            mergeStyles(node, child);
+            addStyleProperty(node, "font-weight: bold");
+            stripOnlyChild(node);
+            return true;
+        }
+
+        if (child.tag == tt.tagI && lexer.configuration.LogicalEmphasis)
+        {
+            mergeStyles(node, child);
+            addStyleProperty(node, "font-style: italic");
+            stripOnlyChild(node);
+            return true;
+        }
+
+        if (child.tag == tt.tagFont)
+        {
+            mergeStyles(node, child);
+            addFontStyles(node, child.attributes);
+            stripOnlyChild(node);
+            return true;
+        }
+
+        return false;
+    }
+
+    /*
+      Replace a font element by a span element whose only
+      attribute is a style attribute built from the font's
+      face, size and color attributes.
+
+      With the DropFontTags option the font element is simply
+      discarded (pnode is set by discardContainer) and false
+      is returned. A font element that is the only child of
+      its parent is left alone. Returns true only when the
+      element was turned into a span.
+    */
+    private boolean font2Span(Lexer lexer, Node node, MutableObject pnode)
+    {
+        if (node.tag != tt.tagFont)
+            return false;
+
+        if (lexer.configuration.DropFontTags)
+        {
+            discardContainer(node, pnode);
+            return false;
+        }
+
+        /* if FONT is only child of parent element then leave alone */
+        boolean onlyChild = (node.parent.content == node) && (node.next == null);
+
+        if (onlyChild)
+            return false;
+
+        addFontStyles(node, node.attributes);
+
+        /* keep the style attribute, drop every other attribute */
+        AttVal kept = null;
+
+        for (AttVal cursor = node.attributes; cursor != null; )
+        {
+            AttVal following = cursor.next;
+
+            if (cursor.attribute.equals("style"))
+            {
+                cursor.next = null;
+                kept = cursor;
+            }
+
+            cursor = following;
+        }
+
+        node.attributes = kept;
+        node.tag = tt.tagSpan;
+        node.element = "span";
+        return true;
+    }
+
+    /*
+      Applies all matching rules to a node.
+
+      The rules are tried in a fixed order: dir2Div, nestedList,
+      center2Div, mergeDivs, blockStyle, inlineStyle, font2Span.
+      As soon as one of them reports a change, the whole
+      sequence is started again on the node that rule left in
+      its pnode argument. This goes on until no rule applies
+      or the current node is no longer an element.
+
+      A rule may leave null in pnode, for instance when the
+      node it removed had no following sibling. The loop stops
+      in that case instead of dereferencing the null node.
+
+      Returns the node to carry on with, which may be null.
+    */
+    private Node cleanNode(Lexer lexer, Node node)
+    {
+        Node next = node;
+        MutableObject o = new MutableObject();
+
+        while (node != null && node.isElement())
+        {
+            o.setObject(next);
+
+            /* || stops at the first rule that changed something */
+            boolean changed = dir2Div(lexer, node, o)
+                              || nestedList(lexer, node, o)
+                              || center2Div(lexer, node, o)
+                              || mergeDivs(lexer, node, o)
+                              || blockStyle(lexer, node, o)
+                              || inlineStyle(lexer, node, o)
+                              || font2Span(lexer, node, o);
+
+            next = (Node)o.getObject();
+
+            if (!changed)
+                break;
+
+            node = next;
+        }
+
+        return next;
+    }
+
+    /*
+      Run the clean up rules over the tree below node, children
+      first, then over node itself. Returns what cleanNode()
+      returns for node.
+
+      Cleaning a child can return null when the child was
+      removed and nothing follows it. The walk over the
+      children ends there, as there is no next sibling to
+      move on to.
+    */
+    private Node createStyleProperties(Lexer lexer, Node node)
+    {
+        Node child;
+
+        if (node.content != null)
+        {
+            for (child = node.content; child != null; child = child.next)
+            {
+                child = createStyleProperties(lexer, child);
+
+                if (child == null)
+                    break;
+            }
+        }
+
+        return cleanNode(lexer, node);
+    }
+
+    /*
+      Apply style2Rule() to every node of the tree below and
+      including node, children before their parent.
+    */
+    private void defineStyleRules(Lexer lexer, Node node)
+    {
+        Node kid = node.content;
+
+        while (kid != null)
+        {
+            defineStyleRules(lexer, kid);
+            kid = kid.next;
+        }
+
+        style2Rule(lexer, node);
+    }
+
+    public void cleanTree(Lexer lexer, Node doc)
+    {
+        doc = createStyleProperties(lexer, doc);
+
+        if (!lexer.configuration.MakeClean)
+        {
+            defineStyleRules(lexer, doc);
+            createStyleElement(lexer, doc);
+        }
+    }
+
+    /* simplifies <b><b> ... </b> ...</b> etc. */
+    public void nestedEmphasis(Node node)
+    {
+        MutableObject o = new MutableObject();
+        Node next;
+
+        while (node != null)
+        {
+            next = node.next;
+
+            if ((node.tag == tt.tagB || node.tag == tt.tagI)
+                && node.parent != null && node.parent.tag == node.tag)
+            {
+                /* strip redundant inner element */
+                o.setObject(next);
+                discardContainer(node, o);
+                next = (Node)o.getObject();
+                node = next;
+                continue;
+            }
+
+            if (node.content != null)
+                nestedEmphasis(node.content);
+
+            node = next;
+        }
+    }
+
+    /* replace i by em and b by strong */
+    public void emFromI(Node node)
+    {
+        while (node != null)
+        {
+            if (node.tag == tt.tagI)
+            {
+                node.element = tt.tagEm.name;
+                node.tag = tt.tagEm;
+            }
+            else if (node.tag == tt.tagB)
+            {
+                node.element = tt.tagStrong.name;
+                node.tag = tt.tagStrong;
+            }
+
+            if (node.content != null)
+                emFromI(node.content);
+
+            node = node.next;
+        }
+    }
+
+    /*
+     Some people use dir or ul without an li
+     to indent the content. The pattern to
+     look for is a list with a single implicit
+     li. This is recursively replaced by an
+     implicit blockquote.
+    */
+    public void list2BQ(Node node)
+    {
+        while (node != null)
+        {
+            if (node.content != null)
+                list2BQ(node.content);
+
+            if (node.tag != null && node.tag.parser == ParserImpl.getParseList() &&
+                node.hasOneChild() && node.content.implicit)
+            {
+                stripOnlyChild(node);
+                node.element = tt.tagBlockquote.name;
+                node.tag = tt.tagBlockquote;
+                node.implicit = true;
+            }
+
+            node = node.next;
+        }
+    }
+
+    /*
+     Replace implicit blockquote by div with an indent
+     taking care to reduce nested blockquotes to a single
+     div with the indent set to match the nesting depth
+    */
+    public void bQ2Div(Node node)
+    {
+        int indent;
+        String indent_buf;
+
+        while (node != null)
+        {
+            if (node.tag == tt.tagBlockquote && node.implicit)
+            {
+                indent = 1;
+
+                while(node.hasOneChild() &&
+                      node.content.tag == tt.tagBlockquote &&
+                      node.implicit)
+                {
+                    ++indent;
+                    stripOnlyChild(node);
+                }
+
+                if (node.content != null)
+                    bQ2Div(node.content);
+
+                indent_buf = "margin-left: " +
+                             (new Integer(2*indent)).toString() + "em";
+
+                node.element = tt.tagDiv.name;
+                node.tag = tt.tagDiv;
+                node.addAttribute("style", indent_buf);
+            }
+            else if (node.content != null)
+                bQ2Div(node.content);
+
+
+            node = node.next;
+        }
+    }
+
+    /* node is <![if ...]> prune up to <![endif]> */
+    public Node pruneSection(Lexer lexer, Node node)
+    {
+        for (;;)
+        {
+            /* discard node and returns next */
+            node = Node.discardElement(node);
+
+            if (node == null)
+                return null;
+        
+            if (node.type == Node.SectionTag)
+            {
+                if ((Lexer.getString(node.textarray, node.start, 2)).equals("if"))
+                {
+                    node = pruneSection(lexer, node);
+                    continue;
+                }
+
+                if ((Lexer.getString(node.textarray, node.start, 5)).equals("endif"))
+                {
+                    node = Node.discardElement(node);
+                    break;
+                }
+            }
+        }
+
+        return node;
+    }
+
+    public void dropSections(Lexer lexer, Node node)
+    {
+        while (node != null)
+        {
+            if (node.type == Node.SectionTag)
+            {
+                /* prune up to matching endif */
+                if ((Lexer.getString(node.textarray, node.start, 2)).equals("if"))
+                {
+                    node = pruneSection(lexer, node);
+                    continue;
+                }
+
+                /* discard others as well */
+                node = Node.discardElement(node);
+                continue;
+            }
+
+            if (node.content != null)
+                dropSections(lexer, node.content);
+
+            node = node.next;
+        }
+    }
+
+    public void purgeAttributes(Node node)
+    {
+        AttVal attr = node.attributes;
+        AttVal next = null;
+        AttVal prev = null;
+
+        while (attr != null)
+        {
+            next = attr.next;
+
+            /* special check for class="Code" denoting pre text */
+            if (attr.attribute != null &&
+                attr.value != null &&
+                attr.attribute.equals("class") &&
+                attr.value.equals("Code"))
+            {
+                prev = attr;
+            }
+            else if (attr.attribute != null &&
+                (attr.attribute.equals("class") ||
+                 attr.attribute.equals("style") ||
+                 attr.attribute.equals("lang") ||
+                 attr.attribute.startsWith("x:") ||
+                 ((attr.attribute.equals("height") || attr.attribute.equals("width")) &&
+                    (node.tag == tt.tagTd || node.tag == tt.tagTr || node.tag == tt.tagTh))))
+            {
+                if (prev != null)
+                    prev.next = next;
+                else
+                    node.attributes = next;
+
+            }
+            else
+                prev = attr;
+
+            attr = next;
+        }
+    }
+
+    /*
+     Word2000 uses span excessively, so we strip span out.
+
+     The span's children are first cleaned with cleanWord2000,
+     then moved out of the span to take its place among its
+     siblings, and finally the (now empty) span is discarded.
+
+     Returns the node that followed the span before it was
+     discarded (null if the span was the last sibling).
+    */
+    public Node stripSpan(Lexer lexer, Node span)
+    {
+        Node node;
+        Node prev = null;
+        Node content;
+
+        /*
+         deal with span elements that have content
+         by splicing the content in place of the span
+         after having processed it
+        */
+
+        cleanWord2000(lexer, span.content);
+        content = span.content;
+
+        /*
+         pick the anchor after which children are inserted:
+         the span's previous sibling if there is one, otherwise
+         the first child, which is placed just before the span
+        */
+        if (span.prev != null)
+            prev = span.prev;
+        else if (content != null)
+        {
+            node = content;
+            content = content.next;
+            Node.removeNode(node);
+            Node.insertNodeBeforeElement(span, node);
+            prev = node;
+        }
+
+        /* move the remaining children, one by one, after the anchor */
+        while (content != null)
+        {
+            node = content;
+            content = content.next;
+            Node.removeNode(node);
+            Node.insertNodeAfterElement(prev, node);
+            prev = node;
+        }
+
+        /*
+         span was the last child of its parent: point the
+         parent's last link at the final node visited.
+         NOTE(review): prev is still null here when the span has
+         neither a previous sibling nor content, and span.parent
+         is dereferenced without a null check - confirm callers
+         never pass a parentless span.
+        */
+        if (span.next == null)
+            span.parent.last = prev;
+
+        /* remember the successor before the span is unlinked */
+        node = span.next;
+        span.content = null;
+        Node.discardElement(span);
+        return node;
+    }
+
+    /*
+     Map non-breaking spaces (U+00A0) to regular spaces in
+     every text node of the sibling chain starting at node
+     and in the subtrees below it.
+
+     The text is rewritten in place: each character is decoded
+     with PPrint.getUTF8 and written back with PPrint.putUTF8.
+     Because a non-breaking space is replaced by a single-unit
+     space, the rewritten text can be shorter than the
+     original, so node.end is moved back to the end of what
+     was actually written.
+
+     NOTE(review): assumes node.textarray holds UTF-8 code
+     units (it is passed to getUTF8/putUTF8). The value is
+     masked with 0xFF so that a signed element is read as
+     0..255; without the mask the tests for "> 0x7F" and
+     "== 160" cannot succeed on a signed byte array.
+    */
+    private void normalizeSpaces(Lexer lexer, Node node)
+    {
+        while (node != null)
+        {
+            if (node.content != null)
+                normalizeSpaces(lexer, node.content);
+
+            if (node.type == Node.TextNode)
+            {
+                int i;
+                MutableInteger c = new MutableInteger();
+                int p = node.start;      /* write position, never ahead of i */
+
+                for (i = node.start; i < node.end; ++i)
+                {
+                    c.value = node.textarray[i] & 0xFF;
+
+                    /* look for UTF-8 multibyte character */
+                    if (c.value > 0x7F)
+                        i += PPrint.getUTF8(node.textarray, i, c);
+
+                    if (c.value == 160)
+                        c.value = ' ';
+
+                    p = PPrint.putUTF8(node.textarray, p, c.value);
+                }
+
+                /* drop the stale tail left behind when the text shrank */
+                node.end = p;
+            }
+
+            node = node.next;
+        }
+    }
+
+    /*
+     This is a major clean up to strip out all the extra stuff you get
+     when you save as web page from Word 2000. It doesn't yet know what
+     to do with VML tags, but these will appear as errors unless you
+     declare them as new tags, such as o:p which needs to be declared
+     as inline.
+    */
+    public void cleanWord2000(Lexer lexer, Node node)
+    {
+        /* used to a list from a sequence of bulletted p's */
+        Node list = null;
+
+        while (node != null)
+        {
+            /* discard Word's style verbiage */
+            if (node.tag == tt.tagStyle ||
+                node.tag == tt.tagMeta ||
+                node.type == Node.CommentTag)
+            {
+                node = Node.discardElement(node);
+                continue;
+            }
+
+            /* strip out all span tags Word scatters so liberally! */
+            if (node.tag == tt.tagSpan)
+            {
+                node = stripSpan(lexer, node);
+                continue;
+            }
+
+            /* get rid of Word's xmlns attributes */
+            if (node.tag == tt.tagHtml)
+            {
+                /* check that it's a Word 2000 document */
+                if (node.getAttrByName("xmlns:o") == null)
+                    return;
+            }
+
+            if (node.tag == tt.tagLink)
+            {
+                AttVal attr = node.getAttrByName("rel");
+
+                if (attr != null && attr.value != null &&
+                    attr.value.equals("File-List"))
+                {
+                    node = Node.discardElement(node);
+                    continue;
+                }
+            }
+
+            /* discard empty paragraphs */
+            if (node.content == null && node.tag == tt.tagP)
+            {
+                node = Node.discardElement(node);
+                continue;
+            }
+
+            if (node.tag == tt.tagP)
+            {
+                AttVal attr = node.getAttrByName("class");
+
+                /* map sequence of <p class="MsoListBullet"> to <ul>...</ul> */
+                if (attr != null && attr.value != null &&
+                    attr.value.equals("MsoListBullet"))
+                {
+                    Node.coerceNode(lexer, node, tt.tagLi);
+
+                    if (list == null || list.tag != tt.tagUl)
+                    {
+                        list = lexer.inferredTag("ul");
+                        Node.insertNodeBeforeElement(node, list);
+                    }
+
+                    purgeAttributes(node);
+
+                    if (node.content != null)
+                        cleanWord2000(lexer, node.content);
+
+                    /* remove node and append to contents of list */
+                    Node.removeNode(node);
+                    Node.insertNodeAtEnd(list, node);
+                    node = list.next;
+                }
+                /* map sequence of <p class="Code"> to <pre>...</pre> */
+                else if (attr != null && attr.value != null &&
+                         attr.value.equals("Code"))
+                {
+                    Node br = lexer.newLineNode();
+                    normalizeSpaces(lexer, node);
+
+                    if (list == null || list.tag != tt.tagPre)
+                    {
+                        list = lexer.inferredTag("pre");
+                        Node.insertNodeBeforeElement(node, list);
+                    }
+
+                    /* remove node and append to contents of list */
+                    Node.removeNode(node);
+                    Node.insertNodeAtEnd(list, node);
+                    stripSpan(lexer, node);
+                    Node.insertNodeAtEnd(list, br);
+                    node = list.next;
+                }
+                else
+                    list = null;
+            }
+            else
+                list = null;
+
+            /* strip out style and class attributes */
+            if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                purgeAttributes(node);
+
+            if (node.content != null)
+                cleanWord2000(lexer, node.content);
+
+            node = node.next;
+        }
+    }
+
+    public boolean isWord2000(Node root, TagTable tt)
+    {
+        Node html = root.findHTML(tt);
+
+        return (html != null && html.getAttrByName("xmlns:o") != null);
+    }
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Configuration.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Configuration.java
new file mode 100644 (file)
index 0000000..f5468ab
--- /dev/null
@@ -0,0 +1,600 @@
+/*
+ * @(#)Configuration.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Read configuration file and manage configuration properties.
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+  Configuration files associate a property name with a value.
+  The format is that of a Java .properties file.
+*/
+
+import java.util.Enumeration;
+import java.util.Properties;
+import java.util.StringTokenizer;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+public class Configuration implements java.io.Serializable {
+
+    /* character encodings (values stored in CharEncoding) */
+    public static final int RAW         = 0;
+    public static final int ASCII       = 1;
+    public static final int LATIN1      = 2;
+    public static final int UTF8        = 3;
+    public static final int ISO2022     = 4;
+    public static final int MACROMAN    = 5;
+
+    /* mode controlling treatment of doctype (values stored in docTypeMode) */
+    public static final int DOCTYPE_OMIT  = 0;
+    public static final int DOCTYPE_AUTO  = 1;
+    public static final int DOCTYPE_STRICT= 2;
+    public static final int DOCTYPE_LOOSE = 3;
+    public static final int DOCTYPE_USER  = 4;
+
+    protected int spaces =  2;           /* default indentation */
+    protected int wraplen = 68;          /* default wrap margin; 0 disables wrapping, see adjust() */
+    protected int CharEncoding = ASCII;  /* one of the character encoding constants above */
+    protected int tabsize = 4;
+
+    protected int     docTypeMode = DOCTYPE_AUTO; /* see doctype property */
+    protected String  altText = null;      /* default text for alt attribute */
+    protected String  slidestyle = null;    /* style sheet for slides */
+    protected String  docTypeStr = null;    /* user specified doctype */
+    protected String  errfile = null;       /* file name to write errors to */
+    protected boolean writeback = false;        /* if true then output tidied markup */
+
+    protected boolean OnlyErrors = false;       /* if true normal output is suppressed */
+    protected boolean ShowWarnings = true;      /* however errors are always shown */
+    protected boolean Quiet = false;            /* no 'Parsing X', guessed DTD or summary */
+    protected boolean IndentContent = false;    /* indent content of appropriate tags */
+    protected boolean SmartIndent = false;      /* does text/block level content affect indentation */
+    protected boolean HideEndTags = false;      /* suppress optional end tags */
+    protected boolean XmlTags = false;          /* treat input as XML */
+    protected boolean XmlOut = false;           /* create output as XML */
+    protected boolean xHTML = false;            /* output extensible HTML */
+    protected boolean XmlPi = false;             /* add <?xml?> for XML docs */
+    protected boolean RawOut = false;           /* avoid mapping values > 127 to entities */
+    protected boolean UpperCaseTags = false;    /* output tags in upper not lower case */
+    protected boolean UpperCaseAttrs = false;   /* output attributes in upper not lower case */
+    protected boolean MakeClean = false;        /* remove presentational clutter */
+    protected boolean LogicalEmphasis = false;  /* replace i by em and b by strong */
+    protected boolean DropFontTags = false;     /* discard presentation tags */
+    protected boolean DropEmptyParas = true;    /* discard empty p elements */
+    protected boolean FixComments = true;       /* fix comments with adjacent hyphens */
+    protected boolean BreakBeforeBR = false;    /* o/p newline before <br> or not? */
+    protected boolean BurstSlides = false;      /* create slides on each h2 element */
+    protected boolean NumEntities = false;      /* use numeric entities */
+    protected boolean QuoteMarks = false;       /* output " marks as &quot; */
+    protected boolean QuoteNbsp = true;         /* output non-breaking space as entity */
+    protected boolean QuoteAmpersand = true;    /* output naked ampersand as &amp; */
+    protected boolean WrapAttVals = false;      /* wrap within attribute values */
+    protected boolean WrapScriptlets = false;   /* wrap within JavaScript string literals */
+    protected boolean WrapSection = true;       /* wrap within <![ ... ]> section tags */
+    protected boolean WrapAsp = true;           /* wrap within ASP pseudo elements */
+    protected boolean WrapJste = true;          /* wrap within JSTE pseudo elements */
+    protected boolean WrapPhp = true;           /* wrap within PHP pseudo elements */
+    protected boolean FixBackslash = true;      /* fix URLs by replacing \ with / */
+    protected boolean IndentAttributes = false; /* newline+indent before each attribute */
+    protected boolean XmlPIs = false;           /* if set to yes PIs must end with ?> */
+    protected boolean XmlSpace = false;         /* if set to yes adds xml:space attr as needed */
+    protected boolean EncloseBodyText = false;  /* if yes text at body is wrapped in <p>'s */
+    protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
+    protected boolean KeepFileTimes = true;     /* if yes last modified time is preserved */
+    protected boolean Word2000 = false;         /* draconian cleaning for Word2000 */
+    protected boolean TidyMark = true;          /* add meta element indicating tidied doc */
+    protected boolean Emacs = false;            /* if true format error output for GNU Emacs */
+    protected boolean LiteralAttribs = false;   /* if true attributes may use newlines */
+
+    protected TagTable tt;                      /* TagTable associated with this Configuration */
+
+    /*
+      Raw property values collected by addProps() and parseFile().
+      NOTE(review): the field is transient and only initialised here,
+      so it will be null on an instance restored by Java
+      serialization - confirm no deserialized instance calls
+      addProps() or parseFile().
+    */
+    private transient Properties _properties = new Properties();
+
+    /**
+     * Creates a configuration holding the default values given by the
+     * field initialisers. The tag table <code>tt</code> is not assigned here.
+     */
+    public Configuration()
+    {
+    }
+
+    public void addProps( Properties p )
+    {
+        Enumeration enum = p.propertyNames();
+        while (enum.hasMoreElements())
+        {
+            String key = (String) enum.nextElement();
+            String value = p.getProperty(key);
+            _properties.put(key, value);
+        }
+        parseProps();
+    }
+
+    public void parseFile( String filename )
+    {
+        try
+        {
+            _properties.load( new FileInputStream( filename ) );
+        }
+        catch (IOException e)
+        {
+            System.err.println(filename + e.toString());
+            return;
+        }
+        parseProps();
+    }
+
+    /**
+     * Copies every recognised option found in _properties into the
+     * matching field. Options that are absent leave their field
+     * untouched. Each value is converted by one of the parse helpers,
+     * which receive the option name for use in error reports.
+     */
+    private void parseProps()
+    {
+        String value;
+
+        value = _properties.getProperty("indent-spaces");
+        if (value != null)
+            spaces = parseInt(value, "indent-spaces");
+
+        value = _properties.getProperty("wrap");
+        if (value != null)
+            wraplen = parseInt(value, "wrap");
+
+        value = _properties.getProperty("wrap-attributes");
+        if (value != null)
+            WrapAttVals = parseBool(value, "wrap-attributes");
+
+        value = _properties.getProperty("wrap-script-literals");
+        if (value != null)
+            WrapScriptlets = parseBool(value, "wrap-script-literals");
+
+        value = _properties.getProperty("wrap-sections");
+        if (value != null)
+            WrapSection = parseBool(value, "wrap-sections");
+
+        value = _properties.getProperty("wrap-asp");
+        if (value != null)
+            WrapAsp = parseBool(value, "wrap-asp");
+
+        value = _properties.getProperty("wrap-jste");
+        if (value != null)
+            WrapJste = parseBool(value, "wrap-jste");
+
+        value = _properties.getProperty("wrap-php");
+        if (value != null)
+            WrapPhp = parseBool(value, "wrap-php");
+
+        value = _properties.getProperty("literal-attributes");
+        if (value != null)
+            LiteralAttribs = parseBool(value, "literal-attributes");
+
+        value = _properties.getProperty("tab-size");
+        if (value != null)
+            tabsize = parseInt(value, "tab-size");
+
+        /* "markup" is the inverse of OnlyErrors */
+        value = _properties.getProperty("markup");
+        if (value != null)
+            OnlyErrors = parseInvBool(value, "markup");
+
+        value = _properties.getProperty("quiet");
+        if (value != null)
+            Quiet = parseBool(value, "quiet");
+
+        value = _properties.getProperty("tidy-mark");
+        if (value != null)
+            TidyMark = parseBool(value, "tidy-mark");
+
+        /* parseIndent also sets SmartIndent */
+        value = _properties.getProperty("indent");
+        if (value != null)
+            IndentContent = parseIndent(value, "indent");
+
+        /* report errors under the real option name (was misspelt "ident-attributes") */
+        value = _properties.getProperty("indent-attributes");
+        if (value != null)
+            IndentAttributes = parseBool(value, "indent-attributes");
+
+        value = _properties.getProperty("hide-endtags");
+        if (value != null)
+            HideEndTags = parseBool(value, "hide-endtags");
+
+        value = _properties.getProperty("input-xml");
+        if (value != null)
+            XmlTags = parseBool(value, "input-xml");
+
+        value = _properties.getProperty("output-xml");
+        if (value != null)
+            XmlOut = parseBool(value, "output-xml");
+
+        value = _properties.getProperty("output-xhtml");
+        if (value != null)
+            xHTML = parseBool(value, "output-xhtml");
+
+        /* "add-xml-pi" and "add-xml-decl" both set XmlPi; the latter wins if both are given */
+        value = _properties.getProperty("add-xml-pi");
+        if (value != null)
+            XmlPi = parseBool(value, "add-xml-pi");
+
+        value = _properties.getProperty("add-xml-decl");
+        if (value != null)
+            XmlPi = parseBool(value, "add-xml-decl");
+
+        value = _properties.getProperty("assume-xml-procins");
+        if (value != null)
+            XmlPIs = parseBool(value, "assume-xml-procins");
+
+        value = _properties.getProperty("raw");
+        if (value != null)
+            RawOut = parseBool(value, "raw");
+
+        value = _properties.getProperty("uppercase-tags");
+        if (value != null)
+            UpperCaseTags = parseBool(value, "uppercase-tags");
+
+        value = _properties.getProperty("uppercase-attributes");
+        if (value != null)
+            UpperCaseAttrs = parseBool(value, "uppercase-attributes");
+
+        value = _properties.getProperty("clean");
+        if (value != null)
+            MakeClean = parseBool(value, "clean");
+
+        value = _properties.getProperty("logical-emphasis");
+        if (value != null)
+            LogicalEmphasis = parseBool(value, "logical-emphasis");
+
+        value = _properties.getProperty("word-2000");
+        if (value != null)
+            Word2000 = parseBool(value, "word-2000");
+
+        value = _properties.getProperty("drop-empty-paras");
+        if (value != null)
+            DropEmptyParas = parseBool(value, "drop-empty-paras");
+
+        value = _properties.getProperty("drop-font-tags");
+        if (value != null)
+            DropFontTags = parseBool(value, "drop-font-tags");
+
+        value = _properties.getProperty("enclose-text");
+        if (value != null)
+            EncloseBodyText = parseBool(value, "enclose-text");
+
+        value = _properties.getProperty("enclose-block-text");
+        if (value != null)
+            EncloseBlockText = parseBool(value, "enclose-block-text");
+
+        /* taken verbatim, no parsing */
+        value = _properties.getProperty("alt-text");
+        if (value != null)
+            altText = value;
+
+        value = _properties.getProperty("add-xml-space");
+        if (value != null)
+            XmlSpace = parseBool(value, "add-xml-space");
+
+        value = _properties.getProperty("fix-bad-comments");
+        if (value != null)
+            FixComments = parseBool(value, "fix-bad-comments");
+
+        value = _properties.getProperty("split");
+        if (value != null)
+            BurstSlides = parseBool(value, "split");
+
+        value = _properties.getProperty("break-before-br");
+        if (value != null)
+            BreakBeforeBR = parseBool(value, "break-before-br");
+
+        value = _properties.getProperty("numeric-entities");
+        if (value != null)
+            NumEntities = parseBool(value, "numeric-entities");
+
+        value = _properties.getProperty("quote-marks");
+        if (value != null)
+            QuoteMarks = parseBool(value, "quote-marks");
+
+        value = _properties.getProperty("quote-nbsp");
+        if (value != null)
+            QuoteNbsp = parseBool(value, "quote-nbsp");
+
+        value = _properties.getProperty("quote-ampersand");
+        if (value != null)
+            QuoteAmpersand = parseBool(value, "quote-ampersand");
+
+        value = _properties.getProperty("write-back");
+        if (value != null)
+            writeback = parseBool(value, "write-back");
+
+        value = _properties.getProperty("keep-time");
+        if (value != null)
+            KeepFileTimes = parseBool(value, "keep-time");
+
+        value = _properties.getProperty("show-warnings");
+        if (value != null)
+            ShowWarnings = parseBool(value, "show-warnings");
+
+        value = _properties.getProperty("error-file");
+        if (value != null)
+            errfile = parseName(value, "error-file");
+
+        value = _properties.getProperty("slide-style");
+        if (value != null)
+            slidestyle = parseName(value, "slide-style");
+
+        /* the new-*-tags options register tag names with the tag table */
+        value = _properties.getProperty("new-inline-tags");
+        if (value != null)
+            parseInlineTagNames(value, "new-inline-tags");
+
+        value = _properties.getProperty("new-blocklevel-tags");
+        if (value != null)
+            parseBlockTagNames(value, "new-blocklevel-tags");
+
+        value = _properties.getProperty("new-empty-tags");
+        if (value != null)
+            parseEmptyTagNames(value, "new-empty-tags");
+
+        value = _properties.getProperty("new-pre-tags");
+        if (value != null)
+            parsePreTagNames(value, "new-pre-tags");
+
+        value = _properties.getProperty("char-encoding");
+        if (value != null)
+            CharEncoding = parseCharEncoding(value, "char-encoding");
+
+        /* parseDocType also sets docTypeMode */
+        value = _properties.getProperty("doctype");
+        if (value != null)
+            docTypeStr = parseDocType(value, "doctype");
+
+        value = _properties.getProperty("fix-backslash");
+        if (value != null)
+            FixBackslash = parseBool(value, "fix-backslash");
+
+        value = _properties.getProperty("gnu-emacs");
+        if (value != null)
+            Emacs = parseBool(value, "gnu-emacs");
+    }
+
+    /* ensure that config is self consistent */
+    public void adjust()
+    {
+        if (EncloseBlockText)
+            EncloseBodyText = true;
+
+        /* avoid the need to set IndentContent when SmartIndent is set */
+
+        if (SmartIndent)
+            IndentContent = true;
+
+        /* disable wrapping */
+        if (wraplen == 0)
+            wraplen = 0x7FFFFFFF;
+
+        /* Word 2000 needs o:p to be declared as inline */
+        if (Word2000)
+        {
+            tt.defineInlineTag("o:p");
+        }
+
+        /* XHTML is written in lower case */
+        if (xHTML)
+        {
+            XmlOut = true;
+            UpperCaseTags = false;
+            UpperCaseAttrs = false;
+        }
+
+        /* if XML in, then XML out */
+        if (XmlTags)
+        {
+            XmlOut = true;
+            XmlPIs = true;
+        }
+
+        /* XML requires end tags */
+        if (XmlOut)
+        {
+            QuoteAmpersand = true;
+            HideEndTags = false;
+        }
+    }
+
+    /*
+      Convert s to an int. If s is not a valid integer the
+      option is reported as a bad argument and -1 is returned.
+    */
+    private static int parseInt( String s, String option )
+    {
+        try
+        {
+            return Integer.parseInt( s );
+        }
+        catch ( NumberFormatException e )
+        {
+            Report.badArgument(option);
+            return -1;
+        }
+    }
+
+    /*
+      Interpret s as a boolean by its first character only:
+      t, T, y, Y or 1 mean true; f, F, n, N or 0 mean false.
+      A null or empty string yields false without complaint;
+      any other first character is reported as a bad argument
+      for option and yields false.
+    */
+    private static boolean parseBool( String s, String option )
+    {
+        if ( s == null || s.length() == 0 )
+            return false;
+
+        switch ( s.charAt(0) )
+        {
+            case 't':
+            case 'T':
+            case 'Y':
+            case 'y':
+            case '1':
+                return true;
+
+            case 'f':
+            case 'F':
+            case 'N':
+            case 'n':
+            case '0':
+                return false;
+
+            default:
+                Report.badArgument(option);
+                return false;
+        }
+    }
+
+    /*
+      Interpret s as a boolean exactly as parseBool does and
+      return the opposite value. Used for options whose meaning
+      is the inverse of the field they set.
+
+      Delegating to parseBool keeps both helpers in agreement
+      about the accepted spellings; in particular 1 and 0 are
+      now accepted here as well instead of being reported as
+      bad arguments. A null, empty or unrecognised value still
+      yields true, with unrecognised values being reported.
+    */
+    private static boolean parseInvBool( String s, String option )
+    {
+        return !parseBool(s, option);
+    }
+
+    /*
+      Return the first whitespace-delimited word of s. If s
+      holds no word at all the option is reported as a bad
+      argument and null is returned.
+    */
+    private static String parseName( String s, String option )
+    {
+        StringTokenizer words = new StringTokenizer( s );
+
+        if ( words.hasMoreTokens() )
+            return words.nextToken();
+
+        Report.badArgument(option);
+        return null;
+    }
+
+    private static int parseCharEncoding( String s, String option )
+    {
+        int result = ASCII;
+
+        if (Lexer.wstrcasecmp(s, "ascii") == 0)
+            result = ASCII;
+        else if (Lexer.wstrcasecmp(s, "latin1") == 0)
+            result = LATIN1;
+        else if (Lexer.wstrcasecmp(s, "raw") == 0)
+            result = RAW;
+        else if (Lexer.wstrcasecmp(s, "utf8") == 0)
+            result = UTF8;
+        else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
+            result = ISO2022;
+        else if (Lexer.wstrcasecmp(s, "mac") == 0)
+            result = MACROMAN;
+        else
+            Report.badArgument(option);
+
+        return result;
+    }
+
+    /*
+      slight hack to avoid changes to pprint.c
+
+      Parse the value of the indent option. "yes"/"true" turn
+      indenting on and "no"/"false" turn it off, both with
+      smart indenting disabled; "auto" turns on indenting and
+      smart indenting together. Anything else is reported as a
+      bad argument and the current IndentContent value is
+      returned with SmartIndent left alone.
+    */
+    private boolean parseIndent( String s, String option )
+    {
+        if (Lexer.wstrcasecmp(s, "yes") == 0 ||
+            Lexer.wstrcasecmp(s, "true") == 0)
+        {
+            SmartIndent = false;
+            return true;
+        }
+
+        if (Lexer.wstrcasecmp(s, "no") == 0 ||
+            Lexer.wstrcasecmp(s, "false") == 0)
+        {
+            SmartIndent = false;
+            return false;
+        }
+
+        if (Lexer.wstrcasecmp(s, "auto") == 0)
+        {
+            SmartIndent = true;
+            return true;
+        }
+
+        Report.badArgument(option);
+        return IndentContent;
+    }
+
+    /*
+      Register each name in s (separated by spaces, tabs,
+      newlines or commas) as an inline tag in the tag table.
+      The option argument is not used.
+    */
+    private void parseInlineTagNames( String s, String option )
+    {
+        for (StringTokenizer names = new StringTokenizer( s, " \t\n\r," );
+             names.hasMoreTokens(); )
+        {
+            String tagName = names.nextToken();
+            tt.defineInlineTag( tagName );
+        }
+    }
+
+    /*
+      Register each name in s (separated by spaces, tabs,
+      newlines or commas) as a block tag in the tag table.
+      The option argument is not used.
+    */
+    private void parseBlockTagNames( String s, String option )
+    {
+        for (StringTokenizer names = new StringTokenizer( s, " \t\n\r," );
+             names.hasMoreTokens(); )
+        {
+            String tagName = names.nextToken();
+            tt.defineBlockTag( tagName );
+        }
+    }
+
+    /*
+      Register each name in s (separated by spaces, tabs,
+      newlines or commas) as an empty tag in the tag table.
+      The option argument is not used.
+    */
+    private void parseEmptyTagNames( String s, String option )
+    {
+        for (StringTokenizer names = new StringTokenizer( s, " \t\n\r," );
+             names.hasMoreTokens(); )
+        {
+            String tagName = names.nextToken();
+            tt.defineEmptyTag( tagName );
+        }
+    }
+
+    /*
+      Register each name in s (separated by spaces, tabs,
+      newlines or commas) as a pre tag in the tag table.
+      The option argument is not used.
+    */
+    private void parsePreTagNames( String s, String option )
+    {
+        for (StringTokenizer names = new StringTokenizer( s, " \t\n\r," );
+             names.hasMoreTokens(); )
+        {
+            String tagName = names.nextToken();
+            tt.definePreTag( tagName );
+        }
+    }
+
+    /*
+       doctype: omit | auto | strict | loose | <fpi>
+
+       where the fpi is a string similar to
+
+          "-//ACME//DTD HTML 3.14159//EN"
+
+       Sets docTypeMode as a side effect. A value starting with
+       a double quote selects DOCTYPE_USER and is returned
+       (trimmed, quotes included). Otherwise the first word
+       picks the mode ("transitional" is accepted for "loose")
+       and null is returned; an unknown word is reported as a
+       bad argument and falls back to DOCTYPE_AUTO.
+    */
+    protected String parseDocType( String s, String option )
+    {
+        String spec = s.trim();
+
+        /* "-//ACME//DTD HTML 3.14159//EN" or similar */
+        if (spec.startsWith("\""))
+        {
+            docTypeMode = DOCTYPE_USER;
+            return spec;
+        }
+
+        /* read first word */
+        StringTokenizer words = new StringTokenizer( spec, " \t\n\r," );
+        String first = words.hasMoreTokens() ? words.nextToken() : "";
+
+        if (Lexer.wstrcasecmp(first, "omit") == 0)
+        {
+            docTypeMode = DOCTYPE_OMIT;
+        }
+        else if (Lexer.wstrcasecmp(first, "strict") == 0)
+        {
+            docTypeMode = DOCTYPE_STRICT;
+        }
+        else if (Lexer.wstrcasecmp(first, "loose") == 0 ||
+                 Lexer.wstrcasecmp(first, "transitional") == 0)
+        {
+            docTypeMode = DOCTYPE_LOOSE;
+        }
+        else
+        {
+            /* "auto" and anything unrecognised both end up as auto */
+            boolean known = Lexer.wstrcasecmp(first, "auto") == 0;
+
+            docTypeMode = DOCTYPE_AUTO;
+            if (!known)
+                Report.badArgument(option);
+        }
+
+        return null;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMAttrImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMAttrImpl.java
new file mode 100644 (file)
index 0000000..de54167
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * @(#)DOMAttrImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMAttrImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMAttrImpl extends DOMNodeImpl implements org.w3c.dom.Attr {
+
+    protected AttVal avAdaptee;
+
+    protected DOMAttrImpl(AttVal adaptee)
+    {
+        super(null); // must override all methods of DOMNodeImpl
+        this.avAdaptee = adaptee;
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    public String getNodeValue() throws DOMException
+    {
+        return getValue();
+    }
+
+    public void setNodeValue(String nodeValue) throws DOMException
+    {
+        setValue(nodeValue);
+    }
+
+    public String getNodeName()
+    {
+        return getName();
+    }
+
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.ATTRIBUTE_NODE;
+    }
+
+    public org.w3c.dom.Node getParentNode()
+    {
+        return null;
+    }
+
+    public org.w3c.dom.NodeList getChildNodes()
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    public org.w3c.dom.Node getFirstChild()
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    public org.w3c.dom.Node getLastChild()
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    public org.w3c.dom.Node getPreviousSibling()
+    {
+        return null;
+    }
+
+    public org.w3c.dom.Node getNextSibling()
+    {
+        return null;
+    }
+
+    public org.w3c.dom.NamedNodeMap getAttributes()
+    {
+        return null;
+    }
+
+    public org.w3c.dom.Document getOwnerDocument()
+    {
+        return null;
+    }
+
+    public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, 
+                                         org.w3c.dom.Node refChild)
+                                             throws DOMException
+    {
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, 
+                                         org.w3c.dom.Node oldChild)
+                                             throws DOMException
+    {
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild)
+                                            throws DOMException
+    {
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild)
+                                            throws DOMException
+    {
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    public boolean hasChildNodes()
+    {
+        return false;
+    }
+
+    public org.w3c.dom.Node cloneNode(boolean deep)
+    {
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Attr#getName
+     */
+    public String getName()
+    {
+        return avAdaptee.attribute;
+    }
+
+    /**
+     * @see org.w3c.dom.Attr#getSpecified
+     */
+    public boolean getSpecified()
+    {
+        return true;
+    }
+
+    /**
+     * Returns value of this attribute.  If this attribute has a null value,
+     * then the attribute name is returned instead.
+     * Thanks to Brett Knights <brett@knightsofthenet.com> for this fix.
+     * @see org.w3c.dom.Attr#getValue
+     * 
+     */
+    public String getValue()
+    {
+        return (avAdaptee.value == null) ? avAdaptee.attribute : avAdaptee.value ;
+    }
+
+    /**
+     * @see org.w3c.dom.Attr#setValue
+     */
+    public void setValue(String value)
+    {
+        avAdaptee.value = value;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public org.w3c.dom.Element getOwnerElement() {
+       return null;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMAttrMapImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMAttrMapImpl.java
new file mode 100644 (file)
index 0000000..99ad86b
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * @(#)DOMAttrMapImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMAttrMapImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMAttrMapImpl implements org.w3c.dom.NamedNodeMap {
+
+    private AttVal first = null;
+
+    protected DOMAttrMapImpl(AttVal first)
+    {
+        this.first = first;
+    }
+
+    /**
+     * @see org.w3c.dom.NamedNodeMap#getNamedItem
+     */
+    public org.w3c.dom.Node getNamedItem(String name)
+    {
+        AttVal att = this.first;
+        while (att != null) {
+            if (att.attribute.equals(name)) break;
+            att = att.next;
+        }
+        if (att != null)
+            return att.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.NamedNodeMap#setNamedItem
+     */
+    public org.w3c.dom.Node setNamedItem(org.w3c.dom.Node arg)
+                                             throws DOMException
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.NamedNodeMap#removeNamedItem
+     */
+    public org.w3c.dom.Node removeNamedItem(String name)
+                                                throws DOMException
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.NamedNodeMap#item
+     */
+    public org.w3c.dom.Node item(int index)
+    {
+        int i = 0;
+        AttVal att = this.first;
+        while (att != null) {
+            if (i >= index) break;
+            i++;
+            att = att.next;
+        }
+        if (att != null)
+            return att.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.NamedNodeMap#getLength
+     */
+    public int getLength()
+    {
+        int len = 0;
+        AttVal att = this.first;
+        while (att != null) {
+            len++;
+            att = att.next;
+        }
+        return len;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public org.w3c.dom.Node getNamedItemNS(String namespaceURI,
+                                           String localName)
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     * @exception   org.w3c.dom.DOMException
+     */
+    public org.w3c.dom.Node setNamedItemNS(org.w3c.dom.Node arg)
+        throws org.w3c.dom.DOMException
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     * @exception   org.w3c.dom.DOMException
+     */
+    public org.w3c.dom.Node removeNamedItemNS(String namespaceURI,
+                                              String localName)
+        throws org.w3c.dom.DOMException
+    {
+       return null;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMCDATASectionImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMCDATASectionImpl.java
new file mode 100644 (file)
index 0000000..235888b
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * @(#)DOMCDATASectionImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMCDATASectionImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @author  Gary L Peskin <garyp@firstech.com>
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMCDATASectionImpl extends DOMTextImpl
+                            implements org.w3c.dom.CDATASection {
+
+    protected DOMCDATASectionImpl(Node adaptee)
+    {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName()
+    {
+        return "#cdata-section";
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.CDATA_SECTION_NODE;
+    }
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMCharacterDataImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMCharacterDataImpl.java
new file mode 100644 (file)
index 0000000..9b78042
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * @(#)DOMCharacterDataImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMCharacterDataImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMCharacterDataImpl extends DOMNodeImpl
+                            implements org.w3c.dom.CharacterData {
+
+    protected DOMCharacterDataImpl(Node adaptee)
+    {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @see org.w3c.dom.CharacterData#getData
+     */
+    public String getData() throws DOMException
+    {
+        return getNodeValue();
+    }
+
+    /**
+     * @see org.w3c.dom.CharacterData#setData
+     */
+    public void setData(String data) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    /**
+     * @see org.w3c.dom.CharacterData#getLength
+     */
+    public int getLength()
+    {
+        int len = 0;
+        if (adaptee.textarray != null && adaptee.start < adaptee.end)
+            len = adaptee.end - adaptee.start;
+        return len;
+    }
+
+    /**
+     * @see org.w3c.dom.CharacterData#substringData
+     */
+    public String substringData(int offset, 
+                                int count) throws DOMException
+    {
+        int len;
+        String value = null;
+        if (count < 0)
+        {
+            throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR,
+                                       "Invalid length");
+        }
+        if (adaptee.textarray != null && adaptee.start < adaptee.end)
+        {
+            if (adaptee.start + offset >= adaptee.end)
+            {
+                throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR,
+                                           "Invalid offset");
+            }
+            len = count;
+            if (adaptee.start + offset + len - 1 >= adaptee.end)
+                len = adaptee.end - adaptee.start - offset;
+
+            value = Lexer.getString(adaptee.textarray,
+                                    adaptee.start + offset,
+                                    len);
+        }
+        return value;
+    }
+
+    /**
+     * @see org.w3c.dom.CharacterData#appendData
+     */
+    public void appendData(String arg) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    /**
+     * @see org.w3c.dom.CharacterData#insertData
+     */
+    public void insertData(int offset, 
+                           String arg) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    /**
+     * @see org.w3c.dom.CharacterData#deleteData
+     */
+    public void deleteData(int offset, 
+                           int count) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+    /**
+     * @see org.w3c.dom.CharacterData#replaceData
+     */
+    public void replaceData(int offset, 
+                            int count, 
+                            String arg) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMCommentImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMCommentImpl.java
new file mode 100644 (file)
index 0000000..8708c10
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * @(#)DOMCommentImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMCommentImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMCommentImpl extends DOMCharacterDataImpl
+                            implements org.w3c.dom.Comment {
+
+    protected DOMCommentImpl(Node adaptee)
+    {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName()
+    {
+        return "#comment";
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.COMMENT_NODE;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMDocumentImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMDocumentImpl.java
new file mode 100644 (file)
index 0000000..55a1eff
--- /dev/null
@@ -0,0 +1,261 @@
+/*
+ * @(#)DOMDocumentImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMDocumentImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMDocumentImpl extends DOMNodeImpl implements org.w3c.dom.Document {
+
+    private TagTable tt;      // a DOM Document has its own TagTable.
+
+    protected DOMDocumentImpl(Node adaptee)
+    {
+        super(adaptee);
+        tt = new TagTable();
+    }
+
+    public void setTagTable(TagTable tt)
+    {
+        this.tt = tt;
+    }
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName()
+    {
+        return "#document";
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.DOCUMENT_NODE;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#getDoctype
+     */
+    public org.w3c.dom.DocumentType       getDoctype()
+    {
+        Node node = adaptee.content;
+        while (node != null) {
+            if (node.type == Node.DocTypeTag) break;
+            node = node.next;
+        }
+        if (node != null)
+            return (org.w3c.dom.DocumentType)node.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#getImplementation
+     */
+    public org.w3c.dom.DOMImplementation  getImplementation()
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#getDocumentElement
+     */
+    public org.w3c.dom.Element            getDocumentElement()
+    {
+        Node node = adaptee.content;
+        while (node != null) {
+            if (node.type == Node.StartTag ||
+                node.type == Node.StartEndTag) break;
+            node = node.next;
+        }
+        if (node != null)
+            return (org.w3c.dom.Element)node.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#createElement
+     */
+    public org.w3c.dom.Element            createElement(String tagName)
+                                            throws DOMException
+    {
+        Node node = new Node(Node.StartEndTag, null, 0, 0, tagName, tt);
+        if (node != null) {
+            if (node.tag == null)           // Fix Bug 121206
+              node.tag = tt.xmlTags;
+            return (org.w3c.dom.Element)node.getAdapter();
+        }
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#createDocumentFragment
+     */
+    public org.w3c.dom.DocumentFragment   createDocumentFragment()
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#createTextNode
+     */
+    public org.w3c.dom.Text               createTextNode(String data)
+    {
+        byte[] textarray = Lexer.getBytes(data);
+        Node node = new Node(Node.TextNode, textarray, 0, textarray.length);
+        if (node != null)
+            return (org.w3c.dom.Text)node.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#createComment
+     */
+    public org.w3c.dom.Comment            createComment(String data)
+    {
+        byte[] textarray = Lexer.getBytes(data);
+        Node node = new Node(Node.CommentTag, textarray, 0, textarray.length);
+        if (node != null)
+            return (org.w3c.dom.Comment)node.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#createCDATASection
+     */
+    public org.w3c.dom.CDATASection       createCDATASection(String data)
+                                                 throws DOMException
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#createProcessingInstruction
+     */
+    public org.w3c.dom.ProcessingInstruction createProcessingInstruction(String target, 
+                                                          String data)
+                                                          throws DOMException
+    {
+        throw new DOMExceptionImpl(DOMException.NOT_SUPPORTED_ERR,
+                                   "HTML document");
+    }
+
+    /**
+     * @see org.w3c.dom.Document#createAttribute
+     */
+    public org.w3c.dom.Attr               createAttribute(String name)
+                                              throws DOMException
+    {
+        AttVal av = new AttVal(null, null, (int)'"', name, null);
+        if (av != null) {
+            av.dict =
+                AttributeTable.getDefaultAttributeTable().findAttribute(av);
+            return (org.w3c.dom.Attr)av.getAdapter();
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * @see org.w3c.dom.Document#createEntityReference
+     */
+    public org.w3c.dom.EntityReference    createEntityReference(String name)
+                                                    throws DOMException
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Document#getElementsByTagName
+     */
+    public org.w3c.dom.NodeList           getElementsByTagName(String tagname)
+    {
+        return new DOMNodeListByTagNameImpl(this.adaptee, tagname);
+    }
+
+    /**
+     * DOM2 - not implemented.
+     * @exception   org.w3c.dom.DOMException
+     */
+    public org.w3c.dom.Node importNode(org.w3c.dom.Node importedNode, boolean deep)
+        throws org.w3c.dom.DOMException
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     * @exception   org.w3c.dom.DOMException
+     */
+    public org.w3c.dom.Attr createAttributeNS(String namespaceURI,
+                                              String qualifiedName)
+        throws org.w3c.dom.DOMException
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     * @exception   org.w3c.dom.DOMException
+     */
+    public org.w3c.dom.Element createElementNS(String namespaceURI,
+                                               String qualifiedName)
+        throws org.w3c.dom.DOMException
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI,
+                                                       String localName)
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public org.w3c.dom.Element getElementById(String elementId)
+    {
+       return null;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMDocumentTypeImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMDocumentTypeImpl.java
new file mode 100644 (file)
index 0000000..9efe332
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * @(#)DOMDocumentTypeImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMDocumentTypeImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMDocumentTypeImpl extends DOMNodeImpl
+                            implements org.w3c.dom.DocumentType {
+
+    protected DOMDocumentTypeImpl(Node adaptee)
+    {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.DOCUMENT_TYPE_NODE;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName()
+    {
+        return getName();
+    }
+
+    /**
+     * @see org.w3c.dom.DocumentType#getName
+     */
+    public String             getName()
+    {
+        String value = null;
+        if (adaptee.type == Node.DocTypeTag)
+        {
+
+            if (adaptee.textarray != null && adaptee.start < adaptee.end)
+            {
+                value = Lexer.getString(adaptee.textarray,
+                                        adaptee.start,
+                                        adaptee.end - adaptee.start);
+            }
+        }
+        return value;
+    }
+
+    public org.w3c.dom.NamedNodeMap       getEntities()
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    public org.w3c.dom.NamedNodeMap       getNotations()
+    {
+        // NOT SUPPORTED
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public String getPublicId() {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public String getSystemId() {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public String getInternalSubset() {
+       return null;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMElementImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMElementImpl.java
new file mode 100644 (file)
index 0000000..a16a149
--- /dev/null
@@ -0,0 +1,307 @@
+/*
+ * @(#)DOMElementImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMElementImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMElementImpl extends DOMNodeImpl
+                            implements org.w3c.dom.Element {
+
+    protected DOMElementImpl(Node adaptee)
+    {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.ELEMENT_NODE;
+    }
+
+    /**
+     * @see org.w3c.dom.Element#getTagName
+     */
+    public String             getTagName()
+    {
+        return super.getNodeName();
+    }
+
+    /**
+     * @see org.w3c.dom.Element#getAttribute
+     */
+    public String             getAttribute(String name)
+    {
+        if (this.adaptee == null)
+            return null;
+
+        AttVal att = this.adaptee.attributes;
+        while (att != null) {
+            if (att.attribute.equals(name)) break;
+            att = att.next;
+        }
+        if (att != null)
+            return att.value;
+        else
+            return "";
+    }
+
+    /**
+     * @see org.w3c.dom.Element#setAttribute
+     */
+    public void               setAttribute(String name, 
+                                           String value)
+                                           throws DOMException
+    {
+        if (this.adaptee == null)
+            return;
+
+        AttVal att = this.adaptee.attributes;
+        while (att != null) {
+            if (att.attribute.equals(name)) break;
+            att = att.next;
+        }
+        if (att != null) {
+            att.value = value;
+        } else {
+            att = new AttVal(null, null, (int)'"', name, value);
+            att.dict =
+              AttributeTable.getDefaultAttributeTable().findAttribute(att);
+            if (this.adaptee.attributes == null) {
+                this.adaptee.attributes = att;
+            } else {
+                att.next = this.adaptee.attributes;
+                this.adaptee.attributes = att;
+            }
+        }
+    }
+
+    /**
+     * @see org.w3c.dom.Element#removeAttribute
+     */
+    public void               removeAttribute(String name)
+                                              throws DOMException
+    {
+        if (this.adaptee == null)
+            return;
+
+        AttVal att = this.adaptee.attributes;
+        AttVal pre = null;
+        while (att != null) {
+            if (att.attribute.equals(name)) break;
+            pre = att;
+            att = att.next;
+        }
+        if (att != null) {
+            if (pre == null) {
+                this.adaptee.attributes = att.next;
+            } else {
+                pre.next = att.next;
+            }
+        }
+    }
+
+    /**
+     * @see org.w3c.dom.Element#getAttributeNode
+     */
+    public org.w3c.dom.Attr   getAttributeNode(String name)
+    {
+        if (this.adaptee == null)
+            return null;
+
+        AttVal att = this.adaptee.attributes;
+        while (att != null) {
+            if (att.attribute.equals(name)) break;
+            att = att.next;
+        }
+        if (att != null)
+            return att.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Element#setAttributeNode
+     */
+    public org.w3c.dom.Attr   setAttributeNode(org.w3c.dom.Attr newAttr)
+                                               throws DOMException
+    {
+        if (newAttr == null)
+            return null;
+        if (!(newAttr instanceof DOMAttrImpl)) {
+            throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
+                                       "newAttr not instanceof DOMAttrImpl");
+        }
+
+        DOMAttrImpl newatt = (DOMAttrImpl)newAttr;
+        String name = newatt.avAdaptee.attribute;
+        org.w3c.dom.Attr result = null;
+
+        AttVal att = this.adaptee.attributes;
+        while (att != null) {
+            if (att.attribute.equals(name)) break;
+            att = att.next;
+        }
+        if (att != null) {
+            result = att.getAdapter();
+            att.adapter = newAttr;
+        } else {
+            if (this.adaptee.attributes == null) {
+                this.adaptee.attributes = newatt.avAdaptee;
+            } else {
+                newatt.avAdaptee.next = this.adaptee.attributes;
+                this.adaptee.attributes = newatt.avAdaptee;
+            }
+        }
+        return result;
+    }
+
+    /**
+     * @see org.w3c.dom.Element#removeAttributeNode
+     */
+    public org.w3c.dom.Attr   removeAttributeNode(org.w3c.dom.Attr oldAttr)
+                                                  throws DOMException
+    {
+        if (oldAttr == null)
+            return null;
+
+        org.w3c.dom.Attr result = null;
+        AttVal att = this.adaptee.attributes;
+        AttVal pre = null;
+        while (att != null) {
+            if (att.getAdapter() == oldAttr) break;
+            pre = att;
+            att = att.next;
+        }
+        if (att != null) {
+            if (pre == null) {
+                this.adaptee.attributes = att.next;
+            } else {
+                pre.next = att.next;
+            }
+            result = oldAttr;
+        } else {
+            throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+                                       "oldAttr not found");
+        }
+        return result;
+    }
+
+    /**
+     * @see org.w3c.dom.Element#getElementsByTagName
+     */
+    public org.w3c.dom.NodeList getElementsByTagName(String name)
+    {
+        return new DOMNodeListByTagNameImpl(this.adaptee, name);
+    }
+
+    /**
+     * @see org.w3c.dom.Element#normalize
+     */
+    public void               normalize()
+    {
+        // NOT SUPPORTED
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public String getAttributeNS(String namespaceURI, String localName)
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     * @exception   org.w3c.dom.DOMException
+     */
+    public void setAttributeNS(String namespaceURI,
+                               String qualifiedName,
+                               String value)
+        throws org.w3c.dom.DOMException
+    {
+    }
+
+    /**
+     * DOM2 - not implemented.
+     * @exception   org.w3c.dom.DOMException
+     */
+    public void removeAttributeNS(String namespaceURI, String localName)
+        throws org.w3c.dom.DOMException
+    {
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public org.w3c.dom.Attr getAttributeNodeNS(String namespaceURI,
+                                               String localName)
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     * @exception   org.w3c.dom.DOMException
+     */
+    public org.w3c.dom.Attr setAttributeNodeNS(org.w3c.dom.Attr newAttr)
+        throws org.w3c.dom.DOMException
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI,
+                                                       String localName)
+    {
+       return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public boolean hasAttribute(String name)
+    {
+        return false;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public boolean hasAttributeNS(String namespaceURI, 
+                                  String localName)
+    {
+        return false;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMExceptionImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMExceptionImpl.java
new file mode 100644 (file)
index 0000000..4ee5d5a
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * @(#)DOMExceptionImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMExceptionImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMExceptionImpl extends org.w3c.dom.DOMException {
+
+    public DOMExceptionImpl(short code, String message) {
+        super(code, message);
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMNodeImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMNodeImpl.java
new file mode 100644 (file)
index 0000000..cbba266
--- /dev/null
@@ -0,0 +1,488 @@
+/*
+ * @(#)DOMNodeImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMNodeImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM Support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMNodeImpl implements org.w3c.dom.Node {
+
+    protected Node adaptee;
+
+    protected DOMNodeImpl(Node adaptee)
+    {
+        this.adaptee = adaptee;
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @see org.w3c.dom.Node#getNodeValue
+     */
+    public String getNodeValue() throws DOMException
+    {
+        String value = ""; //BAK 10/10/2000 replaced null
+        if (adaptee.type == Node.TextNode ||
+            adaptee.type == Node.CDATATag ||
+            adaptee.type == Node.CommentTag ||
+            adaptee.type == Node.ProcInsTag)
+        {
+
+            if (adaptee.textarray != null && adaptee.start < adaptee.end)
+            {
+                value = Lexer.getString(adaptee.textarray,
+                                        adaptee.start,
+                                        adaptee.end - adaptee.start);
+            }
+        }
+        return value;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#setNodeValue
+     */
+    public void setNodeValue(String nodeValue) throws DOMException
+    {
+        if (adaptee.type == Node.TextNode ||
+            adaptee.type == Node.CDATATag ||
+            adaptee.type == Node.CommentTag ||
+            adaptee.type == Node.ProcInsTag)
+        {
+            byte[] textarray = Lexer.getBytes(nodeValue);
+            adaptee.textarray = textarray;
+            adaptee.start = 0;
+            adaptee.end = textarray.length;
+        }
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName()
+    {
+        return adaptee.element;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        short result = -1;
+        switch (adaptee.type) {
+        case Node.RootNode:
+            result = org.w3c.dom.Node.DOCUMENT_NODE;
+            break;
+        case Node.DocTypeTag:
+            result = org.w3c.dom.Node.DOCUMENT_TYPE_NODE;
+            break;
+        case Node.CommentTag:
+            result = org.w3c.dom.Node.COMMENT_NODE;
+            break;
+        case Node.ProcInsTag:
+            result = org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE;
+            break;
+        case Node.TextNode:
+            result = org.w3c.dom.Node.TEXT_NODE;
+            break;
+        case Node.CDATATag:
+            result = org.w3c.dom.Node.CDATA_SECTION_NODE;
+            break;
+        case Node.StartTag:
+        case Node.StartEndTag:
+            result = org.w3c.dom.Node.ELEMENT_NODE;
+            break;
+        }
+        return result;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getParentNode
+     */
+    public org.w3c.dom.Node getParentNode()
+    {
+        if (adaptee.parent != null)
+            return adaptee.parent.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getChildNodes
+     */
+    public org.w3c.dom.NodeList getChildNodes()
+    {
+        return new DOMNodeListImpl(adaptee);
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getFirstChild
+     */
+    public org.w3c.dom.Node getFirstChild()
+    {
+        if (adaptee.content != null)
+            return adaptee.content.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getLastChild
+     */
+    public org.w3c.dom.Node getLastChild()
+    {
+        if (adaptee.last != null)
+            return adaptee.last.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getPreviousSibling
+     */
+    public org.w3c.dom.Node getPreviousSibling()
+    {
+        if (adaptee.prev != null)
+            return adaptee.prev.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getNextSibling
+     */
+    public org.w3c.dom.Node getNextSibling()
+    {
+        if (adaptee.next != null)
+            return adaptee.next.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getAttributes
+     */
+    public org.w3c.dom.NamedNodeMap getAttributes()
+    {
+        return new DOMAttrMapImpl(adaptee.attributes);
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getOwnerDocument
+     */
+    public org.w3c.dom.Document getOwnerDocument()
+    {
+        Node node;
+
+        node = this.adaptee;
+        if (node != null && node.type == Node.RootNode)
+            return null;
+
+        for (node = this.adaptee;
+            node != null && node.type != Node.RootNode; node = node.parent);
+
+        if (node != null)
+            return (org.w3c.dom.Document)node.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#insertBefore
+     */
+    public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild,
+                                         org.w3c.dom.Node refChild)
+                                             throws DOMException
+    {
+        // TODO - handle newChild already in tree
+
+        if (newChild == null)
+            return null;
+        if (!(newChild instanceof DOMNodeImpl)) {
+            throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
+                                       "newChild not instanceof DOMNodeImpl");
+        }
+        DOMNodeImpl newCh = (DOMNodeImpl)newChild;
+
+        if (this.adaptee.type == Node.RootNode) {
+            if (newCh.adaptee.type != Node.DocTypeTag &&
+                newCh.adaptee.type != Node.ProcInsTag) {
+                throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+                                       "newChild cannot be a child of this node");
+            }
+        } else if (this.adaptee.type == Node.StartTag) {
+            if (newCh.adaptee.type != Node.StartTag &&
+                newCh.adaptee.type != Node.StartEndTag &&
+                newCh.adaptee.type != Node.CommentTag &&
+                newCh.adaptee.type != Node.TextNode &&
+                newCh.adaptee.type != Node.CDATATag) {
+                throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+                                       "newChild cannot be a child of this node");
+            }
+        }
+        if (refChild == null) {
+            Node.insertNodeAtEnd(this.adaptee, newCh.adaptee);
+            if (this.adaptee.type == Node.StartEndTag) {
+              this.adaptee.setType(Node.StartTag);
+            }
+        } else {
+            Node ref = this.adaptee.content;
+            while (ref != null) {
+                if (ref.getAdapter() == refChild) break;
+                ref = ref.next;
+            }
+            if (ref == null) {
+                throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+                                           "refChild not found");
+            }
+            Node.insertNodeBeforeElement(ref, newCh.adaptee);
+        }
+        return newChild;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#replaceChild
+     */
+    public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild,
+                                         org.w3c.dom.Node oldChild)
+                                             throws DOMException
+    {
+        // TODO - handle newChild already in tree
+
+        if (newChild == null)
+            return null;
+        if (!(newChild instanceof DOMNodeImpl)) {
+            throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
+                                       "newChild not instanceof DOMNodeImpl");
+        }
+        DOMNodeImpl newCh = (DOMNodeImpl)newChild;
+
+        if (this.adaptee.type == Node.RootNode) {
+            if (newCh.adaptee.type != Node.DocTypeTag &&
+                newCh.adaptee.type != Node.ProcInsTag) {
+                throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+                                       "newChild cannot be a child of this node");
+            }
+        } else if (this.adaptee.type == Node.StartTag) {
+            if (newCh.adaptee.type != Node.StartTag &&
+                newCh.adaptee.type != Node.StartEndTag &&
+                newCh.adaptee.type != Node.CommentTag &&
+                newCh.adaptee.type != Node.TextNode &&
+                newCh.adaptee.type != Node.CDATATag) {
+                throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+                                       "newChild cannot be a child of this node");
+            }
+        }
+        if (oldChild == null) {
+            throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+                                       "oldChild not found");
+        } else {
+            Node n;
+            Node ref = this.adaptee.content;
+            while (ref != null) {
+                if (ref.getAdapter() == oldChild) break;
+                ref = ref.next;
+            }
+            if (ref == null) {
+                throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+                                           "oldChild not found");
+            }
+            newCh.adaptee.next = ref.next;
+            newCh.adaptee.prev = ref.prev;
+            newCh.adaptee.last = ref.last;
+            newCh.adaptee.parent = ref.parent;
+            newCh.adaptee.content = ref.content;
+            if (ref.parent != null) {
+                if (ref.parent.content == ref)
+                    ref.parent.content = newCh.adaptee;
+                if (ref.parent.last == ref)
+                    ref.parent.last = newCh.adaptee;
+            }
+            if (ref.prev != null) {
+                ref.prev.next = newCh.adaptee;
+            }
+            if (ref.next != null) {
+                ref.next.prev = newCh.adaptee;
+            }
+            for (n = ref.content; n != null; n = n.next) {
+                if (n.parent == ref)
+                    n.parent = newCh.adaptee;
+            }
+        }
+        return oldChild;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#removeChild
+     */
+    public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild)
+                                            throws DOMException
+    {
+        if (oldChild == null)
+            return null;
+
+        Node ref = this.adaptee.content;
+        while (ref != null) {
+            if (ref.getAdapter() == oldChild) break;
+            ref = ref.next;
+        }
+        if (ref == null) {
+            throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
+                                       "refChild not found");
+        }
+        Node.discardElement(ref);
+
+        if (this.adaptee.content == null
+        &&  this.adaptee.type == Node.StartTag) {
+          this.adaptee.setType(Node.StartEndTag);
+        }
+
+        return oldChild;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#appendChild
+     */
+    public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild)
+                                            throws DOMException
+    {
+        // TODO - handle newChild already in tree
+
+        if (newChild == null)
+            return null;
+        if (!(newChild instanceof DOMNodeImpl)) {
+            throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
+                                       "newChild not instanceof DOMNodeImpl");
+        }
+        DOMNodeImpl newCh = (DOMNodeImpl)newChild;
+
+        if (this.adaptee.type == Node.RootNode) {
+            if (newCh.adaptee.type != Node.DocTypeTag &&
+                newCh.adaptee.type != Node.ProcInsTag) {
+                throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+                                       "newChild cannot be a child of this node");
+            }
+        } else if (this.adaptee.type == Node.StartTag) {
+            if (newCh.adaptee.type != Node.StartTag &&
+                newCh.adaptee.type != Node.StartEndTag &&
+                newCh.adaptee.type != Node.CommentTag &&
+                newCh.adaptee.type != Node.TextNode &&
+                newCh.adaptee.type != Node.CDATATag) {
+                throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
+                                       "newChild cannot be a child of this node");
+            }
+        }
+        Node.insertNodeAtEnd(this.adaptee, newCh.adaptee);
+
+        if (this.adaptee.type == Node.StartEndTag) {
+          this.adaptee.setType(Node.StartTag);
+        }
+
+        return newChild;
+    }
+
+    /**
+     * @see org.w3c.dom.Node#hasChildNodes
+     */
+    public boolean hasChildNodes()
+    {
+        return (adaptee.content != null);
+    }
+
+    /**
+     * @see org.w3c.dom.Node#cloneNode
+     */
+    public org.w3c.dom.Node cloneNode(boolean deep)
+    {
+        Node node = adaptee.cloneNode(deep);
+        node.parent = null;
+        return node.getAdapter();
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public void normalize()
+    {
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public boolean supports(String feature, String version)
+    {
+        return isSupported(feature, version);
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public String getNamespaceURI()
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public String getPrefix()
+    {
+        return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public void setPrefix(String prefix)
+                            throws DOMException
+    {
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public String getLocalName()
+    {
+      return null;
+    }
+
+    /**
+     * DOM2 - not implemented.
+     */
+    public boolean isSupported(String feature,String version) {
+        return false;
+    }
+
+    /**
+     * DOM2 - @see org.w3c.dom.Node#hasAttributes
+     * contributed by dlp@users.sourceforge.net
+     */
+    public boolean hasAttributes()
+    {
+        return adaptee.attributes != null;
+    }
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMNodeListByTagNameImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMNodeListByTagNameImpl.java
new file mode 100644 (file)
index 0000000..d83d2dc
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * @(#)DOMNodeListByTagNameImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * DOMNodeListByTagNameImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/**
+ * <p>The items in the <code>NodeList</code> are accessible via an integral 
+ * index, starting from 0. 
+ *
+ */
+public class DOMNodeListByTagNameImpl implements org.w3c.dom.NodeList {
+
+    private Node first = null;
+    private String tagName = "*";
+    private int currIndex = 0;
+    private int maxIndex = 0;
+    private Node currNode = null;
+
+    protected DOMNodeListByTagNameImpl(Node first, String tagName)
+    {
+        this.first = first;
+        this.tagName = tagName;
+    }
+
+    /**
+     * @see org.w3c.dom.NodeList#item
+     */
+    public org.w3c.dom.Node item(int index)
+    {
+        currIndex = 0;
+        maxIndex = index;
+        preTraverse(first);
+
+        if (currIndex > maxIndex && currNode != null)
+            return currNode.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.NodeList#getLength
+     */
+    public int getLength()
+    {
+        currIndex = 0;
+        maxIndex = Integer.MAX_VALUE;
+        preTraverse(first);
+        return currIndex;
+    }
+
+    protected void preTraverse(Node node)
+    {
+        if (node == null)
+            return;
+
+        if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+        {
+            if (currIndex <= maxIndex &&
+                (tagName.equals("*") || tagName.equals(node.element)))
+            {
+                currIndex += 1;
+                currNode = node;
+            }
+        }
+        if (currIndex > maxIndex)
+            return;
+
+        node = node.content;
+        while (node != null)
+        {
+            preTraverse(node);
+            node = node.next;
+        }
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMNodeListImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMNodeListImpl.java
new file mode 100644 (file)
index 0000000..3ff832e
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * @(#)DOMNodeListImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * DOMNodeListImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/**
+ * <p>The items in the <code>NodeList</code> are accessible via an integral 
+ * index, starting from 0. 
+ *
+ */
+public class DOMNodeListImpl implements org.w3c.dom.NodeList {
+
+    private Node parent = null;
+
+    protected DOMNodeListImpl(Node parent)
+    {
+        this.parent = parent;
+    }
+
+    /**
+     * @see org.w3c.dom.NodeList#item
+     */
+    public org.w3c.dom.Node item(int index)
+    {
+        int i = 0;
+        Node node = parent.content;
+        while (node != null) {
+            if (i >= index) break;
+            i++;
+            node = node.next;
+        }
+        if (node != null)
+            return node.getAdapter();
+        else
+            return null;
+    }
+
+    /**
+     * @see org.w3c.dom.NodeList#getLength
+     */
+    public int getLength()
+    {
+        int len = 0;
+        Node node = parent.content;
+        while (node != null) {
+            len++;
+            node = node.next;
+        }
+        return len;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMProcessingInstructionImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMProcessingInstructionImpl.java
new file mode 100644 (file)
index 0000000..0e76fae
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * @(#)DOMProcessingInstructionImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMProcessingInstructionImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMProcessingInstructionImpl extends DOMNodeImpl
+                            implements org.w3c.dom.ProcessingInstruction {
+
+    protected DOMProcessingInstructionImpl(Node adaptee)
+    {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE;
+    }
+
+    /**
+     * @see org.w3c.dom.ProcessingInstruction#getTarget
+     */
+    public String getTarget()
+    {
+        // TODO
+        return null;
+    }
+
+    /**
+     * @see org.w3c.dom.ProcessingInstruction#getData
+     */
+    public String getData()
+    {
+        return getNodeValue();
+    }
+
+    /**
+     * @see org.w3c.dom.ProcessingInstruction#setData
+     */
+    public void setData(String data) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMTextImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/DOMTextImpl.java
new file mode 100644 (file)
index 0000000..0155373
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * @(#)DOMTextImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+import org.w3c.dom.DOMException;
+
+/**
+ *
+ * DOMTextImpl
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class DOMTextImpl extends DOMCharacterDataImpl
+                            implements org.w3c.dom.Text {
+
+    protected DOMTextImpl(Node adaptee)
+    {
+        super(adaptee);
+    }
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    /**
+     * @see org.w3c.dom.Node#getNodeName
+     */
+    public String getNodeName()
+    {
+        return "#text";
+    }
+
+    /**
+     * @see org.w3c.dom.Node#getNodeType
+     */
+    public short getNodeType()
+    {
+        return org.w3c.dom.Node.TEXT_NODE;
+    }
+
+    /**
+     * @see org.w3c.dom.Text#splitText
+     */
+    public org.w3c.dom.Text splitText(int offset) throws DOMException
+    {
+        // NOT SUPPORTED
+        throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
+                                   "Not supported");
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Dict.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Dict.java
new file mode 100644 (file)
index 0000000..37babb2
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * @(#)Dict.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Tag dictionary node
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class Dict {
+
+    /* content model shortcut encoding */
+
+    public static final int CM_UNKNOWN         = 0;
+    public static final int CM_EMPTY        = (1 << 0);
+    public static final int CM_HTML         = (1 << 1);
+    public static final int CM_HEAD         = (1 << 2);
+    public static final int CM_BLOCK        = (1 << 3);
+    public static final int CM_INLINE       = (1 << 4);
+    public static final int CM_LIST         = (1 << 5);
+    public static final int CM_DEFLIST      = (1 << 6);
+    public static final int CM_TABLE        = (1 << 7);
+    public static final int CM_ROWGRP       = (1 << 8);
+    public static final int CM_ROW          = (1 << 9);
+    public static final int CM_FIELD        = (1 << 10);
+    public static final int CM_OBJECT       = (1 << 11);
+    public static final int CM_PARAM        = (1 << 12);
+    public static final int CM_FRAMES       = (1 << 13);
+    public static final int CM_HEADING      = (1 << 14);
+    public static final int CM_OPT          = (1 << 15);
+    public static final int CM_IMG          = (1 << 16);
+    public static final int CM_MIXED        = (1 << 17);
+    public static final int CM_NO_INDENT    = (1 << 18);
+    public static final int CM_OBSOLETE     = (1 << 19);
+    public static final int CM_NEW          = (1 << 20);
+    public static final int CM_OMITST       = (1 << 21);
+
+    /*
+
+     If the document uses just HTML 2.0 tags and attributes described it as HTML 2.0
+     Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary
+     tags and attributes then describe it as HTML Proprietary. If it includes the
+     xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe
+     it as one of the flavors of Voyager (strict, loose or frameset).
+    */
+
+    public static final short VERS_UNKNOWN       = 0;
+
+    public static final short VERS_HTML20        = 1;
+    public static final short VERS_HTML32        = 2;
+    public static final short VERS_HTML40_STRICT = 4;
+    public static final short VERS_HTML40_LOOSE  = 8;
+    public static final short VERS_FRAMES        = 16;
+    public static final short VERS_XML           = 32;
+
+    public static final short VERS_NETSCAPE      = 64;
+    public static final short VERS_MICROSOFT     = 128;
+    public static final short VERS_SUN           = 256;
+
+    public static final short VERS_MALFORMED     = 512;
+
+    public static final short VERS_ALL = (VERS_HTML20|VERS_HTML32|VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES);
+    public static final short VERS_HTML40 = (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES);
+    public static final short VERS_LOOSE = (VERS_HTML32|VERS_HTML40_LOOSE|VERS_FRAMES);
+    public static final short VERS_IFRAMES = (VERS_HTML40_LOOSE|VERS_FRAMES);
+    public static final short VERS_FROM32 = (VERS_HTML40_STRICT|VERS_LOOSE);
+    public static final short VERS_PROPRIETARY = (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN);
+
+    public static final short VERS_EVERYTHING = (VERS_ALL|VERS_PROPRIETARY);
+
+    public Dict( String name, short versions, int model,
+                 Parser parser, CheckAttribs chkattrs )
+    {
+        this.name = name;
+        this.versions = versions;
+        this.model = model;
+        this.parser = parser;
+        this.chkattrs = chkattrs;
+    }
+
+    public String name;
+    public short versions;
+    public int model;
+    public Parser parser;
+    public CheckAttribs chkattrs;
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Entity.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Entity.java
new file mode 100644 (file)
index 0000000..95bdc41
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * @(#)Entity.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * HTML ISO entity
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class Entity {
+
+    public Entity( String name, short code )
+    {
+        this.name = name;
+        this.code = code;
+    }
+
+    public Entity( String name, int code )
+    {
+        this.name = name;
+        this.code = (short)code;
+    }
+
+    public String name;
+    public short  code;
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/EntityTable.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/EntityTable.java
new file mode 100644 (file)
index 0000000..f7bcf55
--- /dev/null
@@ -0,0 +1,386 @@
+/*
+ * @(#)EntityTable.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Entity hash table
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.util.Hashtable;
+import java.util.Enumeration;
+
+public class EntityTable {
+
+    /** Creates an empty table; use install() to populate it. */
+    public EntityTable()
+    {
+    }
+
+    /**
+     * Looks up an entity by name (without the leading "&").
+     *
+     * @return the registered entity, or null if the name is unknown
+     */
+    public Entity lookup( String name )
+    {
+        return (Entity)entityHashtable.get( name );
+    }
+
+    /**
+     * Registers name with the given code, or updates the code of the
+     * entity already registered under that name.
+     *
+     * @return the entity now registered under name
+     */
+    public Entity install( String name, short code )
+    {
+        Entity ent = lookup( name );
+        if ( ent == null ) {
+            ent = new Entity( name, code );
+            entityHashtable.put( name, ent );
+        } else {
+            ent.code = code;
+        }
+        return ent;
+    }
+
+    /**
+     * Registers ent under ent.name, replacing any previous registration.
+     *
+     * @return whatever Hashtable.put returns, i.e. the entity previously
+     *         registered under that name, or null if there was none. Note
+     *         that this differs from install(String, short), which returns
+     *         the installed entity.
+     */
+    public Entity install( Entity ent )
+    {
+        return (Entity)entityHashtable.put( ent.name, ent );
+    }
+
+    /**
+     * Converts an entity reference to its character code.
+     *
+     * @param name the reference starting with "&" and without the
+     *             trailing ";", e.g. "&amp", "&#160" or "&#xA0"
+     * @return the character code, or zero on error: name too short,
+     *         missing/bad number, unknown entity name, or a numeric value
+     *         that does not fit in the (short) return type
+     */
+    public short entityCode( String name )
+    {
+        if (name.length() <= 1)
+            return 0;
+
+        /* numeric entity: name = "&#" followed by number */
+        if ( name.charAt(1) == '#' ) {
+            int c = 0;  /* zero on missing/bad number */
+
+            /* 'x' or 'X' prefix denotes hexadecimal number format */
+            try {
+                if (name.length() >= 4 &&
+                    (name.charAt(2) == 'x' || name.charAt(2) == 'X')) {
+                    c = Integer.parseInt( name.substring(3), 16 );
+                } else if (name.length() >= 3) {
+                    c = Integer.parseInt( name.substring(2) );
+                }
+            }
+            catch ( NumberFormatException e ) {
+                /* deliberately ignored: a malformed number is reported
+                   to the caller through the zero return value */
+                c = 0;
+            }
+
+            /* A plain (short) cast would wrap large values onto unrelated
+               characters (e.g. 65601 -> 65, 'A'); report them as errors
+               instead. */
+            if ( c < 0 || c > Short.MAX_VALUE )
+                return 0;
+
+            return (short)c;
+        }
+
+        /* Named entity: name ="&" followed by a name */
+        Entity ent = lookup( name.substring(1) );
+        if ( ent != null ) {
+            return ent.code;
+        }
+
+        return 0;   /* zero signifies unknown entity name */
+    }
+
+    /**
+     * Reverse lookup: scans the table for an entity with the given code.
+     *
+     * @return the name of the first matching entity in the hashtable's
+     *         enumeration order, or null if no entity has that code
+     */
+    public String entityName( short code )
+    {
+        String name = null;
+        Entity ent;
+        Enumeration en = entityHashtable.elements();
+        while ( en.hasMoreElements() ) {
+            ent = (Entity)en.nextElement();
+            if ( ent.code == code ) {
+                name = ent.name;
+                break;
+            }
+        }
+        return name;
+    }
+
+    /* entity name -> Entity */
+    private Hashtable entityHashtable = new Hashtable();
+
+    /* lazily created by getDefaultEntityTable() */
+    private static EntityTable defaultEntityTable = null;
+
+    /* entities installed into the default table */
+    private static Entity[] entities = {
+
+    new Entity( "nbsp",   160 ),
+    new Entity( "iexcl",  161 ),
+    new Entity( "cent",   162 ),
+    new Entity( "pound",  163 ),
+    new Entity( "curren", 164 ),
+    new Entity( "yen",    165 ),
+    new Entity( "brvbar", 166 ),
+    new Entity( "sect",   167 ),
+    new Entity( "uml",    168 ),
+    new Entity( "copy",   169 ),
+    new Entity( "ordf",   170 ),
+    new Entity( "laquo",  171 ),
+    new Entity( "not",    172 ),
+    new Entity( "shy",    173 ),
+    new Entity( "reg",    174 ),
+    new Entity( "macr",   175 ),
+    new Entity( "deg",    176 ),
+    new Entity( "plusmn", 177 ),
+    new Entity( "sup2",   178 ),
+    new Entity( "sup3",   179 ),
+    new Entity( "acute",  180 ),
+    new Entity( "micro",  181 ),
+    new Entity( "para",   182 ),
+    new Entity( "middot", 183 ),
+    new Entity( "cedil",  184 ),
+    new Entity( "sup1",   185 ),
+    new Entity( "ordm",   186 ),
+    new Entity( "raquo",  187 ),
+    new Entity( "frac14", 188 ),
+    new Entity( "frac12", 189 ),
+    new Entity( "frac34", 190 ),
+    new Entity( "iquest", 191 ),
+    new Entity( "Agrave", 192 ),
+    new Entity( "Aacute", 193 ),
+    new Entity( "Acirc",  194 ),
+    new Entity( "Atilde", 195 ),
+    new Entity( "Auml",   196 ),
+    new Entity( "Aring",  197 ),
+    new Entity( "AElig",  198 ),
+    new Entity( "Ccedil", 199 ),
+    new Entity( "Egrave", 200 ),
+    new Entity( "Eacute", 201 ),
+    new Entity( "Ecirc",  202 ),
+    new Entity( "Euml",   203 ),
+    new Entity( "Igrave", 204 ),
+    new Entity( "Iacute", 205 ),
+    new Entity( "Icirc",  206 ),
+    new Entity( "Iuml",   207 ),
+    new Entity( "ETH",    208 ),
+    new Entity( "Ntilde", 209 ),
+    new Entity( "Ograve", 210 ),
+    new Entity( "Oacute", 211 ),
+    new Entity( "Ocirc",  212 ),
+    new Entity( "Otilde", 213 ),
+    new Entity( "Ouml",   214 ),
+    new Entity( "times",  215 ),
+    new Entity( "Oslash", 216 ),
+    new Entity( "Ugrave", 217 ),
+    new Entity( "Uacute", 218 ),
+    new Entity( "Ucirc",  219 ),
+    new Entity( "Uuml",   220 ),
+    new Entity( "Yacute", 221 ),
+    new Entity( "THORN",  222 ),
+    new Entity( "szlig",  223 ),
+    new Entity( "agrave", 224 ),
+    new Entity( "aacute", 225 ),
+    new Entity( "acirc",  226 ),
+    new Entity( "atilde", 227 ),
+    new Entity( "auml",   228 ),
+    new Entity( "aring",  229 ),
+    new Entity( "aelig",  230 ),
+    new Entity( "ccedil", 231 ),
+    new Entity( "egrave", 232 ),
+    new Entity( "eacute", 233 ),
+    new Entity( "ecirc",  234 ),
+    new Entity( "euml",   235 ),
+    new Entity( "igrave", 236 ),
+    new Entity( "iacute", 237 ),
+    new Entity( "icirc",  238 ),
+    new Entity( "iuml",   239 ),
+    new Entity( "eth",    240 ),
+    new Entity( "ntilde", 241 ),
+    new Entity( "ograve", 242 ),
+    new Entity( "oacute", 243 ),
+    new Entity( "ocirc",  244 ),
+    new Entity( "otilde", 245 ),
+    new Entity( "ouml",   246 ),
+    new Entity( "divide", 247 ),
+    new Entity( "oslash", 248 ),
+    new Entity( "ugrave", 249 ),
+    new Entity( "uacute", 250 ),
+    new Entity( "ucirc",  251 ),
+    new Entity( "uuml",   252 ),
+    new Entity( "yacute", 253 ),
+    new Entity( "thorn",  254 ),
+    new Entity( "yuml",   255 ),
+    new Entity( "fnof",     402 ),
+    new Entity( "Alpha",    913 ),
+    new Entity( "Beta",     914 ),
+    new Entity( "Gamma",    915 ),
+    new Entity( "Delta",    916 ),
+    new Entity( "Epsilon",  917 ),
+    new Entity( "Zeta",     918 ),
+    new Entity( "Eta",      919 ),
+    new Entity( "Theta",    920 ),
+    new Entity( "Iota",     921 ),
+    new Entity( "Kappa",    922 ),
+    new Entity( "Lambda",   923 ),
+    new Entity( "Mu",       924 ),
+    new Entity( "Nu",       925 ),
+    new Entity( "Xi",       926 ),
+    new Entity( "Omicron",  927 ),
+    new Entity( "Pi",       928 ),
+    new Entity( "Rho",      929 ),
+    new Entity( "Sigma",    931 ),
+    new Entity( "Tau",      932 ),
+    new Entity( "Upsilon",  933 ),
+    new Entity( "Phi",      934 ),
+    new Entity( "Chi",      935 ),
+    new Entity( "Psi",      936 ),
+    new Entity( "Omega",    937 ),
+    new Entity( "alpha",    945 ),
+    new Entity( "beta",     946 ),
+    new Entity( "gamma",    947 ),
+    new Entity( "delta",    948 ),
+    new Entity( "epsilon",  949 ),
+    new Entity( "zeta",     950 ),
+    new Entity( "eta",      951 ),
+    new Entity( "theta",    952 ),
+    new Entity( "iota",     953 ),
+    new Entity( "kappa",    954 ),
+    new Entity( "lambda",   955 ),
+    new Entity( "mu",       956 ),
+    new Entity( "nu",       957 ),
+    new Entity( "xi",       958 ),
+    new Entity( "omicron",  959 ),
+    new Entity( "pi",       960 ),
+    new Entity( "rho",      961 ),
+    new Entity( "sigmaf",   962 ),
+    new Entity( "sigma",    963 ),
+    new Entity( "tau",      964 ),
+    new Entity( "upsilon",  965 ),
+    new Entity( "phi",      966 ),
+    new Entity( "chi",      967 ),
+    new Entity( "psi",      968 ),
+    new Entity( "omega",    969 ),
+    new Entity( "thetasym", 977 ),
+    new Entity( "upsih",    978 ),
+    new Entity( "piv",      982 ),
+    new Entity( "bull",     8226 ),
+    new Entity( "hellip",   8230 ),
+    new Entity( "prime",    8242 ),
+    new Entity( "Prime",    8243 ),
+    new Entity( "oline",    8254 ),
+    new Entity( "frasl",    8260 ),
+    new Entity( "weierp",   8472 ),
+    new Entity( "image",    8465 ),
+    new Entity( "real",     8476 ),
+    new Entity( "trade",    8482 ),
+    new Entity( "alefsym",  8501 ),
+    new Entity( "larr",     8592 ),
+    new Entity( "uarr",     8593 ),
+    new Entity( "rarr",     8594 ),
+    new Entity( "darr",     8595 ),
+    new Entity( "harr",     8596 ),
+    new Entity( "crarr",    8629 ),
+    new Entity( "lArr",     8656 ),
+    new Entity( "uArr",     8657 ),
+    new Entity( "rArr",     8658 ),
+    new Entity( "dArr",     8659 ),
+    new Entity( "hArr",     8660 ),
+    new Entity( "forall",   8704 ),
+    new Entity( "part",     8706 ),
+    new Entity( "exist",    8707 ),
+    new Entity( "empty",    8709 ),
+    new Entity( "nabla",    8711 ),
+    new Entity( "isin",     8712 ),
+    new Entity( "notin",    8713 ),
+    new Entity( "ni",       8715 ),
+    new Entity( "prod",     8719 ),
+    new Entity( "sum",      8721 ),
+    new Entity( "minus",    8722 ),
+    new Entity( "lowast",   8727 ),
+    new Entity( "radic",    8730 ),
+    new Entity( "prop",     8733 ),
+    new Entity( "infin",    8734 ),
+    new Entity( "ang",      8736 ),
+    new Entity( "and",      8743 ),
+    new Entity( "or",       8744 ),
+    new Entity( "cap",      8745 ),
+    new Entity( "cup",      8746 ),
+    new Entity( "int",      8747 ),
+    new Entity( "there4",   8756 ),
+    new Entity( "sim",      8764 ),
+    new Entity( "cong",     8773 ),
+    new Entity( "asymp",    8776 ),
+    new Entity( "ne",       8800 ),
+    new Entity( "equiv",    8801 ),
+    new Entity( "le",       8804 ),
+    new Entity( "ge",       8805 ),
+    new Entity( "sub",      8834 ),
+    new Entity( "sup",      8835 ),
+    new Entity( "nsub",     8836 ),
+    new Entity( "sube",     8838 ),
+    new Entity( "supe",     8839 ),
+    new Entity( "oplus",    8853 ),
+    new Entity( "otimes",   8855 ),
+    new Entity( "perp",     8869 ),
+    new Entity( "sdot",     8901 ),
+    new Entity( "lceil",    8968 ),
+    new Entity( "rceil",    8969 ),
+    new Entity( "lfloor",   8970 ),
+    new Entity( "rfloor",   8971 ),
+    new Entity( "lang",     9001 ),
+    new Entity( "rang",     9002 ),
+    new Entity( "loz",      9674 ),
+    new Entity( "spades",   9824 ),
+    new Entity( "clubs",    9827 ),
+    new Entity( "hearts",   9829 ),
+    new Entity( "diams",    9830 ),
+    new Entity( "quot",    34 ),
+    new Entity( "amp",     38 ),
+    new Entity( "lt",      60 ),
+    new Entity( "gt",      62 ),
+    new Entity( "OElig",   338 ),
+    new Entity( "oelig",   339 ),
+    new Entity( "Scaron",  352 ),
+    new Entity( "scaron",  353 ),
+    new Entity( "Yuml",    376 ),
+    new Entity( "circ",    710 ),
+    new Entity( "tilde",   732 ),
+    new Entity( "ensp",    8194 ),
+    new Entity( "emsp",    8195 ),
+    new Entity( "thinsp",  8201 ),
+    new Entity( "zwnj",    8204 ),
+    new Entity( "zwj",     8205 ),
+    new Entity( "lrm",     8206 ),
+    new Entity( "rlm",     8207 ),
+    new Entity( "ndash",   8211 ),
+    new Entity( "mdash",   8212 ),
+    new Entity( "lsquo",   8216 ),
+    new Entity( "rsquo",   8217 ),
+    new Entity( "sbquo",   8218 ),
+    new Entity( "ldquo",   8220 ),
+    new Entity( "rdquo",   8221 ),
+    new Entity( "bdquo",   8222 ),
+    new Entity( "dagger",  8224 ),
+    new Entity( "Dagger",  8225 ),
+    new Entity( "permil",  8240 ),
+    new Entity( "lsaquo",  8249 ),
+    new Entity( "rsaquo",  8250 ),
+    new Entity( "euro",    8364 )
+
+    };
+
+    /**
+     * Returns the shared table holding the entities listed above,
+     * creating and filling it on first use.
+     */
+    public static EntityTable getDefaultEntityTable()
+    {
+        if ( defaultEntityTable == null ) {
+            /* fill a local table first so that a failure while installing
+               cannot leave a half-filled table behind in the static field */
+            EntityTable table = new EntityTable();
+            for ( int i = 0; i < entities.length; i++ ) {
+                table.install( entities[i] );
+            }
+            defaultEntityTable = table;
+        }
+        return defaultEntityTable;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/IStack.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/IStack.java
new file mode 100644 (file)
index 0000000..2628ee0
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * @(#)IStack.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Inline stack node
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class IStack {
+
+    /*
+      Mosaic keeps inline elements on a stack of their own, separate
+      from other elements. The same is done here so that inline markup
+      errors can be recovered from, for example:
+
+         <i>italic text
+         <p>more italic text</b> normal text
+
+      which for compatibility with Mosaic is mapped to:
+
+         <i>italic text</i>
+         <p><i>more italic text</i> normal text
+
+      Any inline end tag pops the effect of the current inline start
+      tag, so </b> pops <i> in the example above.
+    */
+
+    /* every member starts out as null */
+    public IStack next = null;
+    public Dict tag = null;          /* tag's dictionary definition */
+    public String element = null;    /* name (null for text nodes) */
+    public AttVal attributes = null;
+
+    /* Creates an empty stack node; all members are null. */
+    public IStack()
+    {
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Lexer.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Lexer.java
new file mode 100644 (file)
index 0000000..f5f5548
--- /dev/null
@@ -0,0 +1,3134 @@
+/*
+ * @(#)Lexer.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Lexer for html parser
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+  Given a file stream fp it returns a sequence of tokens.
+
+     GetToken(fp) gets the next token
+     UngetToken(fp) provides one level undo
+
+  The tags include an attribute list:
+
+    - linked list of attribute/value nodes
+    - each node has 2 null-terminated strings.
+    - entities are replaced in attribute values
+
+  white space is compacted if not in preformatted mode
+  If not in preformatted mode then leading white space
+  is discarded and subsequent white space sequences
+  compacted to single space chars.
+
+  If XmlTags is no then tag names and attribute names
+  are folded to lower case.
+
+ Not yet done:
+    -   Doctype subset and marked sections
+*/
+
+import java.io.PrintWriter;
+import java.util.Stack;
+import java.util.Vector;
+
+import org.eclipse.core.resources.IFile;
+import sun.security.krb5.internal.av;
+
+public class Lexer {
+
+    /* resource passed to the constructor; exposed through getIFile() */
+    private IFile iFile;
+    public StreamIn in;   /* file stream */
+    public PrintWriter errout;   /* error output stream */
+    public short badAccess; /* for accessibility errors */
+    public short badLayout; /* for bad style errors */
+    public short badChars;  /* for bad char encodings */
+    public short badForm;   /* for mismatched/mispositioned form tags */
+    public short warnings;  /* count of warnings in this document */
+    public short errors;    /* count of errors */
+    public int   lines;     /* lines seen */
+    public int   columns;   /* at start of current token */
+    public boolean waswhite;  /* used to collapse contiguous white space */
+    public boolean pushed;    /* true after token has been pushed back */
+    public boolean insertspace;   /* when space is moved after end tag */
+    public boolean excludeBlocks;  /* Netscape compatibility */
+    public boolean exiled;    /* true if moved out of table */
+    public boolean isvoyager; /* true if xmlns attribute on html element */
+    public short versions;  /* bit vector of HTML versions */
+    public int doctype;    /* version as given by doctype (if any) */
+    public boolean badDoctype; /* e.g. if html or PUBLIC is missing */
+    public int txtstart;  /* start of current node */
+    public int txtend;    /* end of current node */
+    public short state;     /* state of lexer's finite state machine */
+    /* NOTE(review): presumably the token that "pushed" refers to - confirm */
+    public Node token;
+
+    /*
+      lexer character buffer
+
+      parse tree nodes span onto this buffer
+      which contains the concatenated text
+      contents of all of the elements.
+
+      The buffer is replaced by a larger array when it fills up (see
+      addByte), so nodes must not cache it without being registered
+      in nodeList.
+
+     lexsize must be reset for each file.
+    */
+    public byte[] lexbuf;   /* byte buffer of UTF-8 chars */
+    public int lexlength;   /* allocated */
+    public int lexsize;     /* used */
+
+    /* Inline stack for compatibility with Mosaic */
+    public Node inode;        /* for deferring text node */
+    public int insert;        /* for inferring inline tags */
+    public Stack istack;
+    public int istackbase;    /* start of frame */
+
+    public Style styles;      /* used for cleaning up presentation markup */
+
+    public Configuration configuration;
+    protected int seenBodyEndTag; /* used by parser */
+    /* objects registered by newNode/cloneNode/cloneAttributes; scanned by
+       updateNodeTextArrays when lexbuf is reallocated */
+    private Vector nodeList;
+
+    /**
+     * Creates a lexer reading from the given stream.
+     *
+     * @param iFile         resource later returned by getIFile()
+     * @param in            input stream to tokenize
+     * @param configuration settings consulted while lexing
+     */
+    public Lexer(IFile iFile, StreamIn in, Configuration configuration)
+    {
+        /* collaborators supplied by the caller */
+        this.iFile = iFile;
+        this.in = in;
+        this.configuration = configuration;
+
+        /* position and state machine */
+        lines = 1;
+        columns = 1;
+        state = LEX_CONTENT;
+        waswhite = false;
+        pushed = false;
+        insertspace = false;
+        exiled = false;
+        token = null;
+        txtstart = 0;
+        txtend = 0;
+
+        /* diagnostics counters */
+        badAccess = 0;
+        badLayout = 0;
+        badChars = 0;
+        badForm = 0;
+        warnings = 0;
+        errors = 0;
+
+        /* document version tracking */
+        isvoyager = false;
+        versions = Dict.VERS_EVERYTHING;
+        doctype = Dict.VERS_UNKNOWN;
+        badDoctype = false;
+
+        /* character buffer: allocated on the first addByte() */
+        lexbuf = null;
+        lexlength = 0;
+        lexsize = 0;
+
+        /* inline stack */
+        inode = null;
+        insert = -1;
+        istack = new Stack();
+        istackbase = 0;
+
+        styles = null;
+        seenBodyEndTag = 0;
+        nodeList = new Vector();
+    }
+
+    /** Returns the resource this lexer was constructed with. */
+    public IFile getIFile()
+    {
+        return this.iFile;
+    }
+    
+    /**
+     * Creates an empty node and registers it in nodeList so that its
+     * textarray can be repointed when the lexer buffer is reallocated.
+     */
+    public Node newNode()
+    {
+        final Node created = new Node();
+        this.nodeList.addElement(created);
+        return created;
+    }
+
+    /**
+     * Creates a node through Node(type, textarray, start, end) and
+     * registers it in nodeList.
+     */
+    public Node newNode(short type, byte[] textarray, int start, int end)
+    {
+        final Node created = new Node(type, textarray, start, end);
+        this.nodeList.addElement(created);
+        return created;
+    }
+
+    /**
+     * Creates a node for the named element, passing the configured tag
+     * table (configuration.tt) to the Node constructor, and registers it
+     * in nodeList.
+     */
+    public Node newNode(short type, byte[] textarray, int start, int end, String element)
+    {
+        final Node created =
+            new Node(type, textarray, start, end, element, this.configuration.tt);
+        this.nodeList.addElement(created);
+        return created;
+    }
+
+    /**
+     * Clones a node and registers the copy, together with the non-null
+     * asp and php members of each of its attributes, in nodeList.
+     *
+     * @param node node to clone; must not be null
+     * @return the registered copy
+     */
+    public Node cloneNode(Node node)
+    {
+        Node copy = (Node)node.clone();
+        nodeList.addElement(copy);
+
+        AttVal attr = copy.attributes;
+        while (attr != null) {
+            if (attr.asp != null) {
+                nodeList.addElement(attr.asp);
+            }
+            if (attr.php != null) {
+                nodeList.addElement(attr.php);
+            }
+            attr = attr.next;
+        }
+        return copy;
+    }
+
+    /**
+     * Clones an attribute list and registers the non-null asp and php
+     * members of every attribute in the copy in nodeList.
+     *
+     * @param attrs head of the list to clone; must not be null
+     * @return head of the cloned list
+     */
+    public AttVal cloneAttributes(AttVal attrs)
+    {
+        final AttVal copyHead = (AttVal)attrs.clone();
+
+        AttVal attr = copyHead;
+        while (attr != null) {
+            if (attr.asp != null) {
+                nodeList.addElement(attr.asp);
+            }
+            if (attr.php != null) {
+                nodeList.addElement(attr.php);
+            }
+            attr = attr.next;
+        }
+        return copyHead;
+    }
+
+    /**
+     * Repoints every registered node whose textarray is the old buffer
+     * (compared by reference) at the new buffer. Called by addByte after
+     * the lexer buffer has been reallocated.
+     */
+    protected void updateNodeTextArrays(byte[] oldtextarray, byte[] newtextarray)
+    {
+        final int count = nodeList.size();
+        for (int idx = 0; idx < count; idx++) {
+            Node candidate = (Node)nodeList.elementAt(idx);
+            if (candidate.textarray == oldtextarray) {
+                candidate.textarray = newtextarray;
+            }
+        }
+    }
+
+    /* used for creating preformatted text from Word2000 */
+    /**
+     * Creates a registered node spanning a single '\n' that is appended
+     * to the lexer buffer.
+     *
+     * @return node with start/end delimiting the newline in lexbuf
+     */
+    public Node newLineNode()
+    {
+        Node node = newNode();
+
+        node.start = this.lexsize;
+        addCharToLexer((int)'\n');
+        node.end = this.lexsize;
+
+        /* Bind the buffer only after appending: addByte may allocate or
+           replace lexbuf, and when lexbuf was still null it does not call
+           updateNodeTextArrays, which used to leave this node without a
+           text array. */
+        node.textarray = this.lexbuf;
+        return node;
+    }
+
+    // It should always be possible to convert to/from UTF-8, so encoding
+    // exceptions are converted to an Error to avoid adding throws
+    // declarations in lots of methods.
+    
+    /**
+     * Encodes a string as UTF-8.
+     *
+     * @param str string to encode; must not be null
+     * @return the UTF-8 bytes
+     * @throws Error if the platform does not provide the "UTF8" charset
+     */
+    public static byte[] getBytes(String str) {
+        byte[] encoded;
+        try {
+            encoded = str.getBytes("UTF8");
+        } catch (java.io.UnsupportedEncodingException e) {
+            throw new Error("string to UTF-8 conversion failed: " + e.getMessage());
+        }
+        return encoded;
+    }
+
+    /**
+     * Decodes a slice of a byte array as UTF-8.
+     *
+     * @param bytes  source bytes
+     * @param offset index of the first byte to decode
+     * @param length number of bytes to decode
+     * @return the decoded string
+     * @throws Error if the platform does not provide the "UTF8" charset
+     */
+    public static String getString(byte[] bytes, int offset, int length) {
+        String decoded;
+        try {
+            decoded = new String(bytes, offset, length, "UTF8");
+        } catch (java.io.UnsupportedEncodingException e) {
+            throw new Error("UTF-8 to string conversion failed: " + e.getMessage());
+        }
+        return decoded;
+    }
+
+    /** Reports whether the input stream says it is at end of stream. */
+    public boolean endOfInput()
+    {
+        final boolean atEnd = this.in.isEndOfStream();
+        return atEnd;
+    }
+
+    /**
+     * Appends one byte to the lexer buffer, growing the buffer first when
+     * there is no room for the byte plus the trailing zero byte.
+     *
+     * @param c value to store; only the low 8 bits are kept
+     */
+    public void addByte(int c)
+    {
+        final int needed = this.lexsize + 1;
+
+        if (needed >= this.lexlength)
+        {
+            /* start at 8192 bytes, then double until the byte and the
+               terminator both fit */
+            int capacity = this.lexlength;
+            do
+            {
+                capacity = (capacity == 0) ? 8192 : capacity * 2;
+            }
+            while (needed >= capacity);
+            this.lexlength = capacity;
+
+            byte[] previous = this.lexbuf;
+            this.lexbuf = new byte[ capacity ];
+            if (previous != null)
+            {
+                System.arraycopy( previous, 0, this.lexbuf, 0, previous.length );
+                /* registered nodes still reference the old array */
+                updateNodeTextArrays(previous, this.lexbuf);
+            }
+        }
+
+        this.lexbuf[this.lexsize] = (byte)c;
+        this.lexsize++;
+        this.lexbuf[this.lexsize] = (byte)'\0';  /* debug */
+    }
+
+    /**
+     * Overwrites the most recently added byte of the lexer buffer.
+     * Does nothing when the buffer is empty.
+     */
+    public void changeChar(byte c)
+    {
+        if (this.lexsize <= 0)
+        {
+            return;
+        }
+
+        final int last = this.lexsize - 1;
+        this.lexbuf[last] = c;
+    }
+
+    /* store char c as UTF-8 encoded byte stream */
+    /**
+     * Appends character c to the lexer buffer as a UTF-8 style byte
+     * sequence: one byte below 128, otherwise a lead byte followed by one
+     * to four continuation bytes carrying six bits each.
+     */
+    public void addCharToLexer(int c)
+    {
+        if (c < 128)
+        {
+            addByte(c);
+            return;
+        }
+
+        int lead;
+        int continuation;   /* number of 10xxxxxx bytes after the lead */
+
+        if (c <= 0x7FF)
+        {
+            lead = 0xC0 | (c >> 6);
+            continuation = 1;
+        }
+        else if (c <= 0xFFFF)
+        {
+            lead = 0xE0 | (c >> 12);
+            continuation = 2;
+        }
+        else if (c <= 0x1FFFFF)
+        {
+            lead = 0xF0 | (c >> 18);
+            continuation = 3;
+        }
+        else
+        {
+            lead = 0xF8 | (c >> 24);
+            continuation = 4;
+        }
+
+        addByte(lead);
+
+        /* most significant six-bit group first */
+        for (int shift = 6 * (continuation - 1); shift >= 0; shift -= 6)
+        {
+            addByte(0x80 | ((c >> shift) & 0x3F));
+        }
+    }
+
+    /**
+     * Appends every character of str to the lexer buffer.
+     *
+     * A high surrogate immediately followed by a low surrogate is combined
+     * into one code point before encoding, so that it is stored as a
+     * single four-byte sequence rather than as two separately encoded
+     * surrogates (which is not valid UTF-8). Unpaired surrogates are
+     * passed through unchanged.
+     *
+     * @param str text to append; must not be null
+     */
+    public void addStringToLexer(String str)
+    {
+        final int length = str.length();
+        int i = 0;
+
+        while ( i < length ) {
+            int c = (int)str.charAt(i++);
+
+            if ( c >= 0xD800 && c <= 0xDBFF && i < length ) {
+                int low = (int)str.charAt(i);
+                if ( low >= 0xDC00 && low <= 0xDFFF ) {
+                    c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
+                    i++;
+                }
+            }
+
+            addCharToLexer( c );
+        }
+    }
+
+    /*
+      No longer attempts to insert missing ';' for unknown
+      entities unless one was present already, since this
+      gives unexpected results.
+
+      For example:   <a href="something.htm?foo&bar&fred">
+      was tidied to: <a href="something.htm?foo&amp;bar;&amp;fred;">
+      rather than:   <a href="something.htm?foo&amp;bar&amp;fred">
+
+      My thanks to Maurice Buxton for spotting this.
+    */
+    /**
+     * Reads the rest of an entity reference from the input and either
+     * replaces it in the lexer buffer by the character it stands for or
+     * reports it through Report.entityError.
+     *
+     * The last byte already in the lexer buffer is taken to be the "&"
+     * that starts the reference.
+     *
+     * @param mode bit flags; when the Preformatted bit is set, code 160
+     *             is stored as a plain space
+     */
+    public void parseEntity(short mode)
+    {
+        short map;
+        int start;
+        boolean first = true;
+        boolean semicolon = false;
+        boolean numeric = false;
+        int c, ch, startcol;
+        String str;
+
+        start = this.lexsize - 1;  /* to start at "&" */
+        startcol = this.in.curcol - 1;
+
+        /* collect the characters of the reference into the buffer */
+        while (true)
+        {
+            c = this.in.readChar();
+            if (c == StreamIn.EndOfStream) break;
+            if (c == ';')
+            {
+                /* the ';' itself is not added to the buffer */
+                semicolon = true;
+                break;
+            }
+
+            /* '#' directly after '&' marks a numeric reference */
+            if (first && c == '#')
+            {
+                addCharToLexer(c);
+                first = false;
+                numeric = true;
+                continue;
+            }
+
+            first = false;
+            map = MAP((char)c);
+
+            /* AQ: Added flag for numeric entities so that numeric entities
+               with missing semi-colons are recognized.
+               Eg. "&#114e&#112;..." is recognized as "rep"
+            */
+            if (numeric && ((c == 'x') || ((map & DIGIT) != 0)))
+            {
+                addCharToLexer(c);
+                continue;
+            }
+            if (!numeric && ((map & NAMECHAR) != 0))
+            {
+                addCharToLexer(c);
+                continue;
+            }
+
+            /* otherwise put it back */
+
+            this.in.ungetChar(c);
+            break;
+        }
+
+        /* str is the reference as collected, starting at "&" */
+        str = getString( this.lexbuf, start, this.lexsize - start );
+        ch = EntityTable.getDefaultEntityTable().entityCode( str );
+
+        /* deal with unrecognized entities */
+        if (ch <= 0)
+        {
+            /* set error position just before offending character */
+            this.lines = this.in.curline;
+            this.columns = startcol;
+
+            if (this.lexsize > start +1 )
+            {
+                Report.entityError(this, Report.UNKNOWN_ENTITY, str, ch);
+
+                /* keep the text as written, including its ';' if it had one */
+                if (semicolon)
+                    addCharToLexer(';');
+            }
+            else /* naked & */
+            {
+                Report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch);
+            }
+        }
+        else
+        {
+            /* c still holds the last character read by the loop above */
+            if (c != ';')    /* issue warning if not terminated by ';' */
+            {
+                /* set error position just before offending character */
+                this.lines = this.in.curline;
+                this.columns = startcol;
+                Report.entityError(this, Report.MISSING_SEMICOLON, str, c);
+            }
+
+            /* drop the reference text, then store the decoded character */
+            this.lexsize = start;
+
+            if (ch == 160 && (mode & Preformatted) != 0)
+                ch = ' ';
+
+            addCharToLexer(ch);
+
+            /* with QuoteAmpersand off the buffer ends up holding "&amp;" */
+            if (ch == '&' && !this.configuration.QuoteAmpersand)
+            {
+                addCharToLexer('a');
+                addCharToLexer('m');
+                addCharToLexer('p');
+                addCharToLexer(';');
+            }
+        }
+    }
+
+    /**
+     * Reads the remainder of a tag name into the lexer buffer. Unless
+     * configuration.XmlTags is set, upper case characters, including the
+     * first one already stored at txtstart, are folded to lower case.
+     * Sets txtend to the end of the name.
+     *
+     * @return the last value read from the input narrowed to char, i.e.
+     *         the first character that is not a name character, or
+     *         StreamIn.EndOfStream
+     */
+    public char parseTagName()
+    {
+        final int caseOffset = (int)'a' - (int)'A';
+        short flags;
+        int c;
+
+        /* the first character of the name is already in the buffer */
+        c = this.lexbuf[this.txtstart];
+        flags = MAP((char)c);
+        if ((flags & UPPERCASE) != 0 && !this.configuration.XmlTags)
+        {
+            c += caseOffset;
+            this.lexbuf[this.txtstart] = (byte)c;
+        }
+
+        /* copy subsequent name characters, folding case as we go */
+        while ((c = this.in.readChar()) != StreamIn.EndOfStream)
+        {
+            flags = MAP((char)c);
+            if ((flags & NAMECHAR) == 0)
+            {
+                break;
+            }
+
+            if (!this.configuration.XmlTags && (flags & UPPERCASE) != 0)
+            {
+                c += caseOffset;
+            }
+
+            addCharToLexer(c);
+        }
+
+        this.txtend = this.lexsize;
+        return (char)c;
+    }
+
+    /**
+     * Appends every character of str to the lexer buffer.
+     *
+     * This method had the same body as addStringToLexer; it now delegates
+     * to it so the two cannot drift apart.
+     *
+     * @param str text to append; must not be null
+     */
+    public void addStringLiteral(String str)
+    {
+        addStringToLexer( str );
+    }
+
+    /* choose what version to use for new doctype */
+    /**
+     * Picks the version to use for a new doctype: the first of the
+     * candidates below whose bit is set in this.versions.
+     *
+     * @return the chosen Dict.VERS_* value, or Dict.VERS_UNKNOWN when
+     *         none of the candidate bits is set
+     */
+    public short HTMLVersion()
+    {
+        /* candidates in the order in which they are tried */
+        final short[] preference = {
+            Dict.VERS_HTML20,
+            Dict.VERS_HTML32,
+            Dict.VERS_HTML40_STRICT,
+            Dict.VERS_HTML40_LOOSE,
+            Dict.VERS_FRAMES
+        };
+        final short allowed = this.versions;
+
+        for (int idx = 0; idx < preference.length; idx++)
+        {
+            if ((allowed & preference[idx]) != 0)
+            {
+                return preference[idx];
+            }
+        }
+
+        return Dict.VERS_UNKNOWN;
+    }
+
+    /**
+     * Returns the name of the W3CVersion entry whose code equals
+     * apparentVersion(): its voyagerName when isvoyager is set, its name
+     * otherwise.
+     *
+     * @return the name, or null when no entry matches
+     */
+    public String HTMLVersionName()
+    {
+        final short guessed = apparentVersion();
+
+        for (int idx = 0; idx < W3CVersion.length; idx++)
+        {
+            if (guessed != W3CVersion[idx].code)
+            {
+                continue;
+            }
+
+            return this.isvoyager ? W3CVersion[idx].voyagerName
+                                  : W3CVersion[idx].name;
+        }
+
+        return null;
+    }
+
+    /* add meta element for Tidy */
+    /**
+     * Adds a meta element naming HTML Tidy as the generator at the start
+     * of the document head.
+     *
+     * @param root node from which the head element is looked up
+     * @return true if the element was inserted; false if there is no
+     *         head, or the head already has a "generator" meta element
+     *         whose content starts with "HTML Tidy" (ignoring case)
+     */
+    public boolean addGenerator(Node root)
+    {
+        Node head = root.findHEAD(configuration.tt);
+        if (head == null)
+        {
+            return false;
+        }
+
+        for (Node child = head.content; child != null; child = child.next)
+        {
+            if (child.tag != configuration.tt.tagMeta)
+            {
+                continue;
+            }
+
+            AttVal nameAttr = child.getAttrByName("name");
+            if (nameAttr == null || nameAttr.value == null ||
+                Lexer.wstrcasecmp(nameAttr.value, "generator") != 0)
+            {
+                continue;
+            }
+
+            /* a generator meta: is it already Tidy's? */
+            AttVal contentAttr = child.getAttrByName("content");
+            if (contentAttr != null && contentAttr.value != null &&
+                contentAttr.value.length() >= 9 &&
+                Lexer.wstrcasecmp(contentAttr.value.substring(0, 9), "HTML Tidy") == 0)
+            {
+                return false;
+            }
+        }
+
+        Node meta = this.inferredTag("meta");
+        meta.addAttribute("content", "HTML Tidy, see www.w3.org");
+        meta.addAttribute("name", "generator");
+        Node.insertNodeAtStart(head, meta);
+        return true;
+    }
+
+    /* return true if substring s is in p and isn't all in upper case */
+    /* this is used to check the case of SYSTEM, PUBLIC, DTD and EN */
+    /* len is how many chars to check in p */
+    private static boolean findBadSubString(String s, String p, int len)
+    {
+        int n = s.length();
+        int i = 0;
+        String ps;
+
+        while (n < len)
+        {
+            ps = p.substring(i, i + n);
+            if (wstrcasecmp(s, ps) == 0)
+                return (!ps.equals(s.substring(0, n)));
+
+            ++i;
+            --len;
+        }
+
+        return false;
+    }
+
+    public boolean checkDocTypeKeyWords(Node doctype)
+    {
+        int len = doctype.end - doctype.start;
+        String s = getString(this.lexbuf, doctype.start, len);
+
+        return !(
+            findBadSubString("SYSTEM", s, len) ||
+            findBadSubString("PUBLIC", s, len) ||
+            findBadSubString("//DTD", s, len) ||
+            findBadSubString("//W3C", s, len) ||
+            findBadSubString("//EN", s, len)
+            );
+    }
+
+    /* examine <!DOCTYPE> to identify version
+     *
+     * doctype is the doctype node; its text is lexbuf[doctype.start ..
+     * doctype.end).
+     *
+     * Returns the code of the W3CVersion entry whose name equals the
+     * identifier found in the public identifier, or 0 when the doctype is
+     * not for "html", only gives a SYSTEM id, or names an unrecognized
+     * version.
+     *
+     * Side effects: reports DTYPE_NOT_UPPER_CASE when checkDocTypeKeyWords
+     * fails, rewrites a wrongly-cased SYSTEM/PUBLIC keyword in lexbuf to
+     * upper case, and sets this.badDoctype when "html " is followed by
+     * neither keyword.
+     */
+    public short findGivenVersion(Node doctype)
+    {
+        String p, s;
+        int i, j;
+        int len;
+        String str1;
+        String str2;
+
+        /* if root tag for doctype isn't html give up now */
+        str1 = getString(this.lexbuf, doctype.start, 5);
+        if (wstrcasecmp(str1, "html ") != 0)
+            return 0;
+
+        if (!checkDocTypeKeyWords(doctype))
+            Report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
+
+        /* give up if all we are given is the system id for the doctype */
+        str1 = getString(this.lexbuf, doctype.start + 5, 7); /* the 7 chars following "html " */
+        if (wstrcasecmp(str1, "SYSTEM ") == 0)
+        {
+            /* but at least ensure the case is correct */
+            if (!str1.substring(0, 6).equals("SYSTEM"))
+                System.arraycopy( getBytes("SYSTEM"), 0,
+                                  this.lexbuf, doctype.start + 5, 6 );
+            return 0;  /* unrecognized */
+        }
+        /* same case fix-up for PUBLIC; anything else marks the doctype as bad */
+        if (wstrcasecmp(str1, "PUBLIC ") == 0)
+        {
+            if (!str1.substring(0, 6).equals("PUBLIC"))
+                System.arraycopy( getBytes("PUBLIC "), 0,
+                                  this.lexbuf, doctype.start + 5, 6 ); /* only the 6 keyword bytes are copied */
+        }
+        else
+            this.badDoctype = true;
+        /* look for the first double quote; the public identifier is read from just after it */
+        for (i = doctype.start; i < doctype.end; ++i)
+        {
+            if (this.lexbuf[i] == (byte)'"')
+            {
+                str1 = getString( this.lexbuf, i + 1, 12 ); /* 12 == length of "-//W3C//DTD " */
+                str2 = getString( this.lexbuf, i + 1, 13 ); /* 13 == length of "-//IETF//DTD " */
+                if (str1.equals("-//W3C//DTD "))
+                {
+                    /* compute length of identifier e.g. "HTML 4.0 Transitional" */
+                    for (j = i + 13; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j);
+                    len = j - i - 13;
+                    p = getString( this.lexbuf, i + 13, len );
+                    /* entry 0 is skipped here; it is the one matched in the IETF branch below */
+                    for (j = 1; j < W3CVersion.length; ++j)
+                    {
+                        s = W3CVersion[j].name;
+                        if (len == s.length() && s.equals(p))
+                            return W3CVersion[j].code;
+                    }
+
+                    /* else unrecognized version */
+                }
+                else if (str2.equals("-//IETF//DTD "))
+                {
+                    /* compute length of identifier e.g. "HTML 2.0" */
+                    for (j = i + 14; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j);
+                    len = j - i - 14;
+
+                    p = getString( this.lexbuf, i + 14, len );
+                    s = W3CVersion[0].name;
+                    if (len == s.length() && s.equals(p))
+                        return W3CVersion[0].code;
+
+                    /* else unrecognized version */
+                }
+                break; /* only the first quoted string is examined */
+            }
+        }
+
+        return 0;
+    }
+
+    public void fixHTMLNameSpace(Node root, String profile)
+    {
+        Node node;
+        AttVal prev, attr;
+
+        for (node = root.content; 
+                node != null && node.tag != configuration.tt.tagHtml; node = node.next);
+
+        if (node != null)
+        {
+            prev = null;
+
+            for (attr = node.attributes; attr != null; attr = attr.next)
+            {
+                if (attr.attribute.equals("xmlns"))
+                    break;
+
+                prev = attr;
+            }
+
+            if (attr != null)
+            {
+                if (!attr.value.equals(profile))
+                {
+                    Report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE);
+                    attr.value = profile;
+                }
+            }
+            else
+            {
+                attr = new AttVal( node.attributes, null, (int)'"',
+                                   "xmlns", profile );
+                attr.dict =
+                    AttributeTable.getDefaultAttributeTable().findAttribute( attr );
+                node.attributes = attr;
+            }
+        }
+    }
+
+    public boolean setXHTMLDocType(Node root)
+    {
+        String fpi = " ";
+        String sysid = "";
+        String namespace = XHTML_NAMESPACE;
+        Node doctype;
+
+        doctype = root.findDocType();
+
+        if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
+        {
+            if (doctype != null)
+                Node.discardElement(doctype);
+            return true;
+        }
+
+        if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
+        {
+            /* see what flavor of XHTML this document matches */
+            if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
+            {  /* use XHTML strict */
+                fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
+                sysid = voyager_strict;
+            }
+            else if ((this.versions & Dict.VERS_LOOSE) != 0)
+            {
+                fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
+                sysid = voyager_loose;
+            }
+            else if ((this.versions & Dict.VERS_FRAMES) != 0)
+            {   /* use XHTML frames */
+                fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";
+                sysid = voyager_frameset;
+            }
+            else /* lets assume XHTML transitional */
+            {
+                fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
+                sysid = voyager_loose;
+            }
+        }
+        else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
+        {
+            fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
+            sysid = voyager_strict;
+        }
+        else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
+        {
+            fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
+            sysid = voyager_loose;
+        }
+
+        fixHTMLNameSpace(root, namespace);
+
+        if (doctype == null)
+        {
+            doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
+            doctype.next = root.content;
+            doctype.parent = root;
+            doctype.prev = null;
+            root.content = doctype;
+        }
+
+        if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
+            configuration.docTypeStr != null)
+        {
+            fpi = configuration.docTypeStr;
+            sysid = "";
+        }
+
+        this.txtstart = this.lexsize;
+        this.txtend = this.lexsize;
+
+        /* add public identifier */
+        addStringLiteral("html PUBLIC ");
+
+        /* check if the fpi is quoted or not */
+        if (fpi.charAt(0) == '"')
+            addStringLiteral(fpi);
+        else
+        {
+            addStringLiteral("\"");
+            addStringLiteral(fpi);
+            addStringLiteral("\"");
+        }
+
+        if (sysid.length() + 6 >= this.configuration.wraplen)
+            addStringLiteral("\n\"");
+        else
+            addStringLiteral("\n    \"");
+
+        /* add system identifier */
+        addStringLiteral(sysid);
+        addStringLiteral("\"");
+
+        this.txtend = this.lexsize;
+
+        doctype.start = this.txtstart;
+        doctype.end = this.txtend;
+
+        return false;
+    }
+
+    public short apparentVersion()
+    {
+        switch (this.doctype)
+        {
+        case Dict.VERS_UNKNOWN:
+            return HTMLVersion();
+
+        case Dict.VERS_HTML20:
+            if ((this.versions & Dict.VERS_HTML20) != 0)
+                return Dict.VERS_HTML20;
+
+            break;
+
+        case Dict.VERS_HTML32:
+            if ((this.versions & Dict.VERS_HTML32) != 0)
+                return Dict.VERS_HTML32;
+
+            break; /* to replace old version by new */
+
+        case Dict.VERS_HTML40_STRICT:
+            if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
+                return Dict.VERS_HTML40_STRICT;
+
+            break;
+
+        case Dict.VERS_HTML40_LOOSE:
+            if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0)
+                return Dict.VERS_HTML40_LOOSE;
+
+            break; /* to replace old version by new */
+
+        case Dict.VERS_FRAMES:
+            if ((this.versions & Dict.VERS_FRAMES) != 0)
+                return Dict.VERS_FRAMES;
+
+            break;
+        }
+
+        Report.warning(this, null, null, Report.INCONSISTENT_VERSION);
+        return this.HTMLVersion();
+    }
+
+    /* fixup doctype if missing */
+    public boolean fixDocType(Node root)
+    {
+        Node doctype;
+        int guessed = Dict.VERS_HTML40_STRICT, i;
+
+        if (this.badDoctype)
+            Report.warning(this, null, null, Report.MALFORMED_DOCTYPE);
+
+        if (configuration.XmlOut)
+            return true;
+
+        doctype = root.findDocType();
+
+        if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
+        {
+            if (doctype != null)
+                Node.discardElement(doctype);
+            return true;
+        }
+
+        if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
+        {
+            Node.discardElement(doctype);
+            doctype = null;
+            guessed = Dict.VERS_HTML40_STRICT;
+        }
+        else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
+        {
+            Node.discardElement(doctype);
+            doctype = null;
+            guessed = Dict.VERS_HTML40_LOOSE;
+        }
+        else if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
+        {
+            if (doctype != null)
+            {
+                if (this.doctype == Dict.VERS_UNKNOWN)
+                    return false;
+
+                switch (this.doctype)
+                {
+                case Dict.VERS_UNKNOWN:
+                    return false;
+
+                case Dict.VERS_HTML20:
+                    if ((this.versions & Dict.VERS_HTML20) != 0)
+                        return true;
+
+                    break; /* to replace old version by new */
+
+                case Dict.VERS_HTML32:
+                    if ((this.versions & Dict.VERS_HTML32) != 0)
+                        return true;
+
+                    break; /* to replace old version by new */
+
+                case Dict.VERS_HTML40_STRICT:
+                    if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
+                        return true;
+
+                    break; /* to replace old version by new */
+
+                case Dict.VERS_HTML40_LOOSE:
+                    if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0)
+                        return true;
+
+                    break; /* to replace old version by new */
+
+                case Dict.VERS_FRAMES:
+                    if ((this.versions & Dict.VERS_FRAMES) != 0)
+                        return true;
+
+                    break; /* to replace old version by new */
+                }
+
+                /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */
+            }
+
+            /* choose new doctype */
+            guessed = HTMLVersion();
+        }
+
+        if (guessed == Dict.VERS_UNKNOWN)
+            return false;
+
+        /* for XML use the Voyager system identifier */
+        if (this.configuration.XmlOut || this.configuration.XmlTags || this.isvoyager)
+        {
+            if (doctype != null)
+                Node.discardElement(doctype);
+
+            for (i = 0; i < W3CVersion.length; ++i)
+            {
+                if (guessed == W3CVersion[i].code)
+                {
+                    fixHTMLNameSpace(root, W3CVersion[i].profile);
+                    break;
+                }
+            }
+
+            return true;
+        }
+
+        if (doctype == null)
+        {
+            doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
+            doctype.next = root.content;
+            doctype.parent = root;
+            doctype.prev = null;
+            root.content = doctype;
+        }
+
+        this.txtstart = this.lexsize;
+        this.txtend = this.lexsize;
+
+        /* use the appropriate public identifier */
+        addStringLiteral("html PUBLIC ");
+
+        if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
+            configuration.docTypeStr != null)
+            addStringLiteral(configuration.docTypeStr);
+        else if (guessed == Dict.VERS_HTML20)
+            addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
+        else
+        {
+            addStringLiteral("\"-//W3C//DTD ");
+
+            for (i = 0; i < W3CVersion.length; ++i)
+            {
+                if (guessed == W3CVersion[i].code)
+                {
+                    addStringLiteral(W3CVersion[i].name);
+                    break;
+                }
+            }
+
+            addStringLiteral("//EN\"");
+        }
+
+        this.txtend = this.lexsize;
+
+        doctype.start = this.txtstart;
+        doctype.end = this.txtend;
+
+        return true;
+    }
+
+    /* ensure XML document starts with <?XML version="1.0"?> */
+    public boolean fixXMLPI(Node root)
+    {
+        Node xml;
+        int s;
+
+        if( root.content != null && root.content.type == Node.ProcInsTag)
+        {
+            s = root.content.start;
+
+            if (this.lexbuf[s] == (byte)'x' &&
+                this.lexbuf[s+1] == (byte)'m' &&
+                this.lexbuf[s+2] == (byte)'l')
+                return true;
+        }
+
+        xml = newNode(Node.ProcInsTag, this.lexbuf, 0, 0);
+        xml.next = root.content;
+
+        if (root.content != null)
+        {
+            root.content.prev = xml;
+            xml.next = root.content;
+        }
+    
+        root.content = xml;
+
+        this.txtstart = this.lexsize;
+        this.txtend = this.lexsize;
+        addStringLiteral("xml version=\"1.0\"");
+        if (this.configuration.CharEncoding == Configuration.LATIN1)
+            addStringLiteral(" encoding=\"ISO-8859-1\"");
+        this.txtend = this.lexsize;
+
+        xml.start = this.txtstart;
+        xml.end = this.txtend;
+        return false;
+    }
+
+    public Node inferredTag(String name)
+    {
+        Node node;
+
+        node = newNode(Node.StartTag,
+                        this.lexbuf,
+                        this.txtstart,
+                        this.txtend,
+                        name);
+        node.implicit = true;
+        return node;
+    }
+
+    public static boolean expectsContent(Node node)
+    {
+        if (node.type != Node.StartTag)
+            return false;
+
+        /* unknown element? */
+        if (node.tag == null)
+            return true;
+
+        if ((node.tag.model & Dict.CM_EMPTY) != 0)
+            return false;
+
+        return true;
+    }
+
+    /*
+      create a text node for the contents of
+      a CDATA element like style or script
+      which ends with </foo> for some foo.
+
+      container is the element being read; reading stops at the first
+      end tag whose name equals container.element ignoring case, or at
+      end of stream (MISSING_ENDTAG_FOR is then reported).
+
+      Any other "</...>" sequence inside the content is reported as
+      BAD_CDATA_CONTENT; when ParserImpl.isJavaScript(container) holds, a
+      backslash is also inserted in front of its '/'.
+
+      Returns the new text node (also stored in this.token), or null
+      when no text was collected.
+    */
+    public Node getCDATA(Node container)
+    {
+        int c, lastc, start, len, i;
+        String str;
+        boolean endtag = false; /* set once the first "</" has been seen; never cleared */
+
+        this.lines = this.in.curline;
+        this.columns = this.in.curcol;
+        this.waswhite = false;
+        this.txtstart = this.lexsize;
+        this.txtend = this.lexsize;
+
+        lastc = (int)'\0';
+        start = -1; /* lexbuf index of the first char after the latest "</", or -1 */
+
+        while (true)
+        {
+            c = this.in.readChar();
+            if (c == StreamIn.EndOfStream) break;
+            /* treat \r\n as \n and \r as \n */
+
+            if (c == (int)'/' && lastc == (int)'<')
+            {
+                if (endtag)
+                {
+                    this.lines = this.in.curline;
+                    this.columns = this.in.curcol - 3;
+
+                    Report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
+                }
+                /* the '/' itself is stored at index lexsize by addCharToLexer below */
+                start = this.lexsize + 1;  /* to first letter */
+                endtag = true;
+            }
+            else if (c == (int)'>' && start >= 0)
+            {
+                len = this.lexsize - start; /* number of chars buffered since the "</" */
+                if (len == container.element.length())
+                {
+                    str = getString( this.lexbuf, start, len );
+                    if (Lexer.wstrcasecmp(str, container.element) == 0)
+                    {
+                        this.txtend = start - 2; /* text ends just before the '<' of the end tag */
+                        break;
+                    }
+                }
+
+                this.lines = this.in.curline;
+                this.columns = this.in.curcol - 3;
+
+                Report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
+
+                /* if javascript insert backslash before / */
+                /* NOTE(review): the shift below writes lexbuf[lexsize] without a visible capacity check -- confirm the buffer always has room */
+                if (ParserImpl.isJavaScript(container))
+                {
+                    for (i = this.lexsize; i > start-1; --i)
+                        this.lexbuf[i] = this.lexbuf[i-1];
+
+                    this.lexbuf[start-1] = (byte)'\\';
+                    this.lexsize++;
+                }
+
+                start = -1;
+            }
+            else if (c == (int)'\r')
+            {
+                c = this.in.readChar();
+
+                if (c != (int)'\n')
+                    this.in.ungetChar(c);
+
+                c = (int)'\n';
+            }
+
+            addCharToLexer((int)c);
+            this.txtend = this.lexsize;
+            lastc = c;
+        }
+
+        if (c == StreamIn.EndOfStream)
+            Report.warning(this, container, null, Report.MISSING_ENDTAG_FOR);
+
+        if (this.txtend > this.txtstart)
+        {
+            this.token = newNode(Node.TextNode,
+                                  this.lexbuf,
+                                  this.txtstart,
+                                  this.txtend);
+            return this.token;
+        }
+
+        return null;
+    }
+
+    public void ungetToken()
+    {
+        this.pushed = true;
+    }
+
+    public static final short IgnoreWhitespace    = 0; /* leading white space is discarded */
+    public static final short MixedContent        = 1; /* white space runs are collapsed */
+    public static final short Preformatted        = 2; /* white space is kept as is */
+    public static final short IgnoreMarkup        = 3; /* white space kept, entities not parsed */
+
+    /*
+      modes for GetToken()
+
+      IgnoreWhitespace   -- white space at the start of the text is
+                            discarded and a space before an end tag is
+                            trimmed; getToken switches to MixedContent
+                            once other content has been read
+      MixedContent       -- for elements which don't accept PCDATA
+                            NOTE(review): this description looks like it
+                            belongs to IgnoreWhitespace -- confirm
+      Preformatted       -- white space preserved as is
+      IgnoreMarkup       -- for CDATA elements such as script, style;
+                            white space is preserved, entities are not
+                            parsed and only end tags are recognized
+    */
+
+    public Node getToken(short mode)
+    {
+        short map;
+        int c = 0;
+        int lastc;
+        int badcomment = 0;
+        MutableBoolean isempty = new MutableBoolean();
+        AttVal attributes;
+
+        if (this.pushed)
+        {
+            /* duplicate inlines in preference to pushed text nodes when appropriate */
+            if (this.token.type != Node.TextNode ||
+                (this.insert == -1 && this.inode == null))
+            {
+                this.pushed = false;
+                return this.token;
+            }
+        }
+
+        /* at start of block elements, unclosed inline
+           elements are inserted into the token stream */
+     
+        if (this.insert != -1 || this.inode != null)
+            return insertedToken();
+
+        this.lines = this.in.curline;
+        this.columns = this.in.curcol;
+        this.waswhite = false;
+
+        this.txtstart = this.lexsize;
+        this.txtend = this.lexsize;
+
+        while (true)
+        {
+            c = this.in.readChar();
+            if (c == StreamIn.EndOfStream) break;
+            if (this.insertspace && mode != IgnoreWhitespace)
+            {
+                addCharToLexer(' ');
+                this.waswhite = true;
+                this.insertspace = false;
+            }
+
+            /* treat \r\n as \n and \r as \n */
+
+            if (c == '\r')
+            {
+                c = this.in.readChar();
+
+                if (c != '\n')
+                    this.in.ungetChar(c);
+
+                c = '\n';
+            }
+
+            addCharToLexer(c);
+
+            switch (this.state)
+            {
+            case LEX_CONTENT:  /* element content */
+                map = MAP((char)c);
+
+                /*
+                 Discard white space if appropriate. Its cheaper
+                 to do this here rather than in parser methods
+                 for elements that don't have mixed content.
+                */
+                if (((map & WHITE) != 0) && (mode == IgnoreWhitespace) 
+                      && this.lexsize == this.txtstart + 1)
+                {
+                    --this.lexsize;
+                    this.waswhite = false;
+                    this.lines = this.in.curline;
+                    this.columns = this.in.curcol;
+                    continue;
+                }
+
+                if (c == '<')
+                {
+                    this.state = LEX_GT;
+                    continue;
+                }
+
+                if ((map & WHITE) != 0)
+                {
+                    /* was previous char white? */
+                    if (this.waswhite)
+                    {
+                        if (mode != Preformatted && mode != IgnoreMarkup)
+                        {
+                            --this.lexsize;
+                            this.lines = this.in.curline;
+                            this.columns = this.in.curcol;
+                        }
+                    }
+                    else /* prev char wasn't white */
+                    {
+                        this.waswhite = true;
+                        lastc = c;
+
+                        if (mode != Preformatted && mode != IgnoreMarkup && c != ' ')
+                            changeChar((byte)' ');
+                    }
+
+                    continue;
+                }
+                else if (c == '&' && mode != IgnoreMarkup)
+                    parseEntity(mode);
+
+                /* this is needed to avoid trimming trailing whitespace */
+                if (mode == IgnoreWhitespace)
+                    mode = MixedContent;
+
+                this.waswhite = false;
+                continue;
+
+            case LEX_GT:  /* < */
+
+                /* check for endtag */
+                if (c == '/')
+                {
+                    c = this.in.readChar();
+                    if (c == StreamIn.EndOfStream)
+                    {
+                        this.in.ungetChar(c);
+                        continue;
+                    }
+
+                    addCharToLexer(c);
+                    map = MAP((char)c);
+
+                    if ((map & LETTER) != 0)
+                    {
+                        this.lexsize -= 3;
+                        this.txtend = this.lexsize;
+                        this.in.ungetChar(c);
+                        this.state = LEX_ENDTAG;
+                        this.lexbuf[this.lexsize] = (byte)'\0';  /* debug */
+                        this.in.curcol -= 2;
+
+                        /* if some text before the </ return it now */
+                        if (this.txtend > this.txtstart)
+                        {
+                            /* trim space char before end tag */
+                            if (mode == IgnoreWhitespace && this.lexbuf[this.lexsize - 1] == (byte)' ')
+                            {
+                                this.lexsize -= 1;
+                                this.txtend = this.lexsize;
+                            }
+
+                            this.token = newNode(Node.TextNode,
+                                                  this.lexbuf,
+                                                  this.txtstart,
+                                                  this.txtend);
+                            return this.token;
+                        }
+
+                        continue;       /* no text so keep going */
+                    }
+
+                    /* otherwise treat as CDATA */
+                    this.waswhite = false;
+                    this.state = LEX_CONTENT;
+                    continue;
+                }
+
+                if (mode == IgnoreMarkup)
+                {
+                    /* otherwise treat as CDATA */
+                    this.waswhite = false;
+                    this.state = LEX_CONTENT;
+                    continue;
+                }
+
+                /*
+                   look out for comments, doctype or marked sections
+                   this isn't quite right, but its getting there ...
+                */
+                if (c == '!')
+                {
+                    c = this.in.readChar();
+
+                    if (c == '-')
+                    {
+                        c = this.in.readChar();
+
+                        if (c == '-')
+                        {
+                            this.state = LEX_COMMENT;  /* comment */
+                            this.lexsize -= 2;
+                            this.txtend = this.lexsize;
+
+                            /* if some text before < return it now */
+                            if (this.txtend > this.txtstart)
+                            {
+                                this.token = newNode(Node.TextNode,
+                                                      this.lexbuf,
+                                                      this.txtstart,
+                                                      this.txtend);
+                                return this.token;
+                            }
+
+                            this.txtstart = this.lexsize;
+                            continue;
+                        }
+
+                        Report.warning(this, null, null, Report.MALFORMED_COMMENT);
+                    }
+                    else if (c == 'd' || c == 'D')
+                    {
+                        this.state = LEX_DOCTYPE; /* doctype */
+                        this.lexsize -= 2;
+                        this.txtend = this.lexsize;
+                        mode = IgnoreWhitespace;
+
+                        /* skip until white space or '>' */
+
+                        for (;;)
+                        {
+                            c = this.in.readChar();
+
+                            if (c == StreamIn.EndOfStream || c == '>')
+                            {
+                                this.in.ungetChar(c);
+                                break;
+                            }
+
+                            map = MAP((char)c);
+
+                            if ((map & WHITE) == 0)
+                                continue;
+
+                            /* and skip to end of whitespace */
+
+                            for (;;)
+                            {
+                                c = this.in.readChar();
+
+                                if (c == StreamIn.EndOfStream || c == '>')
+                                {
+                                    this.in.ungetChar(c);
+                                    break;
+                                }
+
+                                map = MAP((char)c);
+
+                                if ((map & WHITE) != 0)
+                                    continue;
+
+                                this.in.ungetChar(c);
+                                    break;
+                            }
+
+                            break;
+                        }
+
+                        /* if some text before < return it now */
+                        if (this.txtend > this.txtstart)
+                        {
+                                this.token = newNode(Node.TextNode,
+                                                      this.lexbuf,
+                                                      this.txtstart,
+                                                      this.txtend);
+                                return this.token;
+                        }
+
+                        this.txtstart = this.lexsize;
+                        continue;
+                    }
+                    else if (c == '[')
+                    {
+                        /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
+                        this.lexsize -= 2;
+                        this.state = LEX_SECTION;
+                        this.txtend = this.lexsize;
+
+                        /* if some text before < return it now */
+                        if (this.txtend > this.txtstart)
+                        {
+                                this.token = newNode(Node.TextNode,
+                                                      this.lexbuf,
+                                                      this.txtstart,
+                                                      this.txtend);
+                                return this.token;
+                        }
+
+                        this.txtstart = this.lexsize;
+                        continue;
+                    }
+
+                    /* otherwise swallow chars up to and including next '>' */
+                    while (true)
+                    {
+                        c = this.in.readChar();
+                        if (c == '>') break;
+                        if (c == -1)
+                        {
+                            this.in.ungetChar(c);
+                            break;
+                        }
+                    }
+
+                    this.lexsize -= 2;
+                    this.lexbuf[this.lexsize] = (byte)'\0';
+                    this.state = LEX_CONTENT;
+                    continue;
+                }
+
+                /*
+                   processing instructions
+                */
+
+                if (c == '?')
+                {
+                    this.lexsize -= 2;
+                    this.state = LEX_PROCINSTR;
+                    this.txtend = this.lexsize;
+
+                    /* if some text before < return it now */
+                    if (this.txtend > this.txtstart)
+                    {
+                        this.token = newNode(Node.TextNode,
+                                              this.lexbuf,
+                                              this.txtstart,
+                                              this.txtend);
+                        return this.token;
+                    }
+
+                    this.txtstart = this.lexsize;
+                    continue;
+                }
+
+                /* Microsoft ASP's e.g. <% ... server-code ... %> */
+                if (c == '%')
+                {
+                    this.lexsize -= 2;
+                    this.state = LEX_ASP;
+                    this.txtend = this.lexsize;
+
+                    /* if some text before < return it now */
+                    if (this.txtend > this.txtstart)
+                    {
+                        this.token = newNode(Node.TextNode,
+                                              this.lexbuf,
+                                              this.txtstart,
+                                              this.txtend);
+                        return this.token;
+                    }
+
+                    this.txtstart = this.lexsize;
+                    continue;
+                }
+
+                /* Netscapes JSTE e.g. <# ... server-code ... #> */
+                if (c == '#')
+                {
+                    this.lexsize -= 2;
+                    this.state = LEX_JSTE;
+                    this.txtend = this.lexsize;
+
+                    /* if some text before < return it now */
+                    if (this.txtend > this.txtstart)
+                    {
+                        this.token = newNode(Node.TextNode,
+                                              this.lexbuf,
+                                              this.txtstart,
+                                              this.txtend);
+                        return this.token;
+                    }
+
+                    this.txtstart = this.lexsize;
+                    continue;
+                }
+
+                map = MAP((char)c);
+
+                /* check for start tag */
+                if ((map & LETTER) != 0)
+                {
+                    this.in.ungetChar(c);     /* push back letter */
+                    this.lexsize -= 2;      /* discard "<" + letter */
+                    this.txtend = this.lexsize;
+                    this.state = LEX_STARTTAG;         /* ready to read tag name */
+
+                    /* if some text before < return it now */
+                    if (this.txtend > this.txtstart)
+                    {
+                        this.token = newNode(Node.TextNode,
+                                              this.lexbuf,
+                                              this.txtstart,
+                                              this.txtend);
+                        return this.token;
+                    }
+
+                    continue;       /* no text so keep going */
+                }
+
+                /* otherwise treat as CDATA */
+                this.state = LEX_CONTENT;
+                this.waswhite = false;
+                continue;
+
+            case LEX_ENDTAG:  /* </letter */
+                this.txtstart = this.lexsize - 1;
+                this.in.curcol += 2;
+                c = parseTagName();
+                this.token = newNode(Node.EndTag, /* create endtag token */
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend,
+                                      getString(this.lexbuf,
+                                                 this.txtstart,
+                                                 this.txtend - this.txtstart));
+                this.lexsize = this.txtstart;
+                this.txtend = this.txtstart;
+
+                /* skip to '>' */
+                while (c != '>')
+                {
+                    c = this.in.readChar();
+
+                    if (c == StreamIn.EndOfStream)
+                        break;
+                }
+
+                if (c == StreamIn.EndOfStream)
+                {
+                    this.in.ungetChar(c);
+                    continue;
+                }
+
+                this.state = LEX_CONTENT;
+                this.waswhite = false;
+                return this.token;  /* the endtag token */
+
+            case LEX_STARTTAG: /* first letter of tagname */
+                this.txtstart = this.lexsize - 1; /* set txtstart to first letter */
+                c = parseTagName();
+                isempty.value = false;
+                attributes = null;
+                this.token = newNode((isempty.value ? Node.StartEndTag : Node.StartTag),
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend,
+                                      getString(this.lexbuf,
+                                                 this.txtstart,
+                                                 this.txtend - this.txtstart));
+
+                /* parse attributes, consuming closing ">" */
+                if (c != '>')
+                {
+                    if (c == '/')
+                        this.in.ungetChar(c);
+
+                    attributes = parseAttrs(isempty);
+                }
+
+                if (isempty.value)
+                    this.token.type = Node.StartEndTag;
+
+                this.token.attributes = attributes;
+                this.lexsize = this.txtstart;
+                this.txtend = this.txtstart;
+
+                /* swallow newline following start tag */
+                /* special check needed for CRLF sequence */
+                /* this doesn't apply to empty elements */
+
+                if (expectsContent(this.token) ||
+                    this.token.tag == configuration.tt.tagBr)
+                {
+
+                    c = this.in.readChar();
+
+                    if (c == '\r')
+                    {
+                        c = this.in.readChar();
+
+                        if (c != '\n')
+                            this.in.ungetChar(c);
+                    }
+                    else if (c != '\n' && c != '\f')
+                        this.in.ungetChar(c);
+
+                    this.waswhite = true;  /* to swallow leading whitespace */
+                }
+                else
+                    this.waswhite = false;
+
+                this.state = LEX_CONTENT;
+
+                if (this.token.tag == null)
+                    Report.error(this, null, this.token, Report.UNKNOWN_ELEMENT);
+                else if (!this.configuration.XmlTags)
+                {
+                    this.versions &= this.token.tag.versions;
+                    
+                    if ((this.token.tag.versions & Dict.VERS_PROPRIETARY) != 0)
+                    {
+                        if (!this.configuration.MakeClean && (this.token.tag == configuration.tt.tagNobr ||
+                                                this.token.tag == configuration.tt.tagWbr))
+                            Report.warning(this, null, this.token, Report.PROPRIETARY_ELEMENT);
+                    }
+
+                    if (this.token.tag.chkattrs != null)
+                    {
+                        this.token.checkUniqueAttributes(this);
+                        this.token.tag.chkattrs.check(this, this.token);
+                    }
+                    else
+                        this.token.checkAttributes(this);
+                }
+
+                return this.token;  /* return start tag */
+
+            case LEX_COMMENT:  /* seen <!-- so look for --> */
+
+                if (c != '-')
+                    continue;
+
+                c = this.in.readChar();
+                addCharToLexer(c);
+
+                if (c != '-')
+                    continue;
+
+                end_comment: while (true) {
+                    c = this.in.readChar();
+
+                    if (c == '>')
+                    {
+                        if (badcomment != 0)
+                            Report.warning(this, null, null, Report.MALFORMED_COMMENT);
+
+                        this.txtend = this.lexsize - 2; // AQ 8Jul2000
+                        this.lexbuf[this.lexsize] = (byte)'\0';
+                        this.state = LEX_CONTENT;
+                        this.waswhite = false;
+                        this.token = newNode(Node.CommentTag,
+                                              this.lexbuf,
+                                              this.txtstart,
+                                              this.txtend);
+
+                        /* now look for a line break */
+
+                        c = this.in.readChar();
+
+                        if (c == '\r')
+                        {
+                            c = this.in.readChar();
+
+                            if (c != '\n')
+                                this.token.linebreak = true;
+                        }
+
+                        if (c == '\n')
+                            this.token.linebreak = true;
+                        else
+                            this.in.ungetChar(c);
+
+                        return this.token;
+                    }
+
+                    /* note position of first such error in the comment */
+                    if (badcomment == 0)
+                    {
+                        this.lines = this.in.curline;
+                        this.columns = this.in.curcol - 3;
+                    }
+
+                    badcomment++;
+                    if (this.configuration.FixComments)
+                        this.lexbuf[this.lexsize - 2] = (byte)'=';
+
+                    addCharToLexer(c);
+
+                    /* if '-' then look for '>' to end the comment */
+                    if (c != '-')
+                        break end_comment;
+
+                }
+                /* otherwise continue to look for --> */
+                this.lexbuf[this.lexsize - 2] = (byte)'=';
+                continue;
+
+            case LEX_DOCTYPE:  /* seen <!d so look for '>' munging whitespace */
+                map = MAP((char)c);
+
+                if ((map & WHITE) != 0)
+                {
+                    if (this.waswhite)
+                        this.lexsize -= 1;
+
+                    this.waswhite = true;
+                }
+                else
+                    this.waswhite = false;
+
+                if (c != '>')
+                    continue;
+
+                this.lexsize -= 1;
+                this.txtend = this.lexsize;
+                this.lexbuf[this.lexsize] = (byte)'\0';
+                this.state = LEX_CONTENT;
+                this.waswhite = false;
+                this.token = newNode(Node.DocTypeTag,
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend);
+                /* make a note of the version named by the doctype */
+                this.doctype = findGivenVersion(this.token);
+                return this.token;
+
+            case LEX_PROCINSTR:  /* seen <? so look for '>' */
+                /* check for PHP preprocessor instructions <?php ... ?> */
+
+                if  (this.lexsize - this.txtstart == 3)
+                {
+                    if ((getString(this.lexbuf, this.txtstart, 3)).equals("php"))
+                    {
+                        this.state = LEX_PHP;
+                        continue;
+                    }
+                }
+
+                if (this.configuration.XmlPIs)  /* insist on ?> as terminator */
+                {
+                    if (c != '?')
+                        continue;
+
+                    /* now look for '>' */
+                    c = this.in.readChar();
+
+                    if (c == StreamIn.EndOfStream)
+                    {
+                        Report.warning(this, null, null, Report.UNEXPECTED_END_OF_FILE);
+                        this.in.ungetChar(c);
+                        continue;
+                    }
+
+                    addCharToLexer(c);
+                }
+
+                if (c != '>')
+                    continue;
+
+                this.lexsize -= 1;
+                this.txtend = this.lexsize;
+                this.lexbuf[this.lexsize] = (byte)'\0';
+                this.state = LEX_CONTENT;
+                this.waswhite = false;
+                this.token = newNode(Node.ProcInsTag,
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend);
+                return this.token;
+
+            case LEX_ASP:  /* seen <% so look for "%>" */
+                if (c != '%')
+                    continue;
+
+                /* now look for '>' */
+                c = this.in.readChar();
+
+
+                if (c != '>')
+                {
+                    this.in.ungetChar(c);
+                    continue;
+                }
+
+                this.lexsize -= 1;
+                this.txtend = this.lexsize;
+                this.lexbuf[this.lexsize] = (byte)'\0';
+                this.state = LEX_CONTENT;
+                this.waswhite = false;
+                this.token = newNode(Node.AspTag,
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend);
+                return this.token;
+
+            case LEX_JSTE:  /* seen <# so look for "#>" */
+                if (c != '#')
+                    continue;
+
+                /* now look for '>' */
+                c = this.in.readChar();
+
+
+                if (c != '>')
+                {
+                    this.in.ungetChar(c);
+                    continue;
+                }
+
+                this.lexsize -= 1;
+                this.txtend = this.lexsize;
+                this.lexbuf[this.lexsize] = (byte)'\0';
+                this.state = LEX_CONTENT;
+                this.waswhite = false;
+                this.token = newNode(Node.JsteTag,
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend);
+                return this.token;
+
+            case LEX_PHP: /* seen "<?php" so look for "?>" */
+                if (c != '?')
+                    continue;
+
+                /* now look for '>' */
+                c = this.in.readChar();
+
+                if (c != '>')
+                {
+                    this.in.ungetChar(c);
+                    continue;
+                }
+
+                this.lexsize -= 1;
+                this.txtend = this.lexsize;
+                this.lexbuf[this.lexsize] = (byte)'\0';
+                this.state = LEX_CONTENT;
+                this.waswhite = false;
+                this.token = newNode(Node.PhpTag,
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend);
+                return this.token;
+
+            case LEX_SECTION: /* seen "<![" so look for "]>" */
+                if (c == '[')
+                {
+                    if (this.lexsize == (this.txtstart + 6) &&
+                        (getString(this.lexbuf, this.txtstart, 6)).equals("CDATA["))
+                    {
+                        this.state = LEX_CDATA;
+                        this.lexsize -= 6;
+                        continue;
+                    }
+                }
+
+                if (c != ']')
+                    continue;
+
+                /* now look for '>' */
+                c = this.in.readChar();
+
+                if (c != '>')
+                {
+                    this.in.ungetChar(c);
+                    continue;
+                }
+
+                this.lexsize -= 1;
+                this.txtend = this.lexsize;
+                this.lexbuf[this.lexsize] = (byte)'\0';
+                this.state = LEX_CONTENT;
+                this.waswhite = false;
+                this.token = newNode(Node.SectionTag,
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend);
+                return this.token;
+
+            case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */
+                if (c != ']')
+                    continue;
+
+                /* now look for ']' */
+                c = this.in.readChar();
+
+                if (c != ']')
+                {
+                    this.in.ungetChar(c);
+                    continue;
+                }
+
+                /* now look for '>' */
+                c = this.in.readChar();
+
+                if (c != '>')
+                {
+                    this.in.ungetChar(c);
+                    continue;
+                }
+
+                this.lexsize -= 1;
+                this.txtend = this.lexsize;
+                this.lexbuf[this.lexsize] = (byte)'\0';
+                this.state = LEX_CONTENT;
+                this.waswhite = false;
+                this.token = newNode(Node.CDATATag,
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend);
+                return this.token;
+            }
+        }
+
+        if (this.state == LEX_CONTENT)  /* text string */
+        {
+            this.txtend = this.lexsize;
+
+            if (this.txtend > this.txtstart)
+            {
+                this.in.ungetChar(c);
+
+                if (this.lexbuf[this.lexsize - 1] == (byte)' ')
+                {
+                    this.lexsize -= 1;
+                    this.txtend = this.lexsize;
+                }
+
+                this.token = newNode(Node.TextNode,
+                                      this.lexbuf,
+                                      this.txtstart,
+                                      this.txtend);
+                return this.token;
+            }
+        }
+        else if (this.state == LEX_COMMENT) /* comment */
+        {
+            if (c == StreamIn.EndOfStream)
+                Report.warning(this, null, null, Report.MALFORMED_COMMENT);
+
+            this.txtend = this.lexsize;
+            this.lexbuf[this.lexsize] = (byte)'\0';
+            this.state = LEX_CONTENT;
+            this.waswhite = false;
+            this.token = newNode(Node.CommentTag,
+                                  this.lexbuf,
+                                  this.txtstart,
+                                  this.txtend);
+            return this.token;
+        }
+
+        return null;
+    }
+
+    /*
+     parser for ASP within start tags
+
+     Some people use ASP to customize attributes
+     Tidy isn't really well suited to dealing with ASP
+     This is a workaround for attributes, but won't
+     deal with the case where the ASP is used to tailor
+     the attribute value. Here is an example of a work
+     around for using ASP in attribute values:
+
+      href="<%=rsSchool.Fields("ID").Value%>"
+
+     where the ASP that generates the attribute value
+     is masked from Tidy by the quotemarks.
+
+    */
+
+    public Node parseAsp()
+    {
+        int c;
+        Node asp = null;
+
+        this.txtstart = this.lexsize;
+
+        for (;;)
+        {
+            c = this.in.readChar();
+            addCharToLexer(c);
+
+
+            if (c != '%')
+                continue;
+
+            c = this.in.readChar();
+            addCharToLexer(c);
+
+            if (c == '>')
+                break;
+        }
+
+        this.lexsize -= 2;
+        this.txtend = this.lexsize;
+
+        if (this.txtend > this.txtstart)
+            asp = newNode(Node.AspTag,
+                           this.lexbuf,
+                           this.txtstart,
+                           this.txtend);
+
+        this.txtstart = this.txtend;
+        return asp;
+    }   
+    /*
+     PHP is like ASP but is based upon XML
+     processing instructions, e.g. <?php ... ?>
+    */
+    public Node parsePhp()
+    {
+        int c;
+        Node php = null;
+
+        this.txtstart = this.lexsize;
+
+        for (;;)
+        {
+            c = this.in.readChar();
+            addCharToLexer(c);
+
+
+            if (c != '?')
+                continue;
+
+            c = this.in.readChar();
+            addCharToLexer(c);
+
+            if (c == '>')
+                break;
+        }
+
+        this.lexsize -= 2;
+        this.txtend = this.lexsize;
+
+        if (this.txtend > this.txtstart)
+            php = newNode(Node.PhpTag,
+                           this.lexbuf,
+                           this.txtstart,
+                           this.txtend);
+
+        this.txtstart = this.txtend;
+        return php;
+    }   
+
+    /* consumes the '>' terminating start tags */
+    public String parseAttribute(MutableBoolean isempty, MutableObject asp,
+                                 MutableObject php)
+    {   /*
+         Reads the next attribute name inside a start tag.
+
+         isempty  set to true when "/>" is met before a name
+         asp      receives the node from parseAsp() when "<%" is met,
+                  otherwise left holding null
+         php      receives the node from parsePhp() when "<?" is met,
+                  otherwise left holding null
+
+         Returns the attribute name (upper case letters folded to
+         lower case unless configuration.XmlTags is set), or null
+         when no name was read: on "/>", on '>', on '<' (with or
+         without ASP/PHP markup following) and on end of input.
+
+         The name is only held in the lexer buffer temporarily;
+         lexsize is put back to its entry value before returning.
+        */
+        int start = 0;
+        // int len = 0;   Removed by BUGFIX for 126265
+        short map;
+        String attr;
+        int c = 0;
+
+        asp.setObject(null);  /* clear asp pointer */
+        php.setObject(null);  /* clear php pointer */
+        /* skip white space before the attribute */
+
+        for (;;)
+        {
+            c = this.in.readChar();
+
+            if (c == '/')
+            {
+                c = this.in.readChar();
+
+                if (c == '>')
+                {
+                    isempty.value = true;
+                    return null;
+                }
+
+                this.in.ungetChar(c);
+                c = '/';  /* not "/>", so the '/' becomes the first char of the name */
+                break;
+            }
+
+            if (c == '>')
+                return null;
+
+            if (c =='<')
+            {
+                c = this.in.readChar();
+
+                if (c == '%')
+                {
+                    asp.setObject(parseAsp());
+                    return null;
+                }
+                else if (c == '?')
+                {
+                    php.setObject(parsePhp());
+                    return null;
+                }
+
+                this.in.ungetChar(c);  /* only the char after '<' is pushed back */
+                Report.attrError(this, this.token, null, Report.UNEXPECTED_GT);  /* NOTE(review): code says GT although '<' was seen - confirm intended */
+                return null;
+            }
+
+            if (c == '"' || c == '\'')
+            {
+                Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK);
+                continue;  /* stray quote mark is reported and skipped */
+            }
+
+            if (c == StreamIn.EndOfStream)
+            {
+                Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
+                this.in.ungetChar(c);
+                return null;
+            }
+
+            map = MAP((char)c);
+
+            if ((map & WHITE) == 0)
+                break;  /* c now holds the first character of the name */
+        }
+
+        start = this.lexsize;
+
+        for (;;)
+        {
+         /* but push back '=' for parseValue() */
+            if (c == '=' || c == '>')
+            {
+                this.in.ungetChar(c);
+                break;
+            }
+
+            if (c == '<' || c == StreamIn.EndOfStream)
+            {
+                this.in.ungetChar(c);
+                break;
+            }
+
+            map = MAP((char)c);
+
+            if ((map & WHITE) != 0)
+                break;  /* white space ends the name and is not pushed back */
+
+         /* what should be done about non-namechar characters? */
+         /* currently these are incorporated into the attr name */
+
+            if (!this.configuration.XmlTags && (map & UPPERCASE) != 0)
+                c += (int)('a' - 'A');
+
+            //  ++len;    Removed by BUGFIX for 126265 
+            addCharToLexer(c);
+
+            c = this.in.readChar();
+        }
+
+        // Following line added by GLP to fix BUG 126265.  This is a temporary comment
+        // and should be removed when Tidy is fixed.
+        int len = this.lexsize - start;  /* length in buffer units rather than a count of characters read */
+        attr = (len > 0 ? getString(this.lexbuf, start, len) : null);
+        this.lexsize = start;  /* give the buffer space back */
+
+        return attr;
+    }
+
+    /*
+     invoked when < is seen in place of attribute value
+     but terminates on whitespace if not ASP, PHP or Tango
+     this routine recognizes ' and " quoted strings
+    */
+    public int parseServerInstruction()
+    {
+        int c, map, delim = '"';
+        boolean isrule = false;
+
+        c = this.in.readChar();
+        addCharToLexer(c);
+
+        /* check for ASP, PHP or Tango */
+        if (c == '%' || c == '?' || c == '@')
+            isrule = true;
+
+        for (;;)
+        {
+            c = this.in.readChar();
+
+            if (c == StreamIn.EndOfStream)
+                break;
+
+            if (c == '>')
+            {
+                if (isrule)
+                    addCharToLexer(c);
+                else
+                    this.in.ungetChar(c);
+
+                break;
+            }
+
+            /* if not recognized as ASP, PHP or Tango */
+            /* then also finish value on whitespace */
+            if (!isrule)
+            {
+                map = MAP((char)c);
+
+                if ((map & WHITE) != 0)
+                    break;
+            }
+
+            addCharToLexer(c);
+
+            if (c == '"')
+            {
+                do
+                {
+                    c = this.in.readChar();
+                    addCharToLexer(c);
+                }
+                while (c != '"');
+                delim = '\'';
+                continue;
+            }
+
+            if (c == '\'')
+            {
+                do
+                {
+                    c = this.in.readChar();
+                    addCharToLexer(c);
+                }
+                while (c != '\'');
+            }
+        }
+
+        return delim;
+    }
+
+    /* values start with "=" or " = " etc. */
+    /* doesn't consume the ">" at end of start tag */
+
+    public String parseValue(String name, boolean foldCase,
+                             MutableBoolean isempty, MutableInteger pdelim)
+    {   /*
+         Reads the optional "= value" part that follows an attribute
+         name.
+
+         name      attribute name, used for the URL and script
+                   attribute checks below
+         foldCase  when true, upper case letters in the value are
+                   folded to lower case
+         isempty   set to true when an unquoted value of a non-URL
+                   attribute is ended by "/>"
+         pdelim    receives the quote character for the value: the
+                   one found in the input, the result of
+                   parseServerInstruction() for values starting with
+                   '<', or '"' otherwise
+
+         Returns null when the next non-white character is not '='
+         (it is pushed back) or when an unquoted value is empty;
+         otherwise the value text. The text is only held in the
+         lexer buffer temporarily; lexsize is put back before
+         returning.
+        */
+        int len = 0;
+        int start;
+        short map;
+        boolean seen_gt = false;
+        boolean munge = true;
+        int c = 0;
+        int lastc, delim, quotewarning;
+        String value;
+
+        delim = 0;  /* 0 means the value is not quoted */
+        pdelim.value = (int)'"';
+
+        /*
+         Henry Zrepa reports that some folk are using the
+         embed element with script attributes where newlines
+         are significant and must be preserved
+        */
+        if (configuration.LiteralAttribs)
+            munge = false;
+
+        /* skip white space before the '=' */
+
+        for (;;)
+        {
+            c = this.in.readChar();
+
+            if (c == StreamIn.EndOfStream)
+            {
+                this.in.ungetChar(c);
+                break;
+            }
+
+            map = MAP((char)c);
+
+            if ((map & WHITE) == 0)
+               break;
+        }
+
+    /*
+      c should be '=' if there is a value
+      other legal possibilities are white
+      space, '/' and '>'
+    */
+
+        if (c != '=')
+        {
+            this.in.ungetChar(c);
+            return null;
+        }
+
+     /* skip white space after '=' */
+
+        for (;;)
+        {
+            c = this.in.readChar();
+
+            if (c == StreamIn.EndOfStream)
+            {
+                this.in.ungetChar(c);
+                break;
+            }
+
+            map = MAP((char)c);
+
+            if ((map & WHITE) == 0)
+               break;
+        }
+
+     /* check for quote marks */
+
+        if (c == '"' || c == '\'')
+            delim = c;
+        else if (c == '<')
+        {
+            start = this.lexsize;
+            addCharToLexer(c);
+            pdelim.value = parseServerInstruction();
+            len = this.lexsize - start;
+            this.lexsize = start;
+            return (len > 0 ? getString(this.lexbuf, start, len) : null);
+        }
+        else
+            this.in.ungetChar(c);  /* first character of an unquoted value */
+
+     /*
+       and read the value string
+       check for quote mark if needed
+     */
+
+        quotewarning = 0;
+        start = this.lexsize;
+        c = '\0';  /* so that lastc starts out as a non-space */
+
+        for (;;)
+        {
+            lastc = c;  /* track last character */
+            c = this.in.readChar();
+
+            if (c == StreamIn.EndOfStream)
+            {
+                Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
+                this.in.ungetChar(c);
+                break;
+            }
+
+            if (delim == (char)0)
+            {
+                if (c == '>')
+                {
+                    this.in.ungetChar(c);
+                    break;
+                }
+
+                if (c == '"' || c == '\'')
+                {
+                    Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK);
+                    break;
+                }
+
+                if (c == '<')
+                {
+                    /* this.in.ungetChar(c); */
+                    Report.attrError(this, this.token, null, Report.UNEXPECTED_GT);  /* reported only; the '<' stays part of the value */
+                    /* break; */
+                }
+
+                /*
+                 For cases like <br clear=all/> need to avoid treating /> as
+                 part of the attribute value, however care is needed to avoid
+                 so treating <a href=http://www.acme.com/> in this way, which
+                 would map the <a> tag to <a href="http://www.acme.com"/>
+                */
+                if (c == '/')
+                {
+                    /* peek ahead in case of /> */
+                    c = this.in.readChar();
+
+                    if (c == '>' &&
+                        !AttributeTable.getDefaultAttributeTable().isUrl(name))
+                    {
+                        isempty.value = true;
+                        this.in.ungetChar(c);
+                        break;
+                    }
+
+                    /* unget peeked char */
+                    this.in.ungetChar(c);
+                    c = '/';
+                }
+            }
+            else  /* delim is '\'' or '"' */
+            {
+                if (c == delim)
+                    break;
+
+                /* treat CRLF, CR and LF as single line break */
+
+                if (c == '\r')
+                {
+                    c = this.in.readChar();
+                    if (c != '\n')
+                        this.in.ungetChar(c);
+
+                    c = '\n';
+                }
+
+                if (c == '\n' || c == '<' || c == '>')
+                    ++quotewarning;  /* counted for the missing-quote check after the loop */
+
+                if (c == '>')
+                    seen_gt = true;
+            }
+
+            if (c == '&')
+            {
+                addCharToLexer(c);
+                parseEntity((short)0);
+                continue;
+            }
+
+            /*
+             kludge for JavaScript attribute values
+             with line continuations in string literals
+            */
+            if (c == '\\')
+            {
+                c = this.in.readChar();
+
+                if (c != '\n')
+                {
+                    this.in.ungetChar(c);
+                    c = '\\';
+                }
+                /* else the backslash is dropped and c is the newline */
+            }
+
+            map = MAP((char)c);
+
+            if ((map & WHITE) != 0)
+            {
+                if (delim == (char)0)
+                    break;  /* white space ends an unquoted value */
+
+                if (munge)
+                {
+                    c = ' ';
+
+                    if (lastc == ' ')
+                        continue;  /* collapse a run of white space into one space */
+                }
+            }
+            else if (foldCase && (map & UPPERCASE) != 0)
+                c += (int)('a' - 'A');
+
+            addCharToLexer(c);
+        }
+
+        if (quotewarning > 10 && seen_gt && munge)
+        {
+            /*
+               there is almost certainly a missing trailing quote mark
+               as we have seen too many newlines, < or > characters.
+
+               an exception is made for Javascript attributes and the
+               javascript URL scheme which may legitimately include < and >
+
+               NOTE(review): the getString call below asks for 11 bytes
+               from start even when the value is shorter - confirm that
+               getString tolerates this
+            */
+            if (!AttributeTable.getDefaultAttributeTable().isScript(name) &&
+                !(AttributeTable.getDefaultAttributeTable().isUrl(name) &&
+                  (getString(this.lexbuf, start, 11)).equals("javascript:")))
+                    Report.error(this, null, null, Report.SUSPECTED_MISSING_QUOTE);
+        }
+
+        len = this.lexsize - start;
+        this.lexsize = start;  /* give the buffer space back; the text is copied out below */
+
+        if (len > 0 || delim != 0)  /* a quoted value is returned even when it is empty */
+            value = getString(this.lexbuf, start, len);
+        else
+            value = null;
+
+        /* note delimiter if given */
+        if (delim != 0)
+            pdelim.value = delim;
+        else
+            pdelim.value = (int)'"';
+
+        return value;
+    }
+
+    /* attr must be non-null */
+    public static boolean isValidAttrName(String attr)
+    {
+        short map;
+        char c;
+        int i;
+
+        /* first character should be a letter */
+        c = attr.charAt(0);
+        map = MAP(c);
+
+        if (!((map & LETTER) != 0))
+            return false;
+
+        /* remaining characters should be namechars */
+        for( i = 1; i < attr.length(); i++)
+        {
+            c = attr.charAt(i);
+            map = MAP(c);
+
+            if((map & NAMECHAR) != 0)
+                continue;
+
+            return false;
+        }
+
+        return true;
+    }
+
+    /* swallows closing '>' */
+
+    public AttVal parseAttrs(MutableBoolean isempty)
+    {
+        AttVal av, list;
+        String attribute, value;
+        MutableInteger delim = new MutableInteger();
+        MutableObject asp = new MutableObject();
+        MutableObject php = new MutableObject();
+
+        list = null;
+
+        for (; !endOfInput();)
+        {
+            attribute = parseAttribute(isempty, asp, php);
+
+            if (attribute == null)
+            {
+                /* check if attributes are created by ASP markup */
+                if (asp.getObject() != null)
+                {
+                    av = new AttVal(list, null, (Node)asp.getObject(), null,
+                                    '\0', null, null );
+                    list = av;
+                    continue;
+                }
+
+                /* check if attributes are created by PHP markup */
+                if (php.getObject() != null)
+                {
+                    av = new AttVal(list, null, null, (Node)php.getObject(),
+                                    '\0', null, null );
+                    list = av;
+                    continue;
+                }
+
+                break;
+            }
+
+            value = parseValue(attribute, false, isempty, delim);
+
+            if (attribute != null && isValidAttrName(attribute))
+            {
+                av = new AttVal( list, null, null, null,
+                                 delim.value, attribute, value );
+                av.dict =
+                    AttributeTable.getDefaultAttributeTable().findAttribute(av);
+                list = av;
+            }
+            else
+            {
+                av = new AttVal( null, null, null, null,
+                                 0, attribute, value );
+                Report.attrError(this, this.token, value, Report.BAD_ATTRIBUTE_VALUE);
+            }
+        }
+
+        return list;
+    }
+
+    /*
+      push a copy of an inline node onto stack
+      but don't push if implicit or OBJECT or APPLET
+      (implicit tags are ones generated from the istack)
+
+      One issue arises with pushing inlines when
+      the tag is already pushed. For instance:
+
+          <p><em>text
+          <p><em>more text
+
+      Shouldn't be mapped to
+
+          <p><em>text</em></p>
+          <p><em><em>more text</em></em>
+    */
+    public void pushInline( Node node )
+    {
+        IStack is;
+
+        if (node.implicit)
+            return;
+
+        if (node.tag == null)
+            return;
+
+        if ((node.tag.model & Dict.CM_INLINE) == 0 )
+            return;
+
+        if ((node.tag.model & Dict.CM_OBJECT) != 0)
+            return;
+
+        if (node.tag != configuration.tt.tagFont && isPushed(node))
+            return;
+
+        // make sure there is enough space for the stack
+        is = new IStack();
+        is.tag = node.tag;
+        is.element = node.element;
+        if (node.attributes != null)
+            is.attributes = cloneAttributes(node.attributes);
+        this.istack.push( is );
+    }
+
+    /* pop inline stack */
+    public void popInline( Node node )
+    {
+        AttVal av;
+        IStack is;
+
+        if (node != null) {
+
+            if (node.tag == null)
+                return;
+
+            if ((node.tag.model & Dict.CM_INLINE) == 0)
+                return;
+
+            if ((node.tag.model & Dict.CM_OBJECT) != 0)
+                return;
+
+            // if node is </a> then pop until we find an <a>
+            if (node.tag == configuration.tt.tagA) {
+
+                while (this.istack.size() > 0) {
+                    is = (IStack)this.istack.pop();
+                    if (is.tag == configuration.tt.tagA) {
+                        break;
+                    }
+                }
+
+                if (this.insert >= this.istack.size())
+                    this.insert = -1;
+                return;
+            }
+        }
+
+        if (this.istack.size() > 0) {
+            is = (IStack)this.istack.pop();
+            if (this.insert >= this.istack.size())
+                this.insert = -1;
+        }
+    }
+
+    public boolean isPushed( Node node )
+    {
+        int i;
+        IStack is;
+
+        for (i = this.istack.size() - 1; i >= 0; --i) {
+            is = (IStack)this.istack.elementAt(i);
+            if (is.tag == node.tag)
+                return true;
+        }
+
+        return false;
+    }
+
+    /*
+      This has the effect of inserting "missing" inline
+      elements around the contents of blocklevel elements
+      such as P, TD, TH, DIV, PRE etc. This procedure is
+      called at the start of ParseBlock. when the inline
+      stack is not empty, as will be the case in:
+
+        <i><h1>italic heading</h1></i>
+
+      which is then treated as equivalent to
+
+        <h1><i>italic heading</i></h1>
+
+      This is implemented by setting the lexer into a mode
+      where it gets tokens from the inline stack rather than
+      from the input stream.
+    */
+    public int inlineDup( Node node )
+    {
+        int n;
+
+        n = this.istack.size() - this.istackbase;
+        if ( n > 0 ) {
+            this.insert = this.istackbase;
+            this.inode = node;
+        }
+
+        return n;
+    }
+
+    public Node insertedToken()
+    {
+        Node node;
+        IStack is;
+        int n;
+
+        // this will only be null if inode != null
+        if (this.insert == -1) {
+            node = this.inode;
+            this.inode = null;
+            return node;
+        }
+
+        // is this is the "latest" node then update
+        // the position, otherwise use current values
+
+        if (this.inode == null) {
+            this.lines = this.in.curline;
+            this.columns = this.in.curcol;
+        }
+
+        node = newNode(Node.StartTag,
+                        this.lexbuf,
+                        this.txtstart,
+                        this.txtend);   // GLP:  Bugfix 126261.  Remove when this change
+                                        //       is fixed in istack.c in the original Tidy
+        node.implicit = true;
+        is = (IStack)this.istack.elementAt( this.insert );
+        node.element = is.element;
+        node.tag = is.tag;
+        if (is.attributes != null)
+            node.attributes = cloneAttributes(is.attributes);
+
+        // advance lexer to next item on the stack
+        n = this.insert;
+
+        // and recover state if we have reached the end
+        if (++n < this.istack.size() ) {
+            this.insert = n;
+        } else {
+            this.insert = -1;
+        }
+
+        return node;
+    }
+
+    /* AQ: Try this for speed optimization */
+    public static int wstrcasecmp(String s1, String s2)
+    {
+        return (s1.equalsIgnoreCase(s2) ? 0 : 1);
+    }
+
+    public static int wstrcaselexcmp(String s1, String s2)
+    {
+        char c;
+        int i = 0;
+
+        while ( i < s1.length() && i < s2.length() ) {
+            c = s1.charAt(i);
+            if ( toLower(c) != toLower( s2.charAt(i) ) ) {
+                break;
+            }
+            i += 1;
+        }
+        if ( i == s1.length() && i == s2.length() ) {
+            return 0;
+        } else if ( i == s1.length() ) {
+            return -1;
+        } else if ( i == s2.length() ) {
+            return 1;
+        } else {
+            return ( s1.charAt(i) > s2.charAt(i) ? 1 : -1 );
+        }
+    }
+
+    public static boolean wsubstr(String s1, String s2)
+    {
+        int i;
+        int len1 = s1.length();
+        int len2 = s2.length();
+
+        for (i = 0; i <= len1 - len2; ++i)
+        {
+            if (s2.equalsIgnoreCase(s1.substring(i)))
+                return true;
+        }
+
+        return false;
+    }
+
+    public boolean canPrune(Node element)
+    {
+        if (element.type == Node.TextNode)
+            return true;
+
+        if (element.content != null)
+            return false;
+
+        if (element.tag == configuration.tt.tagA && element.attributes != null)
+            return false;
+
+        if (element.tag == configuration.tt.tagP && !this.configuration.DropEmptyParas)
+            return false;
+
+        if (element.tag == null)
+            return false;
+
+        if ((element.tag.model & Dict.CM_ROW) != 0)
+            return false;
+
+        if (element.tag == configuration.tt.tagApplet)
+            return false;
+
+        if (element.tag == configuration.tt.tagObject)
+            return false;
+
+        if (element.attributes != null &&
+            (element.getAttrByName("id") != null ||
+               element.getAttrByName("name") != null) )
+            return false;
+
+        return true;
+    }
+
+    /* duplicate name attribute as an id */
+    public void fixId(Node node)
+    {
+        AttVal name = node.getAttrByName("name");
+        AttVal id = node.getAttrByName("id");
+
+        if (name != null)
+        {
+            if (id != null)
+            {
+                if (!id.value.equals(name.value))
+                    Report.attrError(this, node, "name", Report.ID_NAME_MISMATCH);
+            }
+            else if (this.configuration.XmlOut)
+                node.addAttribute("id", name.value);
+        }
+    }
+
+    /*
+     defer duplicates when entering a table or other
+     element where the inlines shouldn't be duplicated
+    */
+    public void deferDup()
+    {
+        this.insert = -1;
+        this.inode = null;
+    }
+
+    /* Private methods and fields */
+
+    /* lexer char types: bit flags OR-ed together in the entries of lexmap */
+    private static final short DIGIT       = 1;   /* set for 0-9 */
+    private static final short LETTER      = 2;   /* set for a-z and A-Z */
+    private static final short NAMECHAR    = 4;   /* set for letters, digits and - . : _ */
+    private static final short WHITE       = 8;   /* set for space, tab, CR, LF and form feed */
+    private static final short NEWLINE     = 16;  /* set for CR, LF and form feed */
+    private static final short LOWERCASE   = 32;  /* set for a-z */
+    private static final short UPPERCASE   = 64;  /* set for A-Z */
+
+    /* lexer GetToken states */
+
+    private static final short LEX_CONTENT     = 0;
+    private static final short LEX_GT          = 1;
+    private static final short LEX_ENDTAG      = 2;
+    private static final short LEX_STARTTAG    = 3;
+    private static final short LEX_COMMENT     = 4;
+    private static final short LEX_DOCTYPE     = 5;
+    private static final short LEX_PROCINSTR   = 6;
+    private static final short LEX_ENDCOMMENT  = 7;
+    private static final short LEX_CDATA       = 8;
+    private static final short LEX_SECTION     = 9;
+    private static final short LEX_ASP         = 10;
+    private static final short LEX_JSTE        = 11;
+    private static final short LEX_PHP         = 12;
+
+    /* used to classify chars for lexical purposes: indexed by character code 0-127, each entry holds the char type flags above */
+    private static short[] lexmap = new short[128];
+
+    /**
+     * ORs the given flag bits into the lexmap entry of every character
+     * in str.
+     *
+     * @param str  the characters to flag; each code is used directly as
+     *             an index into lexmap
+     * @param code the flag bits to add
+     */
+    private static void mapStr(String str, short code)
+    {
+        int length = str.length();
+
+        for ( int pos = 0; pos < length; pos++ )
+            lexmap[str.charAt(pos)] |= code;
+    }
+
+    static {   /* fills lexmap once, when the class is initialized */
+        mapStr("\r\n\f", (short)(NEWLINE|WHITE));   /* line breaks also count as white space */
+        mapStr(" \t", WHITE);
+        mapStr("-.:_", NAMECHAR);   /* punctuation accepted inside names */
+        mapStr("0123456789", (short)(DIGIT|NAMECHAR));
+        mapStr("abcdefghijklmnopqrstuvwxyz", (short)(LOWERCASE|LETTER|NAMECHAR));
+        mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short)(UPPERCASE|LETTER|NAMECHAR));
+    }
+
+    /**
+     * Looks up the char type flags of a character.
+     *
+     * @param c the character to classify
+     * @return the lexmap entry for codes below 128, otherwise 0
+     */
+    private static short MAP( char c )
+    {
+        if (c >= 128)
+            return 0;
+
+        return lexmap[c];
+    }
+
+    /**
+     * @param c the character to test
+     * @return true if the WHITE flag is set for c in the lexer map
+     */
+    private static boolean isWhite(char c)
+    {
+        return (MAP(c) & WHITE) != 0;
+    }
+
+    /**
+     * @param c the character to test
+     * @return true if the DIGIT flag is set for c in the lexer map
+     */
+    private static boolean isDigit(char c)
+    {
+        return (MAP(c) & DIGIT) != 0;
+    }
+
+    /**
+     * @param c the character to test
+     * @return true if the LETTER flag is set for c in the lexer map
+     */
+    private static boolean isLetter(char c)
+    {
+        return (MAP(c) & LETTER) != 0;
+    }
+
+    /**
+     * Folds a character flagged UPPERCASE in the lexer map to lower
+     * case; any other character is returned unchanged.
+     *
+     * @param c the character to fold
+     * @return the folded character
+     */
+    private static char toLower(char c)
+    {
+        if ((MAP(c) & UPPERCASE) == 0)
+            return c;
+
+        return (char)( c + ('a' - 'A') );
+    }
+
+    /**
+     * Folds a character flagged LOWERCASE in the lexer map to upper
+     * case; any other character is returned unchanged.
+     *
+     * @param c the character to fold
+     * @return the folded character
+     */
+    private static char toUpper(char c)
+    {
+        if ((MAP(c) & LOWERCASE) == 0)
+            return c;
+
+        return (char)( c - ('a' - 'A') );
+    }
+
+    public static char foldCase(char c, boolean tocaps, boolean xmlTags)
+    {
+        short m;
+
+        if (!xmlTags)
+        {
+            m = MAP(c);
+
+            if (tocaps)
+            {
+                if ((m & LOWERCASE) != 0)
+                    c = (char)( (int)c + (int)'A' - (int)'a' );
+            }
+            else /* force to lower case */
+            {
+                if ((m & UPPERCASE) != 0)
+                    c = (char)( (int)c + (int)'a' - (int)'A' );
+            }
+        }
+
+        return c;
+    }
+
+
+    private static class W3CVersionInfo
+    {
+        String name;
+        String voyagerName;
+        String profile;
+        short code;
+
+        public W3CVersionInfo( String name,
+                               String voyagerName,
+                               String profile,
+                               short code )
+        {
+            this.name = name;
+            this.voyagerName = voyagerName;
+            this.profile = profile;
+            this.code = code;
+        }
+    }
+
+    /* the 3 URIs for the XHTML 1.0 DTDs (transitional, strict and frameset) */
+    private static final String voyager_loose    = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
+    private static final String voyager_strict   = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
+    private static final String voyager_frameset = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd";
+
+    private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
+
+    private static Lexer.W3CVersionInfo[] W3CVersion =   /* each row: name, voyagerName, profile (one of the URIs above), Dict.VERS_* code */
+    {
+        new W3CVersionInfo("HTML 4.01",
+                           "XHTML 1.0 Strict",
+                           voyager_strict,
+                           Dict.VERS_HTML40_STRICT),
+        new W3CVersionInfo("HTML 4.01 Transitional",
+                           "XHTML 1.0 Transitional",
+                           voyager_loose,
+                           Dict.VERS_HTML40_LOOSE),
+        new W3CVersionInfo("HTML 4.01 Frameset",
+                           "XHTML 1.0 Frameset",
+                           voyager_frameset,
+                           Dict.VERS_FRAMES),
+        new W3CVersionInfo("HTML 4.0",
+                           "XHTML 1.0 Strict",
+                           voyager_strict,
+                           Dict.VERS_HTML40_STRICT),
+        new W3CVersionInfo("HTML 4.0 Transitional",
+                           "XHTML 1.0 Transitional",
+                           voyager_loose,
+                           Dict.VERS_HTML40_LOOSE),
+        new W3CVersionInfo("HTML 4.0 Frameset",
+                           "XHTML 1.0 Frameset",
+                           voyager_frameset,
+                           Dict.VERS_FRAMES),
+        new W3CVersionInfo("HTML 3.2",
+                           "XHTML 1.0 Transitional",
+                           voyager_loose,
+                           Dict.VERS_HTML32),
+        new W3CVersionInfo("HTML 2.0",
+                           "XHTML 1.0 Strict",
+                           voyager_strict,
+                           Dict.VERS_HTML20)
+    };
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/MutableBoolean.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/MutableBoolean.java
new file mode 100644 (file)
index 0000000..a10a59d
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * @(#)MutableBoolean.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Mutable Boolean
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class MutableBoolean {
+
+    public boolean value;
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/MutableInteger.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/MutableInteger.java
new file mode 100644 (file)
index 0000000..1b9cd4a
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * @(#)MutableInteger.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Mutable Integer
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class MutableInteger {
+
+    public int value;
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/MutableObject.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/MutableObject.java
new file mode 100644 (file)
index 0000000..f1bf964
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * @(#)MutableObject.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Mutable Object
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class MutableObject {
+
+    public MutableObject()
+    {
+        this(null);
+    }
+
+    public MutableObject(Object o)
+    {
+        this.value = o;
+    }
+
+    public void setObject(Object o)
+    {
+        value = o;
+    }
+
+    public Object getObject()
+    {
+        return value;
+    }
+
+    private Object value;
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Node.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Node.java
new file mode 100644 (file)
index 0000000..064e061
--- /dev/null
@@ -0,0 +1,917 @@
+/*
+ * @(#)Node.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Node
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+  Used for elements and text nodes
+  element name is null for text nodes
+  start and end are offsets into lexbuf
+  which contains the textual content of
+  all elements in the parse tree.
+
+  parent and content allow traversal
+  of the parse tree in any direction.
+  attributes are represented as a linked
+  list of AttVal nodes which hold the
+  strings for attribute/value pairs.
+*/
+
+public class Node {
+
+    public static final short RootNode        = 0;   /* values for the type field */
+    public static final short DocTypeTag      = 1;
+    public static final short CommentTag      = 2;
+    public static final short ProcInsTag      = 3;
+    public static final short TextNode        = 4;
+    public static final short StartTag        = 5;
+    public static final short EndTag          = 6;
+    public static final short StartEndTag     = 7;
+    public static final short CDATATag        = 8;
+    public static final short SectionTag      = 9;
+    public static final short AspTag          = 10;
+    public static final short JsteTag         = 11;
+    public static final short PhpTag          = 12;
+
+    protected Node parent;           /* containing node */
+    protected Node prev;             /* previous sibling */
+    protected Node next;             /* next sibling */
+    protected Node last;             /* last child */
+    protected int start;             /* start of span onto text array */
+    protected int end;               /* end of span onto text array */
+    protected byte[] textarray;      /* the text array */
+    protected short type;              /* TextNode, StartTag, EndTag etc. */
+    protected boolean closed;            /* true if closed by explicit end tag */
+    protected boolean implicit;          /* true if inferred */
+    protected boolean linebreak;         /* true if followed by a line break */
+    protected Dict was;   /* old tag when it was changed */
+    protected Dict tag;   /* tag's dictionary definition */
+    protected String element;          /* name (null for text nodes) */
+    protected AttVal attributes;     /* head of the attribute list, linked through AttVal.next */
+    protected Node content;          /* first child */
+
+    public Node()   /* creates an empty TextNode: no text array and a 0..0 span */
+    {
+        this(TextNode, null, 0, 0);
+    }
+
+    /**
+     * Creates an unlinked node of the given type over a span of a text
+     * array. Links, tag, element name and attributes are null and all
+     * flags are false.
+     *
+     * @param type      one of the node type constants
+     * @param textarray the buffer the span refers to (stored, not copied)
+     * @param start     start of the span
+     * @param end       end of the span
+     */
+    public Node(short type, byte[] textarray, int start, int end)
+    {
+        /* what the node is and which text it covers */
+        this.type = type;
+        this.textarray = textarray;
+        this.start = start;
+        this.end = end;
+
+        /* not linked into any tree yet */
+        this.parent = null;
+        this.prev = null;
+        this.next = null;
+        this.last = null;
+        this.content = null;
+
+        /* no markup information yet */
+        this.was = null;
+        this.tag = null;
+        this.element = null;
+        this.attributes = null;
+
+        this.closed = false;
+        this.implicit = false;
+        this.linebreak = false;
+    }
+
+    /**
+     * Creates an unlinked node with an element name. For StartTag,
+     * StartEndTag and EndTag nodes the tag table's findTag is then
+     * called on the new node.
+     *
+     * @param type      one of the node type constants
+     * @param textarray the buffer the span refers to (stored, not copied)
+     * @param start     start of the span
+     * @param end       end of the span
+     * @param element   the element name
+     * @param tt        the tag table consulted for tag nodes
+     */
+    public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt)
+    {
+        this(type, textarray, start, end);
+        this.element = element;
+
+        boolean isTag = (type == StartTag || type == StartEndTag || type == EndTag);
+        if (isTag)
+            tt.findTag(this);
+    }
+
+    /* used to clone heading nodes when split by an <HR> */
+    /**
+     * Makes a copy of this node without its children or sibling links.
+     *
+     * The copy keeps the same parent, type, flags, dictionary entries
+     * and element name, gets its own text array holding only this
+     * node's span (so its span starts at 0), and a clone of the
+     * attribute list.
+     *
+     * @return the new Node
+     */
+    protected Object clone()
+    {
+        Node copy = new Node();
+
+        copy.parent = this.parent;
+
+        if (this.textarray != null)
+        {
+            int length = this.end - this.start;
+
+            copy.textarray = new byte[length];
+            copy.start = 0;
+            copy.end = length;
+            if (length > 0)
+                System.arraycopy(this.textarray, this.start,
+                                 copy.textarray, 0, length);
+        }
+
+        copy.type = this.type;
+        copy.closed = this.closed;
+        copy.implicit = this.implicit;
+        copy.linebreak = this.linebreak;
+        copy.was = this.was;
+        copy.tag = this.tag;
+
+        /* a fresh node already has a null element, so plain assignment suffices */
+        copy.element = this.element;
+
+        if (this.attributes != null)
+            copy.attributes = (AttVal)this.attributes.clone();
+
+        return copy;
+    }
+
+    public AttVal getAttrByName(String name)
+    {
+        AttVal attr;
+
+        for (attr = this.attributes; attr != null; attr = attr.next)
+        {
+            if (name != null &&
+                attr.attribute != null &&
+                attr.attribute.equals(name))
+                break;
+        }
+
+        return attr;
+    }
+
+    /* default method for checking an element's attributes */
+    public void checkAttributes( Lexer lexer )
+    {
+        AttVal attval;
+
+        for (attval = this.attributes; attval != null; attval = attval.next)
+            attval.checkAttribute( lexer, this );
+    }
+
+    public void checkUniqueAttributes(Lexer lexer)
+    {
+        AttVal attval;
+
+        for (attval = this.attributes; attval != null; attval = attval.next) {
+            if (attval.asp == null && attval.php == null)
+                attval.checkUniqueAttribute(lexer, this);
+        }
+    }
+
+    public void addAttribute(String name, String value)
+    {
+        AttVal av = new AttVal(null, null, null, null,
+                               '"', name, value);
+        av.dict =
+          AttributeTable.getDefaultAttributeTable().findAttribute(av);
+
+        if (this.attributes == null)
+            this.attributes = av;
+        else /* append to end of attributes */
+        {
+            AttVal here = this.attributes;
+
+            while (here.next != null)
+                here = here.next;
+
+            here.next = av;
+        }
+    }
+
+    /* remove attribute from node then free it */
+    public void removeAttribute(AttVal attr)
+    {
+        AttVal av;
+        AttVal prev = null;
+        AttVal next;
+
+        for (av = this.attributes; av != null; av = next)
+        {
+            next = av.next;
+
+            if (av == attr)
+            {
+                if (prev != null)
+                    prev.next = next;
+                else
+                    this.attributes = next;
+            }
+            else
+                prev = av;
+       }
+    }
+
+    /* find doctype element */
+    public Node findDocType()
+    {
+        Node node;
+
+        for (node = this.content; 
+            node != null && node.type != DocTypeTag; node = node.next);
+
+        return node;
+    }
+
+    public void discardDocType()
+    {
+        Node node;
+
+        node = findDocType();
+        if (node != null)
+        {
+            if (node.prev != null)
+                node.prev.next = node.next;
+            else
+                node.parent.content = node.next;
+
+            if (node.next != null)
+                node.next.prev = node.prev;
+
+            node.next = null;
+        }
+    }
+
+    /* remove node from markup tree and discard it */
+    public static Node discardElement(Node element)
+    {
+        Node next = null;
+
+        if (element != null)
+        {
+            next = element.next;
+            removeNode(element);
+        }
+
+        return next;
+    }
+
+    /* insert node into markup tree */
+    public static void insertNodeAtStart(Node element, Node node)
+    {
+        node.parent = element;
+
+        if (element.content == null)
+            element.last = node;
+        else
+            element.content.prev = node; // AQ added 13 Apr 2000
+
+        node.next = element.content;
+        node.prev = null;
+        element.content = node;
+    }
+
+    /* insert node into markup tree */
+    public static void insertNodeAtEnd(Node element, Node node)
+    {
+        node.parent = element;
+        node.prev = element.last;
+
+        if (element.last != null)
+            element.last.next = node;
+        else
+            element.content = node;
+
+        element.last = node;
+    }
+
+    /*
+     insert node into markup tree in place of element
+     which is moved to become the child of the node
+    */
+    public static void insertNodeAsParent(Node element, Node node)
+    {
+        node.content = element;
+        node.last = element;
+        node.parent = element.parent;
+        element.parent = node;
+    
+        if (node.parent.content == element)
+            node.parent.content = node;
+
+        if (node.parent.last == element)
+            node.parent.last = node;
+
+        node.prev = element.prev;
+        element.prev = null;
+
+        if (node.prev != null)
+            node.prev.next = node;
+
+        node.next = element.next;
+        element.next = null;
+
+        if (node.next != null)
+            node.next.prev = node;
+    }
+
+    /* insert node into markup tree before element */
+    public static void insertNodeBeforeElement(Node element, Node node)
+    {
+        Node parent;
+
+        parent = element.parent;
+        node.parent = parent;
+        node.next = element;
+        node.prev = element.prev;
+        element.prev = node;
+
+        if (node.prev != null)
+            node.prev.next = node;
+
+        if (parent.content == element)
+            parent.content = node;
+    }
+
+    /* insert node into markup tree after element */
+    public static void insertNodeAfterElement(Node element, Node node)
+    {
+        Node parent;
+
+        parent = element.parent;
+        node.parent = parent;
+
+        // AQ - 13Jan2000 fix for parent == null
+        if (parent != null && parent.last == element)
+            parent.last = node;
+        else
+        {
+            node.next = element.next;
+            // AQ - 13Jan2000 fix for node.next == null
+            if (node.next != null)
+                node.next.prev = node;
+        }
+
+        element.next = node;
+        node.prev = element;
+    }
+
+    public static void trimEmptyElement(Lexer lexer, Node element)
+    {
+        TagTable tt = lexer.configuration.tt;
+
+        if (lexer.canPrune(element))
+        {
+            if (element.type != TextNode)
+                Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
+
+            discardElement(element);
+        }
+        else if (element.tag == tt.tagP && element.content == null)
+        {
+            /* replace <p></p> by <br><br> to preserve formatting */
+            Node node = lexer.inferredTag("br");
+            Node.coerceNode(lexer, element, tt.tagBr);
+            Node.insertNodeAfterElement(element, node);
+        }
+    }
+
+    /*
+      This maps 
+           <em>hello </em><strong>world</strong>
+      to
+           <em>hello</em> <strong>world</strong>
+
+      If last child of element is a text node
+      then trim trailing white space character
+      moving it to after element's end tag.
+    */
+    public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
+    {
+        byte c;
+        TagTable tt = lexer.configuration.tt;
+
+        if (last != null && last.type == Node.TextNode &&
+            last.end > last.start)
+        {
+            c = lexer.lexbuf[last.end - 1];
+
+            if (c == 160 || c == (byte)' ')
+            {
+                /* take care with <td>&nbsp;</td> */
+                if (element.tag == tt.tagTd ||
+                    element.tag == tt.tagTh)
+                {
+                    if (last.end > last.start + 1)
+                        last.end -= 1;
+                }
+                else
+                {
+                    last.end -= 1;
+
+                    if (((element.tag.model & Dict.CM_INLINE) != 0) &&
+                            !((element.tag.model & Dict.CM_FIELD) != 0))
+                        lexer.insertspace = true;
+
+                    /* if empty string then delete from parse tree */
+                    if (last.start == last.end)
+                        trimEmptyElement(lexer, last);
+                }
+            }
+        }
+    }
+
+    /*
+      This maps 
+           <p>hello<em> world</em>
+      to
+           <p>hello <em>world</em>
+
+      Trims initial space, by moving it before the
+      start tag, or if this element is the first in
+      parent's content, then by discarding the space
+    */
+    public static void trimInitialSpace(Lexer lexer, Node element, Node text)
+    {
+        Node prev, node;
+
+        // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
+        //      31-Oct-00. 
+        if (text.type == TextNode && text.textarray[text.start] == (byte)' ' 
+                           && (text.start < text.end))
+        {
+            if (((element.tag.model & Dict.CM_INLINE) != 0) &&
+                !((element.tag.model & Dict.CM_FIELD) != 0) &&
+                element.parent.content != element)
+            {
+                prev = element.prev;
+
+                if (prev != null && prev.type == TextNode)
+                {
+                    if (prev.textarray[prev.end - 1] != (byte)' ')
+                        prev.textarray[prev.end++] = (byte)' ';
+
+                    ++element.start;
+                }
+                else /* create new node */
+                {
+                    node = lexer.newNode();
+                    // Local fix for bug 228486 (GLP).  This handles the case
+                    // where we need to create a preceeding text node but there are
+                    // no "slots" in textarray that we can steal from the current
+                    // element.  Therefore, we create a new textarray containing
+                    // just the blank.  When Tidy is fixed, this should be removed.
+                    if (element.start >= element.end)
+                    {
+                        node.start = 0;
+                        node.end = 1;
+                        node.textarray = new byte[1];
+                    }
+                    else
+                    {
+                        node.start = element.start++;
+                        node.end = element.start;
+                        node.textarray = element.textarray;
+                    }
+                    node.textarray[node.start] = (byte)' ';
+                    node.prev = prev;
+                    if (prev != null)
+                        prev.next = node;
+                    node.next = element;
+                    element.prev = node;
+                    node.parent = element.parent;
+                }
+            }
+
+            /* discard the space  in current node */
+            ++text.start;
+        }
+    }
+
+    /* 
+      Move initial and trailing space out.
+      This routine maps:
+
+           hello<em> world</em>
+      to
+           hello <em>world</em>
+      and
+           <em>hello </em><strong>world</strong>
+      to
+           <em>hello</em> <strong>world</strong>
+    */
+    public static void trimSpaces(Lexer lexer, Node element)
+    {
+        Node text = element.content;
+        TagTable tt = lexer.configuration.tt;
+
+        if (text != null && text.type == Node.TextNode &&
+            element.tag != tt.tagPre)
+            trimInitialSpace(lexer, element, text);
+
+        text = element.last;
+
+        if (text != null && text.type == Node.TextNode)
+            trimTrailingSpace(lexer, element, text);
+    }
+
+    public boolean isDescendantOf(Dict tag)
+    {
+        Node parent;
+
+        for (parent = this.parent;
+                parent != null; parent = parent.parent)
+        {
+            if (parent.tag == tag)
+                return true;
+        }
+
+        return false;
+    }
+
+    /*
+     the doctype has been found after other tags,
+     and needs moving to before the html element
+    */
+    public static void insertDocType(Lexer lexer, Node element, Node doctype)
+    {
+        TagTable tt = lexer.configuration.tt;
+      
+        Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
+
+        while (element.tag != tt.tagHtml)
+            element = element.parent;
+
+        insertNodeBeforeElement(element, doctype);
+    }
+
+    public Node findBody(TagTable tt)
+    {
+        Node node;
+
+        node = this.content;
+
+        while (node != null && node.tag != tt.tagHtml)
+            node = node.next;
+
+        if (node == null)
+            return null;
+
+        node = node.content;
+
+        while (node != null && node.tag != tt.tagBody)
+            node = node.next;
+
+        return node;
+    }
+
+    public boolean isElement()
+    {
+        return (this.type == StartTag || this.type == StartEndTag ? true : false);
+    }
+
+    /*
+     unexpected content in table row is moved to just before
+     the table in accordance with Netscape and IE. This code
+     assumes that node hasn't been inserted into the row.
+    */
+    public static void moveBeforeTable(Node row, Node node, TagTable tt)
+    {
+        Node table;
+
+        /* first find the table element */
+        for (table = row.parent; table != null; table = table.parent)
+        {
+            if (table.tag == tt.tagTable)
+            {
+                if (table.parent.content == table)
+                    table.parent.content = node;
+
+                node.prev = table.prev;
+                node.next = table;
+                table.prev = node;
+                node.parent = table.parent;
+        
+                if (node.prev != null)
+                    node.prev.next = node;
+
+                break;
+            }
+        }
+    }
+
+    /*
+     if a table row is empty then insert an empty cell
+     this practice is consistent with browser behavior
+     and avoids potential problems with row spanning cells
+    */
+    public static void fixEmptyRow(Lexer lexer, Node row)
+    {
+        Node cell;
+
+        if (row.content == null)
+        {
+            cell = lexer.inferredTag("td");
+            insertNodeAtEnd(row, cell);
+            Report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
+        }
+    }
+
+    public static void coerceNode(Lexer lexer, Node node, Dict tag)
+    {
+        Node tmp = lexer.inferredTag(tag.name);
+        Report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT);
+        node.was = node.tag;
+        node.tag = tag;
+        node.type = StartTag;
+        node.implicit = true;
+        node.element = tag.name;
+    }
+
+    /* extract a node and its children from a markup tree */
+    public static void removeNode(Node node)
+    {
+        if (node.prev != null)
+            node.prev.next = node.next;
+
+        if (node.next != null)
+            node.next.prev = node.prev;
+
+        if (node.parent != null)
+        {
+            if (node.parent.content == node)
+                node.parent.content = node.next;
+
+            if (node.parent.last == node)
+                node.parent.last = node.prev;
+        }
+
+        node.parent = node.prev = node.next = null;
+    }
+
+    public static boolean insertMisc(Node element, Node node)
+    {
+        if (node.type == CommentTag ||
+            node.type == ProcInsTag ||
+            node.type == CDATATag ||
+            node.type == SectionTag ||
+            node.type == AspTag ||
+            node.type == JsteTag ||
+            node.type == PhpTag)
+        {
+            insertNodeAtEnd(element, node);
+            return true;
+        }
+
+        return false;
+    }
+
+    /*
+     used to determine how attributes
+     without values should be printed
+     this was introduced to deal with
+     user defined tags e.g. Cold Fusion
+    */
+    public static boolean isNewNode(Node node)
+    {
+        if (node != null && node.tag != null)
+        {
+            return ((node.tag.model & Dict.CM_NEW) != 0);
+        }
+
+        return true;
+    }
+
+    public boolean hasOneChild()
+    {
+        return (this.content != null && this.content.next == null);
+    }
+
+    /* find html element */
+    public Node findHTML(TagTable tt)
+    {
+        Node node;
+
+        for (node = this.content;
+                node != null && node.tag != tt.tagHtml; node = node.next);
+
+        return node;
+    }
+
+    public Node findHEAD(TagTable tt)
+    {
+        Node node;
+
+        node = this.findHTML(tt);
+
+        if (node != null)
+        {
+            for (node = node.content;
+                node != null && node.tag != tt.tagHead;
+                node = node.next);
+        }
+
+        return node;
+    }
+
+    public boolean checkNodeIntegrity()
+    {
+        Node child;
+        boolean found = false;
+
+        if (this.prev != null)
+        {
+            if (this.prev.next != this)
+                return false;
+        }
+
+        if (this.next != null)
+        {
+            if (this.next.prev != this)
+                return false;
+        }
+
+        if (this.parent != null)
+        {
+            if (this.prev == null && this.parent.content != this)
+                return false;
+
+            if (this.next == null && this.parent.last != this)
+                return false;
+
+            for (child = this.parent.content; child != null; child = child.next)
+                if (child == this)
+                {
+                    found = true;
+                    break;
+                }
+
+            if (!found)
+                return false;
+        }
+
+        for (child = this.content; child != null; child = child.next)
+            if (!child.checkNodeIntegrity())
+                return false;
+
+        return true;
+    }
+
+    /*
+     Add class="foo" to node
+    */
+    public static void addClass(Node node, String classname)
+    {
+        AttVal classattr = node.getAttrByName("class");
+
+            /*
+             if there already is a class attribute
+             then append class name after a space
+            */
+            if (classattr != null)
+            {
+                classattr.value = classattr.value + " " + classname;
+            }
+            else /* create new class attribute */
+                node.addAttribute("class", classname);
+    }
+
+    /* --------------------- DEBUG -------------------------- */
+
+    private static final String[] nodeTypeString = /* display names, indexed by Node.type in toString() */
+    {
+        "RootNode",
+        "DocTypeTag",
+        "CommentTag",
+        "ProcInsTag",
+        "TextNode",
+        "StartTag",
+        "EndTag",
+        "StartEndTag",
+        "SectionTag",
+        "AspTag",
+        "PhpTag"    /* NOTE(review): there are no names for CDATATag or JsteTag,
+                       both of which insertMisc() tests for; confirm that the
+                       positions here match the numeric type constants */
+    };
+
+    public String toString()
+    {
+        String s = "";
+        Node n = this;
+
+        while (n != null) {
+            s += "[Node type=";
+            s += nodeTypeString[n.type];
+            s += ",element=";
+            if (n.element != null)
+                s += n.element;
+            else
+                s += "null";
+            if (n.type == TextNode ||
+                n.type == CommentTag ||
+                n.type == ProcInsTag) {
+                s += ",text=";
+                if (n.textarray != null && n.start <= n.end) {
+                    s += "\"";
+                    s += Lexer.getString(n.textarray, n.start, n.end - n.start);
+                    s += "\"";
+                } else {
+                    s += "null";
+                }
+            }
+            s += ",content=";
+            if (n.content != null)
+                s += n.content.toString();
+            else
+                s += "null";
+            s += "]";
+            if (n.next != null)
+                s += ",";
+            n = n.next;
+        }
+        return s;
+    }
+    /* --------------------- END DEBUG ---------------------- */
+
+
+    /* --------------------- DOM ---------------------------- */
+
+    protected org.w3c.dom.Node adapter = null;  /* DOM wrapper for this node; created on demand by getAdapter() */
+
+    /*
+     Returns the DOM wrapper for this node, creating and caching it on
+     first use.  The wrapper class is chosen from the node type; types
+     without a dedicated wrapper get a plain DOMNodeImpl.
+    */
+    protected org.w3c.dom.Node getAdapter()
+    {
+        if (adapter != null)
+            return adapter;
+
+        org.w3c.dom.Node wrapper;
+
+        switch (this.type)
+        {
+            case RootNode:
+                wrapper = new DOMDocumentImpl(this);
+                break;
+            case DocTypeTag:
+                wrapper = new DOMDocumentTypeImpl(this);
+                break;
+            case StartTag:
+            case StartEndTag:
+                wrapper = new DOMElementImpl(this);
+                break;
+            case TextNode:
+                wrapper = new DOMTextImpl(this);
+                break;
+            case CommentTag:
+                wrapper = new DOMCommentImpl(this);
+                break;
+            case CDATATag:
+                wrapper = new DOMCDATASectionImpl(this);
+                break;
+            case ProcInsTag:
+                wrapper = new DOMProcessingInstructionImpl(this);
+                break;
+            default:
+                wrapper = new DOMNodeImpl(this);
+                break;
+        }
+
+        adapter = wrapper;
+        return adapter;
+    }
+
+    /*
+     Returns a copy of this node made with clone().  When deep is true
+     every child is cloned recursively and appended to the copy.
+     NOTE(review): this presumes clone() does not carry over the
+     content/last links of the original - confirm against clone().
+    */
+    protected Node cloneNode(boolean deep)
+    {
+        Node copy = (Node)this.clone();
+
+        if (!deep)
+            return copy;
+
+        Node child = this.content;
+
+        while (child != null)
+        {
+            insertNodeAtEnd(copy, child.cloneNode(deep));
+            child = child.next;
+        }
+
+        return copy;
+    }
+
+
+    /* replaces this node's type code with newType */
+    protected void setType(short newType)
+    {
+        type = newType;
+    }
+
+    /* --------------------- END DOM ------------------------ */
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Out.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Out.java
new file mode 100644 (file)
index 0000000..49be756
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * @(#)Out.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Output Stream
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.OutputStream;
+
+public abstract class Out
+{
+    public int encoding;
+    public int state;     /* for ISO 2022 */
+    public OutputStream out;
+
+    public abstract void outc(int c);
+
+    public abstract void outc(byte c);
+
+    public abstract void newline();
+
+};
+
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/OutImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/OutImpl.java
new file mode 100644 (file)
index 0000000..5653e14
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * @(#)OutImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Output Stream Implementation
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.IOException;
+
+public class OutImpl extends Out
+{
+
+    public OutImpl()
+    {
+        this.out = null;
+    }
+
+    public void outc(byte c) {
+        outc(((int)c) & 0xFF);  // Convert to unsigned.
+    }
+
+    /* For mac users, should we map Unicode back to MacRoman? */
+    public void outc(int c)
+    {
+        int ch;
+
+        try {
+            if (this.encoding == Configuration.UTF8)
+            {
+                if (c < 128)
+                    this.out.write(c);
+                else if (c <= 0x7FF)
+                {
+                    ch = (0xC0 | (c >> 6)); this.out.write(ch);
+                    ch = (0x80 | (c & 0x3F)); this.out.write(ch);
+                }
+                else if (c <= 0xFFFF)
+                {
+                    ch = (0xE0 | (c >> 12)); this.out.write(ch);
+                    ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch);
+                    ch = (0x80 | (c & 0x3F)); this.out.write(ch);
+                }
+                else if (c <= 0x1FFFFF)
+                {
+                    ch = (0xF0 | (c >> 18)); this.out.write(ch);
+                    ch = (0x80 | ((c >> 12) & 0x3F)); this.out.write(ch);
+                    ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch);
+                    ch = (0x80 | (c & 0x3F)); this.out.write(ch);
+                }
+                else
+                {
+                    ch = (0xF8 | (c >> 24)); this.out.write(ch);
+                    ch = (0x80 | ((c >> 18) & 0x3F)); this.out.write(ch);
+                    ch = (0x80 | ((c >> 12) & 0x3F)); this.out.write(ch);
+                    ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch);
+                    ch = (0x80 | (c & 0x3F)); this.out.write(ch);
+                }
+            }
+            else if (this.encoding == Configuration.ISO2022)
+            {
+                if (c == 0x1b)  /* ESC */
+                    this.state = StreamIn.FSM_ESC;
+                else
+                {
+                    switch (this.state)
+                    {
+                    case StreamIn.FSM_ESC:
+                        if (c == '$')
+                            this.state = StreamIn.FSM_ESCD;
+                        else if (c == '(')
+                            this.state = StreamIn.FSM_ESCP;
+                        else
+                            this.state = StreamIn.FSM_ASCII;
+                        break;
+
+                    case StreamIn.FSM_ESCD:
+                        if (c == '(')
+                            this.state = StreamIn.FSM_ESCDP;
+                        else
+                            this.state = StreamIn.FSM_NONASCII;
+                        break;
+
+                    case StreamIn.FSM_ESCDP:
+                        this.state = StreamIn.FSM_NONASCII;
+                        break;
+
+                    case StreamIn.FSM_ESCP:
+                        this.state = StreamIn.FSM_ASCII;
+                        break;
+
+                    case StreamIn.FSM_NONASCII:
+                        c &= 0x7F;
+                        break;
+                    }
+                }
+
+                this.out.write(c);
+            }
+            else
+                this.out.write(c);
+        }
+        catch (IOException e) {
+            System.err.println("OutImpl.outc: " + e.toString());
+        }
+    }
+
+    public void newline()
+    {
+        try {
+            this.out.write(nlBytes);
+            this.out.flush();
+        }
+        catch (IOException e) {
+            System.err.println("OutImpl.newline: " + e.toString());
+        }
+    }
+
+    private static final byte[] nlBytes =
+        (System.getProperty("line.separator")).getBytes();
+
+};
+
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/PPrint.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/PPrint.java
new file mode 100644 (file)
index 0000000..435e606
--- /dev/null
@@ -0,0 +1,1845 @@
+/*
+ * @(#)PPrint.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Pretty print parse tree
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+/*
+  Block-level and unknown elements are printed on
+  new lines and their contents indented 2 spaces
+
+  Inline elements are printed inline.
+
+  Inline content is wrapped on spaces (except in
+  attribute values or preformatted text), after
+  start tags and before end tags.
+*/
+
+import java.io.FileOutputStream;
+import java.io.File;
+
+import java.io.IOException;
+import java.io.FileNotFoundException;
+
+public class PPrint {
+
+    /* page transition effects */
+
+    public static final short EFFECT_BLEND               = -1;
+    public static final short EFFECT_BOX_IN              = 0;
+    public static final short EFFECT_BOX_OUT             = 1;
+    public static final short EFFECT_CIRCLE_IN           = 2;
+    public static final short EFFECT_CIRCLE_OUT          = 3;
+    public static final short EFFECT_WIPE_UP             = 4;
+    public static final short EFFECT_WIPE_DOWN           = 5;
+    public static final short EFFECT_WIPE_RIGHT          = 6;
+    public static final short EFFECT_WIPE_LEFT           = 7;
+    public static final short EFFECT_VERT_BLINDS         = 8;
+    public static final short EFFECT_HORZ_BLINDS         = 9;
+    public static final short EFFECT_CHK_ACROSS          = 10;
+    public static final short EFFECT_CHK_DOWN            = 11;
+    public static final short EFFECT_RND_DISSOLVE        = 12;
+    public static final short EFFECT_SPLIT_VIRT_IN       = 13;
+    public static final short EFFECT_SPLIT_VIRT_OUT      = 14;
+    public static final short EFFECT_SPLIT_HORZ_IN       = 15;
+    public static final short EFFECT_SPLIT_HORZ_OUT      = 16;
+    public static final short EFFECT_STRIPS_LEFT_DOWN    = 17;
+    public static final short EFFECT_STRIPS_LEFT_UP      = 18;
+    public static final short EFFECT_STRIPS_RIGHT_DOWN   = 19;
+    public static final short EFFECT_STRIPS_RIGHT_UP     = 20;
+    public static final short EFFECT_RND_BARS_HORZ       = 21;
+    public static final short EFFECT_RND_BARS_VERT       = 22;
+    public static final short EFFECT_RANDOM              = 23;
+
+    private static final short NORMAL        = 0;   /* NORMAL..CDATA: bit flags combined in the mode argument of printChar() */
+    private static final short PREFORMATTED  = 1;
+    private static final short COMMENT       = 2;
+    private static final short ATTRIBVALUE   = 4;
+    private static final short NOWRAP        = 8;
+    private static final short CDATA         = 16;
+
+    private int[] linebuf = null;      /* pending output line, one character per int; grown by addC() */
+    private int lbufsize = 0;          /* capacity addC() has allocated for linebuf */
+    private int linelen = 0;           /* number of characters currently held in linebuf */
+    private int wraphere = 0;          /* index at which wrapLine() breaks the line; 0 means no break point */
+    private boolean inAttVal = false;  /* while set, flushLine() indents only if IndentAttributes is on */
+    private boolean InString = false;  /* while set, wrapLine() ends a wrapped line with a space and a backslash */
+
+    private int slide = 0;
+    private int count = 0;
+    private Node slidecontent = null;
+
+    private Configuration configuration;
+
+    public PPrint(Configuration configuration)
+    {
+        this.configuration = configuration;   /* kept for later use, e.g. wraplen and IndentAttributes in flushLine() */
+    }
+
+    /*
+      1010  A
+      1011  B
+      1100  C
+      1101  D
+      1110  E
+      1111  F
+    */
+
+    /* return one less than the number of bytes used by UTF-8 char */
+    /* str points to 1st byte, *ch initialized to 1st byte */
+    public static int getUTF8(byte[] str, int start, MutableInteger ch)
+    {
+        int c, n, i, bytes;
+
+        c = ((int)str[start]) & 0xFF;  // Convert to unsigned.
+
+        if ((c & 0xE0) == 0xC0)  /* 110X XXXX  two bytes */
+        {
+            n = c & 31;
+            bytes = 2;
+        }
+        else if ((c & 0xF0) == 0xE0)  /* 1110 XXXX  three bytes */
+        {
+            n = c & 15;
+            bytes = 3;
+        }
+        else if ((c & 0xF8) == 0xF0)  /* 1111 0XXX  four bytes */
+        {
+            n = c & 7;
+            bytes = 4;
+        }
+        else if ((c & 0xFC) == 0xF8)  /* 1111 10XX  five bytes */
+        {
+            n = c & 3;
+            bytes = 5;
+        }
+        else if ((c & 0xFE) == 0xFC)       /* 1111 110X  six bytes */
+
+        {
+            n = c & 1;
+            bytes = 6;
+        }
+        else  /* 0XXX XXXX one byte */
+        {
+            ch.value = c;
+            return 0;
+        }
+
+        /* successor bytes should have the form 10XX XXXX */
+        for (i = 1; i < bytes; ++i)
+        {
+            c = ((int)str[start + i])  & 0xFF;  // Convert to unsigned.
+            n = (n << 6) | (c & 0x3F);
+        }
+
+        ch.value = n;
+        return bytes - 1;
+    }
+
+    /* store char c as UTF-8 encoded byte stream */
+    public static int putUTF8(byte[] buf, int start, int c)
+    {
+        if (c < 128)
+            buf[start++] = (byte)c;
+        else if (c <= 0x7FF)
+        {
+            buf[start++] = (byte)(0xC0 | (c >> 6));
+            buf[start++] = (byte)(0x80 | (c & 0x3F));
+        }
+        else if (c <= 0xFFFF)
+        {
+            buf[start++] =  (byte)(0xE0 | (c >> 12));
+            buf[start++] =  (byte)(0x80 | ((c >> 6) & 0x3F));
+            buf[start++] =  (byte)(0x80 | (c & 0x3F));
+        }
+        else if (c <= 0x1FFFFF)
+        {
+            buf[start++] =  (byte)(0xF0 | (c >> 18));
+            buf[start++] =  (byte)(0x80 | ((c >> 12) & 0x3F));
+            buf[start++] =  (byte)(0x80 | ((c >> 6) & 0x3F));
+            buf[start++] =  (byte)(0x80 | (c & 0x3F));
+        }
+        else
+        {
+            buf[start++] =  (byte)(0xF8 | (c >> 24));
+            buf[start++] =  (byte)(0x80 | ((c >> 18) & 0x3F));
+            buf[start++] =  (byte)(0x80 | ((c >> 12) & 0x3F));
+            buf[start++] =  (byte)(0x80 | ((c >> 6) & 0x3F));
+            buf[start++] =  (byte)(0x80 | (c & 0x3F));
+        }
+
+        return start;
+    }
+
+    /*
+     Stores c at linebuf[index], first growing the buffer (256
+     entries, then doubling) until index + 1 fits below its size.
+     linelen is not changed here; callers advance it themselves.
+    */
+    private void addC(int c, int index)
+    {
+        if (index + 1 >= lbufsize)
+        {
+            int newSize = lbufsize;
+
+            do
+            {
+                newSize = (newSize == 0) ? 256 : newSize * 2;
+            }
+            while (index + 1 >= newSize);
+
+            int[] grown = new int[newSize];
+
+            /* carry over the whole old buffer; copying 'index' entries
+               would overrun it whenever index lies past its end */
+            if (linebuf != null)
+                System.arraycopy(linebuf, 0, grown, 0,
+                                 Math.min(linebuf.length, grown.length));
+
+            linebuf = grown;
+            lbufsize = newSize;   /* recorded only once the allocation succeeded */
+        }
+
+        linebuf[index] = c;
+    }
+
+    /*
+     Writes the buffered line up to the wrap point (preceded by indent
+     spaces) followed by a newline, then moves whatever lies beyond the
+     wrap point to the front of the buffer.  Does nothing when no wrap
+     point has been recorded.
+    */
+    private void wrapLine(Out fout, int indent)
+    {
+        int i;
+
+        if (wraphere == 0)
+            return;
+
+        for (i = 0; i < indent; ++i)
+            fout.outc((int)' ');
+
+        for (i = 0; i < wraphere; ++i)
+            fout.outc(linebuf[i]);
+
+        /* continue a wrapped string with a space and a backslash */
+        if (InString)
+        {
+            fout.outc((int)' ');
+            fout.outc((int)'\\');
+        }
+
+        fout.newline();
+
+        if (linelen > wraphere)
+        {
+            /* the space at the wrap point is replaced by the line break */
+            if (linebuf[wraphere] == ' ')
+                ++wraphere;
+
+            /* terminate the line as before; this also guarantees that
+               linebuf[linelen] exists */
+            addC('\0', linelen);
+
+            /* shift the remainder and its terminator to the front; a
+               length based copy is not cut short by a NUL in the text */
+            System.arraycopy(linebuf, wraphere, linebuf, 0,
+                             linelen - wraphere + 1);
+
+            linelen -= wraphere;
+        }
+        else
+            linelen = 0;
+
+        wraphere = 0;
+    }
+
+    /*
+     Like wrapLine, but always writes the part before the wrap point:
+     the wrapped line ends with a space, plus a backslash when
+     inString is set.  The remainder is moved to the front of the
+     buffer.
+    */
+    private void wrapAttrVal(Out fout, int indent, boolean inString)
+    {
+        int i;
+
+        for (i = 0; i < indent; ++i)
+            fout.outc((int)' ');
+
+        for (i = 0; i < wraphere; ++i)
+            fout.outc(linebuf[i]);
+
+        fout.outc((int)' ');
+
+        if (inString)
+            fout.outc((int)'\\');
+
+        fout.newline();
+
+        if (linelen > wraphere)
+        {
+            /* the space at the wrap point is replaced by the line break */
+            if (linebuf[wraphere] == ' ')
+                ++wraphere;
+
+            /* terminate the line as before; this also guarantees that
+               linebuf[linelen] exists */
+            addC('\0', linelen);
+
+            /* shift the remainder and its terminator to the front; a
+               length based copy is not cut short by a NUL in the text */
+            System.arraycopy(linebuf, wraphere, linebuf, 0,
+                             linelen - wraphere + 1);
+
+            linelen -= wraphere;
+        }
+        else
+            linelen = 0;
+
+        wraphere = 0;
+    }
+
+    /**
+     * Writes the pending line (wrapping it first when it has reached the
+     * wrap margin), always terminates it with a newline, and resets the
+     * line state. A newline is written even when the buffer is empty.
+     *
+     * @param fout   sink the line is written to
+     * @param indent number of spaces written in front of the line
+     */
+    public void flushLine(Out fout, int indent)
+    {
+        if (linelen > 0)
+        {
+            if (indent + linelen >= this.configuration.wraplen)
+                wrapLine(fout, indent);
+
+            /* inside an attribute value the indent is only written when
+               attributes are being indented */
+            boolean writeIndent = !inAttVal || this.configuration.IndentAttributes;
+            if (writeIndent)
+            {
+                int pad = 0;
+                while (pad < indent)
+                {
+                    fout.outc((int)' ');
+                    pad++;
+                }
+            }
+
+            int pos = 0;
+            while (pos < linelen)
+            {
+                fout.outc(linebuf[pos]);
+                pos++;
+            }
+        }
+
+        fout.newline();
+
+        inAttVal = false;
+        wraphere = 0;
+        linelen = 0;
+    }
+
+    /**
+     * Like flushLine, but does nothing at all (no newline, no state reset)
+     * when the line buffer is empty.
+     *
+     * @param fout   sink the line is written to
+     * @param indent number of spaces written in front of the line
+     */
+    public void condFlushLine(Out fout, int indent)
+    {
+        if (linelen <= 0)
+            return;
+
+        if (indent + linelen >= this.configuration.wraplen)
+            wrapLine(fout, indent);
+
+        /* inside an attribute value the indent is only written when
+           attributes are being indented */
+        boolean writeIndent = !inAttVal || this.configuration.IndentAttributes;
+        if (writeIndent)
+        {
+            int pad = 0;
+            while (pad < indent)
+            {
+                fout.outc((int)' ');
+                pad++;
+            }
+        }
+
+        int pos = 0;
+        while (pos < linelen)
+        {
+            fout.outc(linebuf[pos]);
+            pos++;
+        }
+
+        fout.newline();
+
+        inAttVal = false;
+        wraphere = 0;
+        linelen = 0;
+    }
+
+    /**
+     * Appends one character to the line buffer, replacing it by an entity
+     * or an ASCII substitute where the print mode and the configuration
+     * call for it. A plain space outside preformatted/comment/attribute
+     * text also records a wrap point (or becomes a non-breaking space when
+     * NOWRAP is set).
+     *
+     * @param c    character code to print
+     * @param mode bit set of PREFORMATTED, COMMENT, ATTRIBVALUE, NOWRAP, CDATA
+     */
+    private void printChar(int c, short mode)
+    {
+        String entity;
+
+        if (c == ' ' && !((mode & (PREFORMATTED | COMMENT | ATTRIBVALUE)) != 0))
+        {
+            /* coerce a space character to a non-breaking space */
+            if ((mode & NOWRAP) != 0)
+            {
+                /* by default XML doesn't define &nbsp; */
+                if (this.configuration.NumEntities || this.configuration.XmlTags)
+                {
+                    addC('&', linelen++);
+                    addC('#', linelen++);
+                    addC('1', linelen++);
+                    addC('6', linelen++);
+                    addC('0', linelen++);
+                    addC(';', linelen++);
+                }
+                else /* otherwise use named entity */
+                {
+                    addC('&', linelen++);
+                    addC('n', linelen++);
+                    addC('b', linelen++);
+                    addC('s', linelen++);
+                    addC('p', linelen++);
+                    addC(';', linelen++);
+                }
+                return;
+            }
+            else
+                wraphere = linelen;
+        }
+
+        /* comment characters are passed raw */
+        if ((mode & COMMENT) != 0)
+        {
+            addC(c, linelen++);
+            return;
+        }
+
+        /* except in CDATA map < to &lt; etc. */
+        if (! ((mode & CDATA) != 0) )
+        {
+            if (c == '<')
+            {
+                addC('&', linelen++);
+                addC('l', linelen++);
+                addC('t', linelen++);
+                addC(';', linelen++);
+                return;
+            }
+            
+            if (c == '>')
+            {
+                addC('&', linelen++);
+                addC('g', linelen++);
+                addC('t', linelen++);
+                addC(';', linelen++);
+                return;
+            }
+
+            /*
+              naked '&' chars can be left alone or
+              quoted as &amp; The latter is required
+              for XML where naked '&' are illegal.
+            */
+            if (c == '&' && this.configuration.QuoteAmpersand)
+            {
+                addC('&', linelen++);
+                addC('a', linelen++);
+                addC('m', linelen++);
+                addC('p', linelen++);
+                addC(';', linelen++);
+                return;
+            }
+
+            if (c == '"' && this.configuration.QuoteMarks)
+            {
+                addC('&', linelen++);
+                addC('q', linelen++);
+                addC('u', linelen++);
+                addC('o', linelen++);
+                addC('t', linelen++);
+                addC(';', linelen++);
+                return;
+            }
+
+            if (c == '\'' && this.configuration.QuoteMarks)
+            {
+                addC('&', linelen++);
+                addC('#', linelen++);
+                addC('3', linelen++);
+                addC('9', linelen++);
+                addC(';', linelen++);
+                return;
+            }
+
+            if (c == 160 && this.configuration.CharEncoding != Configuration.RAW)
+            {
+                if (this.configuration.QuoteNbsp)
+                {
+                    addC('&', linelen++);
+
+                    if (this.configuration.NumEntities)
+                    {
+                        addC('#', linelen++);
+                        addC('1', linelen++);
+                        addC('6', linelen++);
+                        addC('0', linelen++);
+                    }
+                    else
+                    {
+                        addC('n', linelen++);
+                        addC('b', linelen++);
+                        addC('s', linelen++);
+                        addC('p', linelen++);
+                    }
+
+                    addC(';', linelen++);
+                }
+                else
+                    addC(c, linelen++);
+
+                return;
+            }
+        }
+
+        /* otherwise ISO 2022 characters are passed raw */
+        if (this.configuration.CharEncoding == Configuration.ISO2022 ||
+            this.configuration.CharEncoding == Configuration.RAW)
+        {
+            addC(c, linelen++);
+            return;
+        }
+
+        /* if preformatted text, map &nbsp; to space */
+        if (c == 160 && ((mode & PREFORMATTED) != 0))
+        {
+            addC(' ', linelen++);
+            return;
+        }
+
+        /*
+         Filters from Word and PowerPoint often use smart
+         quotes and dashes (seen here as the Unicode code
+         points U+2013..U+201E). Unfortunately, the
+         corresponding HTML 4.0 entities for these are not
+         widely supported. The following converts dashes and
+         quotation marks to the nearest ASCII equivalent.
+         My thanks to Andrzej Novosiolov for his help with
+         this code.
+        */
+
+        if (this.configuration.MakeClean)
+        {
+            if (c >= 0x2013 && c <= 0x201E)
+            {
+                switch (c) {
+                case 0x2013:
+                case 0x2014:
+                  c = '-';
+                  break;
+                case 0x2018:
+                case 0x2019:
+                case 0x201A:
+                  c = '\'';
+                  break;
+                case 0x201C:
+                case 0x201D:
+                case 0x201E:
+                  c = '"';
+                  break;
+                }
+            }
+        }
+
+        /* don't map latin-1 chars to entities */
+        if (this.configuration.CharEncoding == Configuration.LATIN1)
+        {
+            if (c > 255)  /* multi byte chars */
+            {
+                /*
+                  entityName takes a short, so only codes that survive
+                  the narrowing cast are looked up; a larger code would
+                  otherwise alias to an unrelated entity (e.g. 0x100A0
+                  would truncate to 160) and is written numerically.
+                */
+                if (!this.configuration.NumEntities && c <= Short.MAX_VALUE)
+                {
+                    entity = EntityTable.getDefaultEntityTable().entityName((short)c);
+                    if (entity != null)
+                        entity = "&" + entity + ";";
+                    else
+                        entity = "&#" + c + ";";
+                }
+                else
+                    entity = "&#" + c + ";";
+
+                for (int i = 0; i < entity.length(); i++)
+                    addC((int)entity.charAt(i), linelen++);
+
+                return;
+            }
+
+            if (c > 126 && c < 160)
+            {
+                entity = "&#" + c + ";";
+
+                for (int i = 0; i < entity.length(); i++)
+                    addC((int)entity.charAt(i), linelen++);
+
+                return;
+            }
+
+            addC(c, linelen++);
+            return;
+        }
+
+        /* don't map utf8 chars to entities */
+        if (this.configuration.CharEncoding == Configuration.UTF8)
+        {
+            addC(c, linelen++);
+            return;
+        }
+
+        /* use numeric entities only  for XML */
+        if (this.configuration.XmlTags)
+        {
+            /* if ASCII use numeric entities for chars > 127 */
+            if (c > 127 && this.configuration.CharEncoding == Configuration.ASCII)
+            {
+                entity = "&#" + c + ";";
+
+                for (int i = 0; i < entity.length(); i++)
+                    addC((int)entity.charAt(i), linelen++);
+
+                return;
+            }
+
+            /* otherwise output char raw */
+            addC(c, linelen++);
+            return;
+        }
+
+        /* default treatment for ASCII */
+        if (c > 126 || (c < ' ' && c != '\t'))
+        {
+            /* same short-range guard as in the latin-1 branch above */
+            if (!this.configuration.NumEntities && c <= Short.MAX_VALUE)
+            {
+                entity = EntityTable.getDefaultEntityTable().entityName((short)c);
+                if (entity != null)
+                    entity = "&" + entity + ";";
+                else
+                    entity = "&#" + c + ";";
+            }
+            else
+                entity = "&#" + c + ";";
+
+            for (int i = 0; i < entity.length(); i++)
+                addC((int)entity.charAt(i), linelen++);
+
+            return;
+        }
+
+        addC(c, linelen++);
+    }
+
+    /* 
+      The line buffer is uint not char so we can
+      hold Unicode values unencoded. The translation
+      to UTF-8 is deferred to the outc routine called
+      to flush the line buffer.
+    */
+    /**
+     * Prints the bytes textarray[start..end) through printChar, decoding
+     * multibyte sequences with getUTF8, wrapping when the line reaches the
+     * wrap margin and flushing the line at every newline character.
+     *
+     * @param fout      sink that wrapped/flushed lines are written to
+     * @param mode      print mode bits handed on to printChar
+     * @param indent    indentation used when wrapping or flushing
+     * @param textarray buffer holding the text bytes
+     * @param start     index of the first byte to print
+     * @param end       index one past the last byte to print
+     */
+    private void printText(Out fout, short mode, int indent,
+                           byte[] textarray, int start, int end)
+    {
+        MutableInteger decoded = new MutableInteger();
+        int pos = start;
+
+        while (pos < end)
+        {
+            if (indent + linelen >= this.configuration.wraplen)
+                wrapLine(fout, indent);
+
+            /* bytes are signed in Java: mask to get the unsigned value */
+            int ch = textarray[pos] & 0xFF;
+
+            /* a byte above 0x7F is handed to getUTF8 for decoding */
+            if (ch > 0x7F)
+            {
+                pos += getUTF8(textarray, pos, decoded);
+                ch = decoded.value;
+            }
+
+            pos++;
+
+            if (ch == '\n')
+                flushLine(fout, indent);
+            else
+                printChar(ch, mode);
+        }
+    }
+
+    /**
+     * Appends the characters of str to the line buffer unchanged (no
+     * escaping, no wrapping). The fout and indent arguments are not used.
+     */
+    private void printString(Out fout, int indent, String str)
+    {
+        int count = str.length();
+        int k = 0;
+
+        while (k < count)
+        {
+            addC((int)str.charAt(k), linelen++);
+            k++;
+        }
+    }
+
+    /**
+     * Appends "=" followed by the delimited attribute value to the line
+     * buffer. Occurrences of the delimiter inside the value are always
+     * written as entities; the other quote character is written as an
+     * entity only when QuoteMarks is set. Values that start with an
+     * ASP/Tango/PHP opener are printed in CDATA mode so that '<', '>'
+     * and '&' pass through printChar unescaped.
+     *
+     * @param fout      sink that wrapped/flushed lines are written to
+     * @param indent    indentation used when wrapping or flushing
+     * @param value     attribute value, or null to print just the delimiters
+     * @param delim     quote character to use; 0 selects '"'
+     * @param wrappable whether the value may be broken at spaces
+     */
+    private void printAttrValue(Out fout, int indent, String value, int delim, boolean wrappable)
+    {
+        int c;
+        MutableInteger ci = new MutableInteger();
+        boolean wasinstring = false;
+        byte[] valueChars = null;
+        int i;
+        short mode = (wrappable ? (short)(NORMAL | ATTRIBVALUE) :
+                                  (short)(PREFORMATTED | ATTRIBVALUE));
+
+        if (value != null)
+        {
+            valueChars = Lexer.getBytes(value);
+        }
+
+        /* look for ASP, Tango or PHP instructions for computed attribute value */
+        if (valueChars != null && valueChars.length >= 5 && valueChars[0] == '<')
+        {
+            /*
+              "<?php" is matched byte by byte so that the test does not
+              depend on the platform default charset.
+            */
+            if (valueChars[1] == '%' || valueChars[1] == '@' ||
+                (valueChars[1] == '?' && valueChars[2] == 'p' &&
+                 valueChars[3] == 'h' && valueChars[4] == 'p'))
+                mode |= CDATA;
+        }
+
+        if (delim == 0)
+            delim = '"';
+
+        addC('=', linelen++);
+
+        /* don't wrap after "=" for xml documents */
+        if (!this.configuration.XmlOut) {
+
+            if (indent + linelen < this.configuration.wraplen)
+                wraphere = linelen;
+
+            if (indent + linelen >= this.configuration.wraplen)
+                wrapLine(fout, indent);
+
+            if (indent + linelen < this.configuration.wraplen)
+                wraphere = linelen;
+            else
+                condFlushLine(fout, indent);
+        }
+
+        addC(delim, linelen++);
+
+        if (value != null)
+        {
+            InString = false;
+
+            i = 0;
+            while (i < valueChars.length)
+            {
+                c = ((int)valueChars[i]) & 0xFF;  // Convert to unsigned.
+
+                /* remember the last space that still fits as the wrap point */
+                if (wrappable && c == ' ' && indent + linelen < this.configuration.wraplen)
+                {
+                    wraphere = linelen;
+                    wasinstring = InString;
+                }
+
+                if (wrappable && wraphere > 0 && indent + linelen >= this.configuration.wraplen)
+                    wrapAttrVal(fout, indent, wasinstring);
+
+                if (c == delim)
+                {
+                    String entity;
+
+                    /* the delimiter itself must never appear raw inside the value */
+                    entity = (c == '"' ? "&quot;" : "&#39;");
+
+                    for (int j = 0; j < entity.length(); j++ )
+                        addC(entity.charAt(j), linelen++);
+
+                    ++i;
+                    continue;
+                }
+                else if (c == '"')
+                {
+                    if (this.configuration.QuoteMarks)
+                    {
+                        addC('&', linelen++);
+                        addC('q', linelen++);
+                        addC('u', linelen++);
+                        addC('o', linelen++);
+                        addC('t', linelen++);
+                        addC(';', linelen++);
+                    }
+                    else
+                        addC('"', linelen++);
+
+                    /* the other quote character toggles the in-string state */
+                    if (delim == '\'')
+                        InString = !InString;
+
+                    ++i;
+                    continue;
+                }
+                else if (c == '\'')
+                {
+                    if (this.configuration.QuoteMarks)
+                    {
+                        addC('&', linelen++);
+                        addC('#', linelen++);
+                        addC('3', linelen++);
+                        addC('9', linelen++);
+                        addC(';', linelen++);
+                    }
+                    else
+                        addC('\'', linelen++);
+
+                    if (delim == '"')
+                        InString = !InString;
+
+                    ++i;
+                    continue;
+                }
+
+                /* look for UTF-8 multibyte character */
+                if (c > 0x7F)
+                {
+                     i += getUTF8(valueChars, i, ci);
+                     c = ci.value;
+                }
+
+                ++i;
+
+                if (c == '\n')
+                {
+                    flushLine(fout, indent);
+                    continue;
+                }
+
+                printChar(c, mode);
+            }
+        }
+
+        InString = false;
+        addC(delim, linelen++);
+    }
+
+    /**
+     * Appends one attribute (a space, the case-folded name and, where
+     * applicable, its value) to the line buffer.
+     *
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param indent current indentation; increased by configuration.spaces
+     *               when attributes are indented onto their own lines
+     * @param node   element the attribute belongs to
+     * @param attr   attribute to print
+     */
+    private void printAttribute(Out fout, int indent, Node node, AttVal attr)
+    {
+        boolean wrappable = false;
+
+        /* with IndentAttributes every attribute starts on a fresh line */
+        if (this.configuration.IndentAttributes)
+        {
+            flushLine(fout, indent);
+            indent += this.configuration.spaces;
+        }
+
+        String name = attr.attribute;
+
+        if (indent + linelen >= this.configuration.wraplen)
+            wrapLine(fout, indent);
+
+        /* values are only ever wrapped for HTML output of known attributes */
+        boolean xmlMode = this.configuration.XmlTags || this.configuration.XmlOut;
+        if (!xmlMode && attr.dict != null)
+        {
+            if (AttributeTable.getDefaultAttributeTable().isScript(name))
+                wrappable = this.configuration.WrapScriptlets;
+            else
+                wrappable = !attr.dict.nowrap && this.configuration.WrapAttVals;
+        }
+
+        /* the separating space is a wrap point if it still fits on the line */
+        if (indent + linelen < this.configuration.wraplen)
+            wraphere = linelen;
+        else
+            condFlushLine(fout, indent);
+        addC(' ', linelen++);
+
+        int k = 0;
+        while (k < name.length())
+        {
+            addC((int)Lexer.foldCase(name.charAt(k),
+                                     this.configuration.UpperCaseAttrs,
+                                     this.configuration.XmlTags),
+                 linelen++);
+            k++;
+        }
+
+        if (indent + linelen >= this.configuration.wraplen)
+            wrapLine(fout, indent);
+
+        if (attr.value != null)
+        {
+            printAttrValue(fout, indent, attr.value, attr.delim, wrappable);
+            return;
+        }
+
+        /* attribute without a value */
+        if (this.configuration.XmlTags || this.configuration.XmlOut)
+        {
+            /* XML output: the attribute name doubles as its value */
+            printAttrValue(fout, indent, attr.attribute, attr.delim, true);
+        }
+        else if (!attr.isBoolAttribute() && !Node.isNewNode(node))
+        {
+            printAttrValue(fout, indent, "", attr.delim, true);
+        }
+        else if (indent + linelen < this.configuration.wraplen)
+        {
+            wraphere = linelen;
+        }
+    }
+
+    /**
+     * Prints the attribute list starting at attr. The rest of the list is
+     * printed first (by recursion), so attributes come out in the reverse
+     * of their linked order. ASP and PHP pseudo attributes are printed via
+     * printAsp / printPhp.
+     *
+     * When XML output with XmlSpace is configured and the node preserves
+     * white space, an xml:space="preserve" attribute is appended once,
+     * after all other attributes.
+     *
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param indent current indentation
+     * @param node   element whose attributes are printed
+     * @param attr   first attribute to print; callers pass node.attributes
+     */
+    private void printAttrs(Out fout, int indent,
+                            Node node, AttVal attr)
+    {
+        if (attr != null)
+        {
+            if (attr.next != null)
+                printAttrs(fout, indent, node, attr.next);
+
+            if (attr.attribute != null)
+                printAttribute(fout, indent, node, attr);
+            else if (attr.asp != null)
+            {
+                addC(' ', linelen++);
+                printAsp(fout, indent, attr.asp);
+            }
+            else if (attr.php != null)
+            {
+                addC(' ', linelen++);
+                printPhp(fout, indent, attr.php);
+            }
+        }
+
+        /*
+          add xml:space attribute to pre and other elements.
+          This method recurses once per attribute, so the attribute is
+          only added by the outermost call (the one handed the head of
+          the node's list); otherwise it would be repeated for every
+          attribute and the element would no longer be well-formed.
+        */
+        if (configuration.XmlOut &&
+                configuration.XmlSpace &&
+                attr == node.attributes &&
+                ParserImpl.XMLPreserveWhiteSpace(node, configuration.tt) &&
+                node.getAttrByName("xml:space") == null)
+            printString(fout, indent, " xml:space=\"preserve\"");
+    }
+
+    /*
+     Line can be wrapped immediately after inline start tag provided
+     if follows a text node ending in a space, or it parent is an
+     inline element that that rule applies to. This behaviour was
+     reverse engineered from Netscape 3.0
+    */
+    /**
+     * Tells whether node directly follows white space. Walks up through
+     * the parents for as long as the current node is an inline element
+     * without a previous sibling.
+     *
+     * @return true when the walk reaches a null node, a node without a tag
+     *         or a non-inline element, or when the previous sibling is a
+     *         non-empty text node whose last byte is 160, a space or a
+     *         newline; false for any other previous sibling
+     */
+    private static boolean afterSpace(Node node)
+    {
+        Node cur = node;
+
+        while (cur != null && cur.tag != null && (cur.tag.model & Dict.CM_INLINE) != 0)
+        {
+            Node before = cur.prev;
+
+            if (before != null)
+            {
+                if (before.type != Node.TextNode || before.end <= before.start)
+                    return false;
+
+                /* bytes are signed in Java: mask to get the unsigned value */
+                int last = before.textarray[before.end - 1] & 0xFF;
+
+                return last == 160 || last == ' ' || last == '\n';
+            }
+
+            /* first child of an inline element: the parent decides */
+            cur = cur.parent;
+        }
+
+        return true;
+    }
+
+    /**
+     * Appends a start tag (or an end tag when node.type is EndTag) with
+     * its attributes to the line buffer and then decides whether the line
+     * may be wrapped right after it.
+     *
+     * @param lexer  lexer of the document, may be null
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param mode   print mode bits (PREFORMATTED and NOWRAP are consulted)
+     * @param indent current indentation
+     * @param node   element to print
+     */
+    private void printTag(Lexer lexer, Out fout, short mode, int indent, Node node)
+    {
+        TagTable tt = this.configuration.tt;
+
+        addC('<', linelen++);
+
+        if (node.type == Node.EndTag)
+            addC('/', linelen++);
+
+        String tagName = node.element;
+        int k = 0;
+        while (k < tagName.length())
+        {
+            addC((int)Lexer.foldCase(tagName.charAt(k),
+                                     this.configuration.UpperCaseTags,
+                                     this.configuration.XmlTags),
+                 linelen++);
+            k++;
+        }
+
+        printAttrs(fout, indent, node, node.attributes);
+
+        /* XML style output closes empty elements with " />" */
+        if ((this.configuration.XmlOut || lexer != null && lexer.isvoyager) &&
+                (node.type == Node.StartEndTag || (node.tag.model & Dict.CM_EMPTY) != 0))
+        {
+            addC(' ', linelen++);   /* compatibility hack */
+            addC('/', linelen++);
+        }
+
+        addC('>', linelen++);
+
+        if (node.type == Node.StartEndTag || (mode & PREFORMATTED) != 0)
+            return;
+
+        if (indent + linelen >= this.configuration.wraplen)
+            wrapLine(fout, indent);
+
+        if (indent + linelen >= this.configuration.wraplen)
+        {
+            condFlushLine(fout, indent);
+            return;
+        }
+
+        /*
+         wrap after start tag if is <br/> or if it's not
+         inline or it is an empty tag followed by </a>
+        */
+        if (!afterSpace(node))
+            return;
+
+        if ((mode & NOWRAP) != 0)
+            return;
+
+        if ((node.tag.model & Dict.CM_INLINE) == 0 ||
+            node.tag == tt.tagBr ||
+            ((node.tag.model & Dict.CM_EMPTY) != 0 &&
+             node.next == null &&
+             node.parent.tag == tt.tagA))
+        {
+            wraphere = linelen;
+        }
+    }
+
+    /**
+     * Appends the end tag for node ("</" + case-folded element name + ">")
+     * to the line buffer. The fout, mode and indent arguments are not used.
+     */
+    private void printEndTag(Out fout, short mode, int indent, Node node)
+    {
+       /*
+         Netscape ignores SGML standard by not ignoring a
+         line break before </A> or </U> etc. To avoid rendering
+         this as an underlined space, no wrap point is recorded
+         before an end tag (the code doing so was disabled upstream).
+       */
+
+        addC('<', linelen++);
+        addC('/', linelen++);
+
+        String tagName = node.element;
+        int k = 0;
+        while (k < tagName.length())
+        {
+            addC((int)Lexer.foldCase(tagName.charAt(k),
+                                     this.configuration.UpperCaseTags,
+                                     this.configuration.XmlTags),
+                 linelen++);
+            k++;
+        }
+
+        addC('>', linelen++);
+    }
+
+    /**
+     * Appends a comment to the line buffer: the opening "<!--", the
+     * comment text printed raw (COMMENT mode) and the closing "-->".
+     * The line is flushed afterwards when node.linebreak is set.
+     */
+    private void printComment(Out fout, int indent, Node node)
+    {
+        /* the line may be broken in front of the comment */
+        if (indent + linelen < this.configuration.wraplen)
+            wraphere = linelen;
+
+        String open = "<!--";
+        for (int k = 0; k < open.length(); k++)
+            addC(open.charAt(k), linelen++);
+
+        /* no wrap points are recorded around the comment text
+           (the code doing so was disabled upstream) */
+        printText(fout, COMMENT, indent,
+                        node.textarray, node.start, node.end);
+
+        // See Lexer.java: AQ 8Jul2000
+        String close = "-->";
+        for (int k = 0; k < close.length(); k++)
+            addC(close.charAt(k), linelen++);
+
+        if (node.linebreak)
+            flushLine(fout, indent);
+    }
+
+    /**
+     * Prints a DOCTYPE declaration on a line of its own. QuoteMarks is
+     * switched off while the declaration is printed so that the quotes
+     * around its identifiers are not turned into entities; the previous
+     * setting is restored afterwards, even if printing fails.
+     *
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param indent current indentation
+     * @param node   doctype node whose text is printed
+     */
+    private void printDocType(Out fout, int indent, Node node)
+    {
+        boolean q = this.configuration.QuoteMarks;
+
+        this.configuration.QuoteMarks = false;
+
+        try
+        {
+            if (indent + linelen < this.configuration.wraplen)
+                wraphere = linelen;
+
+            condFlushLine(fout, indent);
+
+            addC('<', linelen++);
+            addC('!', linelen++);
+            addC('D', linelen++);
+            addC('O', linelen++);
+            addC('C', linelen++);
+            addC('T', linelen++);
+            addC('Y', linelen++);
+            addC('P', linelen++);
+            addC('E', linelen++);
+            addC(' ', linelen++);
+
+            if (indent + linelen < this.configuration.wraplen)
+                wraphere = linelen;
+
+            printText(fout, (short)0, indent,
+                            node.textarray, node.start, node.end);
+
+            if (linelen < this.configuration.wraplen)
+                wraphere = linelen;
+
+            addC('>', linelen++);
+        }
+        finally
+        {
+            /* the configuration object outlives this call: always put
+               the caller's setting back */
+            this.configuration.QuoteMarks = q;
+        }
+
+        condFlushLine(fout, indent);
+    }
+
+    /**
+     * Prints a processing instruction as "<?" + text + "?>" and flushes
+     * the line. The text is printed in CDATA mode; a '?' is only added
+     * before the closing '>' when the text does not already end in one.
+     *
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param indent current indentation
+     * @param node   processing instruction node
+     */
+    private void printPI(Out fout, int indent, Node node)
+    {
+        if (indent + linelen < this.configuration.wraplen)
+            wraphere = linelen;
+
+        addC('<', linelen++);
+        addC('?', linelen++);
+
+        /* set CDATA to pass < and > unescaped */
+        printText(fout, CDATA, indent,
+                    node.textarray, node.start, node.end);
+
+        /* guard the look-back: with node.end == 0 there is no last byte
+           to inspect and indexing end - 1 would be out of bounds */
+        if (node.end <= 0 || node.textarray[node.end - 1] != (byte)'?')
+            addC('?', linelen++);
+
+        addC('>', linelen++);
+        condFlushLine(fout, indent);
+    }
+
+    /* note ASP and JSTE share <% ... %> syntax */
+    /**
+     * Appends an ASP block ("<%" + text + "%>") to the line buffer.
+     * Wrapping is disabled for the duration of the call unless both
+     * WrapAsp and WrapJste are set (the two share the <% ... %> syntax);
+     * the configured wrap margin is restored afterwards, even if printing
+     * fails.
+     *
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param indent current indentation
+     * @param node   ASP node whose text is printed
+     */
+    private void printAsp(Out fout, int indent, Node node)
+    {
+        int savewraplen = this.configuration.wraplen;
+
+        /* disable wrapping if so requested */
+
+        if (!this.configuration.WrapAsp || !this.configuration.WrapJste)
+            this.configuration.wraplen = 0xFFFFFF;  /* a very large number */
+
+        try
+        {
+            addC('<', linelen++);
+            addC('%', linelen++);
+
+            printText(fout, (this.configuration.WrapAsp ? CDATA : COMMENT), indent,
+                        node.textarray, node.start, node.end);
+
+            addC('%', linelen++);
+            addC('>', linelen++);
+            /* condFlushLine(fout, indent); */
+        }
+        finally
+        {
+            /* the configuration object outlives this call: always put
+               the wrap margin back */
+            this.configuration.wraplen = savewraplen;
+        }
+    }
+
+    /* JSTE also supports <# ... #> syntax */
+    /**
+     * Appends a JSTE block ("<#" + text + "#>") to the line buffer.
+     * Wrapping is disabled for the duration of the call unless WrapJste
+     * is set; the configured wrap margin is restored afterwards, even if
+     * printing fails.
+     *
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param indent current indentation
+     * @param node   JSTE node whose text is printed
+     */
+    private void printJste(Out fout, int indent, Node node)
+    {
+        int savewraplen = this.configuration.wraplen;
+
+        /* disable wrapping if so requested */
+
+        if (!this.configuration.WrapJste)
+            this.configuration.wraplen = 0xFFFFFF;  /* a very large number */
+
+        try
+        {
+            addC('<', linelen++);
+            addC('#', linelen++);
+
+            printText(fout, (this.configuration.WrapJste ? CDATA : COMMENT), indent,
+                        node.textarray, node.start, node.end);
+
+            addC('#', linelen++);
+            addC('>', linelen++);
+            /* condFlushLine(fout, indent); */
+        }
+        finally
+        {
+            /* the configuration object outlives this call: always put
+               the wrap margin back */
+            this.configuration.wraplen = savewraplen;
+        }
+    }
+
+    /* PHP is based on XML processing instructions */
+    /**
+     * Appends a PHP block ("<?" + text + "?>") to the line buffer.
+     * Wrapping is disabled for the duration of the call unless WrapPhp
+     * is set; the configured wrap margin is restored afterwards, even if
+     * printing fails.
+     *
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param indent current indentation
+     * @param node   PHP node whose text is printed
+     */
+    private void printPhp(Out fout, int indent, Node node)
+    {
+        int savewraplen = this.configuration.wraplen;
+
+        /* disable wrapping if so requested */
+
+        if (!this.configuration.WrapPhp)
+            this.configuration.wraplen = 0xFFFFFF;  /* a very large number */
+
+        try
+        {
+            addC('<', linelen++);
+            addC('?', linelen++);
+
+            printText(fout, (this.configuration.WrapPhp ? CDATA : COMMENT), indent,
+                            node.textarray, node.start, node.end);
+
+            addC('?', linelen++);
+            addC('>', linelen++);
+            /* PCondFlushLine(fout, indent); */
+        }
+        finally
+        {
+            /* the configuration object outlives this call: always put
+               the wrap margin back */
+            this.configuration.wraplen = savewraplen;
+        }
+    }
+
+    /**
+     * Prints a CDATA section ("<![CDATA[" + raw text + "]]>") on a line
+     * of its own. Wrapping is disabled while the section is printed and
+     * flushed; the configured wrap margin is restored afterwards, even if
+     * printing fails.
+     *
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param indent current indentation
+     * @param node   CDATA node whose text is printed
+     */
+    private void printCDATA(Out fout, int indent, Node node)
+    {
+        int savewraplen = this.configuration.wraplen;
+
+        /* the pending line is flushed with the normal wrap margin */
+        condFlushLine(fout, indent);
+
+        /* disable wrapping */
+
+        this.configuration.wraplen = 0xFFFFFF;  /* a very large number */
+
+        try
+        {
+            addC('<', linelen++);
+            addC('!', linelen++);
+            addC('[', linelen++);
+            addC('C', linelen++);
+            addC('D', linelen++);
+            addC('A', linelen++);
+            addC('T', linelen++);
+            addC('A', linelen++);
+            addC('[', linelen++);
+
+            printText(fout, COMMENT, indent,
+                            node.textarray, node.start, node.end);
+
+            addC(']', linelen++);
+            addC(']', linelen++);
+            addC('>', linelen++);
+            condFlushLine(fout, indent);
+        }
+        finally
+        {
+            /* the configuration object outlives this call: always put
+               the wrap margin back */
+            this.configuration.wraplen = savewraplen;
+        }
+    }
+
+    /**
+     * Appends a marked section ("<![" + text + "]>") to the line buffer.
+     * Wrapping is disabled for the duration of the call unless
+     * WrapSection is set; the configured wrap margin is restored
+     * afterwards, even if printing fails.
+     *
+     * @param fout   sink that wrapped/flushed lines are written to
+     * @param indent current indentation
+     * @param node   section node whose text is printed
+     */
+    private void printSection(Out fout, int indent, Node node)
+    {
+        int savewraplen = this.configuration.wraplen;
+
+        /* disable wrapping if so requested */
+
+        if (!this.configuration.WrapSection)
+            this.configuration.wraplen = 0xFFFFFF;  /* a very large number */
+
+        try
+        {
+            addC('<', linelen++);
+            addC('!', linelen++);
+            addC('[', linelen++);
+
+            printText(fout, (this.configuration.WrapSection ? CDATA : COMMENT), indent,
+                            node.textarray, node.start, node.end);
+
+            addC(']', linelen++);
+            addC('>', linelen++);
+            /* PCondFlushLine(fout, indent); */
+        }
+        finally
+        {
+            /* the configuration object outlives this call: always put
+               the wrap margin back */
+            this.configuration.wraplen = savewraplen;
+        }
+    }
+
+    /**
+     * Decides whether the content of node is to be indented.
+     *
+     * @return false when IndentContent is off; with SmartIndent, a
+     *         CM_NO_INDENT element that has content is indented only if
+     *         one of its children is a block element, and headings, p and
+     *         title are never indented; otherwise field/object elements
+     *         and map are indented, and any other element is indented
+     *         exactly when it is not inline
+     */
+    private boolean shouldIndent(Node node)
+    {
+        final TagTable tt = this.configuration.tt;
+
+        if (!this.configuration.IndentContent)
+            return false;
+
+        if (this.configuration.SmartIndent)
+        {
+            if (node.content != null && (node.tag.model & Dict.CM_NO_INDENT) != 0)
+            {
+                /* indent only if there is a block level child */
+                Node child = node.content;
+                while (child != null)
+                {
+                    if (child.tag != null && (child.tag.model & Dict.CM_BLOCK) != 0)
+                        return true;
+                    child = child.next;
+                }
+
+                return false;
+            }
+
+            if ((node.tag.model & Dict.CM_HEADING) != 0 ||
+                node.tag == tt.tagP ||
+                node.tag == tt.tagTitle)
+                return false;
+        }
+
+        if ((node.tag.model & (Dict.CM_FIELD | Dict.CM_OBJECT)) != 0 ||
+            node.tag == tt.tagMap)
+            return true;
+
+        return (node.tag.model & Dict.CM_INLINE) == 0;
+    }
+
+    public void printTree(Out fout, short mode, int indent,
+                          Lexer lexer, Node node)
+    {
+        Node content, last;
+        TagTable tt = this.configuration.tt;
+
+        if (node == null)
+            return;
+
+        if (node.type == Node.TextNode)
+            printText(fout, mode, indent,
+                        node.textarray, node.start, node.end);
+        else if (node.type == Node.CommentTag)
+        {
+            printComment(fout, indent, node);
+        }
+        else if (node.type == Node.RootNode)
+        {
+            for (content = node.content;
+                    content != null;
+                    content = content.next)
+               printTree(fout, mode, indent, lexer, content);
+        }
+        else if (node.type == Node.DocTypeTag)
+            printDocType(fout, indent, node);
+        else if (node.type == Node.ProcInsTag)
+            printPI(fout, indent, node);
+        else if (node.type == Node.CDATATag)
+            printCDATA(fout, indent, node);
+        else if (node.type == Node.SectionTag)
+            printSection(fout, indent, node);
+        else if (node.type == Node.AspTag)
+            printAsp(fout, indent, node);
+        else if (node.type == Node.JsteTag)
+            printJste(fout, indent, node);
+        else if (node.type == Node.PhpTag)
+            printPhp(fout, indent, node);
+        else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag)
+        {
+            if (!((node.tag.model & Dict.CM_INLINE) != 0))
+                condFlushLine(fout, indent);
+
+            if (node.tag == tt.tagBr && node.prev != null &&
+                node.prev.tag != tt.tagBr && this.configuration.BreakBeforeBR)
+                flushLine(fout, indent);
+
+            if (this.configuration.MakeClean && node.tag == tt.tagWbr)
+                printString(fout, indent, " ");
+            else
+                printTag(lexer, fout, mode, indent, node);
+
+            if (node.tag == tt.tagParam || node.tag == tt.tagArea)
+                condFlushLine(fout, indent);
+            else if (node.tag == tt.tagBr || node.tag == tt.tagHr)
+                flushLine(fout, indent);
+        }
+        else /* some kind of container element */
+        {
+            if (node.tag != null && node.tag.parser == ParserImpl.getParsePre())
+            {
+                condFlushLine(fout, indent);
+
+                indent = 0;
+                condFlushLine(fout, indent);
+                printTag(lexer, fout, mode, indent, node);
+                flushLine(fout, indent);
+
+                for (content = node.content;
+                        content != null;
+                        content = content.next)
+                    printTree(fout, (short)(mode | PREFORMATTED | NOWRAP), indent, lexer, content);
+
+                condFlushLine(fout, indent);
+                printEndTag(fout, mode, indent, node);
+                flushLine(fout, indent);
+
+                if (this.configuration.IndentContent == false && node.next != null)
+                    flushLine(fout, indent);
+            }
+            else if (node.tag == tt.tagStyle || node.tag == tt.tagScript)
+            {
+                condFlushLine(fout, indent);
+
+                indent = 0;
+                condFlushLine(fout, indent);
+                printTag(lexer, fout, mode, indent, node);
+                flushLine(fout, indent);
+
+                for (content = node.content;
+                        content != null;
+                        content = content.next)
+                    printTree(fout, (short)(mode | PREFORMATTED | NOWRAP |CDATA), indent, lexer, content);
+
+                condFlushLine(fout, indent);
+                printEndTag(fout, mode, indent, node);
+                flushLine(fout, indent);
+
+                if (this.configuration.IndentContent == false && node.next != null)
+                    flushLine(fout, indent);
+            }
+            else if ((node.tag.model & Dict.CM_INLINE) != 0)
+            {
+                if (this.configuration.MakeClean)
+                {
+                    /* discards <font> and </font> tags */
+                    if (node.tag == tt.tagFont)
+                    {
+                        for (content = node.content;
+                                content != null;
+                                content = content.next)
+                            printTree(fout, mode, indent, lexer, content);
+                        return;
+                    }
+
+                    /* replace <nobr>...</nobr> by &nbsp; or &#160; etc. */
+                    if (node.tag == tt.tagNobr)
+                    {
+                        for (content = node.content;
+                                content != null;
+                                content = content.next)
+                            printTree(fout, (short)(mode|NOWRAP), indent, lexer, content);
+                        return;
+                    }
+                }
+
+                /* otherwise a normal inline element */
+
+                printTag(lexer, fout, mode, indent, node);
+
+                /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */
+
+                if (shouldIndent(node))
+                {
+                    condFlushLine(fout, indent);
+                    indent += this.configuration.spaces;
+
+                    for (content = node.content;
+                            content != null;
+                            content = content.next)
+                        printTree(fout, mode, indent, lexer, content);
+
+                    condFlushLine(fout, indent);
+                    indent -= this.configuration.spaces;
+                    condFlushLine(fout, indent);
+                }
+                else
+                {
+
+                    for (content = node.content;
+                            content != null;
+                            content = content.next)
+                        printTree(fout, mode, indent, lexer, content);
+                }
+
+                printEndTag(fout, mode, indent, node);
+            }
+            else /* other tags */
+            {
+                condFlushLine(fout, indent);
+
+                if (this.configuration.SmartIndent && node.prev != null)
+                    flushLine(fout, indent);
+
+                if (this.configuration.HideEndTags == false ||
+                    !(node.tag != null && ((node.tag.model & Dict.CM_OMITST) != 0)))
+                {
+                    printTag(lexer, fout, mode, indent, node);
+
+                    if (shouldIndent(node))
+                        condFlushLine(fout, indent);
+                    else if ((node.tag.model & Dict.CM_HTML) != 0 ||
+                             node.tag == tt.tagNoframes ||
+                                ((node.tag.model & Dict.CM_HEAD) != 0 &&
+                                !(node.tag == tt.tagTitle)))
+                        flushLine(fout, indent);
+                }
+
+                if (node.tag == tt.tagBody && this.configuration.BurstSlides)
+                    printSlide(fout, mode, (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer);
+                else
+                {
+                    last = null;
+
+                    for (content = node.content;
+                            content != null; content = content.next)
+                    {
+                        /* kludge for naked text before block level tag */
+                        if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode &&
+                            content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0)
+                        {
+                            flushLine(fout, indent);
+                            flushLine(fout, indent);
+                        }
+
+                        printTree(fout, mode,
+                            (shouldIndent(node) ? indent+this.configuration.spaces : indent), lexer, content);
+
+                        last = content;
+                    }
+                }
+
+                /* don't flush line for td and th */
+                if (shouldIndent(node) ||
+                    (((node.tag.model & Dict.CM_HTML) != 0 || node.tag == tt.tagNoframes ||
+                        ((node.tag.model & Dict.CM_HEAD) != 0 && !(node.tag == tt.tagTitle)))
+                        && this.configuration.HideEndTags == false))
+                {
+                    condFlushLine(fout, (this.configuration.IndentContent ? indent+this.configuration.spaces : indent));
+
+                    if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0))
+                    {
+                        printEndTag(fout, mode, indent, node);
+                        flushLine(fout, indent);
+                    }
+                }
+                else
+                {
+                    if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0))
+                        printEndTag(fout, mode, indent, node);
+
+                    flushLine(fout, indent);
+                }
+
+                if (this.configuration.IndentContent == false &&
+                    node.next != null &&
+                    this.configuration.HideEndTags == false &&
+                    (node.tag.model & (Dict.CM_BLOCK|Dict.CM_LIST|Dict.CM_DEFLIST|Dict.CM_TABLE)) != 0)
+                {
+                    flushLine(fout, indent);
+                }
+            }
+        }
+    }
+
+    public void printXMLTree(Out fout, short mode, int indent,
+                             Lexer lexer, Node node)
+    {
+        TagTable tt = this.configuration.tt;
+
+        if (node == null)
+            return;
+
+        if (node.type == Node.TextNode)
+        {
+            printText(fout, mode, indent,
+                        node.textarray, node.start, node.end);
+        }
+        else if (node.type == Node.CommentTag)
+        {
+            condFlushLine(fout, indent);
+            printComment(fout, 0, node);
+            condFlushLine(fout, 0);
+        }
+        else if (node.type == Node.RootNode)
+        {
+            Node content;
+
+            for (content = node.content;
+                    content != null;
+                    content = content.next)
+               printXMLTree(fout, mode, indent, lexer, content);
+        }
+        else if (node.type == Node.DocTypeTag)
+            printDocType(fout, indent, node);
+        else if (node.type == Node.ProcInsTag)
+            printPI(fout, indent, node);
+        else if (node.type == Node.SectionTag)
+            printSection(fout, indent, node);
+        else if (node.type == Node.AspTag)
+            printAsp(fout, indent, node);
+        else if (node.type == Node.JsteTag)
+            printJste(fout, indent, node);
+        else if (node.type == Node.PhpTag)
+            printPhp(fout, indent, node);
+        else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag)
+        {
+            condFlushLine(fout, indent);
+            printTag(lexer, fout, mode, indent, node);
+            flushLine(fout, indent);
+
+            if (node.next != null)
+                flushLine(fout, indent);
+        }
+        else /* some kind of container element */
+        {
+            Node content;
+            boolean mixed = false;
+            int cindent;
+
+            for (content = node.content; content != null; content = content.next)
+            {
+                if (content.type == Node.TextNode)
+                {
+                    mixed = true;
+                    break;
+                }
+            }
+
+            condFlushLine(fout, indent);
+
+            if (ParserImpl.XMLPreserveWhiteSpace(node, tt))
+            {
+                indent = 0;
+                cindent = 0;
+                mixed = false;
+            }
+            else if (mixed)
+                cindent = indent;
+            else
+                cindent = indent + this.configuration.spaces;
+
+            printTag(lexer, fout, mode, indent, node);
+
+            if (!mixed)
+                flushLine(fout, indent);
+            for (content = node.content;
+                    content != null;
+                    content = content.next)
+                printXMLTree(fout, mode, cindent, lexer, content);
+
+            if (!mixed)
+                condFlushLine(fout, cindent);
+            printEndTag(fout, mode, indent, node);
+            condFlushLine(fout, indent);
+
+            if (node.next != null)
+                flushLine(fout, indent);
+        }
+    }
+
+
+    /* split parse tree by h2 elements and output to separate files */
+
+    /* counts number of h2 children belonging to node */
+    public int countSlides(Node node)
+    {
+        int n = 1;
+        TagTable tt = this.configuration.tt;
+
+        for (node = node.content; node != null; node = node.next)
+            if (node.tag == tt.tagH2)
+                ++n;
+
+        return n;
+    }
+
+    /*
+       inserts a space gif called "dot.gif" to ensure
+       that the  slide is at least n pixels high
+     */
+    private void printVertSpacer(Out fout, int indent)
+    {
+        condFlushLine(fout, indent);
+        printString(fout, indent , 
+        "<img width=\"0\" height=\"0\" hspace=\"1\" src=\"dot.gif\" vspace=\"%d\" align=\"left\">");
+        condFlushLine(fout, indent);
+    }
+
+    /*
+       Prints the centred navigation bar for the current slide.
+       Uses the fields slide (current slide number) and count (total):
+       "previous" and "start" links appear from the second slide on,
+       a "next" link appears on every slide but the last.
+     */
+    private void printNavBar(Out fout, int indent)
+    {
+        boolean hasPrevious = slide > 1;
+        boolean hasNext = slide < count;
+
+        condFlushLine(fout, indent);
+        printString(fout, indent, "<center><small>");
+
+        if (hasPrevious)
+        {
+            printString(fout, indent,
+                "<a href=\"slide" + (slide - 1) + ".html\">previous</a> | ");
+            condFlushLine(fout, indent);
+
+            /* the separator after "start" is only needed when "next" follows */
+            printString(fout, indent,
+                hasNext ? "<a href=\"slide1.html\">start</a> | "
+                        : "<a href=\"slide1.html\">start</a>");
+            condFlushLine(fout, indent);
+        }
+
+        if (hasNext)
+        {
+            printString(fout, indent,
+                "<a href=\"slide" + (slide + 1) + ".html\">next</a>");
+        }
+
+        printString(fout, indent, "</small></center>");
+        condFlushLine(fout, indent);
+    }
+
+    /*
+      Called from printTree to print the content of a slide from
+      the node slidecontent. On return slidecontent points to the
+      node starting the next slide or null. The variables slide
+      and count are used to customise the navigation bar.
+    */
+    public void printSlide(Out fout, short mode, int indent, Lexer lexer)
+    {
+        Node content, last;
+        TagTable tt = this.configuration.tt;
+
+        /* insert div for onclick handler */
+        String s;
+        s = "<div onclick=\"document.location='slide" +
+            (new Integer(slide < count ? slide + 1 : 1)).toString() +
+            ".html'\">";
+        printString(fout, indent, s);
+        condFlushLine(fout, indent);
+
+        /* first print the h2 element and navbar */
+        if (slidecontent.tag == tt.tagH2)
+        {
+            printNavBar(fout, indent);
+
+            /* now print an hr after h2 */
+
+            addC('<', linelen++);
+
+
+            addC((int)Lexer.foldCase('h',
+                                     this.configuration.UpperCaseTags,
+                                     this.configuration.XmlTags),
+                 linelen++);
+            addC((int)Lexer.foldCase('r',
+                                     this.configuration.UpperCaseTags,
+                                     this.configuration.XmlTags),
+                 linelen++);
+
+            if (this.configuration.XmlOut == true)
+                printString(fout, indent , " />");
+            else
+                addC('>', linelen++);
+
+
+            if (this.configuration.IndentContent == true)
+                condFlushLine(fout, indent);
+
+            /* PrintVertSpacer(fout, indent); */
+
+            /*condFlushLine(fout, indent); */
+
+            /* print the h2 element */
+            printTree(fout, mode,
+                (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, slidecontent);
+
+            slidecontent = slidecontent.next;
+        }
+    
+        /* now continue until we reach the next h2 */
+
+        last = null;
+        content = slidecontent;
+
+        for (; content != null; content = content.next)
+        {
+            if (content.tag == tt.tagH2)
+                break;
+
+            /* kludge for naked text before block level tag */
+            if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode &&
+                content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0)
+            {
+                flushLine(fout, indent);
+                flushLine(fout, indent);
+            }
+
+            printTree(fout, mode,
+                (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, content);
+
+            last = content;
+        }
+
+        slidecontent = content;
+
+        /* now print epilog */
+
+        condFlushLine(fout, indent);
+
+        printString(fout, indent , "<br clear=\"all\">");
+        condFlushLine(fout, indent);
+
+        addC('<', linelen++);
+
+
+        addC((int)Lexer.foldCase('h',
+                                 this.configuration.UpperCaseTags,
+                                 this.configuration.XmlTags),
+             linelen++);
+        addC((int)Lexer.foldCase('r',
+                                 this.configuration.UpperCaseTags,
+                                 this.configuration.XmlTags),
+             linelen++);
+
+        if (this.configuration.XmlOut == true)
+            printString(fout, indent , " />");
+        else
+            addC('>', linelen++);
+
+
+        if (this.configuration.IndentContent == true)
+            condFlushLine(fout, indent);
+
+        printNavBar(fout, indent);
+
+        /* end tag for div */
+        printString(fout, indent, "</div>");
+        condFlushLine(fout, indent);
+    }
+
+
+    /*
+    Add meta element for page transition effect, this works on IE but not NS
+    */
+
+    public void addTransitionEffect(Lexer lexer, Node root, short effect, double duration)
+    {
+        Node head = root.findHEAD(lexer.configuration.tt);
+        String transition;
+
+        if (0 <= effect && effect <= 23)
+            transition = "revealTrans(Duration=" +
+                         (new Double(duration)).toString() +
+                         ",Transition=" + effect + ")";
+        else
+            transition = "blendTrans(Duration=" +
+                         (new Double(duration)).toString() + ")";
+
+        if (head != null)
+        {
+            Node meta = lexer.inferredTag("meta");
+            meta.addAttribute("http-equiv", "Page-Enter");
+            meta.addAttribute("content", transition);
+            Node.insertNodeAtStart(head, meta);
+        }
+    }
+
+    public void createSlides(Lexer lexer, Node root)
+    {
+        Node body;
+        String buf;
+        Out out = new OutImpl();
+
+        body = root.findBody(lexer.configuration.tt);
+        count = countSlides(body);
+        slidecontent = body.content;
+        addTransitionEffect(lexer, root, EFFECT_BLEND, 3.0);
+
+        for (slide = 1; slide <= count; ++slide)
+        {
+            buf = "slide" + slide + ".html";
+            out.state = StreamIn.FSM_ASCII;
+            out.encoding = this.configuration.CharEncoding;
+
+            try
+            {
+                out.out = new FileOutputStream(buf);
+                printTree(out, (short)0, 0, lexer, root);
+                flushLine(out, 0);
+                out.out.close();
+            }
+            catch (IOException e)
+            {
+                System.err.println(buf + e.toString() );
+            }
+        }
+
+        /*
+         delete superfluous slides by deleting slideN.html
+         for N = count+1, count+2, etc. until no such file
+         is found.     
+        */
+
+        for (;;)
+        {
+            buf = "slide" + slide + "html";
+
+            if (!(new File(buf)).delete())
+                break;
+
+            ++slide;
+        }
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Parser.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Parser.java
new file mode 100644 (file)
index 0000000..ed8ee7c
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * @(#)Parser.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * HTML Parser
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public interface Parser {
+
+    public void parse( Lexer lexer, Node node, short mode );
+
+}
+
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/ParserImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/ParserImpl.java
new file mode 100644 (file)
index 0000000..8ac6ecb
--- /dev/null
@@ -0,0 +1,3205 @@
+/*
+ * @(#)ParserImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * HTML Parser implementation
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class ParserImpl {
+
+    //private static int SeenBodyEndTag;  /* AQ: moved into lexer structure */
+
+    /*
+      Runs the content parser registered for node's tag, if there is one.
+
+      Local fix by GLP 2000-12-21: lexer.insertspace has to be reset for
+      tags that are both non-inline and empty (base, link, meta, isindex,
+      hr, area). The code this was derived from tested CM_EMPTY first and
+      returned before the non-inline reset could happen; here the reset
+      comes first. Remove this deviation once the fix is made in Tidy.
+    */
+    private static void parseTag(Lexer lexer, Node node, short mode)
+    {
+        boolean inline = (node.tag.model & Dict.CM_INLINE) != 0;
+        boolean empty = (node.tag.model & Dict.CM_EMPTY) != 0;
+
+        if (!inline)
+            lexer.insertspace = false;
+
+        /* empty elements have no content to parse */
+        if (empty)
+        {
+            lexer.waswhite = false;
+            return;
+        }
+
+        /* <foo/> style tags and tags without a parser are left alone */
+        if (node.type != Node.StartEndTag && node.tag.parser != null)
+            node.tag.parser.parse(lexer, node, mode);
+    }
+
+    /*
+      Relocates a start tag (or start-end tag) found in the wrong place
+      into the document's head element and then parses its content.
+      Anything else is reported as unexpected and dropped. The html
+      element is located by walking up the parent chain from element.
+    */
+    private static void moveToHead(Lexer lexer, Node element, Node node)
+    {
+        TagTable tt = lexer.configuration.tt;
+
+        if (node.type != Node.StartTag && node.type != Node.StartEndTag)
+        {
+            Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+            return;
+        }
+
+        Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
+
+        /* climb to the enclosing html element */
+        Node html = element;
+        while (html.tag != tt.tagHtml)
+            html = html.parent;
+
+        /* append to the first head child, if any */
+        Node child = html.content;
+        boolean placed = false;
+
+        while (child != null && !placed)
+        {
+            if (child.tag == tt.tagHead)
+            {
+                Node.insertNodeAtEnd(child, node);
+                placed = true;
+            }
+            else
+                child = child.next;
+        }
+
+        if (node.tag.parser != null)
+            parseTag(lexer, node, Lexer.IgnoreWhitespace);
+    }
+
+    public static class ParseHTML implements Parser {
+
+        public void parse( Lexer lexer, Node html, short mode )
+        {
+            Node node, head;
+            Node frameset = null;
+            Node noframes = null;
+
+            lexer.configuration.XmlTags = false;
+            lexer.seenBodyEndTag = 0;
+            TagTable tt = lexer.configuration.tt;
+
+            for (;;)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+
+                if (node == null)
+                {
+                    node = lexer.inferredTag("head");
+                    break;
+                }
+
+                if (node.tag == tt.tagHead)
+                    break;
+
+                if (node.tag == html.tag && node.type == Node.EndTag)
+                {
+                    Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(html, node))
+                    continue;
+
+                lexer.ungetToken();
+                node = lexer.inferredTag("head");
+                break;
+            }
+
+            head = node;
+            Node.insertNodeAtEnd(html, head);
+            getParseHead().parse(lexer, head, mode);
+
+            for (;;)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+
+                if (node == null)
+                {
+                    if (frameset == null) /* create an empty body */
+                        node = lexer.inferredTag("body");
+
+                    return;
+                }
+
+                /* robustly handle html tags */
+                if (node.tag == html.tag)
+                {
+                    if (node.type != Node.StartTag && frameset == null)
+                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+
+                    continue;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(html, node))
+                    continue;
+
+                /* if frameset document coerce <body> to <noframes> */
+                if (node.tag == tt.tagBody)
+                {
+                    if (node.type != Node.StartTag)
+                    {
+                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    if (frameset != null)
+                    {
+                        lexer.ungetToken();
+
+                        if (noframes == null)
+                        {
+                            noframes = lexer.inferredTag("noframes");
+                            Node.insertNodeAtEnd(frameset, noframes);
+                            Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
+                        }
+
+                        parseTag(lexer, noframes, mode);
+                        continue;
+                    }
+
+                    break;  /* to parse body */
+                }
+
+                /* flag an error if we see more than one frameset */
+                if (node.tag == tt.tagFrameset)
+                {
+                    if (node.type != Node.StartTag)
+                    {
+                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    if (frameset != null)
+                        Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
+                    else
+                        frameset = node;
+
+                    Node.insertNodeAtEnd(html, node);
+                    parseTag(lexer, node, mode);
+
+                    /*
+                      see if it includes a noframes element so
+                      that we can merge subsequent noframes elements
+                    */
+
+                    for (node = frameset.content; node != null; node = node.next)
+                    {
+                        if (node.tag == tt.tagNoframes)
+                            noframes = node;
+                    }
+                    continue;
+                }
+
+                /* if not a frameset document coerce <noframes> to <body> */
+                if (node.tag == tt.tagNoframes)
+                {
+                    if (node.type != Node.StartTag)
+                    {
+                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    if (frameset == null)
+                    {
+                        Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
+                        node = lexer.inferredTag("body");
+                        break;
+                    }
+
+                    if (noframes == null)
+                    {
+                        noframes = node;
+                        Node.insertNodeAtEnd(frameset, noframes);
+                    }
+
+                    parseTag(lexer, noframes, mode);
+                    continue;
+                }
+
+                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                {
+                    if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
+                    {
+                        moveToHead(lexer, html, node);
+                        continue;
+                    }
+                }
+
+                lexer.ungetToken();
+
+                /* insert other content into noframes element */
+
+                if (frameset != null)
+                {
+                    if (noframes == null)
+                    {
+                        noframes = lexer.inferredTag("noframes");
+                        Node.insertNodeAtEnd(frameset, noframes);
+                    }
+                    else
+                        Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
+
+                    parseTag(lexer, noframes, mode);
+                    continue;
+                }
+
+                node = lexer.inferredTag("body");
+                break;
+            }
+
+            /* node must be body */
+
+            Node.insertNodeAtEnd(html, node);
+            parseTag(lexer, node, mode);
+        }
+
+    };
+
+    public static class ParseHead implements Parser {
+
+        public void parse( Lexer lexer, Node head, short mode )
+        {
+            Node node;
+            int HasTitle = 0;
+            int HasBase = 0;
+            TagTable tt = lexer.configuration.tt;
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                if (node.tag == head.tag && node.type == Node.EndTag)
+                {
+                    head.closed = true;
+                    break;
+                }
+
+                if (node.type == Node.TextNode)
+                {
+                    lexer.ungetToken();
+                    break;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(head, node))
+                    continue;
+
+                if (node.type == Node.DocTypeTag)
+                {
+                    Node.insertDocType(lexer, head, node);
+                    continue;
+                }
+
+                /* discard unknown tags */
+                if (node.tag == null)
+                {
+                    Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+        
+                if (!((node.tag.model & Dict.CM_HEAD) != 0))
+                {
+                    lexer.ungetToken();
+                    break;
+                }
+
+                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                {
+                    if (node.tag == tt.tagTitle)
+                    {
+                        ++HasTitle;
+
+                        if (HasTitle > 1)
+                            Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
+                    }
+                    else if (node.tag == tt.tagBase)
+                    {
+                        ++HasBase;
+
+                        if (HasBase > 1)
+                            Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
+                    }
+                    else if (node.tag == tt.tagNoscript)
+                        Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
+
+                    Node.insertNodeAtEnd(head, node);
+                    parseTag(lexer, node, Lexer.IgnoreWhitespace);
+                    continue;
+                }
+
+                /* discard unexpected text nodes and end tags */
+                Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
+            }
+
+            if (HasTitle == 0)
+            {
+                Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
+                Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
+            }
+        }
+
+    };
+
+    public static class ParseTitle implements Parser {
+
+        public void parse( Lexer lexer, Node title, short mode )
+        {
+            Node node;
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.MixedContent);
+                if (node == null) break;
+                if (node.tag == title.tag && node.type == Node.EndTag)
+                {
+                    title.closed = true;
+                    Node.trimSpaces(lexer, title);
+                    return;
+                }
+
+                if (node.type == Node.TextNode)
+                {
+                    /* only called for 1st child */
+                    if (title.content == null)
+                        Node.trimInitialSpace(lexer, title, node);
+
+                    if (node.start >= node.end)
+                    {
+                        continue;
+                    }
+
+                    Node.insertNodeAtEnd(title, node);
+                    continue;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(title, node))
+                    continue;
+
+                /* discard unknown tags */
+                if (node.tag == null)
+                {
+                    Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* pushback unexpected tokens */
+                Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
+                lexer.ungetToken();
+                Node.trimSpaces(lexer, title);
+                return;
+            }
+
+            Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
+        }
+
+    };
+
+    public static class ParseScript implements Parser {
+
+        public void parse( Lexer lexer, Node script, short mode )
+        {
+        /*
+          This isn't quite right for CDATA content as it recognises
+          tags within the content and parses them accordingly.
+          This will unfortunately screw up scripts which include
+          < + letter,  < + !, < + ?  or  < + / + letter
+        */
+
+            Node node;
+
+            node = lexer.getCDATA( script);
+
+            if (node != null)
+                Node.insertNodeAtEnd(script, node);
+        }
+
+    };
+
+    /**
+     * Parser for the content of a BODY element.
+     *
+     * Tokens are pulled from the lexer one at a time and either inserted
+     * below <code>body</code>, handed to another parser, pushed back for
+     * the caller, or discarded with a warning. The loop ends when the
+     * lexer returns no more tokens, when control has to return to an
+     * enclosing NOFRAMES, or when a token is met that cannot be handled
+     * at this level.
+     */
+    public static class ParseBody implements Parser {
+
+        /**
+         * @param lexer source of tokens; several of its fields
+         *              (seenBodyEndTag, versions, excludeBlocks) are updated here
+         * @param body  the BODY node that receives the parsed children
+         * @param mode  incoming value is ignored: it is overwritten with
+         *              Lexer.IgnoreWhitespace before the first token is read
+         */
+        public void parse( Lexer lexer, Node body, short mode )
+        {
+            Node node;
+            boolean checkstack, iswhitenode;
+
+            mode = Lexer.IgnoreWhitespace;
+            /* checkstack gates a single lexer.inlineDup() call per run of inline content */
+            checkstack = true;
+            TagTable tt = lexer.configuration.tt;
+
+            while (true)
+            {
+                node = lexer.getToken(mode);
+                if (node == null) break;
+                /* </body>: mark closed but keep reading, unless nested in <noframes> */
+                if (node.tag == body.tag && node.type == Node.EndTag)
+                {
+                    body.closed = true;
+                    Node.trimSpaces(lexer, body);
+                    lexer.seenBodyEndTag = 1;
+                    mode = Lexer.IgnoreWhitespace;
+
+                    if (body.parent.tag == tt.tagNoframes)
+                        break;
+
+                    continue;
+                }
+        
+                if (node.tag == tt.tagNoframes)
+                {
+                    /* <noframes> start tag: parse it as a block child of body */
+                    if (node.type == Node.StartTag)
+                    {
+                        Node.insertNodeAtEnd(body, node);
+                        getParseBlock().parse(lexer, node, mode);
+                        continue;
+                    }
+
+                    /* </noframes> while inside <noframes>: push back and let the parent close */
+                    if (node.type == Node.EndTag &&
+                        body.parent.tag == tt.tagNoframes)
+                    {
+                        Node.trimSpaces(lexer, body);
+                        lexer.ungetToken();
+                        break;
+                    }
+                }
+
+                /* frame / frameset inside a noframes body also returns control to the parent */
+                if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)
+                    && body.parent.tag == tt.tagNoframes)
+                {
+                    Node.trimSpaces(lexer, body);
+                    lexer.ungetToken();
+                    break;
+                }
+        
+                /* html tags are dropped; only start / start-end forms are reported */
+                if (node.tag == tt.tagHtml)
+                {
+                    if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                        Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
+
+                    continue;
+                }
+
+                iswhitenode = false;
+
+                /* a text node of at most one byte whose first byte is a space */
+                if (node.type == Node.TextNode &&
+                       node.end <= node.start + 1 &&
+                       node.textarray[node.start] == (byte)' ')
+                    iswhitenode = true;
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(body, node))
+                    continue;
+
+                /* warn once about content after </body>: the counter moves from 1 to 2 */
+                if (lexer.seenBodyEndTag == 1 && !iswhitenode)
+                {
+                    ++lexer.seenBodyEndTag;
+                    Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
+                }
+
+                /* mixed content model permits text */
+                if (node.type == Node.TextNode)
+                {
+                    /* drop a lone space while whitespace is being ignored */
+                    if (iswhitenode && mode == Lexer.IgnoreWhitespace)
+                    {
+                        continue;
+                    }
+
+                    /* option EncloseBodyText: re-read the text as content of an inferred <p> */
+                    if (lexer.configuration.EncloseBodyText && !iswhitenode)
+                    {
+                        Node para;
+                
+                        lexer.ungetToken();
+                        para = lexer.inferredTag("p");
+                        Node.insertNodeAtEnd(body, para);
+                        parseTag(lexer, para, mode);
+                        mode = Lexer.MixedContent;
+                        continue;
+                    }
+                    else /* strict doesn't allow text here */
+                        lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
+
+                    /* NOTE(review): inlineDup presumably replays open inline tags from the
+                       lexer's inline stack; a result > 0 abandons this token here - confirm
+                       against Lexer.inlineDup */
+                    if (checkstack)
+                    {
+                        checkstack = false;
+
+                        if (lexer.inlineDup( node) > 0)
+                            continue;
+                    }
+
+                    Node.insertNodeAtEnd(body, node);
+                    mode = Lexer.MixedContent;
+                    continue;
+                }
+
+                if (node.type == Node.DocTypeTag)
+                {
+                    Node.insertDocType(lexer, body, node);
+                    continue;
+                }
+                /* discard unknown  and PARAM tags */
+                if (node.tag == null || node.tag == tt.tagParam)
+                {
+                    Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /*
+                  Netscape allows LI and DD directly in BODY
+                  We infer UL or DL respectively and use this
+                  boolean to exclude block-level elements so as
+                  to match Netscape's observed behaviour.
+                */
+                lexer.excludeBlocks = false;
+        
+                /* tag is neither block-level nor inline: needs special treatment */
+                if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
+                    !((node.tag.model & Dict.CM_INLINE) != 0))
+                {
+                    /* avoid this error message being issued twice */
+                    if (!((node.tag.model & Dict.CM_HEAD) != 0))
+                        Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
+
+                    /* CM_HTML tags are dropped after salvaging attributes of an explicit <body> */
+                    if ((node.tag.model & Dict.CM_HTML) != 0)
+                    {
+                        /* copy body attributes if current body was inferred */
+                        if (node.tag == tt.tagBody && body.implicit 
+                                            && body.attributes == null)
+                        {
+                            body.attributes = node.attributes;
+                            node.attributes = null;
+                        }
+
+                        continue;
+                    }
+
+                    /* head-only elements are handed to moveToHead */
+                    if ((node.tag.model & Dict.CM_HEAD) != 0)
+                    {
+                        moveToHead(lexer, body, node);
+                        continue;
+                    }
+
+                    /* list item, definition-list or table content without its container:
+                       push the token back and continue with an inferred container instead */
+                    if ((node.tag.model & Dict.CM_LIST) != 0)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag( "ul");
+                        Node.addClass(node, "noindent");
+                        lexer.excludeBlocks = true;
+                    }
+                    else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag( "dl");
+                        lexer.excludeBlocks = true;
+                    }
+                    else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag( "table");
+                        lexer.excludeBlocks = true;
+                    }
+                    else
+                    {
+                        /* AQ: The following line is from the official C
+                           version of tidy.  It doesn't make sense to me
+                           because the '!' operator has higher precedence
+                           than the '&' operator.  It seems to me that the
+                           expression always evaluates to 0.
+
+                           if (!node->tag->model & (CM_ROW | CM_FIELD))
+
+                           AQ: 13Jan2000 fixed in C tidy
+                        */
+                        /* not row/field content either: give the token back to the caller */
+                        if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
+                        {
+                            lexer.ungetToken();
+                            return;
+                        }
+
+                        /* ignore </td> </th> <option> etc. */
+                        continue;
+                    }
+                }
+
+                if (node.type == Node.EndTag)
+                {
+                    /* </br> is treated as <br> */
+                    if (node.tag == tt.tagBr)
+                        node.type = Node.StartTag;
+                    /* stray </p> becomes two breaks: the coerced node plus an inferred <br> */
+                    else if (node.tag == tt.tagP)
+                    {
+                        Node.coerceNode(lexer, node, tt.tagBr);
+                        Node.insertNodeAtEnd(body, node);
+                        node = lexer.inferredTag("br");
+                    }
+                    else if ((node.tag.model & Dict.CM_INLINE) != 0)
+                        lexer.popInline(node);
+                }
+
+                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                {
+                    /* inline (and not CM_MIXED) element directly inside body */
+                    if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
+                    {
+                        /* HTML4 strict doesn't allow inline content here */
+                        /* but HTML2 does allow img elements as children of body */
+                        if (node.tag == tt.tagImg)
+                            lexer.versions &= ~Dict.VERS_HTML40_STRICT;
+                        else
+                            lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
+
+                        if (checkstack && !node.implicit)
+                        {
+                            checkstack = false;
+
+                            if (lexer.inlineDup( node) > 0)
+                                continue;
+                        }
+
+                        mode = Lexer.MixedContent;
+                    }
+                    else
+                    {
+                        /* any other element re-arms the inline-stack check and whitespace skipping */
+                        checkstack = true;
+                        mode = Lexer.IgnoreWhitespace;
+                    }
+
+                    if (node.implicit)
+                        Report.warning(lexer, body, node, Report.INSERTING_TAG);
+
+                    Node.insertNodeAtEnd(body, node);
+                    parseTag(lexer, node, mode);
+                    continue;
+                }
+
+                /* discard unexpected tags */
+                Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
+            }
+        }
+
+    };
+
+    public static class ParseFrameSet implements Parser {
+
+        public void parse( Lexer lexer, Node frameset, short mode )
+        {
+            Node node;
+            TagTable tt = lexer.configuration.tt;
+
+            lexer.badAccess |=  Report.USING_FRAMES;
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                if (node.tag == frameset.tag && node.type == Node.EndTag)
+                {
+                    frameset.closed = true;
+                    Node.trimSpaces(lexer, frameset);
+                    return;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(frameset, node))
+                    continue;
+
+                if (node.tag == null)
+                {
+                    Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
+                    continue; 
+                }
+
+                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                {
+                    if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
+                    {
+                        moveToHead(lexer, frameset, node);
+                        continue;
+                    }
+                }
+
+                if (node.tag == tt.tagBody)
+                {
+                    lexer.ungetToken();
+                    node = lexer.inferredTag("noframes");
+                    Report.warning(lexer, frameset, node, Report.INSERTING_TAG);
+                }
+
+                if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0)
+                {
+                    Node.insertNodeAtEnd(frameset, node);
+                    lexer.excludeBlocks = false;
+                    parseTag(lexer, node, Lexer.MixedContent);
+                    continue;
+                }
+                else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0)
+                {
+                    Node.insertNodeAtEnd(frameset, node);
+                    continue;
+                }
+
+                /* discard unexpected tags */
+                Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
+            }
+
+            Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
+        }
+
+    };
+
+    public static class ParseInline implements Parser {
+
+        public void parse( Lexer lexer, Node element, short mode )
+        {
+            Node node, parent;
+            TagTable tt = lexer.configuration.tt;
+
+            if ((element.tag.model & Dict.CM_EMPTY) != 0)
+                return;
+
+            if (element.tag == tt.tagA)
+            {
+                if (element.attributes == null)
+                {
+                    Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED);
+                    Node.discardElement(element);
+                    return;
+                }
+            }
+
+            /*
+             ParseInline is used for some block level elements like H1 to H6
+             For such elements we need to insert inline emphasis tags currently
+             on the inline stack. For Inline elements, we normally push them
+             onto the inline stack provided they aren't implicit or OBJECT/APPLET.
+             This test is carried out in PushInline and PopInline, see istack.c
+             We don't push A or SPAN to replicate current browser behavior
+            */
+            if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt))
+                lexer.inlineDup( null);
+            else if ((element.tag.model & Dict.CM_INLINE) != 0 &&
+                        element.tag != tt.tagA && element.tag != tt.tagSpan)
+                lexer.pushInline( element);
+
+            if (element.tag == tt.tagNobr)
+                lexer.badLayout |= Report.USING_NOBR;
+            else if (element.tag == tt.tagFont)
+                lexer.badLayout |= Report.USING_FONT;
+
+            /* Inline elements may or may not be within a preformatted element */
+            if (mode != Lexer.Preformatted)
+                mode = Lexer.MixedContent;
+
+            while (true)
+            {
+                node = lexer.getToken(mode);
+                if (node == null) break;
+                /* end tag for current element */
+                if (node.tag == element.tag && node.type == Node.EndTag)
+                {
+                    if ((element.tag.model & Dict.CM_INLINE) != 0 &&
+                        element.tag != tt.tagA)
+                        lexer.popInline( node);
+
+                    if (!((mode & Lexer.Preformatted) != 0))
+                        Node.trimSpaces(lexer, element);
+                    /*
+                     if a font element wraps an anchor and nothing else
+                     then move the font element inside the anchor since
+                     otherwise it won't alter the anchor text color
+                    */
+                    if (element.tag == tt.tagFont &&
+                        element.content != null &&
+                        element.content == element.last)
+                    {
+                        Node child = element.content;
+
+                        if (child.tag == tt.tagA)
+                        {
+                            child.parent = element.parent;
+                            child.next = element.next;
+                            child.prev = element.prev;
+
+                            if (child.prev != null)
+                                child.prev.next = child;
+                            else
+                                child.parent.content = child;
+
+                            if (child.next != null)
+                                child.next.prev = child;
+                            else
+                                child.parent.last = child;
+
+                            element.next = null;
+                            element.prev = null;
+                            element.parent = child;
+                            element.content = child.content;
+                            element.last = child.last;
+                            child.content = element;
+                            child.last = element;
+                            for (child = element.content; child != null; child = child.next)
+                                child.parent = element;
+                        }
+                    }
+                    element.closed = true;
+                    Node.trimSpaces(lexer, element);
+                    Node.trimEmptyElement(lexer, element);
+                    return;
+                }
+
+                /* <u>...<u>  map 2nd <u> to </u> if 1st is explicit */
+                /* otherwise emphasis nesting is probably unintentional */
+                /* big and small have cumulative effect to leave them alone */
+                if (node.type == Node.StartTag
+                        && node.tag == element.tag
+                        && lexer.isPushed(node)
+                        && !node.implicit
+                        && !element.implicit
+                        && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0)
+                        && node.tag != tt.tagA
+                        && node.tag != tt.tagFont
+                        && node.tag != tt.tagBig
+                        && node.tag != tt.tagSmall)
+                {
+                    if (element.content != null && node.attributes == null)
+                    {
+                        Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
+                        node.type = Node.EndTag;
+                        lexer.ungetToken();
+                        continue;
+                    }
+
+                    Report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
+                }
+
+                if (node.type == Node.TextNode)
+                {
+                    /* only called for 1st child */
+                    if (element.content == null &&
+                        !((mode & Lexer.Preformatted) != 0))
+                        Node.trimSpaces(lexer, element);
+
+                    if (node.start >= node.end)
+                    {
+                        continue;
+                    }
+
+                    Node.insertNodeAtEnd(element, node);
+                    continue;
+                }
+
+                /* mixed content model so allow text */
+                if (Node.insertMisc(element, node))
+                    continue;
+
+                /* deal with HTML tags */
+                if (node.tag == tt.tagHtml)
+                {
+                    if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                    {
+                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    /* otherwise infer end of inline element */
+                    lexer.ungetToken();
+                    if (!((mode & Lexer.Preformatted) != 0))
+                        Node.trimSpaces(lexer, element);
+                    Node.trimEmptyElement(lexer, element);
+                    return;
+                }
+
+                /* within <dt> or <pre> map <p> to <br> */
+                if (node.tag == tt.tagP &&
+                      node.type == Node.StartTag &&
+                      ((mode & Lexer.Preformatted) != 0 ||
+                       element.tag == tt.tagDt ||
+                      element.isDescendantOf(tt.tagDt)))
+                {
+                    node.tag = tt.tagBr;
+                    node.element = "br";
+                    Node.trimSpaces(lexer, element);
+                    Node.insertNodeAtEnd(element, node);
+                    continue;
+                }
+
+                /* ignore unknown and PARAM tags */
+                if (node.tag == null || node.tag == tt.tagParam)
+                {
+                    Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                if (node.tag == tt.tagBr && node.type == Node.EndTag)
+                    node.type = Node.StartTag;
+
+                if (node.type == Node.EndTag)
+                {
+                    /* coerce </br> to <br> */
+                    if (node.tag == tt.tagBr)
+                        node.type = Node.StartTag;
+                    else if (node.tag == tt.tagP)
+                    {
+                        /* coerce unmatched </p> to <br><br> */
+                        if (!element.isDescendantOf(tt.tagP))
+                        {
+                            Node.coerceNode(lexer, node, tt.tagBr);
+                            Node.trimSpaces(lexer, element);
+                            Node.insertNodeAtEnd(element, node);
+                            node = lexer.inferredTag("br");
+                            continue;
+                        }
+                    }
+                    else if ((node.tag.model & Dict.CM_INLINE) != 0
+                                && node.tag != tt.tagA
+                                        && !((node.tag.model & Dict.CM_OBJECT) != 0)
+                                        && (element.tag.model & Dict.CM_INLINE) != 0)
+                    {
+                        /* allow any inline end tag to end current element */
+                        lexer.popInline( element);
+
+                        if (element.tag != tt.tagA)
+                        {
+                            if (node.tag == tt.tagA && node.tag != element.tag)
+                            {
+                               Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+                               lexer.ungetToken();
+                            }
+                            else
+                            {
+                                Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
+                            }
+
+                            if (!((mode & Lexer.Preformatted) != 0))
+                                Node.trimSpaces(lexer, element);
+                            Node.trimEmptyElement(lexer, element);
+                            return;
+                        }
+
+                        /* if parent is <a> then discard unexpected inline end tag */
+                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }  /* special case </tr> etc. for stuff moved in front of table */
+                    else if (lexer.exiled
+                                && node.tag.model != 0
+                                && (node.tag.model & Dict.CM_TABLE) != 0)
+                    {
+                        lexer.ungetToken();
+                        Node.trimSpaces(lexer, element);
+                        Node.trimEmptyElement(lexer, element);
+                        return;
+                    }
+                }
+
+                /* allow any header tag to end current header */
+                if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
+                {
+                    if (node.tag == element.tag)
+                    {
+                        Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
+                    }
+                    else
+                    {
+                        Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+                        lexer.ungetToken();
+                    }
+                    if (!((mode & Lexer.Preformatted) != 0))
+                        Node.trimSpaces(lexer, element);
+                    Node.trimEmptyElement(lexer, element);
+                    return;
+                }
+
+                /*
+                   an <A> tag to ends any open <A> element
+                   but <A href=...> is mapped to </A><A href=...>
+                */
+                if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node))
+                {
+                 /* coerce <a> to </a> unless it has some attributes */
+                    if (node.attributes == null)
+                    {
+                        node.type = Node.EndTag;
+                        Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
+                        lexer.popInline( node);
+                        lexer.ungetToken();
+                        continue;
+                    }
+
+                    lexer.ungetToken();
+                    Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+                    lexer.popInline( element);
+                    if (!((mode & Lexer.Preformatted) != 0))
+                        Node.trimSpaces(lexer, element);
+                    Node.trimEmptyElement(lexer, element);
+                    return;
+                }
+
+                if ((element.tag.model & Dict.CM_HEADING) != 0)
+                {
+                    if (node.tag == tt.tagCenter ||
+                        node.tag == tt.tagDiv)
+                    {
+                        if (node.type != Node.StartTag &&
+                            node.type != Node.StartEndTag)
+                        {
+                            Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                            continue;
+                        }
+
+                        Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
+
+                        /* insert center as parent if heading is empty */
+                        if (element.content == null)
+                        {
+                            Node.insertNodeAsParent(element, node);
+                            continue;
+                        }
+
+                        /* split heading and make center parent of 2nd part */
+                        Node.insertNodeAfterElement(element, node);
+
+                        if (!((mode & Lexer.Preformatted) != 0))
+                            Node.trimSpaces(lexer, element);
+
+                        element = lexer.cloneNode(element);
+                        element.start = lexer.lexsize;
+                        element.end   = lexer.lexsize;
+                        Node.insertNodeAtEnd(node, element);
+                        continue;
+                    }
+
+                    if (node.tag == tt.tagHr)
+                    {
+                        if (node.type != Node.StartTag &&
+                            node.type != Node.StartEndTag)
+                        {
+                            Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                            continue;
+                        }
+
+                        Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
+
+                        /* insert hr before heading if heading is empty */
+                        if (element.content == null)
+                        {
+                            Node.insertNodeBeforeElement(element, node);
+                            continue;
+                        }
+
+                        /* split heading and insert hr before 2nd part */
+                        Node.insertNodeAfterElement(element, node);
+
+                        if (!((mode & Lexer.Preformatted) != 0))
+                            Node.trimSpaces(lexer, element);
+
+                        element = lexer.cloneNode(element);
+                        element.start = lexer.lexsize;
+                        element.end   = lexer.lexsize;
+                        Node.insertNodeAfterElement(node, element);
+                        continue;
+                    }
+                }
+
+                if (element.tag == tt.tagDt)
+                {
+                    if (node.tag == tt.tagHr)
+                    {
+                        Node dd;
+
+                        if (node.type != Node.StartTag &&
+                            node.type != Node.StartEndTag)
+                        {
+                            Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                            continue;
+                        }
+
+                        Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
+                        dd = lexer.inferredTag("dd");
+
+                        /* insert hr within dd before dt if dt is empty */
+                        if (element.content == null)
+                        {
+                            Node.insertNodeBeforeElement(element, dd);
+                            Node.insertNodeAtEnd(dd, node);
+                            continue;
+                        }
+
+                        /* split dt and insert hr within dd before 2nd part */
+                        Node.insertNodeAfterElement(element, dd);
+                        Node.insertNodeAtEnd(dd, node);
+
+                        if (!((mode & Lexer.Preformatted) != 0))
+                            Node.trimSpaces(lexer, element);
+
+                        element = lexer.cloneNode(element);
+                        element.start = lexer.lexsize;
+                        element.end   = lexer.lexsize;
+                        Node.insertNodeAfterElement(dd, element);
+                        continue;
+                    }
+                }
+
+
+                /* 
+                  if this is the end tag for an ancestor element
+                  then infer end tag for this element
+                */
+                if (node.type == Node.EndTag)
+                {
+                    for (parent = element.parent;
+                            parent != null; parent = parent.parent)
+                    {
+                        if (node.tag == parent.tag)
+                        {
+                            if (!((element.tag.model & Dict.CM_OPT) != 0) &&
+                                !element.implicit)
+                                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+                            if (element.tag == tt.tagA)
+                                lexer.popInline(element);
+
+                            lexer.ungetToken();
+
+                            if (!((mode & Lexer.Preformatted) != 0))
+                                Node.trimSpaces(lexer, element);
+
+                            Node.trimEmptyElement(lexer, element);
+                            return;
+                        }
+                    }
+                }
+
+                /* block level tags end this element */
+                if (!((node.tag.model & Dict.CM_INLINE) != 0))
+                {
+                    if (node.type != Node.StartTag)
+                    {
+                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    if (!((element.tag.model & Dict.CM_OPT) != 0))
+                        Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+                    if ((node.tag.model & Dict.CM_HEAD) != 0 &&
+                        !((node.tag.model & Dict.CM_BLOCK) != 0))
+                    {
+                        moveToHead(lexer, element, node);
+                        continue;
+                    }
+
+                    /*
+                       prevent anchors from propagating into block tags
+                       except for headings h1 to h6
+                    */
+                    if (element.tag == tt.tagA)
+                    {
+                        if (node.tag != null &&
+                            !((node.tag.model & Dict.CM_HEADING) != 0))
+                            lexer.popInline(element);
+                        else if (!(element.content != null))
+                        {
+                            Node.discardElement(element);
+                            lexer.ungetToken();
+                            return;
+                        }
+                    }
+
+                    lexer.ungetToken();
+
+                    if (!((mode & Lexer.Preformatted) != 0))
+                        Node.trimSpaces(lexer, element);
+
+                    Node.trimEmptyElement(lexer, element);
+                    return;
+                }
+
+                /* parse inline element */
+                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                {
+                    if (node.implicit)
+                        Report.warning(lexer, element, node, Report.INSERTING_TAG);
+
+                    /* trim white space before <br> */
+                    if (node.tag == tt.tagBr)
+                        Node.trimSpaces(lexer, element);
+            
+                    Node.insertNodeAtEnd(element, node);
+                    parseTag(lexer, node, mode);
+                    continue;
+                }
+
+                /* discard unexpected tags */
+                Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+            }
+
+            if (!((element.tag.model & Dict.CM_OPT) != 0))
+                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
+
+            Node.trimEmptyElement(lexer, element);
+        }
+    };
+
+    public static class ParseList implements Parser {
+        /**
+         * Parses the content of a list element. Tokens are read from the
+         * lexer and appended to <code>list</code> as list items until the
+         * list's own end tag, the end tag of an ancestor, a block-level tag
+         * while lexer.excludeBlocks is set, or the end of the token stream
+         * is reached.
+         *
+         * @param lexer supplies the tokens; lexer.insert and lexer.badForm
+         *              may be written while parsing
+         * @param list  the list element whose content is parsed
+         * @param mode  not referenced by this parser; tokens are always read
+         *              with Lexer.IgnoreWhitespace
+         */
+        public void parse( Lexer lexer, Node list, short mode )
+        {
+            Node node;
+            Node parent;
+            TagTable tt = lexer.configuration.tt;
+            /* nothing to parse when the tag's model has CM_EMPTY set */
+            if ((list.tag.model & Dict.CM_EMPTY) != 0)
+                return;
+
+            lexer.insert = -1;  /* defer implicit inline start tags */
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                /*
+                  end tag for this list: a list whose tag is flagged
+                  CM_OBSOLETE is coerced to ul, then the list is marked
+                  as closed
+                */
+                if (node.tag == list.tag && node.type == Node.EndTag)
+                {
+                    if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
+                        Node.coerceNode(lexer, list, tt.tagUl);
+
+                    list.closed = true;
+                    Node.trimEmptyElement(lexer, list);
+                    return;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(list, node))
+                    continue;
+                /* discard any non-text token whose tag is null */
+                if (node.type != Node.TextNode && node.tag == null)
+                {
+                    Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* 
+                  End tags are handled here and never become list items:
+                  - </form> sets lexer.badForm and is discarded
+                  - an inline end tag is discarded and passed to
+                    lexer.popInline
+                  - if this is the end tag for an ancestor element
+                    then infer end tag for this element (the token is
+                    pushed back so that the caller sees it again)
+                  - any other end tag is discarded
+                */
+                if (node.type == Node.EndTag)
+                {
+                    if (node.tag == tt.tagForm)
+                    {
+                        lexer.badForm = 1;
+                        Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
+                    {
+                        Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                        lexer.popInline(node);
+                        continue;
+                    }
+
+                    for (parent = list.parent;
+                            parent != null; parent = parent.parent)
+                    {
+                        if (node.tag == parent.tag)
+                        {
+                            Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
+                            lexer.ungetToken();
+
+                            if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
+                                Node.coerceNode(lexer, list, tt.tagUl);
+
+                            Node.trimEmptyElement(lexer, list);
+                            return;
+                        }
+                    }
+
+                    Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+                /*
+                  anything other than li (text included) is pushed back;
+                  it either ends the list or is left for an inferred li
+                */
+                if (node.tag != tt.tagLi)
+                {
+                    lexer.ungetToken();
+
+                    if (node.tag != null &&
+                        (node.tag.model & Dict.CM_BLOCK) != 0 &&
+                        lexer.excludeBlocks)
+                    {
+                        Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
+                        Node.trimEmptyElement(lexer, list);
+                        return;
+                    }
+                    /* wrap the content in an inferred li styled without a marker */
+                    node = lexer.inferredTag("li");
+                    node.addAttribute("style", "list-style: none");
+                    Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
+                }
+
+                /* node should be <LI> */
+                Node.insertNodeAtEnd(list, node);
+                parseTag(lexer, node, Lexer.IgnoreWhitespace);
+            }
+            /*
+              the token stream ended before the end tag of the list was
+              seen; node is null at this point
+            */
+            if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
+                Node.coerceNode(lexer, list, tt.tagUl);
+
+            Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
+            Node.trimEmptyElement(lexer, list);
+        }
+
+    };
+
+    public static class ParseDefList implements Parser {
+        /**
+         * Parses the content of a definition list element. Tokens are read
+         * from the lexer and appended to <code>list</code> as dt/dd children
+         * until the list's own end tag, the end tag of an ancestor, a tag
+         * that is not allowed here, or the end of the token stream is
+         * reached. A center element splits the list in two.
+         *
+         * @param lexer supplies the tokens; lexer.insert and lexer.badForm
+         *              may be written while parsing
+         * @param list  the definition list element whose content is parsed
+         * @param mode  only passed on to parseTag when the content of a
+         *              center element is parsed; tokens are always read
+         *              with Lexer.IgnoreWhitespace
+         */
+        public void parse( Lexer lexer, Node list, short mode )
+        {
+            Node node, parent;
+            TagTable tt = lexer.configuration.tt;
+            /* nothing to parse when the tag's model has CM_EMPTY set */
+            if ((list.tag.model & Dict.CM_EMPTY) != 0)
+                return;
+
+            lexer.insert = -1;  /* defer implicit inline start tags */
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                if (node.tag == list.tag && node.type == Node.EndTag)
+                {
+                    list.closed = true;
+                    Node.trimEmptyElement(lexer, list);
+                    return;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(list, node))
+                    continue;
+                /* text is pushed back and left for an inferred dt */
+                if (node.type == Node.TextNode)
+                {
+                    lexer.ungetToken();
+                    node = lexer.inferredTag( "dt");
+                    Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
+                }
+                /* discard any token whose tag is null */
+                if (node.tag == null)
+                {
+                    Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* 
+                  </form> sets lexer.badForm and is discarded.
+                  if this is the end tag for an ancestor element
+                  then infer end tag for this element (the token is
+                  pushed back so that the caller sees it again).
+                  Other end tags fall through to the checks below.
+                */
+                if (node.type == Node.EndTag)
+                {
+                    if (node.tag == tt.tagForm)
+                    {
+                        lexer.badForm = 1;
+                        Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    for (parent = list.parent;
+                            parent != null; parent = parent.parent)
+                    {
+                        if (node.tag == parent.tag)
+                        {
+                            Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
+
+                            lexer.ungetToken();
+                            Node.trimEmptyElement(lexer, list);
+                            return;
+                        }
+                    }
+                }
+
+                /* center in a dt or a dl breaks the dl list in two */
+                if (node.tag == tt.tagCenter)
+                {
+                    if (list.content != null)
+                        Node.insertNodeAfterElement(list, node);
+                    else /* trim empty dl list */
+                    {
+                        Node.insertNodeBeforeElement(list, node);
+                        Node.discardElement(list);
+                    }
+
+                    /* and parse contents of center */
+                    parseTag(lexer, node, mode);
+
+                    /*
+                      now create a new dl element; list is reassigned, so
+                      the following tokens are appended to the new dl
+                    */
+                    list = lexer.inferredTag("dl");
+                    Node.insertNodeAfterElement(node, list);
+                    continue;
+                }
+                /*
+                  anything other than dt or dd is pushed back; it either
+                  ends the list or is left for an inferred dd
+                */
+                if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
+                {
+                    lexer.ungetToken();
+                    /* neither block nor inline: not allowed in this list */
+                    if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
+                    {
+                        Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
+                        Node.trimEmptyElement(lexer, list);
+                        return;
+                    }
+
+                    /* if DD appeared directly in BODY then exclude blocks */
+                    if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
+                    {
+                        Node.trimEmptyElement(lexer, list);
+                        return;
+                    }
+
+                    node = lexer.inferredTag( "dd");
+                    Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
+                }
+                /* an end tag that got this far is discarded */
+                if (node.type == Node.EndTag)
+                {
+                    Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+        
+                /* node should be <DT> or <DD>*/
+                Node.insertNodeAtEnd(list, node);
+                parseTag(lexer, node, Lexer.IgnoreWhitespace);
+            }
+            /*
+              the token stream ended before the end tag of the list was
+              seen; node is null at this point
+            */
+            Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
+            Node.trimEmptyElement(lexer, list);
+        }
+
+    };
+
+    public static class ParsePre implements Parser {
+        /**
+         * Parses the content of a preformatted element. Tokens are read
+         * from the lexer in Lexer.Preformatted mode; text and inline
+         * elements are appended to <code>pre</code>. A non-inline start tag
+         * is moved after the current element and followed by a new inferred
+         * pre element that receives the remaining content.
+         *
+         * @param lexer supplies the tokens; lexer.badForm and
+         *              lexer.excludeBlocks may be written while parsing
+         * @param pre   the element whose content is parsed; it is coerced
+         *              to pre when its tag is flagged CM_OBSOLETE
+         * @param mode  not referenced by this parser
+         */
+        public void parse( Lexer lexer, Node pre, short mode )
+        {
+            Node node, parent;
+            TagTable tt = lexer.configuration.tt;
+            /* nothing to parse when the tag's model has CM_EMPTY set */
+            if ((pre.tag.model & Dict.CM_EMPTY) != 0)
+                return;
+
+            if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
+                Node.coerceNode(lexer, pre, tt.tagPre);
+
+            lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.Preformatted);
+                if (node == null) break;
+                if (node.tag == pre.tag && node.type == Node.EndTag)
+                {
+                    Node.trimSpaces(lexer, pre);
+                    pre.closed = true;
+                    Node.trimEmptyElement(lexer, pre);
+                    return;
+                }
+                /* html tokens are skipped; only a start tag is reported */
+                if (node.tag == tt.tagHtml)
+                {
+                    if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                        Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+
+                    continue;
+                }
+
+                if (node.type == Node.TextNode)
+                {
+                    /*
+                      if this is the first child of pre, skip an initial
+                      newline and drop the text node if nothing is left
+                    */
+                    if (pre.content == null)
+                    {
+                        if (node.textarray[node.start] == (byte)'\n')
+                            ++node.start;
+
+                        if (node.start >= node.end)
+                        {
+                            continue;
+                        }
+                    }
+
+                    Node.insertNodeAtEnd(pre, node);
+                    continue;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(pre, node))
+                    continue;
+
+                /* discard unknown  and PARAM tags */
+                if (node.tag == null || node.tag == tt.tagParam)
+                {
+                    Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                if (node.tag == tt.tagP)
+                {
+                    if (node.type == Node.StartTag)
+                    {
+                        Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
+
+                        /* trim white space before <p> in <pre>*/
+                        Node.trimSpaces(lexer, pre);
+            
+                        /* coerce the <p> start tag to <br> */
+                        Node.coerceNode(lexer, node, tt.tagBr);
+                        Node.insertNodeAtEnd(pre, node);
+                    }
+                    else
+                    {
+                        /* any other p token, such as </p>, is discarded */
+                        Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+                    }
+                    continue;
+                }
+                /* tags flagged CM_HEAD but not CM_BLOCK are handed to moveToHead */
+                if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
+                {
+                    moveToHead(lexer, pre, node);
+                    continue;
+                }
+
+                /* 
+                  </form> sets lexer.badForm and is discarded.
+                  if this is the end tag for an ancestor element
+                  then infer end tag for this element (the token is
+                  pushed back so that the caller sees it again).
+                  Other end tags fall through to the checks below.
+                */
+                if (node.type == Node.EndTag)
+                {
+                    if (node.tag == tt.tagForm)
+                    {
+                        lexer.badForm = 1;
+                        Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    for (parent = pre.parent;
+                            parent != null; parent = parent.parent)
+                    {
+                        if (node.tag == parent.tag)
+                        {
+                            Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
+
+                            lexer.ungetToken();
+                            Node.trimSpaces(lexer, pre);
+                            Node.trimEmptyElement(lexer, pre);
+                            return;
+                        }
+                    }
+                }
+
+                /*
+                  what about head content, HEAD, BODY tags etc?
+
+                  A non-inline start tag ends the current pre: the node
+                  (or a container inferred for it) is inserted after pre,
+                  a new inferred pre is inserted after that node, and the
+                  node is parsed with lexer.excludeBlocks set. The loop
+                  then goes on filling the new pre.
+                */
+                if (!((node.tag.model & Dict.CM_INLINE) != 0))
+                {
+                    if (node.type != Node.StartTag)
+                    {
+                        Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+                    Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
+                    lexer.excludeBlocks = true;
+
+                    /* check if we need to infer a container */
+                    if ((node.tag.model & Dict.CM_LIST) != 0)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag( "ul");
+                        Node.addClass(node, "noindent");
+                    }
+                    else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag( "dl");
+                    }
+                    else if ((node.tag.model & Dict.CM_TABLE) != 0)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag( "table");
+                    }
+
+                    Node.insertNodeAfterElement(pre, node);
+                    pre = lexer.inferredTag( "pre");
+                    Node.insertNodeAfterElement(node, pre);
+                    parseTag(lexer, node, Lexer.IgnoreWhitespace);
+                    lexer.excludeBlocks = false;
+                    continue;
+                }
+                /*
+                disabled code:
+                if (!((node.tag.model & Dict.CM_INLINE) != 0))
+                {
+                    Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
+                    lexer.ungetToken();
+                    return;
+                }
+                */
+                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                {
+                    /* trim white space before <br> */
+                    if (node.tag == tt.tagBr)
+                        Node.trimSpaces(lexer, pre);
+            
+                    Node.insertNodeAtEnd(pre, node);
+                    parseTag(lexer, node, Lexer.Preformatted);
+                    continue;
+                }
+
+                /* discard unexpected tags */
+                Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
+            }
+            /*
+              the token stream ended before the end tag of pre was seen;
+              node is null at this point
+            */
+            Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
+            Node.trimEmptyElement(lexer, pre);
+        }
+
+    };
+
+    public static class ParseBlock implements Parser {
+
+        public void parse( Lexer lexer, Node element, short mode )
+        /*
+           element is node created by the lexer
+           upon seeing the start tag, or by the
+           parser when the start tag is inferred
+        */
+        {
+            Node node, parent;
+            boolean checkstack;
+            int istackbase = 0;
+            TagTable tt = lexer.configuration.tt;
+
+            checkstack = true;
+
+            if ((element.tag.model & Dict.CM_EMPTY) != 0)
+                return;
+
+            if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
+                Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
+
+            /*
+             InlineDup() asks the lexer to insert inline emphasis tags
+             currently pushed on the istack, but take care to avoid
+             propagating inline emphasis inside OBJECT or APPLET.
+             For these elements a fresh inline stack context is created
+             and disposed of upon reaching the end of the element.
+             They thus behave like table cells in this respect.
+            */
+            if ((element.tag.model & Dict.CM_OBJECT) != 0)
+            {
+                istackbase = lexer.istackbase;
+                lexer.istackbase = lexer.istack.size();
+            }
+
+            if (!((element.tag.model & Dict.CM_MIXED) != 0))
+                lexer.inlineDup( null);
+
+            mode = Lexer.IgnoreWhitespace;
+
+            while (true)
+            {
+                node = lexer.getToken(mode /*Lexer.MixedContent*/);
+                if (node == null) break;
+                /* end tag for this element */
+                if (node.type == Node.EndTag && node.tag != null &&
+                    (node.tag == element.tag || element.was == node.tag))
+                {
+
+                    if ((element.tag.model & Dict.CM_OBJECT) != 0)
+                    {
+                        /* pop inline stack */
+                        while (lexer.istack.size() > lexer.istackbase)
+                            lexer.popInline( null);
+                        lexer.istackbase = istackbase;
+                    }
+
+                    element.closed = true;
+                    Node.trimSpaces(lexer, element);
+                    Node.trimEmptyElement(lexer, element);
+                    return;
+                }
+
+                if (node.tag == tt.tagHtml ||
+                    node.tag == tt.tagHead ||
+                    node.tag == tt.tagBody)
+                {
+                    if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+
+                    continue;
+                }
+
+                if (node.type == Node.EndTag)
+                {
+                    if (node.tag == null)
+                    {
+                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+
+                        continue;
+                    }
+                    else if (node.tag == tt.tagBr)
+                        node.type = Node.StartTag;
+                    else if (node.tag == tt.tagP)
+                    {
+                        Node.coerceNode(lexer, node, tt.tagBr);
+                        Node.insertNodeAtEnd(element, node);
+                        node = lexer.inferredTag("br");
+                    }
+                    else
+                    {
+                        /* 
+                          if this is the end tag for an ancestor element
+                          then infer end tag for this element
+                        */
+                        for (parent = element.parent;
+                                parent != null; parent = parent.parent)
+                        {
+                            if (node.tag == parent.tag)
+                            {
+                                if (!((element.tag.model & Dict.CM_OPT) != 0))
+                                    Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+                                lexer.ungetToken();
+
+                                if ((element.tag.model & Dict.CM_OBJECT) != 0)
+                                {
+                                    /* pop inline stack */
+                                    while (lexer.istack.size() > lexer.istackbase)
+                                        lexer.popInline( null);
+                                    lexer.istackbase = istackbase;
+                                }
+
+                                Node.trimSpaces(lexer, element);
+                                Node.trimEmptyElement(lexer, element);
+                                return;
+                            }
+                        }
+                        /* special case </tr> etc. for stuff moved in front of table */
+                        if (lexer.exiled
+                                    && node.tag.model != 0
+                                    && (node.tag.model & Dict.CM_TABLE) != 0)
+                        {
+                            lexer.ungetToken();
+                            Node.trimSpaces(lexer, element);
+                            Node.trimEmptyElement(lexer, element);
+                            return;
+                        }
+                    }
+                }
+
+                /* mixed content model permits text */
+                if (node.type == Node.TextNode)
+                {
+                    boolean iswhitenode = false;
+
+                    if (node.type == Node.TextNode &&
+                           node.end <= node.start + 1 &&
+                           lexer.lexbuf[node.start] == (byte)' ')
+                        iswhitenode = true;
+
+                    if (lexer.configuration.EncloseBlockText && !iswhitenode)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag("p");
+                        Node.insertNodeAtEnd(element, node);
+                        parseTag(lexer, node, Lexer.MixedContent);
+                        continue;
+                    }
+
+                    if (checkstack)
+                    {
+                        checkstack = false;
+
+                        if (!((element.tag.model & Dict.CM_MIXED) != 0))
+                        {
+                            if (lexer.inlineDup( node) > 0)
+                                continue;
+                        }
+                    }
+
+                    Node.insertNodeAtEnd(element, node);
+                    mode = Lexer.MixedContent;
+                    /*
+                      HTML4 strict doesn't allow mixed content for
+                      elements with %block; as their content model
+                    */
+                    lexer.versions &= ~Dict.VERS_HTML40_STRICT;
+                    continue;
+                }
+
+                if (Node.insertMisc(element, node))
+                    continue;
+
+                /* allow PARAM elements? */
+                if (node.tag == tt.tagParam)
+                {
+                    if (((element.tag.model & Dict.CM_PARAM) != 0) &&
+                            (node.type == Node.StartTag || node.type == Node.StartEndTag))
+                    {
+                        Node.insertNodeAtEnd(element, node);
+                        continue;
+                    }
+
+                    /* otherwise discard it */
+                    Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* allow AREA elements? */
+                if (node.tag == tt.tagArea)
+                {
+                    if ((element.tag == tt.tagMap) &&
+                            (node.type == Node.StartTag || node.type == Node.StartEndTag))
+                    {
+                        Node.insertNodeAtEnd(element, node);
+                        continue;
+                    }
+
+                    /* otherwise discard it */
+                    Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* ignore unknown start/end tags */
+                if (node.tag == null)
+                {
+                    Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /*
+                  Allow Dict.CM_INLINE elements here.
+
+                  Allow Dict.CM_BLOCK elements here unless
+                  lexer.excludeBlocks is yes.
+
+                  LI and DD are special cased.
+
+                  Otherwise infer end tag for this element.
+                */
+
+                if (!((node.tag.model & Dict.CM_INLINE) != 0))
+                {
+                    if (node.type != Node.StartTag && node.type != Node.StartEndTag)
+                    {
+                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    if (element.tag == tt.tagTd || element.tag == tt.tagTh)
+                    {
+                        /* if parent is a table cell, avoid inferring the end of the cell */
+
+                        if ((node.tag.model & Dict.CM_HEAD) != 0)
+                        {
+                            moveToHead(lexer, element, node);
+                            continue;
+                        }
+
+                        if ((node.tag.model & Dict.CM_LIST) != 0)
+                        {
+                            lexer.ungetToken();
+                            node = lexer.inferredTag( "ul");
+                            Node.addClass(node, "noindent");
+                            lexer.excludeBlocks = true;
+                        }
+                        else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+                        {
+                            lexer.ungetToken();
+                            node = lexer.inferredTag( "dl");
+                            lexer.excludeBlocks = true;
+                        }
+
+                        /* infer end of current table cell */
+                        if (!((node.tag.model & Dict.CM_BLOCK) != 0))
+                        {
+                            lexer.ungetToken();
+                            Node.trimSpaces(lexer, element);
+                            Node.trimEmptyElement(lexer, element);
+                            return;
+                        }
+                    }
+                    else if ((node.tag.model & Dict.CM_BLOCK) != 0)
+                    {
+                        if (lexer.excludeBlocks)
+                        {
+                            if (!((element.tag.model & Dict.CM_OPT) != 0))
+                                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+                            lexer.ungetToken();
+
+                            if ((element.tag.model & Dict.CM_OBJECT) != 0)
+                                lexer.istackbase = istackbase;
+
+                            Node.trimSpaces(lexer, element);
+                            Node.trimEmptyElement(lexer, element);
+                            return;
+                        }
+                    }
+                    else /* things like list items */
+                    {
+                        if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
+                            Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
+
+                        if ((node.tag.model & Dict.CM_HEAD) != 0)
+                        {
+                            moveToHead(lexer, element, node);
+                            continue;
+                        }
+
+                        lexer.ungetToken();
+
+                        if ((node.tag.model & Dict.CM_LIST) != 0)
+                        {
+                            if (element.parent != null && element.parent.tag != null &&
+                                element.parent.tag.parser == getParseList())
+                            {
+                                Node.trimSpaces(lexer, element);
+                                Node.trimEmptyElement(lexer, element);
+                                return;
+                            }
+
+                            node = lexer.inferredTag("ul");
+                            Node.addClass(node, "noindent");
+                        }
+                        else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
+                        {
+                            if (element.parent.tag == tt.tagDl)
+                            {
+                                Node.trimSpaces(lexer, element);
+                                Node.trimEmptyElement(lexer, element);
+                                return;
+                            }
+
+                            node = lexer.inferredTag("dl");
+                        }
+                        else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
+                                 (node.tag.model & Dict.CM_ROW) != 0)
+                        {
+                            node = lexer.inferredTag("table");
+                        }
+                        else if ((element.tag.model & Dict.CM_OBJECT) != 0)
+                        {
+                            /* pop inline stack */
+                            while (lexer.istack.size() > lexer.istackbase)
+                                lexer.popInline( null);
+                            lexer.istackbase = istackbase;
+                            Node.trimSpaces(lexer, element);
+                            Node.trimEmptyElement(lexer, element);
+                            return;
+
+                        }
+                        else
+                        {
+                            Node.trimSpaces(lexer, element);
+                            Node.trimEmptyElement(lexer, element);
+                            return;
+                        }
+                    }
+                }
+
+                /* parse known element */
+                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                {
+                    if ((node.tag.model & Dict.CM_INLINE) != 0)
+                    {
+                        if (checkstack && !node.implicit)
+                        {
+                            checkstack = false;
+
+                            if (lexer.inlineDup( node) > 0)
+                                continue;
+                        }
+
+                        mode = Lexer.MixedContent;
+                    }
+                    else
+                    {
+                        checkstack = true;
+                        mode = Lexer.IgnoreWhitespace;
+                    }
+
+                    /* trim white space before <br> */
+                    if (node.tag == tt.tagBr)
+                        Node.trimSpaces(lexer, element);
+
+                    Node.insertNodeAtEnd(element, node);
+            
+                    if (node.implicit)
+                        Report.warning(lexer, element, node, Report.INSERTING_TAG);
+
+                    parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
+                    continue;
+                }
+
+                /* discard unexpected tags */
+                if (node.type == Node.EndTag)
+                    lexer.popInline( node);  /* if inline end tag */
+
+                Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
+            }
+
+            if (!((element.tag.model & Dict.CM_OPT) != 0))
+                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
+
+            if ((element.tag.model & Dict.CM_OBJECT) != 0)
+            {
+                /* pop inline stack */
+                while (lexer.istack.size() > lexer.istackbase)
+                    lexer.popInline( null);
+                lexer.istackbase = istackbase;
+            }
+
+            Node.trimSpaces(lexer, element);
+            Node.trimEmptyElement(lexer, element);
+        }
+
+    };
+
+    public static class ParseTableTag implements Parser {
+
+        public void parse( Lexer lexer, Node table, short mode )
+        {
+            Node node, parent;
+            int istackbase;
+            TagTable tt = lexer.configuration.tt;
+
+            lexer.deferDup();
+            istackbase = lexer.istackbase;
+            lexer.istackbase = lexer.istack.size();
+    
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                if (node.tag == table.tag && node.type == Node.EndTag)
+                {
+                    lexer.istackbase = istackbase;
+                    table.closed = true;
+                    Node.trimEmptyElement(lexer, table);
+                    return;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(table, node))
+                    continue;
+
+                /* discard unknown tags */
+                if (node.tag == null && node.type != Node.TextNode)
+                {
+                    Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* if TD or TH or text or inline or block then infer <TR> */
+
+                if (node.type != Node.EndTag)
+                {
+                    if (node.tag == tt.tagTd || 
+                        node.tag == tt.tagTh || 
+                        node.tag == tt.tagTable)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag( "tr");
+                        Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
+                    }
+                    else if (node.type == Node.TextNode
+                               || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+                    {
+                        Node.insertNodeBeforeElement(table, node);
+                        Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
+                        lexer.exiled = true;
+
+                        /* AQ: TODO
+                           Line 2040 of parser.c (13 Jan 2000) reads as follows:
+                           if (!node->type == TextNode)
+                           This will always evaluate to false.
+                           This has been reported to Dave Raggett <dsr@w3.org>
+                        */
+                        //Should be?: if (!(node.type == Node.TextNode))
+                        if (false)
+                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
+
+                        lexer.exiled = false;
+                        continue;
+                    }
+                    else if ((node.tag.model & Dict.CM_HEAD) != 0)
+                    {
+                        moveToHead(lexer, table, node);
+                        continue;
+                    }
+                }
+
+                /* 
+                  if this is the end tag for an ancestor element
+                  then infer end tag for this element
+                */
+                if (node.type == Node.EndTag)
+                {
+                    if (node.tag == tt.tagForm)
+                    {
+                        lexer.badForm = 1;
+                        Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0)
+                    {
+                        Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    for (parent = table.parent;
+                            parent != null; parent = parent.parent)
+                    {
+                        if (node.tag == parent.tag)
+                        {
+                            Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
+                            lexer.ungetToken();
+                            lexer.istackbase = istackbase;
+                            Node.trimEmptyElement(lexer, table);
+                            return;
+                        }
+                    }
+                }
+
+                if (!((node.tag.model & Dict.CM_TABLE) != 0))
+                {
+                    lexer.ungetToken();
+                    Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
+                    lexer.istackbase = istackbase;
+                    Node.trimEmptyElement(lexer, table);
+                    return;
+                }
+
+                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                {
+                    Node.insertNodeAtEnd(table, node);;
+                    parseTag(lexer, node, Lexer.IgnoreWhitespace);
+                    continue;
+                }
+
+                /* discard unexpected text nodes and end tags */
+                Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
+            }
+
+            Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
+            Node.trimEmptyElement(lexer, table);
+            lexer.istackbase = istackbase;
+        }
+
+    };
+
+    public static class ParseColGroup implements Parser {
+
+        public void parse( Lexer lexer, Node colgroup, short mode )
+        {
+            Node node, parent;
+            TagTable tt = lexer.configuration.tt;
+
+            if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
+                return;
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                if (node.tag == colgroup.tag && node.type == Node.EndTag)
+                {
+                    colgroup.closed = true;
+                    return;
+                }
+
+                /* 
+                  if this is the end tag for an ancestor element
+                  then infer end tag for this element
+                */
+                if (node.type == Node.EndTag)
+                {
+                    if (node.tag == tt.tagForm)
+                    {
+                        lexer.badForm = 1;
+                        Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    for (parent = colgroup.parent;
+                            parent != null; parent = parent.parent)
+                    {
+
+                        if (node.tag == parent.tag)
+                        {
+                            lexer.ungetToken();
+                            return;
+                        }
+                    }
+                }
+
+                if (node.type == Node.TextNode)
+                {
+                    lexer.ungetToken();
+                    return;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(colgroup, node))
+                    continue;
+
+                /* discard unknown tags */
+                if (node.tag == null)
+                {
+                    Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                if (node.tag != tt.tagCol)
+                {
+                    lexer.ungetToken();
+                    return;
+                }
+
+                if (node.type == Node.EndTag)
+                {
+                    Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+        
+                /* node should be <COL> */
+                Node.insertNodeAtEnd(colgroup, node);
+                parseTag(lexer, node, Lexer.IgnoreWhitespace);
+            }
+        }
+
+    };
+
+    public static class ParseRowGroup implements Parser {
+
+        public void parse( Lexer lexer, Node rowgroup, short mode )
+        {
+            Node node, parent;
+            TagTable tt = lexer.configuration.tt;
+
+            if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
+                return;
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                if (node.tag == rowgroup.tag)
+                {
+                    if (node.type == Node.EndTag)
+                    {
+                        rowgroup.closed = true;
+                        Node.trimEmptyElement(lexer, rowgroup);
+                        return;
+                    }
+
+                    lexer.ungetToken();
+                    return;
+                }
+
+                /* if </table> infer end tag */
+                if (node.tag == tt.tagTable && node.type == Node.EndTag)
+                {
+                    lexer.ungetToken();
+                    Node.trimEmptyElement(lexer, rowgroup);
+                    return;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(rowgroup, node))
+                    continue;
+
+                /* discard unknown tags */
+                if (node.tag == null && node.type != Node.TextNode)
+                {
+                    Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /*
+                  if TD or TH then infer <TR>
+                  if text or inline or block move before table
+                  if head content move to head
+                */
+
+                if (node.type != Node.EndTag)
+                {
+                    if (node.tag == tt.tagTd || node.tag == tt.tagTh)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag("tr");
+                        Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
+                    }
+                    else if (node.type == Node.TextNode
+                            || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+                    {
+                        Node.moveBeforeTable(rowgroup, node, tt);
+                        Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
+                        lexer.exiled = true;
+
+                        if (node.type != Node.TextNode)
+                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
+
+                        lexer.exiled = false;
+                        continue;
+                    }
+                    else if ((node.tag.model & Dict.CM_HEAD) != 0)
+                    {
+                        Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
+                        moveToHead(lexer, rowgroup, node);
+                        continue;
+                    }
+                }
+
+                /* 
+                  if this is the end tag for ancestor element
+                  then infer end tag for this element
+                */
+                if (node.type == Node.EndTag)
+                {
+                    if (node.tag == tt.tagForm)
+                    {
+                        lexer.badForm = 1;
+                        Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
+                    {
+                        Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    for (parent = rowgroup.parent;
+                            parent != null; parent = parent.parent)
+                    {
+                        if (node.tag == parent.tag)
+                        {
+                            lexer.ungetToken();
+                            Node.trimEmptyElement(lexer, rowgroup);
+                            return;
+                        }
+                    }
+                }
+
+                /*
+                  if THEAD, TFOOT or TBODY then implied end tag
+
+                */
+                if ((node.tag.model & Dict.CM_ROWGRP) != 0)
+                {
+                    if (node.type != Node.EndTag)
+                        lexer.ungetToken();
+
+                    Node.trimEmptyElement(lexer, rowgroup);
+                    return;
+                }
+
+                if (node.type == Node.EndTag)
+                {
+                    Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+        
+                if (!(node.tag == tt.tagTr))
+                {
+                    node = lexer.inferredTag( "tr");
+                    Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
+                    lexer.ungetToken();
+                }
+
+               /* node should be <TR> */
+                Node.insertNodeAtEnd(rowgroup, node);
+                parseTag(lexer, node, Lexer.IgnoreWhitespace);
+            }
+
+            Node.trimEmptyElement(lexer, rowgroup);
+        }
+
+    };
+
+    public static class ParseRow implements Parser {
+
+        public void parse( Lexer lexer, Node row, short mode )
+        {
+            Node node, parent;
+            boolean exclude_state;
+            TagTable tt = lexer.configuration.tt;
+
+            if ((row.tag.model & Dict.CM_EMPTY) != 0)
+                return;
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                if (node.tag == row.tag)
+                {
+                    if (node.type == Node.EndTag)
+                    {
+                        row.closed = true;
+                        Node.fixEmptyRow(lexer, row);
+                        return;
+                    }
+
+                    lexer.ungetToken();
+                    Node.fixEmptyRow(lexer, row);
+                    return;
+                }
+
+                /* 
+                  if this is the end tag for an ancestor element
+                  then infer end tag for this element
+                */
+                if (node.type == Node.EndTag)
+                {
+                    if (node.tag == tt.tagForm)
+                    {
+                        lexer.badForm = 1;
+                        Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    if (node.tag == tt.tagTd || node.tag == tt.tagTh)
+                    {
+                        Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+                        continue;
+                    }
+
+                    for (parent = row.parent;
+                            parent != null; parent = parent.parent)
+                    {
+                        if (node.tag == parent.tag)
+                        {
+                            lexer.ungetToken();
+                            Node.trimEmptyElement(lexer, row);
+                            return;
+                        }
+                    }
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(row, node))
+                    continue;
+
+                /* discard unknown tags */
+                if (node.tag == null && node.type != Node.TextNode)
+                {
+                    Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* discard unexpected <table> element */
+                if (node.tag == tt.tagTable)
+                {
+                    Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* THEAD, TFOOT or TBODY */
+                if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
+                {
+                    lexer.ungetToken();
+                    Node.trimEmptyElement(lexer, row);
+                    return;
+                }
+
+                if (node.type == Node.EndTag)
+                {
+                    Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /*
+                  if text or inline or block move before table
+                  if head content move to head
+                */
+
+                if (node.type != Node.EndTag)
+                {
+                    if (node.tag == tt.tagForm)
+                    {
+                        lexer.ungetToken();
+                        node = lexer.inferredTag("td");
+                        Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
+                    }
+                    else if (node.type == Node.TextNode
+                            || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
+                    {
+                        Node.moveBeforeTable(row, node, tt);
+                        Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+                        lexer.exiled = true;
+
+                        if (node.type != Node.TextNode)
+                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
+
+                        lexer.exiled = false;
+                        continue;
+                    }
+                    else if ((node.tag.model & Dict.CM_HEAD) != 0)
+                    {
+                        Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+                        moveToHead(lexer, row, node);
+                        continue;
+                    }
+                }
+
+                if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
+                {
+                    Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
+                    continue;
+                }
+        
+                /* node should be <TD> or <TH> */
+                Node.insertNodeAtEnd(row, node);
+                exclude_state = lexer.excludeBlocks;
+                lexer.excludeBlocks = false;
+                parseTag(lexer, node, Lexer.IgnoreWhitespace);
+                lexer.excludeBlocks = exclude_state;
+
+                /* pop inline stack */
+
+                while (lexer.istack.size() > lexer.istackbase)
+                    lexer.popInline( null);
+            }
+
+            Node.trimEmptyElement(lexer, row);
+        }
+
+    };
+
+    public static class ParseNoFrames implements Parser {
+
+        public void parse( Lexer lexer, Node noframes, short mode )
+        {
+            Node node;
+            boolean checkstack;
+            TagTable tt = lexer.configuration.tt;
+
+            lexer.badAccess |=  Report.USING_NOFRAMES;
+            mode = Lexer.IgnoreWhitespace;
+            checkstack = true;
+
+            while (true)
+            {
+                node = lexer.getToken(mode);
+                if (node == null) break;
+                if (node.tag == noframes.tag && node.type == Node.EndTag)
+                {
+                    noframes.closed = true;
+                    Node.trimSpaces(lexer, noframes);
+                    return;
+                }
+
+                if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset))
+                {
+                    Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE);
+                    Node.trimSpaces(lexer, noframes);
+                    lexer.ungetToken();
+                    return;
+                }
+
+                if (node.tag == tt.tagHtml)
+                {
+                    if (node.type == Node.StartTag || node.type == Node.StartEndTag)
+                        Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
+
+                    continue;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(noframes, node))
+                    continue;
+
+                if (node.tag == tt.tagBody && node.type == Node.StartTag)
+                {
+                    Node.insertNodeAtEnd(noframes, node);
+                    parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
+                    continue;
+                }
+
+                /* implicit body element inferred */
+                if (node.type == Node.TextNode || node.tag != null)
+                {
+                    lexer.ungetToken();
+                    node = lexer.inferredTag("body");
+                    if (lexer.configuration.XmlOut)
+                        Report.warning(lexer, noframes, node, Report.INSERTING_TAG);
+                    Node.insertNodeAtEnd(noframes, node);
+                    parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
+                    continue;
+                }
+                /* discard unexpected end tags */
+                Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
+            }
+
+            Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR);
+        }
+
+    };
+
+    public static class ParseSelect implements Parser {
+
+        public void parse( Lexer lexer, Node field, short mode )
+        {
+            Node node;
+            TagTable tt = lexer.configuration.tt;
+
+            lexer.insert = -1;  /* defer implicit inline start tags */
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                if (node.tag == field.tag && node.type == Node.EndTag)
+                {
+                    field.closed = true;
+                    Node.trimSpaces(lexer, field);
+                    return;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(field, node))
+                    continue;
+
+                if (node.type == Node.StartTag && 
+                     (node.tag == tt.tagOption ||
+                      node.tag == tt.tagOptgroup ||
+                      node.tag == tt.tagScript))
+                {
+                    Node.insertNodeAtEnd(field, node);
+                    parseTag(lexer, node, Lexer.IgnoreWhitespace);
+                    continue;
+                }
+
+                /* discard unexpected tags */
+                Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
+            }
+
+            Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
+        }
+
+    };
+
+    public static class ParseText implements Parser {
+
+        public void parse( Lexer lexer, Node field, short mode )
+        {
+            Node node;
+            TagTable tt = lexer.configuration.tt;
+
+            lexer.insert = -1;  /* defer implicit inline start tags */
+
+            if (field.tag == tt.tagTextarea)
+                mode = Lexer.Preformatted;
+
+            while (true)
+            {
+                node = lexer.getToken(mode);
+                if (node == null) break;
+                if (node.tag == field.tag && node.type == Node.EndTag)
+                {
+                    field.closed = true;
+                    Node.trimSpaces(lexer, field);
+                    return;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(field, node))
+                    continue;
+
+                if (node.type == Node.TextNode)
+                {
+                    /* only called for 1st child */
+                    if (field.content == null && !((mode & Lexer.Preformatted) != 0))
+                        Node.trimSpaces(lexer, field);
+
+                    if (node.start >= node.end)
+                    {
+                        continue;
+                    }
+
+                    Node.insertNodeAtEnd(field, node);
+                    continue;
+                }
+
+                if (node.tag == tt.tagFont)
+                {
+                    Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
+                    continue;
+                }
+
+                /* terminate element on other tags */
+                if (!((field.tag.model & Dict.CM_OPT) != 0))
+                        Report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE);
+
+                lexer.ungetToken();
+                Node.trimSpaces(lexer, field);
+                return;
+            }
+
+            if (!((field.tag.model & Dict.CM_OPT) != 0))
+                Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
+        }
+
+    };
+
+    public static class ParseOptGroup implements Parser {
+
+        public void parse( Lexer lexer, Node field, short mode )
+        {
+            Node node;
+            TagTable tt = lexer.configuration.tt;
+
+            lexer.insert = -1;  /* defer implicit inline start tags */
+
+            while (true)
+            {
+                node = lexer.getToken(Lexer.IgnoreWhitespace);
+                if (node == null) break;
+                if (node.tag == field.tag && node.type == Node.EndTag)
+                {
+                    field.closed = true;
+                    Node.trimSpaces(lexer, field);
+                    return;
+                }
+
+                /* deal with comments etc. */
+                if (Node.insertMisc(field, node))
+                    continue;
+
+                if (node.type == Node.StartTag && 
+                     (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
+                {
+                    if (node.tag == tt.tagOptgroup)
+                        Report.warning(lexer, field, node, Report.CANT_BE_NESTED);
+
+                    Node.insertNodeAtEnd(field, node);
+                    parseTag(lexer, node, Lexer.MixedContent);
+                    continue;
+                }
+
+                /* discard unexpected tags */
+                Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
+            }
+        }
+
+    };
+
+    public static Parser getParseHTML()
+    {
+        return _parseHTML;
+    }
+
+    public static Parser getParseHead()
+    {
+        return _parseHead;
+    }
+
+    public static Parser getParseTitle()
+    {
+        return _parseTitle;
+    }
+
+    public static Parser getParseScript()
+    {
+        return _parseScript;
+    }
+
+    public static Parser getParseBody()
+    {
+        return _parseBody;
+    }
+
+    public static Parser getParseFrameSet()
+    {
+        return _parseFrameSet;
+    }
+
+    public static Parser getParseInline()
+    {
+        return _parseInline;
+    }
+
+    public static Parser getParseList()
+    {
+        return _parseList;
+    }
+
+    public static Parser getParseDefList()
+    {
+        return _parseDefList;
+    }
+
+    public static Parser getParsePre()
+    {
+        return _parsePre;
+    }
+
+    public static Parser getParseBlock()
+    {
+        return _parseBlock;
+    }
+
+    public static Parser getParseTableTag()
+    {
+        return _parseTableTag;
+    }
+
+    public static Parser getParseColGroup()
+    {
+        return _parseColGroup;
+    }
+
+    public static Parser getParseRowGroup()
+    {
+        return _parseRowGroup;
+    }
+
+    public static Parser getParseRow()
+    {
+        return _parseRow;
+    }
+
+    public static Parser getParseNoFrames()
+    {
+        return _parseNoFrames;
+    }
+
+    public static Parser getParseSelect()
+    {
+        return _parseSelect;
+    }
+
+    public static Parser getParseText()
+    {
+        return _parseText;
+    }
+
+    public static Parser getParseOptGroup()
+    {
+        return _parseOptGroup;
+    }
+
+
+    private static Parser _parseHTML = new ParseHTML();
+    private static Parser _parseHead = new ParseHead();
+    private static Parser _parseTitle = new ParseTitle();
+    private static Parser _parseScript = new ParseScript();
+    private static Parser _parseBody = new ParseBody();
+    private static Parser _parseFrameSet = new ParseFrameSet();
+    private static Parser _parseInline = new ParseInline();
+    private static Parser _parseList = new ParseList();
+    private static Parser _parseDefList = new ParseDefList();
+    private static Parser _parsePre = new ParsePre();
+    private static Parser _parseBlock = new ParseBlock();
+    private static Parser _parseTableTag = new ParseTableTag();
+    private static Parser _parseColGroup = new ParseColGroup();
+    private static Parser _parseRowGroup = new ParseRowGroup();
+    private static Parser _parseRow = new ParseRow();
+    private static Parser _parseNoFrames = new ParseNoFrames();
+    private static Parser _parseSelect = new ParseSelect();
+    private static Parser _parseText = new ParseText();
+    private static Parser _parseOptGroup = new ParseOptGroup();
+
+    /*
+      HTML is the top level element
+    */
+    public static Node parseDocument(Lexer lexer)
+    {
+        Node node, document, html;
+        Node doctype = null;
+        TagTable tt = lexer.configuration.tt;
+
+        document = lexer.newNode();
+        document.type = Node.RootNode;
+
+        while (true)
+        {
+            node = lexer.getToken(Lexer.IgnoreWhitespace);
+            if (node == null) break;
+
+            /* deal with comments etc. */
+            if (Node.insertMisc(document, node))
+                continue;
+
+            if (node.type == Node.DocTypeTag)
+            {
+                if (doctype == null)
+                {
+                    Node.insertNodeAtEnd(document, node);
+                    doctype = node;
+                }
+                else
+                    Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
+                continue;
+            }
+
+            if (node.type == Node.EndTag)
+            {
+                Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); //TODO?
+                continue;
+            }
+
+            if (node.type != Node.StartTag || node.tag != tt.tagHtml)
+            {
+                lexer.ungetToken();
+                html = lexer.inferredTag("html");
+            }
+            else
+                html = node;
+
+            Node.insertNodeAtEnd(document, html);
+            getParseHTML().parse(lexer, html, (short)0); // TODO?
+            break;
+        }
+
+        return document;
+    }
+
+    /**
+     *  Indicates whether or not whitespace should be preserved for this element.
+     *  If an <code>xml:space</code> attribute is found, then if the attribute value is
+     *  <code>preserve</code>, returns <code>true</code>.  For any other value, returns
+     *  <code>false</code>.  If an <code>xml:space</code> attribute was <em>not</em>
+     *  found, then the following element names result in a return value of <code>true:
+     *  pre, script, style,</code> and <code>xsl:text</code>.  Finally, if a
+     *  <code>TagTable</code> was passed in and the element appears as the "pre" element
+     *  in the <code>TagTable</code>, then <code>true</code> will be returned.
+     *  Otherwise, <code>false</code> is returned.
+     *  @param element The <code>Node</code> to test to see if whitespace should be
+     *                 preserved.
+     *  @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code>
+     *            function.  This may be <code>null</code>, in which case this test
+     *            is bypassed.
+     *  @return <code>true</code> or <code>false</code>, as explained above.
+     */
+
+    public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
+    {
+        AttVal attribute;
+
+        /* search attributes for xml:space */
+        for (attribute = element.attributes; attribute != null; attribute = attribute.next)
+        {
+            if (attribute.attribute.equals("xml:space"))
+            {
+                if (attribute.value.equals("preserve"))
+                    return true;
+
+                return false;
+            }
+        }
+
+        /* kludge for html docs without explicit xml:space attribute */
+        if (Lexer.wstrcasecmp(element.element, "pre") == 0
+            || Lexer.wstrcasecmp(element.element, "script") == 0
+            || Lexer.wstrcasecmp(element.element, "style") == 0)
+            return true;
+
+        if ( (tt != null) && (tt.findParser(element) == getParsePre()) )
+            return true;
+
+        /* kludge for XSL docs */
+        if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
+            return true;
+
+        return false;
+    }
+
+    /*
+      XML documents
+    */
+    public static void parseXMLElement(Lexer lexer, Node element, short mode)
+    {
+        Node node;
+
+        /* Jeff Young's kludge for XSL docs */
+
+        if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
+            return;
+
+        /* if node is pre or has xml:space="preserve" then do so */
+
+        if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
+            mode = Lexer.Preformatted;
+
+        while (true)
+        {
+            node = lexer.getToken(mode);
+            if (node == null) break;
+            if (node.type == Node.EndTag && node.element.equals(element.element))
+            {
+                element.closed = true;
+                break;
+            }
+
+            /* discard unexpected end tags */
+            if (node.type == Node.EndTag)
+            {
+                Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG);
+                continue;
+            }
+
+            /* parse content on seeing start tag */
+            if (node.type == Node.StartTag)
+                parseXMLElement(lexer, node, mode);
+
+            Node.insertNodeAtEnd(element, node);
+        }
+
+        /*
+         if first child is text then trim initial space and
+         delete text node if it is empty.
+        */
+
+        node = element.content;
+
+        if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
+        {
+            if (node.textarray[node.start] == (byte)' ')
+            {
+                node.start++;
+
+                if (node.start >= node.end)
+                    Node.discardElement(node);
+            }
+        }
+
+        /*
+         if last child is text then trim final space and
+         delete the text node if it is empty
+        */
+
+        node = element.last;
+
+        if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
+        {
+            if (node.textarray[node.end - 1] == (byte)' ')
+            {
+                node.end--;
+
+                if (node.start >= node.end)
+                    Node.discardElement(node);
+            }
+        }
+    }
+
+    public static Node parseXMLDocument(Lexer lexer)
+    {
+        Node node, document, doctype;
+
+        document = lexer.newNode();
+        document.type = Node.RootNode;
+        doctype = null;
+        lexer.configuration.XmlTags = true;
+
+        while (true)
+        {
+            node = lexer.getToken(Lexer.IgnoreWhitespace);
+            if (node == null) break;
+            /* discard unexpected end tags */
+            if (node.type == Node.EndTag)
+            {
+                Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
+                continue;
+            }
+
+            /* deal with comments etc. */
+            if (Node.insertMisc(document, node))
+                continue;
+
+            if (node.type == Node.DocTypeTag)
+            {
+                if (doctype == null)
+                {
+                    Node.insertNodeAtEnd(document, node);
+                    doctype = node;
+                }
+                else
+                    Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
+                continue;
+            }
+
+            /* if start tag then parse element's content */
+            if (node.type == Node.StartTag)
+            {
+                Node.insertNodeAtEnd(document, node);
+                parseXMLElement(lexer, node, Lexer.IgnoreWhitespace);
+            }
+
+        }
+
+if (false) { //#if 0
+        /* discard the document type */
+        node = document.findDocType();
+
+        if (node != null)
+            Node.discardElement(node);
+} // #endif
+
+        if  (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
+                Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
+
+        /* ensure presence of initial <?XML version="1.0"?> */
+        if (lexer.configuration.XmlPi)
+            lexer.fixXMLPI(document);
+
+        return document;
+    }
+
+    public static boolean isJavaScript(Node node)
+    {
+        boolean result = false;
+        AttVal attr;
+
+        if (node.attributes == null)
+            return true;
+
+        for (attr = node.attributes; attr != null; attr = attr.next)
+        {
+            if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0
+                    || Lexer.wstrcasecmp(attr.attribute, "type") == 0)
+                    && Lexer.wsubstr(attr.value, "javascript"))
+                result = true;
+        }
+
+        return result;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Report.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Report.java
new file mode 100644 (file)
index 0000000..abed122
--- /dev/null
@@ -0,0 +1,1130 @@
+/*
+ * @(#)Report.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Error/informational message reporter.
+ *
+ * You should only need to edit the file TidyMessages.properties
+ * to localize HTML tidy.
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.PrintWriter;
+import java.text.MessageFormat;
+import java.util.Hashtable;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+
+import org.eclipse.core.resources.IMarker;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.ui.texteditor.MarkerUtilities;
+
+public class Report {
+
+  /* used to point to Web Accessibility Guidelines */
+  public static final String ACCESS_URL = "http://www.w3.org/WAI/GL";
+
+  public static final String RELEASE_DATE = "4th August 2000";
+
+  public static String currentFile; /* sasdjb 01May00 for GNU Emacs error parsing */
+
+  /* error codes for entities */
+
+  public static final short MISSING_SEMICOLON = 1;
+  public static final short UNKNOWN_ENTITY = 2;
+  public static final short UNESCAPED_AMPERSAND = 3;
+
+  /* error codes for element messages */
+
+  public static final short MISSING_ENDTAG_FOR = 1;
+  public static final short MISSING_ENDTAG_BEFORE = 2;
+  public static final short DISCARDING_UNEXPECTED = 3;
+  public static final short NESTED_EMPHASIS = 4;
+  public static final short NON_MATCHING_ENDTAG = 5;
+  public static final short TAG_NOT_ALLOWED_IN = 6;
+  public static final short MISSING_STARTTAG = 7;
+  public static final short UNEXPECTED_ENDTAG = 8;
+  public static final short USING_BR_INPLACE_OF = 9;
+  public static final short INSERTING_TAG = 10;
+  public static final short SUSPECTED_MISSING_QUOTE = 11;
+  public static final short MISSING_TITLE_ELEMENT = 12;
+  public static final short DUPLICATE_FRAMESET = 13;
+  public static final short CANT_BE_NESTED = 14;
+  public static final short OBSOLETE_ELEMENT = 15;
+  public static final short PROPRIETARY_ELEMENT = 16;
+  public static final short UNKNOWN_ELEMENT = 17;
+  public static final short TRIM_EMPTY_ELEMENT = 18;
+  public static final short COERCE_TO_ENDTAG = 19;
+  public static final short ILLEGAL_NESTING = 20;
+  public static final short NOFRAMES_CONTENT = 21;
+  public static final short CONTENT_AFTER_BODY = 22;
+  public static final short INCONSISTENT_VERSION = 23;
+  public static final short MALFORMED_COMMENT = 24;
+  public static final short BAD_COMMENT_CHARS = 25;
+  public static final short BAD_XML_COMMENT = 26;
+  public static final short BAD_CDATA_CONTENT = 27;
+  public static final short INCONSISTENT_NAMESPACE = 28;
+  public static final short DOCTYPE_AFTER_TAGS = 29;
+  public static final short MALFORMED_DOCTYPE = 30;
+  public static final short UNEXPECTED_END_OF_FILE = 31;
+  public static final short DTYPE_NOT_UPPER_CASE = 32;
+  public static final short TOO_MANY_ELEMENTS = 33;
+
+  /* error codes used for attribute messages */
+
+  public static final short UNKNOWN_ATTRIBUTE = 1;
+  public static final short MISSING_ATTRIBUTE = 2;
+  public static final short MISSING_ATTR_VALUE = 3;
+  public static final short BAD_ATTRIBUTE_VALUE = 4;
+  public static final short UNEXPECTED_GT = 5;
+  public static final short PROPRIETARY_ATTR_VALUE = 6;
+  public static final short REPEATED_ATTRIBUTE = 7;
+  public static final short MISSING_IMAGEMAP = 8;
+  public static final short XML_ATTRIBUTE_VALUE = 9;
+  public static final short UNEXPECTED_QUOTEMARK = 10;
+  public static final short ID_NAME_MISMATCH = 11;
+
+  /* accessibility flaws */
+
+  public static final short MISSING_IMAGE_ALT = 1;
+  public static final short MISSING_LINK_ALT = 2;
+  public static final short MISSING_SUMMARY = 4;
+  public static final short MISSING_IMAGE_MAP = 8;
+  public static final short USING_FRAMES = 16;
+  public static final short USING_NOFRAMES = 32;
+
+  /* presentation flaws */
+
+  public static final short USING_SPACER = 1;
+  public static final short USING_LAYER = 2;
+  public static final short USING_NOBR = 4;
+  public static final short USING_FONT = 8;
+  public static final short USING_BODY = 16;
+
+  /* character encoding errors */
+  public static final short WINDOWS_CHARS = 1;
+  public static final short NON_ASCII = 2;
+  public static final short FOUND_UTF16 = 4;
+
+  private static short optionerrors;
+
+  private static ResourceBundle res = null;
+
+  /*
+   * Loads the message bundle into 'res' when the class is initialized.
+   * If the bundle cannot be found, the MissingResourceException is rethrown as an
+   * Error carrying only the exception's string form, so class initialization fails.
+   * NOTE(review): the bundle is looked up as "org/w3c/tidy/TidyMessages" although
+   * this class is declared in package net.sourceforge.phpdt.tidy -- confirm that the
+   * properties file is really available at that location.
+   */
+  static {
+    try {
+      res = ResourceBundle.getBundle("org/w3c/tidy/TidyMessages");
+    } catch (MissingResourceException e) {
+      throw new Error(e.toString());
+    }
+  }
+
+  public static void tidyPrint(PrintWriter p, String msg) {
+    p.print(msg);
+  }
+
+  public static void tidyPrintln(PrintWriter p, String msg) {
+    p.println(msg);
+  }
+
+  public static void tidyPrintln(PrintWriter p) {
+    p.println();
+  }
+
+  public static void showVersion(PrintWriter p) {
+    tidyPrintln(p, "Java HTML Tidy release date: " + RELEASE_DATE);
+    tidyPrintln(p, "See http://www.w3.org/People/Raggett for details");
+  }
+
+  public static void tag(Lexer lexer, Node tag) {
+    if (tag != null) {
+      if (tag.type == Node.StartTag)
+        tidyPrint(lexer.errout, "<" + tag.element + ">");
+      else if (tag.type == Node.EndTag)
+        tidyPrint(lexer.errout, "</" + tag.element + ">");
+      else if (tag.type == Node.DocTypeTag)
+        tidyPrint(lexer.errout, "<!DOCTYPE>");
+      else if (tag.type == Node.TextNode)
+        tidyPrint(lexer.errout, "plain text");
+      else
+        tidyPrint(lexer.errout, tag.element);
+    }
+  }
+
+  public static void tag(StringBuffer errorMessage, Lexer lexer, Node tag) {
+    if (tag != null) {
+      if (tag.type == Node.StartTag) {
+        tidyPrint(lexer.errout, "<" + tag.element + ">");
+        errorMessage.append("<" + tag.element + ">");
+      } else if (tag.type == Node.EndTag) {
+        tidyPrint(lexer.errout, "</" + tag.element + ">");
+        errorMessage.append("</" + tag.element + ">");
+      } else if (tag.type == Node.DocTypeTag) {
+        tidyPrint(lexer.errout, "<!DOCTYPE>");
+        errorMessage.append("</" + tag.element + ">");
+      } else if (tag.type == Node.TextNode) {
+        tidyPrint(lexer.errout, "plain text");
+        errorMessage.append("plain text");
+      } else {
+        tidyPrint(lexer.errout, tag.element);
+        errorMessage.append(tag.element);
+      }
+    }
+  }
+
+  /* lexer is not defined when this is called */
+  public static void unknownOption(String option) {
+    optionerrors++;
+    try {
+      System.err.println(MessageFormat.format(res.getString("unknown_option"), new Object[] { option }));
+    } catch (MissingResourceException e) {
+      System.err.println(e.toString());
+    }
+  }
+
+  /* lexer is not defined when this is called */
+  public static void badArgument(String option) {
+    optionerrors++;
+    try {
+      System.err.println(MessageFormat.format(res.getString("bad_argument"), new Object[] { option }));
+    } catch (MissingResourceException e) {
+      System.err.println(e.toString());
+    }
+  }
+
+  public static void position(Lexer lexer) {
+    try {
+      /* Change formatting to be parsable by GNU Emacs */
+      if (lexer.configuration.Emacs) {
+        tidyPrint(
+          lexer.errout,
+          MessageFormat.format(
+            res.getString("emacs_format"),
+            new Object[] { currentFile, new Integer(lexer.lines), new Integer(lexer.columns)}));
+        tidyPrint(lexer.errout, " ");
+      } else /* traditional format */ {
+        tidyPrint(
+          lexer.errout,
+          MessageFormat.format(res.getString("line_column"), new Object[] { new Integer(lexer.lines), new Integer(lexer.columns)}));
+      }
+    } catch (MissingResourceException e) {
+      lexer.errout.println(e.toString());
+    }
+  }
+
+  public static void encodingError(Lexer lexer, short code, int c) {
+    lexer.warnings++;
+
+    if (lexer.configuration.ShowWarnings) {
+      position(lexer);
+
+      if (code == WINDOWS_CHARS) {
+        lexer.badChars |= WINDOWS_CHARS;
+        try {
+          Hashtable attributes = new Hashtable();
+          StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
+          MarkerUtilities.setLineNumber(attributes, lexer.lines);
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("illegal_char"), new Object[] { new Integer(c)}));
+          errorMessage.append(MessageFormat.format(res.getString("illegal_char"), new Object[] { new Integer(c)}));
+          attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
+          // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
+          // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
+          try {
+            MarkerUtilities.setMessage(attributes, errorMessage.toString());
+            MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
+          } catch (CoreException e) {
+          }
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      }
+
+      tidyPrintln(lexer.errout);
+    }
+  }
+
+  public static void entityError(Lexer lexer, short code, String entity, int c) {
+    lexer.warnings++;
+
+    if (lexer.configuration.ShowWarnings) {
+      position(lexer);
+      Hashtable attributes = new Hashtable();
+      StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
+      MarkerUtilities.setLineNumber(attributes, lexer.lines);
+
+      if (code == MISSING_SEMICOLON) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_semicolon"), new Object[] { entity }));
+          errorMessage.append(MessageFormat.format(res.getString("missing_semicolon"), new Object[] { entity }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == UNKNOWN_ENTITY) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("unknown_entity"), new Object[] { entity }));
+          errorMessage.append(MessageFormat.format(res.getString("unknown_entity"), new Object[] { entity }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == UNESCAPED_AMPERSAND) {
+        try {
+          tidyPrint(lexer.errout, res.getString("unescaped_ampersand"));
+          errorMessage.append(res.getString("unescaped_ampersand"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      }
+      attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
+      // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
+      // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
+      try {
+        MarkerUtilities.setMessage(attributes, errorMessage.toString());
+        MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
+      } catch (CoreException e) {
+      }
+      tidyPrintln(lexer.errout);
+    }
+  }
+
+  public static void attrError(Lexer lexer, Node node, String attr, short code) {
+    lexer.warnings++;
+
+    /* keep quiet after 6 errors */
+    if (lexer.errors > 6)
+      return;
+
+    Hashtable attributes = new Hashtable();
+    StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
+
+    if (lexer.configuration.ShowWarnings) {
+      /* on end of file adjust reported position to end of input */
+      if (code == UNEXPECTED_END_OF_FILE) {
+        lexer.lines = lexer.in.curline;
+        lexer.columns = lexer.in.curcol;
+      }
+
+      position(lexer);
+
+      MarkerUtilities.setLineNumber(attributes, lexer.lines);
+
+      if (code == UNKNOWN_ATTRIBUTE) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("unknown_attribute"), new Object[] { attr }));
+          errorMessage.append(MessageFormat.format(res.getString("unknown_attribute"), new Object[] { attr }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == MISSING_ATTRIBUTE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+          tag(errorMessage, lexer, node);
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attribute"), new Object[] { attr }));
+          errorMessage.append(MessageFormat.format(res.getString("missing_attribute"), new Object[] { attr }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == MISSING_ATTR_VALUE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+          tag(errorMessage, lexer, node);
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attr_value"), new Object[] { attr }));
+          errorMessage.append(MessageFormat.format(res.getString("missing_attr_value"), new Object[] { attr }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == MISSING_IMAGEMAP) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+          tag(errorMessage, lexer, node);
+          tidyPrint(lexer.errout, res.getString("missing_imagemap"));
+          errorMessage.append(res.getString("missing_imagemap"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        lexer.badAccess |= MISSING_IMAGE_MAP;
+      } else if (code == BAD_ATTRIBUTE_VALUE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+          tag(lexer, node);
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("bad_attribute_value"), new Object[] { attr }));
+          errorMessage.append(MessageFormat.format(res.getString("bad_attribute_value"), new Object[] { attr }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == XML_ATTRIBUTE_VALUE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+          tag(errorMessage, lexer, node);
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("xml_attribute_value"), new Object[] { attr }));
+          errorMessage.append(MessageFormat.format(res.getString("xml_attribute_value"), new Object[] { attr }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == UNEXPECTED_GT) {
+        try {
+          tidyPrint(lexer.errout, res.getString("error"));
+          errorMessage.append(res.getString("error"));
+          tag(errorMessage, lexer, node);
+          tidyPrint(lexer.errout, res.getString("unexpected_gt"));
+          errorMessage.append(res.getString("unexpected_gt"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        lexer.errors++;
+        ;
+      } else if (code == UNEXPECTED_QUOTEMARK) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+          tag(errorMessage, lexer, node);
+          tidyPrint(lexer.errout, res.getString("unexpected_quotemark"));
+          errorMessage.append(res.getString("unexpected_quotemark"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == REPEATED_ATTRIBUTE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+          tag(errorMessage, lexer, node);
+          tidyPrint(lexer.errout, res.getString("repeated_attribute"));
+          errorMessage.append(res.getString("repeated_attribute"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == PROPRIETARY_ATTR_VALUE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+          tag(errorMessage, lexer, node);
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("proprietary_attr_value"), new Object[] { attr }));
+          errorMessage.append(MessageFormat.format(res.getString("proprietary_attr_value"), new Object[] { attr }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == UNEXPECTED_END_OF_FILE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("unexpected_end_of_file"));
+          errorMessage.append(res.getString("unexpected_end_of_file"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == ID_NAME_MISMATCH) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+          tag(errorMessage, lexer, node);
+          tidyPrint(lexer.errout, res.getString("id_name_mismatch"));
+          errorMessage.append(res.getString("id_name_mismatch"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      }
+
+      // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
+      attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
+      // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
+      try {
+        MarkerUtilities.setMessage(attributes, errorMessage.toString());
+        MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
+      } catch (CoreException e) {
+      }
+      tidyPrintln(lexer.errout);
+    } else if (code == UNEXPECTED_GT) {
+      position(lexer);
+      MarkerUtilities.setLineNumber(attributes, lexer.lines);
+      try {
+        tidyPrint(lexer.errout, res.getString("error"));
+        errorMessage.append(res.getString("error"));
+        tag(errorMessage, lexer, node);
+        tidyPrint(lexer.errout, res.getString("unexpected_gt"));
+        errorMessage.append(res.getString("unexpected_gt"));
+        attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
+        // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
+        // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
+        try {
+          MarkerUtilities.setMessage(attributes, errorMessage.toString());
+          MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
+        } catch (CoreException e) {
+        }
+      } catch (MissingResourceException e) {
+        lexer.errout.println(e.toString());
+      }
+      tidyPrintln(lexer.errout);
+      lexer.errors++;
+      ;
+    }
+
+  }
+
+  public static void warning(Lexer lexer, Node element, Node node, short code) {
+
+    TagTable tt = lexer.configuration.tt;
+
+    lexer.warnings++;
+
+    /* keep quiet after 6 errors */
+    if (lexer.errors > 6)
+      return;
+
+    if (lexer.configuration.ShowWarnings) {
+
+      /* on end of file adjust reported position to end of input */
+      if (code == UNEXPECTED_END_OF_FILE) {
+        lexer.lines = lexer.in.curline;
+        lexer.columns = lexer.in.curcol;
+      }
+
+      position(lexer);
+      Hashtable attributes = new Hashtable();
+      StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
+
+      MarkerUtilities.setLineNumber(attributes, lexer.lines);
+
+      if (code == MISSING_ENDTAG_FOR) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_endtag_for"), new Object[] { element.element }));
+          errorMessage.append(MessageFormat.format(res.getString("missing_endtag_for"), new Object[] { element.element }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == MISSING_ENDTAG_BEFORE) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_endtag_before"), new Object[] { element.element }));
+          errorMessage.append(MessageFormat.format(res.getString("missing_endtag_before"), new Object[] { element.element }));
+
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+      } else if (code == DISCARDING_UNEXPECTED) {
+        try {
+          tidyPrint(lexer.errout, res.getString("discarding_unexpected"));
+          errorMessage.append(res.getString("discarding_unexpected"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+      } else if (code == NESTED_EMPHASIS) {
+        try {
+          tidyPrint(lexer.errout, res.getString("nested_emphasis"));
+          errorMessage.append(res.getString("nested_emphasis"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+      } else if (code == COERCE_TO_ENDTAG) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("coerce_to_endtag"), new Object[] { element.element }));
+          errorMessage.append(MessageFormat.format(res.getString("coerce_to_endtag"), new Object[] { element.element }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == NON_MATCHING_ENDTAG) {
+        try {
+          tidyPrint(lexer.errout, res.getString("non_matching_endtag_1"));
+          errorMessage.append(res.getString("non_matching_endtag_1"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("non_matching_endtag_2"), new Object[] { element.element }));
+          errorMessage.append(MessageFormat.format(res.getString("non_matching_endtag_2"), new Object[] { element.element }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == TAG_NOT_ALLOWED_IN) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("tag_not_allowed_in"), new Object[] { element.element }));
+          errorMessage.append(MessageFormat.format(res.getString("tag_not_allowed_in"), new Object[] { element.element }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == DOCTYPE_AFTER_TAGS) {
+        try {
+          tidyPrint(lexer.errout, res.getString("doctype_after_tags"));
+          errorMessage.append(res.getString("doctype_after_tags"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == MISSING_STARTTAG) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_starttag"), new Object[] { node.element }));
+          errorMessage.append(MessageFormat.format(res.getString("missing_starttag"), new Object[] { node.element }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == UNEXPECTED_ENDTAG) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element }));
+          if (element != null)
+            tidyPrint(
+              lexer.errout,
+              MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element }));
+          errorMessage.append(MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == TOO_MANY_ELEMENTS) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("too_many_elements"), new Object[] { node.element }));
+          if (element != null)
+            tidyPrint(
+              lexer.errout,
+              MessageFormat.format(res.getString("too_many_elements_suffix"), new Object[] { element.element }));
+          errorMessage.append(MessageFormat.format(res.getString("too_many_elements"), new Object[] { node.element }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == USING_BR_INPLACE_OF) {
+        try {
+          tidyPrint(lexer.errout, res.getString("using_br_inplace_of"));
+          errorMessage.append(res.getString("using_br_inplace_of"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+      } else if (code == INSERTING_TAG) {
+        try {
+          tidyPrint(lexer.errout, MessageFormat.format(res.getString("inserting_tag"), new Object[] { node.element }));
+          errorMessage.append(MessageFormat.format(res.getString("inserting_tag"), new Object[] { node.element }));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == CANT_BE_NESTED) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+        try {
+          tidyPrint(lexer.errout, res.getString("cant_be_nested"));
+          errorMessage.append(res.getString("cant_be_nested"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == PROPRIETARY_ELEMENT) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+        try {
+          tidyPrint(lexer.errout, res.getString("proprietary_element"));
+          errorMessage.append(res.getString("proprietary_element"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+
+        if (node.tag == tt.tagLayer)
+          lexer.badLayout |= USING_LAYER;
+        else if (node.tag == tt.tagSpacer)
+          lexer.badLayout |= USING_SPACER;
+        else if (node.tag == tt.tagNobr)
+          lexer.badLayout |= USING_NOBR;
+      } else if (code == OBSOLETE_ELEMENT) {
+        try {
+          if (element.tag != null && (element.tag.model & Dict.CM_OBSOLETE) != 0) {
+            tidyPrint(lexer.errout, res.getString("obsolete_element"));
+            errorMessage.append(res.getString("obsolete_element"));
+          } else {
+            tidyPrint(lexer.errout, res.getString("replacing_element"));
+            errorMessage.append(res.getString("replacing_element"));
+          }
+
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, element);
+        try {
+          tidyPrint(lexer.errout, res.getString("by"));
+          errorMessage.append(res.getString("by"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+      } else if (code == TRIM_EMPTY_ELEMENT) {
+        try {
+          tidyPrint(lexer.errout, res.getString("trim_empty_element"));
+          errorMessage.append(res.getString("trim_empty_element"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, element);
+      } else if (code == MISSING_TITLE_ELEMENT) {
+        try {
+          tidyPrint(lexer.errout, res.getString("missing_title_element"));
+          errorMessage.append(res.getString("missing_title_element"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == ILLEGAL_NESTING) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, element);
+        try {
+          tidyPrint(lexer.errout, res.getString("illegal_nesting"));
+          errorMessage.append(res.getString("illegal_nesting"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == NOFRAMES_CONTENT) {
+        try {
+          tidyPrint(lexer.errout, res.getString("warning"));
+          errorMessage.append(res.getString("warning"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, node);
+        try {
+          tidyPrint(lexer.errout, res.getString("noframes_content"));
+          errorMessage.append(res.getString("noframes_content"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == INCONSISTENT_VERSION) {
+        try {
+          tidyPrint(lexer.errout, res.getString("inconsistent_version"));
+          errorMessage.append(res.getString("inconsistent_version"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == MALFORMED_DOCTYPE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("malformed_doctype"));
+          errorMessage.append(res.getString("malformed_doctype"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == CONTENT_AFTER_BODY) {
+        try {
+          tidyPrint(lexer.errout, res.getString("content_after_body"));
+          errorMessage.append(res.getString("content_after_body"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == MALFORMED_COMMENT) {
+        try {
+          tidyPrint(lexer.errout, res.getString("malformed_comment"));
+          errorMessage.append(res.getString("malformed_comment"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == BAD_COMMENT_CHARS) {
+        try {
+          tidyPrint(lexer.errout, res.getString("bad_comment_chars"));
+          errorMessage.append(res.getString("bad_comment_chars"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == BAD_XML_COMMENT) {
+        try {
+          tidyPrint(lexer.errout, res.getString("bad_xml_comment"));
+          errorMessage.append(res.getString("bad_xml_comment"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == BAD_CDATA_CONTENT) {
+        try {
+          tidyPrint(lexer.errout, res.getString("bad_cdata_content"));
+          errorMessage.append(res.getString("bad_cdata_content"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == INCONSISTENT_NAMESPACE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("inconsistent_namespace"));
+          errorMessage.append(res.getString("inconsistent_namespace"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == DTYPE_NOT_UPPER_CASE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("dtype_not_upper_case"));
+          errorMessage.append(res.getString("dtype_not_upper_case"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+      } else if (code == UNEXPECTED_END_OF_FILE) {
+        try {
+          tidyPrint(lexer.errout, res.getString("unexpected_end_of_file"));
+          errorMessage.append(res.getString("unexpected_end_of_file"));
+        } catch (MissingResourceException e) {
+          lexer.errout.println(e.toString());
+        }
+        tag(errorMessage, lexer, element);
+      }
+      //       attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
+      attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
+      //  attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
+      try {
+        MarkerUtilities.setMessage(attributes, errorMessage.toString());
+        MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
+      } catch (CoreException e) {
+      }
+      tidyPrintln(lexer.errout);
+    }
+  }
+
+  /**
+   * Reports an error: prints the localized message to <code>lexer.errout</code>
+   * and creates a problem marker with severity <code>IMarker.SEVERITY_ERROR</code>
+   * on the file returned by <code>lexer.getIFile()</code>.
+   *
+   * Every call increments <code>lexer.warnings</code>. Once <code>lexer.errors</code>
+   * exceeds 6 the method returns without printing or creating a marker;
+   * otherwise <code>lexer.errors</code> is incremented as well.
+   *
+   * The marker message is "Column n: " followed by the same text that is
+   * printed. A code other than the four handled below yields a marker that
+   * carries only the column prefix.
+   *
+   * @param lexer   supplies position, counters, error stream and target file
+   * @param element only read for UNEXPECTED_ENDTAG, where it may be null
+   * @param node    read for UNKNOWN_ELEMENT and UNEXPECTED_ENDTAG
+   * @param code    SUSPECTED_MISSING_QUOTE, DUPLICATE_FRAMESET, UNKNOWN_ELEMENT
+   *                or UNEXPECTED_ENDTAG
+   */
+  public static void error(Lexer lexer, Node element, Node node, short code) {
+    lexer.warnings++;
+
+    /* keep quiet after 6 errors */
+    if (lexer.errors > 6)
+      return;
+
+    lexer.errors++;
+
+    position(lexer);
+
+    Hashtable attributes = new Hashtable();
+    StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
+
+    MarkerUtilities.setLineNumber(attributes, lexer.lines);
+
+    /* each text is looked up once and then both printed and added to the marker message */
+    if (code == SUSPECTED_MISSING_QUOTE) {
+      try {
+        String text = res.getString("suspected_missing_quote");
+        tidyPrint(lexer.errout, text);
+        errorMessage.append(text);
+      } catch (MissingResourceException e) {
+        lexer.errout.println(e.toString());
+      }
+    } else if (code == DUPLICATE_FRAMESET) {
+      try {
+        String text = res.getString("duplicate_frameset");
+        tidyPrint(lexer.errout, text);
+        errorMessage.append(text);
+      } catch (MissingResourceException e) {
+        lexer.errout.println(e.toString());
+      }
+    } else if (code == UNKNOWN_ELEMENT) {
+      try {
+        String prefix = res.getString("error");
+        tidyPrint(lexer.errout, prefix);
+        errorMessage.append(prefix);
+      } catch (MissingResourceException e) {
+        lexer.errout.println(e.toString());
+      }
+      tag(errorMessage, lexer, node);
+      try {
+        String suffix = res.getString("unknown_element");
+        tidyPrint(lexer.errout, suffix);
+        errorMessage.append(suffix);
+      } catch (MissingResourceException e) {
+        lexer.errout.println(e.toString());
+      }
+    } else if (code == UNEXPECTED_ENDTAG) {
+      try {
+        String text = MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element });
+        tidyPrint(lexer.errout, text);
+        errorMessage.append(text);
+        if (element != null) {
+          String suffix =
+            MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element });
+          tidyPrint(lexer.errout, suffix);
+          errorMessage.append(suffix);
+        }
+      } catch (MissingResourceException e) {
+        lexer.errout.println(e.toString());
+      }
+    }
+
+    /* finish the printed line first so that a marker failure is reported on a line of its own */
+    tidyPrintln(lexer.errout);
+
+    attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
+    try {
+      MarkerUtilities.setMessage(attributes, errorMessage.toString());
+      MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
+    } catch (CoreException e) {
+      /* the marker is best effort, but a failure must not vanish silently */
+      lexer.errout.println(e.toString());
+    }
+  }
+
+  /**
+   * Prints the summary texts for the problem categories flagged on the lexer:
+   * bad characters, bad forms, accessibility and layout.
+   *
+   * Before anything is printed, the USING_FRAMES and USING_NOFRAMES bits of
+   * <code>lexer.badAccess</code> are cleared unless USING_FRAMES is set while
+   * USING_NOFRAMES is not. A missing resource key is reported on
+   * <code>lexer.errout</code> and does not stop the remaining output.
+   *
+   * @param lexer lexer whose badChars, badForm, badAccess and badLayout flags
+   *              are read (badAccess may also be modified as described above)
+   */
+  public static void errorSummary(Lexer lexer) {
+    /* frames only count when USING_FRAMES is flagged without USING_NOFRAMES;
+       in every other case drop both bits so that badAccess can become zero */
+    final boolean framesOnly =
+      (lexer.badAccess & USING_FRAMES) != 0 && (lexer.badAccess & USING_NOFRAMES) == 0;
+    if (!framesOnly)
+      lexer.badAccess &= ~(USING_FRAMES | USING_NOFRAMES);
+
+    /* resource keys to print, collected in output order (at most 13) */
+    final String[] keys = new String[13];
+    int count = 0;
+    /* position of the one key whose text is a pattern taking ACCESS_URL */
+    int patternAt = -1;
+
+    if ((lexer.badChars & WINDOWS_CHARS) != 0)
+      keys[count++] = "badchars_summary";
+
+    if (lexer.badForm != 0)
+      keys[count++] = "badform_summary";
+
+    if (lexer.badAccess != 0) {
+      if ((lexer.badAccess & MISSING_SUMMARY) != 0)
+        keys[count++] = "badaccess_missing_summary";
+      if ((lexer.badAccess & MISSING_IMAGE_ALT) != 0)
+        keys[count++] = "badaccess_missing_image_alt";
+      if ((lexer.badAccess & MISSING_IMAGE_MAP) != 0)
+        keys[count++] = "badaccess_missing_image_map";
+      if ((lexer.badAccess & MISSING_LINK_ALT) != 0)
+        keys[count++] = "badaccess_missing_link_alt";
+      if (framesOnly)
+        keys[count++] = "badaccess_frames";
+
+      /* the general accessibility note follows whenever any access bit is left */
+      patternAt = count;
+      keys[count++] = "badaccess_summary";
+    }
+
+    if ((lexer.badLayout & USING_LAYER) != 0)
+      keys[count++] = "badlayout_using_layer";
+    if ((lexer.badLayout & USING_SPACER) != 0)
+      keys[count++] = "badlayout_using_spacer";
+    if ((lexer.badLayout & USING_FONT) != 0)
+      keys[count++] = "badlayout_using_font";
+    if ((lexer.badLayout & USING_NOBR) != 0)
+      keys[count++] = "badlayout_using_nobr";
+    if ((lexer.badLayout & USING_BODY) != 0)
+      keys[count++] = "badlayout_using_body";
+
+    /* each text is looked up and printed on its own so one missing key cannot suppress the others */
+    for (int k = 0; k < count; k++) {
+      try {
+        String text = res.getString(keys[k]);
+        if (k == patternAt)
+          text = MessageFormat.format(text, new Object[] { ACCESS_URL });
+        tidyPrint(lexer.errout, text);
+      } catch (MissingResourceException missing) {
+        lexer.errout.println(missing.toString());
+      }
+    }
+  }
+
+  /**
+   * Prints the localized "unrecognized_option" text, formatted with the given
+   * option character, through tidyPrintln.
+   *
+   * @param errout writer that receives the text (or the exception text when
+   *               the resource key is missing)
+   * @param c      the option character that was not recognized
+   */
+  public static void unknownOption(PrintWriter errout, char c) {
+    final Object[] args = { String.valueOf(c) };
+    try {
+      final String pattern = res.getString("unrecognized_option");
+      tidyPrintln(errout, MessageFormat.format(pattern, args));
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+  /**
+   * Prints the localized "unknown_file" text, formatted with the program and
+   * file names, through tidyPrintln.
+   *
+   * @param errout  writer that receives the text (or the exception text when
+   *                the resource key is missing)
+   * @param program first argument of the message pattern
+   * @param file    second argument of the message pattern
+   */
+  public static void unknownFile(PrintWriter errout, String program, String file) {
+    final Object[] args = { program, file };
+    try {
+      final String pattern = res.getString("unknown_file");
+      tidyPrintln(errout, MessageFormat.format(pattern, args));
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+  /**
+   * Prints the localized "needs_author_intervention" text through tidyPrintln.
+   *
+   * @param errout writer that receives the text (or the exception text when
+   *               the resource key is missing)
+   */
+  public static void needsAuthorIntervention(PrintWriter errout) {
+    try {
+      final String text = res.getString("needs_author_intervention");
+      tidyPrintln(errout, text);
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+  /**
+   * Prints the localized "missing_body" text through tidyPrintln.
+   *
+   * @param errout writer that receives the text (or the exception text when
+   *               the resource key is missing)
+   */
+  public static void missingBody(PrintWriter errout) {
+    try {
+      final String text = res.getString("missing_body");
+      tidyPrintln(errout, text);
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+  /**
+   * Prints the localized "slides_found" text, formatted with the slide count,
+   * through tidyPrintln.
+   *
+   * @param errout writer that receives the text (or the exception text when
+   *               the resource key is missing)
+   * @param count  number passed to the message pattern
+   */
+  public static void reportNumberOfSlides(PrintWriter errout, int count) {
+    final Object[] args = { new Integer(count) };
+    try {
+      final String pattern = res.getString("slides_found");
+      tidyPrintln(errout, MessageFormat.format(pattern, args));
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+  /**
+   * Prints the localized "general_info" text through tidyPrintln.
+   *
+   * @param errout writer that receives the text (or the exception text when
+   *               the resource key is missing)
+   */
+  public static void generalInfo(PrintWriter errout) {
+    try {
+      final String text = res.getString("general_info");
+      tidyPrintln(errout, text);
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+  /**
+   * Remembers the file name in <code>currentFile</code> and prints the
+   * localized "hello_message" text, formatted with date and file name,
+   * through tidyPrintln.
+   *
+   * @param errout   writer that receives the text (or the exception text when
+   *                 the resource key is missing)
+   * @param date     first argument of the message pattern
+   * @param filename second argument of the message pattern; also stored in
+   *                 <code>currentFile</code>
+   */
+  public static void helloMessage(PrintWriter errout, String date, String filename) {
+    /* for use with Gnu Emacs */
+    currentFile = filename;
+
+    final Object[] args = { date, filename };
+    try {
+      final String pattern = res.getString("hello_message");
+      tidyPrintln(errout, MessageFormat.format(pattern, args));
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+  /**
+   * Reports the document type. When a doctype node is given, prints the
+   * "doctype_given" text followed by the part of the doctype text that lies
+   * between its first and second double quote, then a closing quote. In all
+   * cases prints the "report_version" text with the version name reported by
+   * the lexer, or "HTML proprietary" when that name is null.
+   *
+   * @param errout   writer that receives the output (or the exception text
+   *                 when a resource key is missing)
+   * @param lexer    asked for the version name via HTMLVersionName()
+   * @param filename argument of both message patterns
+   * @param doctype  doctype node whose textarray is scanned from start
+   *                 (inclusive) to end (exclusive); may be null
+   */
+  public static void reportVersion(PrintWriter errout, Lexer lexer, String filename, Node doctype) {
+    final String versionName = lexer.HTMLVersionName();
+    final MutableInteger decoded = new MutableInteger();
+    int quotesSeen = 0;
+
+    try {
+      if (doctype != null) {
+        tidyPrint(errout, MessageFormat.format(res.getString("doctype_given"), new Object[] { filename }));
+
+        int pos = doctype.start;
+        while (pos < doctype.end) {
+          int ch = (int) doctype.textarray[pos];
+
+          /* look for UTF-8 multibyte character */
+          if (ch < 0) {
+            /* NOTE(review): getUTF8 presumably returns how many extra elements it consumed - confirm in PPrint */
+            pos += PPrint.getUTF8(doctype.textarray, pos, decoded);
+            ch = decoded.value;
+          }
+
+          if (ch == '"') {
+            quotesSeen++;
+          } else if (quotesSeen == 1) {
+            /* only the text after the first and before the second quote is echoed */
+            errout.print((char) ch);
+          }
+
+          pos++;
+        }
+
+        errout.print('"');
+      }
+
+      final String shownVersion = (versionName != null ? versionName : "HTML proprietary");
+      final String report = MessageFormat.format(res.getString("report_version"), new Object[] { filename, shownVersion });
+      tidyPrintln(errout, report);
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+  /**
+   * Prints the localized "num_warnings" text with the lexer's warning count
+   * when that count is positive, and the "no_warnings" text otherwise, through
+   * tidyPrintln.
+   *
+   * @param errout writer that receives the text (or the exception text when
+   *               the resource key is missing)
+   * @param lexer  lexer whose warnings counter is read
+   */
+  public static void reportNumWarnings(PrintWriter errout, Lexer lexer) {
+    final boolean hasWarnings = lexer.warnings > 0;
+    try {
+      if (hasWarnings) {
+        final String pattern = res.getString("num_warnings");
+        tidyPrintln(errout, MessageFormat.format(pattern, new Object[] { new Integer(lexer.warnings) }));
+      } else {
+        tidyPrintln(errout, res.getString("no_warnings"));
+      }
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+  /**
+   * Prints the localized "help_text" text, formatted with the program name and
+   * RELEASE_DATE, through tidyPrintln.
+   *
+   * @param out  writer that receives the text (or the exception text when the
+   *             resource key is missing)
+   * @param prog first argument of the message pattern
+   */
+  public static void helpText(PrintWriter out, String prog) {
+    final Object[] args = { prog, RELEASE_DATE };
+    try {
+      final String pattern = res.getString("help_text");
+      tidyPrintln(out, MessageFormat.format(pattern, args));
+    } catch (MissingResourceException missing) {
+      out.println(missing.toString());
+    }
+  }
+
+  /**
+   * Prints the localized "bad_tree" text through tidyPrintln.
+   *
+   * @param errout writer that receives the text (or the exception text when
+   *               the resource key is missing)
+   */
+  public static void badTree(PrintWriter errout) {
+    try {
+      final String text = res.getString("bad_tree");
+      tidyPrintln(errout, text);
+    } catch (MissingResourceException missing) {
+      errout.println(missing.toString());
+    }
+  }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/StreamIn.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/StreamIn.java
new file mode 100644 (file)
index 0000000..0fb5c43
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * @(#)StreamIn.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Input Stream
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.InputStream;
+
+/**
+ * Abstract input source: declares the reader state as public fields and
+ * leaves the actual character reading to subclasses.
+ */
+public abstract class StreamIn {
+
+    /** Value standing for end of input (EOF). */
+    public static final int EndOfStream = -1;
+
+    /*
+     * States for ISO 2022.
+     *
+     * A document in ISO-2022 based encoding uses some ESC sequences called
+     * "designator" to switch character sets. The designators defined and
+     * used in ISO-2022-JP are:
+     *
+     *    "ESC" + "(" + ?         for ISO646 variants
+     *
+     *    "ESC" + "$" + ?         and
+     *    "ESC" + "$" + "(" + ?   for multibyte character sets
+     */
+    public static final int FSM_ASCII = 0;
+    public static final int FSM_ESC = 1;
+    public static final int FSM_ESCD = 2;
+    public static final int FSM_ESCDP = 3;
+    public static final int FSM_ESCP = 4;
+    public static final int FSM_NONASCII = 5;
+
+    /* non-raw input is cleaned up */
+
+    /** FSM for ISO2022; presumably holds one of the FSM_* values. */
+    public int state;
+
+    public boolean pushed;
+
+    public int c;
+
+    public int tabs;
+
+    public int tabsize;
+
+    public int lastcol;
+
+    /** NOTE(review): looks like the current input column - confirm against the implementation. */
+    public int curcol;
+
+    /** NOTE(review): looks like the current input line - confirm against the implementation. */
+    public int curline;
+
+    public int encoding;
+
+    public InputStream stream;
+
+    public boolean endOfStream;
+
+    /** Needed for error reporting. */
+    public Object lexer;
+
+    /** Read char from stream. */
+    public abstract int readCharFromStream();
+
+    public abstract int readChar();
+
+    public abstract void ungetChar(int c);
+
+    public abstract boolean isEndOfStream();
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/StreamInImpl.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/StreamInImpl.java
new file mode 100644 (file)
index 0000000..4e898fc
--- /dev/null
@@ -0,0 +1,367 @@
+/*
+ * @(#)StreamInImpl.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Input Stream Implementation
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.io.InputStream;
+import java.io.IOException;
+
+public class StreamInImpl extends StreamIn {
+
+    /**
+     * Mapping for Windows Western character set (128-159) to Unicode,
+     * indexed by (code - 128). A 0x0000 entry means "no mapping";
+     * readChar() drops such characters from the input.
+     */
+    private static final int[] Win2Unicode =
+    {
+    0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000,
+    0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178
+    };
+
+    /*
+    John Love-Jensen contributed this table for mapping MacRoman
+    character set to Unicode
+    */
+
+    private static final int[] Mac2Unicode =
+    {
+
+    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+    0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+
+    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+    0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
+
+    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+    0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+
+    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+    0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+
+    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+    0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+
+    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+    0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+
+    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+    0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+
+    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+    0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+    /* x7F = DEL */
+    0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
+    0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
+
+    0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
+    0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
+
+    0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
+    0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
+
+    0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
+    0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
+
+    0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
+    0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
+
+    0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
+    0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
+
+    0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
+    0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
+    /* xF0 = Apple Logo */
+    0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
+    0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7
+    };
+
+    /**
+     * Creates a reader positioned at line 1, column 1 of the given stream.
+     *
+     * @param stream   byte stream to read from
+     * @param encoding encoding of the stream; this class tests it against
+     *                 Configuration.RAW, ISO2022, UTF8 and MACROMAN
+     * @param tabsize  distance between tab stops used when expanding tabs
+     */
+    public StreamInImpl(InputStream stream, int encoding, int tabsize)
+    {
+        this.stream = stream;
+        this.pushed = false;
+        this.c = (int)'\0';
+        this.tabs = 0;
+        this.tabsize = tabsize;
+        this.curline = 1;
+        this.curcol = 1;
+        this.encoding = encoding;
+        this.state = FSM_ASCII;
+        this.endOfStream = false;
+    }
+
+    /**
+     * Reads one character from the stream, applying the ISO 2022 or UTF-8
+     * handling selected by the encoding. Other encodings yield the raw byte.
+     *
+     * An IOException is reported on System.err and treated as end of input:
+     * EndOfStream is returned and endOfStream is set, so that
+     * isEndOfStream() agrees with the value handed to the caller.
+     *
+     * @return the character read, or EndOfStream
+     */
+    public int readCharFromStream()
+    {
+        try {
+            int c = this.stream.read();
+
+            if (c == EndOfStream) {
+                this.endOfStream = true;
+                return c;
+            }
+
+            if (this.encoding == Configuration.ISO2022)
+                return stepIso2022(c);
+
+            if (this.encoding != Configuration.UTF8)
+                return c;
+
+            return decodeUtf8(c);
+        }
+        catch (IOException e) {
+            System.err.println("StreamInImpl.readCharFromStream: " + e.toString());
+            /* a failed read ends the input just like a regular EOF */
+            this.endOfStream = true;
+            return EndOfStream;
+        }
+    }
+
+    /**
+     * Advances the ISO 2022 state machine by one byte.
+     *
+     * A document in ISO-2022 based encoding uses some ESC sequences
+     * called "designator" to switch character sets. The designators
+     * defined and used in ISO-2022-JP are:
+     *
+     *    "ESC" + "(" + ?         for ISO646 variants
+     *
+     *    "ESC" + "$" + ?         and
+     *    "ESC" + "$" + "(" + ?   for multibyte character sets
+     *
+     * Where ? stands for a single character used to indicate the
+     * character set for multibyte characters.
+     *
+     * Tidy handles this by preserving the escape sequence and
+     * setting the top bit of each byte for non-ascii chars. This
+     * bit is then cleared on output. The input stream keeps track
+     * of the state to determine when to set/clear the bit.
+     *
+     * @param c the byte just read (never EndOfStream)
+     * @return c, with the top bit set while in the non-ASCII state
+     */
+    private int stepIso2022(int c)
+    {
+        if (c == 0x1b)  /* ESC */
+        {
+            this.state = FSM_ESC;
+            return c;
+        }
+
+        switch (this.state)
+        {
+        case FSM_ESC:
+            if (c == '$')
+                this.state = FSM_ESCD;
+            else if (c == '(')
+                this.state = FSM_ESCP;
+            else
+                this.state = FSM_ASCII;
+            break;
+
+        case FSM_ESCD:
+            if (c == '(')
+                this.state = FSM_ESCDP;
+            else
+                this.state = FSM_NONASCII;
+            break;
+
+        case FSM_ESCDP:
+            this.state = FSM_NONASCII;
+            break;
+
+        case FSM_ESCP:
+            this.state = FSM_ASCII;
+            break;
+
+        case FSM_NONASCII:
+            c |= 0x80;
+            break;
+        }
+
+        return c;
+    }
+
+    /**
+     * Decodes one UTF-8 encoded character whose first byte is already read.
+     *
+     * Bytes that are not a multi-byte lead byte (plain ASCII, but also stray
+     * continuation bytes 10XX XXXX and 0xFE/0xFF) are returned unchanged.
+     * Continuation bytes are not validated; only their low six bits are used.
+     *
+     * @param lead the first byte of the sequence
+     * @return the decoded value, or EndOfStream if the stream ends mid-sequence
+     * @throws IOException if reading a continuation byte fails
+     */
+    private int decodeUtf8(int lead) throws IOException
+    {
+        int n, count;
+
+        if ((lead & 0xE0) == 0xC0)  /* 110X XXXX  two bytes */
+        {
+            n = lead & 31;
+            count = 1;
+        }
+        else if ((lead & 0xF0) == 0xE0)  /* 1110 XXXX  three bytes */
+        {
+            n = lead & 15;
+            count = 2;
+        }
+        else if ((lead & 0xF8) == 0xF0)  /* 1111 0XXX  four bytes */
+        {
+            n = lead & 7;
+            count = 3;
+        }
+        else if ((lead & 0xFC) == 0xF8)  /* 1111 10XX  five bytes */
+        {
+            n = lead & 3;
+            count = 4;
+        }
+        else if ((lead & 0xFE) == 0xFC)  /* 1111 110X  six bytes */
+        {
+            n = lead & 1;
+            count = 5;
+        }
+        else  /* not a lead byte: hand it back as is */
+            return lead;
+
+        /* successor bytes should have the form 10XX XXXX */
+        for (int i = 1; i <= count; ++i)
+        {
+            int c = this.stream.read();
+
+            if (c == EndOfStream) {
+                this.endOfStream = true;
+                return c;
+            }
+
+            n = (n << 6) | (c & 0x3F);
+        }
+
+        return n;
+    }
+
+    /**
+     * Returns the next cleaned-up character and keeps curline/curcol current.
+     *
+     * A pushed-back character is delivered first. Otherwise CR and CR LF are
+     * folded into a single '\n', tabs are expanded to spaces up to the next
+     * tab stop, and control characters below 32 (other than newline, tab
+     * and ESC) are skipped. Unless the encoding is RAW or ISO2022, MacRoman
+     * input is mapped to Unicode and values 128-159 are reported and mapped
+     * through the Windows table (unmapped ones are skipped).
+     *
+     * @return the character, or EndOfStream
+     */
+    public int readChar()
+    {
+        int c;
+
+        if (this.pushed)
+        {
+            this.pushed = false;
+            c =  this.c;
+
+            if (c == '\n')
+            {
+                this.curcol = 1;
+                this.curline++;
+                return c;
+            }
+
+            this.curcol++;
+            return c;
+        }
+
+        this.lastcol = this.curcol;
+
+        /* still handing out the spaces of a previously read tab */
+        if (this.tabs > 0)
+        {
+            this.curcol++;
+            this.tabs--;
+            return ' ';
+        }
+
+        for (;;)
+        {
+            c = readCharFromStream();
+
+            if (c < 0)
+                return EndOfStream;
+
+            if (c == '\n')
+            {
+                this.curcol = 1;
+                this.curline++;
+                break;
+            }
+
+            if (c == '\r')
+            {
+                c = readCharFromStream();
+                if (c != '\n')
+                {
+                    ungetChar(c);
+                    c = '\n';
+                }
+                this.curcol = 1;
+                this.curline++;
+                break;
+            }
+
+            if (c == '\t')
+            {
+                /* a tab size of zero would divide by zero: expand to one space */
+                if (this.tabsize > 0)
+                    this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1;
+                else
+                    this.tabs = 0;
+                this.curcol++;
+                c = ' ';
+                break;
+            }
+
+            /* strip control characters, except for Esc */
+
+            if (c == '\033')
+                break;
+
+            if (0 < c && c < 32)
+                continue;
+
+            /* watch out for ISO2022 */
+
+            if (this.encoding == Configuration.RAW ||
+                this.encoding == Configuration.ISO2022)
+            {
+                this.curcol++;
+                break;
+            }
+
+            if (this.encoding == Configuration.MACROMAN)
+                c = Mac2Unicode[c];
+
+            /* produced e.g. as a side-effect of smart quotes in Word */
+
+            if (127 < c && c < 160)
+            {
+                Report.encodingError((Lexer)this.lexer, Report.WINDOWS_CHARS, c);
+
+                c = Win2Unicode[c - 128];
+
+                /* no Unicode equivalent: drop the character */
+                if (c == 0)
+                    continue;
+            }
+
+            this.curcol++;
+            break;
+        }
+
+        return c;
+    }
+
+    /**
+     * Pushes one character back so that the next readChar() returns it.
+     * Only a single character is remembered. The line count is stepped back
+     * for a newline and the column is reset to the value saved by readChar().
+     *
+     * @param c the character to push back
+     */
+    public void ungetChar(int c)
+    {
+        this.pushed = true;
+        this.c = c;
+
+        if (c == '\n')
+        {
+            --this.curline;
+        }
+
+        this.curcol = this.lastcol;
+    }
+
+    /**
+     * @return true once the underlying stream has reported end of input
+     *         (or a read has failed)
+     */
+    public boolean isEndOfStream()
+    {
+        return this.endOfStream;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Style.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Style.java
new file mode 100644 (file)
index 0000000..a24270a
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * @(#)Style.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Linked list of class names and styles
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class Style {
+
+    /** Tag part of this entry. */
+    public String tag;
+
+    /** Class name part of this entry. */
+    public String tagClass;
+
+    /** Style properties part of this entry. */
+    public String properties;
+
+    /** Following entry of the linked list, or null for the last one. */
+    public Style  next;
+
+    /**
+     * Creates an empty entry: every field is null.
+     */
+    public Style()
+    {
+        /* nothing to do, all fields default to null */
+    }
+
+    /**
+     * Creates an entry that is not linked to a successor.
+     */
+    public Style(String tag, String tagClass, String properties)
+    {
+        this(tag, tagClass, properties, null);
+    }
+
+    /**
+     * Creates an entry and links it in front of next.
+     *
+     * @param tag        value stored in the tag field
+     * @param tagClass   value stored in the tagClass field
+     * @param properties value stored in the properties field
+     * @param next       following list entry, may be null
+     */
+    public Style(String tag, String tagClass, String properties, Style next)
+    {
+        this.next = next;
+        this.properties = properties;
+        this.tagClass = tagClass;
+        this.tag = tag;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/StyleProp.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/StyleProp.java
new file mode 100644 (file)
index 0000000..339a271
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * @(#)StyleProp.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Linked list of style properties
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+public class StyleProp {
+
+    /** Property name. */
+    public String name;
+
+    /** Property value. */
+    public String value;
+
+    /** Following property of the linked list, or null for the last one. */
+    public StyleProp next;
+
+    /**
+     * Creates an empty property: every field is null.
+     */
+    public StyleProp()
+    {
+        /* nothing to do, all fields default to null */
+    }
+
+    /**
+     * Creates a property that is not linked to a successor.
+     */
+    public StyleProp(String name, String value)
+    {
+        this(name, value, null);
+    }
+
+    /**
+     * Creates a property and links it in front of next.
+     *
+     * @param name  value stored in the name field
+     * @param value value stored in the value field
+     * @param next  following list entry, may be null
+     */
+    public StyleProp(String name, String value, StyleProp next)
+    {
+        this.next = next;
+        this.value = value;
+        this.name = name;
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/TagTable.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/TagTable.java
new file mode 100644 (file)
index 0000000..e13d103
--- /dev/null
@@ -0,0 +1,389 @@
+/*
+ * @(#)TagTable.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * Tag dictionary node hash table
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ * Modified from a Singleton to a non-Singleton.
+ */
+
+import java.util.Hashtable;
+import java.util.Enumeration;
+
+public class TagTable {
+
+    private Configuration configuration = null;
+
+    /**
+     * Fills this table with every entry of the static tags array and then
+     * caches the entries of frequently needed tags in the public tagXxx
+     * fields, so callers can compare against them without another lookup.
+     */
+    public TagTable()
+    {
+        int idx = 0;
+        while ( idx < tags.length ) {
+            this.install( tags[idx] );
+            idx++;
+        }
+
+        /* cache the dictionary entries of well-known tags */
+        this.tagHtml = this.lookup("html");
+        this.tagHead = this.lookup("head");
+        this.tagBody = this.lookup("body");
+        this.tagFrameset = this.lookup("frameset");
+        this.tagFrame = this.lookup("frame");
+        this.tagNoframes = this.lookup("noframes");
+        this.tagMeta = this.lookup("meta");
+        this.tagTitle = this.lookup("title");
+        this.tagBase = this.lookup("base");
+        this.tagHr = this.lookup("hr");
+        this.tagPre = this.lookup("pre");
+        this.tagListing = this.lookup("listing");
+        this.tagH1 = this.lookup("h1");
+        this.tagH2 = this.lookup("h2");
+        this.tagP = this.lookup("p");
+        this.tagUl = this.lookup("ul");
+        this.tagOl = this.lookup("ol");
+        this.tagDir = this.lookup("dir");
+        this.tagLi = this.lookup("li");
+        this.tagDt = this.lookup("dt");
+        this.tagDd = this.lookup("dd");
+        this.tagDl = this.lookup("dl");
+        this.tagTd = this.lookup("td");
+        this.tagTh = this.lookup("th");
+        this.tagTr = this.lookup("tr");
+        this.tagCol = this.lookup("col");
+        this.tagBr = this.lookup("br");
+        this.tagA = this.lookup("a");
+        this.tagLink = this.lookup("link");
+        this.tagB = this.lookup("b");
+        this.tagI = this.lookup("i");
+        this.tagStrong = this.lookup("strong");
+        this.tagEm = this.lookup("em");
+        this.tagBig = this.lookup("big");
+        this.tagSmall = this.lookup("small");
+        this.tagParam = this.lookup("param");
+        this.tagOption = this.lookup("option");
+        this.tagOptgroup = this.lookup("optgroup");
+        this.tagImg = this.lookup("img");
+        this.tagMap = this.lookup("map");
+        this.tagArea = this.lookup("area");
+        this.tagNobr = this.lookup("nobr");
+        this.tagWbr = this.lookup("wbr");
+        this.tagFont = this.lookup("font");
+        this.tagSpacer = this.lookup("spacer");
+        this.tagLayer = this.lookup("layer");
+        this.tagCenter = this.lookup("center");
+        this.tagStyle = this.lookup("style");
+        this.tagScript = this.lookup("script");
+        this.tagNoscript = this.lookup("noscript");
+        this.tagTable = this.lookup("table");
+        this.tagCaption = this.lookup("caption");
+        this.tagForm = this.lookup("form");
+        this.tagTextarea = this.lookup("textarea");
+        this.tagBlockquote = this.lookup("blockquote");
+        this.tagApplet = this.lookup("applet");
+        this.tagObject = this.lookup("object");
+        this.tagDiv = this.lookup("div");
+        this.tagSpan = this.lookup("span");
+    }
+
+    /**
+     * Sets the configuration consulted by findTag() (its XmlTags flag).
+     *
+     * @param configuration the configuration to use; may be null, in which
+     *                      case findTag() always does a dictionary lookup
+     */
+    public void setConfiguration(Configuration configuration) {
+        this.configuration = configuration;
+    }
+
+    /**
+     * Finds the dictionary entry installed under the given name.
+     *
+     * @param name tag name used as hash key
+     * @return the entry, or null if no tag of that name is installed
+     */
+    public Dict lookup( String name ) {
+        Object entry = tagHashtable.get( name );
+        return (Dict)entry;
+    }
+
+    /**
+     * Registers a dictionary entry under its name.
+     *
+     * If an entry of that name already exists it is kept and updated in
+     * place: versions, parser and chkattrs are overwritten, while the model
+     * flags are OR-ed into the existing ones.
+     *
+     * @param dict the entry to install
+     * @return the entry now stored in the table (the existing one when the
+     *         name was already present, otherwise dict itself)
+     */
+    public Dict install( Dict dict ) {
+        Dict existing = (Dict)tagHashtable.get(dict.name);
+
+        if (existing == null) {
+            /* first entry of that name */
+            tagHashtable.put(dict.name, dict);
+            return dict;
+        }
+
+        existing.versions = dict.versions;
+        existing.model |= dict.model;
+        existing.parser = dict.parser;
+        existing.chkattrs = dict.chkattrs;
+        return existing;
+    }
+
+    /**
+     * Public interface for finding a tag by name.
+     *
+     * When a configuration with XmlTags set is present, every node gets the
+     * shared xmlTags dummy entry. Otherwise the node's element name is
+     * looked up in the table.
+     *
+     * @param node the node whose tag field is to be filled in
+     * @return true if node.tag was set, false if the node has no element
+     *         name or the name is unknown (node.tag is left untouched)
+     */
+    public boolean findTag( Node node ) {
+        if ( configuration != null && configuration.XmlTags ) {
+            node.tag = xmlTags;
+            return true;
+        }
+
+        if ( node.element == null ) {
+            return false;
+        }
+
+        Dict found = lookup( node.element );
+        if ( found == null ) {
+            return false;
+        }
+
+        node.tag = found;
+        return true;
+    }
+
+    /**
+     * Finds the parser registered for the node's element name.
+     *
+     * @param node the node to find a parser for
+     * @return the parser of the matching dictionary entry (which may itself
+     *         be null), or null if the node has no element name or the name
+     *         is unknown
+     */
+    public Parser findParser(Node node) {
+        if (node.element == null) {
+            return null;
+        }
+
+        Dict found = lookup(node.element);
+        return (found != null) ? found.parser : null;
+    }
+
+    /* maps a tag name to its Dict entry; filled by install() */
+    private Hashtable tagHashtable = new Hashtable();
+
+    /*
+     * Built-in tag dictionary, installed into every TagTable by the
+     * constructor. The arguments of each entry appear to be, in order: tag
+     * name, version flags (Dict.VERS_*), content model flags (Dict.CM_*),
+     * parser (null for none) and attribute checker (null for none).
+     *
+     * NOTE(review): "ilayer" and "nolayer" are listed twice with identical
+     * values. install() then takes its existing-entry branch and rewrites
+     * the same values, so the duplicates have no effect.
+     */
+    private static Dict[] tags = {
+
+    new Dict( "html",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST),  ParserImpl.getParseHTML(), CheckAttribsImpl.getCheckHTML() ),
+
+    new Dict( "head",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseHead(), null ),
+
+    new Dict( "title",      (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     Dict.CM_HEAD, ParserImpl.getParseTitle(), null ),
+    new Dict( "base",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ),
+    new Dict( "link",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HEAD|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckLINK() ),
+    new Dict( "meta",       (short)(Dict.VERS_ALL|Dict.VERS_FRAMES),     (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ),
+    new Dict( "style",      (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES),  Dict.CM_HEAD, ParserImpl.getParseScript(), CheckAttribsImpl.getCheckSTYLE() ),
+    new Dict( "script",     (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES),  (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), CheckAttribsImpl.getCheckSCRIPT() ),
+    new Dict( "server",     Dict.VERS_NETSCAPE,  (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), null ),
+
+    new Dict( "body",       Dict.VERS_ALL,     (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseBody(), null ),
+    new Dict( "frameset",   Dict.VERS_FRAMES,  (Dict.CM_HTML|Dict.CM_FRAMES), ParserImpl.getParseFrameSet(), null ),
+
+    new Dict( "p",          Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_OPT), ParserImpl.getParseInline(), null ),
+    new Dict( "h1",         Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+    new Dict( "h2",         Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+    new Dict( "h3",         Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+    new Dict( "h4",         Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+    new Dict( "h5",         Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+    new Dict( "h6",         Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
+    new Dict( "ul",         Dict.VERS_ALL,     Dict.CM_BLOCK, ParserImpl.getParseList(), null ),
+    new Dict( "ol",         Dict.VERS_ALL,     Dict.CM_BLOCK, ParserImpl.getParseList(), null ),
+    new Dict( "dl",         Dict.VERS_ALL,     Dict.CM_BLOCK, ParserImpl.getParseDefList(), null ),
+    new Dict( "dir",        Dict.VERS_LOOSE,   (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParseList(), null ),
+    new Dict( "menu",       Dict.VERS_LOOSE,   (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParseList(), null ),
+    new Dict( "pre",        Dict.VERS_ALL,     Dict.CM_BLOCK, ParserImpl.getParsePre(), null ),
+    new Dict( "listing",    Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ),
+    new Dict( "xmp",        Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ),
+    new Dict( "plaintext",  Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ),
+    new Dict( "address",    Dict.VERS_ALL,     Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "blockquote", Dict.VERS_ALL,     Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "form",       Dict.VERS_ALL,     Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "isindex",    Dict.VERS_LOOSE,   (Dict.CM_BLOCK|Dict.CM_EMPTY), null, null ),
+    new Dict( "fieldset",   Dict.VERS_HTML40,  Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "table",      Dict.VERS_FROM32,  Dict.CM_BLOCK, ParserImpl.getParseTableTag(), CheckAttribsImpl.getCheckTABLE() ),
+    new Dict( "hr",         Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_EMPTY),  null, CheckAttribsImpl.getCheckHR() ),
+    new Dict( "div",        Dict.VERS_FROM32,  Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "multicol",   Dict.VERS_NETSCAPE,  Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "nosave",     Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "layer",      Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "ilayer",     Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "nolayer",    Dict.VERS_NETSCAPE, (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ),
+    new Dict( "align",      Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "center",     Dict.VERS_LOOSE,   Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
+    new Dict( "ins",        Dict.VERS_HTML40,  (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(), null ),
+    new Dict( "del",        Dict.VERS_HTML40,  (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(), null ),
+
+    new Dict( "li",         Dict.VERS_ALL,     (Dict.CM_LIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), null ),
+    new Dict( "dt",         Dict.VERS_ALL,     (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseInline(), null ),
+    new Dict( "dd",         Dict.VERS_ALL,     (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), null ),
+
+    new Dict( "caption",    Dict.VERS_FROM32,  Dict.CM_TABLE, ParserImpl.getParseInline(), CheckAttribsImpl.getCheckCaption() ),
+    new Dict( "colgroup",   Dict.VERS_HTML40,  (Dict.CM_TABLE|Dict.CM_OPT), ParserImpl.getParseColGroup(), null ),
+    new Dict( "col",        Dict.VERS_HTML40,  (Dict.CM_TABLE|Dict.CM_EMPTY),  null, null ),
+    new Dict( "thead",      Dict.VERS_HTML40,  (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ),
+    new Dict( "tfoot",      Dict.VERS_HTML40,  (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ),
+    new Dict( "tbody",      Dict.VERS_HTML40,  (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ),
+    new Dict( "tr",         Dict.VERS_FROM32,  (Dict.CM_TABLE|Dict.CM_OPT), ParserImpl.getParseRow(), null ),
+    new Dict( "td",         Dict.VERS_FROM32,  (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckTableCell() ),
+    new Dict( "th",         Dict.VERS_FROM32,  (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckTableCell() ),
+
+    new Dict( "q",          Dict.VERS_HTML40,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "a",          Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), CheckAttribsImpl.getCheckAnchor() ),
+    new Dict( "br",         Dict.VERS_ALL,     (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+    new Dict( "img",        Dict.VERS_ALL,     (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckIMG() ),
+    new Dict( "object",     Dict.VERS_HTML40,  (Dict.CM_OBJECT|Dict.CM_HEAD|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
+    new Dict( "applet",     Dict.VERS_LOOSE,   (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
+    new Dict( "servlet",    Dict.VERS_SUN,     (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
+    new Dict( "param",      Dict.VERS_FROM32,  (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+    new Dict( "embed",      Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, null ),
+    new Dict( "noembed",    Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "iframe",     Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE, ParserImpl.getParseBlock(), null ),
+    new Dict( "frame",      Dict.VERS_FRAMES,  (Dict.CM_FRAMES|Dict.CM_EMPTY), null, null ),
+    new Dict( "noframes",   Dict.VERS_IFRAMES, (Dict.CM_BLOCK|Dict.CM_FRAMES), ParserImpl.getParseNoFrames(),  null ),
+    new Dict( "noscript",   (short)(Dict.VERS_FRAMES|Dict.VERS_HTML40),  (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ),
+    new Dict( "b",          Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "i",          Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "u",          Dict.VERS_LOOSE,   Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "tt",         Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "s",          Dict.VERS_LOOSE,   Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "strike",     Dict.VERS_LOOSE,   Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "big",        Dict.VERS_FROM32,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "small",      Dict.VERS_FROM32,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "sub",        Dict.VERS_FROM32,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "sup",        Dict.VERS_FROM32,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "em",         Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "strong",     Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "dfn",        Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "code",       Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "samp",       Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "kbd",        Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "var",        Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "cite",       Dict.VERS_ALL,     Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "abbr",       Dict.VERS_HTML40,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "acronym",    Dict.VERS_HTML40,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "span",       Dict.VERS_FROM32,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "blink",      Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "nobr",       Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "wbr",        Dict.VERS_PROPRIETARY, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+    new Dict( "marquee",    Dict.VERS_MICROSOFT, (Dict.CM_INLINE|Dict.CM_OPT), ParserImpl.getParseInline(), null ),
+    new Dict( "bgsound",    Dict.VERS_MICROSOFT, (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ),
+    new Dict( "comment",    Dict.VERS_MICROSOFT, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "spacer",     Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+    new Dict( "keygen",     Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+    new Dict( "nolayer",    Dict.VERS_NETSCAPE, (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ),
+    new Dict( "ilayer",     Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "map",        Dict.VERS_FROM32,  Dict.CM_INLINE, ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckMap() ),
+    new Dict( "area",       Dict.VERS_ALL,     (Dict.CM_BLOCK|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckAREA() ),
+    new Dict( "input",      Dict.VERS_ALL,     (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, null ),
+    new Dict( "select",     Dict.VERS_ALL,     (Dict.CM_INLINE|Dict.CM_FIELD), ParserImpl.getParseSelect(), null ),
+    new Dict( "option",     Dict.VERS_ALL,     (Dict.CM_FIELD|Dict.CM_OPT), ParserImpl.getParseText(), null ),
+    new Dict( "optgroup",   Dict.VERS_HTML40,  (Dict.CM_FIELD|Dict.CM_OPT), ParserImpl.getParseOptGroup(), null ),
+    new Dict( "textarea",   Dict.VERS_ALL,     (Dict.CM_INLINE|Dict.CM_FIELD), ParserImpl.getParseText(), null ),
+    new Dict( "label",      Dict.VERS_HTML40,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "legend",     Dict.VERS_HTML40,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "button",     Dict.VERS_HTML40,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "basefont",   Dict.VERS_LOOSE,   (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
+    new Dict( "font",       Dict.VERS_LOOSE,   Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+    new Dict( "bdo",        Dict.VERS_HTML40,  Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
+
+    };
+
+    /* create dummy entry for all xml tags */
+    public Dict xmlTags = new Dict( null, Dict.VERS_ALL, Dict.CM_BLOCK, null, null );
+
+    public Dict tagHtml = null;
+    public Dict tagHead = null;
+    public Dict tagBody = null;
+    public Dict tagFrameset = null;
+    public Dict tagFrame = null;
+    public Dict tagNoframes = null;
+    public Dict tagMeta = null;
+    public Dict tagTitle = null;
+    public Dict tagBase = null;
+    public Dict tagHr = null;
+    public Dict tagPre = null;
+    public Dict tagListing = null;
+    public Dict tagH1 = null;
+    public Dict tagH2 = null;
+    public Dict tagP  = null;
+    public Dict tagUl = null;
+    public Dict tagOl = null;
+    public Dict tagDir = null;
+    public Dict tagLi = null;
+    public Dict tagDt = null;
+    public Dict tagDd = null;
+    public Dict tagDl = null;
+    public Dict tagTd = null;
+    public Dict tagTh = null;
+    public Dict tagTr = null;
+    public Dict tagCol = null;
+    public Dict tagBr = null;
+    public Dict tagA = null;
+    public Dict tagLink = null;
+    public Dict tagB = null;
+    public Dict tagI = null;
+    public Dict tagStrong = null;
+    public Dict tagEm = null;
+    public Dict tagBig = null;
+    public Dict tagSmall = null;
+    public Dict tagParam = null;
+    public Dict tagOption = null;
+    public Dict tagOptgroup = null;
+    public Dict tagImg = null;
+    public Dict tagMap = null;
+    public Dict tagArea = null;
+    public Dict tagNobr = null;
+    public Dict tagWbr = null;
+    public Dict tagFont = null;
+    public Dict tagSpacer = null;
+    public Dict tagLayer = null;
+    public Dict tagCenter = null;
+    public Dict tagStyle = null;
+    public Dict tagScript = null;
+    public Dict tagNoscript = null;
+    public Dict tagTable = null;
+    public Dict tagCaption = null;
+    public Dict tagForm = null;
+    public Dict tagTextarea = null;
+    public Dict tagBlockquote = null;
+    public Dict tagApplet = null;
+    public Dict tagObject = null;
+    public Dict tagDiv = null;
+    public Dict tagSpan = null;
+
+    public void defineInlineTag( String name )
+    {
+        install( new Dict( name, Dict.VERS_PROPRIETARY,
+                           (Dict.CM_INLINE|Dict.CM_NO_INDENT|Dict.CM_NEW),
+                           ParserImpl.getParseBlock(), null ) );
+    }
+
+    public void defineBlockTag( String name )
+    {
+        install( new Dict( name, Dict.VERS_PROPRIETARY,
+                           (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW),
+                           ParserImpl.getParseBlock(), null ) );
+    }
+
+    public void defineEmptyTag(String name)
+    {
+        install(new Dict(name, Dict.VERS_PROPRIETARY,
+                         (Dict.CM_EMPTY|Dict.CM_NO_INDENT|Dict.CM_NEW),
+                         ParserImpl.getParseBlock(), null));
+    }
+
+    public void definePreTag(String name)
+    {
+        install(new Dict(name, Dict.VERS_PROPRIETARY,
+                         (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW),
+                         ParserImpl.getParsePre(), null));
+    }
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Tidy.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Tidy.java
new file mode 100644 (file)
index 0000000..20862c1
--- /dev/null
@@ -0,0 +1,1424 @@
+/*
+ * @(#)Tidy.java   1.11 2000/08/16
+ *
+ */
+
+/*
+  HTML parser and pretty printer
+
+  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+  Institute of Technology, Institut National de Recherche en
+  Informatique et en Automatique, Keio University). All Rights
+  Reserved.
+
+  Contributing Author(s):
+
+     Dave Raggett <dsr@w3.org>
+     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+
+  The contributing author(s) would like to thank all those who
+  helped with testing, bug fixes, and patience.  This wouldn't
+  have been possible without all of you.
+
+  COPYRIGHT NOTICE:
+  This software and documentation is provided "as is," and
+  the copyright holders and contributing author(s) make no
+  representations or warranties, express or implied, including
+  but not limited to, warranties of merchantability or fitness
+  for any particular purpose or that the use of the software or
+  documentation will not infringe any third party patents,
+  copyrights, trademarks or other rights. 
+
+  The copyright holders and contributing author(s) will not be
+  liable for any direct, indirect, special or consequential damages
+  arising out of any use of the software or documentation, even if
+  advised of the possibility of such damage.
+
+  Permission is hereby granted to use, copy, modify, and distribute
+  this source code, or portions hereof, documentation and executables,
+  for any purpose, without fee, subject to the following restrictions:
+
+  1. The origin of this source code must not be misrepresented.
+  2. Altered versions must be plainly marked as such and must
+     not be misrepresented as being the original source.
+  3. This Copyright notice may not be removed or altered from any
+     source or altered source distribution.
+  The copyright holders and contributing author(s) specifically
+  permit, without fee, and encourage the use of this source code
+  as a component for supporting the Hypertext Markup Language in
+  commercial products. If you use this source code in a product,
+  acknowledgment is not required but would be appreciated.
+*/
+
+package net.sourceforge.phpdt.tidy;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.util.Properties;
+
+import org.eclipse.core.resources.IFile;
+import org.eclipse.core.resources.IMarker;
+import org.eclipse.core.runtime.CoreException;
+
+/**
+ *
+ * <p>HTML parser and pretty printer</p>
+ *
+ * <p>
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ * </p>
+ *
+ * <p>
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ * </p>
+ *
+ * <p>
+ * Contributing Author(s):<br>
+ *    <a href="mailto:dsr@w3.org">Dave Raggett</a><br>
+ *    <a href="mailto:ac.quick@sympatico.ca">Andy Quick</a> (translation to Java)
+ * </p>
+ *
+ * <p>
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience.  This wouldn't
+ * have been possible without all of you.
+ * </p>
+ *
+ * <p>
+ * COPYRIGHT NOTICE:<br>
+ * 
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights. 
+ * </p>
+ *
+ * <p>
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ * </p>
+ *
+ * <p>
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ * </p>
+ *
+ * <p>
+ * <ol>
+ * <li>The origin of this source code must not be misrepresented.</li>
+ * <li>Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.</li>
+ * <li>This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.</li>
+ * </ol>
+ * </p>
+ *
+ * <p>
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ * </p>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ *
+ */
+
+public class Tidy implements java.io.Serializable {
+
+  static final long serialVersionUID = -2794371560623987718L;
+
+  private boolean initialized = false;
+  private PrintWriter errout = null; /* error output stream */
+  private PrintWriter stderr = null;
+  private Configuration configuration = null;
+  private String inputStreamName = "InputStream";
+  private int parseErrors = 0;
+  private int parseWarnings = 0;
+
+  /**
+   * Creates a Tidy instance and runs the first-time initialization.
+   */
+  public Tidy() {
+    this.init();
+  }
+
+  public Configuration getConfiguration() {
+    return configuration;
+  }
+
+  public PrintWriter getStderr() {
+    return stderr;
+  }
+
+  /**
+   * ParseErrors - the number of errors that occurred in the most
+   * recent parse operation
+   */
+
+  public int getParseErrors() {
+    return parseErrors;
+  }
+
+  /**
+   * ParseWarnings - the number of warnings that occurred in the most
+   * recent parse operation
+   */
+
+  public int getParseWarnings() {
+    return parseWarnings;
+  }
+
+  /**
+   * Errout - the error output stream
+   */
+
+  public PrintWriter getErrout() {
+    return errout;
+  }
+
+  public void setErrout(PrintWriter errout) {
+    this.errout = errout;
+  }
+
+  /**
+   * Spaces - default indentation
+   * @see org.w3c.tidy.Configuration#spaces
+   */
+
+  public void setSpaces(int spaces) {
+    configuration.spaces = spaces;
+  }
+
+  public int getSpaces() {
+    return configuration.spaces;
+  }
+
+  /**
+   * Wraplen - default wrap margin
+   * @see org.w3c.tidy.Configuration#wraplen
+   */
+
+  public void setWraplen(int wraplen) {
+    configuration.wraplen = wraplen;
+  }
+
+  public int getWraplen() {
+    return configuration.wraplen;
+  }
+
+  /**
+   * CharEncoding
+   * @see org.w3c.tidy.Configuration#CharEncoding
+   */
+
+  public void setCharEncoding(int charencoding) {
+    configuration.CharEncoding = charencoding;
+  }
+
+  public int getCharEncoding() {
+    return configuration.CharEncoding;
+  }
+
+  /**
+   * Tabsize
+   * @see org.w3c.tidy.Configuration#tabsize
+   */
+
+  public void setTabsize(int tabsize) {
+    configuration.tabsize = tabsize;
+  }
+
+  public int getTabsize() {
+    return configuration.tabsize;
+  }
+
+  /**
+   * Errfile - file name to write errors to
+   * @see org.w3c.tidy.Configuration#errfile
+   */
+
+  public void setErrfile(String errfile) {
+    configuration.errfile = errfile;
+  }
+
+  public String getErrfile() {
+    return configuration.errfile;
+  }
+
+  /**
+   * Writeback - if true then output tidied markup
+   * NOTE: this property is ignored when parsing from an InputStream.
+   * @see org.w3c.tidy.Configuration#writeback
+   */
+
+  public void setWriteback(boolean writeback) {
+    configuration.writeback = writeback;
+  }
+
+  public boolean getWriteback() {
+    return configuration.writeback;
+  }
+
+  /**
+   * OnlyErrors - if true normal output is suppressed
+   * @see org.w3c.tidy.Configuration#OnlyErrors
+   */
+
+  public void setOnlyErrors(boolean OnlyErrors) {
+    configuration.OnlyErrors = OnlyErrors;
+  }
+
+  public boolean getOnlyErrors() {
+    return configuration.OnlyErrors;
+  }
+
+  /**
+   * ShowWarnings - however errors are always shown
+   * @see org.w3c.tidy.Configuration#ShowWarnings
+   */
+
+  public void setShowWarnings(boolean ShowWarnings) {
+    configuration.ShowWarnings = ShowWarnings;
+  }
+
+  public boolean getShowWarnings() {
+    return configuration.ShowWarnings;
+  }
+
+  /**
+   * Quiet - no 'Parsing X', guessed DTD or summary
+   * @see org.w3c.tidy.Configuration#Quiet
+   */
+
+  public void setQuiet(boolean Quiet) {
+    configuration.Quiet = Quiet;
+  }
+
+  public boolean getQuiet() {
+    return configuration.Quiet;
+  }
+
+  /**
+   * IndentContent - indent content of appropriate tags
+   * @see org.w3c.tidy.Configuration#IndentContent
+   */
+
+  public void setIndentContent(boolean IndentContent) {
+    configuration.IndentContent = IndentContent;
+  }
+
+  public boolean getIndentContent() {
+    return configuration.IndentContent;
+  }
+
+  /**
+   * SmartIndent - does text/block level content effect indentation
+   * @see org.w3c.tidy.Configuration#SmartIndent
+   */
+
+  public void setSmartIndent(boolean SmartIndent) {
+    configuration.SmartIndent = SmartIndent;
+  }
+
+  public boolean getSmartIndent() {
+    return configuration.SmartIndent;
+  }
+
+  /**
+   * HideEndTags - suppress optional end tags
+   * @see org.w3c.tidy.Configuration#HideEndTags
+   */
+
+  public void setHideEndTags(boolean HideEndTags) {
+    configuration.HideEndTags = HideEndTags;
+  }
+
+  public boolean getHideEndTags() {
+    return configuration.HideEndTags;
+  }
+
+  /**
+   * XmlTags - treat input as XML
+   * @see org.w3c.tidy.Configuration#XmlTags
+   */
+
+  public void setXmlTags(boolean XmlTags) {
+    configuration.XmlTags = XmlTags;
+  }
+
+  public boolean getXmlTags() {
+    return configuration.XmlTags;
+  }
+
+  /**
+   * XmlOut - create output as XML
+   * @see org.w3c.tidy.Configuration#XmlOut
+   */
+
+  public void setXmlOut(boolean XmlOut) {
+    configuration.XmlOut = XmlOut;
+  }
+
+  public boolean getXmlOut() {
+    return configuration.XmlOut;
+  }
+
+  /**
+   * XHTML - output extensible HTML
+   * @see org.w3c.tidy.Configuration#xHTML
+   */
+
+  public void setXHTML(boolean xHTML) {
+    configuration.xHTML = xHTML;
+  }
+
+  public boolean getXHTML() {
+    return configuration.xHTML;
+  }
+
+  /**
+   * RawOut - avoid mapping values > 127 to entities
+   * @see org.w3c.tidy.Configuration#RawOut
+   */
+
+  public void setRawOut(boolean RawOut) {
+    configuration.RawOut = RawOut;
+  }
+
+  public boolean getRawOut() {
+    return configuration.RawOut;
+  }
+
+  /**
+   * UpperCaseTags - output tags in upper not lower case
+   * @see org.w3c.tidy.Configuration#UpperCaseTags
+   */
+
+  public void setUpperCaseTags(boolean UpperCaseTags) {
+    configuration.UpperCaseTags = UpperCaseTags;
+  }
+
+  public boolean getUpperCaseTags() {
+    return configuration.UpperCaseTags;
+  }
+
+  /**
+   * UpperCaseAttrs - output attributes in upper not lower case
+   * @see org.w3c.tidy.Configuration#UpperCaseAttrs
+   */
+
+  public void setUpperCaseAttrs(boolean UpperCaseAttrs) {
+    configuration.UpperCaseAttrs = UpperCaseAttrs;
+  }
+
+  public boolean getUpperCaseAttrs() {
+    return configuration.UpperCaseAttrs;
+  }
+
+  /**
+   * MakeClean - remove presentational clutter
+   * @see org.w3c.tidy.Configuration#MakeClean
+   */
+
+  public void setMakeClean(boolean MakeClean) {
+    configuration.MakeClean = MakeClean;
+  }
+
+  public boolean getMakeClean() {
+    return configuration.MakeClean;
+  }
+
+  /**
+   * BreakBeforeBR - o/p newline before &lt;br&gt; or not?
+   * @see org.w3c.tidy.Configuration#BreakBeforeBR
+   */
+
+  public void setBreakBeforeBR(boolean BreakBeforeBR) {
+    configuration.BreakBeforeBR = BreakBeforeBR;
+  }
+
+  public boolean getBreakBeforeBR() {
+    return configuration.BreakBeforeBR;
+  }
+
+  /**
+   * BurstSlides - create slides on each h2 element
+   * @see org.w3c.tidy.Configuration#BurstSlides
+   */
+
+  public void setBurstSlides(boolean BurstSlides) {
+    configuration.BurstSlides = BurstSlides;
+  }
+
+  public boolean getBurstSlides() {
+    return configuration.BurstSlides;
+  }
+
+  /**
+   * NumEntities - use numeric entities
+   * @see org.w3c.tidy.Configuration#NumEntities
+   */
+
+  public void setNumEntities(boolean NumEntities) {
+    configuration.NumEntities = NumEntities;
+  }
+
+  public boolean getNumEntities() {
+    return configuration.NumEntities;
+  }
+
+  /**
+   * QuoteMarks - output " marks as &amp;quot;
+   * @see org.w3c.tidy.Configuration#QuoteMarks
+   */
+
+  public void setQuoteMarks(boolean QuoteMarks) {
+    configuration.QuoteMarks = QuoteMarks;
+  }
+
+  public boolean getQuoteMarks() {
+    return configuration.QuoteMarks;
+  }
+
+  /**
+   * QuoteNbsp - output non-breaking space as entity
+   * @see org.w3c.tidy.Configuration#QuoteNbsp
+   */
+
+  public void setQuoteNbsp(boolean QuoteNbsp) {
+    configuration.QuoteNbsp = QuoteNbsp;
+  }
+
+  public boolean getQuoteNbsp() {
+    return configuration.QuoteNbsp;
+  }
+
+  /**
+   * QuoteAmpersand - output naked ampersand as &amp;
+   * @see org.w3c.tidy.Configuration#QuoteAmpersand
+   */
+
+  public void setQuoteAmpersand(boolean QuoteAmpersand) {
+    configuration.QuoteAmpersand = QuoteAmpersand;
+  }
+
+  public boolean getQuoteAmpersand() {
+    return configuration.QuoteAmpersand;
+  }
+
+  /**
+   * WrapAttVals - wrap within attribute values
+   * @see org.w3c.tidy.Configuration#WrapAttVals
+   */
+
+  public void setWrapAttVals(boolean WrapAttVals) {
+    configuration.WrapAttVals = WrapAttVals;
+  }
+
+  public boolean getWrapAttVals() {
+    return configuration.WrapAttVals;
+  }
+
+  /**
+   * WrapScriptlets - wrap within JavaScript string literals
+   * @see org.w3c.tidy.Configuration#WrapScriptlets
+   */
+
+  public void setWrapScriptlets(boolean WrapScriptlets) {
+    configuration.WrapScriptlets = WrapScriptlets;
+  }
+
+  public boolean getWrapScriptlets() {
+    return configuration.WrapScriptlets;
+  }
+
+  /**
+   * WrapSection - wrap within &lt;![ ... ]&gt; section tags
+   * @see org.w3c.tidy.Configuration#WrapSection
+   */
+
+  public void setWrapSection(boolean WrapSection) {
+    configuration.WrapSection = WrapSection;
+  }
+
+  public boolean getWrapSection() {
+    return configuration.WrapSection;
+  }
+
+  /**
+   * AltText - default text for alt attribute
+   * @see org.w3c.tidy.Configuration#altText
+   */
+
+  public void setAltText(String altText) {
+    configuration.altText = altText;
+  }
+
+  public String getAltText() {
+    return configuration.altText;
+  }
+
+  /**
+   * Slidestyle - style sheet for slides
+   * @see org.w3c.tidy.Configuration#slidestyle
+   */
+
+  public void setSlidestyle(String slidestyle) {
+    configuration.slidestyle = slidestyle;
+  }
+
+  public String getSlidestyle() {
+    return configuration.slidestyle;
+  }
+
+  /**
+   * XmlPi - add &lt;?xml?&gt; for XML docs
+   * @see org.w3c.tidy.Configuration#XmlPi
+   */
+
+  public void setXmlPi(boolean XmlPi) {
+    configuration.XmlPi = XmlPi;
+  }
+
+  public boolean getXmlPi() {
+    return configuration.XmlPi;
+  }
+
+  /**
+   * DropFontTags - discard presentation tags
+   * @see org.w3c.tidy.Configuration#DropFontTags
+   */
+
+  public void setDropFontTags(boolean DropFontTags) {
+    configuration.DropFontTags = DropFontTags;
+  }
+
+  public boolean getDropFontTags() {
+    return configuration.DropFontTags;
+  }
+
+  /**
+   * DropEmptyParas - discard empty p elements
+   * @see org.w3c.tidy.Configuration#DropEmptyParas
+   */
+
+  public void setDropEmptyParas(boolean DropEmptyParas) {
+    configuration.DropEmptyParas = DropEmptyParas;
+  }
+
+  public boolean getDropEmptyParas() {
+    return configuration.DropEmptyParas;
+  }
+
+  /**
+   * FixComments - fix comments with adjacent hyphens
+   * @see org.w3c.tidy.Configuration#FixComments
+   */
+
+  public void setFixComments(boolean FixComments) {
+    configuration.FixComments = FixComments;
+  }
+
+  public boolean getFixComments() {
+    return configuration.FixComments;
+  }
+
+  /**
+   * WrapAsp - wrap within ASP pseudo elements
+   * @see org.w3c.tidy.Configuration#WrapAsp
+   */
+
+  public void setWrapAsp(boolean WrapAsp) {
+    configuration.WrapAsp = WrapAsp;
+  }
+
+  public boolean getWrapAsp() {
+    return configuration.WrapAsp;
+  }
+
+  /**
+   * WrapJste - wrap within JSTE pseudo elements
+   * @see org.w3c.tidy.Configuration#WrapJste
+   */
+
+  public void setWrapJste(boolean WrapJste) {
+    configuration.WrapJste = WrapJste;
+  }
+
+  public boolean getWrapJste() {
+    return configuration.WrapJste;
+  }
+
+  /**
+   * WrapPhp - wrap within PHP pseudo elements
+   * @see org.w3c.tidy.Configuration#WrapPhp
+   */
+
+  public void setWrapPhp(boolean WrapPhp) {
+    configuration.WrapPhp = WrapPhp;
+  }
+
+  public boolean getWrapPhp() {
+    return configuration.WrapPhp;
+  }
+
+  /**
+   * FixBackslash - fix URLs by replacing \ with /
+   * @see org.w3c.tidy.Configuration#FixBackslash
+   */
+
+  public void setFixBackslash(boolean FixBackslash) {
+    configuration.FixBackslash = FixBackslash;
+  }
+
+  public boolean getFixBackslash() {
+    return configuration.FixBackslash;
+  }
+
+  /**
+   * IndentAttributes - newline+indent before each attribute
+   * @see org.w3c.tidy.Configuration#IndentAttributes
+   */
+
+  public void setIndentAttributes(boolean IndentAttributes) {
+    configuration.IndentAttributes = IndentAttributes;
+  }
+
+  public boolean getIndentAttributes() {
+    return configuration.IndentAttributes;
+  }
+
+  /**
+   * DocType - user specified doctype
+   * omit | auto | strict | loose | <i>fpi</i>
+   * where the <i>fpi</i> is a string similar to
+   *    &quot;-//ACME//DTD HTML 3.14159//EN&quot;
+   * Note: for <i>fpi</i> include the double-quotes in the string.
+   * @see org.w3c.tidy.Configuration#docTypeStr
+   * @see org.w3c.tidy.Configuration#docTypeMode
+   */
+
+  public void setDocType(String doctype) {
+    if (doctype != null)
+      configuration.docTypeStr = configuration.parseDocType(doctype, "doctype");
+  }
+
+  public String getDocType() {
+    String result = null;
+    switch (configuration.docTypeMode) {
+      case Configuration.DOCTYPE_OMIT :
+        result = "omit";
+        break;
+      case Configuration.DOCTYPE_AUTO :
+        result = "auto";
+        break;
+      case Configuration.DOCTYPE_STRICT :
+        result = "strict";
+        break;
+      case Configuration.DOCTYPE_LOOSE :
+        result = "loose";
+        break;
+      case Configuration.DOCTYPE_USER :
+        result = configuration.docTypeStr;
+        break;
+    }
+    return result;
+  }
+
+  /**
+   * LogicalEmphasis - replace i by em and b by strong
+   * @see org.w3c.tidy.Configuration#LogicalEmphasis
+   */
+
+  public void setLogicalEmphasis(boolean LogicalEmphasis) {
+    configuration.LogicalEmphasis = LogicalEmphasis;
+  }
+
+  public boolean getLogicalEmphasis() {
+    return configuration.LogicalEmphasis;
+  }
+
+  /**
+   * XmlPIs - if set to true PIs must end with ?>
+   * @see org.w3c.tidy.Configuration#XmlPIs
+   */
+
+  public void setXmlPIs(boolean XmlPIs) {
+    configuration.XmlPIs = XmlPIs;
+  }
+
+  public boolean getXmlPIs() {
+    return configuration.XmlPIs;
+  }
+
+  /**
+   * EncloseText - if true text at body is wrapped in &lt;p&gt;'s
+   * @see org.w3c.tidy.Configuration#EncloseBodyText
+   */
+
+  public void setEncloseText(boolean EncloseText) {
+    configuration.EncloseBodyText = EncloseText;
+  }
+
+  public boolean getEncloseText() {
+    return configuration.EncloseBodyText;
+  }
+
+  /**
+   * EncloseBlockText - if true text in blocks is wrapped in &lt;p&gt;'s
+   * @see org.w3c.tidy.Configuration#EncloseBlockText
+   */
+
+  public void setEncloseBlockText(boolean EncloseBlockText) {
+    configuration.EncloseBlockText = EncloseBlockText;
+  }
+
+  public boolean getEncloseBlockText() {
+    return configuration.EncloseBlockText;
+  }
+
+  /**
+   * KeepFileTimes - if true last modified time is preserved<br>
+   * <b>this is NOT supported at this time.</b>
+   * @see org.w3c.tidy.Configuration#KeepFileTimes
+   */
+
+  public void setKeepFileTimes(boolean KeepFileTimes) {
+    configuration.KeepFileTimes = KeepFileTimes;
+  }
+
+  public boolean getKeepFileTimes() {
+    return configuration.KeepFileTimes;
+  }
+
+  /**
+   * Word2000 - draconian cleaning for Word2000
+   * @see org.w3c.tidy.Configuration#Word2000
+   */
+
+  public void setWord2000(boolean Word2000) {
+    configuration.Word2000 = Word2000;
+  }
+
+  public boolean getWord2000() {
+    return configuration.Word2000;
+  }
+
+  /**
+   * TidyMark - add meta element indicating tidied doc
+   * @see org.w3c.tidy.Configuration#TidyMark
+   */
+
+  public void setTidyMark(boolean TidyMark) {
+    configuration.TidyMark = TidyMark;
+  }
+
+  public boolean getTidyMark() {
+    return configuration.TidyMark;
+  }
+
+  /**
+   * XmlSpace - if set to yes adds xml:space attr as needed
+   * @see org.w3c.tidy.Configuration#XmlSpace
+   */
+
+  public void setXmlSpace(boolean XmlSpace) {
+    configuration.XmlSpace = XmlSpace;
+  }
+
+  public boolean getXmlSpace() {
+    return configuration.XmlSpace;
+  }
+
+  /**
+   * Emacs - if true format error output for GNU Emacs
+   * @see org.w3c.tidy.Configuration#Emacs
+   */
+
+  public void setEmacs(boolean Emacs) {
+    configuration.Emacs = Emacs;
+  }
+
+  public boolean getEmacs() {
+    return configuration.Emacs;
+  }
+
+  /**
+   * LiteralAttribs - if true attributes may use newlines
+   * @see org.w3c.tidy.Configuration#LiteralAttribs
+   */
+
+  public void setLiteralAttribs(boolean LiteralAttribs) {
+    configuration.LiteralAttribs = LiteralAttribs;
+  }
+
+  public boolean getLiteralAttribs() {
+    return configuration.LiteralAttribs;
+  }
+
+  /**
+   * InputStreamName - the name of the input stream (printed in the
+   * header information).
+   */
+  public void setInputStreamName(String name) {
+    if (name != null)
+      inputStreamName = name;
+  }
+
+  public String getInputStreamName() {
+    return inputStreamName;
+  }
+
+  /**
+   * Sets the configuration from a configuration file.
+   */
+
+  public void setConfigurationFromFile(String filename) {
+    configuration.parseFile(filename);
+  }
+
+  /**
+   * Sets the configuration from a properties object.
+   */
+
+  public void setConfigurationFromProps(Properties props) {
+    configuration.addProps(props);
+  }
+
+  /**
+   * First time initialization which should precede reading the
+   * command line. Creates the configuration and its tag table, makes
+   * sure the default attribute and entity tables can be obtained, and
+   * points both error writers at System.err. The initialized flag is
+   * only set when all of that succeeded; if one of the default tables
+   * is missing the method returns early and the flag stays false.
+   */
+
+  private void init() {
+    /* "new" never yields null, so the results need no null check */
+    configuration = new Configuration();
+
+    /* the table itself is not used here, it only has to be available */
+    if (AttributeTable.getDefaultAttributeTable() == null)
+      return;
+
+    TagTable tt = new TagTable();
+    tt.setConfiguration(configuration);
+    configuration.tt = tt;
+
+    /* same for the entity table */
+    if (EntityTable.getDefaultEntityTable() == null)
+      return;
+
+    configuration.errfile = null;
+    stderr = new PrintWriter(System.err, true);
+    errout = stderr;
+    initialized = true;
+  }
+
+  /**
+   * Parses InputStream in and returns the root Node.
+   * If out is non-null, pretty prints to OutputStream out.
+   */
+
+  public Node parse(IFile iFile, InputStream in, OutputStream out) {
+    Node document = null;
+
+    try {
+      iFile.deleteMarkers(IMarker.PROBLEM, false, 0);
+      document = parse(iFile, in, null, out);
+    } catch (CoreException e) {
+    } catch (FileNotFoundException fnfe) {
+    } catch (IOException e) {
+    }
+
+    return document;
+  }
+
+  /**
+   * Internal routine that actually does the parsing.  The caller
+   * can pass either an InputStream or file name.  If both are passed,
+   * the file name is preferred.
+   */
+
+  private Node parse(IFile iFile, InputStream in, String file, OutputStream out) throws FileNotFoundException, IOException {
+    Lexer lexer;
+    Node document = null;
+    Node doctype;
+    Out o = new OutImpl(); /* normal output stream */
+    PPrint pprint;
+
+    if (!initialized)
+      return null;
+
+    if (errout == null)
+      return null;
+
+    parseErrors = 0;
+    parseWarnings = 0;
+
+    /* ensure config is self-consistent */
+    configuration.adjust();
+
+    if (file != null) {
+      in = new FileInputStream(file);
+      inputStreamName = file;
+    } else if (in == null) {
+      in = System.in;
+      inputStreamName = "stdin";
+    }
+
+    if (in != null) {
+      lexer = new Lexer(iFile,new StreamInImpl(in, configuration.CharEncoding, configuration.tabsize), configuration);
+      lexer.errout = errout;
+
+      /*
+        store pointer to lexer in input stream
+        to allow character encoding errors to be
+        reported
+      */
+      lexer.in.lexer = lexer;
+
+      /* Tidy doesn't alter the doctype for generic XML docs */
+      if (configuration.XmlTags)
+        document = ParserImpl.parseXMLDocument(lexer);
+      else {
+        lexer.warnings = 0;
+        if (!configuration.Quiet)
+          Report.helloMessage(errout, Report.RELEASE_DATE, inputStreamName);
+
+        document = ParserImpl.parseDocument(lexer);
+
+        if (!document.checkNodeIntegrity()) {
+          Report.badTree(errout);
+          return null;
+        }
+
+        Clean cleaner = new Clean(configuration.tt);
+
+        /* simplifies <b><b> ... </b> ...</b> etc. */
+        cleaner.nestedEmphasis(document);
+
+        /* cleans up <dir>indented text</dir> etc. */
+        cleaner.list2BQ(document);
+        cleaner.bQ2Div(document);
+
+        /* replaces i by em and b by strong */
+        if (configuration.LogicalEmphasis)
+          cleaner.emFromI(document);
+
+        if (configuration.Word2000 && cleaner.isWord2000(document, configuration.tt)) {
+          /* prune Word2000's <![if ...]> ... <![endif]> */
+          cleaner.dropSections(lexer, document);
+
+          /* drop style & class attributes and empty p, span elements */
+          cleaner.cleanWord2000(lexer, document);
+        }
+
+        /* replaces presentational markup by style rules */
+        if (configuration.MakeClean || configuration.DropFontTags)
+          cleaner.cleanTree(lexer, document);
+
+        if (!document.checkNodeIntegrity()) {
+          Report.badTree(errout);
+          return null;
+        }
+        doctype = document.findDocType();
+        if (document.content != null) {
+          if (configuration.xHTML)
+            lexer.setXHTMLDocType(document);
+          else
+            lexer.fixDocType(document);
+
+          if (configuration.TidyMark)
+            lexer.addGenerator(document);
+        }
+
+        /* ensure presence of initial <?XML version="1.0"?> */
+        if (configuration.XmlOut && configuration.XmlPi)
+          lexer.fixXMLPI(document);
+
+        if (!configuration.Quiet && document.content != null) {
+          Report.reportVersion(errout, lexer, inputStreamName, doctype);
+          Report.reportNumWarnings(errout, lexer);
+        }
+      }
+
+      parseWarnings = lexer.warnings;
+      parseErrors = lexer.errors;
+
+      // Try to close the InputStream but only if if we created it.
+
+      if ((file != null) && (in != System.in)) {
+        try {
+          in.close();
+        } catch (IOException e) {
+        }
+      }
+
+      if (lexer.errors > 0)
+        Report.needsAuthorIntervention(errout);
+
+      o.state = StreamIn.FSM_ASCII;
+      o.encoding = configuration.CharEncoding;
+
+      if (!configuration.OnlyErrors && lexer.errors == 0) {
+        if (configuration.BurstSlides) {
+          Node body;
+
+          body = null;
+          /*
+             remove doctype to avoid potential clash with
+             markup introduced when bursting into slides
+          */
+          /* discard the document type */
+          doctype = document.findDocType();
+
+          if (doctype != null)
+            Node.discardElement(doctype);
+
+          /* slides use transitional features */
+          lexer.versions |= Dict.VERS_HTML40_LOOSE;
+
+          /* and patch up doctype to match */
+          if (configuration.xHTML)
+            lexer.setXHTMLDocType(document);
+          else
+            lexer.fixDocType(document);
+
+          /* find the body element which may be implicit */
+          body = document.findBody(configuration.tt);
+
+          if (body != null) {
+            pprint = new PPrint(configuration);
+            Report.reportNumberOfSlides(errout, pprint.countSlides(body));
+            pprint.createSlides(lexer, document);
+          } else
+            Report.missingBody(errout);
+        } else if (configuration.writeback && (file != null)) {
+          try {
+            pprint = new PPrint(configuration);
+            o.out = new FileOutputStream(file);
+
+            if (configuration.XmlTags)
+              pprint.printXMLTree(o, (short) 0, 0, lexer, document);
+            else
+              pprint.printTree(o, (short) 0, 0, lexer, document);
+
+            pprint.flushLine(o, 0);
+            o.out.close();
+          } catch (IOException e) {
+            errout.println(file + e.toString());
+          }
+        } else if (out != null) {
+          pprint = new PPrint(configuration);
+          o.out = out;
+
+          if (configuration.XmlTags)
+            pprint.printXMLTree(o, (short) 0, 0, lexer, document);
+          else
+            pprint.printTree(o, (short) 0, 0, lexer, document);
+
+          pprint.flushLine(o, 0);
+        }
+
+      }
+
+      Report.errorSummary(lexer);
+    }
+    return document;
+  }
+
+  /**
+   * Parses InputStream in and returns a DOM Document node.
+   * If out is non-null, pretty prints to OutputStream out.
+   */
+
+  public org.w3c.dom.Document parseDOM(IFile file, InputStream in, OutputStream out) {
+    Node document = parse(file, in, out);
+    if (document != null)
+      return (org.w3c.dom.Document) document.getAdapter();
+    else
+      return null;
+  }
+
+  /**
+   * Creates an empty DOM Document.
+   */
+
+  public static org.w3c.dom.Document createEmptyDocument() {
+    Node document = new Node(Node.RootNode, new byte[0], 0, 0);
+    Node node = new Node(Node.StartTag, new byte[0], 0, 0, "html", new TagTable());
+    if (document != null && node != null) {
+      Node.insertNodeAtStart(document, node);
+      return (org.w3c.dom.Document) document.getAdapter();
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Pretty-prints a DOM Document.
+   */
+
+  public void pprint(org.w3c.dom.Document doc, OutputStream out) {
+    Out o = new OutImpl();
+    PPrint pprint;
+    Node document;
+
+    if (!(doc instanceof DOMDocumentImpl)) {
+      return;
+    }
+    document = ((DOMDocumentImpl) doc).adaptee;
+
+    o.state = StreamIn.FSM_ASCII;
+    o.encoding = configuration.CharEncoding;
+
+    if (out != null) {
+      pprint = new PPrint(configuration);
+      o.out = out;
+
+      if (configuration.XmlTags)
+        pprint.printXMLTree(o, (short) 0, 0, null, document);
+      else
+        pprint.printTree(o, (short) 0, 0, null, document);
+
+      pprint.flushLine(o, 0);
+    }
+  }
+
+  /**
+   * Command line interface to parser and pretty printer.
+   */
+
+  public static void main(String[] argv) {
+    int totalerrors = 0;
+    int totalwarnings = 0;
+    String file;
+    InputStream in;
+    String prog = "Tidy";
+    Node document;
+    Node doctype;
+    Lexer lexer;
+    String s;
+    Out out = new OutImpl(); /* normal output stream */
+    PPrint pprint;
+    int argc = argv.length + 1;
+    int argIndex = 0;
+    Tidy tidy;
+    Configuration configuration;
+    String arg;
+    String current_errorfile = "stderr";
+
+    tidy = new Tidy();
+    configuration = tidy.getConfiguration();
+
+    /* read command line */
+
+    while (argc > 0) {
+      if (argc > 1 && argv[argIndex].startsWith("-")) {
+        /* support -foo and --foo */
+        arg = argv[argIndex].substring(1);
+
+        if (arg.length() > 0 && arg.charAt(0) == '-')
+          arg = arg.substring(1);
+
+        if (arg.equals("xml"))
+          configuration.XmlTags = true;
+        else if (arg.equals("asxml") || arg.equals("asxhtml"))
+          configuration.xHTML = true;
+        else if (arg.equals("indent")) {
+          configuration.IndentContent = true;
+          configuration.SmartIndent = true;
+        } else if (arg.equals("omit"))
+          configuration.HideEndTags = true;
+        else if (arg.equals("upper"))
+          configuration.UpperCaseTags = true;
+        else if (arg.equals("clean"))
+          configuration.MakeClean = true;
+        else if (arg.equals("raw"))
+          configuration.CharEncoding = Configuration.RAW;
+        else if (arg.equals("ascii"))
+          configuration.CharEncoding = Configuration.ASCII;
+        else if (arg.equals("latin1"))
+          configuration.CharEncoding = Configuration.LATIN1;
+        else if (arg.equals("utf8"))
+          configuration.CharEncoding = Configuration.UTF8;
+        else if (arg.equals("iso2022"))
+          configuration.CharEncoding = Configuration.ISO2022;
+        else if (arg.equals("mac"))
+          configuration.CharEncoding = Configuration.MACROMAN;
+        else if (arg.equals("numeric"))
+          configuration.NumEntities = true;
+        else if (arg.equals("modify"))
+          configuration.writeback = true;
+        else if (arg.equals("change")) /* obsolete */
+          configuration.writeback = true;
+        else if (arg.equals("update")) /* obsolete */
+          configuration.writeback = true;
+        else if (arg.equals("errors"))
+          configuration.OnlyErrors = true;
+        else if (arg.equals("quiet"))
+          configuration.Quiet = true;
+        else if (arg.equals("slides"))
+          configuration.BurstSlides = true;
+        else if (arg.equals("help") || argv[argIndex].charAt(1) == '?' || argv[argIndex].charAt(1) == 'h') {
+          Report.helpText(new PrintWriter(System.out, true), prog);
+          System.exit(1);
+        } else if (arg.equals("config")) {
+          if (argc >= 3) {
+            configuration.parseFile(argv[argIndex + 1]);
+            --argc;
+            ++argIndex;
+          }
+        } else if (argv[argIndex].equals("-file") || argv[argIndex].equals("--file") || argv[argIndex].equals("-f")) {
+          if (argc >= 3) {
+            configuration.errfile = argv[argIndex + 1];
+            --argc;
+            ++argIndex;
+          }
+        } else if (argv[argIndex].equals("-wrap") || argv[argIndex].equals("--wrap") || argv[argIndex].equals("-w")) {
+          if (argc >= 3) {
+            configuration.wraplen = Integer.parseInt(argv[argIndex + 1]);
+            --argc;
+            ++argIndex;
+          }
+        } else if (argv[argIndex].equals("-version") || argv[argIndex].equals("--version") || argv[argIndex].equals("-v")) {
+          Report.showVersion(tidy.getErrout());
+          System.exit(0);
+        } else {
+          s = argv[argIndex];
+
+          for (int i = 1; i < s.length(); i++) {
+            if (s.charAt(i) == 'i') {
+              configuration.IndentContent = true;
+              configuration.SmartIndent = true;
+            } else if (s.charAt(i) == 'o')
+              configuration.HideEndTags = true;
+            else if (s.charAt(i) == 'u')
+              configuration.UpperCaseTags = true;
+            else if (s.charAt(i) == 'c')
+              configuration.MakeClean = true;
+            else if (s.charAt(i) == 'n')
+              configuration.NumEntities = true;
+            else if (s.charAt(i) == 'm')
+              configuration.writeback = true;
+            else if (s.charAt(i) == 'e')
+              configuration.OnlyErrors = true;
+            else if (s.charAt(i) == 'q')
+              configuration.Quiet = true;
+            else
+              Report.unknownOption(tidy.getErrout(), s.charAt(i));
+          }
+        }
+
+        --argc;
+        ++argIndex;
+        continue;
+      }
+
+      /* ensure config is self-consistent */
+      configuration.adjust();
+
+      /* user specified error file */
+      if (configuration.errfile != null) {
+        /* is it same as the currently opened file? */
+        if (!configuration.errfile.equals(current_errorfile)) {
+          /* no so close previous error file */
+
+          if (tidy.getErrout() != tidy.getStderr())
+            tidy.getErrout().close();
+
+          /* and try to open the new error file */
+          try {
+            tidy.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true));
+            current_errorfile = configuration.errfile;
+          } catch (IOException e) {
+            /* can't be opened so fall back to stderr */
+            current_errorfile = "stderr";
+            tidy.setErrout(tidy.getStderr());
+          }
+        }
+      }
+
+      if (argc > 1) {
+        file = argv[argIndex];
+      } else {
+        file = "stdin";
+      }
+
+      try {
+        document = tidy.parse(null, null, file, System.out);
+        totalwarnings += tidy.parseWarnings;
+        totalerrors += tidy.parseErrors;
+      } catch (FileNotFoundException fnfe) {
+        Report.unknownFile(tidy.getErrout(), prog, file);
+      } catch (IOException ioe) {
+        Report.unknownFile(tidy.getErrout(), prog, file);
+      }
+
+      --argc;
+      ++argIndex;
+
+      if (argc <= 1)
+        break;
+    }
+
+    if (totalerrors + totalwarnings > 0)
+      Report.generalInfo(tidy.getErrout());
+
+    if (tidy.getErrout() != tidy.getStderr())
+      tidy.getErrout().close();
+
+    /* return status can be used by scripts */
+
+    if (totalerrors > 0)
+      System.exit(2);
+
+    if (totalwarnings > 0)
+      System.exit(1);
+
+    /* 0 signifies all is ok */
+    System.exit(0);
+  }
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/TidyBeanInfo.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/TidyBeanInfo.java
new file mode 100644 (file)
index 0000000..c3ccde7
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * @(#)TidyBeanInfo.java   1.11 2000/08/16
+ *
+ */
+
+package net.sourceforge.phpdt.tidy;
+
+/**
+ *
+ * BeanInfo for Tidy
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
+ * HTML Tidy Release 4 Aug 2000</a>
+ *
+ * @author  Dave Raggett <dsr@w3.org>
+ * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ */
+
+import java.beans.SimpleBeanInfo;
+import java.awt.Image;
+
+public class TidyBeanInfo extends SimpleBeanInfo {
+
+    public Image getIcon(int kind)
+    {
+        return loadImage("tidy.gif");
+    }
+
+}
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/TidyMessages.properties b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/TidyMessages.properties
new file mode 100644 (file)
index 0000000..323a7ee
--- /dev/null
@@ -0,0 +1,194 @@
+error=Error: 
+warning=Warning: 
+line_column=line {0,number} column {1,number} - 
+emacs_format={0}:{1,number}:{2,number}:
+illegal_char=Warning: replacing illegal character code {0,number}
+missing_semicolon=Warning: entity "{0}" doesn''t end in '';''
+unknown_entity=Warning: unescaped & or unknown entity "{0}"
+unescaped_ampersand=Warning: unescaped & which should be written as &amp;
+unknown_attribute=Warning: unknown attribute "{0}"
+missing_attribute=\ lacks "{0}" attribute
+missing_attr_value=\ attribute "{0}" lacks value
+missing_imagemap=\ should use client-side image map
+bad_attribute_value=\ unknown attribute value "{0}"
+xml_attribute_value=\ has XML attribute "{0}"
+unexpected_gt=\ missing '>' for end of tag
+unexpected_quotemark=\ unexpected or duplicate quote mark
+repeated_attribute=\ repeated attribute
+nested_emphasis=\ nested emphasis
+coerce_to_endtag=\ <{0}> is probably intended as </{0}>
+proprietary_attr_value=\ proprietary attribute value "{0}"
+missing_endtag_for=Warning: missing </{0}>
+missing_endtag_before=Warning: missing </{0}> before 
+discarding_unexpected=Warning: discarding unexpected 
+forced_end_anchor=Warning: <a> is probably intended as </a>
+non_matching_endtag_1=Warning: replacing unexpected 
+non_matching_endtag_2=\ by </{0}>
+tag_not_allowed_in=\ isn''t allowed in <{0}> elements
+doctype_after_tags=Warning: <!DOCTYPE> isn't allowed after elements
+missing_starttag=Warning: missing <{0}>
+unexpected_endtag=Warning: unexpected </{0}>
+unexpected_endtag_suffix=\ in <{0}>
+too_many_elements=Warning: too many {0} elements
+too_many_elements_suffix=\ in <{0}>
+using_br_inplace_of=Warning: using <br> in place of 
+inserting_tag=Warning: inserting implicit <{0}>
+cant_be_nested=\ can't be nested
+proprietary_element=\ is not approved by W3C
+obsolete_element=Warning: replacing obsolete element 
+replacing_element=Warning: replacing element
+by=\ by 
+trim_empty_element=Warning: trimming empty 
+missing_title_element=Warning: inserting missing 'title' element
+illegal_nesting=\ shouldn't be nested
+noframes_content=\ not inside 'noframes' element
+inconsistent_version=Warning: html doctype doesn't match content
+content_after_body=Warning: content occurs after end of body
+malformed_comment=Warning: adjacent hyphens within comment
+bad_comment_chars=Warning: expecting -- or >
+bad_xml_comment=Warning: XML comments can't contain --
+bad_cdata_content=Warning: '<' + '/' + letter not allowed here
+inconsistent_namespace=Warning: html namespace doesn't match content
+suspected_missing_quote=Error: missing quotemark for attribute value
+duplicate_frameset=Error: repeated FRAMESET element
+unknown_element=\ is not recognized!
+dtype_not_upper_case=Warning: SYSTEM, PUBLIC, W3C, DTD, EN must be upper case
+unexpected_end_of_file=Warning: end of file while parsing attributes
+malformed_doctype=Warning: expected "html PUBLIC" or "html SYSTEM"
+id_name_mismatch=\ id and name attribute value mismatch
+
+badchars_summary=Character codes for the Microsoft Windows fonts in the range\n\
+128 - 159 may not be recognized on other platforms. You are\n\
+instead recommended to use named entities, e.g. &trade; rather\n\
+than Windows character code 153 (0x2122 in Unicode). Note that\n\
+as of February 1998 few browsers support the new entities.\n\n
+
+badform_summary=You may need to move one or both of the <form> and </form>\n\
+tags. HTML elements should be properly nested and form elements\n\
+are no exception. For instance you should not place the <form>\n\
+in one table cell and the </form> in another. If the <form> is\n\
+placed before a table, the </form> cannot be placed inside the\n\
+table! Note that one form can't be nested inside another!\n\n
+
+badaccess_missing_summary=The table summary attribute should be used to describe\n\
+the table structure. It is very helpful for people using\n\
+non-visual browsers. The scope and headers attributes for\n\
+table cells are useful for specifying which headers apply\n\
+to each table cell, enabling non-visual browsers to provide\n\
+a meaningful context for each cell.\n\n
+
+badaccess_missing_image_alt=The alt attribute should be used to give a short description\n\
+of an image; longer descriptions should be given with the\n\
+longdesc attribute which takes a URL linked to the description.\n\
+These measures are needed for people using non-graphical browsers.\n\n
+
+badaccess_missing_image_map=Use client-side image maps in preference to server-side image\n\
+maps as the latter are inaccessible to people using non-\n\
+graphical browsers. In addition, client-side maps are easier\n\
+to set up and provide immediate feedback to users.\n\n
+
+badaccess_missing_link_alt=For hypertext links defined using a client-side image map, you\n\
+need to use the alt attribute to provide a textual description\n\
+of the link for people using non-graphical browsers.\n\n
+
+badaccess_frames=Pages designed using frames present problems for\n\
+people who are either blind or using a browser that\n\
+doesn't support frames. A frames-based page should always\n\
+include an alternative layout inside a NOFRAMES element.\n\n
+
+badaccess_summary=For further advice on how to make your pages accessible\n\
+see "{0}". You may also want to try\n\
+"http://www.cast.org/bobby/" which is a free Web-based\n\
+service for checking URLs for accessibility.\n\n
+
+badlayout_using_layer=The Cascading Style Sheets (CSS) Positioning mechanism\n\
+is recommended in preference to the proprietary <LAYER>\n\
+element due to limited vendor support for LAYER.\n\n
+
+badlayout_using_spacer=You are recommended to use CSS for controlling white\n\
+space (e.g. for indentation, margins and line spacing).\n\
+The proprietary <SPACER> element has limited vendor support.\n\n
+
+badlayout_using_font=You are recommended to use CSS to specify the font and\n\
+properties such as its size and color. This will reduce\n\
+the size of HTML files and make them easier to maintain\n\
+compared with using <FONT> elements.\n\n
+
+badlayout_using_nobr=You are recommended to use CSS to control line wrapping.\n\
+Use \"white-space: nowrap\" to inhibit wrapping in place\n\
+of inserting <NOBR>...</NOBR> into the markup.\n\n
+
+badlayout_using_body=You are recommended to use CSS to specify page and link colors\n\n
+
+unrecognized_option=unrecognized option -{0} use -help to list options
+unknown_file={0}: can''t open file "{1}"
+unknown_option=Warning - unknown option: {0}
+bad_argument=Warning - missing or malformed argument for option: {0}
+
+needs_author_intervention=This document has errors that must be fixed before\n\
+using HTML Tidy to generate a tidied up version.\n\n
+
+missing_body=Can't create slides - document is missing a body element.
+slides_found={0,number} Slides found
+
+general_info=HTML & CSS specifications are available from http://www.w3.org/\n\
+To learn more about Tidy see http://www.w3.org/People/Raggett/tidy/\n\
+Please send bug reports to Dave Raggett care of <html-tidy@w3.org>\n\
+Lobby your company to join W3C, see http://www.w3.org/Consortium\n
+
+hello_message=\nTidy (vers {0}) Parsing "{1}"
+
+report_version=\n{0}: Document content looks like {1}
+
+doctype_given=\n{0}: Doctype given is "
+
+num_warnings={0,number} warnings/errors were found!\n
+no_warnings=no warnings or errors were found\n
+
+help_text={0}: file1 file2 ...\n\
+Utility to clean up & pretty print html files\n\
+see http://www.w3.org/People/Raggett/tidy/\n\
+options for tidy released on {1}\n\
+\n\
+Processing directives\n\
+--------------------\n\
+  -indent or -i   indent element content\n\
+  -omit   or -o   omit optional endtags\n\
+  -wrap 72        wrap text at column 72 (default is 68)\n\
+  -upper  or -u   force tags to upper case (default is lower)\n\
+  -clean  or -c   replace font, nobr & center tags by CSS\n\
+  -numeric or -n  output numeric rather than named entities\n\
+  -errors or -e   only show errors\n\
+  -quiet or -q    suppress nonessential output\n\
+  -xml            use this when input is wellformed xml\n\
+  -asxml          to convert html to wellformed xml\n\
+  -slides         to burst into slides on h2 elements\n\
+\n\
+Character encodings\n\
+------------------\n\
+  -raw            leave chars > 128 unchanged upon output\n\
+  -ascii          use ASCII for output, Latin-1 for input\n\
+  -latin1         use Latin-1 for both input and output\n\
+  -iso2022        use ISO2022 for both input and output\n\
+  -utf8           use UTF-8 for both input and output\n\
+  -mac            use the Apple MacRoman character set\n\
+\n\
+File manipulation\n\
+---------------\n\
+  -config <file>  set options from config file\n\
+  -f <file>       write errors to named <file>\n\
+  -modify or -m   to modify original files\n\
+\n\
+Miscellaneous\n\
+------------\n\
+  -version or -v  show version\n\
+  -help   or -h   list command line options\n\
+You can also use --blah for any config file option blah\n\
+\n\
+Input/Output default to stdin/stdout respectively\n\
+Single letter options apart from -f may be combined\n\
+as in:  tidy -f errs.txt -imu foo.html\n\
+For further info on HTML see http://www.w3.org/MarkUp\n\
+\n
+
+bad_tree=\nPanic - tree has lost its integrity\n
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/config.txt b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/config.txt
new file mode 100644 (file)
index 0000000..b1de207
--- /dev/null
@@ -0,0 +1,20 @@
+# sample config file for Java HTML tidy
+
+indent=auto
+indent-spaces=2
+wrap=72
+markup=yes
+clean=yes
+output-xml=no
+input-xml=no
+show-warnings=yes
+numeric-entities=yes
+quote-marks=yes
+quote-nbsp=yes
+quote-ampersand=no
+break-before-br=no
+uppercase-tags=yes
+uppercase-attributes=yes
+smart-indent=no
+output-xhtml=yes
+char-encoding=latin1