net.sourceforge.phpeclipse - todo:
-- syntax highlighting for php-files
-- php project wizard
-- update httpd context path (for project)
-- html highlighting
- debug for php
\ No newline at end of file
* Test the PHP Parser with different PHP snippets
*/
public void testPHPParser() {
+
checkHTML("\n\n\n\n <?php print \"Hello world\" ?>");
checkHTML("<?php phpinfo(); ?>");
checkHTML("<?php phpinfo()?>");
checkHTML("<?php phpinfo(); ?> foo <?php phpinfo(); ?>");
checkHTML(" <?php //this is a line comment ?>");
+ checkPHP("$add = 'a'.$i;$val = $$add;");
checkPHP("($a==\"b\") || (c($this->x)==\"d\");");
checkPHP("(substr($this->file, 0, 2) == \"MM\");");
checkPHP("(substr($this->file, 0, 2) == \"MM\") || substr($this->file, 0, 2) == \"II\";");
<?xml-stylesheet type="text/xsl" href="${url}">
${cursor}
</template>
-
+ <template name="<script" description="JavaScript inline" context="html" enabled="true"><script language="JavaScript" type="text/javascript">
+ ${cursor}
+</script>
+ </template>
+ <template name="<script" description="JavaScript file" context="html" enabled="true"><script language="JavaScript" type="text/javascript" src="${jsfile}.js"></script>${cursor}
+ </template>
+ <template name="<style" description="style text/css" context="html" enabled="true"><style type="text/css">
+<!--
+ ${cursor}
+-->
+</style>
+ </template>
<template name="&quot" description="quot" context="html" enabled="true">&quot;</template>
<template name="&amp" description="ampersand" context="html" enabled="true">&amp;</template>
<template name="&lt" description="less than" context="html" enabled="true">&lt;</template>
import org.eclipse.jface.viewers.StructuredSelection;
import org.eclipse.ui.IObjectActionDelegate;
import org.eclipse.ui.IWorkbenchPart;
-import org.w3c.tidy.Configuration;
-import org.w3c.tidy.Tidy;
+import net.sourceforge.phpdt.tidy.Configuration;
+import net.sourceforge.phpdt.tidy.Tidy;
public class HTMLParserAction implements IObjectActionDelegate {
ident.append(ch);
if (ch == '$') {
+ getChar();
+ // attention recursive call:
+ getIdentifier();
token = TT_VARIABLE;
+ return;
} else {
token = TT_IDENTIFIER;
}
+
getChar();
while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || (ch == '_')) {
ident.append(ch);
identifier = ident.toString();
chIndx--;
+ // determine if this identifier is a keyword
+ // @todo improve this in future version
Integer i = (Integer) keywordMap.get(identifier.toLowerCase());
if (i != null) {
token = i.intValue();
getNextToken();
} else if (token == TT_function) {
getNextToken();
+ if (token == TT_AMPERSAND) {
+ getNextToken();
+ }
if (token == TT_IDENTIFIER) {
outlineInfo.addVariable(identifier);
current.add(new PHPFunctionDeclaration(current, identifier, chIndx - identifier.length()));
+++ /dev/null
-/*
- * @(#)AttVal.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Attribute/Value linked list node
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class AttVal extends Object implements Cloneable {
-
- public AttVal next;
- public Attribute dict;
- public Node asp;
- public Node php;
- public int delim;
- public String attribute;
- public String value;
-
- public AttVal()
- {
- this.next = null;
- this.dict = null;
- this.asp = null;
- this.php = null;
- this.delim = 0;
- this.attribute = null;
- this.value = null;
- }
-
- public AttVal(AttVal next, Attribute dict, int delim,
- String attribute, String value)
- {
- this.next = next;
- this.dict = dict;
- this.asp = null;
- this.php = null;
- this.delim = delim;
- this.attribute = attribute;
- this.value = value;
- }
-
- public AttVal(AttVal next, Attribute dict, Node asp, Node php,
- int delim, String attribute, String value)
- {
- this.next = next;
- this.dict = dict;
- this.asp = asp;
- this.php = php;
- this.delim = delim;
- this.attribute = attribute;
- this.value = value;
- }
-
- protected Object clone()
- {
- AttVal av = new AttVal();
- if (next != null) {
- av.next = (AttVal)next.clone();
- }
- if (attribute != null)
- av.attribute = attribute;
- if (value != null)
- av.value = value;
- av.delim = delim;
- if (asp != null) {
- av.asp = (Node)asp.clone();
- }
- if (php != null) {
- av.php = (Node)php.clone();
- }
- av.dict =
- AttributeTable.getDefaultAttributeTable().findAttribute(this);
- return av;
- }
-
- public boolean isBoolAttribute()
- {
- Attribute attribute = this.dict;
- if ( attribute != null ) {
- if (attribute.attrchk == AttrCheckImpl.getCheckBool() ) {
- return true;
- }
- }
-
- return false;
- }
-
- /* ignore unknown attributes for proprietary elements */
- public Attribute checkAttribute( Lexer lexer, Node node )
- {
- TagTable tt = lexer.configuration.tt;
-
- if (this.asp == null && this.php == null)
- this.checkUniqueAttribute(lexer, node);
-
- Attribute attribute = this.dict;
- if ( attribute != null ) {
- /* title is vers 2.0 for A and LINK otherwise vers 4.0 */
- if (attribute == AttributeTable.attrTitle &&
- (node.tag == tt.tagA || node.tag == tt.tagLink))
- lexer.versions &= Dict.VERS_ALL;
- else if ((attribute.versions & Dict.VERS_XML) != 0)
- {
- if (!(lexer.configuration.XmlTags || lexer.configuration.XmlOut))
- Report.attrError(lexer, node, this.attribute, Report.XML_ATTRIBUTE_VALUE);
- }
- else
- lexer.versions &= attribute.versions;
-
- if (attribute.attrchk != null)
- attribute.attrchk.check(lexer, node, this);
- }
- else if (!lexer.configuration.XmlTags && !(node.tag == null) && this.asp == null &&
- !(node.tag != null && ((node.tag.versions & Dict.VERS_PROPRIETARY) != 0)))
- Report.attrError(lexer, node, this.attribute, Report.UNKNOWN_ATTRIBUTE);
-
- return attribute;
- }
-
- /*
- the same attribute name can't be used
- more than once in each element
- */
- public void checkUniqueAttribute(Lexer lexer, Node node)
- {
- AttVal attr;
- int count = 0;
-
- for (attr = this.next; attr != null; attr = attr.next)
- {
- if (this.attribute != null &&
- attr.attribute != null &&
- attr.asp == null &&
- attr.php == null &&
- Lexer.wstrcasecmp(this.attribute, attr.attribute) == 0)
- ++count;
- }
-
- if (count > 0)
- Report.attrError(lexer, node, this.attribute, Report.REPEATED_ATTRIBUTE);
- }
-
- /* --------------------- DOM ---------------------------- */
-
- protected org.w3c.dom.Attr adapter = null;
-
- protected org.w3c.dom.Attr getAdapter()
- {
- if (adapter == null)
- {
- adapter = new DOMAttrImpl(this);
- }
- return adapter;
- }
- /* --------------------- END DOM ------------------------ */
-
-}
+++ /dev/null
-/*
- * @(#)AttrCheck.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Check attribute values
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public interface AttrCheck {
-
- public void check( Lexer lexer, Node node, AttVal attval);
-
-}
+++ /dev/null
-/*
- * @(#)AttrCheckImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Check attribute values implementations
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class AttrCheckImpl {
-
- public static class CheckUrl implements AttrCheck {
-
- public void check( Lexer lexer, Node node, AttVal attval)
- {
- if (attval.value == null)
- Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE);
- else if (lexer.configuration.FixBackslash)
- {
- attval.value = attval.value.replace('\\','/');
- }
- }
-
- };
-
- public static class CheckScript implements AttrCheck {
-
- public void check( Lexer lexer, Node node, AttVal attval)
- {
- }
-
- };
-
- public static class CheckAlign implements AttrCheck {
-
- public void check( Lexer lexer, Node node, AttVal attval)
- {
- String value;
-
- /* IMG, OBJECT, APPLET and EMBED use align for vertical position */
- if (node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0))
- {
- getCheckValign().check(lexer, node, attval);
- return;
- }
-
- value = attval.value;
-
- if (value == null)
- Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE);
- else if (! (Lexer.wstrcasecmp(value, "left") == 0 ||
- Lexer.wstrcasecmp(value, "center") == 0 ||
- Lexer.wstrcasecmp(value, "right") == 0 ||
- Lexer.wstrcasecmp(value, "justify") == 0))
- Report.attrError(lexer, node, attval.value, Report.BAD_ATTRIBUTE_VALUE);
- }
-
- };
-
- public static class CheckValign implements AttrCheck {
-
- public void check( Lexer lexer, Node node, AttVal attval)
- {
- String value;
-
- value = attval.value;
-
- if (value == null)
- Report.attrError(lexer, node, attval.attribute, Report.MISSING_ATTR_VALUE);
- else if (Lexer.wstrcasecmp(value, "top") == 0 ||
- Lexer.wstrcasecmp(value, "middle") == 0 ||
- Lexer.wstrcasecmp(value, "bottom") == 0 ||
- Lexer.wstrcasecmp(value, "baseline") == 0)
- {
- /* all is fine */
- }
- else if (Lexer.wstrcasecmp(value, "left") == 0 ||
- Lexer.wstrcasecmp(value, "right") == 0)
- {
- if (!(node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0)))
- Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE);
- }
- else if (Lexer.wstrcasecmp(value, "texttop") == 0 ||
- Lexer.wstrcasecmp(value, "absmiddle") == 0 ||
- Lexer.wstrcasecmp(value, "absbottom") == 0 ||
- Lexer.wstrcasecmp(value, "textbottom") == 0)
- {
- lexer.versions &= Dict.VERS_PROPRIETARY;
- Report.attrError(lexer, node, value, Report.PROPRIETARY_ATTR_VALUE);
- }
- else
- Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE);
- }
-
- };
-
- public static class CheckBool implements AttrCheck {
-
- public void check( Lexer lexer, Node node, AttVal attval)
- {
- }
-
- };
-
- public static class CheckId implements AttrCheck {
-
- public void check( Lexer lexer, Node node, AttVal attval)
- {
- }
-
- };
-
- public static class CheckName implements AttrCheck {
-
- public void check( Lexer lexer, Node node, AttVal attval)
- {
- }
-
- };
-
- public static AttrCheck getCheckUrl()
- {
- return _checkUrl;
- }
-
- public static AttrCheck getCheckScript()
- {
- return _checkScript;
- }
-
- public static AttrCheck getCheckAlign()
- {
- return _checkAlign;
- }
-
- public static AttrCheck getCheckValign()
- {
- return _checkValign;
- }
-
- public static AttrCheck getCheckBool()
- {
- return _checkBool;
- }
-
- public static AttrCheck getCheckId()
- {
- return _checkId;
- }
-
- public static AttrCheck getCheckName()
- {
- return _checkName;
- }
-
-
- private static AttrCheck _checkUrl = new CheckUrl();
- private static AttrCheck _checkScript = new CheckScript();
- private static AttrCheck _checkAlign = new CheckAlign();
- private static AttrCheck _checkValign = new CheckValign();
- private static AttrCheck _checkBool = new CheckBool();
- private static AttrCheck _checkId = new CheckId();
- private static AttrCheck _checkName = new CheckName();
-
-}
+++ /dev/null
-/*
- * @(#)Attribute.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * HTML attribute
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class Attribute {
-
- public Attribute( String name,
- boolean nowrap,
- short versions,
- AttrCheck attrchk )
- {
- this.name = name;
- this.nowrap = nowrap;
- this.literal = false;
- this.versions = versions;
- this.attrchk = attrchk;
- }
-
- public Attribute( String name,
- short versions,
- AttrCheck attrchk )
- {
- this.name = name;
- this.nowrap = false;
- this.literal = false;
- this.versions = versions;
- this.attrchk = attrchk;
- }
-
- public String name;
- public boolean nowrap;
- public boolean literal;
- public short versions;
- public AttrCheck attrchk;
-
-}
+++ /dev/null
-/*
- * @(#)AttributeTable.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import java.util.Hashtable;
-import java.util.Enumeration;
-
-/**
- *
- * HTML attribute hash table
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class AttributeTable {
-
- public AttributeTable()
- {
- }
-
- public Attribute lookup( String name )
- {
- return (Attribute)attributeHashtable.get( name );
- }
-
- public Attribute install( Attribute attr )
- {
- return (Attribute)attributeHashtable.put( attr.name, attr );
- }
-
- /* public method for finding attribute definition by name */
- public Attribute findAttribute( AttVal attval )
- {
- Attribute np;
-
- if ( attval.attribute != null ) {
- np = lookup( attval.attribute );
- return np;
- }
-
- return null;
- }
-
- public boolean isUrl( String attrname )
- {
- Attribute np;
-
- np = lookup( attrname );
- return ( np != null && np.attrchk == AttrCheckImpl.getCheckUrl() );
- }
-
- public boolean isScript( String attrname )
- {
- Attribute np;
-
- np = lookup( attrname );
- return ( np != null && np.attrchk == AttrCheckImpl.getCheckScript() );
- }
-
- public boolean isLiteralAttribute( String attrname )
- {
- Attribute np;
-
- np = lookup( attrname );
- return ( np != null && np.literal );
- }
-
- /*
- Henry Zrepa reports that some folk are
- using embed with script attributes where
- newlines are signficant. These need to be
- declared and handled specially!
- */
- public void declareLiteralAttrib(String name)
- {
- Attribute attrib = lookup(name);
-
- if (attrib == null)
- attrib = install(new Attribute(name, Dict.VERS_PROPRIETARY, null));
-
- attrib.literal = true;
- }
-
- private Hashtable attributeHashtable = new Hashtable();
-
- private static AttributeTable defaultAttributeTable = null;
-
- private static Attribute[] attrs = {
-
- new Attribute( "abbr", Dict.VERS_HTML40, null ),
- new Attribute( "accept-charset", Dict.VERS_HTML40, null ),
- new Attribute( "accept", Dict.VERS_ALL, null ),
- new Attribute( "accesskey", Dict.VERS_HTML40, null ),
- new Attribute( "action", Dict.VERS_ALL, AttrCheckImpl.getCheckUrl() ),
- new Attribute( "add_date", Dict.VERS_NETSCAPE, null ), /* A */
- new Attribute( "align", Dict.VERS_ALL, AttrCheckImpl.getCheckAlign() ), /* set varies with element */
- new Attribute( "alink", Dict.VERS_LOOSE, null ),
- new Attribute( "alt", Dict.VERS_ALL, null ),
- new Attribute( "archive", Dict.VERS_HTML40, null ), /* space or comma separated list */
- new Attribute( "axis", Dict.VERS_HTML40, null ),
- new Attribute( "background", Dict.VERS_LOOSE, AttrCheckImpl.getCheckUrl() ),
- new Attribute( "bgcolor", Dict.VERS_LOOSE, null ),
- new Attribute( "bgproperties", Dict.VERS_PROPRIETARY, null ), /* BODY "fixed" fixes background */
- new Attribute( "border", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* like LENGTH + "border" */
- new Attribute( "bordercolor", Dict.VERS_MICROSOFT, null ), /* used on TABLE */
- new Attribute( "bottommargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */
- new Attribute( "cellpadding", Dict.VERS_FROM32, null ), /* % or pixel values */
- new Attribute( "cellspacing", Dict.VERS_FROM32, null ),
- new Attribute( "char", Dict.VERS_HTML40, null ),
- new Attribute( "charoff", Dict.VERS_HTML40, null ),
- new Attribute( "charset", Dict.VERS_HTML40, null ),
- new Attribute( "checked", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* i.e. "checked" or absent */
- new Attribute( "cite", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ),
- new Attribute( "class", Dict.VERS_HTML40, null ),
- new Attribute( "classid", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ),
- new Attribute( "clear", Dict.VERS_LOOSE, null ), /* BR: left, right, all */
- new Attribute( "code", Dict.VERS_LOOSE, null ), /* APPLET */
- new Attribute( "codebase", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* OBJECT */
- new Attribute( "codetype", Dict.VERS_HTML40, null ), /* OBJECT */
- new Attribute( "color", Dict.VERS_LOOSE, null ), /* BASEFONT, FONT */
- new Attribute( "cols", Dict.VERS_IFRAMES, null ), /* TABLE & FRAMESET */
- new Attribute( "colspan", Dict.VERS_FROM32, null ),
- new Attribute( "compact", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* lists */
- new Attribute( "content", Dict.VERS_ALL, null ), /* META */
- new Attribute( "coords", Dict.VERS_FROM32, null ), /* AREA, A */
- new Attribute( "data", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* OBJECT */
- new Attribute( "datafld", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */
- new Attribute( "dataformatas", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */
- new Attribute( "datapagesize", Dict.VERS_MICROSOFT, null ), /* used on DIV, IMG */
- new Attribute( "datasrc", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckUrl() ), /* used on TABLE */
- new Attribute( "datetime", Dict.VERS_HTML40, null ), /* INS, DEL */
- new Attribute( "declare", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* OBJECT */
- new Attribute( "defer", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* SCRIPT */
- new Attribute( "dir", Dict.VERS_HTML40, null ), /* ltr or rtl */
- new Attribute( "disabled", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* form fields */
- new Attribute( "enctype", Dict.VERS_ALL, null ), /* FORM */
- new Attribute( "face", Dict.VERS_LOOSE, null ), /* BASEFONT, FONT */
- new Attribute( "for", Dict.VERS_HTML40, null ), /* LABEL */
- new Attribute( "frame", Dict.VERS_HTML40, null ), /* TABLE */
- new Attribute( "frameborder", Dict.VERS_FRAMES, null ), /* 0 or 1 */
- new Attribute( "framespacing", Dict.VERS_PROPRIETARY, null ), /* pixel value */
- new Attribute( "gridx", Dict.VERS_PROPRIETARY, null ), /* TABLE Adobe golive*/
- new Attribute( "gridy", Dict.VERS_PROPRIETARY, null ), /* TABLE Adobe golive */
- new Attribute( "headers", Dict.VERS_HTML40, null ), /* table cells */
- new Attribute( "height", Dict.VERS_ALL, null ), /* pixels only for TH/TD */
- new Attribute( "href", Dict.VERS_ALL, AttrCheckImpl.getCheckUrl() ), /* A, AREA, LINK and BASE */
- new Attribute( "hreflang", Dict.VERS_HTML40, null ), /* A, LINK */
- new Attribute( "hspace", Dict.VERS_ALL, null ), /* APPLET, IMG, OBJECT */
- new Attribute( "http-equiv", Dict.VERS_ALL, null ), /* META */
- new Attribute( "id", Dict.VERS_HTML40, AttrCheckImpl.getCheckId() ),
- new Attribute( "ismap", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* IMG */
- new Attribute( "label", Dict.VERS_HTML40, null ), /* OPT, OPTGROUP */
- new Attribute( "lang", Dict.VERS_HTML40, null ),
- new Attribute( "language", Dict.VERS_LOOSE, null ), /* SCRIPT */
- new Attribute( "last_modified", Dict.VERS_NETSCAPE, null ), /* A */
- new Attribute( "last_visit", Dict.VERS_NETSCAPE, null ), /* A */
- new Attribute( "leftmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */
- new Attribute( "link", Dict.VERS_LOOSE, null ), /* BODY */
- new Attribute( "longdesc", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* IMG */
- new Attribute( "lowsrc", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckUrl() ), /* IMG */
- new Attribute( "marginheight", Dict.VERS_IFRAMES, null ), /* FRAME, IFRAME, BODY */
- new Attribute( "marginwidth", Dict.VERS_IFRAMES, null ), /* ditto */
- new Attribute( "maxlength", Dict.VERS_ALL, null ), /* INPUT */
- new Attribute( "media", Dict.VERS_HTML40, null ), /* STYLE, LINK */
- new Attribute( "method", Dict.VERS_ALL, null ), /* FORM: get or post */
- new Attribute( "multiple", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* SELECT */
- new Attribute( "name", Dict.VERS_ALL, AttrCheckImpl.getCheckName() ),
- new Attribute( "nohref", Dict.VERS_FROM32, AttrCheckImpl.getCheckBool() ), /* AREA */
- new Attribute( "noresize", Dict.VERS_FRAMES, AttrCheckImpl.getCheckBool() ), /* FRAME */
- new Attribute( "noshade", Dict.VERS_LOOSE, AttrCheckImpl.getCheckBool() ), /* HR */
- new Attribute( "nowrap", Dict.VERS_LOOSE, AttrCheckImpl.getCheckBool() ), /* table cells */
- new Attribute( "object", Dict.VERS_HTML40_LOOSE, null ), /* APPLET */
- new Attribute( "onblur", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onchange", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onclick", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "ondblclick", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onkeydown", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onkeypress", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onkeyup", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onload", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onmousedown", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onmousemove", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onmouseout", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onmouseover", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onmouseup", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onsubmit", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onreset", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onselect", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onunload", Dict.VERS_HTML40, AttrCheckImpl.getCheckScript() ), /* event */
- new Attribute( "onafterupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
- new Attribute( "onbeforeupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
- new Attribute( "onerrorupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
- new Attribute( "onrowenter", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
- new Attribute( "onrowexit", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
- new Attribute( "onbeforeunload", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* form fields */
- new Attribute( "ondatasetchanged", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */
- new Attribute( "ondataavailable", Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */
- new Attribute( "ondatasetcomplete",Dict.VERS_MICROSOFT, AttrCheckImpl.getCheckScript() ), /* object, applet */
- new Attribute( "profile", Dict.VERS_HTML40, AttrCheckImpl.getCheckUrl() ), /* HEAD */
- new Attribute( "prompt", Dict.VERS_LOOSE, null ), /* ISINDEX */
- new Attribute( "readonly", Dict.VERS_HTML40, AttrCheckImpl.getCheckBool() ), /* form fields */
- new Attribute( "rel", Dict.VERS_ALL, null ), /* A, LINK */
- new Attribute( "rev", Dict.VERS_ALL, null ), /* A, LINK */
- new Attribute( "rightmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */
- new Attribute( "rows", Dict.VERS_ALL, null ), /* TEXTAREA */
- new Attribute( "rowspan", Dict.VERS_ALL, null ), /* table cells */
- new Attribute( "rules", Dict.VERS_HTML40, null ), /* TABLE */
- new Attribute( "scheme", Dict.VERS_HTML40, null ), /* META */
- new Attribute( "scope", Dict.VERS_HTML40, null ), /* table cells */
- new Attribute( "scrolling", Dict.VERS_IFRAMES, null ), /* yes, no or auto */
- new Attribute( "selected", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* OPTION */
- new Attribute( "shape", Dict.VERS_FROM32, null ), /* AREA, A */
- new Attribute( "showgrid", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive */
- new Attribute( "showgridx", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive*/
- new Attribute( "showgridy", Dict.VERS_PROPRIETARY, AttrCheckImpl.getCheckBool() ), /* TABLE Adobe golive*/
- new Attribute( "size", Dict.VERS_LOOSE, null ), /* HR, FONT, BASEFONT, SELECT */
- new Attribute( "span", Dict.VERS_HTML40, null ), /* COL, COLGROUP */
- new Attribute( "src", (short)(Dict.VERS_ALL | Dict.VERS_FRAMES), AttrCheckImpl.getCheckUrl() ), /* IMG, FRAME, IFRAME */
- new Attribute( "standby", Dict.VERS_HTML40, null ), /* OBJECT */
- new Attribute( "start", Dict.VERS_ALL, null ), /* OL */
- new Attribute( "style", Dict.VERS_HTML40, null ),
- new Attribute( "summary", Dict.VERS_HTML40, null ), /* TABLE */
- new Attribute( "tabindex", Dict.VERS_HTML40, null ), /* fields, OBJECT and A */
- new Attribute( "target", Dict.VERS_HTML40, null ), /* names a frame/window */
- new Attribute( "text", Dict.VERS_LOOSE, null ), /* BODY */
- new Attribute( "title", Dict.VERS_HTML40, null ), /* text tool tip */
- new Attribute( "topmargin", Dict.VERS_MICROSOFT, null ), /* used on BODY */
- new Attribute( "type", Dict.VERS_FROM32, null ), /* also used by SPACER */
- new Attribute( "usemap", Dict.VERS_ALL, AttrCheckImpl.getCheckBool() ), /* things with images */
- new Attribute( "valign", Dict.VERS_FROM32, AttrCheckImpl.getCheckValign() ),
- new Attribute( "value", Dict.VERS_ALL, null ), /* OPTION, PARAM */
- new Attribute( "valuetype", Dict.VERS_HTML40, null ), /* PARAM: data, ref, object */
- new Attribute( "version", Dict.VERS_ALL, null ), /* HTML */
- new Attribute( "vlink", Dict.VERS_LOOSE, null ), /* BODY */
- new Attribute( "vspace", Dict.VERS_LOOSE, null ), /* IMG, OBJECT, APPLET */
- new Attribute( "width", Dict.VERS_ALL, null ), /* pixels only for TD/TH */
- new Attribute( "wrap", Dict.VERS_NETSCAPE, null ), /* textarea */
- new Attribute( "xml:lang", Dict.VERS_XML, null ), /* XML language */
- new Attribute( "xmlns", Dict.VERS_ALL, null ), /* name space */
-
- };
-
- public static Attribute attrHref = null;
- public static Attribute attrSrc = null;
- public static Attribute attrId = null;
- public static Attribute attrName = null;
- public static Attribute attrSummary = null;
- public static Attribute attrAlt = null;
- public static Attribute attrLongdesc = null;
- public static Attribute attrUsemap = null;
- public static Attribute attrIsmap = null;
- public static Attribute attrLanguage = null;
- public static Attribute attrType = null;
- public static Attribute attrTitle = null;
- public static Attribute attrXmlns = null;
- public static Attribute attrValue = null;
- public static Attribute attrContent = null;
- public static Attribute attrDatafld = null;
- public static Attribute attrWidth = null;
- public static Attribute attrHeight = null;
-
- public static AttributeTable getDefaultAttributeTable()
- {
- if ( defaultAttributeTable == null ) {
- defaultAttributeTable = new AttributeTable();
- for ( int i = 0; i < attrs.length; i++ ) {
- defaultAttributeTable.install( attrs[i] );
- }
- attrHref = defaultAttributeTable.lookup("href");
- attrSrc = defaultAttributeTable.lookup("src");
- attrId = defaultAttributeTable.lookup("id");
- attrName = defaultAttributeTable.lookup("name");
- attrSummary = defaultAttributeTable.lookup("summary");
- attrAlt = defaultAttributeTable.lookup("alt");
- attrLongdesc = defaultAttributeTable.lookup("longdesc");
- attrUsemap = defaultAttributeTable.lookup("usemap");
- attrIsmap = defaultAttributeTable.lookup("ismap");
- attrLanguage = defaultAttributeTable.lookup("language");
- attrType = defaultAttributeTable.lookup("type");
- attrTitle = defaultAttributeTable.lookup("title");
- attrXmlns = defaultAttributeTable.lookup("xmlns");
- attrValue = defaultAttributeTable.lookup("value");
- attrContent = defaultAttributeTable.lookup("content");
- attrDatafld = defaultAttributeTable.lookup("datafld");;
- attrWidth = defaultAttributeTable.lookup("width");;
- attrHeight = defaultAttributeTable.lookup("height");;
-
- attrAlt.nowrap = true;
- attrValue.nowrap = true;
- attrContent.nowrap = true;
- }
- return defaultAttributeTable;
- }
-
-}
+++ /dev/null
-/*
- * @(#)CheckAttribs.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Check HTML attributes
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public interface CheckAttribs {
-
- public void check( Lexer lexer, Node node );
-
-}
-
+++ /dev/null
-/*
- * @(#)CheckAttribsImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Check HTML attributes implementation
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class CheckAttribsImpl {
-
- public static class CheckHTML implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- AttVal attval;
- Attribute attribute;
-
- node.checkUniqueAttributes(lexer);
-
- for (attval = node.attributes; attval != null; attval = attval.next)
- {
- attribute = attval.checkAttribute(lexer, node );
-
- if (attribute == AttributeTable.attrXmlns)
- lexer.isvoyager = true;
- }
- }
-
- };
-
- public static class CheckSCRIPT implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- Attribute attribute;
- AttVal lang, type;
-
- node.checkUniqueAttributes(lexer);
-
- lang = node.getAttrByName("language");
- type = node.getAttrByName("type");
-
- if (type == null)
- {
- Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
-
- /* check for javascript */
-
- if (lang != null)
- {
- String str = lang.value;
- if (str.length() > 10)
- str = str.substring(0, 10);
- if ( (Lexer.wstrcasecmp(str, "javascript") == 0) ||
- (Lexer.wstrcasecmp(str, "jscript") == 0) )
- {
- node.addAttribute("type", "text/javascript");
- }
- }
- else
- node.addAttribute("type", "text/javascript");
- }
- }
-
- };
-
- public static class CheckTABLE implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- AttVal attval;
- Attribute attribute;
- boolean hasSummary = false;
-
- node.checkUniqueAttributes(lexer);
-
- for (attval = node.attributes; attval != null; attval = attval.next)
- {
- attribute = attval.checkAttribute(lexer, node);
-
- if (attribute == AttributeTable.attrSummary)
- hasSummary = true;
- }
-
- /* suppress warning for missing summary for HTML 2.0 and HTML 3.2 */
- if (!hasSummary && lexer.doctype != Dict.VERS_HTML20 && lexer.doctype != Dict.VERS_HTML32)
- {
- lexer.badAccess |= Report.MISSING_SUMMARY;
- Report.attrError(lexer, node, "summary", Report.MISSING_ATTRIBUTE);
- }
-
- /* convert <table border> to <table border="1"> */
- if (lexer.configuration.XmlOut)
- {
- attval = node.getAttrByName("border");
- if (attval != null)
- {
- if (attval.value == null)
- attval.value = "1";
- }
- }
- }
-
- };
-
- public static class CheckCaption implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- AttVal attval;
- String value = null;
-
- node.checkUniqueAttributes(lexer);
-
- for (attval = node.attributes; attval != null; attval = attval.next)
- {
- if ( Lexer.wstrcasecmp(attval.attribute, "align") == 0 )
- {
- value = attval.value;
- break;
- }
- }
-
- if (value != null)
- {
- if (Lexer.wstrcasecmp(value, "left") == 0 || Lexer.wstrcasecmp(value, "right") == 0)
- lexer.versions &= (short)(Dict.VERS_HTML40_LOOSE|Dict.VERS_FRAMES);
- else if (Lexer.wstrcasecmp(value, "top") == 0 || Lexer.wstrcasecmp(value, "bottom") == 0)
- lexer.versions &= Dict.VERS_FROM32;
- else
- Report.attrError(lexer, node, value, Report.BAD_ATTRIBUTE_VALUE);
- }
- }
-
- };
-
- public static class CheckHR implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- if (node.getAttrByName("src") != null)
- Report.attrError(lexer, node, "src", Report.PROPRIETARY_ATTR_VALUE);
- }
- };
-
- public static class CheckIMG implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- AttVal attval;
- Attribute attribute;
- boolean hasAlt = false;
- boolean hasSrc = false;
- boolean hasUseMap = false;
- boolean hasIsMap = false;
- boolean hasDataFld = false;
-
- node.checkUniqueAttributes(lexer);
-
- for (attval = node.attributes; attval != null; attval = attval.next)
- {
- attribute = attval.checkAttribute( lexer, node );
-
- if (attribute == AttributeTable.attrAlt)
- hasAlt = true;
- else if (attribute == AttributeTable.attrSrc)
- hasSrc = true;
- else if (attribute == AttributeTable.attrUsemap)
- hasUseMap = true;
- else if (attribute == AttributeTable.attrIsmap)
- hasIsMap = true;
- else if (attribute == AttributeTable.attrDatafld)
- hasDataFld = true;
- else if (attribute == AttributeTable.attrWidth ||
- attribute == AttributeTable.attrHeight)
- lexer.versions &= ~Dict.VERS_HTML20;
- }
-
- if (!hasAlt)
- {
- lexer.badAccess |= Report.MISSING_IMAGE_ALT;
- Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE);
- if (lexer.configuration.altText != null)
- node.addAttribute("alt", lexer.configuration.altText);
- }
-
- if (!hasSrc && !hasDataFld)
- Report.attrError(lexer, node, "src", Report.MISSING_ATTRIBUTE);
-
- if (hasIsMap && !hasUseMap)
- Report.attrError(lexer, node, "ismap", Report.MISSING_IMAGEMAP);
- }
-
- };
-
- public static class CheckAREA implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- AttVal attval;
- Attribute attribute;
- boolean hasAlt = false;
- boolean hasHref = false;
-
- node.checkUniqueAttributes(lexer);
-
- for (attval = node.attributes; attval != null; attval = attval.next)
- {
- attribute = attval.checkAttribute( lexer, node );
-
- if (attribute == AttributeTable.attrAlt)
- hasAlt = true;
- else if (attribute == AttributeTable.attrHref)
- hasHref = true;
- }
-
- if (!hasAlt)
- {
- lexer.badAccess |= Report.MISSING_LINK_ALT;
- Report.attrError(lexer, node, "alt", Report.MISSING_ATTRIBUTE);
- }
- if (!hasHref)
- Report.attrError(lexer, node, "href", Report.MISSING_ATTRIBUTE);
- }
-
- };
-
- public static class CheckAnchor implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- node.checkUniqueAttributes(lexer);
-
- lexer.fixId(node);
- }
- };
-
- public static class CheckMap implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- node.checkUniqueAttributes(lexer);
-
- lexer.fixId(node);
- }
- }
-
- public static class CheckSTYLE implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- AttVal type = node.getAttrByName("type");
-
- node.checkUniqueAttributes(lexer);
-
- if (type == null)
- {
- Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
-
- node.addAttribute("type", "text/css");
- }
- }
- }
-
- public static class CheckTableCell implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- node.checkUniqueAttributes(lexer);
-
- /*
- HTML4 strict doesn't allow mixed content for
- elements with %block; as their content model
- */
- if (node.getAttrByName("width") != null || node.getAttrByName("height") != null)
- lexer.versions &= ~Dict.VERS_HTML40_STRICT;
- }
- }
-
- /* add missing type attribute when appropriate */
- public static class CheckLINK implements CheckAttribs {
-
- public void check( Lexer lexer, Node node )
- {
- AttVal rel = node.getAttrByName("rel");
-
- node.checkUniqueAttributes(lexer);
-
- if (rel != null && rel.value != null &&
- rel.value.equals("stylesheet"))
- {
- AttVal type = node.getAttrByName("type");
-
- if (type == null)
- {
- Report.attrError(lexer, node, "type", Report.MISSING_ATTRIBUTE);
-
- node.addAttribute("type", "text/css");
- }
- }
- }
- }
-
- public static CheckAttribs getCheckHTML()
- {
- return _checkHTML;
- }
-
- public static CheckAttribs getCheckSCRIPT()
- {
- return _checkSCRIPT;
- }
-
- public static CheckAttribs getCheckTABLE()
- {
- return _checkTABLE;
- }
-
- public static CheckAttribs getCheckCaption()
- {
- return _checkCaption;
- }
-
- public static CheckAttribs getCheckIMG()
- {
- return _checkIMG;
- }
-
- public static CheckAttribs getCheckAREA()
- {
- return _checkAREA;
- }
-
- public static CheckAttribs getCheckAnchor()
- {
- return _checkAnchor;
- }
-
- public static CheckAttribs getCheckMap()
- {
- return _checkMap;
- }
-
- public static CheckAttribs getCheckSTYLE()
- {
- return _checkStyle;
- }
-
- public static CheckAttribs getCheckTableCell()
- {
- return _checkTableCell;
- }
-
- public static CheckAttribs getCheckLINK()
- {
- return _checkLINK;
- }
-
- public static CheckAttribs getCheckHR()
- {
- return _checkHR;
- }
-
-
- private static CheckAttribs _checkHTML = new CheckHTML();
- private static CheckAttribs _checkSCRIPT = new CheckSCRIPT();
- private static CheckAttribs _checkTABLE = new CheckTABLE();
- private static CheckAttribs _checkCaption = new CheckCaption();
- private static CheckAttribs _checkIMG = new CheckIMG();
- private static CheckAttribs _checkAREA = new CheckAREA();
- private static CheckAttribs _checkAnchor = new CheckAnchor();
- private static CheckAttribs _checkMap = new CheckMap();
- private static CheckAttribs _checkStyle = new CheckSTYLE();
- private static CheckAttribs _checkTableCell = new CheckTableCell();
- private static CheckAttribs _checkLINK = new CheckLINK();
- private static CheckAttribs _checkHR = new CheckHR();
-
-}
+++ /dev/null
-/*
- * @(#)Clean.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Clean up misuse of presentation markup
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-/*
- Filters from other formats such as Microsoft Word
- often make excessive use of presentation markup such
- as font tags, B, I, and the align attribute. By applying
- a set of production rules, it is straight forward to
- transform this to use CSS.
-
- Some rules replace some of the children of an element by
- style properties on the element, e.g.
-
- <p><b>...</b></p> -> <p style="font-weight: bold">...</p>
-
- Such rules are applied to the element's content and then
- to the element itself until none of the rules more apply.
- Having applied all the rules to an element, it will have
- a style attribute with one or more properties.
-
- Other rules strip the element they apply to, replacing
- it by style properties on the contents, e.g.
-
- <dir><li><p>...</li></dir> -> <p style="margin-left 1em">...
-
- These rules are applied to an element before processing
- its content and replace the current element by the first
- element in the exposed content.
-
- After applying both sets of rules, you can replace the
- style attribute by a class value and style rule in the
- document head. To support this, an association of styles
- and class names is built.
-
- A naive approach is to rely on string matching to test
- when two property lists are the same. A better approach
- would be to first sort the properties before matching.
-*/
-
-public class Clean {
-
- private int classNum = 1;
-
- private TagTable tt;
-
- public Clean(TagTable tt)
- {
- this.tt = tt;
- }
-
- private StyleProp insertProperty(StyleProp props, String name,
- String value)
- {
- StyleProp first, prev, prop;
- int cmp;
-
- prev = null;
- first = props;
-
- while (props != null)
- {
- cmp = props.name.compareTo(name);
-
- if (cmp == 0)
- {
- /* this property is already defined, ignore new value */
- return first;
- }
-
- if (cmp > 0) // props.name > name
- {
- /* insert before this */
-
- prop = new StyleProp(name, value, props);
-
- if (prev != null)
- prev.next = prop;
- else
- first = prop;
-
- return first;
- }
-
- prev = props;
- props = props.next;
- }
-
- prop = new StyleProp(name, value);
-
- if (prev != null)
- prev.next = prop;
- else
- first = prop;
-
- return first;
- }
-
- /*
- Create sorted linked list of properties from style string
- It temporarily places nulls in place of ':' and ';' to
- delimit the strings for the property name and value.
- Some systems don't allow you to null literal strings,
- so to avoid this, a copy is made first.
- */
- private StyleProp createProps(StyleProp prop, String style)
- {
- int name_end;
- int value_end;
- int value_start = 0;
- int name_start = 0;
- boolean more;
-
- name_start = 0;
- while (name_start < style.length())
- {
- while (name_start < style.length() &&
- style.charAt(name_start) == ' ')
- ++name_start;
-
- name_end = name_start;
-
- while (name_end < style.length())
- {
- if (style.charAt(name_end) == ':')
- {
- value_start = name_end + 1;
- break;
- }
-
- ++name_end;
- }
-
- if (name_end >= style.length() || style.charAt(name_end) != ':')
- break;
-
- while (value_start < style.length() &&
- style.charAt(value_start) == ' ')
- ++value_start;
-
- value_end = value_start;
- more = false;
-
- while (value_end < style.length())
- {
- if (style.charAt(value_end) == ';')
- {
- more = true;
- break;
- }
-
- ++value_end;
- }
-
- prop = insertProperty(prop,
- style.substring(name_start, name_end),
- style.substring(value_start, value_end));
-
- if (more)
- {
- name_start = value_end + 1;
- continue;
- }
-
- break;
- }
-
- return prop;
- }
-
- private String createPropString(StyleProp props)
- {
- String style = "";
- int len;
- StyleProp prop;
-
- /* compute length */
-
- for (len = 0, prop = props; prop != null; prop = prop.next)
- {
- len += prop.name.length() + 2;
- len += prop.value.length() + 2;
- }
-
- for (prop = props; prop != null; prop = prop.next)
- {
- style = style.concat(prop.name);
- style = style.concat(": ");
-
- style = style.concat(prop.value);
-
- if (prop.next == null)
- break;
-
- style = style.concat("; ");
- }
-
- return style;
- }
-
- /*
- create string with merged properties
- */
- private String addProperty(String style, String property)
- {
- StyleProp prop;
-
- prop = createProps(null, style);
- prop = createProps(prop, property);
- style = createPropString(prop);
- return style;
- }
-
- private String gensymClass(String tag)
- {
- String str;
-
- str = "c" + classNum;
- classNum++;
- return str;
- }
-
- private String findStyle(Lexer lexer, String tag, String properties)
- {
- Style style;
-
- for (style = lexer.styles; style != null; style=style.next)
- {
- if (style.tag.equals(tag) &&
- style.properties.equals(properties))
- return style.tagClass;
- }
-
- style = new Style(tag, gensymClass(tag), properties, lexer.styles);
- lexer.styles = style;
- return style.tagClass;
- }
-
- /*
- Find style attribute in node, and replace it
- by corresponding class attribute. Search for
- class in style dictionary otherwise gensym
- new class and add to dictionary.
-
- Assumes that node doesn't have a class attribute
- */
- private void style2Rule(Lexer lexer, Node node)
- {
- AttVal styleattr, classattr;
- String classname;
-
- styleattr = node.getAttrByName("style");
-
- if (styleattr != null)
- {
- classname = findStyle(lexer, node.element, styleattr.value);
- classattr = node.getAttrByName("class");
-
- /*
- if there already is a class attribute
- then append class name after a space
- */
- if (classattr != null)
- {
- classattr.value = classattr.value + " " + classname;
- node.removeAttribute(styleattr);
- }
- else /* reuse style attribute for class attribute */
- {
- styleattr.attribute = "class";
- styleattr.value = classname;
- }
- }
- }
-
- private void addColorRule(Lexer lexer, String selector, String color)
- {
- if (color != null)
- {
- lexer.addStringLiteral(selector);
- lexer.addStringLiteral(" { color: ");
- lexer.addStringLiteral(color);
- lexer.addStringLiteral(" }\n");
- }
- }
-
- /*
- move presentation attribs from body to style element
-
- background="foo" -> body { background-image: url(foo) }
- bgcolor="foo" -> body { background-color: foo }
- text="foo" -> body { color: foo }
- link="foo" -> :link { color: foo }
- vlink="foo" -> :visited { color: foo }
- alink="foo" -> :active { color: foo }
- */
- private void cleanBodyAttrs(Lexer lexer, Node body)
- {
- AttVal attr;
- String bgurl = null;
- String bgcolor = null;
- String color = null;
-
- attr = body.getAttrByName("background");
-
- if (attr != null)
- {
- bgurl = attr.value;
- attr.value = null;
- body.removeAttribute(attr);
- }
-
- attr = body.getAttrByName("bgcolor");
-
- if (attr != null)
- {
- bgcolor = attr.value;
- attr.value = null;
- body.removeAttribute(attr);
- }
-
- attr = body.getAttrByName("text");
-
- if (attr != null)
- {
- color = attr.value;
- attr.value = null;
- body.removeAttribute(attr);
- }
-
- if (bgurl != null || bgcolor != null || color != null)
- {
- lexer.addStringLiteral(" body {\n");
-
- if (bgurl != null)
- {
- lexer.addStringLiteral(" background-image: url(");
- lexer.addStringLiteral(bgurl);
- lexer.addStringLiteral(");\n");
- }
-
- if (bgcolor != null)
- {
- lexer.addStringLiteral(" background-color: ");
- lexer.addStringLiteral(bgcolor);
- lexer.addStringLiteral(";\n");
- }
-
- if (color != null)
- {
- lexer.addStringLiteral(" color: ");
- lexer.addStringLiteral(color);
- lexer.addStringLiteral(";\n");
- }
-
- lexer.addStringLiteral(" }\n");
- }
-
- attr = body.getAttrByName("link");
-
- if (attr != null)
- {
- addColorRule(lexer, " :link", attr.value);
- body.removeAttribute(attr);
- }
-
- attr = body.getAttrByName("vlink");
-
- if (attr != null)
- {
- addColorRule(lexer, " :visited", attr.value);
- body.removeAttribute(attr);
- }
-
- attr = body.getAttrByName("alink");
-
- if (attr != null)
- {
- addColorRule(lexer, " :active", attr.value);
- body.removeAttribute(attr);
- }
- }
-
- private boolean niceBody(Lexer lexer, Node doc)
- {
- Node body = doc.findBody(lexer.configuration.tt);
-
- if (body != null)
- {
- if (
- body.getAttrByName("background") != null ||
- body.getAttrByName("bgcolor") != null ||
- body.getAttrByName("text") != null ||
- body.getAttrByName("link") != null ||
- body.getAttrByName("vlink") != null ||
- body.getAttrByName("alink") != null
- )
- {
- lexer.badLayout |= Report.USING_BODY;
- return false;
- }
- }
-
- return true;
- }
-
- /* create style element using rules from dictionary */
- private void createStyleElement(Lexer lexer, Node doc)
- {
- Node node, head, body;
- Style style;
- AttVal av;
-
- if (lexer.styles == null && niceBody(lexer, doc))
- return;
-
- node = lexer.newNode(Node.StartTag, null, 0, 0, "style");
- node.implicit = true;
-
- /* insert type attribute */
- av = new AttVal(null, null, '"', "type", "text/css");
- av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
- node.attributes = av;
-
- body = doc.findBody(lexer.configuration.tt);
-
- lexer.txtstart = lexer.lexsize;
-
- if (body != null)
- cleanBodyAttrs(lexer, body);
-
- for (style = lexer.styles; style != null; style = style.next)
- {
- lexer.addCharToLexer(' ');
- lexer.addStringLiteral(style.tag);
- lexer.addCharToLexer('.');
- lexer.addStringLiteral(style.tagClass);
- lexer.addCharToLexer(' ');
- lexer.addCharToLexer('{');
- lexer.addStringLiteral(style.properties);
- lexer.addCharToLexer('}');
- lexer.addCharToLexer('\n');
- }
-
- lexer.txtend = lexer.lexsize;
-
- Node.insertNodeAtEnd(node,
- lexer.newNode(Node.TextNode,
- lexer.lexbuf,
- lexer.txtstart,
- lexer.txtend));
-
- /*
- now insert style element into document head
-
- doc is root node. search its children for html node
- the head node should be first child of html node
- */
-
- head = doc.findHEAD(lexer.configuration.tt);
-
- if (head != null)
- Node.insertNodeAtEnd(head, node);
- }
-
- /* ensure bidirectional links are consistent */
- private void fixNodeLinks(Node node)
- {
- Node child;
-
- if (node.prev != null)
- node.prev.next = node;
- else
- node.parent.content = node;
-
- if (node.next != null)
- node.next.prev = node;
- else
- node.parent.last = node;
-
- for (child = node.content; child != null; child = child.next)
- child.parent = node;
- }
-
- /*
- used to strip child of node when
- the node has one and only one child
- */
- private void stripOnlyChild(Node node)
- {
- Node child;
-
- child = node.content;
- node.content = child.content;
- node.last = child.last;
- child.content = null;
-
- for (child = node.content; child != null; child = child.next)
- child.parent = node;
- }
-
- /* used to strip font start and end tags */
- private void discardContainer(Node element, MutableObject pnode)
- {
- Node node;
- Node parent = element.parent;
-
- if (element.content != null)
- {
- element.last.next = element.next;
-
- if (element.next != null)
- {
- element.next.prev = element.last;
- element.last.next = element.next;
- }
- else
- parent.last = element.last;
-
- if (element.prev != null)
- {
- element.content.prev = element.prev;
- element.prev.next = element.content;
- }
- else
- parent.content = element.content;
-
- for (node = element.content; node != null; node = node.next)
- node.parent = parent;
-
- pnode.setObject(element.content);
- }
- else
- {
- if (element.next != null)
- element.next.prev = element.prev;
- else
- parent.last = element.prev;
-
- if (element.prev != null)
- element.prev.next = element.next;
- else
- parent.content = element.next;
-
- pnode.setObject(element.next);
- }
-
- element.next = null;
- element.content = null;
- }
-
- /*
- Add style property to element, creating style
- attribute as needed and adding ; delimiter
- */
- private void addStyleProperty(Node node, String property)
- {
- AttVal av;
-
- for (av = node.attributes; av != null; av = av.next)
- {
- if (av.attribute.equals("style"))
- break;
- }
-
- /* if style attribute already exists then insert property */
-
- if (av != null)
- {
- String s;
-
- s = addProperty(av.value, property);
- av.value = s;
- }
- else /* else create new style attribute */
- {
- av = new AttVal(node.attributes, null, '"', "style", property);
- av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
- node.attributes = av;
- }
- }
-
- /*
- Create new string that consists of the
- combined style properties in s1 and s2
-
- To merge property lists, we build a linked
- list of property/values and insert properties
- into the list in order, merging values for
- the same property name.
- */
- private String mergeProperties(String s1, String s2)
- {
- String s;
- StyleProp prop;
-
- prop = createProps(null, s1);
- prop = createProps(prop, s2);
- s = createPropString(prop);
- return s;
- }
-
- private void mergeStyles(Node node, Node child)
- {
- AttVal av;
- String s1, s2, style;
-
- for (s2 = null, av = child.attributes; av != null; av = av.next)
- {
- if (av.attribute.equals("style"))
- {
- s2 = av.value;
- break;
- }
- }
-
- for (s1 = null, av = node.attributes; av != null; av = av.next)
- {
- if (av.attribute.equals("style"))
- {
- s1 = av.value;
- break;
- }
- }
-
- if (s1 != null)
- {
- if (s2 != null) /* merge styles from both */
- {
- style = mergeProperties(s1, s2);
- av.value = style;
- }
- }
- else if (s2 != null) /* copy style of child */
- {
- av = new AttVal(node.attributes, null, '"', "style", s2);
- av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
- node.attributes = av;
- }
- }
-
- private String fontSize2Name(String size)
- {
- /*
- String[] sizes =
- {
- "50%",
- "60%",
- "80%",
- null,
- "120%",
- "150%",
- "200%"
- };
- */
-
- String[] sizes =
- {
- "60%",
- "70%",
- "80%",
- null,
- "120%",
- "150%",
- "200%"
- };
- String buf;
-
- if (size.length() > 0 &&
- '0' <= size.charAt(0) && size.charAt(0) <= '6')
- {
- int n = size.charAt(0) - '0';
- return sizes[n];
- }
-
- if (size.length() > 0 && size.charAt(0) == '-')
- {
- if (size.length() > 1 &&
- '0' <= size.charAt(1) && size.charAt(1) <= '6')
- {
- int n = size.charAt(1) - '0';
- double x;
-
- for (x = 1.0; n > 0; --n)
- x *= 0.8;
-
- x *= 100.0;
- buf = "" + (int)x + "%";
-
- return buf;
- }
-
- return "smaller"; /*"70%"; */
- }
-
- if (size.length() > 1 &&
- '0' <= size.charAt(1) && size.charAt(1) <= '6')
- {
- int n = size.charAt(1) - '0';
- double x;
-
- for (x = 1.0; n > 0; --n)
- x *= 1.2;
-
- x *= 100.0;
- buf = "" + (int)x + "%";
-
- return buf;
- }
-
- return "larger"; /* "140%" */
- }
-
- private void addFontFace(Node node, String face)
- {
- addStyleProperty(node, "font-family: " + face);
- }
-
- private void addFontSize(Node node, String size)
- {
- String value;
-
- if (size.equals("6") && node.tag == tt.tagP)
- {
- node.element = "h1";
- tt.findTag(node);
- return;
- }
-
- if (size.equals("5") && node.tag == tt.tagP)
- {
- node.element = "h2";
- tt.findTag(node);
- return;
- }
-
- if (size.equals("4") && node.tag == tt.tagP)
- {
- node.element = "h3";
- tt.findTag(node);
- return;
- }
-
- value = fontSize2Name(size);
-
- if (value != null)
- {
- addStyleProperty(node, "font-size: " + value);
- }
- }
-
- private void addFontColor(Node node, String color)
- {
- addStyleProperty(node, "color: " + color);
- }
-
- private void addAlign(Node node, String align)
- {
- /* force alignment value to lower case */
- addStyleProperty(node, "text-align: " + align.toLowerCase());
- }
-
- /*
- add style properties to node corresponding to
- the font face, size and color attributes
- */
- private void addFontStyles(Node node, AttVal av)
- {
- while (av != null)
- {
- if (av.attribute.equals("face"))
- addFontFace(node, av.value);
- else if (av.attribute.equals("size"))
- addFontSize(node, av.value);
- else if (av.attribute.equals("color"))
- addFontColor(node, av.value);
-
- av = av.next;
- }
- }
-
- /*
- Symptom: <p align=center>
- Action: <p style="text-align: center">
- */
- private void textAlign(Lexer lexer, Node node)
- {
- AttVal av, prev;
-
- prev = null;
-
- for (av = node.attributes; av != null; av = av.next)
- {
- if (av.attribute.equals("align"))
- {
- if (prev != null)
- prev.next = av.next;
- else
- node.attributes = av.next;
-
- if (av.value != null)
- {
- addAlign(node, av.value);
- }
-
- break;
- }
-
- prev = av;
- }
- }
-
- /*
- The clean up rules use the pnode argument to return the
- next node when the orignal node has been deleted
- */
-
- /*
- Symptom: <dir> <li> where <li> is only child
- Action: coerce <dir> <li> to <div> with indent.
- */
-
- private boolean dir2Div(Lexer lexer, Node node, MutableObject pnode)
- {
- Node child;
-
- if (node.tag == tt.tagDir ||
- node.tag == tt.tagUl ||
- node.tag == tt.tagOl)
- {
- child = node.content;
-
- if (child == null)
- return false;
-
- /* check child has no peers */
-
- if (child.next != null)
- return false;
-
- if (child.tag != tt.tagLi)
- return false;
-
- if (!child.implicit)
- return false;
-
- /* coerce dir to div */
-
- node.tag = tt.tagDiv;
- node.element = "div";
- addStyleProperty(node, "margin-left: 2em");
- stripOnlyChild(node);
- return true;
-
-//#if 0
- //Node content;
- //Node last;
- //content = child.content;
- //last = child.last;
- //child.content = null;
-
- /* adjust parent and set margin on contents of <li> */
-
- //for (child = content; child != null; child = child.next)
- //{
- // child.parent = node.parent;
- // addStyleProperty(child, "margin-left: 1em");
- //}
-
- /* hook first/last into sequence */
-
- //if (content != null)
- //{
- // content.prev = node.prev;
- // last.next = node.next;
- // fixNodeLinks(content);
- // fixNodeLinks(last);
- //}
-
- //node.next = null;
-
- /* ensure that new node is cleaned */
- //pnode.setObject(cleanNode(lexer, content));
- //return true;
-//#endif
- }
-
- return false;
- }
-
- /*
- Symptom: <center>
- Action: replace <center> by <div style="text-align: center">
- */
-
- private boolean center2Div(Lexer lexer, Node node, MutableObject pnode)
- {
- if (node.tag == tt.tagCenter)
- {
- if (lexer.configuration.DropFontTags)
- {
- if (node.content != null)
- {
- Node last = node.last;
- Node parent = node.parent;
-
- discardContainer(node, pnode);
-
- node = lexer.inferredTag("br");
-
- if (last.next != null)
- last.next.prev = node;
-
- node.next = last.next;
- last.next = node;
- node.prev = last;
-
- if (parent.last == last)
- parent.last = node;
-
- node.parent = parent;
- }
- else
- {
- Node prev = node.prev;
- Node next = node.next;
- Node parent = node.parent;
- discardContainer(node, pnode);
-
- node = lexer.inferredTag("br");
- node.next = next;
- node.prev = prev;
- node.parent = parent;
-
- if (next != null)
- next.prev = node;
- else
- parent.last = node;
-
- if (prev != null)
- prev.next = node;
- else
- parent.content = node;
- }
-
- return true;
- }
- node.tag = tt.tagDiv;
- node.element = "div";
- addStyleProperty(node, "text-align: center");
- return true;
- }
-
- return false;
- }
-
- /*
- Symptom <div><div>...</div></div>
- Action: merge the two divs
-
- This is useful after nested <dir>s used by Word
- for indenting have been converted to <div>s
- */
- private boolean mergeDivs(Lexer lexer, Node node, MutableObject pnode)
- {
- Node child;
-
- if (node.tag != tt.tagDiv)
- return false;
-
- child = node.content;
-
- if (child == null)
- return false;
-
- if (child.tag != tt.tagDiv)
- return false;
-
- if (child.next != null)
- return false;
-
- mergeStyles(node, child);
- stripOnlyChild(node);
- return true;
- }
-
- /*
- Symptom: <ul><li><ul>...</ul></li></ul>
- Action: discard outer list
- */
-
- private boolean nestedList(Lexer lexer, Node node, MutableObject pnode)
- {
- Node child, list;
-
- if (node.tag == tt.tagUl || node.tag == tt.tagOl)
- {
- child = node.content;
-
- if (child == null)
- return false;
-
- /* check child has no peers */
-
- if (child.next != null)
- return false;
-
- list = child.content;
-
- if (list == null)
- return false;
-
- if (list.tag != node.tag)
- return false;
-
- pnode.setObject(node.next);
-
- /* move inner list node into position of outer node */
- list.prev = node.prev;
- list.next = node.next;
- list.parent = node.parent;
- fixNodeLinks(list);
-
- /* get rid of outer ul and its li */
- child.content = null;
- node.content = null;
- node.next = null;
-
- /*
- If prev node was a list the chances are this node
- should be appended to that list. Word has no way of
- recognizing nested lists and just uses indents
- */
-
- if (list.prev != null)
- {
- node = list;
- list = node.prev;
-
- if (list.tag == tt.tagUl || list.tag == tt.tagOl)
- {
- list.next = node.next;
-
- if (list.next != null)
- list.next.prev = list;
-
- child = list.last; /* <li> */
-
- node.parent = child;
- node.next = null;
- node.prev = child.last;
- fixNodeLinks(node);
- }
- }
-
- cleanNode(lexer, node);
- return true;
- }
-
- return false;
- }
-
- /*
- Symptom: the only child of a block-level element is a
- presentation element such as B, I or FONT
-
- Action: add style "font-weight: bold" to the block and
- strip the <b> element, leaving its children.
-
- example:
-
- <p>
- <b><font face="Arial" size="6">Draft Recommended Practice</font></b>
- </p>
-
- becomes:
-
- <p style="font-weight: bold; font-family: Arial; font-size: 6">
- Draft Recommended Practice
- </p>
-
- This code also replaces the align attribute by a style attribute.
- However, to avoid CSS problems with Navigator 4, this isn't done
- for the elements: caption, tr and table
- */
- private boolean blockStyle(Lexer lexer, Node node, MutableObject pnode)
- {
- Node child;
-
- if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) != 0)
- {
- if (node.tag != tt.tagTable
- && node.tag != tt.tagTr
- && node.tag != tt.tagLi)
- {
- /* check for align attribute */
- if (node.tag != tt.tagCaption)
- textAlign(lexer, node);
-
- child = node.content;
-
- if (child == null)
- return false;
-
- /* check child has no peers */
-
- if (child.next != null)
- return false;
-
- if (child.tag == tt.tagB)
- {
- mergeStyles(node, child);
- addStyleProperty(node, "font-weight: bold");
- stripOnlyChild(node);
- return true;
- }
-
- if (child.tag == tt.tagI)
- {
- mergeStyles(node, child);
- addStyleProperty(node, "font-style: italic");
- stripOnlyChild(node);
- return true;
- }
-
- if (child.tag == tt.tagFont)
- {
- mergeStyles(node, child);
- addFontStyles(node, child.attributes);
- stripOnlyChild(node);
- return true;
- }
- }
- }
-
- return false;
- }
-
- /* the only child of table cell or an inline element such as em */
- private boolean inlineStyle(Lexer lexer, Node node, MutableObject pnode)
- {
- Node child;
-
- if (node.tag != tt.tagFont && (node.tag.model & (Dict.CM_INLINE|Dict.CM_ROW)) != 0)
- {
- child = node.content;
-
- if (child == null)
- return false;
-
- /* check child has no peers */
-
- if (child.next != null)
- return false;
-
- if (child.tag == tt.tagB && lexer.configuration.LogicalEmphasis)
- {
- mergeStyles(node, child);
- addStyleProperty(node, "font-weight: bold");
- stripOnlyChild(node);
- return true;
- }
-
- if (child.tag == tt.tagI && lexer.configuration.LogicalEmphasis)
- {
- mergeStyles(node, child);
- addStyleProperty(node, "font-style: italic");
- stripOnlyChild(node);
- return true;
- }
-
- if (child.tag == tt.tagFont)
- {
- mergeStyles(node, child);
- addFontStyles(node, child.attributes);
- stripOnlyChild(node);
- return true;
- }
- }
-
- return false;
- }
-
- /*
- Replace font elements by span elements, deleting
- the font element's attributes and replacing them
- by a single style attribute.
- */
- private boolean font2Span(Lexer lexer, Node node, MutableObject pnode)
- {
- AttVal av, style, next;
-
- if (node.tag == tt.tagFont)
- {
- if (lexer.configuration.DropFontTags)
- {
- discardContainer(node, pnode);
- return false;
- }
-
- /* if FONT is only child of parent element then leave alone */
- if (node.parent.content == node
- && node.next == null)
- return false;
-
- addFontStyles(node, node.attributes);
-
- /* extract style attribute and free the rest */
- av = node.attributes;
- style = null;
-
- while (av != null)
- {
- next = av.next;
-
- if (av.attribute.equals("style"))
- {
- av.next = null;
- style = av;
- }
-
- av = next;
- }
-
- node.attributes = style;
-
- node.tag = tt.tagSpan;
- node.element = "span";
-
- return true;
- }
-
- return false;
- }
-
- /*
- Applies all matching rules to a node.
- */
- private Node cleanNode(Lexer lexer, Node node)
- {
- Node next = null;
- MutableObject o = new MutableObject();
- boolean b = false;
-
- for (next = node; node.isElement(); node = next)
- {
- o.setObject(next);
-
- b = dir2Div(lexer, node, o);
- next = (Node)o.getObject();
- if (b)
- continue;
-
- b = nestedList(lexer, node, o);
- next = (Node)o.getObject();
- if (b)
- continue;
-
- b = center2Div(lexer, node, o);
- next = (Node)o.getObject();
- if (b)
- continue;
-
- b = mergeDivs(lexer, node, o);
- next = (Node)o.getObject();
- if (b)
- continue;
-
- b = blockStyle(lexer, node, o);
- next = (Node)o.getObject();
- if (b)
- continue;
-
- b = inlineStyle(lexer, node, o);
- next = (Node)o.getObject();
- if (b)
- continue;
-
- b = font2Span(lexer, node, o);
- next = (Node)o.getObject();
- if (b)
- continue;
-
- break;
- }
-
- return next;
- }
-
- private Node createStyleProperties(Lexer lexer, Node node)
- {
- Node child;
-
- if (node.content != null)
- {
- for (child = node.content; child != null; child = child.next)
- {
- child = createStyleProperties(lexer, child);
- }
- }
-
- return cleanNode(lexer, node);
- }
-
- private void defineStyleRules(Lexer lexer, Node node)
- {
- Node child;
-
- if (node.content != null)
- {
- for (child = node.content;
- child != null; child = child.next)
- {
- defineStyleRules(lexer, child);
- }
- }
-
- style2Rule(lexer, node);
- }
-
- public void cleanTree(Lexer lexer, Node doc)
- {
- doc = createStyleProperties(lexer, doc);
-
- if (!lexer.configuration.MakeClean)
- {
- defineStyleRules(lexer, doc);
- createStyleElement(lexer, doc);
- }
- }
-
- /* simplifies <b><b> ... </b> ...</b> etc. */
- public void nestedEmphasis(Node node)
- {
- MutableObject o = new MutableObject();
- Node next;
-
- while (node != null)
- {
- next = node.next;
-
- if ((node.tag == tt.tagB || node.tag == tt.tagI)
- && node.parent != null && node.parent.tag == node.tag)
- {
- /* strip redundant inner element */
- o.setObject(next);
- discardContainer(node, o);
- next = (Node)o.getObject();
- node = next;
- continue;
- }
-
- if (node.content != null)
- nestedEmphasis(node.content);
-
- node = next;
- }
- }
-
- /* replace i by em and b by strong */
- public void emFromI(Node node)
- {
- while (node != null)
- {
- if (node.tag == tt.tagI)
- {
- node.element = tt.tagEm.name;
- node.tag = tt.tagEm;
- }
- else if (node.tag == tt.tagB)
- {
- node.element = tt.tagStrong.name;
- node.tag = tt.tagStrong;
- }
-
- if (node.content != null)
- emFromI(node.content);
-
- node = node.next;
- }
- }
-
- /*
- Some people use dir or ul without an li
- to indent the content. The pattern to
- look for is a list with a single implicit
- li. This is recursively replaced by an
- implicit blockquote.
- */
- public void list2BQ(Node node)
- {
- while (node != null)
- {
- if (node.content != null)
- list2BQ(node.content);
-
- if (node.tag != null && node.tag.parser == ParserImpl.getParseList() &&
- node.hasOneChild() && node.content.implicit)
- {
- stripOnlyChild(node);
- node.element = tt.tagBlockquote.name;
- node.tag = tt.tagBlockquote;
- node.implicit = true;
- }
-
- node = node.next;
- }
- }
-
- /*
- Replace implicit blockquote by div with an indent
- taking care to reduce nested blockquotes to a single
- div with the indent set to match the nesting depth
- */
- public void bQ2Div(Node node)
- {
- int indent;
- String indent_buf;
-
- while (node != null)
- {
- if (node.tag == tt.tagBlockquote && node.implicit)
- {
- indent = 1;
-
- while(node.hasOneChild() &&
- node.content.tag == tt.tagBlockquote &&
- node.implicit)
- {
- ++indent;
- stripOnlyChild(node);
- }
-
- if (node.content != null)
- bQ2Div(node.content);
-
- indent_buf = "margin-left: " +
- (new Integer(2*indent)).toString() + "em";
-
- node.element = tt.tagDiv.name;
- node.tag = tt.tagDiv;
- node.addAttribute("style", indent_buf);
- }
- else if (node.content != null)
- bQ2Div(node.content);
-
-
- node = node.next;
- }
- }
-
- /* node is <![if ...]> prune up to <![endif]> */
- public Node pruneSection(Lexer lexer, Node node)
- {
- for (;;)
- {
- /* discard node and returns next */
- node = Node.discardElement(node);
-
- if (node == null)
- return null;
-
- if (node.type == Node.SectionTag)
- {
- if ((Lexer.getString(node.textarray, node.start, 2)).equals("if"))
- {
- node = pruneSection(lexer, node);
- continue;
- }
-
- if ((Lexer.getString(node.textarray, node.start, 5)).equals("endif"))
- {
- node = Node.discardElement(node);
- break;
- }
- }
- }
-
- return node;
- }
-
- public void dropSections(Lexer lexer, Node node)
- {
- while (node != null)
- {
- if (node.type == Node.SectionTag)
- {
- /* prune up to matching endif */
- if ((Lexer.getString(node.textarray, node.start, 2)).equals("if"))
- {
- node = pruneSection(lexer, node);
- continue;
- }
-
- /* discard others as well */
- node = Node.discardElement(node);
- continue;
- }
-
- if (node.content != null)
- dropSections(lexer, node.content);
-
- node = node.next;
- }
- }
-
- public void purgeAttributes(Node node)
- {
- AttVal attr = node.attributes;
- AttVal next = null;
- AttVal prev = null;
-
- while (attr != null)
- {
- next = attr.next;
-
- /* special check for class="Code" denoting pre text */
- if (attr.attribute != null &&
- attr.value != null &&
- attr.attribute.equals("class") &&
- attr.value.equals("Code"))
- {
- prev = attr;
- }
- else if (attr.attribute != null &&
- (attr.attribute.equals("class") ||
- attr.attribute.equals("style") ||
- attr.attribute.equals("lang") ||
- attr.attribute.startsWith("x:") ||
- ((attr.attribute.equals("height") || attr.attribute.equals("width")) &&
- (node.tag == tt.tagTd || node.tag == tt.tagTr || node.tag == tt.tagTh))))
- {
- if (prev != null)
- prev.next = next;
- else
- node.attributes = next;
-
- }
- else
- prev = attr;
-
- attr = next;
- }
- }
-
- /* Word2000 uses span excessively, so we strip span out */
- public Node stripSpan(Lexer lexer, Node span)
- {
- Node node;
- Node prev = null;
- Node content;
-
- /*
- deal with span elements that have content
- by splicing the content in place of the span
- after having processed it
- */
-
- cleanWord2000(lexer, span.content);
- content = span.content;
-
- if (span.prev != null)
- prev = span.prev;
- else if (content != null)
- {
- node = content;
- content = content.next;
- Node.removeNode(node);
- Node.insertNodeBeforeElement(span, node);
- prev = node;
- }
-
- while (content != null)
- {
- node = content;
- content = content.next;
- Node.removeNode(node);
- Node.insertNodeAfterElement(prev, node);
- prev = node;
- }
-
- if (span.next == null)
- span.parent.last = prev;
-
- node = span.next;
- span.content = null;
- Node.discardElement(span);
- return node;
- }
-
- /* map non-breaking spaces to regular spaces */
- private void normalizeSpaces(Lexer lexer, Node node)
- {
- while (node != null)
- {
- if (node.content != null)
- normalizeSpaces(lexer, node.content);
-
- if (node.type == Node.TextNode)
- {
- int i;
- MutableInteger c = new MutableInteger();
- int p = node.start;
-
- for (i = node.start; i < node.end; ++i)
- {
- c.value = (int)node.textarray[i];
-
- /* look for UTF-8 multibyte character */
- if (c.value > 0x7F)
- i += PPrint.getUTF8(node.textarray, i, c);
-
- if (c.value == 160)
- c.value = ' ';
-
- p = PPrint.putUTF8(node.textarray, p, c.value);
- }
- }
-
- node = node.next;
- }
- }
-
- /*
- This is a major clean up to strip out all the extra stuff you get
- when you save as web page from Word 2000. It doesn't yet know what
- to do with VML tags, but these will appear as errors unless you
- declare them as new tags, such as o:p which needs to be declared
- as inline.
- */
- public void cleanWord2000(Lexer lexer, Node node)
- {
- /* used to a list from a sequence of bulletted p's */
- Node list = null;
-
- while (node != null)
- {
- /* discard Word's style verbiage */
- if (node.tag == tt.tagStyle ||
- node.tag == tt.tagMeta ||
- node.type == Node.CommentTag)
- {
- node = Node.discardElement(node);
- continue;
- }
-
- /* strip out all span tags Word scatters so liberally! */
- if (node.tag == tt.tagSpan)
- {
- node = stripSpan(lexer, node);
- continue;
- }
-
- /* get rid of Word's xmlns attributes */
- if (node.tag == tt.tagHtml)
- {
- /* check that it's a Word 2000 document */
- if (node.getAttrByName("xmlns:o") == null)
- return;
- }
-
- if (node.tag == tt.tagLink)
- {
- AttVal attr = node.getAttrByName("rel");
-
- if (attr != null && attr.value != null &&
- attr.value.equals("File-List"))
- {
- node = Node.discardElement(node);
- continue;
- }
- }
-
- /* discard empty paragraphs */
- if (node.content == null && node.tag == tt.tagP)
- {
- node = Node.discardElement(node);
- continue;
- }
-
- if (node.tag == tt.tagP)
- {
- AttVal attr = node.getAttrByName("class");
-
- /* map sequence of <p class="MsoListBullet"> to <ul>...</ul> */
- if (attr != null && attr.value != null &&
- attr.value.equals("MsoListBullet"))
- {
- Node.coerceNode(lexer, node, tt.tagLi);
-
- if (list == null || list.tag != tt.tagUl)
- {
- list = lexer.inferredTag("ul");
- Node.insertNodeBeforeElement(node, list);
- }
-
- purgeAttributes(node);
-
- if (node.content != null)
- cleanWord2000(lexer, node.content);
-
- /* remove node and append to contents of list */
- Node.removeNode(node);
- Node.insertNodeAtEnd(list, node);
- node = list.next;
- }
- /* map sequence of <p class="Code"> to <pre>...</pre> */
- else if (attr != null && attr.value != null &&
- attr.value.equals("Code"))
- {
- Node br = lexer.newLineNode();
- normalizeSpaces(lexer, node);
-
- if (list == null || list.tag != tt.tagPre)
- {
- list = lexer.inferredTag("pre");
- Node.insertNodeBeforeElement(node, list);
- }
-
- /* remove node and append to contents of list */
- Node.removeNode(node);
- Node.insertNodeAtEnd(list, node);
- stripSpan(lexer, node);
- Node.insertNodeAtEnd(list, br);
- node = list.next;
- }
- else
- list = null;
- }
- else
- list = null;
-
- /* strip out style and class attributes */
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- purgeAttributes(node);
-
- if (node.content != null)
- cleanWord2000(lexer, node.content);
-
- node = node.next;
- }
- }
-
- public boolean isWord2000(Node root, TagTable tt)
- {
- Node html = root.findHTML(tt);
-
- return (html != null && html.getAttrByName("xmlns:o") != null);
- }
-}
+++ /dev/null
-/*
- * @(#)Configuration.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Read configuration file and manage configuration properties.
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-/*
- Configuration files associate a property name with a value.
- The format is that of a Java .properties file.
-*/
-
-import java.util.Enumeration;
-import java.util.Properties;
-import java.util.StringTokenizer;
-import java.io.FileInputStream;
-import java.io.IOException;
-
-public class Configuration implements java.io.Serializable {
-
- /* character encodings */
- public static final int RAW = 0;
- public static final int ASCII = 1;
- public static final int LATIN1 = 2;
- public static final int UTF8 = 3;
- public static final int ISO2022 = 4;
- public static final int MACROMAN = 5;
-
- /* mode controlling treatment of doctype */
- public static final int DOCTYPE_OMIT = 0;
- public static final int DOCTYPE_AUTO = 1;
- public static final int DOCTYPE_STRICT= 2;
- public static final int DOCTYPE_LOOSE = 3;
- public static final int DOCTYPE_USER = 4;
-
- protected int spaces = 2; /* default indentation */
- protected int wraplen = 68; /* default wrap margin */
- protected int CharEncoding = ASCII;
- protected int tabsize = 4;
-
- protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */
- protected String altText = null; /* default text for alt attribute */
- protected String slidestyle = null; /* style sheet for slides */
- protected String docTypeStr = null; /* user specified doctype */
- protected String errfile = null; /* file name to write errors to */
- protected boolean writeback = false; /* if true then output tidied markup */
-
- protected boolean OnlyErrors = false; /* if true normal output is suppressed */
- protected boolean ShowWarnings = true; /* however errors are always shown */
- protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */
- protected boolean IndentContent = false; /* indent content of appropriate tags */
- protected boolean SmartIndent = false; /* does text/block level content effect indentation */
- protected boolean HideEndTags = false; /* suppress optional end tags */
- protected boolean XmlTags = false; /* treat input as XML */
- protected boolean XmlOut = false; /* create output as XML */
- protected boolean xHTML = false; /* output extensible HTML */
- protected boolean XmlPi = false; /* add <?xml?> for XML docs */
- protected boolean RawOut = false; /* avoid mapping values > 127 to entities */
- protected boolean UpperCaseTags = false; /* output tags in upper not lower case */
- protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */
- protected boolean MakeClean = false; /* remove presentational clutter */
- protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */
- protected boolean DropFontTags = false; /* discard presentation tags */
- protected boolean DropEmptyParas = true; /* discard empty p elements */
- protected boolean FixComments = true; /* fix comments with adjacent hyphens */
- protected boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */
- protected boolean BurstSlides = false; /* create slides on each h2 element */
- protected boolean NumEntities = false; /* use numeric entities */
- protected boolean QuoteMarks = false; /* output " marks as " */
- protected boolean QuoteNbsp = true; /* output non-breaking space as entity */
- protected boolean QuoteAmpersand = true; /* output naked ampersand as & */
- protected boolean WrapAttVals = false; /* wrap within attribute values */
- protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */
- protected boolean WrapSection = true; /* wrap within <![ ... ]> section tags */
- protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */
- protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */
- protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */
- protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */
- protected boolean IndentAttributes = false; /* newline+indent before each attribute */
- protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */
- protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */
- protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */
- protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
- protected boolean KeepFileTimes = true; /* if yes last modied time is preserved */
- protected boolean Word2000 = false; /* draconian cleaning for Word2000 */
- protected boolean TidyMark = true; /* add meta element indicating tidied doc */
- protected boolean Emacs = false; /* if true format error output for GNU Emacs */
- protected boolean LiteralAttribs = false; /* if true attributes may use newlines */
-
- protected TagTable tt; /* TagTable associated with this Configuration */
-
- private transient Properties _properties = new Properties();
-
- public Configuration()
- {
- }
-
- public void addProps( Properties p )
- {
- Enumeration enum = p.propertyNames();
- while (enum.hasMoreElements())
- {
- String key = (String) enum.nextElement();
- String value = p.getProperty(key);
- _properties.put(key, value);
- }
- parseProps();
- }
-
- public void parseFile( String filename )
- {
- try
- {
- _properties.load( new FileInputStream( filename ) );
- }
- catch (IOException e)
- {
- System.err.println(filename + e.toString());
- return;
- }
- parseProps();
- }
-
- private void parseProps()
- {
- String value;
-
- value = _properties.getProperty("indent-spaces");
- if (value != null)
- spaces = parseInt(value, "indent-spaces");
-
- value = _properties.getProperty("wrap");
- if (value != null)
- wraplen = parseInt(value, "wrap");
-
- value = _properties.getProperty("wrap-attributes");
- if (value != null)
- WrapAttVals = parseBool(value, "wrap-attributes");
-
- value = _properties.getProperty("wrap-script-literals");
- if (value != null)
- WrapScriptlets = parseBool(value, "wrap-script-literals");
-
- value = _properties.getProperty("wrap-sections");
- if (value != null)
- WrapSection = parseBool(value, "wrap-sections");
-
- value = _properties.getProperty("wrap-asp");
- if (value != null)
- WrapAsp = parseBool(value, "wrap-asp");
-
- value = _properties.getProperty("wrap-jste");
- if (value != null)
- WrapJste = parseBool(value, "wrap-jste");
-
- value = _properties.getProperty("wrap-php");
- if (value != null)
- WrapPhp = parseBool(value, "wrap-php");
-
- value = _properties.getProperty("literal-attributes");
- if (value != null)
- LiteralAttribs = parseBool(value, "literal-attributes");
-
- value = _properties.getProperty("tab-size");
- if (value != null)
- tabsize = parseInt(value, "tab-size");
-
- value = _properties.getProperty("markup");
- if (value != null)
- OnlyErrors = parseInvBool(value, "markup");
-
- value = _properties.getProperty("quiet");
- if (value != null)
- Quiet = parseBool(value, "quiet");
-
- value = _properties.getProperty("tidy-mark");
- if (value != null)
- TidyMark = parseBool(value, "tidy-mark");
-
- value = _properties.getProperty("indent");
- if (value != null)
- IndentContent = parseIndent(value, "indent");
-
- value = _properties.getProperty("indent-attributes");
- if (value != null)
- IndentAttributes = parseBool(value, "ident-attributes");
-
- value = _properties.getProperty("hide-endtags");
- if (value != null)
- HideEndTags = parseBool(value, "hide-endtags");
-
- value = _properties.getProperty("input-xml");
- if (value != null)
- XmlTags = parseBool(value, "input-xml");
-
- value = _properties.getProperty("output-xml");
- if (value != null)
- XmlOut = parseBool(value, "output-xml");
-
- value = _properties.getProperty("output-xhtml");
- if (value != null)
- xHTML = parseBool(value, "output-xhtml");
-
- value = _properties.getProperty("add-xml-pi");
- if (value != null)
- XmlPi = parseBool(value, "add-xml-pi");
-
- value = _properties.getProperty("add-xml-decl");
- if (value != null)
- XmlPi = parseBool(value, "add-xml-decl");
-
- value = _properties.getProperty("assume-xml-procins");
- if (value != null)
- XmlPIs = parseBool(value, "assume-xml-procins");
-
- value = _properties.getProperty("raw");
- if (value != null)
- RawOut = parseBool(value, "raw");
-
- value = _properties.getProperty("uppercase-tags");
- if (value != null)
- UpperCaseTags = parseBool(value, "uppercase-tags");
-
- value = _properties.getProperty("uppercase-attributes");
- if (value != null)
- UpperCaseAttrs = parseBool(value, "uppercase-attributes");
-
- value = _properties.getProperty("clean");
- if (value != null)
- MakeClean = parseBool(value, "clean");
-
- value = _properties.getProperty("logical-emphasis");
- if (value != null)
- LogicalEmphasis = parseBool(value, "logical-emphasis");
-
- value = _properties.getProperty("word-2000");
- if (value != null)
- Word2000 = parseBool(value, "word-2000");
-
- value = _properties.getProperty("drop-empty-paras");
- if (value != null)
- DropEmptyParas = parseBool(value, "drop-empty-paras");
-
- value = _properties.getProperty("drop-font-tags");
- if (value != null)
- DropFontTags = parseBool(value, "drop-font-tags");
-
- value = _properties.getProperty("enclose-text");
- if (value != null)
- EncloseBodyText = parseBool(value, "enclose-text");
-
- value = _properties.getProperty("enclose-block-text");
- if (value != null)
- EncloseBlockText = parseBool(value, "enclose-block-text");
-
- value = _properties.getProperty("alt-text");
- if (value != null)
- altText = value;
-
- value = _properties.getProperty("add-xml-space");
- if (value != null)
- XmlSpace = parseBool(value, "add-xml-space");
-
- value = _properties.getProperty("fix-bad-comments");
- if (value != null)
- FixComments = parseBool(value, "fix-bad-comments");
-
- value = _properties.getProperty("split");
- if (value != null)
- BurstSlides = parseBool(value, "split");
-
- value = _properties.getProperty("break-before-br");
- if (value != null)
- BreakBeforeBR = parseBool(value, "break-before-br");
-
- value = _properties.getProperty("numeric-entities");
- if (value != null)
- NumEntities = parseBool(value, "numeric-entities");
-
- value = _properties.getProperty("quote-marks");
- if (value != null)
- QuoteMarks = parseBool(value, "quote-marks");
-
- value = _properties.getProperty("quote-nbsp");
- if (value != null)
- QuoteNbsp = parseBool(value, "quote-nbsp");
-
- value = _properties.getProperty("quote-ampersand");
- if (value != null)
- QuoteAmpersand = parseBool(value, "quote-ampersand");
-
- value = _properties.getProperty("write-back");
- if (value != null)
- writeback = parseBool(value, "write-back");
-
- value = _properties.getProperty("keep-time");
- if (value != null)
- KeepFileTimes = parseBool(value, "keep-time");
-
- value = _properties.getProperty("show-warnings");
- if (value != null)
- ShowWarnings = parseBool(value, "show-warnings");
-
- value = _properties.getProperty("error-file");
- if (value != null)
- errfile = parseName(value, "error-file");
-
- value = _properties.getProperty("slide-style");
- if (value != null)
- slidestyle = parseName(value, "slide-style");
-
- value = _properties.getProperty("new-inline-tags");
- if (value != null)
- parseInlineTagNames(value, "new-inline-tags");
-
- value = _properties.getProperty("new-blocklevel-tags");
- if (value != null)
- parseBlockTagNames(value, "new-blocklevel-tags");
-
- value = _properties.getProperty("new-empty-tags");
- if (value != null)
- parseEmptyTagNames(value, "new-empty-tags");
-
- value = _properties.getProperty("new-pre-tags");
- if (value != null)
- parsePreTagNames(value, "new-pre-tags");
-
- value = _properties.getProperty("char-encoding");
- if (value != null)
- CharEncoding = parseCharEncoding(value, "char-encoding");
-
- value = _properties.getProperty("doctype");
- if (value != null)
- docTypeStr = parseDocType(value, "doctype");
-
- value = _properties.getProperty("fix-backslash");
- if (value != null)
- FixBackslash = parseBool(value, "fix-backslash");
-
- value = _properties.getProperty("gnu-emacs");
- if (value != null)
- Emacs = parseBool(value, "gnu-emacs");
- }
-
- /* ensure that config is self consistent */
- public void adjust()
- {
- if (EncloseBlockText)
- EncloseBodyText = true;
-
- /* avoid the need to set IndentContent when SmartIndent is set */
-
- if (SmartIndent)
- IndentContent = true;
-
- /* disable wrapping */
- if (wraplen == 0)
- wraplen = 0x7FFFFFFF;
-
- /* Word 2000 needs o:p to be declared as inline */
- if (Word2000)
- {
- tt.defineInlineTag("o:p");
- }
-
- /* XHTML is written in lower case */
- if (xHTML)
- {
- XmlOut = true;
- UpperCaseTags = false;
- UpperCaseAttrs = false;
- }
-
- /* if XML in, then XML out */
- if (XmlTags)
- {
- XmlOut = true;
- XmlPIs = true;
- }
-
- /* XML requires end tags */
- if (XmlOut)
- {
- QuoteAmpersand = true;
- HideEndTags = false;
- }
- }
-
- private static int parseInt( String s, String option )
- {
- int i = 0;
- try {
- i = Integer.parseInt( s );
- }
- catch ( NumberFormatException e ) {
- Report.badArgument(option);
- i = -1;
- }
- return i;
- }
-
- private static boolean parseBool( String s, String option )
- {
- boolean b = false;
- if ( s != null && s.length() > 0 ) {
- char c = s.charAt(0);
- if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1'))
- b = true;
- else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0'))
- b = false;
- else
- Report.badArgument(option);
- }
- return b;
- }
-
- private static boolean parseInvBool( String s, String option )
- {
- boolean b = false;
- if ( s != null && s.length() > 0 ) {
- char c = s.charAt(0);
- if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y'))
- b = true;
- else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n'))
- b = false;
- else
- Report.badArgument(option);
- }
- return !b;
- }
-
- private static String parseName( String s, String option )
- {
- StringTokenizer t = new StringTokenizer( s );
- String rs = null;
- if ( t.countTokens() >= 1 )
- rs = t.nextToken();
- else
- Report.badArgument(option);
- return rs;
- }
-
- private static int parseCharEncoding( String s, String option )
- {
- int result = ASCII;
-
- if (Lexer.wstrcasecmp(s, "ascii") == 0)
- result = ASCII;
- else if (Lexer.wstrcasecmp(s, "latin1") == 0)
- result = LATIN1;
- else if (Lexer.wstrcasecmp(s, "raw") == 0)
- result = RAW;
- else if (Lexer.wstrcasecmp(s, "utf8") == 0)
- result = UTF8;
- else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
- result = ISO2022;
- else if (Lexer.wstrcasecmp(s, "mac") == 0)
- result = MACROMAN;
- else
- Report.badArgument(option);
-
- return result;
- }
-
- /* slight hack to avoid changes to pprint.c */
- private boolean parseIndent( String s, String option )
- {
- boolean b = IndentContent;
-
- if (Lexer.wstrcasecmp(s, "yes") == 0)
- {
- b = true;
- SmartIndent = false;
- }
- else if (Lexer.wstrcasecmp(s, "true") == 0)
- {
- b = true;
- SmartIndent = false;
- }
- else if (Lexer.wstrcasecmp(s, "no") == 0)
- {
- b = false;
- SmartIndent = false;
- }
- else if (Lexer.wstrcasecmp(s, "false") == 0)
- {
- b = false;
- SmartIndent = false;
- }
- else if (Lexer.wstrcasecmp(s, "auto") == 0)
- {
- b = true;
- SmartIndent = true;
- }
- else
- Report.badArgument(option);
- return b;
- }
-
- private void parseInlineTagNames( String s, String option )
- {
- StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
- while ( t.hasMoreTokens() ) {
- tt.defineInlineTag( t.nextToken() );
- }
- }
-
- private void parseBlockTagNames( String s, String option )
- {
- StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
- while ( t.hasMoreTokens() ) {
- tt.defineBlockTag( t.nextToken() );
- }
- }
-
- private void parseEmptyTagNames( String s, String option )
- {
- StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
- while ( t.hasMoreTokens() ) {
- tt.defineEmptyTag( t.nextToken() );
- }
- }
-
- private void parsePreTagNames( String s, String option )
- {
- StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
- while ( t.hasMoreTokens() ) {
- tt.definePreTag( t.nextToken() );
- }
- }
-
- /*
- doctype: omit | auto | strict | loose | <fpi>
-
- where the fpi is a string similar to
-
- "-//ACME//DTD HTML 3.14159//EN"
- */
- protected String parseDocType( String s, String option )
- {
- s = s.trim();
-
- /* "-//ACME//DTD HTML 3.14159//EN" or similar */
-
- if (s.startsWith("\""))
- {
- docTypeMode = DOCTYPE_USER;
- return s;
- }
-
- /* read first word */
- String word = "";
- StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
- if (t.hasMoreTokens())
- word = t.nextToken();
-
- if (Lexer.wstrcasecmp(word, "omit") == 0)
- docTypeMode = DOCTYPE_OMIT;
- else if (Lexer.wstrcasecmp(word, "strict") == 0)
- docTypeMode = DOCTYPE_STRICT;
- else if (Lexer.wstrcasecmp(word, "loose") == 0 ||
- Lexer.wstrcasecmp(word, "transitional") == 0)
- docTypeMode = DOCTYPE_LOOSE;
- else if (Lexer.wstrcasecmp(word, "auto") == 0)
- docTypeMode = DOCTYPE_AUTO;
- else
- {
- docTypeMode = DOCTYPE_AUTO;
- Report.badArgument(option);
- }
- return null;
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMAttrImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMAttrImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.4, 1999/09/04 DOM Support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMAttrImpl extends DOMNodeImpl implements org.w3c.dom.Attr {
-
- protected AttVal avAdaptee;
-
- protected DOMAttrImpl(AttVal adaptee)
- {
- super(null); // must override all methods of DOMNodeImpl
- this.avAdaptee = adaptee;
- }
-
-
- /* --------------------- DOM ---------------------------- */
-
- public String getNodeValue() throws DOMException
- {
- return getValue();
- }
-
- public void setNodeValue(String nodeValue) throws DOMException
- {
- setValue(nodeValue);
- }
-
- public String getNodeName()
- {
- return getName();
- }
-
- public short getNodeType()
- {
- return org.w3c.dom.Node.ATTRIBUTE_NODE;
- }
-
- public org.w3c.dom.Node getParentNode()
- {
- return null;
- }
-
- public org.w3c.dom.NodeList getChildNodes()
- {
- // NOT SUPPORTED
- return null;
- }
-
- public org.w3c.dom.Node getFirstChild()
- {
- // NOT SUPPORTED
- return null;
- }
-
- public org.w3c.dom.Node getLastChild()
- {
- // NOT SUPPORTED
- return null;
- }
-
- public org.w3c.dom.Node getPreviousSibling()
- {
- return null;
- }
-
- public org.w3c.dom.Node getNextSibling()
- {
- return null;
- }
-
- public org.w3c.dom.NamedNodeMap getAttributes()
- {
- return null;
- }
-
- public org.w3c.dom.Document getOwnerDocument()
- {
- return null;
- }
-
- public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild,
- org.w3c.dom.Node refChild)
- throws DOMException
- {
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
- public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild,
- org.w3c.dom.Node oldChild)
- throws DOMException
- {
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
- public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild)
- throws DOMException
- {
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
- public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild)
- throws DOMException
- {
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
- public boolean hasChildNodes()
- {
- return false;
- }
-
- public org.w3c.dom.Node cloneNode(boolean deep)
- {
- return null;
- }
-
- /**
- * @see org.w3c.dom.Attr#getName
- */
- public String getName()
- {
- return avAdaptee.attribute;
- }
-
- /**
- * @see org.w3c.dom.Attr#getSpecified
- */
- public boolean getSpecified()
- {
- return true;
- }
-
- /**
- * Returns value of this attribute. If this attribute has a null value,
- * then the attribute name is returned instead.
- * Thanks to Brett Knights <brett@knightsofthenet.com> for this fix.
- * @see org.w3c.dom.Attr#getValue
- *
- */
- public String getValue()
- {
- return (avAdaptee.value == null) ? avAdaptee.attribute : avAdaptee.value ;
- }
-
- /**
- * @see org.w3c.dom.Attr#setValue
- */
- public void setValue(String value)
- {
- avAdaptee.value = value;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public org.w3c.dom.Element getOwnerElement() {
- return null;
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMAttrMapImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMAttrMapImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMAttrMapImpl implements org.w3c.dom.NamedNodeMap {
-
- private AttVal first = null;
-
- protected DOMAttrMapImpl(AttVal first)
- {
- this.first = first;
- }
-
- /**
- * @see org.w3c.dom.NamedNodeMap#getNamedItem
- */
- public org.w3c.dom.Node getNamedItem(String name)
- {
- AttVal att = this.first;
- while (att != null) {
- if (att.attribute.equals(name)) break;
- att = att.next;
- }
- if (att != null)
- return att.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.NamedNodeMap#setNamedItem
- */
- public org.w3c.dom.Node setNamedItem(org.w3c.dom.Node arg)
- throws DOMException
- {
- // NOT SUPPORTED
- return null;
- }
-
- /**
- * @see org.w3c.dom.NamedNodeMap#removeNamedItem
- */
- public org.w3c.dom.Node removeNamedItem(String name)
- throws DOMException
- {
- // NOT SUPPORTED
- return null;
- }
-
- /**
- * @see org.w3c.dom.NamedNodeMap#item
- */
- public org.w3c.dom.Node item(int index)
- {
- int i = 0;
- AttVal att = this.first;
- while (att != null) {
- if (i >= index) break;
- i++;
- att = att.next;
- }
- if (att != null)
- return att.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.NamedNodeMap#getLength
- */
- public int getLength()
- {
- int len = 0;
- AttVal att = this.first;
- while (att != null) {
- len++;
- att = att.next;
- }
- return len;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public org.w3c.dom.Node getNamedItemNS(String namespaceURI,
- String localName)
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- * @exception org.w3c.dom.DOMException
- */
- public org.w3c.dom.Node setNamedItemNS(org.w3c.dom.Node arg)
- throws org.w3c.dom.DOMException
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- * @exception org.w3c.dom.DOMException
- */
- public org.w3c.dom.Node removeNamedItemNS(String namespaceURI,
- String localName)
- throws org.w3c.dom.DOMException
- {
- return null;
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMCDATASectionImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMCDATASectionImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @author Gary L Peskin <garyp@firstech.com>
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMCDATASectionImpl extends DOMTextImpl
- implements org.w3c.dom.CDATASection {
-
- protected DOMCDATASectionImpl(Node adaptee)
- {
- super(adaptee);
- }
-
-
- /* --------------------- DOM ---------------------------- */
-
- /**
- * @see org.w3c.dom.Node#getNodeName
- */
- public String getNodeName()
- {
- return "#cdata-section";
- }
-
- /**
- * @see org.w3c.dom.Node#getNodeType
- */
- public short getNodeType()
- {
- return org.w3c.dom.Node.CDATA_SECTION_NODE;
- }
-}
+++ /dev/null
-/*
- * @(#)DOMCharacterDataImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMCharacterDataImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMCharacterDataImpl extends DOMNodeImpl
- implements org.w3c.dom.CharacterData {
-
- protected DOMCharacterDataImpl(Node adaptee)
- {
- super(adaptee);
- }
-
-
- /* --------------------- DOM ---------------------------- */
-
- /**
- * @see org.w3c.dom.CharacterData#getData
- */
- public String getData() throws DOMException
- {
- return getNodeValue();
- }
-
- /**
- * @see org.w3c.dom.CharacterData#setData
- */
- public void setData(String data) throws DOMException
- {
- // NOT SUPPORTED
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
- /**
- * @see org.w3c.dom.CharacterData#getLength
- */
- public int getLength()
- {
- int len = 0;
- if (adaptee.textarray != null && adaptee.start < adaptee.end)
- len = adaptee.end - adaptee.start;
- return len;
- }
-
- /**
- * @see org.w3c.dom.CharacterData#substringData
- */
- public String substringData(int offset,
- int count) throws DOMException
- {
- int len;
- String value = null;
- if (count < 0)
- {
- throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR,
- "Invalid length");
- }
- if (adaptee.textarray != null && adaptee.start < adaptee.end)
- {
- if (adaptee.start + offset >= adaptee.end)
- {
- throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR,
- "Invalid offset");
- }
- len = count;
- if (adaptee.start + offset + len - 1 >= adaptee.end)
- len = adaptee.end - adaptee.start - offset;
-
- value = Lexer.getString(adaptee.textarray,
- adaptee.start + offset,
- len);
- }
- return value;
- }
-
- /**
- * @see org.w3c.dom.CharacterData#appendData
- */
- public void appendData(String arg) throws DOMException
- {
- // NOT SUPPORTED
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
- /**
- * @see org.w3c.dom.CharacterData#insertData
- */
- public void insertData(int offset,
- String arg) throws DOMException
- {
- // NOT SUPPORTED
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
- /**
- * @see org.w3c.dom.CharacterData#deleteData
- */
- public void deleteData(int offset,
- int count) throws DOMException
- {
- // NOT SUPPORTED
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
- /**
- * @see org.w3c.dom.CharacterData#replaceData
- */
- public void replaceData(int offset,
- int count,
- String arg) throws DOMException
- {
- // NOT SUPPORTED
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMCommentImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMCommentImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMCommentImpl extends DOMCharacterDataImpl
- implements org.w3c.dom.Comment {
-
- protected DOMCommentImpl(Node adaptee)
- {
- super(adaptee);
- }
-
-
- /* --------------------- DOM ---------------------------- */
-
- /**
- * @see org.w3c.dom.Node#getNodeName
- */
- public String getNodeName()
- {
- return "#comment";
- }
-
- /**
- * @see org.w3c.dom.Node#getNodeType
- */
- public short getNodeType()
- {
- return org.w3c.dom.Node.COMMENT_NODE;
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMDocumentImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMDocumentImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.4, 1999/09/04 DOM Support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMDocumentImpl extends DOMNodeImpl implements org.w3c.dom.Document {
-
- private TagTable tt; // a DOM Document has its own TagTable.
-
- protected DOMDocumentImpl(Node adaptee)
- {
- super(adaptee);
- tt = new TagTable();
- }
-
- public void setTagTable(TagTable tt)
- {
- this.tt = tt;
- }
-
- /* --------------------- DOM ---------------------------- */
-
- /**
- * @see org.w3c.dom.Node#getNodeName
- */
- public String getNodeName()
- {
- return "#document";
- }
-
- /**
- * @see org.w3c.dom.Node#getNodeType
- */
- public short getNodeType()
- {
- return org.w3c.dom.Node.DOCUMENT_NODE;
- }
-
- /**
- * @see org.w3c.dom.Document#getDoctype
- */
- public org.w3c.dom.DocumentType getDoctype()
- {
- Node node = adaptee.content;
- while (node != null) {
- if (node.type == Node.DocTypeTag) break;
- node = node.next;
- }
- if (node != null)
- return (org.w3c.dom.DocumentType)node.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Document#getImplementation
- */
- public org.w3c.dom.DOMImplementation getImplementation()
- {
- // NOT SUPPORTED
- return null;
- }
-
- /**
- * @see org.w3c.dom.Document#getDocumentElement
- */
- public org.w3c.dom.Element getDocumentElement()
- {
- Node node = adaptee.content;
- while (node != null) {
- if (node.type == Node.StartTag ||
- node.type == Node.StartEndTag) break;
- node = node.next;
- }
- if (node != null)
- return (org.w3c.dom.Element)node.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Document#createElement
- */
- public org.w3c.dom.Element createElement(String tagName)
- throws DOMException
- {
- Node node = new Node(Node.StartEndTag, null, 0, 0, tagName, tt);
- if (node != null) {
- if (node.tag == null) // Fix Bug 121206
- node.tag = tt.xmlTags;
- return (org.w3c.dom.Element)node.getAdapter();
- }
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Document#createDocumentFragment
- */
- public org.w3c.dom.DocumentFragment createDocumentFragment()
- {
- // NOT SUPPORTED
- return null;
- }
-
- /**
- * @see org.w3c.dom.Document#createTextNode
- */
- public org.w3c.dom.Text createTextNode(String data)
- {
- byte[] textarray = Lexer.getBytes(data);
- Node node = new Node(Node.TextNode, textarray, 0, textarray.length);
- if (node != null)
- return (org.w3c.dom.Text)node.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Document#createComment
- */
- public org.w3c.dom.Comment createComment(String data)
- {
- byte[] textarray = Lexer.getBytes(data);
- Node node = new Node(Node.CommentTag, textarray, 0, textarray.length);
- if (node != null)
- return (org.w3c.dom.Comment)node.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Document#createCDATASection
- */
- public org.w3c.dom.CDATASection createCDATASection(String data)
- throws DOMException
- {
- // NOT SUPPORTED
- return null;
- }
-
- /**
- * @see org.w3c.dom.Document#createProcessingInstruction
- */
- public org.w3c.dom.ProcessingInstruction createProcessingInstruction(String target,
- String data)
- throws DOMException
- {
- throw new DOMExceptionImpl(DOMException.NOT_SUPPORTED_ERR,
- "HTML document");
- }
-
- /**
- * @see org.w3c.dom.Document#createAttribute
- */
- public org.w3c.dom.Attr createAttribute(String name)
- throws DOMException
- {
- AttVal av = new AttVal(null, null, (int)'"', name, null);
- if (av != null) {
- av.dict =
- AttributeTable.getDefaultAttributeTable().findAttribute(av);
- return (org.w3c.dom.Attr)av.getAdapter();
- } else {
- return null;
- }
- }
-
- /**
- * @see org.w3c.dom.Document#createEntityReference
- */
- public org.w3c.dom.EntityReference createEntityReference(String name)
- throws DOMException
- {
- // NOT SUPPORTED
- return null;
- }
-
- /**
- * @see org.w3c.dom.Document#getElementsByTagName
- */
- public org.w3c.dom.NodeList getElementsByTagName(String tagname)
- {
- return new DOMNodeListByTagNameImpl(this.adaptee, tagname);
- }
-
- /**
- * DOM2 - not implemented.
- * @exception org.w3c.dom.DOMException
- */
- public org.w3c.dom.Node importNode(org.w3c.dom.Node importedNode, boolean deep)
- throws org.w3c.dom.DOMException
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- * @exception org.w3c.dom.DOMException
- */
- public org.w3c.dom.Attr createAttributeNS(String namespaceURI,
- String qualifiedName)
- throws org.w3c.dom.DOMException
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- * @exception org.w3c.dom.DOMException
- */
- public org.w3c.dom.Element createElementNS(String namespaceURI,
- String qualifiedName)
- throws org.w3c.dom.DOMException
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI,
- String localName)
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public org.w3c.dom.Element getElementById(String elementId)
- {
- return null;
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMDocumentTypeImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMDocumentTypeImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMDocumentTypeImpl extends DOMNodeImpl
- implements org.w3c.dom.DocumentType {
-
- protected DOMDocumentTypeImpl(Node adaptee)
- {
- super(adaptee);
- }
-
-
- /* --------------------- DOM ---------------------------- */
-
- /**
- * @see org.w3c.dom.Node#getNodeType
- */
- public short getNodeType()
- {
- return org.w3c.dom.Node.DOCUMENT_TYPE_NODE;
- }
-
- /**
- * @see org.w3c.dom.Node#getNodeName
- */
- public String getNodeName()
- {
- return getName();
- }
-
- /**
- * @see org.w3c.dom.DocumentType#getName
- */
- public String getName()
- {
- String value = null;
- if (adaptee.type == Node.DocTypeTag)
- {
-
- if (adaptee.textarray != null && adaptee.start < adaptee.end)
- {
- value = Lexer.getString(adaptee.textarray,
- adaptee.start,
- adaptee.end - adaptee.start);
- }
- }
- return value;
- }
-
- public org.w3c.dom.NamedNodeMap getEntities()
- {
- // NOT SUPPORTED
- return null;
- }
-
- public org.w3c.dom.NamedNodeMap getNotations()
- {
- // NOT SUPPORTED
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public String getPublicId() {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public String getSystemId() {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public String getInternalSubset() {
- return null;
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMElementImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMElementImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.4, 1999/09/04 DOM Support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMElementImpl extends DOMNodeImpl
- implements org.w3c.dom.Element {
-
- protected DOMElementImpl(Node adaptee)
- {
- super(adaptee);
- }
-
-
- /* --------------------- DOM ---------------------------- */
-
- /**
- * @see org.w3c.dom.Node#getNodeType
- */
- public short getNodeType()
- {
- return org.w3c.dom.Node.ELEMENT_NODE;
- }
-
- /**
- * @see org.w3c.dom.Element#getTagName
- */
- public String getTagName()
- {
- return super.getNodeName();
- }
-
- /**
- * @see org.w3c.dom.Element#getAttribute
- */
- public String getAttribute(String name)
- {
- if (this.adaptee == null)
- return null;
-
- AttVal att = this.adaptee.attributes;
- while (att != null) {
- if (att.attribute.equals(name)) break;
- att = att.next;
- }
- if (att != null)
- return att.value;
- else
- return "";
- }
-
- /**
- * @see org.w3c.dom.Element#setAttribute
- */
- public void setAttribute(String name,
- String value)
- throws DOMException
- {
- if (this.adaptee == null)
- return;
-
- AttVal att = this.adaptee.attributes;
- while (att != null) {
- if (att.attribute.equals(name)) break;
- att = att.next;
- }
- if (att != null) {
- att.value = value;
- } else {
- att = new AttVal(null, null, (int)'"', name, value);
- att.dict =
- AttributeTable.getDefaultAttributeTable().findAttribute(att);
- if (this.adaptee.attributes == null) {
- this.adaptee.attributes = att;
- } else {
- att.next = this.adaptee.attributes;
- this.adaptee.attributes = att;
- }
- }
- }
-
- /**
- * @see org.w3c.dom.Element#removeAttribute
- */
- public void removeAttribute(String name)
- throws DOMException
- {
- if (this.adaptee == null)
- return;
-
- AttVal att = this.adaptee.attributes;
- AttVal pre = null;
- while (att != null) {
- if (att.attribute.equals(name)) break;
- pre = att;
- att = att.next;
- }
- if (att != null) {
- if (pre == null) {
- this.adaptee.attributes = att.next;
- } else {
- pre.next = att.next;
- }
- }
- }
-
- /**
- * @see org.w3c.dom.Element#getAttributeNode
- */
- public org.w3c.dom.Attr getAttributeNode(String name)
- {
- if (this.adaptee == null)
- return null;
-
- AttVal att = this.adaptee.attributes;
- while (att != null) {
- if (att.attribute.equals(name)) break;
- att = att.next;
- }
- if (att != null)
- return att.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Element#setAttributeNode
- */
- public org.w3c.dom.Attr setAttributeNode(org.w3c.dom.Attr newAttr)
- throws DOMException
- {
- if (newAttr == null)
- return null;
- if (!(newAttr instanceof DOMAttrImpl)) {
- throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
- "newAttr not instanceof DOMAttrImpl");
- }
-
- DOMAttrImpl newatt = (DOMAttrImpl)newAttr;
- String name = newatt.avAdaptee.attribute;
- org.w3c.dom.Attr result = null;
-
- AttVal att = this.adaptee.attributes;
- while (att != null) {
- if (att.attribute.equals(name)) break;
- att = att.next;
- }
- if (att != null) {
- result = att.getAdapter();
- att.adapter = newAttr;
- } else {
- if (this.adaptee.attributes == null) {
- this.adaptee.attributes = newatt.avAdaptee;
- } else {
- newatt.avAdaptee.next = this.adaptee.attributes;
- this.adaptee.attributes = newatt.avAdaptee;
- }
- }
- return result;
- }
-
- /**
- * @see org.w3c.dom.Element#removeAttributeNode
- */
- public org.w3c.dom.Attr removeAttributeNode(org.w3c.dom.Attr oldAttr)
- throws DOMException
- {
- if (oldAttr == null)
- return null;
-
- org.w3c.dom.Attr result = null;
- AttVal att = this.adaptee.attributes;
- AttVal pre = null;
- while (att != null) {
- if (att.getAdapter() == oldAttr) break;
- pre = att;
- att = att.next;
- }
- if (att != null) {
- if (pre == null) {
- this.adaptee.attributes = att.next;
- } else {
- pre.next = att.next;
- }
- result = oldAttr;
- } else {
- throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
- "oldAttr not found");
- }
- return result;
- }
-
- /**
- * @see org.w3c.dom.Element#getElementsByTagName
- */
- public org.w3c.dom.NodeList getElementsByTagName(String name)
- {
- return new DOMNodeListByTagNameImpl(this.adaptee, name);
- }
-
- /**
- * @see org.w3c.dom.Element#normalize
- */
- public void normalize()
- {
- // NOT SUPPORTED
- }
-
- /**
- * DOM2 - not implemented.
- */
- public String getAttributeNS(String namespaceURI, String localName)
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- * @exception org.w3c.dom.DOMException
- */
- public void setAttributeNS(String namespaceURI,
- String qualifiedName,
- String value)
- throws org.w3c.dom.DOMException
- {
- }
-
- /**
- * DOM2 - not implemented.
- * @exception org.w3c.dom.DOMException
- */
- public void removeAttributeNS(String namespaceURI, String localName)
- throws org.w3c.dom.DOMException
- {
- }
-
- /**
- * DOM2 - not implemented.
- */
- public org.w3c.dom.Attr getAttributeNodeNS(String namespaceURI,
- String localName)
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- * @exception org.w3c.dom.DOMException
- */
- public org.w3c.dom.Attr setAttributeNodeNS(org.w3c.dom.Attr newAttr)
- throws org.w3c.dom.DOMException
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI,
- String localName)
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public boolean hasAttribute(String name)
- {
- return false;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public boolean hasAttributeNS(String namespaceURI,
- String localName)
- {
- return false;
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMExceptionImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMExceptionImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.4, 1999/09/04 DOM Support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMExceptionImpl extends org.w3c.dom.DOMException {
-
- public DOMExceptionImpl(short code, String message) {
- super(code, message);
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMNodeImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMNodeImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.4, 1999/09/04 DOM Support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMNodeImpl implements org.w3c.dom.Node {
-
- protected Node adaptee;
-
- protected DOMNodeImpl(Node adaptee)
- {
- this.adaptee = adaptee;
- }
-
-
- /* --------------------- DOM ---------------------------- */
-
- /**
- * @see org.w3c.dom.Node#getNodeValue
- */
- public String getNodeValue() throws DOMException
- {
- String value = ""; //BAK 10/10/2000 replaced null
- if (adaptee.type == Node.TextNode ||
- adaptee.type == Node.CDATATag ||
- adaptee.type == Node.CommentTag ||
- adaptee.type == Node.ProcInsTag)
- {
-
- if (adaptee.textarray != null && adaptee.start < adaptee.end)
- {
- value = Lexer.getString(adaptee.textarray,
- adaptee.start,
- adaptee.end - adaptee.start);
- }
- }
- return value;
- }
-
- /**
- * @see org.w3c.dom.Node#setNodeValue
- */
- public void setNodeValue(String nodeValue) throws DOMException
- {
- if (adaptee.type == Node.TextNode ||
- adaptee.type == Node.CDATATag ||
- adaptee.type == Node.CommentTag ||
- adaptee.type == Node.ProcInsTag)
- {
- byte[] textarray = Lexer.getBytes(nodeValue);
- adaptee.textarray = textarray;
- adaptee.start = 0;
- adaptee.end = textarray.length;
- }
- }
-
- /**
- * @see org.w3c.dom.Node#getNodeName
- */
- public String getNodeName()
- {
- return adaptee.element;
- }
-
- /**
- * @see org.w3c.dom.Node#getNodeType
- */
- public short getNodeType()
- {
- short result = -1;
- switch (adaptee.type) {
- case Node.RootNode:
- result = org.w3c.dom.Node.DOCUMENT_NODE;
- break;
- case Node.DocTypeTag:
- result = org.w3c.dom.Node.DOCUMENT_TYPE_NODE;
- break;
- case Node.CommentTag:
- result = org.w3c.dom.Node.COMMENT_NODE;
- break;
- case Node.ProcInsTag:
- result = org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE;
- break;
- case Node.TextNode:
- result = org.w3c.dom.Node.TEXT_NODE;
- break;
- case Node.CDATATag:
- result = org.w3c.dom.Node.CDATA_SECTION_NODE;
- break;
- case Node.StartTag:
- case Node.StartEndTag:
- result = org.w3c.dom.Node.ELEMENT_NODE;
- break;
- }
- return result;
- }
-
- /**
- * @see org.w3c.dom.Node#getParentNode
- */
- public org.w3c.dom.Node getParentNode()
- {
- if (adaptee.parent != null)
- return adaptee.parent.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Node#getChildNodes
- */
- public org.w3c.dom.NodeList getChildNodes()
- {
- return new DOMNodeListImpl(adaptee);
- }
-
- /**
- * @see org.w3c.dom.Node#getFirstChild
- */
- public org.w3c.dom.Node getFirstChild()
- {
- if (adaptee.content != null)
- return adaptee.content.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Node#getLastChild
- */
- public org.w3c.dom.Node getLastChild()
- {
- if (adaptee.last != null)
- return adaptee.last.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Node#getPreviousSibling
- */
- public org.w3c.dom.Node getPreviousSibling()
- {
- if (adaptee.prev != null)
- return adaptee.prev.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Node#getNextSibling
- */
- public org.w3c.dom.Node getNextSibling()
- {
- if (adaptee.next != null)
- return adaptee.next.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Node#getAttributes
- */
- public org.w3c.dom.NamedNodeMap getAttributes()
- {
- return new DOMAttrMapImpl(adaptee.attributes);
- }
-
- /**
- * @see org.w3c.dom.Node#getOwnerDocument
- */
- public org.w3c.dom.Document getOwnerDocument()
- {
- Node node;
-
- node = this.adaptee;
- if (node != null && node.type == Node.RootNode)
- return null;
-
- for (node = this.adaptee;
- node != null && node.type != Node.RootNode; node = node.parent);
-
- if (node != null)
- return (org.w3c.dom.Document)node.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.Node#insertBefore
- */
- public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild,
- org.w3c.dom.Node refChild)
- throws DOMException
- {
- // TODO - handle newChild already in tree
-
- if (newChild == null)
- return null;
- if (!(newChild instanceof DOMNodeImpl)) {
- throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
- "newChild not instanceof DOMNodeImpl");
- }
- DOMNodeImpl newCh = (DOMNodeImpl)newChild;
-
- if (this.adaptee.type == Node.RootNode) {
- if (newCh.adaptee.type != Node.DocTypeTag &&
- newCh.adaptee.type != Node.ProcInsTag) {
- throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
- "newChild cannot be a child of this node");
- }
- } else if (this.adaptee.type == Node.StartTag) {
- if (newCh.adaptee.type != Node.StartTag &&
- newCh.adaptee.type != Node.StartEndTag &&
- newCh.adaptee.type != Node.CommentTag &&
- newCh.adaptee.type != Node.TextNode &&
- newCh.adaptee.type != Node.CDATATag) {
- throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
- "newChild cannot be a child of this node");
- }
- }
- if (refChild == null) {
- Node.insertNodeAtEnd(this.adaptee, newCh.adaptee);
- if (this.adaptee.type == Node.StartEndTag) {
- this.adaptee.setType(Node.StartTag);
- }
- } else {
- Node ref = this.adaptee.content;
- while (ref != null) {
- if (ref.getAdapter() == refChild) break;
- ref = ref.next;
- }
- if (ref == null) {
- throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
- "refChild not found");
- }
- Node.insertNodeBeforeElement(ref, newCh.adaptee);
- }
- return newChild;
- }
-
- /**
- * @see org.w3c.dom.Node#replaceChild
- */
- public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild,
- org.w3c.dom.Node oldChild)
- throws DOMException
- {
- // TODO - handle newChild already in tree
-
- if (newChild == null)
- return null;
- if (!(newChild instanceof DOMNodeImpl)) {
- throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
- "newChild not instanceof DOMNodeImpl");
- }
- DOMNodeImpl newCh = (DOMNodeImpl)newChild;
-
- if (this.adaptee.type == Node.RootNode) {
- if (newCh.adaptee.type != Node.DocTypeTag &&
- newCh.adaptee.type != Node.ProcInsTag) {
- throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
- "newChild cannot be a child of this node");
- }
- } else if (this.adaptee.type == Node.StartTag) {
- if (newCh.adaptee.type != Node.StartTag &&
- newCh.adaptee.type != Node.StartEndTag &&
- newCh.adaptee.type != Node.CommentTag &&
- newCh.adaptee.type != Node.TextNode &&
- newCh.adaptee.type != Node.CDATATag) {
- throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
- "newChild cannot be a child of this node");
- }
- }
- if (oldChild == null) {
- throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
- "oldChild not found");
- } else {
- Node n;
- Node ref = this.adaptee.content;
- while (ref != null) {
- if (ref.getAdapter() == oldChild) break;
- ref = ref.next;
- }
- if (ref == null) {
- throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
- "oldChild not found");
- }
- newCh.adaptee.next = ref.next;
- newCh.adaptee.prev = ref.prev;
- newCh.adaptee.last = ref.last;
- newCh.adaptee.parent = ref.parent;
- newCh.adaptee.content = ref.content;
- if (ref.parent != null) {
- if (ref.parent.content == ref)
- ref.parent.content = newCh.adaptee;
- if (ref.parent.last == ref)
- ref.parent.last = newCh.adaptee;
- }
- if (ref.prev != null) {
- ref.prev.next = newCh.adaptee;
- }
- if (ref.next != null) {
- ref.next.prev = newCh.adaptee;
- }
- for (n = ref.content; n != null; n = n.next) {
- if (n.parent == ref)
- n.parent = newCh.adaptee;
- }
- }
- return oldChild;
- }
-
- /**
- * @see org.w3c.dom.Node#removeChild
- */
- public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild)
- throws DOMException
- {
- if (oldChild == null)
- return null;
-
- Node ref = this.adaptee.content;
- while (ref != null) {
- if (ref.getAdapter() == oldChild) break;
- ref = ref.next;
- }
- if (ref == null) {
- throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR,
- "refChild not found");
- }
- Node.discardElement(ref);
-
- if (this.adaptee.content == null
- && this.adaptee.type == Node.StartTag) {
- this.adaptee.setType(Node.StartEndTag);
- }
-
- return oldChild;
- }
-
- /**
- * @see org.w3c.dom.Node#appendChild
- */
- public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild)
- throws DOMException
- {
- // TODO - handle newChild already in tree
-
- if (newChild == null)
- return null;
- if (!(newChild instanceof DOMNodeImpl)) {
- throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR,
- "newChild not instanceof DOMNodeImpl");
- }
- DOMNodeImpl newCh = (DOMNodeImpl)newChild;
-
- if (this.adaptee.type == Node.RootNode) {
- if (newCh.adaptee.type != Node.DocTypeTag &&
- newCh.adaptee.type != Node.ProcInsTag) {
- throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
- "newChild cannot be a child of this node");
- }
- } else if (this.adaptee.type == Node.StartTag) {
- if (newCh.adaptee.type != Node.StartTag &&
- newCh.adaptee.type != Node.StartEndTag &&
- newCh.adaptee.type != Node.CommentTag &&
- newCh.adaptee.type != Node.TextNode &&
- newCh.adaptee.type != Node.CDATATag) {
- throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR,
- "newChild cannot be a child of this node");
- }
- }
- Node.insertNodeAtEnd(this.adaptee, newCh.adaptee);
-
- if (this.adaptee.type == Node.StartEndTag) {
- this.adaptee.setType(Node.StartTag);
- }
-
- return newChild;
- }
-
- /**
- * @see org.w3c.dom.Node#hasChildNodes
- */
- public boolean hasChildNodes()
- {
- return (adaptee.content != null);
- }
-
- /**
- * @see org.w3c.dom.Node#cloneNode
- */
- public org.w3c.dom.Node cloneNode(boolean deep)
- {
- Node node = adaptee.cloneNode(deep);
- node.parent = null;
- return node.getAdapter();
- }
-
- /**
- * DOM2 - not implemented.
- */
- public void normalize()
- {
- }
-
- /**
- * DOM2 - not implemented.
- */
- public boolean supports(String feature, String version)
- {
- return isSupported(feature, version);
- }
-
- /**
- * DOM2 - not implemented.
- */
- public String getNamespaceURI()
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public String getPrefix()
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public void setPrefix(String prefix)
- throws DOMException
- {
- }
-
- /**
- * DOM2 - not implemented.
- */
- public String getLocalName()
- {
- return null;
- }
-
- /**
- * DOM2 - not implemented.
- */
- public boolean isSupported(String feature,String version) {
- return false;
- }
-
- /**
- * DOM2 - @see org.w3c.dom.Node#hasAttributes
- * contributed by dlp@users.sourceforge.net
- */
- public boolean hasAttributes()
- {
- return adaptee.attributes != null;
- }
-}
+++ /dev/null
-/*
- * @(#)DOMNodeListByTagNameImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * DOMNodeListByTagNameImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-/**
- * <p>The items in the <code>NodeList</code> are accessible via an integral
- * index, starting from 0.
- *
- */
-public class DOMNodeListByTagNameImpl implements org.w3c.dom.NodeList {
-
- private Node first = null;
- private String tagName = "*";
- private int currIndex = 0;
- private int maxIndex = 0;
- private Node currNode = null;
-
- protected DOMNodeListByTagNameImpl(Node first, String tagName)
- {
- this.first = first;
- this.tagName = tagName;
- }
-
- /**
- * @see org.w3c.dom.NodeList#item
- */
- public org.w3c.dom.Node item(int index)
- {
- currIndex = 0;
- maxIndex = index;
- preTraverse(first);
-
- if (currIndex > maxIndex && currNode != null)
- return currNode.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.NodeList#getLength
- */
- public int getLength()
- {
- currIndex = 0;
- maxIndex = Integer.MAX_VALUE;
- preTraverse(first);
- return currIndex;
- }
-
- protected void preTraverse(Node node)
- {
- if (node == null)
- return;
-
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- if (currIndex <= maxIndex &&
- (tagName.equals("*") || tagName.equals(node.element)))
- {
- currIndex += 1;
- currNode = node;
- }
- }
- if (currIndex > maxIndex)
- return;
-
- node = node.content;
- while (node != null)
- {
- preTraverse(node);
- node = node.next;
- }
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMNodeListImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * DOMNodeListImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-/**
- * <p>The items in the <code>NodeList</code> are accessible via an integral
- * index, starting from 0.
- *
- */
-public class DOMNodeListImpl implements org.w3c.dom.NodeList {
-
- private Node parent = null;
-
- protected DOMNodeListImpl(Node parent)
- {
- this.parent = parent;
- }
-
- /**
- * @see org.w3c.dom.NodeList#item
- */
- public org.w3c.dom.Node item(int index)
- {
- int i = 0;
- Node node = parent.content;
- while (node != null) {
- if (i >= index) break;
- i++;
- node = node.next;
- }
- if (node != null)
- return node.getAdapter();
- else
- return null;
- }
-
- /**
- * @see org.w3c.dom.NodeList#getLength
- */
- public int getLength()
- {
- int len = 0;
- Node node = parent.content;
- while (node != null) {
- len++;
- node = node.next;
- }
- return len;
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMProcessingInstructionImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMProcessingInstructionImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMProcessingInstructionImpl extends DOMNodeImpl
- implements org.w3c.dom.ProcessingInstruction {
-
- protected DOMProcessingInstructionImpl(Node adaptee)
- {
- super(adaptee);
- }
-
-
- /* --------------------- DOM ---------------------------- */
-
- /**
- * @see org.w3c.dom.Node#getNodeType
- */
- public short getNodeType()
- {
- return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE;
- }
-
- /**
- * @see org.w3c.dom.ProcessingInstruction#getTarget
- */
- public String getTarget()
- {
- // TODO
- return null;
- }
-
- /**
- * @see org.w3c.dom.ProcessingInstruction#getData
- */
- public String getData()
- {
- return getNodeValue();
- }
-
- /**
- * @see org.w3c.dom.ProcessingInstruction#setData
- */
- public void setData(String data) throws DOMException
- {
- // NOT SUPPORTED
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
-}
+++ /dev/null
-/*
- * @(#)DOMTextImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-import org.w3c.dom.DOMException;
-
-/**
- *
- * DOMTextImpl
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class DOMTextImpl extends DOMCharacterDataImpl
- implements org.w3c.dom.Text {
-
- protected DOMTextImpl(Node adaptee)
- {
- super(adaptee);
- }
-
-
- /* --------------------- DOM ---------------------------- */
-
- /**
- * @see org.w3c.dom.Node#getNodeName
- */
- public String getNodeName()
- {
- return "#text";
- }
-
- /**
- * @see org.w3c.dom.Node#getNodeType
- */
- public short getNodeType()
- {
- return org.w3c.dom.Node.TEXT_NODE;
- }
-
- /**
- * @see org.w3c.dom.Text#splitText
- */
- public org.w3c.dom.Text splitText(int offset) throws DOMException
- {
- // NOT SUPPORTED
- throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR,
- "Not supported");
- }
-
-}
+++ /dev/null
-/*
- * @(#)Dict.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Tag dictionary node
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class Dict {
-
- /* content model shortcut encoding */
-
- public static final int CM_UNKNOWN = 0;
- public static final int CM_EMPTY = (1 << 0);
- public static final int CM_HTML = (1 << 1);
- public static final int CM_HEAD = (1 << 2);
- public static final int CM_BLOCK = (1 << 3);
- public static final int CM_INLINE = (1 << 4);
- public static final int CM_LIST = (1 << 5);
- public static final int CM_DEFLIST = (1 << 6);
- public static final int CM_TABLE = (1 << 7);
- public static final int CM_ROWGRP = (1 << 8);
- public static final int CM_ROW = (1 << 9);
- public static final int CM_FIELD = (1 << 10);
- public static final int CM_OBJECT = (1 << 11);
- public static final int CM_PARAM = (1 << 12);
- public static final int CM_FRAMES = (1 << 13);
- public static final int CM_HEADING = (1 << 14);
- public static final int CM_OPT = (1 << 15);
- public static final int CM_IMG = (1 << 16);
- public static final int CM_MIXED = (1 << 17);
- public static final int CM_NO_INDENT = (1 << 18);
- public static final int CM_OBSOLETE = (1 << 19);
- public static final int CM_NEW = (1 << 20);
- public static final int CM_OMITST = (1 << 21);
-
- /*
-
- If the document uses just HTML 2.0 tags and attributes described it as HTML 2.0
- Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary
- tags and attributes then describe it as HTML Proprietary. If it includes the
- xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe
- it as one of the flavors of Voyager (strict, loose or frameset).
- */
-
- public static final short VERS_UNKNOWN = 0;
-
- public static final short VERS_HTML20 = 1;
- public static final short VERS_HTML32 = 2;
- public static final short VERS_HTML40_STRICT = 4;
- public static final short VERS_HTML40_LOOSE = 8;
- public static final short VERS_FRAMES = 16;
- public static final short VERS_XML = 32;
-
- public static final short VERS_NETSCAPE = 64;
- public static final short VERS_MICROSOFT = 128;
- public static final short VERS_SUN = 256;
-
- public static final short VERS_MALFORMED = 512;
-
- public static final short VERS_ALL = (VERS_HTML20|VERS_HTML32|VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES);
- public static final short VERS_HTML40 = (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES);
- public static final short VERS_LOOSE = (VERS_HTML32|VERS_HTML40_LOOSE|VERS_FRAMES);
- public static final short VERS_IFRAMES = (VERS_HTML40_LOOSE|VERS_FRAMES);
- public static final short VERS_FROM32 = (VERS_HTML40_STRICT|VERS_LOOSE);
- public static final short VERS_PROPRIETARY = (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN);
-
- public static final short VERS_EVERYTHING = (VERS_ALL|VERS_PROPRIETARY);
-
- public Dict( String name, short versions, int model,
- Parser parser, CheckAttribs chkattrs )
- {
- this.name = name;
- this.versions = versions;
- this.model = model;
- this.parser = parser;
- this.chkattrs = chkattrs;
- }
-
- public String name;
- public short versions;
- public int model;
- public Parser parser;
- public CheckAttribs chkattrs;
-}
+++ /dev/null
-/*
- * @(#)Entity.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * HTML ISO entity
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class Entity {
-
- public Entity( String name, short code )
- {
- this.name = name;
- this.code = code;
- }
-
- public Entity( String name, int code )
- {
- this.name = name;
- this.code = (short)code;
- }
-
- public String name;
- public short code;
-
-}
+++ /dev/null
-/*
- * @(#)EntityTable.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Entity hash table
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-import java.util.Hashtable;
-import java.util.Enumeration;
-
-public class EntityTable {
-
- public EntityTable()
- {
- }
-
- public Entity lookup( String name )
- {
- return (Entity)entityHashtable.get( name );
- }
-
- public Entity install( String name, short code )
- {
- Entity ent = lookup( name );
- if ( ent == null ) {
- ent = new Entity( name, code );
- entityHashtable.put( name, ent );
- } else {
- ent.code = code;
- }
- return ent;
- }
-
- public Entity install( Entity ent )
- {
- return (Entity)entityHashtable.put( ent.name, ent );
- }
-
- /* entity starting with "&" returns zero on error */
- public short entityCode( String name )
- {
- int c;
-
- if (name.length() <= 1)
- return 0;
-
- /* numeric entitity: name = "&#" followed by number */
- if ( name.charAt(1) == '#' ) {
- c = 0; /* zero on missing/bad number */
-
- /* 'x' prefix denotes hexadecimal number format */
- try {
- if (name.length() >= 4 && name.charAt(2) == 'x') {
- c = Integer.parseInt( name.substring(3), 16 );
- } else if (name.length() >= 3) {
- c = Integer.parseInt( name.substring(2) );
- }
- }
- catch ( NumberFormatException e ) {}
-
- return (short)c;
- }
-
- /* Named entity: name ="&" followed by a name */
- Entity ent = lookup( name.substring(1) );
- if ( ent != null ) {
- return ent.code;
- }
-
- return 0; /* zero signifies unknown entity name */
- }
-
- public String entityName( short code )
- {
- String name = null;
- Entity ent;
- Enumeration en = entityHashtable.elements();
- while ( en.hasMoreElements() ) {
- ent = (Entity)en.nextElement();
- if ( ent.code == code ) {
- name = ent.name;
- break;
- }
- }
- return name;
- }
-
- private Hashtable entityHashtable = new Hashtable();
-
- private static EntityTable defaultEntityTable = null;
-
- private static Entity[] entities = {
-
- new Entity( "nbsp", 160 ),
- new Entity( "iexcl", 161 ),
- new Entity( "cent", 162 ),
- new Entity( "pound", 163 ),
- new Entity( "curren", 164 ),
- new Entity( "yen", 165 ),
- new Entity( "brvbar", 166 ),
- new Entity( "sect", 167 ),
- new Entity( "uml", 168 ),
- new Entity( "copy", 169 ),
- new Entity( "ordf", 170 ),
- new Entity( "laquo", 171 ),
- new Entity( "not", 172 ),
- new Entity( "shy", 173 ),
- new Entity( "reg", 174 ),
- new Entity( "macr", 175 ),
- new Entity( "deg", 176 ),
- new Entity( "plusmn", 177 ),
- new Entity( "sup2", 178 ),
- new Entity( "sup3", 179 ),
- new Entity( "acute", 180 ),
- new Entity( "micro", 181 ),
- new Entity( "para", 182 ),
- new Entity( "middot", 183 ),
- new Entity( "cedil", 184 ),
- new Entity( "sup1", 185 ),
- new Entity( "ordm", 186 ),
- new Entity( "raquo", 187 ),
- new Entity( "frac14", 188 ),
- new Entity( "frac12", 189 ),
- new Entity( "frac34", 190 ),
- new Entity( "iquest", 191 ),
- new Entity( "Agrave", 192 ),
- new Entity( "Aacute", 193 ),
- new Entity( "Acirc", 194 ),
- new Entity( "Atilde", 195 ),
- new Entity( "Auml", 196 ),
- new Entity( "Aring", 197 ),
- new Entity( "AElig", 198 ),
- new Entity( "Ccedil", 199 ),
- new Entity( "Egrave", 200 ),
- new Entity( "Eacute", 201 ),
- new Entity( "Ecirc", 202 ),
- new Entity( "Euml", 203 ),
- new Entity( "Igrave", 204 ),
- new Entity( "Iacute", 205 ),
- new Entity( "Icirc", 206 ),
- new Entity( "Iuml", 207 ),
- new Entity( "ETH", 208 ),
- new Entity( "Ntilde", 209 ),
- new Entity( "Ograve", 210 ),
- new Entity( "Oacute", 211 ),
- new Entity( "Ocirc", 212 ),
- new Entity( "Otilde", 213 ),
- new Entity( "Ouml", 214 ),
- new Entity( "times", 215 ),
- new Entity( "Oslash", 216 ),
- new Entity( "Ugrave", 217 ),
- new Entity( "Uacute", 218 ),
- new Entity( "Ucirc", 219 ),
- new Entity( "Uuml", 220 ),
- new Entity( "Yacute", 221 ),
- new Entity( "THORN", 222 ),
- new Entity( "szlig", 223 ),
- new Entity( "agrave", 224 ),
- new Entity( "aacute", 225 ),
- new Entity( "acirc", 226 ),
- new Entity( "atilde", 227 ),
- new Entity( "auml", 228 ),
- new Entity( "aring", 229 ),
- new Entity( "aelig", 230 ),
- new Entity( "ccedil", 231 ),
- new Entity( "egrave", 232 ),
- new Entity( "eacute", 233 ),
- new Entity( "ecirc", 234 ),
- new Entity( "euml", 235 ),
- new Entity( "igrave", 236 ),
- new Entity( "iacute", 237 ),
- new Entity( "icirc", 238 ),
- new Entity( "iuml", 239 ),
- new Entity( "eth", 240 ),
- new Entity( "ntilde", 241 ),
- new Entity( "ograve", 242 ),
- new Entity( "oacute", 243 ),
- new Entity( "ocirc", 244 ),
- new Entity( "otilde", 245 ),
- new Entity( "ouml", 246 ),
- new Entity( "divide", 247 ),
- new Entity( "oslash", 248 ),
- new Entity( "ugrave", 249 ),
- new Entity( "uacute", 250 ),
- new Entity( "ucirc", 251 ),
- new Entity( "uuml", 252 ),
- new Entity( "yacute", 253 ),
- new Entity( "thorn", 254 ),
- new Entity( "yuml", 255 ),
- new Entity( "fnof", 402 ),
- new Entity( "Alpha", 913 ),
- new Entity( "Beta", 914 ),
- new Entity( "Gamma", 915 ),
- new Entity( "Delta", 916 ),
- new Entity( "Epsilon", 917 ),
- new Entity( "Zeta", 918 ),
- new Entity( "Eta", 919 ),
- new Entity( "Theta", 920 ),
- new Entity( "Iota", 921 ),
- new Entity( "Kappa", 922 ),
- new Entity( "Lambda", 923 ),
- new Entity( "Mu", 924 ),
- new Entity( "Nu", 925 ),
- new Entity( "Xi", 926 ),
- new Entity( "Omicron", 927 ),
- new Entity( "Pi", 928 ),
- new Entity( "Rho", 929 ),
- new Entity( "Sigma", 931 ),
- new Entity( "Tau", 932 ),
- new Entity( "Upsilon", 933 ),
- new Entity( "Phi", 934 ),
- new Entity( "Chi", 935 ),
- new Entity( "Psi", 936 ),
- new Entity( "Omega", 937 ),
- new Entity( "alpha", 945 ),
- new Entity( "beta", 946 ),
- new Entity( "gamma", 947 ),
- new Entity( "delta", 948 ),
- new Entity( "epsilon", 949 ),
- new Entity( "zeta", 950 ),
- new Entity( "eta", 951 ),
- new Entity( "theta", 952 ),
- new Entity( "iota", 953 ),
- new Entity( "kappa", 954 ),
- new Entity( "lambda", 955 ),
- new Entity( "mu", 956 ),
- new Entity( "nu", 957 ),
- new Entity( "xi", 958 ),
- new Entity( "omicron", 959 ),
- new Entity( "pi", 960 ),
- new Entity( "rho", 961 ),
- new Entity( "sigmaf", 962 ),
- new Entity( "sigma", 963 ),
- new Entity( "tau", 964 ),
- new Entity( "upsilon", 965 ),
- new Entity( "phi", 966 ),
- new Entity( "chi", 967 ),
- new Entity( "psi", 968 ),
- new Entity( "omega", 969 ),
- new Entity( "thetasym", 977 ),
- new Entity( "upsih", 978 ),
- new Entity( "piv", 982 ),
- new Entity( "bull", 8226 ),
- new Entity( "hellip", 8230 ),
- new Entity( "prime", 8242 ),
- new Entity( "Prime", 8243 ),
- new Entity( "oline", 8254 ),
- new Entity( "frasl", 8260 ),
- new Entity( "weierp", 8472 ),
- new Entity( "image", 8465 ),
- new Entity( "real", 8476 ),
- new Entity( "trade", 8482 ),
- new Entity( "alefsym", 8501 ),
- new Entity( "larr", 8592 ),
- new Entity( "uarr", 8593 ),
- new Entity( "rarr", 8594 ),
- new Entity( "darr", 8595 ),
- new Entity( "harr", 8596 ),
- new Entity( "crarr", 8629 ),
- new Entity( "lArr", 8656 ),
- new Entity( "uArr", 8657 ),
- new Entity( "rArr", 8658 ),
- new Entity( "dArr", 8659 ),
- new Entity( "hArr", 8660 ),
- new Entity( "forall", 8704 ),
- new Entity( "part", 8706 ),
- new Entity( "exist", 8707 ),
- new Entity( "empty", 8709 ),
- new Entity( "nabla", 8711 ),
- new Entity( "isin", 8712 ),
- new Entity( "notin", 8713 ),
- new Entity( "ni", 8715 ),
- new Entity( "prod", 8719 ),
- new Entity( "sum", 8721 ),
- new Entity( "minus", 8722 ),
- new Entity( "lowast", 8727 ),
- new Entity( "radic", 8730 ),
- new Entity( "prop", 8733 ),
- new Entity( "infin", 8734 ),
- new Entity( "ang", 8736 ),
- new Entity( "and", 8743 ),
- new Entity( "or", 8744 ),
- new Entity( "cap", 8745 ),
- new Entity( "cup", 8746 ),
- new Entity( "int", 8747 ),
- new Entity( "there4", 8756 ),
- new Entity( "sim", 8764 ),
- new Entity( "cong", 8773 ),
- new Entity( "asymp", 8776 ),
- new Entity( "ne", 8800 ),
- new Entity( "equiv", 8801 ),
- new Entity( "le", 8804 ),
- new Entity( "ge", 8805 ),
- new Entity( "sub", 8834 ),
- new Entity( "sup", 8835 ),
- new Entity( "nsub", 8836 ),
- new Entity( "sube", 8838 ),
- new Entity( "supe", 8839 ),
- new Entity( "oplus", 8853 ),
- new Entity( "otimes", 8855 ),
- new Entity( "perp", 8869 ),
- new Entity( "sdot", 8901 ),
- new Entity( "lceil", 8968 ),
- new Entity( "rceil", 8969 ),
- new Entity( "lfloor", 8970 ),
- new Entity( "rfloor", 8971 ),
- new Entity( "lang", 9001 ),
- new Entity( "rang", 9002 ),
- new Entity( "loz", 9674 ),
- new Entity( "spades", 9824 ),
- new Entity( "clubs", 9827 ),
- new Entity( "hearts", 9829 ),
- new Entity( "diams", 9830 ),
- new Entity( "quot", 34 ),
- new Entity( "amp", 38 ),
- new Entity( "lt", 60 ),
- new Entity( "gt", 62 ),
- new Entity( "OElig", 338 ),
- new Entity( "oelig", 339 ),
- new Entity( "Scaron", 352 ),
- new Entity( "scaron", 353 ),
- new Entity( "Yuml", 376 ),
- new Entity( "circ", 710 ),
- new Entity( "tilde", 732 ),
- new Entity( "ensp", 8194 ),
- new Entity( "emsp", 8195 ),
- new Entity( "thinsp", 8201 ),
- new Entity( "zwnj", 8204 ),
- new Entity( "zwj", 8205 ),
- new Entity( "lrm", 8206 ),
- new Entity( "rlm", 8207 ),
- new Entity( "ndash", 8211 ),
- new Entity( "mdash", 8212 ),
- new Entity( "lsquo", 8216 ),
- new Entity( "rsquo", 8217 ),
- new Entity( "sbquo", 8218 ),
- new Entity( "ldquo", 8220 ),
- new Entity( "rdquo", 8221 ),
- new Entity( "bdquo", 8222 ),
- new Entity( "dagger", 8224 ),
- new Entity( "Dagger", 8225 ),
- new Entity( "permil", 8240 ),
- new Entity( "lsaquo", 8249 ),
- new Entity( "rsaquo", 8250 ),
- new Entity( "euro", 8364 )
-
- };
-
- public static EntityTable getDefaultEntityTable()
- {
- if ( defaultEntityTable == null ) {
- defaultEntityTable = new EntityTable();
- for ( int i = 0; i < entities.length; i++ ) {
- defaultEntityTable.install( entities[i] );
- }
- }
- return defaultEntityTable;
- }
-
-}
+++ /dev/null
-/*
- * @(#)IStack.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Inline stack node
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class IStack {
-
- /*
- Mosaic handles inlines via a separate stack from other elements
- We duplicate this to recover from inline markup errors such as:
-
- <i>italic text
- <p>more italic text</b> normal text
-
- which for compatibility with Mosaic is mapped to:
-
- <i>italic text</i>
- <p><i>more italic text</i> normal text
-
- Note that any inline end tag pop's the effect of the current
- inline start tag, so that </b> pop's <i> in the above example.
- */
-
- public IStack next;
- public Dict tag; /* tag's dictionary definition */
- public String element; /* name (null for text nodes) */
- public AttVal attributes;
-
- public IStack()
- {
- next = null;
- tag = null;
- element = null;
- attributes = null;
- }
-
-}
+++ /dev/null
-/*
- * @(#)Lexer.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Lexer for html parser
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-/*
- Given a file stream fp it returns a sequence of tokens.
-
- GetToken(fp) gets the next token
- UngetToken(fp) provides one level undo
-
- The tags include an attribute list:
-
- - linked list of attribute/value nodes
- - each node has 2 null-terminated strings.
- - entities are replaced in attribute values
-
- white space is compacted if not in preformatted mode
- If not in preformatted mode then leading white space
- is discarded and subsequent white space sequences
- compacted to single space chars.
-
- If XmlTags is no then tag names are folded to lower
- case (see parseTagName) and attribute names to lower case.
-
- Not yet done:
- - Doctype subset and marked sections
-*/
-
-import java.io.PrintWriter;
-import java.util.Stack;
-import java.util.Vector;
-
-import org.eclipse.core.resources.IFile;
-import sun.security.krb5.internal.av;
-
-public class Lexer {
-
- /* resource handed to the constructor; exposed through getIFile() */
- private IFile iFile;
- public StreamIn in; /* file stream */
- public PrintWriter errout; /* error output stream */
- public short badAccess; /* for accessibility errors */
- public short badLayout; /* for bad style errors */
- public short badChars; /* for bad char encodings */
- public short badForm; /* for mismatched/mispositioned form tags */
- public short warnings; /* count of warnings in this document */
- public short errors; /* count of errors */
- public int lines; /* lines seen */
- public int columns; /* at start of current token */
- public boolean waswhite; /* used to collapse contiguous white space */
- public boolean pushed; /* true after token has been pushed back */
- public boolean insertspace; /* when space is moved after end tag */
- public boolean excludeBlocks; /* Netscape compatibility */
- public boolean exiled; /* true if moved out of table */
- public boolean isvoyager; /* true if xmlns attribute on html element */
- public short versions; /* bit vector of HTML versions */
- public int doctype; /* version as given by doctype (if any) */
- public boolean badDoctype; /* e.g. if html or PUBLIC is missing */
- public int txtstart; /* start of current node */
- public int txtend; /* end of current node */
- public short state; /* state of lexer's finite state machine */
- /* most recently produced token; getToken() may hand it out again after ungetToken() */
- public Node token;
-
- /*
- lexer character buffer
-
- parse tree nodes span onto this buffer
- which contains the concatenated text
- contents of all of the elements.
-
- lexsize must be reset for each file.
- */
- public byte[] lexbuf; /* byte buffer of UTF-8 chars */
- public int lexlength; /* allocated */
- public int lexsize; /* used */
-
- /* Inline stack for compatibility with Mosaic */
- public Node inode; /* for deferring text node */
- public int insert; /* for inferring inline tags */
- /* the stack itself; the constructor creates it empty */
- public Stack istack;
- public int istackbase; /* start of frame */
-
- public Style styles; /* used for cleaning up presentation markup */
-
- /* settings consulted by the lexer (e.g. XmlTags, docTypeMode, wraplen, tt) */
- public Configuration configuration;
- protected int seenBodyEndTag; /* used by parser */
- /* every node made by newNode()/cloneNode() is recorded here so that
- updateNodeTextArrays() can re-point it when lexbuf is reallocated */
- private Vector nodeList;
-
-    /**
-     * Creates a lexer that reads from the given stream.
-     *
-     * @param iFile         resource remembered for {@link #getIFile()}
-     * @param in            input stream the tokens are read from
-     * @param configuration settings used while lexing
-     */
-    public Lexer(IFile iFile, StreamIn in, Configuration configuration)
-    {
-        /* collaborators supplied by the caller */
-        this.iFile = iFile;
-        this.in = in;
-        this.configuration = configuration;
-
-        /* position tracking starts at line 1, column 1, in content state */
-        lines = 1;
-        columns = 1;
-        state = LEX_CONTENT;
-
-        /* diagnostics counters */
-        badAccess = 0;
-        badLayout = 0;
-        badChars = 0;
-        badForm = 0;
-        warnings = 0;
-        errors = 0;
-
-        /* tokenizer flags */
-        waswhite = false;
-        pushed = false;
-        insertspace = false;
-        exiled = false;
-        isvoyager = false;
-
-        /* document version bookkeeping */
-        versions = Dict.VERS_EVERYTHING;
-        doctype = Dict.VERS_UNKNOWN;
-        badDoctype = false;
-
-        /* current token and the (not yet allocated) character buffer */
-        txtstart = 0;
-        txtend = 0;
-        token = null;
-        lexbuf = null;
-        lexlength = 0;
-        lexsize = 0;
-
-        /* inline stack */
-        inode = null;
-        insert = -1;
-        istack = new Stack();
-        istackbase = 0;
-
-        styles = null;
-        seenBodyEndTag = 0;
-        nodeList = new Vector();
-    }
-
- public IFile getIFile() {
- return iFile;
- }
-
-    /** Creates an empty node and records it in the lexer's node list. */
-    public Node newNode()
-    {
-        final Node created = new Node();
-        this.nodeList.addElement(created);
-        return created;
-    }
-
-    /**
-     * Creates a node of the given type spanning textarray[start..end) and
-     * records it in the lexer's node list.
-     */
-    public Node newNode(short type, byte[] textarray, int start, int end)
-    {
-        final Node created = new Node(type, textarray, start, end);
-        this.nodeList.addElement(created);
-        return created;
-    }
-
-    /**
-     * Creates a node for the named element, passing the configuration's tag
-     * table (configuration.tt) to the Node constructor, and records the node
-     * in the lexer's node list.
-     */
-    public Node newNode(short type, byte[] textarray, int start, int end, String element)
-    {
-        final Node created =
-            new Node(type, textarray, start, end, element, this.configuration.tt);
-        this.nodeList.addElement(created);
-        return created;
-    }
-
-    /**
-     * Clones a node and records the clone, plus any asp/php nodes hanging off
-     * the clone's attributes, in the lexer's node list.
-     */
-    public Node cloneNode(Node node)
-    {
-        final Node copy = (Node) node.clone();
-        this.nodeList.addElement(copy);
-
-        AttVal attr = copy.attributes;
-        while (attr != null)
-        {
-            if (attr.asp != null)
-            {
-                this.nodeList.addElement(attr.asp);
-            }
-            if (attr.php != null)
-            {
-                this.nodeList.addElement(attr.php);
-            }
-            attr = attr.next;
-        }
-
-        return copy;
-    }
-
-    /**
-     * Clones an attribute list and records any asp/php nodes attached to the
-     * cloned attributes in the lexer's node list.
-     *
-     * @param attrs head of the list to clone; may be null (no attributes)
-     * @return the head of the cloned list, or null when attrs is null
-     */
-    public AttVal cloneAttributes(AttVal attrs)
-    {
-        /* an element without attributes has nothing to clone */
-        if (attrs == null)
-            return null;
-
-        AttVal cattrs = (AttVal)attrs.clone();
-        for (AttVal att = cattrs; att != null; att = att.next) {
-            if (att.asp != null)
-                nodeList.addElement(att.asp);
-            if (att.php != null)
-                nodeList.addElement(att.php);
-        }
-        return cattrs;
-    }
-
- protected void updateNodeTextArrays(byte[] oldtextarray, byte[] newtextarray)
- {
- Node node;
- for (int i = 0; i < nodeList.size(); i++) {
- node = (Node)(nodeList.elementAt(i));
- if (node.textarray == oldtextarray)
- node.textarray = newtextarray;
- }
- }
-
- /* used for creating preformatted text from Word2000 */
-    /**
-     * Creates a node whose text is a single '\n' appended to the lexer
-     * buffer.
-     *
-     * @return the new node spanning the appended newline
-     */
-    public Node newLineNode()
-    {
-        Node node = newNode();
-
-        node.start = this.lexsize;
-        addCharToLexer((int)'\n');
-        /* take the buffer only after appending: the append may allocate or
-           replace lexbuf, and a first-time allocation (lexbuf was null)
-           is not propagated to existing nodes by addByte() */
-        node.textarray = this.lexbuf;
-        node.end = this.lexsize;
-        return node;
-    }
-
- // Should always be able convert to/from UTF-8, so encoding exceptions are
- // converted to an Error to avoid adding throws declarations in
- // lots of methods.
-
-    /**
-     * Encodes a string as UTF-8 bytes. An unsupported-encoding failure is
-     * rethrown as an Error so callers need no throws clause.
-     */
-    public static byte[] getBytes(String str)
-    {
-        final byte[] encoded;
-        try
-        {
-            encoded = str.getBytes("UTF8");
-        }
-        catch (java.io.UnsupportedEncodingException e)
-        {
-            throw new Error("string to UTF-8 conversion failed: " + e.getMessage());
-        }
-        return encoded;
-    }
-
-    /**
-     * Decodes length bytes of UTF-8 starting at offset. An
-     * unsupported-encoding failure is rethrown as an Error so callers need
-     * no throws clause.
-     */
-    public static String getString(byte[] bytes, int offset, int length)
-    {
-        final String decoded;
-        try
-        {
-            decoded = new String(bytes, offset, length, "UTF8");
-        }
-        catch (java.io.UnsupportedEncodingException e)
-        {
-            throw new Error("UTF-8 to string conversion failed: " + e.getMessage());
-        }
-        return decoded;
-    }
-
-    /** Reports whether the input stream says it has reached its end. */
-    public boolean endOfInput()
-    {
-        final boolean exhausted = this.in.isEndOfStream();
-        return exhausted;
-    }
-
- public void addByte(int c)
- {
- if (this.lexsize + 1 >= this.lexlength)
- {
- while (this.lexsize + 1 >= this.lexlength)
- {
- if (this.lexlength == 0)
- this.lexlength = 8192;
- else
- this.lexlength = this.lexlength * 2;
- }
-
- byte[] temp = this.lexbuf;
- this.lexbuf = new byte[ this.lexlength ];
- if (temp != null)
- {
- System.arraycopy( temp, 0, this.lexbuf, 0, temp.length );
- updateNodeTextArrays(temp, this.lexbuf);
- }
- }
-
- this.lexbuf[this.lexsize++] = (byte)c;
- this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */
- }
-
-    /** Overwrites the last byte in the lexer buffer; no-op when it is empty. */
-    public void changeChar(byte c)
-    {
-        if (this.lexsize <= 0)
-        {
-            return;
-        }
-        final int last = this.lexsize - 1;
-        this.lexbuf[last] = c;
-    }
-
- /* store char c as UTF-8 encoded byte stream */
- public void addCharToLexer(int c)
- {
- if (c < 128)
- addByte(c);
- else if (c <= 0x7FF)
- {
- addByte(0xC0 | (c >> 6));
- addByte(0x80 | (c & 0x3F));
- }
- else if (c <= 0xFFFF)
- {
- addByte(0xE0 | (c >> 12));
- addByte(0x80 | ((c >> 6) & 0x3F));
- addByte(0x80 | (c & 0x3F));
- }
- else if (c <= 0x1FFFFF)
- {
- addByte(0xF0 | (c >> 18));
- addByte(0x80 | ((c >> 12) & 0x3F));
- addByte(0x80 | ((c >> 6) & 0x3F));
- addByte(0x80 | (c & 0x3F));
- }
- else
- {
- addByte(0xF8 | (c >> 24));
- addByte(0x80 | ((c >> 18) & 0x3F));
- addByte(0x80 | ((c >> 12) & 0x3F));
- addByte(0x80 | ((c >> 6) & 0x3F));
- addByte(0x80 | (c & 0x3F));
- }
- }
-
- public void addStringToLexer(String str)
- {
- for ( int i = 0; i < str.length(); i++ ) {
- addCharToLexer( (int)str.charAt(i) );
- }
- }
-
- /*
- No longer attempts to insert missing ';' for unknown
- entities unless one was present already, since this
- gives unexpected results.
-
- For example: <a href="something.htm?foo&bar&fred">
- was tidied to: <a href="something.htm?foo&bar;&fred;">
- rather than: <a href="something.htm?foo&bar&fred">
-
- My thanks to Maurice Buxton for spotting this.
- */
- /**
- * Reads the rest of an entity reference from the input. The lexer buffer
- * is expected to end with the '&' that introduced it (start is set to
- * lexsize - 1). A recognized entity is replaced in the buffer by its
- * character code; an unrecognized one is left as text and reported.
- *
- * @param mode getToken mode; only used here to turn code 160 into a plain
- * space when the Preformatted bit is set
- */
- public void parseEntity(short mode)
- {
- short map;
- int start;
- boolean first = true;
- boolean semicolon = false;
- boolean numeric = false;
- int c, ch, startcol;
- String str;
-
- start = this.lexsize - 1; /* to start at "&" */
- startcol = this.in.curcol - 1;
-
- /* collect the entity name (or number) into the lexer buffer */
- while (true)
- {
- c = this.in.readChar();
- if (c == StreamIn.EndOfStream) break;
- if (c == ';')
- {
- /* terminator is consumed but not stored in the buffer */
- semicolon = true;
- break;
- }
-
- if (first && c == '#')
- {
- addCharToLexer(c);
- first = false;
- numeric = true;
- continue;
- }
-
- first = false;
- map = MAP((char)c);
-
- /* AQ: Added flag for numeric entities so that numeric entities
- with missing semi-colons are recognized: after '#' only digits
- and the letter 'x' are accepted, so the reference ends at the
- first other character.
- */
- if (numeric && ((c == 'x') || ((map & DIGIT) != 0)))
- {
- addCharToLexer(c);
- continue;
- }
- if (!numeric && ((map & NAMECHAR) != 0))
- {
- addCharToLexer(c);
- continue;
- }
-
- /* otherwise put it back */
-
- this.in.ungetChar(c);
- break;
- }
-
- /* str is the text from '&' up to, but not including, any ';' */
- str = getString( this.lexbuf, start, this.lexsize - start );
- ch = EntityTable.getDefaultEntityTable().entityCode( str );
-
- /* deal with unrecognized entities */
- if (ch <= 0)
- {
- /* set error position just before offending character */
- this.lines = this.in.curline;
- this.columns = startcol;
-
- /* more than just the '&' was collected */
- if (this.lexsize > start +1 )
- {
- Report.entityError(this, Report.UNKNOWN_ENTITY, str, ch);
-
- /* keep a ';' only if the source had one */
- if (semicolon)
- addCharToLexer(';');
- }
- else /* naked & */
- {
- Report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch);
- }
- }
- else
- {
- if (c != ';') /* issue warning if not terminated by ';' */
- {
- /* set error position just before offending character */
- this.lines = this.in.curline;
- this.columns = startcol;
- Report.entityError(this, Report.MISSING_SEMICOLON, str, c);
- }
-
- /* drop the entity text and store the character it stands for */
- this.lexsize = start;
-
- /* code 160 becomes an ordinary space when the Preformatted bit is set */
- if (ch == 160 && (mode & Preformatted) != 0)
- ch = ' ';
-
- addCharToLexer(ch);
-
- /* with QuoteAmpersand off, an ampersand is kept in the buffer as "&amp;" */
- if (ch == '&' && !this.configuration.QuoteAmpersand)
- {
- addCharToLexer('a');
- addCharToLexer('m');
- addCharToLexer('p');
- addCharToLexer(';');
- }
- }
- }
-
-    /**
-     * Reads the remainder of a tag name into the lexer buffer. The first
-     * character is already in the buffer at txtstart. Unless XmlTags is set,
-     * upper-case characters are folded to lower case.
-     *
-     * @return the character that ended the name, cast to char (this is the
-     *         end-of-stream marker when input ran out)
-     */
-    public char parseTagName()
-    {
-        final int caseShift = (int) 'a' - (int) 'A';
-        short flags;
-        int ch;
-
-        /* fold case of the character already sitting in the buffer */
-        ch = this.lexbuf[this.txtstart];
-        flags = MAP((char) ch);
-        if (!this.configuration.XmlTags && (flags & UPPERCASE) != 0)
-        {
-            ch += caseShift;
-            this.lexbuf[this.txtstart] = (byte) ch;
-        }
-
-        for (;;)
-        {
-            ch = this.in.readChar();
-            if (ch == StreamIn.EndOfStream)
-            {
-                break;
-            }
-
-            flags = MAP((char) ch);
-            if ((flags & NAMECHAR) == 0)
-            {
-                break;
-            }
-
-            /* fold case of subsequent characters */
-            if (!this.configuration.XmlTags && (flags & UPPERCASE) != 0)
-            {
-                ch += caseShift;
-            }
-
-            addCharToLexer(ch);
-        }
-
-        this.txtend = this.lexsize;
-        return (char) ch;
-    }
-
- public void addStringLiteral(String str)
- {
- for ( int i = 0; i < str.length(); i++ ) {
- addCharToLexer( (int)str.charAt(i) );
- }
- }
-
- /* choose what version to use for new doctype */
-    /**
-     * Picks the version to use for a new doctype: the first of HTML 2.0,
-     * HTML 3.2, HTML 4.0 strict, HTML 4.0 loose and frames whose bit is set
-     * in versions, or VERS_UNKNOWN when none is.
-     */
-    public short HTMLVersion()
-    {
-        final short present = this.versions;
-        final short[] preference = {
-            Dict.VERS_HTML20,
-            Dict.VERS_HTML32,
-            Dict.VERS_HTML40_STRICT,
-            Dict.VERS_HTML40_LOOSE,
-            Dict.VERS_FRAMES
-        };
-
-        for (int k = 0; k < preference.length; k++)
-        {
-            if ((present & preference[k]) != 0)
-            {
-                return preference[k];
-            }
-        }
-
-        return Dict.VERS_UNKNOWN;
-    }
-
-    /**
-     * Looks up the name of the apparent version in the W3CVersion table.
-     *
-     * @return the entry's voyagerName when isvoyager is set, otherwise its
-     *         name; null when the table has no entry with that code
-     */
-    public String HTMLVersionName()
-    {
-        final short apparent = apparentVersion();
-
-        for (int idx = 0; idx < W3CVersion.length; idx++)
-        {
-            if (apparent != W3CVersion[idx].code)
-            {
-                continue;
-            }
-            return this.isvoyager ? W3CVersion[idx].voyagerName
-                                  : W3CVersion[idx].name;
-        }
-
-        return null;
-    }
-
- /* add meta element for Tidy */
-    /**
-     * Adds a generator meta element for Tidy at the start of the head.
-     *
-     * @return true when the element was added; false when there is no head,
-     *         or the head already holds a meta named "generator" whose
-     *         content starts with "HTML Tidy"
-     */
-    public boolean addGenerator(Node root)
-    {
-        final Node head = root.findHEAD(configuration.tt);
-        if (head == null)
-        {
-            return false;
-        }
-
-        for (Node child = head.content; child != null; child = child.next)
-        {
-            if (child.tag != configuration.tt.tagMeta)
-            {
-                continue;
-            }
-
-            final AttVal nameAttr = child.getAttrByName("name");
-            if (nameAttr == null || nameAttr.value == null
-                || Lexer.wstrcasecmp(nameAttr.value, "generator") != 0)
-            {
-                continue;
-            }
-
-            final AttVal contentAttr = child.getAttrByName("content");
-            if (contentAttr != null && contentAttr.value != null
-                && contentAttr.value.length() >= 9
-                && Lexer.wstrcasecmp(contentAttr.value.substring(0, 9), "HTML Tidy") == 0)
-            {
-                return false;
-            }
-        }
-
-        final Node meta = this.inferredTag("meta");
-        meta.addAttribute("content", "HTML Tidy, see www.w3.org");
-        meta.addAttribute("name", "generator");
-        Node.insertNodeAtStart(head, meta);
-        return true;
-    }
-
- /* return true if substring s is in p and isn't all in upper case */
- /* this is used to check the case of SYSTEM, PUBLIC, DTD and EN */
- /* len is how many chars to check in p */
- private static boolean findBadSubString(String s, String p, int len)
- {
- int n = s.length();
- int i = 0;
- String ps;
-
- while (n < len)
- {
- ps = p.substring(i, i + n);
- if (wstrcasecmp(s, ps) == 0)
- return (!ps.equals(s.substring(0, n)));
-
- ++i;
- --len;
- }
-
- return false;
- }
-
-    /**
-     * Checks the spelling of the doctype keywords.
-     *
-     * @return false as soon as findBadSubString flags one of SYSTEM, PUBLIC,
-     *         //DTD, //W3C or //EN in the doctype text; true otherwise
-     */
-    public boolean checkDocTypeKeyWords(Node doctype)
-    {
-        final int len = doctype.end - doctype.start;
-        final String text = getString(this.lexbuf, doctype.start, len);
-        final String[] keywords = { "SYSTEM", "PUBLIC", "//DTD", "//W3C", "//EN" };
-
-        for (int k = 0; k < keywords.length; k++)
-        {
-            if (findBadSubString(keywords[k], text, len))
-            {
-                return false;
-            }
-        }
-        return true;
-    }
-
- /* examine <!DOCTYPE> to identify version */
- /**
- * Examines the text of a doctype node to identify the declared version.
- * May rewrite SYSTEM/PUBLIC in the buffer to upper case and may set
- * badDoctype.
- *
- * @param doctype node whose start/end delimit the doctype text in lexbuf
- * @return the code of the matching W3CVersion entry, or 0 when the
- * doctype is not for html, is SYSTEM-only, or is not recognized
- */
- public short findGivenVersion(Node doctype)
- {
- String p, s;
- int i, j;
- int len;
- String str1;
- String str2;
-
- /* if root tag for doctype isn't html give up now */
- /* NOTE(review): reads a fixed 5 bytes without checking doctype.end */
- str1 = getString(this.lexbuf, doctype.start, 5);
- if (wstrcasecmp(str1, "html ") != 0)
- return 0;
-
- if (!checkDocTypeKeyWords(doctype))
- Report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
-
- /* give up if all we are given is the system id for the doctype */
- str1 = getString(this.lexbuf, doctype.start + 5, 7);
- if (wstrcasecmp(str1, "SYSTEM ") == 0)
- {
- /* but at least ensure the case is correct */
- if (!str1.substring(0, 6).equals("SYSTEM"))
- System.arraycopy( getBytes("SYSTEM"), 0,
- this.lexbuf, doctype.start + 5, 6 );
- return 0; /* unrecognized */
- }
-
- if (wstrcasecmp(str1, "PUBLIC ") == 0)
- {
- /* normalize the keyword's case; only the 6 letters are copied */
- if (!str1.substring(0, 6).equals("PUBLIC"))
- System.arraycopy( getBytes("PUBLIC "), 0,
- this.lexbuf, doctype.start + 5, 6 );
- }
- else
- this.badDoctype = true;
-
- /* find the first double quote; the public identifier follows it */
- for (i = doctype.start; i < doctype.end; ++i)
- {
- if (this.lexbuf[i] == (byte)'"')
- {
- /* 12 = length of "-//W3C//DTD ", 13 = length of "-//IETF//DTD " */
- str1 = getString( this.lexbuf, i + 1, 12 );
- str2 = getString( this.lexbuf, i + 1, 13 );
- if (str1.equals("-//W3C//DTD "))
- {
- /* compute length of identifier e.g. "HTML 4.0 Transitional" */
- for (j = i + 13; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j);
- len = j - i - 13;
- p = getString( this.lexbuf, i + 13, len );
-
- /* entry 0 is skipped here; it is the one compared in the IETF branch */
- for (j = 1; j < W3CVersion.length; ++j)
- {
- s = W3CVersion[j].name;
- if (len == s.length() && s.equals(p))
- return W3CVersion[j].code;
- }
-
- /* else unrecognized version */
- }
- else if (str2.equals("-//IETF//DTD "))
- {
- /* compute length of identifier e.g. "HTML 2.0" */
- for (j = i + 14; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j);
- len = j - i - 14;
-
- p = getString( this.lexbuf, i + 14, len );
- s = W3CVersion[0].name;
- if (len == s.length() && s.equals(p))
- return W3CVersion[0].code;
-
- /* else unrecognized version */
- }
- /* only the first quoted string is examined */
- break;
- }
- }
-
- return 0;
- }
-
-    /**
-     * Makes sure the html element among root's children carries an xmlns
-     * attribute equal to profile. An existing xmlns with a different (or
-     * missing) value is reported as INCONSISTENT_NAMESPACE and overwritten;
-     * when there is none, a new attribute is put at the front of the list.
-     * Does nothing if root has no html child.
-     *
-     * @param root    node whose children are searched for the html element
-     * @param profile namespace value to enforce
-     */
-    public void fixHTMLNameSpace(Node root, String profile)
-    {
-        Node node;
-        AttVal attr;
-
-        for (node = root.content;
-             node != null && node.tag != configuration.tt.tagHtml; node = node.next);
-
-        if (node == null)
-            return;
-
-        for (attr = node.attributes; attr != null; attr = attr.next)
-        {
-            /* constant first: tolerates an attribute entry without a name */
-            if ("xmlns".equals(attr.attribute))
-                break;
-        }
-
-        if (attr != null)
-        {
-            /* an xmlns without a value counts as a mismatch instead of
-               throwing a NullPointerException */
-            if (attr.value == null || !attr.value.equals(profile))
-            {
-                Report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE);
-                attr.value = profile;
-            }
-        }
-        else
-        {
-            attr = new AttVal( node.attributes, null, (int)'"',
-                               "xmlns", profile );
-            attr.dict =
-                AttributeTable.getDefaultAttributeTable().findAttribute( attr );
-            node.attributes = attr;
-        }
-    }
-
- /**
- * Writes an XHTML 1.0 doctype for the document and enforces the XHTML
- * namespace on the html element.
- *
- * @param root root of the parse tree
- * @return true when docTypeMode is DOCTYPE_OMIT (any existing doctype is
- * discarded); false after a doctype has been written
- */
- public boolean setXHTMLDocType(Node root)
- {
- String fpi = " ";
- String sysid = "";
- String namespace = XHTML_NAMESPACE;
- Node doctype;
-
- doctype = root.findDocType();
-
- if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
- {
- if (doctype != null)
- Node.discardElement(doctype);
- return true;
- }
-
- /* choose the public identifier (fpi) and system identifier (sysid) */
- if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
- {
- /* see what flavor of XHTML this document matches */
- if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
- { /* use XHTML strict */
- fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
- sysid = voyager_strict;
- }
- else if ((this.versions & Dict.VERS_LOOSE) != 0)
- {
- fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
- sysid = voyager_loose;
- }
- else if ((this.versions & Dict.VERS_FRAMES) != 0)
- { /* use XHTML frames */
- fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";
- sysid = voyager_frameset;
- }
- else /* lets assume XHTML transitional */
- {
- fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
- sysid = voyager_loose;
- }
- }
- else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
- {
- fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
- sysid = voyager_strict;
- }
- else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
- {
- fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
- sysid = voyager_loose;
- }
-
- fixHTMLNameSpace(root, namespace);
-
- /* no doctype yet: create an empty one as the first child of root */
- /* NOTE(review): the previous first child's prev link is not updated
- here, unlike in fixXMLPI -- confirm whether that is intended */
- if (doctype == null)
- {
- doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
- doctype.next = root.content;
- doctype.parent = root;
- doctype.prev = null;
- root.content = doctype;
- }
-
- /* a user supplied doctype string replaces the fpi and clears the sysid */
- if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
- configuration.docTypeStr != null)
- {
- fpi = configuration.docTypeStr;
- sysid = "";
- }
-
- /* the doctype text is appended at the end of the lexer buffer */
- this.txtstart = this.lexsize;
- this.txtend = this.lexsize;
-
- /* add public identifier */
- addStringLiteral("html PUBLIC ");
-
- /* check if the fpi is quoted or not */
- if (fpi.charAt(0) == '"')
- addStringLiteral(fpi);
- else
- {
- addStringLiteral("\"");
- addStringLiteral(fpi);
- addStringLiteral("\"");
- }
-
- /* the system identifier goes on a new line; the indent is dropped
- when sysid.length() + 6 reaches the configured wrap length */
- if (sysid.length() + 6 >= this.configuration.wraplen)
- addStringLiteral("\n\"");
- else
- addStringLiteral("\n \"");
-
- /* add system identifier */
- addStringLiteral(sysid);
- addStringLiteral("\"");
-
- this.txtend = this.lexsize;
-
- /* point the doctype node at the text just written */
- doctype.start = this.txtstart;
- doctype.end = this.txtend;
-
- return false;
- }
-
-    /**
-     * Determines the version the document appears to be.
-     *
-     * @return HTMLVersion() when no doctype version is recorded; the
-     *         recorded version when its bit is still set in versions;
-     *         otherwise HTMLVersion() after an INCONSISTENT_VERSION warning
-     */
-    public short apparentVersion()
-    {
-        final int declared = this.doctype;
-
-        if (declared == Dict.VERS_UNKNOWN)
-        {
-            return HTMLVersion();
-        }
-
-        final short[] known = {
-            Dict.VERS_HTML20,
-            Dict.VERS_HTML32,
-            Dict.VERS_HTML40_STRICT,
-            Dict.VERS_HTML40_LOOSE,
-            Dict.VERS_FRAMES
-        };
-
-        for (int k = 0; k < known.length; k++)
-        {
-            if (declared != known[k])
-            {
-                continue;
-            }
-            if ((this.versions & known[k]) != 0)
-            {
-                return known[k];
-            }
-            /* declared version is no longer possible */
-            break;
-        }
-
-        Report.warning(this, null, null, Report.INCONSISTENT_VERSION);
-        return this.HTMLVersion();
-    }
-
- /* fixup doctype if missing */
- /**
- * Fixes up the doctype: depending on docTypeMode an existing doctype is
- * kept, discarded or replaced by one matching the chosen version.
- *
- * @param root root of the parse tree
- * @return false when a doctype exists but its version is unknown (AUTO
- * mode) or when no version could be chosen; true otherwise
- */
- public boolean fixDocType(Node root)
- {
- Node doctype;
- int guessed = Dict.VERS_HTML40_STRICT, i;
-
- if (this.badDoctype)
- Report.warning(this, null, null, Report.MALFORMED_DOCTYPE);
-
- /* nothing to do for XML output */
- if (configuration.XmlOut)
- return true;
-
- doctype = root.findDocType();
-
- if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
- {
- if (doctype != null)
- Node.discardElement(doctype);
- return true;
- }
-
- if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
- {
- /* NOTE(review): doctype may be null here; the OMIT branch above
- guards the same call -- confirm discardElement accepts null */
- Node.discardElement(doctype);
- doctype = null;
- guessed = Dict.VERS_HTML40_STRICT;
- }
- else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
- {
- Node.discardElement(doctype);
- doctype = null;
- guessed = Dict.VERS_HTML40_LOOSE;
- }
- else if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
- {
- if (doctype != null)
- {
- if (this.doctype == Dict.VERS_UNKNOWN)
- return false;
-
- /* keep the existing doctype if the document still fits it */
- switch (this.doctype)
- {
- case Dict.VERS_UNKNOWN:
- return false;
-
- case Dict.VERS_HTML20:
- if ((this.versions & Dict.VERS_HTML20) != 0)
- return true;
-
- break; /* to replace old version by new */
-
- case Dict.VERS_HTML32:
- if ((this.versions & Dict.VERS_HTML32) != 0)
- return true;
-
- break; /* to replace old version by new */
-
- case Dict.VERS_HTML40_STRICT:
- if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
- return true;
-
- break; /* to replace old version by new */
-
- case Dict.VERS_HTML40_LOOSE:
- if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0)
- return true;
-
- break; /* to replace old version by new */
-
- case Dict.VERS_FRAMES:
- if ((this.versions & Dict.VERS_FRAMES) != 0)
- return true;
-
- break; /* to replace old version by new */
- }
-
- /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */
- }
-
- /* choose new doctype */
- guessed = HTMLVersion();
- }
-
- if (guessed == Dict.VERS_UNKNOWN)
- return false;
-
- /* for XML use the Voyager system identifier */
- if (this.configuration.XmlOut || this.configuration.XmlTags || this.isvoyager)
- {
- if (doctype != null)
- Node.discardElement(doctype);
-
- /* set the namespace to the profile of the chosen version */
- for (i = 0; i < W3CVersion.length; ++i)
- {
- if (guessed == W3CVersion[i].code)
- {
- fixHTMLNameSpace(root, W3CVersion[i].profile);
- break;
- }
- }
-
- return true;
- }
-
- /* no doctype (left): create an empty one as the first child of root */
- if (doctype == null)
- {
- doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
- doctype.next = root.content;
- doctype.parent = root;
- doctype.prev = null;
- root.content = doctype;
- }
-
- /* the doctype text is appended at the end of the lexer buffer */
- this.txtstart = this.lexsize;
- this.txtend = this.lexsize;
-
- /* use the appropriate public identifier */
- addStringLiteral("html PUBLIC ");
-
- if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
- configuration.docTypeStr != null)
- addStringLiteral(configuration.docTypeStr);
- else if (guessed == Dict.VERS_HTML20)
- addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
- else
- {
- addStringLiteral("\"-//W3C//DTD ");
-
- for (i = 0; i < W3CVersion.length; ++i)
- {
- if (guessed == W3CVersion[i].code)
- {
- addStringLiteral(W3CVersion[i].name);
- break;
- }
- }
-
- addStringLiteral("//EN\"");
- }
-
- this.txtend = this.lexsize;
-
- /* point the doctype node at the text just written */
- doctype.start = this.txtstart;
- doctype.end = this.txtend;
-
- return true;
- }
-
- /* ensure XML document starts with <?XML version="1.0"?> */
-    /**
-     * Ensures the document starts with an xml processing instruction.
-     *
-     * @return true when the first child of root is already a processing
-     *         instruction whose text starts with "xml"; false after a new
-     *         one has been inserted in front of the existing content
-     */
-    public boolean fixXMLPI(Node root)
-    {
-        final Node first = root.content;
-
-        if (first != null && first.type == Node.ProcInsTag)
-        {
-            final int s = first.start;
-            final boolean startsWithXml =
-                this.lexbuf[s] == (byte) 'x'
-                && this.lexbuf[s + 1] == (byte) 'm'
-                && this.lexbuf[s + 2] == (byte) 'l';
-            if (startsWithXml)
-            {
-                return true;
-            }
-        }
-
-        /* link a fresh processing instruction in as the new first child */
-        final Node xml = newNode(Node.ProcInsTag, this.lexbuf, 0, 0);
-        xml.next = first;
-        if (first != null)
-        {
-            first.prev = xml;
-        }
-        root.content = xml;
-
-        /* its text is appended at the end of the lexer buffer */
-        this.txtstart = this.lexsize;
-        this.txtend = this.lexsize;
-        addStringLiteral("xml version=\"1.0\"");
-        if (this.configuration.CharEncoding == Configuration.LATIN1)
-        {
-            addStringLiteral(" encoding=\"ISO-8859-1\"");
-        }
-        this.txtend = this.lexsize;
-
-        xml.start = this.txtstart;
-        xml.end = this.txtend;
-        return false;
-    }
-
-    /**
-     * Creates a start-tag node for the named element, spanning the current
-     * txtstart..txtend of the lexer buffer, and marks it as implicit.
-     */
-    public Node inferredTag(String name)
-    {
-        final Node inferred =
-            newNode(Node.StartTag, this.lexbuf, this.txtstart, this.txtend, name);
-        inferred.implicit = true;
-        return inferred;
-    }
-
-    /**
-     * Tells whether a node is expected to have content: it must be a start
-     * tag, and either an unknown element (no dictionary entry) or one whose
-     * content model lacks the CM_EMPTY bit.
-     */
-    public static boolean expectsContent(Node node)
-    {
-        if (node.type != Node.StartTag)
-        {
-            return false;
-        }
-
-        final boolean unknownElement = (node.tag == null);
-        return unknownElement || (node.tag.model & Dict.CM_EMPTY) == 0;
-    }
-
- /*
- create a text node for the contents of
- a CDATA element like style or script
- which ends with </foo> for some foo.
- */
- /**
- * Reads raw text up to the end tag matching container.element
- * (compared case-insensitively) and returns it as a text node.
- *
- * @param container the element whose content is being read
- * @return a text node for the content (also stored in token), or null
- * when there is no content
- */
- public Node getCDATA(Node container)
- {
- int c, lastc, start, len, i;
- String str;
- boolean endtag = false;
-
- this.lines = this.in.curline;
- this.columns = this.in.curcol;
- this.waswhite = false;
- this.txtstart = this.lexsize;
- this.txtend = this.lexsize;
-
- lastc = (int)'\0';
- start = -1; /* buffer index of the name after the last "</", or -1 */
-
- while (true)
- {
- c = this.in.readChar();
- if (c == StreamIn.EndOfStream) break;
- /* treat \r\n as \n and \r as \n */
-
- if (c == (int)'/' && lastc == (int)'<')
- {
- /* a second "</" before the real end tag is reported */
- if (endtag)
- {
- this.lines = this.in.curline;
- this.columns = this.in.curcol - 3;
-
- Report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
- }
-
- start = this.lexsize + 1; /* to first letter */
- endtag = true;
- }
- else if (c == (int)'>' && start >= 0)
- {
- /* does the name since "</" match the container element? */
- len = this.lexsize - start;
- if (len == container.element.length())
- {
- str = getString( this.lexbuf, start, len );
- if (Lexer.wstrcasecmp(str, container.element) == 0)
- {
- /* content ends just before the "</" */
- this.txtend = start - 2;
- break;
- }
- }
-
- this.lines = this.in.curline;
- this.columns = this.in.curcol - 3;
-
- Report.warning(this, null, null, Report.BAD_CDATA_CONTENT);
-
- /* if javascript insert backslash before / */
-
- if (ParserImpl.isJavaScript(container))
- {
- /* shift the buffer one byte to the right from the '/' onwards */
- for (i = this.lexsize; i > start-1; --i)
- this.lexbuf[i] = this.lexbuf[i-1];
-
- this.lexbuf[start-1] = (byte)'\\';
- this.lexsize++;
- }
-
- start = -1;
- }
- else if (c == (int)'\r')
- {
- c = this.in.readChar();
-
- if (c != (int)'\n')
- this.in.ungetChar(c);
-
- c = (int)'\n';
- }
-
- addCharToLexer((int)c);
- this.txtend = this.lexsize;
- lastc = c;
- }
-
- /* input ran out before the matching end tag was seen */
- if (c == StreamIn.EndOfStream)
- Report.warning(this, container, null, Report.MISSING_ENDTAG_FOR);
-
- if (this.txtend > this.txtstart)
- {
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- return null;
- }
-
-    /** Marks the current token as pushed back by setting the pushed flag. */
-    public void ungetToken()
-    {
-        pushed = true;
-    }
-
- /* mode values passed to getToken() */
- /* leading white space of a token is discarded */
- public static final short IgnoreWhitespace = 0;
- /* for elements which don't accept PCDATA */
- public static final short MixedContent = 1;
- /* white space preserved as is */
- public static final short Preformatted = 2;
- /* for CDATA elements such as script, style */
- public static final short IgnoreMarkup = 3;
-
- /*
- modes for GetToken()
-
- MixedContent -- for elements which don't accept PCDATA
- Preformatted -- white space preserved as is
- IgnoreMarkup -- for CDATA elements such as script, style
- */
-
- public Node getToken(short mode)
- {
- short map;
- int c = 0;
- int lastc;
- int badcomment = 0;
- MutableBoolean isempty = new MutableBoolean();
- AttVal attributes;
-
- if (this.pushed)
- {
- /* duplicate inlines in preference to pushed text nodes when appropriate */
- if (this.token.type != Node.TextNode ||
- (this.insert == -1 && this.inode == null))
- {
- this.pushed = false;
- return this.token;
- }
- }
-
- /* at start of block elements, unclosed inline
- elements are inserted into the token stream */
-
- if (this.insert != -1 || this.inode != null)
- return insertedToken();
-
- this.lines = this.in.curline;
- this.columns = this.in.curcol;
- this.waswhite = false;
-
- this.txtstart = this.lexsize;
- this.txtend = this.lexsize;
-
- while (true)
- {
- c = this.in.readChar();
- if (c == StreamIn.EndOfStream) break;
- if (this.insertspace && mode != IgnoreWhitespace)
- {
- addCharToLexer(' ');
- this.waswhite = true;
- this.insertspace = false;
- }
-
- /* treat \r\n as \n and \r as \n */
-
- if (c == '\r')
- {
- c = this.in.readChar();
-
- if (c != '\n')
- this.in.ungetChar(c);
-
- c = '\n';
- }
-
- addCharToLexer(c);
-
- switch (this.state)
- {
- case LEX_CONTENT: /* element content */
- map = MAP((char)c);
-
- /*
- Discard white space if appropriate. Its cheaper
- to do this here rather than in parser methods
- for elements that don't have mixed content.
- */
- if (((map & WHITE) != 0) && (mode == IgnoreWhitespace)
- && this.lexsize == this.txtstart + 1)
- {
- --this.lexsize;
- this.waswhite = false;
- this.lines = this.in.curline;
- this.columns = this.in.curcol;
- continue;
- }
-
- if (c == '<')
- {
- this.state = LEX_GT;
- continue;
- }
-
- if ((map & WHITE) != 0)
- {
- /* was previous char white? */
- if (this.waswhite)
- {
- if (mode != Preformatted && mode != IgnoreMarkup)
- {
- --this.lexsize;
- this.lines = this.in.curline;
- this.columns = this.in.curcol;
- }
- }
- else /* prev char wasn't white */
- {
- this.waswhite = true;
- lastc = c;
-
- if (mode != Preformatted && mode != IgnoreMarkup && c != ' ')
- changeChar((byte)' ');
- }
-
- continue;
- }
- else if (c == '&' && mode != IgnoreMarkup)
- parseEntity(mode);
-
- /* this is needed to avoid trimming trailing whitespace */
- if (mode == IgnoreWhitespace)
- mode = MixedContent;
-
- this.waswhite = false;
- continue;
-
- case LEX_GT: /* < */
-
- /* check for endtag */
- if (c == '/')
- {
- c = this.in.readChar();
- if (c == StreamIn.EndOfStream)
- {
- this.in.ungetChar(c);
- continue;
- }
-
- addCharToLexer(c);
- map = MAP((char)c);
-
- if ((map & LETTER) != 0)
- {
- this.lexsize -= 3;
- this.txtend = this.lexsize;
- this.in.ungetChar(c);
- this.state = LEX_ENDTAG;
- this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */
- this.in.curcol -= 2;
-
- /* if some text before the </ return it now */
- if (this.txtend > this.txtstart)
- {
- /* trim space char before end tag */
- if (mode == IgnoreWhitespace && this.lexbuf[this.lexsize - 1] == (byte)' ')
- {
- this.lexsize -= 1;
- this.txtend = this.lexsize;
- }
-
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- continue; /* no text so keep going */
- }
-
- /* otherwise treat as CDATA */
- this.waswhite = false;
- this.state = LEX_CONTENT;
- continue;
- }
-
- if (mode == IgnoreMarkup)
- {
- /* otherwise treat as CDATA */
- this.waswhite = false;
- this.state = LEX_CONTENT;
- continue;
- }
-
- /*
- look out for comments, doctype or marked sections
- this isn't quite right, but its getting there ...
- */
- if (c == '!')
- {
- c = this.in.readChar();
-
- if (c == '-')
- {
- c = this.in.readChar();
-
- if (c == '-')
- {
- this.state = LEX_COMMENT; /* comment */
- this.lexsize -= 2;
- this.txtend = this.lexsize;
-
- /* if some text before < return it now */
- if (this.txtend > this.txtstart)
- {
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- this.txtstart = this.lexsize;
- continue;
- }
-
- Report.warning(this, null, null, Report.MALFORMED_COMMENT);
- }
- else if (c == 'd' || c == 'D')
- {
- this.state = LEX_DOCTYPE; /* doctype */
- this.lexsize -= 2;
- this.txtend = this.lexsize;
- mode = IgnoreWhitespace;
-
- /* skip until white space or '>' */
-
- for (;;)
- {
- c = this.in.readChar();
-
- if (c == StreamIn.EndOfStream || c == '>')
- {
- this.in.ungetChar(c);
- break;
- }
-
- map = MAP((char)c);
-
- if ((map & WHITE) == 0)
- continue;
-
- /* and skip to end of whitespace */
-
- for (;;)
- {
- c = this.in.readChar();
-
- if (c == StreamIn.EndOfStream || c == '>')
- {
- this.in.ungetChar(c);
- break;
- }
-
- map = MAP((char)c);
-
- if ((map & WHITE) != 0)
- continue;
-
- this.in.ungetChar(c);
- break;
- }
-
- break;
- }
-
- /* if some text before < return it now */
- if (this.txtend > this.txtstart)
- {
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- this.txtstart = this.lexsize;
- continue;
- }
- else if (c == '[')
- {
- /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
- this.lexsize -= 2;
- this.state = LEX_SECTION;
- this.txtend = this.lexsize;
-
- /* if some text before < return it now */
- if (this.txtend > this.txtstart)
- {
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- this.txtstart = this.lexsize;
- continue;
- }
-
- /* otherwise swallow chars up to and including next '>' */
- while (true)
- {
- c = this.in.readChar();
- if (c == '>') break;
- if (c == -1)
- {
- this.in.ungetChar(c);
- break;
- }
- }
-
- this.lexsize -= 2;
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- continue;
- }
-
- /*
- processing instructions
- */
-
- if (c == '?')
- {
- this.lexsize -= 2;
- this.state = LEX_PROCINSTR;
- this.txtend = this.lexsize;
-
- /* if some text before < return it now */
- if (this.txtend > this.txtstart)
- {
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- this.txtstart = this.lexsize;
- continue;
- }
-
- /* Microsoft ASP's e.g. <% ... server-code ... %> */
- if (c == '%')
- {
- this.lexsize -= 2;
- this.state = LEX_ASP;
- this.txtend = this.lexsize;
-
- /* if some text before < return it now */
- if (this.txtend > this.txtstart)
- {
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- this.txtstart = this.lexsize;
- continue;
- }
-
- /* Netscapes JSTE e.g. <# ... server-code ... #> */
- if (c == '#')
- {
- this.lexsize -= 2;
- this.state = LEX_JSTE;
- this.txtend = this.lexsize;
-
- /* if some text before < return it now */
- if (this.txtend > this.txtstart)
- {
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- this.txtstart = this.lexsize;
- continue;
- }
-
- map = MAP((char)c);
-
- /* check for start tag */
- if ((map & LETTER) != 0)
- {
- this.in.ungetChar(c); /* push back letter */
- this.lexsize -= 2; /* discard "<" + letter */
- this.txtend = this.lexsize;
- this.state = LEX_STARTTAG; /* ready to read tag name */
-
- /* if some text before < return it now */
- if (this.txtend > this.txtstart)
- {
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- continue; /* no text so keep going */
- }
-
- /* otherwise treat as CDATA */
- this.state = LEX_CONTENT;
- this.waswhite = false;
- continue;
-
- case LEX_ENDTAG: /* </letter */
- this.txtstart = this.lexsize - 1;
- this.in.curcol += 2;
- c = parseTagName();
- this.token = newNode(Node.EndTag, /* create endtag token */
- this.lexbuf,
- this.txtstart,
- this.txtend,
- getString(this.lexbuf,
- this.txtstart,
- this.txtend - this.txtstart));
- this.lexsize = this.txtstart;
- this.txtend = this.txtstart;
-
- /* skip to '>' */
- while (c != '>')
- {
- c = this.in.readChar();
-
- if (c == StreamIn.EndOfStream)
- break;
- }
-
- if (c == StreamIn.EndOfStream)
- {
- this.in.ungetChar(c);
- continue;
- }
-
- this.state = LEX_CONTENT;
- this.waswhite = false;
- return this.token; /* the endtag token */
-
- case LEX_STARTTAG: /* first letter of tagname */
- this.txtstart = this.lexsize - 1; /* set txtstart to first letter */
- c = parseTagName();
- isempty.value = false;
- attributes = null;
- this.token = newNode((isempty.value ? Node.StartEndTag : Node.StartTag),
- this.lexbuf,
- this.txtstart,
- this.txtend,
- getString(this.lexbuf,
- this.txtstart,
- this.txtend - this.txtstart));
-
- /* parse attributes, consuming closing ">" */
- if (c != '>')
- {
- if (c == '/')
- this.in.ungetChar(c);
-
- attributes = parseAttrs(isempty);
- }
-
- if (isempty.value)
- this.token.type = Node.StartEndTag;
-
- this.token.attributes = attributes;
- this.lexsize = this.txtstart;
- this.txtend = this.txtstart;
-
- /* swallow newline following start tag */
- /* special check needed for CRLF sequence */
- /* this doesn't apply to empty elements */
-
- if (expectsContent(this.token) ||
- this.token.tag == configuration.tt.tagBr)
- {
-
- c = this.in.readChar();
-
- if (c == '\r')
- {
- c = this.in.readChar();
-
- if (c != '\n')
- this.in.ungetChar(c);
- }
- else if (c != '\n' && c != '\f')
- this.in.ungetChar(c);
-
- this.waswhite = true; /* to swallow leading whitespace */
- }
- else
- this.waswhite = false;
-
- this.state = LEX_CONTENT;
-
- if (this.token.tag == null)
- Report.error(this, null, this.token, Report.UNKNOWN_ELEMENT);
- else if (!this.configuration.XmlTags)
- {
- this.versions &= this.token.tag.versions;
-
- if ((this.token.tag.versions & Dict.VERS_PROPRIETARY) != 0)
- {
- if (!this.configuration.MakeClean && (this.token.tag == configuration.tt.tagNobr ||
- this.token.tag == configuration.tt.tagWbr))
- Report.warning(this, null, this.token, Report.PROPRIETARY_ELEMENT);
- }
-
- if (this.token.tag.chkattrs != null)
- {
- this.token.checkUniqueAttributes(this);
- this.token.tag.chkattrs.check(this, this.token);
- }
- else
- this.token.checkAttributes(this);
- }
-
- return this.token; /* return start tag */
-
- case LEX_COMMENT: /* seen <!-- so look for --> */
-
- if (c != '-')
- continue;
-
- c = this.in.readChar();
- addCharToLexer(c);
-
- if (c != '-')
- continue;
-
- end_comment: while (true) {
- c = this.in.readChar();
-
- if (c == '>')
- {
- if (badcomment != 0)
- Report.warning(this, null, null, Report.MALFORMED_COMMENT);
-
- this.txtend = this.lexsize - 2; // AQ 8Jul2000
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- this.waswhite = false;
- this.token = newNode(Node.CommentTag,
- this.lexbuf,
- this.txtstart,
- this.txtend);
-
- /* now look for a line break */
-
- c = this.in.readChar();
-
- if (c == '\r')
- {
- c = this.in.readChar();
-
- if (c != '\n')
- this.token.linebreak = true;
- }
-
- if (c == '\n')
- this.token.linebreak = true;
- else
- this.in.ungetChar(c);
-
- return this.token;
- }
-
- /* note position of first such error in the comment */
- if (badcomment == 0)
- {
- this.lines = this.in.curline;
- this.columns = this.in.curcol - 3;
- }
-
- badcomment++;
- if (this.configuration.FixComments)
- this.lexbuf[this.lexsize - 2] = (byte)'=';
-
- addCharToLexer(c);
-
- /* if '-' then look for '>' to end the comment */
- if (c != '-')
- break end_comment;
-
- }
- /* otherwise continue to look for --> */
- this.lexbuf[this.lexsize - 2] = (byte)'=';
- continue;
-
- case LEX_DOCTYPE: /* seen <!d so look for '>' munging whitespace */
- map = MAP((char)c);
-
- if ((map & WHITE) != 0)
- {
- if (this.waswhite)
- this.lexsize -= 1;
-
- this.waswhite = true;
- }
- else
- this.waswhite = false;
-
- if (c != '>')
- continue;
-
- this.lexsize -= 1;
- this.txtend = this.lexsize;
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- this.waswhite = false;
- this.token = newNode(Node.DocTypeTag,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- /* make a note of the version named by the doctype */
- this.doctype = findGivenVersion(this.token);
- return this.token;
-
- case LEX_PROCINSTR: /* seen <? so look for '>' */
- /* check for PHP preprocessor instructions <?php ... ?> */
-
- if (this.lexsize - this.txtstart == 3)
- {
- if ((getString(this.lexbuf, this.txtstart, 3)).equals("php"))
- {
- this.state = LEX_PHP;
- continue;
- }
- }
-
- if (this.configuration.XmlPIs) /* insist on ?> as terminator */
- {
- if (c != '?')
- continue;
-
- /* now look for '>' */
- c = this.in.readChar();
-
- if (c == StreamIn.EndOfStream)
- {
- Report.warning(this, null, null, Report.UNEXPECTED_END_OF_FILE);
- this.in.ungetChar(c);
- continue;
- }
-
- addCharToLexer(c);
- }
-
- if (c != '>')
- continue;
-
- this.lexsize -= 1;
- this.txtend = this.lexsize;
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- this.waswhite = false;
- this.token = newNode(Node.ProcInsTag,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
-
- case LEX_ASP: /* seen <% so look for "%>" */
- if (c != '%')
- continue;
-
- /* now look for '>' */
- c = this.in.readChar();
-
-
- if (c != '>')
- {
- this.in.ungetChar(c);
- continue;
- }
-
- this.lexsize -= 1;
- this.txtend = this.lexsize;
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- this.waswhite = false;
- this.token = newNode(Node.AspTag,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
-
- case LEX_JSTE: /* seen <# so look for "#>" */
- if (c != '#')
- continue;
-
- /* now look for '>' */
- c = this.in.readChar();
-
-
- if (c != '>')
- {
- this.in.ungetChar(c);
- continue;
- }
-
- this.lexsize -= 1;
- this.txtend = this.lexsize;
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- this.waswhite = false;
- this.token = newNode(Node.JsteTag,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
-
- case LEX_PHP: /* seen "<?php" so look for "?>" */
- if (c != '?')
- continue;
-
- /* now look for '>' */
- c = this.in.readChar();
-
- if (c != '>')
- {
- this.in.ungetChar(c);
- continue;
- }
-
- this.lexsize -= 1;
- this.txtend = this.lexsize;
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- this.waswhite = false;
- this.token = newNode(Node.PhpTag,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
-
- case LEX_SECTION: /* seen "<![" so look for "]>" */
- if (c == '[')
- {
- if (this.lexsize == (this.txtstart + 6) &&
- (getString(this.lexbuf, this.txtstart, 6)).equals("CDATA["))
- {
- this.state = LEX_CDATA;
- this.lexsize -= 6;
- continue;
- }
- }
-
- if (c != ']')
- continue;
-
- /* now look for '>' */
- c = this.in.readChar();
-
- if (c != '>')
- {
- this.in.ungetChar(c);
- continue;
- }
-
- this.lexsize -= 1;
- this.txtend = this.lexsize;
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- this.waswhite = false;
- this.token = newNode(Node.SectionTag,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
-
- case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */
- if (c != ']')
- continue;
-
- /* now look for ']' */
- c = this.in.readChar();
-
- if (c != ']')
- {
- this.in.ungetChar(c);
- continue;
- }
-
- /* now look for '>' */
- c = this.in.readChar();
-
- if (c != '>')
- {
- this.in.ungetChar(c);
- continue;
- }
-
- this.lexsize -= 1;
- this.txtend = this.lexsize;
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- this.waswhite = false;
- this.token = newNode(Node.CDATATag,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
- }
-
- if (this.state == LEX_CONTENT) /* text string */
- {
- this.txtend = this.lexsize;
-
- if (this.txtend > this.txtstart)
- {
- this.in.ungetChar(c);
-
- if (this.lexbuf[this.lexsize - 1] == (byte)' ')
- {
- this.lexsize -= 1;
- this.txtend = this.lexsize;
- }
-
- this.token = newNode(Node.TextNode,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
- }
- else if (this.state == LEX_COMMENT) /* comment */
- {
- if (c == StreamIn.EndOfStream)
- Report.warning(this, null, null, Report.MALFORMED_COMMENT);
-
- this.txtend = this.lexsize;
- this.lexbuf[this.lexsize] = (byte)'\0';
- this.state = LEX_CONTENT;
- this.waswhite = false;
- this.token = newNode(Node.CommentTag,
- this.lexbuf,
- this.txtstart,
- this.txtend);
- return this.token;
- }
-
- return null;
- }
-
-    /*
-      Parser for ASP within start tags.
-
-      Some people use ASP to customize attributes.
-      Tidy isn't really well suited to dealing with ASP.
-      This is a workaround for attributes, but won't
-      deal with the case where the ASP is used to tailor
-      the attribute value. Here is an example of a
-      workaround for using ASP in attribute values:
-
-        href="<%=rsSchool.Fields("ID").Value%>"
-
-      where the ASP that generates the attribute value
-      is masked from Tidy by the quotemarks.
-
-      Reads characters into the lexer buffer up to the closing "%>"
-      and returns them as an AspTag node, or null when nothing was
-      read. The text start/end markers are left at the end of the
-      consumed text.
-
-      If the input ends before "%>" is seen, the loop stops instead
-      of reading the end-of-stream marker forever; whatever was
-      collected is returned as the node.
-    */
-
-    public Node parseAsp()
-    {
-        int c;
-        Node asp = null;
-        boolean terminated = false;
-
-        this.txtstart = this.lexsize;
-
-        for (;;)
-        {
-            c = this.in.readChar();
-
-            /* unterminated block: push the marker back and give up */
-            if (c == StreamIn.EndOfStream)
-            {
-                this.in.ungetChar(c);
-                break;
-            }
-
-            addCharToLexer(c);
-
-            if (c != '%')
-                continue;
-
-            c = this.in.readChar();
-
-            /*
-              Anything but '>' is pushed back and re-read by the next
-              iteration, so that "%%>" still terminates the block and
-              end of stream is handled in one place.
-            */
-            if (c != '>')
-            {
-                this.in.ungetChar(c);
-                continue;
-            }
-
-            addCharToLexer(c);
-            terminated = true;
-            break;
-        }
-
-        /* strip the closing "%>" only when it was actually buffered */
-        if (terminated)
-            this.lexsize -= 2;
-
-        this.txtend = this.lexsize;
-
-        if (this.txtend > this.txtstart)
-            asp = newNode(Node.AspTag,
-                          this.lexbuf,
-                          this.txtstart,
-                          this.txtend);
-
-        this.txtstart = this.txtend;
-        return asp;
-    }
-
-    /*
-      PHP is like ASP but is based upon XML
-      processing instructions, e.g. <?php ... ?>
-
-      Reads characters into the lexer buffer up to the closing "?>"
-      and returns them as a PhpTag node, or null when nothing was
-      read. The text start/end markers are left at the end of the
-      consumed text.
-
-      If the input ends before "?>" is seen, the loop stops instead
-      of reading the end-of-stream marker forever; whatever was
-      collected is returned as the node.
-    */
-    public Node parsePhp()
-    {
-        int c;
-        Node php = null;
-        boolean terminated = false;
-
-        this.txtstart = this.lexsize;
-
-        for (;;)
-        {
-            c = this.in.readChar();
-
-            /* unterminated block: push the marker back and give up */
-            if (c == StreamIn.EndOfStream)
-            {
-                this.in.ungetChar(c);
-                break;
-            }
-
-            addCharToLexer(c);
-
-            if (c != '?')
-                continue;
-
-            c = this.in.readChar();
-
-            /*
-              Anything but '>' is pushed back and re-read by the next
-              iteration, so that "??>" still terminates the block and
-              end of stream is handled in one place.
-            */
-            if (c != '>')
-            {
-                this.in.ungetChar(c);
-                continue;
-            }
-
-            addCharToLexer(c);
-            terminated = true;
-            break;
-        }
-
-        /* strip the closing "?>" only when it was actually buffered */
-        if (terminated)
-            this.lexsize -= 2;
-
-        this.txtend = this.lexsize;
-
-        if (this.txtend > this.txtstart)
-            php = newNode(Node.PhpTag,
-                          this.lexbuf,
-                          this.txtstart,
-                          this.txtend);
-
-        this.txtstart = this.txtend;
-        return php;
-    }
-
- /*
-   Reads the next attribute name inside a start tag.
-   Consumes the '>' (or "/>") terminating start tags.
-
-   isempty - set to true when "/>" is found
-   asp     - receives the node from parseAsp() when "<%" is found, else null
-   php     - receives the node from parsePhp() when "<?" is found, else null
-
-   Returns the attribute name, or null when the tag ended, an ASP/PHP
-   block was read, a stray '<' was found or the input ran out. Upper
-   case letters are folded to lower case unless XmlTags is set.
-   The lexer buffer is only used as scratch space: lexsize is reset
-   to its value on entry before returning a name.
- */
- public String parseAttribute(MutableBoolean isempty, MutableObject asp,
- MutableObject php)
- {
- int start = 0;
- // int len = 0; Removed by BUGFIX for 126265
- short map;
- String attr;
- int c = 0;
-
- asp.setObject(null); /* clear asp pointer */
- php.setObject(null); /* clear php pointer */
- /* skip white space before the attribute */
-
- for (;;)
- {
- c = this.in.readChar();
-
- if (c == '/')
- {
- /* peek ahead: "/>" closes an empty element */
- c = this.in.readChar();
-
- if (c == '>')
- {
- isempty.value = true;
- return null;
- }
-
- /* a lone '/' becomes the first character of the name */
- this.in.ungetChar(c);
- c = '/';
- break;
- }
-
- if (c == '>')
- return null;
-
- if (c =='<')
- {
- c = this.in.readChar();
-
- if (c == '%')
- {
- asp.setObject(parseAsp());
- return null;
- }
- else if (c == '?')
- {
- php.setObject(parsePhp());
- return null;
- }
-
- /* '<' that starts neither ASP nor PHP: report it and end the tag */
- this.in.ungetChar(c);
- Report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
- return null;
- }
-
- if (c == '"' || c == '\'')
- {
- /* stray quote mark: report it and keep scanning */
- Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK);
- continue;
- }
-
- if (c == StreamIn.EndOfStream)
- {
- Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
- this.in.ungetChar(c);
- return null;
- }
-
- map = MAP((char)c);
-
- if ((map & WHITE) == 0)
- break;
- }
-
- /* c now holds the first character of the name */
- start = this.lexsize;
-
- for (;;)
- {
- /* but push back '=' for parseValue() */
- if (c == '=' || c == '>')
- {
- this.in.ungetChar(c);
- break;
- }
-
- if (c == '<' || c == StreamIn.EndOfStream)
- {
- this.in.ungetChar(c);
- break;
- }
-
- map = MAP((char)c);
-
- /* white space ends the name and is not pushed back */
- if ((map & WHITE) != 0)
- break;
-
- /* what should be done about non-namechar characters? */
- /* currently these are incorporated into the attr name */
-
- if (!this.configuration.XmlTags && (map & UPPERCASE) != 0)
- c += (int)('a' - 'A');
-
- // ++len; Removed by BUGFIX for 126265
- addCharToLexer(c);
-
- c = this.in.readChar();
- }
-
- // Following line added by GLP to fix BUG 126265. This is a temporary comment
- // and should be removed when Tidy is fixed.
- // The length is taken from the buffer rather than counted per character.
- int len = this.lexsize - start;
- attr = (len > 0 ? getString(this.lexbuf, start, len) : null);
- /* discard the scratch copy of the name from the lexer buffer */
- this.lexsize = start;
-
- return attr;
- }
-
-    /*
-      Invoked when < is seen in place of an attribute value.
-      Terminates on whitespace if not ASP, PHP or Tango.
-      This routine recognizes ' and " quoted strings.
-
-      The characters read are appended to the lexer buffer; the
-      caller extracts them. Returns the quote character the caller
-      should use as delimiter: '"' by default, '\'' once a double
-      quoted string has been seen inside the instruction.
-
-      A quoted string that is still open when the input ends stops
-      the scan instead of buffering the end-of-stream marker forever.
-    */
-    public int parseServerInstruction()
-    {
-        int c, map, delim = '"';
-        boolean isrule = false;
-
-        c = this.in.readChar();
-        addCharToLexer(c);
-
-        /* check for ASP, PHP or Tango */
-        if (c == '%' || c == '?' || c == '@')
-            isrule = true;
-
-        for (;;)
-        {
-            c = this.in.readChar();
-
-            if (c == StreamIn.EndOfStream)
-                break;
-
-            if (c == '>')
-            {
-                if (isrule)
-                    addCharToLexer(c);
-                else
-                    this.in.ungetChar(c);
-
-                break;
-            }
-
-            /* if not recognized as ASP, PHP or Tango */
-            /* then also finish value on whitespace */
-            if (!isrule)
-            {
-                map = MAP((char)c);
-
-                if ((map & WHITE) != 0)
-                    break;
-            }
-
-            addCharToLexer(c);
-
-            if (c == '"')
-            {
-                /* copy the double quoted string, closing quote included */
-                for (;;)
-                {
-                    c = this.in.readChar();
-
-                    /* unterminated string: let the outer loop see the end of input */
-                    if (c == StreamIn.EndOfStream)
-                    {
-                        this.in.ungetChar(c);
-                        break;
-                    }
-
-                    addCharToLexer(c);
-
-                    if (c == '"')
-                        break;
-                }
-
-                /* the value contains '"', so it must be delimited by '\'' */
-                delim = '\'';
-                continue;
-            }
-
-            if (c == '\'')
-            {
-                /* copy the single quoted string, closing quote included */
-                for (;;)
-                {
-                    c = this.in.readChar();
-
-                    /* unterminated string: let the outer loop see the end of input */
-                    if (c == StreamIn.EndOfStream)
-                    {
-                        this.in.ungetChar(c);
-                        break;
-                    }
-
-                    addCharToLexer(c);
-
-                    if (c == '\'')
-                        break;
-                }
-            }
-        }
-
-        return delim;
-    }
-
- /* values start with "=" or " = " etc. */
- /* doesn't consume the ">" at end of start tag */
- /*
-   Reads the value of the attribute called name.
-
-   name     - attribute name, used for the URL and script exceptions below
-   foldCase - when true, upper case letters in the value are lower cased
-   isempty  - set to true when an unquoted value is ended by "/>"
-   pdelim   - receives the quote character found, or '"' if there was none
-
-   Returns the value, or null when there is no '=' or the value is
-   empty and unquoted. The lexer buffer is only used as scratch
-   space: lexsize is reset before returning.
- */
-
- public String parseValue(String name, boolean foldCase,
- MutableBoolean isempty, MutableInteger pdelim)
- {
- int len = 0;
- int start;
- short map;
- boolean seen_gt = false;
- boolean munge = true;
- int c = 0;
- int lastc, delim, quotewarning;
- String value;
-
- delim = 0;
- pdelim.value = (int)'"';
-
- /*
- Henry Zrepa reports that some folk are using the
- embed element with script attributes where newlines
- are significant and must be preserved
- */
- if (configuration.LiteralAttribs)
- munge = false;
-
- /* skip white space before the '=' */
-
- for (;;)
- {
- c = this.in.readChar();
-
- if (c == StreamIn.EndOfStream)
- {
- this.in.ungetChar(c);
- break;
- }
-
- map = MAP((char)c);
-
- if ((map & WHITE) == 0)
- break;
- }
-
- /*
- c should be '=' if there is a value
- other legal possibilities are white
- space, '/' and '>'
- */
-
- if (c != '=')
- {
- this.in.ungetChar(c);
- return null;
- }
-
- /* skip white space after '=' */
-
- for (;;)
- {
- c = this.in.readChar();
-
- if (c == StreamIn.EndOfStream)
- {
- this.in.ungetChar(c);
- break;
- }
-
- map = MAP((char)c);
-
- if ((map & WHITE) == 0)
- break;
- }
-
- /* check for quote marks */
-
- if (c == '"' || c == '\'')
- delim = c;
- else if (c == '<')
- {
- /* server-side markup in place of a value: copy it verbatim */
- start = this.lexsize;
- addCharToLexer(c);
- pdelim.value = parseServerInstruction();
- len = this.lexsize - start;
- this.lexsize = start;
- return (len > 0 ? getString(this.lexbuf, start, len) : null);
- }
- else
- this.in.ungetChar(c);
-
- /*
- and read the value string
- check for quote mark if needed
- */
-
- quotewarning = 0;
- start = this.lexsize;
- c = '\0';
-
- for (;;)
- {
- lastc = c; /* track last character */
- c = this.in.readChar();
-
- if (c == StreamIn.EndOfStream)
- {
- Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
- this.in.ungetChar(c);
- break;
- }
-
- if (delim == (char)0)
- {
- /* unquoted value: ended by '>', a quote mark, "/>" or white space */
- if (c == '>')
- {
- this.in.ungetChar(c);
- break;
- }
-
- if (c == '"' || c == '\'')
- {
- Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK);
- break;
- }
-
- if (c == '<')
- {
- /* reported only; the '<' stays part of the value */
- /* this.in.ungetChar(c); */
- Report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
- /* break; */
- }
-
- /*
- For cases like <br clear=all/> need to avoid treating /> as
- part of the attribute value, however care is needed to avoid
- so treating <a href=http://www.acme.com/> in this way, which
- would map the <a> tag to <a href="http://www.acme.com"/>
- */
- if (c == '/')
- {
- /* peek ahead in case of /> */
- c = this.in.readChar();
-
- if (c == '>' &&
- !AttributeTable.getDefaultAttributeTable().isUrl(name))
- {
- isempty.value = true;
- this.in.ungetChar(c);
- break;
- }
-
- /* unget peeked char */
- this.in.ungetChar(c);
- c = '/';
- }
- }
- else /* delim is '\'' or '"' */
- {
- if (c == delim)
- break;
-
- /* treat CRLF, CR and LF as single line break */
-
- if (c == '\r')
- {
- c = this.in.readChar();
- if (c != '\n')
- this.in.ungetChar(c);
-
- c = '\n';
- }
-
- /* count the characters that hint at a missing closing quote */
- if (c == '\n' || c == '<' || c == '>')
- ++quotewarning;
-
- if (c == '>')
- seen_gt = true;
- }
-
- if (c == '&')
- {
- addCharToLexer(c);
- parseEntity((short)0);
- continue;
- }
-
- /*
- kludge for JavaScript attribute values
- with line continuations in string literals
- */
- if (c == '\\')
- {
- c = this.in.readChar();
-
- if (c != '\n')
- {
- this.in.ungetChar(c);
- c = '\\';
- }
- }
-
- map = MAP((char)c);
-
- if ((map & WHITE) != 0)
- {
- if (delim == (char)0)
- break;
-
- /* inside quotes: turn white space into ' ' and skip it after a ' ' */
- if (munge)
- {
- c = ' ';
-
- if (lastc == ' ')
- continue;
- }
- }
- else if (foldCase && (map & UPPERCASE) != 0)
- c += (int)('a' - 'A');
-
- addCharToLexer(c);
- }
-
- if (quotewarning > 10 && seen_gt && munge)
- {
- /*
- there is almost certainly a missing trailing quote mark
- as we have seen too many newlines, < or > characters.
-
- an exception is made for Javascript attributes and the
- javascript URL scheme which may legitimately include < and >
- */
- /* NOTE(review): getString is asked for 11 bytes even if the value is shorter - confirm it tolerates that */
- if (!AttributeTable.getDefaultAttributeTable().isScript(name) &&
- !(AttributeTable.getDefaultAttributeTable().isUrl(name) &&
- (getString(this.lexbuf, start, 11)).equals("javascript:")))
- Report.error(this, null, null, Report.SUSPECTED_MISSING_QUOTE);
- }
-
- len = this.lexsize - start;
- /* discard the scratch copy of the value from the lexer buffer */
- this.lexsize = start;
-
- /* a quoted empty value yields "", an unquoted empty value yields null */
- if (len > 0 || delim != 0)
- value = getString(this.lexbuf, start, len);
- else
- value = null;
-
- /* note delimiter if given */
- if (delim != 0)
- pdelim.value = delim;
- else
- pdelim.value = (int)'"';
-
- return value;
- }
-
-    /*
-      Checks that attr is a well formed attribute name: a letter
-      followed only by name characters (letters, digits, '-', '.',
-      ':' and '_').
-
-      attr must be non-null. An empty string is not a valid name and
-      yields false rather than an index exception.
-    */
-    public static boolean isValidAttrName(String attr)
-    {
-        int length = attr.length();
-
-        /* nothing to inspect, so there is no leading letter */
-        if (length == 0)
-            return false;
-
-        /* first character should be a letter */
-        if ((MAP(attr.charAt(0)) & LETTER) == 0)
-            return false;
-
-        /* remaining characters should be namechars */
-        for (int i = 1; i < length; i++)
-        {
-            if ((MAP(attr.charAt(i)) & NAMECHAR) == 0)
-                return false;
-        }
-
-        return true;
-    }
-
-    /*
-      Collects the attributes of the current start tag, swallowing
-      the closing '>'.
-
-      Each new attribute is linked in front of the ones read before
-      it, so the returned list starts with the last attribute read.
-      ASP and PHP blocks found between attributes become list
-      entries of their own. A name rejected by isValidAttrName() is
-      reported and left out of the list.
-
-      isempty is handed on to parseAttribute() and parseValue(),
-      which set it when the tag ends with "/>".
-    */
-
-    public AttVal parseAttrs(MutableBoolean isempty)
-    {
-        MutableInteger quote = new MutableInteger();
-        MutableObject aspHolder = new MutableObject();
-        MutableObject phpHolder = new MutableObject();
-        AttVal head = null;
-        AttVal av;
-
-        while (!endOfInput())
-        {
-            String attrName = parseAttribute(isempty, aspHolder, phpHolder);
-
-            if (attrName == null)
-            {
-                if (aspHolder.getObject() != null)
-                {
-                    /* attributes created by ASP markup */
-                    head = new AttVal(head, null, (Node)aspHolder.getObject(), null,
-                                      '\0', null, null );
-                }
-                else if (phpHolder.getObject() != null)
-                {
-                    /* attributes created by PHP markup */
-                    head = new AttVal(head, null, null, (Node)phpHolder.getObject(),
-                                      '\0', null, null );
-                }
-                else
-                {
-                    /* end of tag, error or end of input */
-                    break;
-                }
-
-                continue;
-            }
-
-            String attrValue = parseValue(attrName, false, isempty, quote);
-
-            if (!isValidAttrName(attrName))
-            {
-                /* built but never linked into the list */
-                av = new AttVal( null, null, null, null,
-                                 0, attrName, attrValue );
-                Report.attrError(this, this.token, attrValue, Report.BAD_ATTRIBUTE_VALUE);
-                continue;
-            }
-
-            av = new AttVal( head, null, null, null,
-                             quote.value, attrName, attrValue );
-            av.dict =
-                AttributeTable.getDefaultAttributeTable().findAttribute(av);
-            head = av;
-        }
-
-        return head;
-    }
-
-    /*
-      Push a copy of an inline node onto the inline stack.
-      Nothing is pushed for implicit nodes, for nodes without a
-      dictionary entry, for non-inline elements or for elements
-      flagged CM_OBJECT. Implicit tags are the ones generated
-      from the istack.
-
-      One issue arises with pushing inlines when
-      the tag is already pushed. For instance:
-
-          <p><em>text
-          <p><em>more text
-
-      Shouldn't be mapped to
-
-          <p><em>text</em></p>
-          <p><em><em>more text</em></em>
-
-      so a tag already on the stack is skipped, except for FONT.
-    */
-    public void pushInline( Node node )
-    {
-        if (node.implicit || node.tag == null)
-            return;
-
-        boolean inline = (node.tag.model & Dict.CM_INLINE) != 0;
-        boolean object = (node.tag.model & Dict.CM_OBJECT) != 0;
-
-        if (!inline || object)
-            return;
-
-        if (node.tag != configuration.tt.tagFont && isPushed(node))
-            return;
-
-        /* record tag, element name and a private copy of the attributes */
-        IStack entry = new IStack();
-
-        entry.tag = node.tag;
-        entry.element = node.element;
-
-        if (node.attributes != null)
-            entry.attributes = cloneAttributes(node.attributes);
-
-        this.istack.push( entry );
-    }
-
-    /*
-      Pop the inline stack.
-
-      With a null node the top entry is popped unconditionally.
-      Otherwise nothing happens unless the node is an inline element
-      that is not flagged CM_OBJECT. For </a> entries are popped
-      until an <a> has been removed; for any other inline a single
-      entry is popped. The insert position is cleared when it no
-      longer lies inside the stack.
-    */
-    public void popInline( Node node )
-    {
-        boolean closesAnchor = false;
-
-        if (node != null)
-        {
-            if (node.tag == null)
-                return;
-
-            if ((node.tag.model & Dict.CM_INLINE) == 0
-                || (node.tag.model & Dict.CM_OBJECT) != 0)
-                return;
-
-            closesAnchor = (node.tag == configuration.tt.tagA);
-        }
-
-        if (closesAnchor)
-        {
-            /* if node is </a> then pop until we find an <a> */
-            while (this.istack.size() > 0)
-            {
-                IStack top = (IStack)this.istack.pop();
-
-                if (top.tag == configuration.tt.tagA)
-                    break;
-            }
-
-            if (this.insert >= this.istack.size())
-                this.insert = -1;
-        }
-        else if (this.istack.size() > 0)
-        {
-            this.istack.pop();
-
-            if (this.insert >= this.istack.size())
-                this.insert = -1;
-        }
-    }
-
-    /*
-      Tells whether an entry with the same tag as node is already
-      on the inline stack. The stack is searched from the top down.
-    */
-    public boolean isPushed( Node node )
-    {
-        int pos = this.istack.size();
-
-        while (--pos >= 0)
-        {
-            IStack entry = (IStack)this.istack.elementAt(pos);
-
-            if (entry.tag == node.tag)
-                return true;
-        }
-
-        return false;
-    }
-
-    /*
-      This has the effect of inserting "missing" inline
-      elements around the contents of blocklevel elements
-      such as P, TD, TH, DIV, PRE etc. This procedure is
-      called at the start of ParseBlock when the inline
-      stack is not empty, as will be the case in:
-
-          <i><h1>italic heading</h1></i>
-
-      which is then treated as equivalent to
-
-          <h1><i>italic heading</i></h1>
-
-      This is implemented by setting the lexer into a mode
-      where it gets tokens from the inline stack rather than
-      from the input stream.
-
-      Returns the number of stack entries above istackbase; the
-      lexer is only switched into that mode when this is positive.
-    */
-    public int inlineDup( Node node )
-    {
-        int pending = this.istack.size() - this.istackbase;
-
-        if (pending <= 0)
-            return pending;
-
-        this.insert = this.istackbase;
-        this.inode = node;
-        return pending;
-    }
-
-    /*
-      Supplies the next token while the lexer is replaying the
-      inline stack (see inlineDup).
-
-      When no stack entry is left to replay (insert == -1) the
-      deferred node is handed back and cleared. Otherwise an
-      implicit start tag is built from the stack entry at the insert
-      position, and the position moves on to the next entry or to -1
-      after the last one.
-    */
-    public Node insertedToken()
-    {
-        if (this.insert == -1)
-        {
-            Node deferred = this.inode;
-
-            this.inode = null;
-            return deferred;
-        }
-
-        /* if this is the "latest" node then update the position, */
-        /* otherwise use the current values */
-        if (this.inode == null)
-        {
-            this.lines = this.in.curline;
-            this.columns = this.in.curcol;
-        }
-
-        // GLP: Bugfix 126261. Remove when this change
-        //      is fixed in istack.c in the original Tidy
-        Node copy = newNode(Node.StartTag,
-                            this.lexbuf,
-                            this.txtstart,
-                            this.txtend);
-        IStack entry = (IStack)this.istack.elementAt( this.insert );
-
-        copy.implicit = true;
-        copy.element = entry.element;
-        copy.tag = entry.tag;
-
-        if (entry.attributes != null)
-            copy.attributes = cloneAttributes(entry.attributes);
-
-        /* advance to the next item on the stack, */
-        /* or leave replay mode if we have reached the end */
-        int next = this.insert + 1;
-
-        this.insert = (next < this.istack.size()) ? next : -1;
-
-        return copy;
-    }
-
-    /*
-      Case-insensitive equality test with a C-like result:
-      0 when the strings are equal ignoring case, 1 otherwise.
-      AQ: Try this for speed optimization
-    */
-    public static int wstrcasecmp(String s1, String s2)
-    {
-        if (s1.equalsIgnoreCase(s2))
-            return 0;
-
-        return 1;
-    }
-
-    /*
-      Lexical comparison that skips a common prefix ignoring the
-      case of ASCII letters.
-
-      Returns 0 when both strings end together, -1 when s1 is a
-      proper prefix of s2 and 1 when s2 is a proper prefix of s1.
-      Otherwise the first differing characters are compared as they
-      stand (not case folded), giving 1 or -1.
-    */
-    public static int wstrcaselexcmp(String s1, String s2)
-    {
-        int n1 = s1.length();
-        int n2 = s2.length();
-        int pos = 0;
-
-        while (pos < n1 && pos < n2
-               && toLower(s1.charAt(pos)) == toLower(s2.charAt(pos)))
-            pos++;
-
-        if (pos == n1)
-            return (pos == n2) ? 0 : -1;
-
-        if (pos == n2)
-            return 1;
-
-        return (s1.charAt(pos) > s2.charAt(pos)) ? 1 : -1;
-    }
-
-    /*
-      Tells whether s2 occurs anywhere inside s1, ignoring case.
-
-      Each candidate position is compared over exactly the length of
-      s2. Comparing s2 against the whole remainder of s1 would only
-      ever match when s2 is a suffix of s1.
-
-      An empty s2 is contained in every string; an s2 longer than s1
-      is never contained.
-    */
-    public static boolean wsubstr(String s1, String s2)
-    {
-        int len1 = s1.length();
-        int len2 = s2.length();
-
-        for (int i = 0; i <= len1 - len2; ++i)
-        {
-            /* case-insensitive match of s2 against s1[i .. i + len2) */
-            if (s1.regionMatches(true, i, s2, 0, len2))
-                return true;
-        }
-
-        return false;
-    }
-
-    /*
-      Decides whether an element may be discarded.
-
-      Text nodes can always go. An element is kept when it has
-      content, when it is an A with attributes, a P while
-      DropEmptyParas is off, an unknown tag, a table row element
-      (CM_ROW), an APPLET or OBJECT, or when it carries an id or
-      name attribute.
-    */
-    public boolean canPrune(Node element)
-    {
-        if (element.type == Node.TextNode)
-            return true;
-
-        if (element.content != null)
-            return false;
-
-        boolean hasAttributes = (element.attributes != null);
-
-        if (hasAttributes && element.tag == configuration.tt.tagA)
-            return false;
-
-        if (!this.configuration.DropEmptyParas && element.tag == configuration.tt.tagP)
-            return false;
-
-        /* from here on the dictionary entry is dereferenced */
-        if (element.tag == null)
-            return false;
-
-        if ((element.tag.model & Dict.CM_ROW) != 0)
-            return false;
-
-        if (element.tag == configuration.tt.tagApplet
-            || element.tag == configuration.tt.tagObject)
-            return false;
-
-        boolean identified = hasAttributes
-            && (element.getAttrByName("id") != null
-                || element.getAttrByName("name") != null);
-
-        return !identified;
-    }
-
-    /*
-      Duplicate name attribute as an id.
-
-      When the node has both attributes and their values differ, an
-      ID_NAME_MISMATCH is reported. When it only has a name and
-      XmlOut is set, an id with the same value is added.
-
-      Attribute values may be null (an attribute written without a
-      value), so the comparison is null safe: two missing values
-      match, a missing and a present value do not.
-    */
-    public void fixId(Node node)
-    {
-        AttVal name = node.getAttrByName("name");
-        AttVal id = node.getAttrByName("id");
-
-        if (name == null)
-            return;
-
-        if (id != null)
-        {
-            boolean same = (id.value == null)
-                ? (name.value == null)
-                : id.value.equals(name.value);
-
-            if (!same)
-                Report.attrError(this, node, "name", Report.ID_NAME_MISMATCH);
-        }
-        else if (this.configuration.XmlOut)
-            node.addAttribute("id", name.value);
-    }
-
-    /*
-      Defer duplicates when entering a table or other
-      element where the inlines shouldn't be duplicated.
-      Drops the deferred node and clears the insert position, so
-      that nothing is replayed from the inline stack.
-    */
-    public void deferDup()
-    {
-        this.inode = null;
-        this.insert = -1;
-    }
-
- /* Private methods and fields */
-
- /* lexer char types */
- /* bit flags: one character may carry several of them in lexmap */
- private static final short DIGIT = 1;
- private static final short LETTER = 2;
- private static final short NAMECHAR = 4;
- private static final short WHITE = 8;
- private static final short NEWLINE = 16;
- private static final short LOWERCASE = 32;
- private static final short UPPERCASE = 64;
-
- /* lexer GetToken states */
- /* values of this.state; the comments give the input seen so far */
-
- private static final short LEX_CONTENT = 0;
- private static final short LEX_GT = 1;
- private static final short LEX_ENDTAG = 2; /* seen "</" + letter */
- private static final short LEX_STARTTAG = 3; /* seen "<" + letter */
- private static final short LEX_COMMENT = 4; /* seen "<!--" */
- private static final short LEX_DOCTYPE = 5; /* seen "<!d" */
- private static final short LEX_PROCINSTR = 6; /* seen "<?" */
- private static final short LEX_ENDCOMMENT = 7;
- private static final short LEX_CDATA = 8; /* seen "<![CDATA[" */
- private static final short LEX_SECTION = 9; /* seen "<![" */
- private static final short LEX_ASP = 10; /* seen "<%" */
- private static final short LEX_JSTE = 11; /* seen "<#" */
- private static final short LEX_PHP = 12; /* seen "<?php" */
-
- /* used to classify chars for lexical purposes */
- /* indexed by character code, so only codes 0..127 have an entry */
- private static short[] lexmap = new short[128];
-
-    /*
-      Adds the flags in code to the lexmap entry of every character
-      of str. Flags already set for a character are kept.
-    */
-    private static void mapStr(String str, short code)
-    {
-        char[] chars = str.toCharArray();
-
-        for ( int i = 0; i < chars.length; i++ )
-            lexmap[chars[i]] |= code;
-    }
-
-    /* fills lexmap with the character classes used by the lexer */
-    static {
-        /* letters and digits may all appear in names */
-        mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short)(UPPERCASE|LETTER|NAMECHAR));
-        mapStr("abcdefghijklmnopqrstuvwxyz", (short)(LOWERCASE|LETTER|NAMECHAR));
-        mapStr("0123456789", (short)(DIGIT|NAMECHAR));
-
-        /* punctuation allowed in names */
-        mapStr("-.:_", NAMECHAR);
-
-        /* white space; line breaks are white space too */
-        mapStr(" \t", WHITE);
-        mapStr("\r\n\f", (short)(NEWLINE|WHITE));
-    }
-
-    /*
-      Returns the lexmap flags of c, or 0 for characters with a
-      code of 128 or above, which have no table entry.
-    */
-    private static short MAP( char c )
-    {
-        if (c >= 128)
-            return 0;
-
-        return lexmap[c];
-    }
-
-    /* true when c is flagged WHITE in lexmap */
-    private static boolean isWhite(char c)
-    {
-        return (MAP(c) & WHITE) != 0;
-    }
-
-    /* true when c is flagged DIGIT in lexmap */
-    private static boolean isDigit(char c)
-    {
-        return (MAP(c) & DIGIT) != 0;
-    }
-
-    /* true when c is flagged LETTER in lexmap */
-    private static boolean isLetter(char c)
-    {
-        return (MAP(c) & LETTER) != 0;
-    }
-
-    /*
-      Lower-cases c when lexmap flags it UPPERCASE; any other
-      character is returned unchanged.
-    */
-    private static char toLower(char c)
-    {
-        if ((MAP(c) & UPPERCASE) == 0)
-            return c;
-
-        return (char)( c + ('a' - 'A') );
-    }
-
-    /*
-      Upper-cases c when lexmap flags it LOWERCASE; any other
-      character is returned unchanged.
-    */
-    private static char toUpper(char c)
-    {
-        if ((MAP(c) & LOWERCASE) == 0)
-            return c;
-
-        return (char)( c - ('a' - 'A') );
-    }
-
-    /*
-      Folds the case of c.
-
-      c       - the character to fold
-      tocaps  - true to fold to upper case, false to fold to lower case
-      xmlTags - when true the character is returned untouched
-
-      Only characters that lexmap flags as letters of the opposite
-      case are changed.
-    */
-    public static char foldCase(char c, boolean tocaps, boolean xmlTags)
-    {
-        if (xmlTags)
-            return c;
-
-        short flags = MAP(c);
-
-        if (tocaps && (flags & LOWERCASE) != 0)
-            return (char)( c - ('a' - 'A') );
-
-        /* force to lower case */
-        if (!tocaps && (flags & UPPERCASE) != 0)
-            return (char)( c + ('a' - 'A') );
-
-        return c;
-    }
-
-
-    /*
-      One row of the W3CVersion table: an HTML version name together
-      with the XHTML name, the DTD URI and the version code that are
-      listed for it.
-    */
-    private static class W3CVersionInfo
-    {
-        /* HTML version name, e.g. "HTML 4.01" */
-        String name;
-        /* matching XHTML name, e.g. "XHTML 1.0 Strict" */
-        String voyagerName;
-        /* URI of the XHTML DTD */
-        String profile;
-        /* Dict.VERS_* code */
-        short code;
-
-        public W3CVersionInfo( String name,
-                               String voyagerName,
-                               String profile,
-                               short code )
-        {
-            this.code = code;
-            this.profile = profile;
-            this.voyagerName = voyagerName;
-            this.name = name;
-        }
-    }
-
- /* the 3 URIs for the XHTML 1.0 DTDs */
- private static final String voyager_loose = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
- private static final String voyager_strict = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
- private static final String voyager_frameset = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd";
-
- /* namespace URI of XHTML */
- private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
-
- /*
-   Table of the known HTML versions. Each row gives the HTML version
-   name, the XHTML name and DTD URI listed for it, and its
-   Dict.VERS_* code. HTML 4.01 and HTML 4.0 share the same codes,
-   while HTML 3.2 is paired with XHTML 1.0 Transitional and HTML 2.0
-   with XHTML 1.0 Strict.
- */
- private static Lexer.W3CVersionInfo[] W3CVersion =
- {
- new W3CVersionInfo("HTML 4.01",
- "XHTML 1.0 Strict",
- voyager_strict,
- Dict.VERS_HTML40_STRICT),
- new W3CVersionInfo("HTML 4.01 Transitional",
- "XHTML 1.0 Transitional",
- voyager_loose,
- Dict.VERS_HTML40_LOOSE),
- new W3CVersionInfo("HTML 4.01 Frameset",
- "XHTML 1.0 Frameset",
- voyager_frameset,
- Dict.VERS_FRAMES),
- new W3CVersionInfo("HTML 4.0",
- "XHTML 1.0 Strict",
- voyager_strict,
- Dict.VERS_HTML40_STRICT),
- new W3CVersionInfo("HTML 4.0 Transitional",
- "XHTML 1.0 Transitional",
- voyager_loose,
- Dict.VERS_HTML40_LOOSE),
- new W3CVersionInfo("HTML 4.0 Frameset",
- "XHTML 1.0 Frameset",
- voyager_frameset,
- Dict.VERS_FRAMES),
- new W3CVersionInfo("HTML 3.2",
- "XHTML 1.0 Transitional",
- voyager_loose,
- Dict.VERS_HTML32),
- new W3CVersionInfo("HTML 2.0",
- "XHTML 1.0 Strict",
- voyager_strict,
- Dict.VERS_HTML20)
- };
-
-}
+++ /dev/null
-/*
- * @(#)MutableBoolean.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Mutable Boolean
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class MutableBoolean {
-
- public boolean value;
-
-}
+++ /dev/null
-/*
- * @(#)MutableInteger.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Mutable Integer
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class MutableInteger {
-
- public int value;
-
-}
+++ /dev/null
-/*
- * @(#)MutableObject.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Mutable Object
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class MutableObject {
-
- public MutableObject()
- {
- this(null);
- }
-
- public MutableObject(Object o)
- {
- this.value = o;
- }
-
- public void setObject(Object o)
- {
- value = o;
- }
-
- public Object getObject()
- {
- return value;
- }
-
- private Object value;
-
-}
+++ /dev/null
-/*
- * @(#)Node.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Node
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-/*
- Used for elements and text nodes
- element name is null for text nodes
- start and end are offsets into lexbuf
- which contains the textual content of
- all elements in the parse tree.
-
- parent and content allow traversal
- of the parse tree in any direction.
- attributes are represented as a linked
- list of AttVal nodes which hold the
- strings for attribute/value pairs.
-*/
-
-public class Node {
-
- public static final short RootNode = 0;
- public static final short DocTypeTag = 1;
- public static final short CommentTag = 2;
- public static final short ProcInsTag = 3;
- public static final short TextNode = 4;
- public static final short StartTag = 5;
- public static final short EndTag = 6;
- public static final short StartEndTag = 7;
- public static final short CDATATag = 8;
- public static final short SectionTag = 9;
- public static final short AspTag = 10;
- public static final short JsteTag = 11;
- public static final short PhpTag = 12;
-
- protected Node parent;
- protected Node prev;
- protected Node next;
- protected Node last;
- protected int start; /* start of span onto text array */
- protected int end; /* end of span onto text array */
- protected byte[] textarray; /* the text array */
- protected short type; /* TextNode, StartTag, EndTag etc. */
- protected boolean closed; /* true if closed by explicit end tag */
- protected boolean implicit; /* true if inferred */
- protected boolean linebreak; /* true if followed by a line break */
- protected Dict was; /* old tag when it was changed */
- protected Dict tag; /* tag's dictionary definition */
- protected String element; /* name (null for text nodes) */
- protected AttVal attributes;
- protected Node content;
-
- public Node()
- {
- this(TextNode, null, 0, 0);
- }
-
- public Node(short type, byte[] textarray, int start, int end)
- {
- this.parent = null;
- this.prev = null;
- this.next = null;
- this.last = null;
- this.start = start;
- this.end = end;
- this.textarray = textarray;
- this.type = type;
- this.closed = false;
- this.implicit = false;
- this.linebreak = false;
- this.was = null;
- this.tag = null;
- this.element = null;
- this.attributes = null;
- this.content = null;
- }
-
- public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt)
- {
- this.parent = null;
- this.prev = null;
- this.next = null;
- this.last = null;
- this.start = start;
- this.end = end;
- this.textarray = textarray;
- this.type = type;
- this.closed = false;
- this.implicit = false;
- this.linebreak = false;
- this.was = null;
- this.tag = null;
- this.element = element;
- this.attributes = null;
- this.content = null;
- if (type == StartTag || type == StartEndTag || type == EndTag)
- tt.findTag(this);
- }
-
- /* used to clone heading nodes when split by an <HR> */
- protected Object clone()
- {
- Node node = new Node();
-
- node.parent = this.parent;
- if (this.textarray != null)
- {
- node.textarray = new byte[this.end - this.start];
- node.start = 0;
- node.end = this.end - this.start;
- if (node.end > 0)
- System.arraycopy(this.textarray, this.start,
- node.textarray, node.start, node.end);
- }
- node.type = this.type;
- node.closed = this.closed;
- node.implicit = this.implicit;
- node.linebreak = this.linebreak;
- node.was = this.was;
- node.tag = this.tag;
- if (this.element != null)
- node.element = this.element;
- if (this.attributes != null)
- node.attributes = (AttVal)this.attributes.clone();
- return node;
- }
-
- public AttVal getAttrByName(String name)
- {
- AttVal attr;
-
- for (attr = this.attributes; attr != null; attr = attr.next)
- {
- if (name != null &&
- attr.attribute != null &&
- attr.attribute.equals(name))
- break;
- }
-
- return attr;
- }
-
- /* default method for checking an element's attributes */
- public void checkAttributes( Lexer lexer )
- {
- AttVal attval;
-
- for (attval = this.attributes; attval != null; attval = attval.next)
- attval.checkAttribute( lexer, this );
- }
-
- public void checkUniqueAttributes(Lexer lexer)
- {
- AttVal attval;
-
- for (attval = this.attributes; attval != null; attval = attval.next) {
- if (attval.asp == null && attval.php == null)
- attval.checkUniqueAttribute(lexer, this);
- }
- }
-
- public void addAttribute(String name, String value)
- {
- AttVal av = new AttVal(null, null, null, null,
- '"', name, value);
- av.dict =
- AttributeTable.getDefaultAttributeTable().findAttribute(av);
-
- if (this.attributes == null)
- this.attributes = av;
- else /* append to end of attributes */
- {
- AttVal here = this.attributes;
-
- while (here.next != null)
- here = here.next;
-
- here.next = av;
- }
- }
-
- /* remove attribute from node then free it */
- public void removeAttribute(AttVal attr)
- {
- AttVal av;
- AttVal prev = null;
- AttVal next;
-
- for (av = this.attributes; av != null; av = next)
- {
- next = av.next;
-
- if (av == attr)
- {
- if (prev != null)
- prev.next = next;
- else
- this.attributes = next;
- }
- else
- prev = av;
- }
- }
-
- /* find doctype element */
- public Node findDocType()
- {
- Node node;
-
- for (node = this.content;
- node != null && node.type != DocTypeTag; node = node.next);
-
- return node;
- }
-
- public void discardDocType()
- {
- Node node;
-
- node = findDocType();
- if (node != null)
- {
- if (node.prev != null)
- node.prev.next = node.next;
- else
- node.parent.content = node.next;
-
- if (node.next != null)
- node.next.prev = node.prev;
-
- node.next = null;
- }
- }
-
- /* remove node from markup tree and discard it */
- public static Node discardElement(Node element)
- {
- Node next = null;
-
- if (element != null)
- {
- next = element.next;
- removeNode(element);
- }
-
- return next;
- }
-
- /* insert node into markup tree */
- public static void insertNodeAtStart(Node element, Node node)
- {
- node.parent = element;
-
- if (element.content == null)
- element.last = node;
- else
- element.content.prev = node; // AQ added 13 Apr 2000
-
- node.next = element.content;
- node.prev = null;
- element.content = node;
- }
-
- /* insert node into markup tree */
- public static void insertNodeAtEnd(Node element, Node node)
- {
- node.parent = element;
- node.prev = element.last;
-
- if (element.last != null)
- element.last.next = node;
- else
- element.content = node;
-
- element.last = node;
- }
-
- /*
- insert node into markup tree in pace of element
- which is moved to become the child of the node
- */
- public static void insertNodeAsParent(Node element, Node node)
- {
- node.content = element;
- node.last = element;
- node.parent = element.parent;
- element.parent = node;
-
- if (node.parent.content == element)
- node.parent.content = node;
-
- if (node.parent.last == element)
- node.parent.last = node;
-
- node.prev = element.prev;
- element.prev = null;
-
- if (node.prev != null)
- node.prev.next = node;
-
- node.next = element.next;
- element.next = null;
-
- if (node.next != null)
- node.next.prev = node;
- }
-
- /* insert node into markup tree before element */
- public static void insertNodeBeforeElement(Node element, Node node)
- {
- Node parent;
-
- parent = element.parent;
- node.parent = parent;
- node.next = element;
- node.prev = element.prev;
- element.prev = node;
-
- if (node.prev != null)
- node.prev.next = node;
-
- if (parent.content == element)
- parent.content = node;
- }
-
- /* insert node into markup tree after element */
- public static void insertNodeAfterElement(Node element, Node node)
- {
- Node parent;
-
- parent = element.parent;
- node.parent = parent;
-
- // AQ - 13Jan2000 fix for parent == null
- if (parent != null && parent.last == element)
- parent.last = node;
- else
- {
- node.next = element.next;
- // AQ - 13Jan2000 fix for node.next == null
- if (node.next != null)
- node.next.prev = node;
- }
-
- element.next = node;
- node.prev = element;
- }
-
- public static void trimEmptyElement(Lexer lexer, Node element)
- {
- TagTable tt = lexer.configuration.tt;
-
- if (lexer.canPrune(element))
- {
- if (element.type != TextNode)
- Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
-
- discardElement(element);
- }
- else if (element.tag == tt.tagP && element.content == null)
- {
- /* replace <p></p> by <br><br> to preserve formatting */
- Node node = lexer.inferredTag("br");
- Node.coerceNode(lexer, element, tt.tagBr);
- Node.insertNodeAfterElement(element, node);
- }
- }
-
- /*
- This maps
- <em>hello </em><strong>world</strong>
- to
- <em>hello</em> <strong>world</strong>
-
- If last child of element is a text node
- then trim trailing white space character
- moving it to after element's end tag.
- */
- public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
- {
- byte c;
- TagTable tt = lexer.configuration.tt;
-
- if (last != null && last.type == Node.TextNode &&
- last.end > last.start)
- {
- c = lexer.lexbuf[last.end - 1];
-
- if (c == 160 || c == (byte)' ')
- {
- /* take care with <td> </td> */
- if (element.tag == tt.tagTd ||
- element.tag == tt.tagTh)
- {
- if (last.end > last.start + 1)
- last.end -= 1;
- }
- else
- {
- last.end -= 1;
-
- if (((element.tag.model & Dict.CM_INLINE) != 0) &&
- !((element.tag.model & Dict.CM_FIELD) != 0))
- lexer.insertspace = true;
-
- /* if empty string then delete from parse tree */
- if (last.start == last.end)
- trimEmptyElement(lexer, last);
- }
- }
- }
- }
-
- /*
- This maps
- <p>hello<em> world</em>
- to
- <p>hello <em>world</em>
-
- Trims initial space, by moving it before the
- start tag, or if this element is the first in
- parent's content, then by discarding the space
- */
- public static void trimInitialSpace(Lexer lexer, Node element, Node text)
- {
- Node prev, node;
-
- // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
- // 31-Oct-00.
- if (text.type == TextNode && text.textarray[text.start] == (byte)' '
- && (text.start < text.end))
- {
- if (((element.tag.model & Dict.CM_INLINE) != 0) &&
- !((element.tag.model & Dict.CM_FIELD) != 0) &&
- element.parent.content != element)
- {
- prev = element.prev;
-
- if (prev != null && prev.type == TextNode)
- {
- if (prev.textarray[prev.end - 1] != (byte)' ')
- prev.textarray[prev.end++] = (byte)' ';
-
- ++element.start;
- }
- else /* create new node */
- {
- node = lexer.newNode();
- // Local fix for bug 228486 (GLP). This handles the case
- // where we need to create a preceeding text node but there are
- // no "slots" in textarray that we can steal from the current
- // element. Therefore, we create a new textarray containing
- // just the blank. When Tidy is fixed, this should be removed.
- if (element.start >= element.end)
- {
- node.start = 0;
- node.end = 1;
- node.textarray = new byte[1];
- }
- else
- {
- node.start = element.start++;
- node.end = element.start;
- node.textarray = element.textarray;
- }
- node.textarray[node.start] = (byte)' ';
- node.prev = prev;
- if (prev != null)
- prev.next = node;
- node.next = element;
- element.prev = node;
- node.parent = element.parent;
- }
- }
-
- /* discard the space in current node */
- ++text.start;
- }
- }
-
- /*
- Move initial and trailing space out.
- This routine maps:
-
- hello<em> world</em>
- to
- hello <em>world</em>
- and
- <em>hello </em><strong>world</strong>
- to
- <em>hello</em> <strong>world</strong>
- */
- public static void trimSpaces(Lexer lexer, Node element)
- {
- Node text = element.content;
- TagTable tt = lexer.configuration.tt;
-
- if (text != null && text.type == Node.TextNode &&
- element.tag != tt.tagPre)
- trimInitialSpace(lexer, element, text);
-
- text = element.last;
-
- if (text != null && text.type == Node.TextNode)
- trimTrailingSpace(lexer, element, text);
- }
-
- public boolean isDescendantOf(Dict tag)
- {
- Node parent;
-
- for (parent = this.parent;
- parent != null; parent = parent.parent)
- {
- if (parent.tag == tag)
- return true;
- }
-
- return false;
- }
-
- /*
- the doctype has been found after other tags,
- and needs moving to before the html element
- */
- public static void insertDocType(Lexer lexer, Node element, Node doctype)
- {
- TagTable tt = lexer.configuration.tt;
-
- Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
-
- while (element.tag != tt.tagHtml)
- element = element.parent;
-
- insertNodeBeforeElement(element, doctype);
- }
-
- public Node findBody(TagTable tt)
- {
- Node node;
-
- node = this.content;
-
- while (node != null && node.tag != tt.tagHtml)
- node = node.next;
-
- if (node == null)
- return null;
-
- node = node.content;
-
- while (node != null && node.tag != tt.tagBody)
- node = node.next;
-
- return node;
- }
-
- public boolean isElement()
- {
- return (this.type == StartTag || this.type == StartEndTag ? true : false);
- }
-
- /*
- unexpected content in table row is moved to just before
- the table in accordance with Netscape and IE. This code
- assumes that node hasn't been inserted into the row.
- */
- public static void moveBeforeTable(Node row, Node node, TagTable tt)
- {
- Node table;
-
- /* first find the table element */
- for (table = row.parent; table != null; table = table.parent)
- {
- if (table.tag == tt.tagTable)
- {
- if (table.parent.content == table)
- table.parent.content = node;
-
- node.prev = table.prev;
- node.next = table;
- table.prev = node;
- node.parent = table.parent;
-
- if (node.prev != null)
- node.prev.next = node;
-
- break;
- }
- }
- }
-
- /*
- if a table row is empty then insert an empty cell
- this practice is consistent with browser behavior
- and avoids potential problems with row spanning cells
- */
- public static void fixEmptyRow(Lexer lexer, Node row)
- {
- Node cell;
-
- if (row.content == null)
- {
- cell = lexer.inferredTag("td");
- insertNodeAtEnd(row, cell);
- Report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
- }
- }
-
- public static void coerceNode(Lexer lexer, Node node, Dict tag)
- {
- Node tmp = lexer.inferredTag(tag.name);
- Report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT);
- node.was = node.tag;
- node.tag = tag;
- node.type = StartTag;
- node.implicit = true;
- node.element = tag.name;
- }
-
- /* extract a node and its children from a markup tree */
- public static void removeNode(Node node)
- {
- if (node.prev != null)
- node.prev.next = node.next;
-
- if (node.next != null)
- node.next.prev = node.prev;
-
- if (node.parent != null)
- {
- if (node.parent.content == node)
- node.parent.content = node.next;
-
- if (node.parent.last == node)
- node.parent.last = node.prev;
- }
-
- node.parent = node.prev = node.next = null;
- }
-
- public static boolean insertMisc(Node element, Node node)
- {
- if (node.type == CommentTag ||
- node.type == ProcInsTag ||
- node.type == CDATATag ||
- node.type == SectionTag ||
- node.type == AspTag ||
- node.type == JsteTag ||
- node.type == PhpTag)
- {
- insertNodeAtEnd(element, node);
- return true;
- }
-
- return false;
- }
-
- /*
- used to determine how attributes
- without values should be printed
- this was introduced to deal with
- user defined tags e.g. Cold Fusion
- */
- public static boolean isNewNode(Node node)
- {
- if (node != null && node.tag != null)
- {
- return ((node.tag.model & Dict.CM_NEW) != 0);
- }
-
- return true;
- }
-
- public boolean hasOneChild()
- {
- return (this.content != null && this.content.next == null);
- }
-
- /* find html element */
- public Node findHTML(TagTable tt)
- {
- Node node;
-
- for (node = this.content;
- node != null && node.tag != tt.tagHtml; node = node.next);
-
- return node;
- }
-
- public Node findHEAD(TagTable tt)
- {
- Node node;
-
- node = this.findHTML(tt);
-
- if (node != null)
- {
- for (node = node.content;
- node != null && node.tag != tt.tagHead;
- node = node.next);
- }
-
- return node;
- }
-
- public boolean checkNodeIntegrity()
- {
- Node child;
- boolean found = false;
-
- if (this.prev != null)
- {
- if (this.prev.next != this)
- return false;
- }
-
- if (this.next != null)
- {
- if (this.next.prev != this)
- return false;
- }
-
- if (this.parent != null)
- {
- if (this.prev == null && this.parent.content != this)
- return false;
-
- if (this.next == null && this.parent.last != this)
- return false;
-
- for (child = this.parent.content; child != null; child = child.next)
- if (child == this)
- {
- found = true;
- break;
- }
-
- if (!found)
- return false;
- }
-
- for (child = this.content; child != null; child = child.next)
- if (!child.checkNodeIntegrity())
- return false;
-
- return true;
- }
-
- /*
- Add class="foo" to node
- */
- public static void addClass(Node node, String classname)
- {
- AttVal classattr = node.getAttrByName("class");
-
- /*
- if there already is a class attribute
- then append class name after a space
- */
- if (classattr != null)
- {
- classattr.value = classattr.value + " " + classname;
- }
- else /* create new class attribute */
- node.addAttribute("class", classname);
- }
-
- /* --------------------- DEBUG -------------------------- */
-
- private static final String[] nodeTypeString =
- {
- "RootNode",
- "DocTypeTag",
- "CommentTag",
- "ProcInsTag",
- "TextNode",
- "StartTag",
- "EndTag",
- "StartEndTag",
- "SectionTag",
- "AspTag",
- "PhpTag"
- };
-
- public String toString()
- {
- String s = "";
- Node n = this;
-
- while (n != null) {
- s += "[Node type=";
- s += nodeTypeString[n.type];
- s += ",element=";
- if (n.element != null)
- s += n.element;
- else
- s += "null";
- if (n.type == TextNode ||
- n.type == CommentTag ||
- n.type == ProcInsTag) {
- s += ",text=";
- if (n.textarray != null && n.start <= n.end) {
- s += "\"";
- s += Lexer.getString(n.textarray, n.start, n.end - n.start);
- s += "\"";
- } else {
- s += "null";
- }
- }
- s += ",content=";
- if (n.content != null)
- s += n.content.toString();
- else
- s += "null";
- s += "]";
- if (n.next != null)
- s += ",";
- n = n.next;
- }
- return s;
- }
- /* --------------------- END DEBUG ---------------------- */
-
-
- /* --------------------- DOM ---------------------------- */
-
- protected org.w3c.dom.Node adapter = null;
-
- protected org.w3c.dom.Node getAdapter()
- {
- if (adapter == null)
- {
- switch (this.type)
- {
- case RootNode:
- adapter = new DOMDocumentImpl(this);
- break;
- case StartTag:
- case StartEndTag:
- adapter = new DOMElementImpl(this);
- break;
- case DocTypeTag:
- adapter = new DOMDocumentTypeImpl(this);
- break;
- case CommentTag:
- adapter = new DOMCommentImpl(this);
- break;
- case TextNode:
- adapter = new DOMTextImpl(this);
- break;
- case CDATATag:
- adapter = new DOMCDATASectionImpl(this);
- break;
- case ProcInsTag:
- adapter = new DOMProcessingInstructionImpl(this);
- break;
- default:
- adapter = new DOMNodeImpl(this);
- }
- }
- return adapter;
- }
-
- protected Node cloneNode(boolean deep)
- {
- Node node = (Node)this.clone();
- if (deep)
- {
- Node child;
- Node newChild;
- for (child = this.content; child != null; child = child.next)
- {
- newChild = child.cloneNode(deep);
- insertNodeAtEnd(node, newChild);
- }
- }
- return node;
- }
-
-
- protected void setType(short newType)
- {
- this.type = newType;
- }
-
- /* --------------------- END DOM ------------------------ */
-
-}
+++ /dev/null
-/*
- * @(#)Out.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Output Stream
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-import java.io.OutputStream;
-
-public abstract class Out
-{
- public int encoding;
- public int state; /* for ISO 2022 */
- public OutputStream out;
-
- public abstract void outc(int c);
-
- public abstract void outc(byte c);
-
- public abstract void newline();
-
-};
-
+++ /dev/null
-/*
- * @(#)OutImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Output Stream Implementation
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-import java.io.IOException;
-
-public class OutImpl extends Out
-{
-
- public OutImpl()
- {
- this.out = null;
- }
-
- public void outc(byte c) {
- outc(((int)c) & 0xFF); // Convert to unsigned.
- }
-
- /* For mac users, should we map Unicode back to MacRoman? */
- public void outc(int c)
- {
- int ch;
-
- try {
- if (this.encoding == Configuration.UTF8)
- {
- if (c < 128)
- this.out.write(c);
- else if (c <= 0x7FF)
- {
- ch = (0xC0 | (c >> 6)); this.out.write(ch);
- ch = (0x80 | (c & 0x3F)); this.out.write(ch);
- }
- else if (c <= 0xFFFF)
- {
- ch = (0xE0 | (c >> 12)); this.out.write(ch);
- ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch);
- ch = (0x80 | (c & 0x3F)); this.out.write(ch);
- }
- else if (c <= 0x1FFFFF)
- {
- ch = (0xF0 | (c >> 18)); this.out.write(ch);
- ch = (0x80 | ((c >> 12) & 0x3F)); this.out.write(ch);
- ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch);
- ch = (0x80 | (c & 0x3F)); this.out.write(ch);
- }
- else
- {
- ch = (0xF8 | (c >> 24)); this.out.write(ch);
- ch = (0x80 | ((c >> 18) & 0x3F)); this.out.write(ch);
- ch = (0x80 | ((c >> 12) & 0x3F)); this.out.write(ch);
- ch = (0x80 | ((c >> 6) & 0x3F)); this.out.write(ch);
- ch = (0x80 | (c & 0x3F)); this.out.write(ch);
- }
- }
- else if (this.encoding == Configuration.ISO2022)
- {
- if (c == 0x1b) /* ESC */
- this.state = StreamIn.FSM_ESC;
- else
- {
- switch (this.state)
- {
- case StreamIn.FSM_ESC:
- if (c == '$')
- this.state = StreamIn.FSM_ESCD;
- else if (c == '(')
- this.state = StreamIn.FSM_ESCP;
- else
- this.state = StreamIn.FSM_ASCII;
- break;
-
- case StreamIn.FSM_ESCD:
- if (c == '(')
- this.state = StreamIn.FSM_ESCDP;
- else
- this.state = StreamIn.FSM_NONASCII;
- break;
-
- case StreamIn.FSM_ESCDP:
- this.state = StreamIn.FSM_NONASCII;
- break;
-
- case StreamIn.FSM_ESCP:
- this.state = StreamIn.FSM_ASCII;
- break;
-
- case StreamIn.FSM_NONASCII:
- c &= 0x7F;
- break;
- }
- }
-
- this.out.write(c);
- }
- else
- this.out.write(c);
- }
- catch (IOException e) {
- System.err.println("OutImpl.outc: " + e.toString());
- }
- }
-
- public void newline()
- {
- try {
- this.out.write(nlBytes);
- this.out.flush();
- }
- catch (IOException e) {
- System.err.println("OutImpl.newline: " + e.toString());
- }
- }
-
- private static final byte[] nlBytes =
- (System.getProperty("line.separator")).getBytes();
-
-};
-
+++ /dev/null
-/*
- * @(#)PPrint.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Pretty print parse tree
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-/*
- Block-level and unknown elements are printed on
- new lines and their contents indented 2 spaces
-
- Inline elements are printed inline.
-
- Inline content is wrapped on spaces (except in
- attribute values or preformatted text, after
- start tags and before end tags
-*/
-
-import java.io.FileOutputStream;
-import java.io.File;
-
-import java.io.IOException;
-import java.io.FileNotFoundException;
-
-public class PPrint {
-
- /* page transition effects */
-
- public static final short EFFECT_BLEND = -1;
- public static final short EFFECT_BOX_IN = 0;
- public static final short EFFECT_BOX_OUT = 1;
- public static final short EFFECT_CIRCLE_IN = 2;
- public static final short EFFECT_CIRCLE_OUT = 3;
- public static final short EFFECT_WIPE_UP = 4;
- public static final short EFFECT_WIPE_DOWN = 5;
- public static final short EFFECT_WIPE_RIGHT = 6;
- public static final short EFFECT_WIPE_LEFT = 7;
- public static final short EFFECT_VERT_BLINDS = 8;
- public static final short EFFECT_HORZ_BLINDS = 9;
- public static final short EFFECT_CHK_ACROSS = 10;
- public static final short EFFECT_CHK_DOWN = 11;
- public static final short EFFECT_RND_DISSOLVE = 12;
- public static final short EFFECT_SPLIT_VIRT_IN = 13;
- public static final short EFFECT_SPLIT_VIRT_OUT = 14;
- public static final short EFFECT_SPLIT_HORZ_IN = 15;
- public static final short EFFECT_SPLIT_HORZ_OUT = 16;
- public static final short EFFECT_STRIPS_LEFT_DOWN = 17;
- public static final short EFFECT_STRIPS_LEFT_UP = 18;
- public static final short EFFECT_STRIPS_RIGHT_DOWN = 19;
- public static final short EFFECT_STRIPS_RIGHT_UP = 20;
- public static final short EFFECT_RND_BARS_HORZ = 21;
- public static final short EFFECT_RND_BARS_VERT = 22;
- public static final short EFFECT_RANDOM = 23;
-
- /*
-  Text-mode flags handed to the print routines as "mode".
-  The non-zero values are distinct bits; they are combined
-  with | and tested with & (see printChar).
- */
- private static final short NORMAL = 0;
- private static final short PREFORMATTED = 1;
- private static final short COMMENT = 2;
- private static final short ATTRIBVALUE = 4;
- private static final short NOWRAP = 8;
- private static final short CDATA = 16;
-
- /* pending output line, one int per character; filled and grown by addC */
- private int[] linebuf = null;
- /* allocated length of linebuf as tracked by addC */
- private int lbufsize = 0;
- /* number of characters currently held in linebuf */
- private int linelen = 0;
- /* index in linebuf at which the line may be broken; 0 means no break point */
- private int wraphere = 0;
- /* while set, flushLine/condFlushLine write the indent only if IndentAttributes is on; both clear it */
- private boolean inAttVal = false;
- /* set by printAttrValue inside a nested quoted string; wrapLine then ends the broken line with " \" */
- private boolean InString = false;
-
- /* NOTE(review): presumably state for the slide-bursting output (countSlides/printSlide); not used by the routines shown here - confirm */
- private int slide = 0;
- private int count = 0;
- private Node slidecontent = null;
-
- /* options read by the print routines (wraplen, spaces, CharEncoding, ...) */
- private Configuration configuration;
-
- /* Creates a pretty printer that takes its options from the given configuration. */
- public PPrint(Configuration configuration)
- {
- this.configuration = configuration;
- }
-
- /*
- 1010 A
- 1011 B
- 1100 C
- 1101 D
- 1110 E
- 1111 F
- */
-
- /*
-  Decode the UTF-8 sequence that starts at str[start].
-
-  str   - buffer holding the encoded bytes
-  start - index of the first (lead) byte of the sequence
-  ch    - receives the decoded code point in ch.value
-
-  Returns one less than the number of bytes consumed, so a
-  caller that advances its index by one per loop iteration
-  can simply add the result.
-
-  A lead byte that is not a valid multi-byte lead (including a
-  stray 10XX XXXX continuation byte) is returned as is. A
-  sequence cut short by the end of the buffer yields U+FFFD
-  and consumes the bytes that are left, instead of reading
-  past the end of the array.
- */
- public static int getUTF8(byte[] str, int start, MutableInteger ch)
- {
-     int c, n, i, bytes;
-
-     c = ((int)str[start]) & 0xFF; // Convert to unsigned.
-
-     if ((c & 0xE0) == 0xC0) /* 110X XXXX two bytes */
-     {
-         n = c & 31;
-         bytes = 2;
-     }
-     else if ((c & 0xF0) == 0xE0) /* 1110 XXXX three bytes */
-     {
-         n = c & 15;
-         bytes = 3;
-     }
-     else if ((c & 0xF8) == 0xF0) /* 1111 0XXX four bytes */
-     {
-         n = c & 7;
-         bytes = 4;
-     }
-     else if ((c & 0xFC) == 0xF8) /* 1111 10XX five bytes */
-     {
-         n = c & 3;
-         bytes = 5;
-     }
-     else if ((c & 0xFE) == 0xFC) /* 1111 110X six bytes */
-     {
-         n = c & 1;
-         bytes = 6;
-     }
-     else /* 0XXX XXXX one byte */
-     {
-         ch.value = c;
-         return 0;
-     }
-
-     /* truncated sequence: report a replacement character, stay inside the array */
-     if (start + bytes > str.length)
-     {
-         ch.value = 0xFFFD;
-         return str.length - start - 1;
-     }
-
-     /* successor bytes should have the form 10XX XXXX */
-     for (i = 1; i < bytes; ++i)
-     {
-         c = ((int)str[start + i]) & 0xFF; // Convert to unsigned.
-         n = (n << 6) | (c & 0x3F);
-     }
-
-     ch.value = n;
-     return bytes - 1;
- }
-
- /*
-  Store the value c as a UTF-8 encoded byte sequence.
-
-  buf   - destination buffer; must have room for up to 6 bytes at start
-  start - index at which the first byte is written
-  c     - value to encode
-
-  Returns the index just past the last byte written.
-
-  Values above 0x3FFFFFF use the six-byte form (1111 110X),
-  the counterpart of what getUTF8 decodes; previously they
-  were squeezed into five bytes with a corrupt lead byte.
- */
- public static int putUTF8(byte[] buf, int start, int c)
- {
-     if (c < 128)
-         buf[start++] = (byte)c;
-     else if (c <= 0x7FF)
-     {
-         buf[start++] = (byte)(0xC0 | (c >> 6));
-         buf[start++] = (byte)(0x80 | (c & 0x3F));
-     }
-     else if (c <= 0xFFFF)
-     {
-         buf[start++] = (byte)(0xE0 | (c >> 12));
-         buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F));
-         buf[start++] = (byte)(0x80 | (c & 0x3F));
-     }
-     else if (c <= 0x1FFFFF)
-     {
-         buf[start++] = (byte)(0xF0 | (c >> 18));
-         buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F));
-         buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F));
-         buf[start++] = (byte)(0x80 | (c & 0x3F));
-     }
-     else if (c <= 0x3FFFFFF)
-     {
-         buf[start++] = (byte)(0xF8 | (c >> 24));
-         buf[start++] = (byte)(0x80 | ((c >> 18) & 0x3F));
-         buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F));
-         buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F));
-         buf[start++] = (byte)(0x80 | (c & 0x3F));
-     }
-     else
-     {
-         buf[start++] = (byte)(0xFC | (c >> 30));
-         buf[start++] = (byte)(0x80 | ((c >> 24) & 0x3F));
-         buf[start++] = (byte)(0x80 | ((c >> 18) & 0x3F));
-         buf[start++] = (byte)(0x80 | ((c >> 12) & 0x3F));
-         buf[start++] = (byte)(0x80 | ((c >> 6) & 0x3F));
-         buf[start++] = (byte)(0x80 | (c & 0x3F));
-     }
-
-     return start;
- }
-
- private void addC(int c, int index)
- {
- if (index + 1 >= lbufsize)
- {
- while (index + 1 >= lbufsize)
- {
- if (lbufsize == 0)
- lbufsize = 256;
- else
- lbufsize = lbufsize * 2;
- }
-
- int[] temp = new int[ lbufsize ];
- if (linebuf != null)
- System.arraycopy(linebuf, 0, temp, 0, index);
- linebuf = temp;
- }
-
- linebuf[index] = c;
- }
-
- /*
-  Break the pending line at the recorded wrap point: write the
-  indent and the characters before wraphere, end the line, and
-  move whatever follows the wrap point to the front of the
-  buffer. Does nothing when no wrap point is recorded. Inside a
-  quoted string (InString) the broken line ends with " \".
- */
- private void wrapLine(Out fout, int indent)
- {
-     if (wraphere == 0)
-         return;
-
-     int k;
-
-     for (k = 0; k < indent; ++k)
-         fout.outc((int)' ');
-
-     for (k = 0; k < wraphere; ++k)
-         fout.outc(linebuf[k]);
-
-     if (InString)
-     {
-         fout.outc((int)' ');
-         fout.outc((int)'\\');
-     }
-
-     fout.newline();
-
-     if (linelen <= wraphere)
-         linelen = 0;
-     else
-     {
-         /* a space at the break point is dropped, not carried over */
-         if (linebuf[wraphere] == ' ')
-             ++wraphere;
-
-         /* terminate the remainder so the copy below knows where to stop */
-         addC('\0', linelen);
-
-         int dst = 0;
-         for (int src = wraphere; ; ++src, ++dst)
-         {
-             linebuf[dst] = linebuf[src];
-             if (linebuf[src] == 0)
-                 break;
-         }
-
-         linelen -= wraphere;
-     }
-
-     wraphere = 0;
- }
-
- /*
-  Break the pending line at the wrap point while inside an
-  attribute value. The emitted part is always followed by a
-  space, and by a backslash as well when inString is true;
-  the rest of the buffer is moved to the front.
- */
- private void wrapAttrVal(Out fout, int indent, boolean inString)
- {
-     int k;
-
-     for (k = 0; k < indent; ++k)
-         fout.outc((int)' ');
-
-     for (k = 0; k < wraphere; ++k)
-         fout.outc(linebuf[k]);
-
-     fout.outc((int)' ');
-
-     if (inString)
-         fout.outc((int)'\\');
-
-     fout.newline();
-
-     if (linelen <= wraphere)
-         linelen = 0;
-     else
-     {
-         /* a space at the break point is dropped, not carried over */
-         if (linebuf[wraphere] == ' ')
-             ++wraphere;
-
-         /* terminate the remainder so the copy below knows where to stop */
-         addC('\0', linelen);
-
-         int dst = 0;
-         for (int src = wraphere; ; ++src, ++dst)
-         {
-             linebuf[dst] = linebuf[src];
-             if (linebuf[src] == 0)
-                 break;
-         }
-
-         linelen -= wraphere;
-     }
-
-     wraphere = 0;
- }
-
- public void flushLine(Out fout, int indent)
- {
- int i;
-
- if (linelen > 0)
- {
- if (indent + linelen >= this.configuration.wraplen)
- wrapLine(fout, indent);
-
- if (!inAttVal || this.configuration.IndentAttributes)
- {
- for (i = 0; i < indent; ++i)
- fout.outc((int)' ');
- }
-
- for (i = 0; i < linelen; ++i)
- fout.outc(linebuf[i]);
- }
-
- fout.newline();
- linelen = 0;
- wraphere = 0;
- inAttVal = false;
- }
-
- public void condFlushLine(Out fout, int indent)
- {
- int i;
-
- if (linelen > 0)
- {
- if (indent + linelen >= this.configuration.wraplen)
- wrapLine(fout, indent);
-
- if (!inAttVal || this.configuration.IndentAttributes)
- {
- for (i = 0; i < indent; ++i)
- fout.outc((int)' ');
- }
-
- for (i = 0; i < linelen; ++i)
- fout.outc(linebuf[i]);
-
- fout.newline();
- linelen = 0;
- wraphere = 0;
- inAttVal = false;
- }
- }
-
- /* append every character of text to the line buffer */
- private void appendEntityText(String text)
- {
-     for (int k = 0; k < text.length(); k++)
-         addC((int)text.charAt(k), linelen++);
- }
-
- /*
-  Build the entity reference for c: the named entity when named
-  entities are allowed and the entity table knows one, the
-  numeric form &#NNN; otherwise.
- */
- private String entityReferenceFor(int c)
- {
-     String name = null;
-
-     if (!this.configuration.NumEntities)
-         name = EntityTable.getDefaultEntityTable().entityName((short)c);
-
-     return (name != null) ? "&" + name + ";" : "&#" + c + ";";
- }
-
- /*
-  Append one character to the line buffer, escaped as the mode
-  flags and the configuration require. A plain space outside
-  preformatted text, comments and attribute values also marks
-  a possible wrap point.
- */
- private void printChar(int c, short mode)
- {
-     if (c == ' ' && (mode & (PREFORMATTED | COMMENT | ATTRIBVALUE)) == 0)
-     {
-         if ((mode & NOWRAP) == 0)
-             wraphere = linelen;
-         else
-         {
-             /*
-               coerce a space character to a non-breaking space;
-               by default XML doesn't define &nbsp; so the numeric
-               form is used there
-             */
-             if (this.configuration.NumEntities || this.configuration.XmlTags)
-                 appendEntityText("&#160;");
-             else
-                 appendEntityText("&nbsp;");
-             return;
-         }
-     }
-
-     /* comment characters are passed raw */
-     if ((mode & COMMENT) != 0)
-     {
-         addC(c, linelen++);
-         return;
-     }
-
-     /* except in CDATA map < to &lt; etc. */
-     if ((mode & CDATA) == 0)
-     {
-         if (c == '<')
-         {
-             appendEntityText("&lt;");
-             return;
-         }
-
-         if (c == '>')
-         {
-             appendEntityText("&gt;");
-             return;
-         }
-
-         /*
-           naked '&' chars can be left alone or quoted as &amp;
-           The latter is required for XML where naked '&' are
-           illegal.
-         */
-         if (c == '&' && this.configuration.QuoteAmpersand)
-         {
-             appendEntityText("&amp;");
-             return;
-         }
-
-         if (c == '"' && this.configuration.QuoteMarks)
-         {
-             appendEntityText("&quot;");
-             return;
-         }
-
-         if (c == '\'' && this.configuration.QuoteMarks)
-         {
-             appendEntityText("&#39;");
-             return;
-         }
-
-         if (c == 160 && this.configuration.CharEncoding != Configuration.RAW)
-         {
-             if (!this.configuration.QuoteNbsp)
-                 addC(c, linelen++);
-             else if (this.configuration.NumEntities)
-                 appendEntityText("&#160;");
-             else
-                 appendEntityText("&nbsp;");
-
-             return;
-         }
-     }
-
-     /* otherwise ISO 2022 characters are passed raw */
-     if (this.configuration.CharEncoding == Configuration.ISO2022 ||
-         this.configuration.CharEncoding == Configuration.RAW)
-     {
-         addC(c, linelen++);
-         return;
-     }
-
-     /* if preformatted text, map the non-breaking space to a space */
-     if (c == 160 && (mode & PREFORMATTED) != 0)
-     {
-         addC(' ', linelen++);
-         return;
-     }
-
-     /*
-       Filters from Word and PowerPoint often use smart quotes.
-       The corresponding HTML 4.0 entities are not widely
-       supported, so dashes and quotation marks are converted to
-       the nearest ASCII equivalent when cleaning is requested.
-     */
-     if (this.configuration.MakeClean)
-     {
-         switch (c) {
-         case 0x2013:
-         case 0x2014:
-             c = '-';
-             break;
-         case 0x2018:
-         case 0x2019:
-         case 0x201A:
-             c = '\'';
-             break;
-         case 0x201C:
-         case 0x201D:
-         case 0x201E:
-             c = '"';
-             break;
-         }
-     }
-
-     /* don't map latin-1 chars to entities */
-     if (this.configuration.CharEncoding == Configuration.LATIN1)
-     {
-         if (c > 255) /* multi byte chars */
-             appendEntityText(entityReferenceFor(c));
-         else if (c > 126 && c < 160)
-             appendEntityText("&#" + c + ";");
-         else
-             addC(c, linelen++);
-
-         return;
-     }
-
-     /* don't map utf8 chars to entities */
-     if (this.configuration.CharEncoding == Configuration.UTF8)
-     {
-         addC(c, linelen++);
-         return;
-     }
-
-     /* use numeric entities only for XML */
-     if (this.configuration.XmlTags)
-     {
-         /* if ASCII use numeric entities for chars > 127, otherwise output char raw */
-         if (c > 127 && this.configuration.CharEncoding == Configuration.ASCII)
-             appendEntityText("&#" + c + ";");
-         else
-             addC(c, linelen++);
-
-         return;
-     }
-
-     /* default treatment for ASCII */
-     if (c > 126 || (c < ' ' && c != '\t'))
-     {
-         appendEntityText(entityReferenceFor(c));
-         return;
-     }
-
-     addC(c, linelen++);
- }
-
- /*
-  Print the bytes textarray[start..end) as text in the given
-  mode. The bytes are decoded from UTF-8 here; the line buffer
-  holds plain int values so Unicode characters can be kept
-  unencoded, and re-encoding is left to the outc routine that
-  flushes the buffer. A newline in the text flushes the line.
- */
- private void printText(Out fout, short mode, int indent,
-                        byte[] textarray, int start, int end)
- {
-     MutableInteger decoded = new MutableInteger();
-     int pos = start;
-
-     while (pos < end)
-     {
-         if (indent + linelen >= this.configuration.wraplen)
-             wrapLine(fout, indent);
-
-         int ch = ((int)textarray[pos]) & 0xFF; // Convert to unsigned.
-
-         /* look for UTF-8 multibyte character */
-         if (ch > 0x7F)
-         {
-             pos += getUTF8(textarray, pos, decoded);
-             ch = decoded.value;
-         }
-
-         ++pos;
-
-         if (ch == '\n')
-             flushLine(fout, indent);
-         else
-             printChar(ch, mode);
-     }
- }
-
- private void printString(Out fout, int indent, String str)
- {
- for (int i = 0; i < str.length(); i++ )
- addC((int)str.charAt(i), linelen++);
- }
-
- /*
-  Print "=" followed by the attribute value enclosed in delim.
-
-  fout      - output the line buffer is flushed to when wrapping
-  indent    - current indentation
-  value     - attribute value; null prints an empty value
-  delim     - quote character to use; 0 selects the double quote
-  wrappable - whether the value may be broken at spaces
-
-  Occurrences of the delimiter inside the value are always
-  written as &quot; or &#39; so they cannot end the value
-  early. The other quote character is escaped only when
-  QuoteMarks is set, and toggles the "inside a string" state
-  used to end wrapped lines with a backslash. Values that
-  start like ASP, Tango or PHP instructions are printed in
-  CDATA mode, i.e. without escaping < and >.
- */
- private void printAttrValue(Out fout, int indent, String value, int delim, boolean wrappable)
- {
-     int c;
-     MutableInteger ci = new MutableInteger();
-     boolean wasinstring = false;
-     byte[] valueChars = null;
-     int i;
-     short mode = (wrappable ? (short)(NORMAL | ATTRIBVALUE) :
-                               (short)(PREFORMATTED | ATTRIBVALUE));
-
-     if (value != null)
-     {
-         valueChars = Lexer.getBytes(value);
-     }
-
-     /* look for ASP, Tango or PHP instructions for computed attribute value */
-     if (valueChars != null && valueChars.length >= 5 && valueChars[0] == '<')
-     {
-         if (valueChars[1] == '%' || valueChars[1] == '@'||
-             (new String(valueChars, 0, 5)).equals("<?php"))
-             mode |= CDATA;
-     }
-
-     if (delim == 0)
-         delim = '"';
-
-     addC('=', linelen++);
-
-     /* don't wrap after "=" for xml documents */
-     if (!this.configuration.XmlOut) {
-
-         if (indent + linelen < this.configuration.wraplen)
-             wraphere = linelen;
-
-         if (indent + linelen >= this.configuration.wraplen)
-             wrapLine(fout, indent);
-
-         if (indent + linelen < this.configuration.wraplen)
-             wraphere = linelen;
-         else
-             condFlushLine(fout, indent);
-     }
-
-     addC(delim, linelen++);
-
-     if (value != null)
-     {
-         InString = false;
-
-         i = 0;
-         while (i < valueChars.length)
-         {
-             c = ((int)valueChars[i]) & 0xFF; // Convert to unsigned.
-
-             /* remember the last space that still fits as the wrap point */
-             if (wrappable && c == ' ' && indent + linelen < this.configuration.wraplen)
-             {
-                 wraphere = linelen;
-                 wasinstring = InString;
-             }
-
-             if (wrappable && wraphere > 0 && indent + linelen >= this.configuration.wraplen)
-                 wrapAttrVal(fout, indent, wasinstring);
-
-             if (c == delim)
-             {
-                 /* the delimiter itself must never appear raw inside the value */
-                 printString(fout, indent, (c == '"' ? "&quot;" : "&#39;"));
-
-                 ++i;
-                 continue;
-             }
-             else if (c == '"')
-             {
-                 if (this.configuration.QuoteMarks)
-                     printString(fout, indent, "&quot;");
-                 else
-                     addC('"', linelen++);
-
-                 /* a double quote inside a single-quoted value opens or closes a string */
-                 if (delim == '\'')
-                     InString = !InString;
-
-                 ++i;
-                 continue;
-             }
-             else if (c == '\'')
-             {
-                 if (this.configuration.QuoteMarks)
-                     printString(fout, indent, "&#39;");
-                 else
-                     addC('\'', linelen++);
-
-                 /* a single quote inside a double-quoted value opens or closes a string */
-                 if (delim == '"')
-                     InString = !InString;
-
-                 ++i;
-                 continue;
-             }
-
-             /* look for UTF-8 multibyte character */
-             if (c > 0x7F)
-             {
-                 i += getUTF8(valueChars, i, ci);
-                 c = ci.value;
-             }
-
-             ++i;
-
-             if (c == '\n')
-             {
-                 flushLine(fout, indent);
-                 continue;
-             }
-
-             printChar(c, mode);
-         }
-     }
-
-     InString = false;
-     addC(delim, linelen++);
- }
-
- private void printAttribute(Out fout, int indent, Node node, AttVal attr)
- {
- String name;
- boolean wrappable = false;
-
- if (this.configuration.IndentAttributes)
- {
- flushLine(fout, indent);
- indent += this.configuration.spaces;
- }
-
- name = attr.attribute;
-
- if (indent + linelen >= this.configuration.wraplen)
- wrapLine(fout, indent);
-
- if (!this.configuration.XmlTags && !this.configuration.XmlOut && attr.dict != null)
- {
- if (AttributeTable.getDefaultAttributeTable().isScript(name))
- wrappable = this.configuration.WrapScriptlets;
- else if (!attr.dict.nowrap && this.configuration.WrapAttVals)
- wrappable = true;
- }
-
- if (indent + linelen < this.configuration.wraplen)
- {
- wraphere = linelen;
- addC(' ', linelen++);
- }
- else
- {
- condFlushLine(fout, indent);
- addC(' ', linelen++);
- }
-
- for (int i = 0; i < name.length(); i++ )
- addC((int)Lexer.foldCase(name.charAt(i),
- this.configuration.UpperCaseAttrs,
- this.configuration.XmlTags),
- linelen++);
-
- if (indent + linelen >= this.configuration.wraplen)
- wrapLine(fout, indent);
-
- if (attr.value == null)
- {
- if (this.configuration.XmlTags || this.configuration.XmlOut)
- printAttrValue(fout, indent, attr.attribute, attr.delim, true);
- else if (!attr.isBoolAttribute() && !Node.isNewNode(node))
- printAttrValue(fout, indent, "", attr.delim, true);
- else if (indent + linelen < this.configuration.wraplen)
- wraphere = linelen;
-
- }
- else
- printAttrValue(fout, indent, attr.value, attr.delim, wrappable);
- }
-
- /*
-  Print the attribute list starting at attr. The tail of the
-  list is printed first (by recursion), then attr itself; an
-  entry may be a regular attribute or an embedded ASP/PHP
-  fragment.
-
-  For XML output with XmlSpace enabled, xml:space="preserve"
-  is appended to elements whose white space must be kept and
-  that do not carry the attribute already. This is done only
-  in the outermost call (attr is the head of node's list, or
-  null for an element without attributes): doing it at every
-  recursion level wrote the attribute once per existing
-  attribute, and repeated attributes are not well-formed XML.
- */
- private void printAttrs(Out fout, int indent,
-                         Node node, AttVal attr)
- {
-     if (attr != null)
-     {
-         if (attr.next != null)
-             printAttrs(fout, indent, node, attr.next);
-
-         if (attr.attribute != null)
-             printAttribute(fout, indent, node, attr);
-         else if (attr.asp != null)
-         {
-             addC(' ', linelen++);
-             printAsp(fout, indent, attr.asp);
-         }
-         else if (attr.php != null)
-         {
-             addC(' ', linelen++);
-             printPhp(fout, indent, attr.php);
-         }
-     }
-
-     /* add xml:space attribute to pre and other elements, once per element */
-     if (configuration.XmlOut &&
-         configuration.XmlSpace &&
-         attr == node.attributes &&
-         ParserImpl.XMLPreserveWhiteSpace(node, configuration.tt) &&
-         node.getAttrByName("xml:space") == null)
-         printString(fout, indent, " xml:space=\"preserve\"");
- }
-
- /*
-  Tells whether the line may be wrapped immediately after the
-  start tag of node. That is the case when node is not an
-  inline element, when it follows a text node ending in a
-  space, non-breaking space or newline, or when it is the
-  first child of an inline element to which the same rule
-  applies. This behaviour was reverse engineered from
-  Netscape 3.0.
- */
- private static boolean afterSpace(Node node)
- {
-     Node current = node;
-
-     while (true)
-     {
-         if (current == null || current.tag == null ||
-             (current.tag.model & Dict.CM_INLINE) == 0)
-             return true;
-
-         Node before = current.prev;
-
-         if (before != null)
-         {
-             if (before.type != Node.TextNode || before.end <= before.start)
-                 return false;
-
-             int last = ((int)before.textarray[before.end - 1]) & 0xFF; // Convert to unsigned.
-
-             return last == 160 || last == ' ' || last == '\n';
-         }
-
-         /* first child: the answer depends on the enclosing element */
-         current = current.parent;
-     }
- }
-
- /*
-  Print the start tag of node (or an end tag when node is an
-  EndTag node) with its attributes. For XML or "voyager"
-  output, empty elements are closed with " /". Outside
-  preformatted text the position after the tag may become a
-  wrap point.
- */
- private void printTag(Lexer lexer, Out fout, short mode, int indent, Node node)
- {
-     TagTable tt = this.configuration.tt;
-
-     addC('<', linelen++);
-
-     if (node.type == Node.EndTag)
-         addC('/', linelen++);
-
-     String tagName = node.element;
-     int pos = 0;
-     while (pos < tagName.length())
-     {
-         addC((int)Lexer.foldCase(tagName.charAt(pos),
-                                  this.configuration.UpperCaseTags,
-                                  this.configuration.XmlTags),
-              linelen++);
-         pos++;
-     }
-
-     printAttrs(fout, indent, node, node.attributes);
-
-     if ((this.configuration.XmlOut || lexer != null && lexer.isvoyager) &&
-         (node.type == Node.StartEndTag || (node.tag.model & Dict.CM_EMPTY) != 0))
-     {
-         addC(' ', linelen++); /* compatibility hack */
-         addC('/', linelen++);
-     }
-
-     addC('>', linelen++);
-
-     if (node.type == Node.StartEndTag || (mode & PREFORMATTED) != 0)
-         return;
-
-     if (indent + linelen >= this.configuration.wraplen)
-         wrapLine(fout, indent);
-
-     if (indent + linelen >= this.configuration.wraplen)
-     {
-         condFlushLine(fout, indent);
-         return;
-     }
-
-     /*
-       wrap after start tag if is <br/> or if it's not
-       inline or it is an empty tag followed by </a>
-     */
-     if (afterSpace(node) &&
-         (mode & NOWRAP) == 0 &&
-         ((node.tag.model & Dict.CM_INLINE) == 0 ||
-          node.tag == tt.tagBr ||
-          ((node.tag.model & Dict.CM_EMPTY) != 0 &&
-           node.next == null &&
-           node.parent.tag == tt.tagA)))
-     {
-         wraphere = linelen;
-     }
- }
-
- /*
-  Print the end tag of node, with the name case-folded per
-  UpperCaseTags/XmlTags.
-
-  No wrap point is recorded before the tag: Netscape ignores
-  the SGML standard by not ignoring a line break before </A>
-  or </U> etc., and would render it as an underlined space.
-  fout, mode and indent are therefore unused here.
- */
- private void printEndTag(Out fout, short mode, int indent, Node node)
- {
-     addC('<', linelen++);
-     addC('/', linelen++);
-
-     String tagName = node.element;
-     int pos = 0;
-     while (pos < tagName.length())
-     {
-         addC((int)Lexer.foldCase(tagName.charAt(pos),
-                                  this.configuration.UpperCaseTags,
-                                  this.configuration.XmlTags),
-              linelen++);
-         pos++;
-     }
-
-     addC('>', linelen++);
- }
-
- /*
-  Print a comment node as <!-- ... -->. The position before
-  the comment is recorded as a wrap point if it fits; the text
-  is passed through raw (COMMENT mode). The line is flushed
-  afterwards when the node asks for a line break.
- */
- private void printComment(Out fout, int indent, Node node)
- {
-     if (indent + linelen < this.configuration.wraplen)
-         wraphere = linelen;
-
-     printString(fout, indent, "<!--");
-
-     printText(fout, COMMENT, indent,
-               node.textarray, node.start, node.end);
-
-     // See Lexer.java: AQ 8Jul2000
-     printString(fout, indent, "-->");
-
-     if (node.linebreak)
-         flushLine(fout, indent);
- }
-
- /*
-  Print a document type declaration, <!DOCTYPE ...>, on a line
-  of its own. Quote marks in the declaration must stay
-  literal, so QuoteMarks is switched off while it is printed.
-  The caller's setting is restored in a finally block so the
-  configuration is not left modified if printing throws.
- */
- private void printDocType(Out fout, int indent, Node node)
- {
-     boolean q = this.configuration.QuoteMarks;
-
-     this.configuration.QuoteMarks = false;
-
-     try
-     {
-         if (indent + linelen < this.configuration.wraplen)
-             wraphere = linelen;
-
-         condFlushLine(fout, indent);
-
-         addC('<', linelen++);
-         addC('!', linelen++);
-         addC('D', linelen++);
-         addC('O', linelen++);
-         addC('C', linelen++);
-         addC('T', linelen++);
-         addC('Y', linelen++);
-         addC('P', linelen++);
-         addC('E', linelen++);
-         addC(' ', linelen++);
-
-         if (indent + linelen < this.configuration.wraplen)
-             wraphere = linelen;
-
-         printText(fout, (short)0, indent,
-                   node.textarray, node.start, node.end);
-
-         if (linelen < this.configuration.wraplen)
-             wraphere = linelen;
-
-         addC('>', linelen++);
-     }
-     finally
-     {
-         this.configuration.QuoteMarks = q;
-     }
-
-     condFlushLine(fout, indent);
- }
-
- /*
-  Print a processing instruction as <? ... ?> and flush the
-  line. The text is printed in CDATA mode so < and > pass
-  unescaped. A closing '?' is added unless the text already
-  ends with one; an instruction with no text is handled too
-  (it used to index one position before the start of the text).
- */
- private void printPI(Out fout, int indent, Node node)
- {
-     if (indent + linelen < this.configuration.wraplen)
-         wraphere = linelen;
-
-     addC('<', linelen++);
-     addC('?', linelen++);
-
-     /* set CDATA to pass < and > unescaped */
-     printText(fout, CDATA, indent,
-               node.textarray, node.start, node.end);
-
-     if (node.end <= node.start || node.textarray[node.end - 1] != (byte)'?')
-         addC('?', linelen++);
-
-     addC('>', linelen++);
-     condFlushLine(fout, indent);
- }
-
- /*
-  Print an ASP node as <% ... %>. Note ASP and JSTE share the
-  <% ... %> syntax, so wrapping is disabled when either
-  WrapAsp or WrapJste is off. The wrap limit is part of the
-  caller's configuration; it is restored in a finally block so
-  it cannot stay at the "no wrapping" value if printing throws.
- */
- private void printAsp(Out fout, int indent, Node node)
- {
-     int savewraplen = this.configuration.wraplen;
-
-     /* disable wrapping if so requested */
-     if (!this.configuration.WrapAsp || !this.configuration.WrapJste)
-         this.configuration.wraplen = 0xFFFFFF; /* a very large number */
-
-     try
-     {
-         addC('<', linelen++);
-         addC('%', linelen++);
-
-         printText(fout, (this.configuration.WrapAsp ? CDATA : COMMENT), indent,
-                   node.textarray, node.start, node.end);
-
-         addC('%', linelen++);
-         addC('>', linelen++);
-         /* condFlushLine(fout, indent); */
-     }
-     finally
-     {
-         this.configuration.wraplen = savewraplen;
-     }
- }
-
- /*
-  Print a JSTE node in its <# ... #> form (JSTE also supports
-  that syntax besides <% ... %>). Wrapping is disabled unless
-  WrapJste is set. The wrap limit is part of the caller's
-  configuration; it is restored in a finally block so it cannot
-  stay at the "no wrapping" value if printing throws.
- */
- private void printJste(Out fout, int indent, Node node)
- {
-     int savewraplen = this.configuration.wraplen;
-
-     /* disable wrapping if so requested */
-     if (!this.configuration.WrapJste)
-         this.configuration.wraplen = 0xFFFFFF; /* a very large number */
-
-     try
-     {
-         addC('<', linelen++);
-         addC('#', linelen++);
-
-         printText(fout, (this.configuration.WrapJste ? CDATA : COMMENT), indent,
-                   node.textarray, node.start, node.end);
-
-         addC('#', linelen++);
-         addC('>', linelen++);
-         /* condFlushLine(fout, indent); */
-     }
-     finally
-     {
-         this.configuration.wraplen = savewraplen;
-     }
- }
-
- /*
-  Print a PHP node as <? ... ?> (PHP is based on XML
-  processing instructions). Wrapping is disabled unless
-  WrapPhp is set. The wrap limit is part of the caller's
-  configuration; it is restored in a finally block so it cannot
-  stay at the "no wrapping" value if printing throws.
- */
- private void printPhp(Out fout, int indent, Node node)
- {
-     int savewraplen = this.configuration.wraplen;
-
-     /* disable wrapping if so requested */
-     if (!this.configuration.WrapPhp)
-         this.configuration.wraplen = 0xFFFFFF; /* a very large number */
-
-     try
-     {
-         addC('<', linelen++);
-         addC('?', linelen++);
-
-         printText(fout, (this.configuration.WrapPhp ? CDATA : COMMENT), indent,
-                   node.textarray, node.start, node.end);
-
-         addC('?', linelen++);
-         addC('>', linelen++);
-         /* PCondFlushLine(fout, indent); */
-     }
-     finally
-     {
-         this.configuration.wraplen = savewraplen;
-     }
- }
-
- /*
-  Print a CDATA node as <![CDATA[ ... ]]> on a line of its
-  own. The content is passed through raw (COMMENT mode) with
-  wrapping disabled. The wrap limit is part of the caller's
-  configuration; it is restored in a finally block so it cannot
-  stay at the "no wrapping" value if printing throws.
- */
- private void printCDATA(Out fout, int indent, Node node)
- {
-     int savewraplen = this.configuration.wraplen;
-
-     condFlushLine(fout, indent);
-
-     /* disable wrapping */
-     this.configuration.wraplen = 0xFFFFFF; /* a very large number */
-
-     try
-     {
-         addC('<', linelen++);
-         addC('!', linelen++);
-         addC('[', linelen++);
-         addC('C', linelen++);
-         addC('D', linelen++);
-         addC('A', linelen++);
-         addC('T', linelen++);
-         addC('A', linelen++);
-         addC('[', linelen++);
-
-         printText(fout, COMMENT, indent,
-                   node.textarray, node.start, node.end);
-
-         addC(']', linelen++);
-         addC(']', linelen++);
-         addC('>', linelen++);
-         condFlushLine(fout, indent);
-     }
-     finally
-     {
-         this.configuration.wraplen = savewraplen;
-     }
- }
-
- /*
-  Print a section node as <![ ... ]>. Wrapping is disabled
-  unless WrapSection is set. The wrap limit is part of the
-  caller's configuration; it is restored in a finally block so
-  it cannot stay at the "no wrapping" value if printing throws.
- */
- private void printSection(Out fout, int indent, Node node)
- {
-     int savewraplen = this.configuration.wraplen;
-
-     /* disable wrapping if so requested */
-     if (!this.configuration.WrapSection)
-         this.configuration.wraplen = 0xFFFFFF; /* a very large number */
-
-     try
-     {
-         addC('<', linelen++);
-         addC('!', linelen++);
-         addC('[', linelen++);
-
-         printText(fout, (this.configuration.WrapSection ? CDATA : COMMENT), indent,
-                   node.textarray, node.start, node.end);
-
-         addC(']', linelen++);
-         addC('>', linelen++);
-         /* PCondFlushLine(fout, indent); */
-     }
-     finally
-     {
-         this.configuration.wraplen = savewraplen;
-     }
- }
-
- /*
-  Decide whether the content of node is printed one level
-  deeper. Never when IndentContent is off. With SmartIndent,
-  CM_NO_INDENT elements that have content are indented only if
-  they contain a block-level child, and headings, P and TITLE
-  are not indented. Otherwise form fields, objects and MAP are
-  indented, and every other element exactly when it is not
-  inline.
- */
- private boolean shouldIndent(Node node)
- {
-     if (!this.configuration.IndentContent)
-         return false;
-
-     TagTable tt = this.configuration.tt;
-
-     if (this.configuration.SmartIndent)
-     {
-         if (node.content != null && (node.tag.model & Dict.CM_NO_INDENT) != 0)
-         {
-             Node child = node.content;
-             while (child != null)
-             {
-                 if (child.tag != null && (child.tag.model & Dict.CM_BLOCK) != 0)
-                     return true;
-                 child = child.next;
-             }
-
-             return false;
-         }
-
-         if ((node.tag.model & Dict.CM_HEADING) != 0 ||
-             node.tag == tt.tagP ||
-             node.tag == tt.tagTitle)
-             return false;
-     }
-
-     if ((node.tag.model & (Dict.CM_FIELD | Dict.CM_OBJECT)) != 0 ||
-         node.tag == tt.tagMap)
-         return true;
-
-     return (node.tag.model & Dict.CM_INLINE) == 0;
- }
-
- /*
-  Pretty print node and everything below it as HTML.
-
-  fout   - output that finished lines are written to
-  mode   - text-mode flags (PREFORMATTED, NOWRAP, ...) in effect
-  indent - current indentation in columns
-  lexer  - passed on to printTag, which checks it for "voyager" output; may be null
-  node   - root of the subtree; null prints nothing
-
-  Text, comments, doctype, processing instructions, CDATA,
-  sections and ASP/JSTE/PHP nodes go to their own print
-  routines; elements are laid out according to their content
-  model (empty, pre, style/script, inline, other).
- */
- public void printTree(Out fout, short mode, int indent,
- Lexer lexer, Node node)
- {
- Node content, last;
- TagTable tt = this.configuration.tt;
-
- if (node == null)
- return;
-
- if (node.type == Node.TextNode)
- printText(fout, mode, indent,
- node.textarray, node.start, node.end);
- else if (node.type == Node.CommentTag)
- {
- printComment(fout, indent, node);
- }
- else if (node.type == Node.RootNode)
- {
- /* the root prints nothing itself, only its children */
- for (content = node.content;
- content != null;
- content = content.next)
- printTree(fout, mode, indent, lexer, content);
- }
- else if (node.type == Node.DocTypeTag)
- printDocType(fout, indent, node);
- else if (node.type == Node.ProcInsTag)
- printPI(fout, indent, node);
- else if (node.type == Node.CDATATag)
- printCDATA(fout, indent, node);
- else if (node.type == Node.SectionTag)
- printSection(fout, indent, node);
- else if (node.type == Node.AspTag)
- printAsp(fout, indent, node);
- else if (node.type == Node.JsteTag)
- printJste(fout, indent, node);
- else if (node.type == Node.PhpTag)
- printPhp(fout, indent, node);
- else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag)
- {
- /* empty element: block-level ones start on a fresh line */
- if (!((node.tag.model & Dict.CM_INLINE) != 0))
- condFlushLine(fout, indent);
-
- /* optional line break before the first <br> of a run */
- if (node.tag == tt.tagBr && node.prev != null &&
- node.prev.tag != tt.tagBr && this.configuration.BreakBeforeBR)
- flushLine(fout, indent);
-
- /* when cleaning, <wbr> is replaced by a plain space */
- if (this.configuration.MakeClean && node.tag == tt.tagWbr)
- printString(fout, indent, " ");
- else
- printTag(lexer, fout, mode, indent, node);
-
- if (node.tag == tt.tagParam || node.tag == tt.tagArea)
- condFlushLine(fout, indent);
- else if (node.tag == tt.tagBr || node.tag == tt.tagHr)
- flushLine(fout, indent);
- }
- else /* some kind of container element */
- {
- if (node.tag != null && node.tag.parser == ParserImpl.getParsePre())
- {
- /* preformatted element: printed at column 0, content unwrapped */
- condFlushLine(fout, indent);
-
- indent = 0;
- condFlushLine(fout, indent);
- printTag(lexer, fout, mode, indent, node);
- flushLine(fout, indent);
-
- for (content = node.content;
- content != null;
- content = content.next)
- printTree(fout, (short)(mode | PREFORMATTED | NOWRAP), indent, lexer, content);
-
- condFlushLine(fout, indent);
- printEndTag(fout, mode, indent, node);
- flushLine(fout, indent);
-
- if (this.configuration.IndentContent == false && node.next != null)
- flushLine(fout, indent);
- }
- else if (node.tag == tt.tagStyle || node.tag == tt.tagScript)
- {
- /* style and script: like pre, and additionally in CDATA mode so < and > stay unescaped */
- condFlushLine(fout, indent);
-
- indent = 0;
- condFlushLine(fout, indent);
- printTag(lexer, fout, mode, indent, node);
- flushLine(fout, indent);
-
- for (content = node.content;
- content != null;
- content = content.next)
- printTree(fout, (short)(mode | PREFORMATTED | NOWRAP |CDATA), indent, lexer, content);
-
- condFlushLine(fout, indent);
- printEndTag(fout, mode, indent, node);
- flushLine(fout, indent);
-
- if (this.configuration.IndentContent == false && node.next != null)
- flushLine(fout, indent);
- }
- else if ((node.tag.model & Dict.CM_INLINE) != 0)
- {
- if (this.configuration.MakeClean)
- {
- /* discards <font> and </font> tags */
- if (node.tag == tt.tagFont)
- {
- for (content = node.content;
- content != null;
- content = content.next)
- printTree(fout, mode, indent, lexer, content);
- return;
- }
-
- /* replace <nobr>...</nobr> by &nbsp; or &#160; etc. */
- if (node.tag == tt.tagNobr)
- {
- for (content = node.content;
- content != null;
- content = content.next)
- printTree(fout, (short)(mode|NOWRAP), indent, lexer, content);
- return;
- }
- }
-
- /* otherwise a normal inline element */
-
- printTag(lexer, fout, mode, indent, node);
-
- /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */
-
- if (shouldIndent(node))
- {
- condFlushLine(fout, indent);
- indent += this.configuration.spaces;
-
- for (content = node.content;
- content != null;
- content = content.next)
- printTree(fout, mode, indent, lexer, content);
-
- condFlushLine(fout, indent);
- indent -= this.configuration.spaces;
- condFlushLine(fout, indent);
- }
- else
- {
-
- for (content = node.content;
- content != null;
- content = content.next)
- printTree(fout, mode, indent, lexer, content);
- }
-
- printEndTag(fout, mode, indent, node);
- }
- else /* other tags */
- {
- condFlushLine(fout, indent);
-
- if (this.configuration.SmartIndent && node.prev != null)
- flushLine(fout, indent);
-
- /* with HideEndTags the start tag is left out too for CM_OMITST elements */
- if (this.configuration.HideEndTags == false ||
- !(node.tag != null && ((node.tag.model & Dict.CM_OMITST) != 0)))
- {
- printTag(lexer, fout, mode, indent, node);
-
- if (shouldIndent(node))
- condFlushLine(fout, indent);
- else if ((node.tag.model & Dict.CM_HTML) != 0 ||
- node.tag == tt.tagNoframes ||
- ((node.tag.model & Dict.CM_HEAD) != 0 &&
- !(node.tag == tt.tagTitle)))
- flushLine(fout, indent);
- }
-
- /* the body is handed to printSlide when slides are being burst */
- if (node.tag == tt.tagBody && this.configuration.BurstSlides)
- printSlide(fout, mode, (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer);
- else
- {
- last = null;
-
- for (content = node.content;
- content != null; content = content.next)
- {
- /* kludge for naked text before block level tag */
- if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode &&
- content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0)
- {
- flushLine(fout, indent);
- flushLine(fout, indent);
- }
-
- printTree(fout, mode,
- (shouldIndent(node) ? indent+this.configuration.spaces : indent), lexer, content);
-
- last = content;
- }
- }
-
- /* don't flush line for td and th */
- if (shouldIndent(node) ||
- (((node.tag.model & Dict.CM_HTML) != 0 || node.tag == tt.tagNoframes ||
- ((node.tag.model & Dict.CM_HEAD) != 0 && !(node.tag == tt.tagTitle)))
- && this.configuration.HideEndTags == false))
- {
- condFlushLine(fout, (this.configuration.IndentContent ? indent+this.configuration.spaces : indent));
-
- /* with HideEndTags the end tag of CM_OPT elements is left out */
- if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0))
- {
- printEndTag(fout, mode, indent, node);
- flushLine(fout, indent);
- }
- }
- else
- {
- if (this.configuration.HideEndTags == false || !((node.tag.model & Dict.CM_OPT) != 0))
- printEndTag(fout, mode, indent, node);
-
- flushLine(fout, indent);
- }
-
- /* without indentation, an extra line break follows block, list and table elements that have a successor */
- if (this.configuration.IndentContent == false &&
- node.next != null &&
- this.configuration.HideEndTags == false &&
- (node.tag.model & (Dict.CM_BLOCK|Dict.CM_LIST|Dict.CM_DEFLIST|Dict.CM_TABLE)) != 0)
- {
- flushLine(fout, indent);
- }
- }
- }
- }
-
- /*
-  Pretty print node and everything below it as XML.
-
-  fout   - output that finished lines are written to
-  mode   - text-mode flags in effect
-  indent - current indentation in columns
-  lexer  - passed on to printTag; may be null
-  node   - root of the subtree; null prints nothing
-
-  Elements containing text children ("mixed" content) keep
-  their content on the same line and at the same indent; other
-  elements get each child on its own line, one level deeper.
-  Elements that preserve white space are printed at column 0.
- */
- public void printXMLTree(Out fout, short mode, int indent,
- Lexer lexer, Node node)
- {
- TagTable tt = this.configuration.tt;
-
- if (node == null)
- return;
-
- if (node.type == Node.TextNode)
- {
- printText(fout, mode, indent,
- node.textarray, node.start, node.end);
- }
- else if (node.type == Node.CommentTag)
- {
- /* comments go on their own line, starting at column 0 */
- condFlushLine(fout, indent);
- printComment(fout, 0, node);
- condFlushLine(fout, 0);
- }
- else if (node.type == Node.RootNode)
- {
- Node content;
-
- /* the root prints nothing itself, only its children */
- for (content = node.content;
- content != null;
- content = content.next)
- printXMLTree(fout, mode, indent, lexer, content);
- }
- else if (node.type == Node.DocTypeTag)
- printDocType(fout, indent, node);
- else if (node.type == Node.ProcInsTag)
- printPI(fout, indent, node);
- else if (node.type == Node.SectionTag)
- printSection(fout, indent, node);
- else if (node.type == Node.AspTag)
- printAsp(fout, indent, node);
- else if (node.type == Node.JsteTag)
- printJste(fout, indent, node);
- else if (node.type == Node.PhpTag)
- printPhp(fout, indent, node);
- else if ((node.tag.model & Dict.CM_EMPTY) != 0 || node.type == Node.StartEndTag)
- {
- /* empty element: on a line of its own, blank line before a successor */
- condFlushLine(fout, indent);
- printTag(lexer, fout, mode, indent, node);
- flushLine(fout, indent);
-
- if (node.next != null)
- flushLine(fout, indent);
- }
- else /* some kind of container element */
- {
- Node content;
- boolean mixed = false;
- int cindent;
-
- /* content is "mixed" as soon as one direct child is a text node */
- for (content = node.content; content != null; content = content.next)
- {
- if (content.type == Node.TextNode)
- {
- mixed = true;
- break;
- }
- }
-
- condFlushLine(fout, indent);
-
- /* cindent is the indent used for the children */
- if (ParserImpl.XMLPreserveWhiteSpace(node, tt))
- {
- indent = 0;
- cindent = 0;
- mixed = false;
- }
- else if (mixed)
- cindent = indent;
- else
- cindent = indent + this.configuration.spaces;
-
- printTag(lexer, fout, mode, indent, node);
-
- if (!mixed)
- flushLine(fout, indent);
-
- for (content = node.content;
- content != null;
- content = content.next)
- printXMLTree(fout, mode, cindent, lexer, content);
-
- if (!mixed)
- condFlushLine(fout, cindent);
- printEndTag(fout, mode, indent, node);
- condFlushLine(fout, indent);
-
- if (node.next != null)
- flushLine(fout, indent);
- }
- }
-
-
- /* split parse tree by h2 elements and output to separate files */
-
- /* counts number of h2 children belonging to node */
- public int countSlides(Node node)
- {
- int n = 1;
- TagTable tt = this.configuration.tt;
-
- for (node = node.content; node != null; node = node.next)
- if (node.tag == tt.tagH2)
- ++n;
-
- return n;
- }
-
- /*
- inserts a space gif called "dot.gif" to ensure
- that the slide is at least n pixels high
- */
- private void printVertSpacer(Out fout, int indent)
- {
- condFlushLine(fout, indent);
- printString(fout, indent ,
- "<img width=\"0\" height=\"0\" hspace=\"1\" src=\"dot.gif\" vspace=\"%d\" align=\"left\">");
- condFlushLine(fout, indent);
- }
-
- private void printNavBar(Out fout, int indent)
- {
- String buf;
-
- condFlushLine(fout, indent);
- printString(fout, indent , "<center><small>");
-
- if (slide > 1)
- {
- buf = "<a href=\"slide" +
- (new Integer(slide - 1)).toString() +
- ".html\">previous</a> | ";
- printString(fout, indent , buf);
- condFlushLine(fout, indent);
-
- if (slide < count)
- printString(fout, indent , "<a href=\"slide1.html\">start</a> | ");
- else
- printString(fout, indent , "<a href=\"slide1.html\">start</a>");
-
- condFlushLine(fout, indent);
- }
-
- if (slide < count)
- {
- buf = "<a href=\"slide" +
- (new Integer(slide + 1)).toString() +
- ".html\">next</a>";
- printString(fout, indent , buf);
- }
-
- printString(fout, indent , "</small></center>");
- condFlushLine(fout, indent);
- }
-
- /*
- Called from printTree to print the content of a slide from
- the node slidecontent. On return slidecontent points to the
- node starting the next slide or null. The variables slide
- and count are used to customise the navigation bar.
- */
- public void printSlide(Out fout, short mode, int indent, Lexer lexer)
- {
- Node content, last;
- TagTable tt = this.configuration.tt;
-
- /* insert div for onclick handler */
- String s;
- s = "<div onclick=\"document.location='slide" +
- (new Integer(slide < count ? slide + 1 : 1)).toString() +
- ".html'\">";
- printString(fout, indent, s);
- condFlushLine(fout, indent);
-
- /* first print the h2 element and navbar */
- if (slidecontent.tag == tt.tagH2)
- {
- printNavBar(fout, indent);
-
- /* now print an hr after h2 */
-
- addC('<', linelen++);
-
-
- addC((int)Lexer.foldCase('h',
- this.configuration.UpperCaseTags,
- this.configuration.XmlTags),
- linelen++);
- addC((int)Lexer.foldCase('r',
- this.configuration.UpperCaseTags,
- this.configuration.XmlTags),
- linelen++);
-
- if (this.configuration.XmlOut == true)
- printString(fout, indent , " />");
- else
- addC('>', linelen++);
-
-
- if (this.configuration.IndentContent == true)
- condFlushLine(fout, indent);
-
- /* PrintVertSpacer(fout, indent); */
-
- /*condFlushLine(fout, indent); */
-
- /* print the h2 element */
- printTree(fout, mode,
- (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, slidecontent);
-
- slidecontent = slidecontent.next;
- }
-
- /* now continue until we reach the next h2 */
-
- last = null;
- content = slidecontent;
-
- for (; content != null; content = content.next)
- {
- if (content.tag == tt.tagH2)
- break;
-
- /* kludge for naked text before block level tag */
- if (last != null && !this.configuration.IndentContent && last.type == Node.TextNode &&
- content.tag != null && (content.tag.model & Dict.CM_BLOCK) != 0)
- {
- flushLine(fout, indent);
- flushLine(fout, indent);
- }
-
- printTree(fout, mode,
- (this.configuration.IndentContent ? indent+this.configuration.spaces : indent), lexer, content);
-
- last = content;
- }
-
- slidecontent = content;
-
- /* now print epilog */
-
- condFlushLine(fout, indent);
-
- printString(fout, indent , "<br clear=\"all\">");
- condFlushLine(fout, indent);
-
- addC('<', linelen++);
-
-
- addC((int)Lexer.foldCase('h',
- this.configuration.UpperCaseTags,
- this.configuration.XmlTags),
- linelen++);
- addC((int)Lexer.foldCase('r',
- this.configuration.UpperCaseTags,
- this.configuration.XmlTags),
- linelen++);
-
- if (this.configuration.XmlOut == true)
- printString(fout, indent , " />");
- else
- addC('>', linelen++);
-
-
- if (this.configuration.IndentContent == true)
- condFlushLine(fout, indent);
-
- printNavBar(fout, indent);
-
- /* end tag for div */
- printString(fout, indent, "</div>");
- condFlushLine(fout, indent);
- }
-
-
- /*
- Add meta element for page transition effect, this works on IE but not NS
- */
-
- public void addTransitionEffect(Lexer lexer, Node root, short effect, double duration)
- {
- Node head = root.findHEAD(lexer.configuration.tt);
- String transition;
-
- if (0 <= effect && effect <= 23)
- transition = "revealTrans(Duration=" +
- (new Double(duration)).toString() +
- ",Transition=" + effect + ")";
- else
- transition = "blendTrans(Duration=" +
- (new Double(duration)).toString() + ")";
-
- if (head != null)
- {
- Node meta = lexer.inferredTag("meta");
- meta.addAttribute("http-equiv", "Page-Enter");
- meta.addAttribute("content", transition);
- Node.insertNodeAtStart(head, meta);
- }
- }
-
- public void createSlides(Lexer lexer, Node root)
- {
- Node body;
- String buf;
- Out out = new OutImpl();
-
- body = root.findBody(lexer.configuration.tt);
- count = countSlides(body);
- slidecontent = body.content;
- addTransitionEffect(lexer, root, EFFECT_BLEND, 3.0);
-
- for (slide = 1; slide <= count; ++slide)
- {
- buf = "slide" + slide + ".html";
- out.state = StreamIn.FSM_ASCII;
- out.encoding = this.configuration.CharEncoding;
-
- try
- {
- out.out = new FileOutputStream(buf);
- printTree(out, (short)0, 0, lexer, root);
- flushLine(out, 0);
- out.out.close();
- }
- catch (IOException e)
- {
- System.err.println(buf + e.toString() );
- }
- }
-
- /*
- delete superfluous slides by deleting slideN.html
- for N = count+1, count+2, etc. until no such file
- is found.
- */
-
- for (;;)
- {
- buf = "slide" + slide + "html";
-
- if (!(new File(buf)).delete())
- break;
-
- ++slide;
- }
- }
-
-}
+++ /dev/null
-/*
- * @(#)Parser.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * HTML Parser
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public interface Parser {
-
- public void parse( Lexer lexer, Node node, short mode );
-
-}
-
+++ /dev/null
-/*
- * @(#)ParserImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * HTML Parser implementation
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-public class ParserImpl {
-
- //private static int SeenBodyEndTag; /* AQ: moved into lexer structure */
-
- private static void parseTag(Lexer lexer, Node node, short mode)
- {
- // Local fix by GLP 2000-12-21. Need to reset insertspace if this
- // is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
- // Remove this code once the fix is made in Tidy.
-
-/****** (Original code follows)
- if ((node.tag.model & Dict.CM_EMPTY) != 0)
- {
- lexer.waswhite = false;
- return;
- }
- else if (!((node.tag.model & Dict.CM_INLINE) != 0))
- lexer.insertspace = false;
-*******/
-
- if (!((node.tag.model & Dict.CM_INLINE) != 0))
- lexer.insertspace = false;
-
- if ((node.tag.model & Dict.CM_EMPTY) != 0)
- {
- lexer.waswhite = false;
- return;
- }
-
- if (node.tag.parser == null || node.type == Node.StartEndTag)
- return;
-
- node.tag.parser.parse(lexer, node, mode);
- }
-
- private static void moveToHead(Lexer lexer, Node element, Node node)
- {
- Node head;
- TagTable tt = lexer.configuration.tt;
-
-
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
-
- while (element.tag != tt.tagHtml)
- element = element.parent;
-
- for (head = element.content; head != null; head = head.next)
- {
- if (head.tag == tt.tagHead)
- {
- Node.insertNodeAtEnd(head, node);
- break;
- }
- }
-
- if (node.tag.parser != null)
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- }
- else
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- }
- }
-
- public static class ParseHTML implements Parser {
-
- public void parse( Lexer lexer, Node html, short mode )
- {
- Node node, head;
- Node frameset = null;
- Node noframes = null;
-
- lexer.configuration.XmlTags = false;
- lexer.seenBodyEndTag = 0;
- TagTable tt = lexer.configuration.tt;
-
- for (;;)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
-
- if (node == null)
- {
- node = lexer.inferredTag("head");
- break;
- }
-
- if (node.tag == tt.tagHead)
- break;
-
- if (node.tag == html.tag && node.type == Node.EndTag)
- {
- Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(html, node))
- continue;
-
- lexer.ungetToken();
- node = lexer.inferredTag("head");
- break;
- }
-
- head = node;
- Node.insertNodeAtEnd(html, head);
- getParseHead().parse(lexer, head, mode);
-
- for (;;)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
-
- if (node == null)
- {
- if (frameset == null) /* create an empty body */
- node = lexer.inferredTag("body");
-
- return;
- }
-
- /* robustly handle html tags */
- if (node.tag == html.tag)
- {
- if (node.type != Node.StartTag && frameset == null)
- Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
-
- continue;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(html, node))
- continue;
-
- /* if frameset document coerce <body> to <noframes> */
- if (node.tag == tt.tagBody)
- {
- if (node.type != Node.StartTag)
- {
- Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (frameset != null)
- {
- lexer.ungetToken();
-
- if (noframes == null)
- {
- noframes = lexer.inferredTag("noframes");
- Node.insertNodeAtEnd(frameset, noframes);
- Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
- }
-
- parseTag(lexer, noframes, mode);
- continue;
- }
-
- break; /* to parse body */
- }
-
- /* flag an error if we see more than one frameset */
- if (node.tag == tt.tagFrameset)
- {
- if (node.type != Node.StartTag)
- {
- Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (frameset != null)
- Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
- else
- frameset = node;
-
- Node.insertNodeAtEnd(html, node);
- parseTag(lexer, node, mode);
-
- /*
- see if it includes a noframes element so
- that we can merge subsequent noframes elements
- */
-
- for (node = frameset.content; node != null; node = node.next)
- {
- if (node.tag == tt.tagNoframes)
- noframes = node;
- }
- continue;
- }
-
- /* if not a frameset document coerce <noframes> to <body> */
- if (node.tag == tt.tagNoframes)
- {
- if (node.type != Node.StartTag)
- {
- Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (frameset == null)
- {
- Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
- node = lexer.inferredTag("body");
- break;
- }
-
- if (noframes == null)
- {
- noframes = node;
- Node.insertNodeAtEnd(frameset, noframes);
- }
-
- parseTag(lexer, noframes, mode);
- continue;
- }
-
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
- {
- moveToHead(lexer, html, node);
- continue;
- }
- }
-
- lexer.ungetToken();
-
- /* insert other content into noframes element */
-
- if (frameset != null)
- {
- if (noframes == null)
- {
- noframes = lexer.inferredTag("noframes");
- Node.insertNodeAtEnd(frameset, noframes);
- }
- else
- Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
-
- parseTag(lexer, noframes, mode);
- continue;
- }
-
- node = lexer.inferredTag("body");
- break;
- }
-
- /* node must be body */
-
- Node.insertNodeAtEnd(html, node);
- parseTag(lexer, node, mode);
- }
-
- };
-
- public static class ParseHead implements Parser {
-
- public void parse( Lexer lexer, Node head, short mode )
- {
- Node node;
- int HasTitle = 0;
- int HasBase = 0;
- TagTable tt = lexer.configuration.tt;
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
- if (node.tag == head.tag && node.type == Node.EndTag)
- {
- head.closed = true;
- break;
- }
-
- if (node.type == Node.TextNode)
- {
- lexer.ungetToken();
- break;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(head, node))
- continue;
-
- if (node.type == Node.DocTypeTag)
- {
- Node.insertDocType(lexer, head, node);
- continue;
- }
-
- /* discard unknown tags */
- if (node.tag == null)
- {
- Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (!((node.tag.model & Dict.CM_HEAD) != 0))
- {
- lexer.ungetToken();
- break;
- }
-
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- if (node.tag == tt.tagTitle)
- {
- ++HasTitle;
-
- if (HasTitle > 1)
- Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
- }
- else if (node.tag == tt.tagBase)
- {
- ++HasBase;
-
- if (HasBase > 1)
- Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
- }
- else if (node.tag == tt.tagNoscript)
- Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
-
- Node.insertNodeAtEnd(head, node);
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- continue;
- }
-
- /* discard unexpected text nodes and end tags */
- Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
- }
-
- if (HasTitle == 0)
- {
- Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
- Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
- }
- }
-
- };
-
- public static class ParseTitle implements Parser {
-
- public void parse( Lexer lexer, Node title, short mode )
- {
- Node node;
-
- while (true)
- {
- node = lexer.getToken(Lexer.MixedContent);
- if (node == null) break;
- if (node.tag == title.tag && node.type == Node.EndTag)
- {
- title.closed = true;
- Node.trimSpaces(lexer, title);
- return;
- }
-
- if (node.type == Node.TextNode)
- {
- /* only called for 1st child */
- if (title.content == null)
- Node.trimInitialSpace(lexer, title, node);
-
- if (node.start >= node.end)
- {
- continue;
- }
-
- Node.insertNodeAtEnd(title, node);
- continue;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(title, node))
- continue;
-
- /* discard unknown tags */
- if (node.tag == null)
- {
- Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* pushback unexpected tokens */
- Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
- lexer.ungetToken();
- Node.trimSpaces(lexer, title);
- return;
- }
-
- Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
- }
-
- };
-
- public static class ParseScript implements Parser {
-
- public void parse( Lexer lexer, Node script, short mode )
- {
- /*
- This isn't quite right for CDATA content as it recognises
- tags within the content and parses them accordingly.
- This will unfortunately screw up scripts which include
- < + letter, < + !, < + ? or < + / + letter
- */
-
- Node node;
-
- node = lexer.getCDATA( script);
-
- if (node != null)
- Node.insertNodeAtEnd(script, node);
- }
-
- };
-
- public static class ParseBody implements Parser {
-
- public void parse( Lexer lexer, Node body, short mode )
- {
- Node node;
- boolean checkstack, iswhitenode;
-
- mode = Lexer.IgnoreWhitespace;
- checkstack = true;
- TagTable tt = lexer.configuration.tt;
-
- while (true)
- {
- node = lexer.getToken(mode);
- if (node == null) break;
- if (node.tag == body.tag && node.type == Node.EndTag)
- {
- body.closed = true;
- Node.trimSpaces(lexer, body);
- lexer.seenBodyEndTag = 1;
- mode = Lexer.IgnoreWhitespace;
-
- if (body.parent.tag == tt.tagNoframes)
- break;
-
- continue;
- }
-
- if (node.tag == tt.tagNoframes)
- {
- if (node.type == Node.StartTag)
- {
- Node.insertNodeAtEnd(body, node);
- getParseBlock().parse(lexer, node, mode);
- continue;
- }
-
- if (node.type == Node.EndTag &&
- body.parent.tag == tt.tagNoframes)
- {
- Node.trimSpaces(lexer, body);
- lexer.ungetToken();
- break;
- }
- }
-
- if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)
- && body.parent.tag == tt.tagNoframes)
- {
- Node.trimSpaces(lexer, body);
- lexer.ungetToken();
- break;
- }
-
- if (node.tag == tt.tagHtml)
- {
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
-
- continue;
- }
-
- iswhitenode = false;
-
- if (node.type == Node.TextNode &&
- node.end <= node.start + 1 &&
- node.textarray[node.start] == (byte)' ')
- iswhitenode = true;
-
- /* deal with comments etc. */
- if (Node.insertMisc(body, node))
- continue;
-
- if (lexer.seenBodyEndTag == 1 && !iswhitenode)
- {
- ++lexer.seenBodyEndTag;
- Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
- }
-
- /* mixed content model permits text */
- if (node.type == Node.TextNode)
- {
- if (iswhitenode && mode == Lexer.IgnoreWhitespace)
- {
- continue;
- }
-
- if (lexer.configuration.EncloseBodyText && !iswhitenode)
- {
- Node para;
-
- lexer.ungetToken();
- para = lexer.inferredTag("p");
- Node.insertNodeAtEnd(body, para);
- parseTag(lexer, para, mode);
- mode = Lexer.MixedContent;
- continue;
- }
- else /* strict doesn't allow text here */
- lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
-
- if (checkstack)
- {
- checkstack = false;
-
- if (lexer.inlineDup( node) > 0)
- continue;
- }
-
- Node.insertNodeAtEnd(body, node);
- mode = Lexer.MixedContent;
- continue;
- }
-
- if (node.type == Node.DocTypeTag)
- {
- Node.insertDocType(lexer, body, node);
- continue;
- }
- /* discard unknown and PARAM tags */
- if (node.tag == null || node.tag == tt.tagParam)
- {
- Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /*
- Netscape allows LI and DD directly in BODY
- We infer UL or DL respectively and use this
- boolean to exclude block-level elements so as
- to match Netscape's observed behaviour.
- */
- lexer.excludeBlocks = false;
-
- if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
- !((node.tag.model & Dict.CM_INLINE) != 0))
- {
- /* avoid this error message being issued twice */
- if (!((node.tag.model & Dict.CM_HEAD) != 0))
- Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
-
- if ((node.tag.model & Dict.CM_HTML) != 0)
- {
- /* copy body attributes if current body was inferred */
- if (node.tag == tt.tagBody && body.implicit
- && body.attributes == null)
- {
- body.attributes = node.attributes;
- node.attributes = null;
- }
-
- continue;
- }
-
- if ((node.tag.model & Dict.CM_HEAD) != 0)
- {
- moveToHead(lexer, body, node);
- continue;
- }
-
- if ((node.tag.model & Dict.CM_LIST) != 0)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "ul");
- Node.addClass(node, "noindent");
- lexer.excludeBlocks = true;
- }
- else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "dl");
- lexer.excludeBlocks = true;
- }
- else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "table");
- lexer.excludeBlocks = true;
- }
- else
- {
- /* AQ: The following line is from the official C
- version of tidy. It doesn't make sense to me
- because the '!' operator has higher precedence
- than the '&' operator. It seems to me that the
- expression always evaluates to 0.
-
- if (!node->tag->model & (CM_ROW | CM_FIELD))
-
- AQ: 13Jan2000 fixed in C tidy
- */
- if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
- {
- lexer.ungetToken();
- return;
- }
-
- /* ignore </td> </th> <option> etc. */
- continue;
- }
- }
-
- if (node.type == Node.EndTag)
- {
- if (node.tag == tt.tagBr)
- node.type = Node.StartTag;
- else if (node.tag == tt.tagP)
- {
- Node.coerceNode(lexer, node, tt.tagBr);
- Node.insertNodeAtEnd(body, node);
- node = lexer.inferredTag("br");
- }
- else if ((node.tag.model & Dict.CM_INLINE) != 0)
- lexer.popInline(node);
- }
-
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
- {
- /* HTML4 strict doesn't allow inline content here */
- /* but HTML2 does allow img elements as children of body */
- if (node.tag == tt.tagImg)
- lexer.versions &= ~Dict.VERS_HTML40_STRICT;
- else
- lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
-
- if (checkstack && !node.implicit)
- {
- checkstack = false;
-
- if (lexer.inlineDup( node) > 0)
- continue;
- }
-
- mode = Lexer.MixedContent;
- }
- else
- {
- checkstack = true;
- mode = Lexer.IgnoreWhitespace;
- }
-
- if (node.implicit)
- Report.warning(lexer, body, node, Report.INSERTING_TAG);
-
- Node.insertNodeAtEnd(body, node);
- parseTag(lexer, node, mode);
- continue;
- }
-
- /* discard unexpected tags */
- Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
- }
- }
-
- };
-
- public static class ParseFrameSet implements Parser {
-
- public void parse( Lexer lexer, Node frameset, short mode )
- {
- Node node;
- TagTable tt = lexer.configuration.tt;
-
- lexer.badAccess |= Report.USING_FRAMES;
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
- if (node.tag == frameset.tag && node.type == Node.EndTag)
- {
- frameset.closed = true;
- Node.trimSpaces(lexer, frameset);
- return;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(frameset, node))
- continue;
-
- if (node.tag == null)
- {
- Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
- {
- moveToHead(lexer, frameset, node);
- continue;
- }
- }
-
- if (node.tag == tt.tagBody)
- {
- lexer.ungetToken();
- node = lexer.inferredTag("noframes");
- Report.warning(lexer, frameset, node, Report.INSERTING_TAG);
- }
-
- if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0)
- {
- Node.insertNodeAtEnd(frameset, node);
- lexer.excludeBlocks = false;
- parseTag(lexer, node, Lexer.MixedContent);
- continue;
- }
- else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0)
- {
- Node.insertNodeAtEnd(frameset, node);
- continue;
- }
-
- /* discard unexpected tags */
- Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
- }
-
- Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
- }
-
- };
-
- public static class ParseInline implements Parser {
-
- public void parse( Lexer lexer, Node element, short mode )
- {
- Node node, parent;
- TagTable tt = lexer.configuration.tt;
-
- if ((element.tag.model & Dict.CM_EMPTY) != 0)
- return;
-
- if (element.tag == tt.tagA)
- {
- if (element.attributes == null)
- {
- Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED);
- Node.discardElement(element);
- return;
- }
- }
-
- /*
- ParseInline is used for some block level elements like H1 to H6
- For such elements we need to insert inline emphasis tags currently
- on the inline stack. For Inline elements, we normally push them
- onto the inline stack provided they aren't implicit or OBJECT/APPLET.
- This test is carried out in PushInline and PopInline, see istack.c
- We don't push A or SPAN to replicate current browser behavior
- */
- if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt))
- lexer.inlineDup( null);
- else if ((element.tag.model & Dict.CM_INLINE) != 0 &&
- element.tag != tt.tagA && element.tag != tt.tagSpan)
- lexer.pushInline( element);
-
- if (element.tag == tt.tagNobr)
- lexer.badLayout |= Report.USING_NOBR;
- else if (element.tag == tt.tagFont)
- lexer.badLayout |= Report.USING_FONT;
-
- /* Inline elements may or may not be within a preformatted element */
- if (mode != Lexer.Preformatted)
- mode = Lexer.MixedContent;
-
- while (true)
- {
- node = lexer.getToken(mode);
- if (node == null) break;
- /* end tag for current element */
- if (node.tag == element.tag && node.type == Node.EndTag)
- {
- if ((element.tag.model & Dict.CM_INLINE) != 0 &&
- element.tag != tt.tagA)
- lexer.popInline( node);
-
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
- /*
- if a font element wraps an anchor and nothing else
- then move the font element inside the anchor since
- otherwise it won't alter the anchor text color
- */
- if (element.tag == tt.tagFont &&
- element.content != null &&
- element.content == element.last)
- {
- Node child = element.content;
-
- if (child.tag == tt.tagA)
- {
- child.parent = element.parent;
- child.next = element.next;
- child.prev = element.prev;
-
- if (child.prev != null)
- child.prev.next = child;
- else
- child.parent.content = child;
-
- if (child.next != null)
- child.next.prev = child;
- else
- child.parent.last = child;
-
- element.next = null;
- element.prev = null;
- element.parent = child;
- element.content = child.content;
- element.last = child.last;
- child.content = element;
- child.last = element;
- for (child = element.content; child != null; child = child.next)
- child.parent = element;
- }
- }
- element.closed = true;
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
-
- /* <u>...<u> map 2nd <u> to </u> if 1st is explicit */
- /* otherwise emphasis nesting is probably unintentional */
- /* big and small have cumulative effect to leave them alone */
- if (node.type == Node.StartTag
- && node.tag == element.tag
- && lexer.isPushed(node)
- && !node.implicit
- && !element.implicit
- && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0)
- && node.tag != tt.tagA
- && node.tag != tt.tagFont
- && node.tag != tt.tagBig
- && node.tag != tt.tagSmall)
- {
- if (element.content != null && node.attributes == null)
- {
- Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
- node.type = Node.EndTag;
- lexer.ungetToken();
- continue;
- }
-
- Report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
- }
-
- if (node.type == Node.TextNode)
- {
- /* only called for 1st child */
- if (element.content == null &&
- !((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
-
- if (node.start >= node.end)
- {
- continue;
- }
-
- Node.insertNodeAtEnd(element, node);
- continue;
- }
-
- /* mixed content model so allow text */
- if (Node.insertMisc(element, node))
- continue;
-
- /* deal with HTML tags */
- if (node.tag == tt.tagHtml)
- {
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* otherwise infer end of inline element */
- lexer.ungetToken();
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
-
- /* within <dt> or <pre> map <p> to <br> */
- if (node.tag == tt.tagP &&
- node.type == Node.StartTag &&
- ((mode & Lexer.Preformatted) != 0 ||
- element.tag == tt.tagDt ||
- element.isDescendantOf(tt.tagDt)))
- {
- node.tag = tt.tagBr;
- node.element = "br";
- Node.trimSpaces(lexer, element);
- Node.insertNodeAtEnd(element, node);
- continue;
- }
-
- /* ignore unknown and PARAM tags */
- if (node.tag == null || node.tag == tt.tagParam)
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node.tag == tt.tagBr && node.type == Node.EndTag)
- node.type = Node.StartTag;
-
- if (node.type == Node.EndTag)
- {
- /* coerce </br> to <br> */
- if (node.tag == tt.tagBr)
- node.type = Node.StartTag;
- else if (node.tag == tt.tagP)
- {
- /* coerce unmatched </p> to <br><br> */
- if (!element.isDescendantOf(tt.tagP))
- {
- Node.coerceNode(lexer, node, tt.tagBr);
- Node.trimSpaces(lexer, element);
- Node.insertNodeAtEnd(element, node);
- node = lexer.inferredTag("br");
- continue;
- }
- }
- else if ((node.tag.model & Dict.CM_INLINE) != 0
- && node.tag != tt.tagA
- && !((node.tag.model & Dict.CM_OBJECT) != 0)
- && (element.tag.model & Dict.CM_INLINE) != 0)
- {
- /* allow any inline end tag to end current element */
- lexer.popInline( element);
-
- if (element.tag != tt.tagA)
- {
- if (node.tag == tt.tagA && node.tag != element.tag)
- {
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
- lexer.ungetToken();
- }
- else
- {
- Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
- }
-
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
-
- /* if parent is <a> then discard unexpected inline end tag */
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- } /* special case </tr> etc. for stuff moved in front of table */
- else if (lexer.exiled
- && node.tag.model != 0
- && (node.tag.model & Dict.CM_TABLE) != 0)
- {
- lexer.ungetToken();
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
- }
-
- /* allow any header tag to end current header */
- if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
- {
- if (node.tag == element.tag)
- {
- Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
- }
- else
- {
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
- lexer.ungetToken();
- }
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
-
- /*
- an <A> tag to ends any open <A> element
- but <A href=...> is mapped to </A><A href=...>
- */
- if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node))
- {
- /* coerce <a> to </a> unless it has some attributes */
- if (node.attributes == null)
- {
- node.type = Node.EndTag;
- Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
- lexer.popInline( node);
- lexer.ungetToken();
- continue;
- }
-
- lexer.ungetToken();
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
- lexer.popInline( element);
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
-
- if ((element.tag.model & Dict.CM_HEADING) != 0)
- {
- if (node.tag == tt.tagCenter ||
- node.tag == tt.tagDiv)
- {
- if (node.type != Node.StartTag &&
- node.type != Node.StartEndTag)
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
-
- /* insert center as parent if heading is empty */
- if (element.content == null)
- {
- Node.insertNodeAsParent(element, node);
- continue;
- }
-
- /* split heading and make center parent of 2nd part */
- Node.insertNodeAfterElement(element, node);
-
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
-
- element = lexer.cloneNode(element);
- element.start = lexer.lexsize;
- element.end = lexer.lexsize;
- Node.insertNodeAtEnd(node, element);
- continue;
- }
-
- if (node.tag == tt.tagHr)
- {
- if (node.type != Node.StartTag &&
- node.type != Node.StartEndTag)
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
-
- /* insert hr before heading if heading is empty */
- if (element.content == null)
- {
- Node.insertNodeBeforeElement(element, node);
- continue;
- }
-
- /* split heading and insert hr before 2nd part */
- Node.insertNodeAfterElement(element, node);
-
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
-
- element = lexer.cloneNode(element);
- element.start = lexer.lexsize;
- element.end = lexer.lexsize;
- Node.insertNodeAfterElement(node, element);
- continue;
- }
- }
-
- if (element.tag == tt.tagDt)
- {
- if (node.tag == tt.tagHr)
- {
- Node dd;
-
- if (node.type != Node.StartTag &&
- node.type != Node.StartEndTag)
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
- dd = lexer.inferredTag("dd");
-
- /* insert hr within dd before dt if dt is empty */
- if (element.content == null)
- {
- Node.insertNodeBeforeElement(element, dd);
- Node.insertNodeAtEnd(dd, node);
- continue;
- }
-
- /* split dt and insert hr within dd before 2nd part */
- Node.insertNodeAfterElement(element, dd);
- Node.insertNodeAtEnd(dd, node);
-
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
-
- element = lexer.cloneNode(element);
- element.start = lexer.lexsize;
- element.end = lexer.lexsize;
- Node.insertNodeAfterElement(dd, element);
- continue;
- }
- }
-
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node.type == Node.EndTag)
- {
- for (parent = element.parent;
- parent != null; parent = parent.parent)
- {
- if (node.tag == parent.tag)
- {
- if (!((element.tag.model & Dict.CM_OPT) != 0) &&
- !element.implicit)
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
-
- if (element.tag == tt.tagA)
- lexer.popInline(element);
-
- lexer.ungetToken();
-
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
-
- Node.trimEmptyElement(lexer, element);
- return;
- }
- }
- }
-
- /* block level tags end this element */
- if (!((node.tag.model & Dict.CM_INLINE) != 0))
- {
- if (node.type != Node.StartTag)
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (!((element.tag.model & Dict.CM_OPT) != 0))
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
-
- if ((node.tag.model & Dict.CM_HEAD) != 0 &&
- !((node.tag.model & Dict.CM_BLOCK) != 0))
- {
- moveToHead(lexer, element, node);
- continue;
- }
-
- /*
- prevent anchors from propagating into block tags
- except for headings h1 to h6
- */
- if (element.tag == tt.tagA)
- {
- if (node.tag != null &&
- !((node.tag.model & Dict.CM_HEADING) != 0))
- lexer.popInline(element);
- else if (!(element.content != null))
- {
- Node.discardElement(element);
- lexer.ungetToken();
- return;
- }
- }
-
- lexer.ungetToken();
-
- if (!((mode & Lexer.Preformatted) != 0))
- Node.trimSpaces(lexer, element);
-
- Node.trimEmptyElement(lexer, element);
- return;
- }
-
- /* parse inline element */
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- if (node.implicit)
- Report.warning(lexer, element, node, Report.INSERTING_TAG);
-
- /* trim white space before <br> */
- if (node.tag == tt.tagBr)
- Node.trimSpaces(lexer, element);
-
- Node.insertNodeAtEnd(element, node);
- parseTag(lexer, node, mode);
- continue;
- }
-
- /* discard unexpected tags */
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- }
-
- if (!((element.tag.model & Dict.CM_OPT) != 0))
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
-
- Node.trimEmptyElement(lexer, element);
- }
- };
-
- public static class ParseList implements Parser {
-
- public void parse( Lexer lexer, Node list, short mode )
- {
- Node node;
- Node parent;
- TagTable tt = lexer.configuration.tt;
-
- if ((list.tag.model & Dict.CM_EMPTY) != 0)
- return;
-
- lexer.insert = -1; /* defer implicit inline start tags */
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
-
- if (node.tag == list.tag && node.type == Node.EndTag)
- {
- if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
- Node.coerceNode(lexer, list, tt.tagUl);
-
- list.closed = true;
- Node.trimEmptyElement(lexer, list);
- return;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(list, node))
- continue;
-
- if (node.type != Node.TextNode && node.tag == null)
- {
- Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node.type == Node.EndTag)
- {
- if (node.tag == tt.tagForm)
- {
- lexer.badForm = 1;
- Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
- {
- Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
- lexer.popInline(node);
- continue;
- }
-
- for (parent = list.parent;
- parent != null; parent = parent.parent)
- {
- if (node.tag == parent.tag)
- {
- Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
- lexer.ungetToken();
-
- if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
- Node.coerceNode(lexer, list, tt.tagUl);
-
- Node.trimEmptyElement(lexer, list);
- return;
- }
- }
-
- Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node.tag != tt.tagLi)
- {
- lexer.ungetToken();
-
- if (node.tag != null &&
- (node.tag.model & Dict.CM_BLOCK) != 0 &&
- lexer.excludeBlocks)
- {
- Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
- Node.trimEmptyElement(lexer, list);
- return;
- }
-
- node = lexer.inferredTag("li");
- node.addAttribute("style", "list-style: none");
- Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
- }
-
- /* node should be <LI> */
- Node.insertNodeAtEnd(list, node);
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- }
-
- if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
- Node.coerceNode(lexer, list, tt.tagUl);
-
- Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
- Node.trimEmptyElement(lexer, list);
- }
-
- };
-
- public static class ParseDefList implements Parser {
-
- public void parse( Lexer lexer, Node list, short mode )
- {
- Node node, parent;
- TagTable tt = lexer.configuration.tt;
-
- if ((list.tag.model & Dict.CM_EMPTY) != 0)
- return;
-
- lexer.insert = -1; /* defer implicit inline start tags */
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
- if (node.tag == list.tag && node.type == Node.EndTag)
- {
- list.closed = true;
- Node.trimEmptyElement(lexer, list);
- return;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(list, node))
- continue;
-
- if (node.type == Node.TextNode)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "dt");
- Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
- }
-
- if (node.tag == null)
- {
- Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node.type == Node.EndTag)
- {
- if (node.tag == tt.tagForm)
- {
- lexer.badForm = 1;
- Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = list.parent;
- parent != null; parent = parent.parent)
- {
- if (node.tag == parent.tag)
- {
- Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
-
- lexer.ungetToken();
- Node.trimEmptyElement(lexer, list);
- return;
- }
- }
- }
-
- /* center in a dt or a dl breaks the dl list in two */
- if (node.tag == tt.tagCenter)
- {
- if (list.content != null)
- Node.insertNodeAfterElement(list, node);
- else /* trim empty dl list */
- {
- Node.insertNodeBeforeElement(list, node);
- Node.discardElement(list);
- }
-
- /* and parse contents of center */
- parseTag(lexer, node, mode);
-
- /* now create a new dl element */
- list = lexer.inferredTag("dl");
- Node.insertNodeAfterElement(node, list);
- continue;
- }
-
- if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
- {
- lexer.ungetToken();
-
- if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
- {
- Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
- Node.trimEmptyElement(lexer, list);
- return;
- }
-
- /* if DD appeared directly in BODY then exclude blocks */
- if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
- {
- Node.trimEmptyElement(lexer, list);
- return;
- }
-
- node = lexer.inferredTag( "dd");
- Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
- }
-
- if (node.type == Node.EndTag)
- {
- Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* node should be <DT> or <DD>*/
- Node.insertNodeAtEnd(list, node);
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- }
-
- Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
- Node.trimEmptyElement(lexer, list);
- }
-
- };
-
- public static class ParsePre implements Parser {
-
- public void parse( Lexer lexer, Node pre, short mode )
- {
- Node node, parent;
- TagTable tt = lexer.configuration.tt;
-
- if ((pre.tag.model & Dict.CM_EMPTY) != 0)
- return;
-
- if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
- Node.coerceNode(lexer, pre, tt.tagPre);
-
- lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
-
- while (true)
- {
- node = lexer.getToken(Lexer.Preformatted);
- if (node == null) break;
- if (node.tag == pre.tag && node.type == Node.EndTag)
- {
- Node.trimSpaces(lexer, pre);
- pre.closed = true;
- Node.trimEmptyElement(lexer, pre);
- return;
- }
-
- if (node.tag == tt.tagHtml)
- {
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
-
- continue;
- }
-
- if (node.type == Node.TextNode)
- {
- /* if first check for inital newline */
- if (pre.content == null)
- {
- if (node.textarray[node.start] == (byte)'\n')
- ++node.start;
-
- if (node.start >= node.end)
- {
- continue;
- }
- }
-
- Node.insertNodeAtEnd(pre, node);
- continue;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(pre, node))
- continue;
-
- /* discard unknown and PARAM tags */
- if (node.tag == null || node.tag == tt.tagParam)
- {
- Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node.tag == tt.tagP)
- {
- if (node.type == Node.StartTag)
- {
- Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
-
- /* trim white space before <p> in <pre>*/
- Node.trimSpaces(lexer, pre);
-
- /* coerce both <p> and </p> to <br> */
- Node.coerceNode(lexer, node, tt.tagBr);
- Node.insertNodeAtEnd(pre, node);
- }
- else
- {
- Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
- }
- continue;
- }
-
- if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
- {
- moveToHead(lexer, pre, node);
- continue;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node.type == Node.EndTag)
- {
- if (node.tag == tt.tagForm)
- {
- lexer.badForm = 1;
- Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = pre.parent;
- parent != null; parent = parent.parent)
- {
- if (node.tag == parent.tag)
- {
- Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
-
- lexer.ungetToken();
- Node.trimSpaces(lexer, pre);
- Node.trimEmptyElement(lexer, pre);
- return;
- }
- }
- }
-
- /* what about head content, HEAD, BODY tags etc? */
- if (!((node.tag.model & Dict.CM_INLINE) != 0))
- {
- if (node.type != Node.StartTag)
- {
- Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
- lexer.excludeBlocks = true;
-
- /* check if we need to infer a container */
- if ((node.tag.model & Dict.CM_LIST) != 0)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "ul");
- Node.addClass(node, "noindent");
- }
- else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "dl");
- }
- else if ((node.tag.model & Dict.CM_TABLE) != 0)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "table");
- }
-
- Node.insertNodeAfterElement(pre, node);
- pre = lexer.inferredTag( "pre");
- Node.insertNodeAfterElement(node, pre);
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- lexer.excludeBlocks = false;
- continue;
- }
- /*
- if (!((node.tag.model & Dict.CM_INLINE) != 0))
- {
- Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
- lexer.ungetToken();
- return;
- }
- */
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- /* trim white space before <br> */
- if (node.tag == tt.tagBr)
- Node.trimSpaces(lexer, pre);
-
- Node.insertNodeAtEnd(pre, node);
- parseTag(lexer, node, Lexer.Preformatted);
- continue;
- }
-
- /* discard unexpected tags */
- Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
- }
-
- Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
- Node.trimEmptyElement(lexer, pre);
- }
-
- };
-
- public static class ParseBlock implements Parser {
-
- public void parse( Lexer lexer, Node element, short mode )
- /*
- element is node created by the lexer
- upon seeing the start tag, or by the
- parser when the start tag is inferred
- */
- {
- Node node, parent;
- boolean checkstack;
- int istackbase = 0;
- TagTable tt = lexer.configuration.tt;
-
- checkstack = true;
-
- if ((element.tag.model & Dict.CM_EMPTY) != 0)
- return;
-
- if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
- Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
-
- /*
- InlineDup() asks the lexer to insert inline emphasis tags
- currently pushed on the istack, but take care to avoid
- propagating inline emphasis inside OBJECT or APPLET.
- For these elements a fresh inline stack context is created
- and disposed of upon reaching the end of the element.
- They thus behave like table cells in this respect.
- */
- if ((element.tag.model & Dict.CM_OBJECT) != 0)
- {
- istackbase = lexer.istackbase;
- lexer.istackbase = lexer.istack.size();
- }
-
- if (!((element.tag.model & Dict.CM_MIXED) != 0))
- lexer.inlineDup( null);
-
- mode = Lexer.IgnoreWhitespace;
-
- while (true)
- {
- node = lexer.getToken(mode /*Lexer.MixedContent*/);
- if (node == null) break;
- /* end tag for this element */
- if (node.type == Node.EndTag && node.tag != null &&
- (node.tag == element.tag || element.was == node.tag))
- {
-
- if ((element.tag.model & Dict.CM_OBJECT) != 0)
- {
- /* pop inline stack */
- while (lexer.istack.size() > lexer.istackbase)
- lexer.popInline( null);
- lexer.istackbase = istackbase;
- }
-
- element.closed = true;
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
-
- if (node.tag == tt.tagHtml ||
- node.tag == tt.tagHead ||
- node.tag == tt.tagBody)
- {
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
-
- continue;
- }
-
- if (node.type == Node.EndTag)
- {
- if (node.tag == null)
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
-
- continue;
- }
- else if (node.tag == tt.tagBr)
- node.type = Node.StartTag;
- else if (node.tag == tt.tagP)
- {
- Node.coerceNode(lexer, node, tt.tagBr);
- Node.insertNodeAtEnd(element, node);
- node = lexer.inferredTag("br");
- }
- else
- {
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- for (parent = element.parent;
- parent != null; parent = parent.parent)
- {
- if (node.tag == parent.tag)
- {
- if (!((element.tag.model & Dict.CM_OPT) != 0))
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
-
- lexer.ungetToken();
-
- if ((element.tag.model & Dict.CM_OBJECT) != 0)
- {
- /* pop inline stack */
- while (lexer.istack.size() > lexer.istackbase)
- lexer.popInline( null);
- lexer.istackbase = istackbase;
- }
-
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
- }
- /* special case </tr> etc. for stuff moved in front of table */
- if (lexer.exiled
- && node.tag.model != 0
- && (node.tag.model & Dict.CM_TABLE) != 0)
- {
- lexer.ungetToken();
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
- }
- }
-
- /* mixed content model permits text */
- if (node.type == Node.TextNode)
- {
- boolean iswhitenode = false;
-
- if (node.type == Node.TextNode &&
- node.end <= node.start + 1 &&
- lexer.lexbuf[node.start] == (byte)' ')
- iswhitenode = true;
-
- if (lexer.configuration.EncloseBlockText && !iswhitenode)
- {
- lexer.ungetToken();
- node = lexer.inferredTag("p");
- Node.insertNodeAtEnd(element, node);
- parseTag(lexer, node, Lexer.MixedContent);
- continue;
- }
-
- if (checkstack)
- {
- checkstack = false;
-
- if (!((element.tag.model & Dict.CM_MIXED) != 0))
- {
- if (lexer.inlineDup( node) > 0)
- continue;
- }
- }
-
- Node.insertNodeAtEnd(element, node);
- mode = Lexer.MixedContent;
- /*
- HTML4 strict doesn't allow mixed content for
- elements with %block; as their content model
- */
- lexer.versions &= ~Dict.VERS_HTML40_STRICT;
- continue;
- }
-
- if (Node.insertMisc(element, node))
- continue;
-
- /* allow PARAM elements? */
- if (node.tag == tt.tagParam)
- {
- if (((element.tag.model & Dict.CM_PARAM) != 0) &&
- (node.type == Node.StartTag || node.type == Node.StartEndTag))
- {
- Node.insertNodeAtEnd(element, node);
- continue;
- }
-
- /* otherwise discard it */
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* allow AREA elements? */
- if (node.tag == tt.tagArea)
- {
- if ((element.tag == tt.tagMap) &&
- (node.type == Node.StartTag || node.type == Node.StartEndTag))
- {
- Node.insertNodeAtEnd(element, node);
- continue;
- }
-
- /* otherwise discard it */
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* ignore unknown start/end tags */
- if (node.tag == null)
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /*
- Allow Dict.CM_INLINE elements here.
-
- Allow Dict.CM_BLOCK elements here unless
- lexer.excludeBlocks is yes.
-
- LI and DD are special cased.
-
- Otherwise infer end tag for this element.
- */
-
- if (!((node.tag.model & Dict.CM_INLINE) != 0))
- {
- if (node.type != Node.StartTag && node.type != Node.StartEndTag)
- {
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (element.tag == tt.tagTd || element.tag == tt.tagTh)
- {
- /* if parent is a table cell, avoid inferring the end of the cell */
-
- if ((node.tag.model & Dict.CM_HEAD) != 0)
- {
- moveToHead(lexer, element, node);
- continue;
- }
-
- if ((node.tag.model & Dict.CM_LIST) != 0)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "ul");
- Node.addClass(node, "noindent");
- lexer.excludeBlocks = true;
- }
- else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "dl");
- lexer.excludeBlocks = true;
- }
-
- /* infer end of current table cell */
- if (!((node.tag.model & Dict.CM_BLOCK) != 0))
- {
- lexer.ungetToken();
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
- }
- else if ((node.tag.model & Dict.CM_BLOCK) != 0)
- {
- if (lexer.excludeBlocks)
- {
- if (!((element.tag.model & Dict.CM_OPT) != 0))
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
-
- lexer.ungetToken();
-
- if ((element.tag.model & Dict.CM_OBJECT) != 0)
- lexer.istackbase = istackbase;
-
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
- }
- else /* things like list items */
- {
- if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
-
- if ((node.tag.model & Dict.CM_HEAD) != 0)
- {
- moveToHead(lexer, element, node);
- continue;
- }
-
- lexer.ungetToken();
-
- if ((node.tag.model & Dict.CM_LIST) != 0)
- {
- if (element.parent != null && element.parent.tag != null &&
- element.parent.tag.parser == getParseList())
- {
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
-
- node = lexer.inferredTag("ul");
- Node.addClass(node, "noindent");
- }
- else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
- {
- if (element.parent.tag == tt.tagDl)
- {
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
-
- node = lexer.inferredTag("dl");
- }
- else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
- (node.tag.model & Dict.CM_ROW) != 0)
- {
- node = lexer.inferredTag("table");
- }
- else if ((element.tag.model & Dict.CM_OBJECT) != 0)
- {
- /* pop inline stack */
- while (lexer.istack.size() > lexer.istackbase)
- lexer.popInline( null);
- lexer.istackbase = istackbase;
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
-
- }
- else
- {
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- return;
- }
- }
- }
-
- /* parse known element */
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- if ((node.tag.model & Dict.CM_INLINE) != 0)
- {
- if (checkstack && !node.implicit)
- {
- checkstack = false;
-
- if (lexer.inlineDup( node) > 0)
- continue;
- }
-
- mode = Lexer.MixedContent;
- }
- else
- {
- checkstack = true;
- mode = Lexer.IgnoreWhitespace;
- }
-
- /* trim white space before <br> */
- if (node.tag == tt.tagBr)
- Node.trimSpaces(lexer, element);
-
- Node.insertNodeAtEnd(element, node);
-
- if (node.implicit)
- Report.warning(lexer, element, node, Report.INSERTING_TAG);
-
- parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
- continue;
- }
-
- /* discard unexpected tags */
- if (node.type == Node.EndTag)
- lexer.popInline( node); /* if inline end tag */
-
- Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
- }
-
- if (!((element.tag.model & Dict.CM_OPT) != 0))
- Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
-
- if ((element.tag.model & Dict.CM_OBJECT) != 0)
- {
- /* pop inline stack */
- while (lexer.istack.size() > lexer.istackbase)
- lexer.popInline( null);
- lexer.istackbase = istackbase;
- }
-
- Node.trimSpaces(lexer, element);
- Node.trimEmptyElement(lexer, element);
- }
-
- };
-
- public static class ParseTableTag implements Parser {
-
- public void parse( Lexer lexer, Node table, short mode )
- {
- Node node, parent;
- int istackbase;
- TagTable tt = lexer.configuration.tt;
-
- lexer.deferDup();
- istackbase = lexer.istackbase;
- lexer.istackbase = lexer.istack.size();
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
- if (node.tag == table.tag && node.type == Node.EndTag)
- {
- lexer.istackbase = istackbase;
- table.closed = true;
- Node.trimEmptyElement(lexer, table);
- return;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(table, node))
- continue;
-
- /* discard unknown tags */
- if (node.tag == null && node.type != Node.TextNode)
- {
- Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* if TD or TH or text or inline or block then infer <TR> */
-
- if (node.type != Node.EndTag)
- {
- if (node.tag == tt.tagTd ||
- node.tag == tt.tagTh ||
- node.tag == tt.tagTable)
- {
- lexer.ungetToken();
- node = lexer.inferredTag( "tr");
- Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
- }
- else if (node.type == Node.TextNode
- || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
- {
- Node.insertNodeBeforeElement(table, node);
- Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
- lexer.exiled = true;
-
- /* AQ: TODO
- Line 2040 of parser.c (13 Jan 2000) reads as follows:
- if (!node->type == TextNode)
- This will always evaluate to false.
- This has been reported to Dave Raggett <dsr@w3.org>
- */
- //Should be?: if (!(node.type == Node.TextNode))
- if (false)
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
-
- lexer.exiled = false;
- continue;
- }
- else if ((node.tag.model & Dict.CM_HEAD) != 0)
- {
- moveToHead(lexer, table, node);
- continue;
- }
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node.type == Node.EndTag)
- {
- if (node.tag == tt.tagForm)
- {
- lexer.badForm = 1;
- Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0)
- {
- Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = table.parent;
- parent != null; parent = parent.parent)
- {
- if (node.tag == parent.tag)
- {
- Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
- lexer.ungetToken();
- lexer.istackbase = istackbase;
- Node.trimEmptyElement(lexer, table);
- return;
- }
- }
- }
-
- if (!((node.tag.model & Dict.CM_TABLE) != 0))
- {
- lexer.ungetToken();
- Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
- lexer.istackbase = istackbase;
- Node.trimEmptyElement(lexer, table);
- return;
- }
-
- if (node.type == Node.StartTag || node.type == Node.StartEndTag)
- {
- Node.insertNodeAtEnd(table, node);;
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- continue;
- }
-
- /* discard unexpected text nodes and end tags */
- Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
- }
-
- Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
- Node.trimEmptyElement(lexer, table);
- lexer.istackbase = istackbase;
- }
-
- };
-
- public static class ParseColGroup implements Parser {
-
- public void parse( Lexer lexer, Node colgroup, short mode )
- {
- Node node, parent;
- TagTable tt = lexer.configuration.tt;
-
- if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
- return;
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
- if (node.tag == colgroup.tag && node.type == Node.EndTag)
- {
- colgroup.closed = true;
- return;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node.type == Node.EndTag)
- {
- if (node.tag == tt.tagForm)
- {
- lexer.badForm = 1;
- Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = colgroup.parent;
- parent != null; parent = parent.parent)
- {
-
- if (node.tag == parent.tag)
- {
- lexer.ungetToken();
- return;
- }
- }
- }
-
- if (node.type == Node.TextNode)
- {
- lexer.ungetToken();
- return;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(colgroup, node))
- continue;
-
- /* discard unknown tags */
- if (node.tag == null)
- {
- Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node.tag != tt.tagCol)
- {
- lexer.ungetToken();
- return;
- }
-
- if (node.type == Node.EndTag)
- {
- Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* node should be <COL> */
- Node.insertNodeAtEnd(colgroup, node);
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- }
- }
-
- };
-
- public static class ParseRowGroup implements Parser {
-
- public void parse( Lexer lexer, Node rowgroup, short mode )
- {
- Node node, parent;
- TagTable tt = lexer.configuration.tt;
-
- if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
- return;
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
- if (node.tag == rowgroup.tag)
- {
- if (node.type == Node.EndTag)
- {
- rowgroup.closed = true;
- Node.trimEmptyElement(lexer, rowgroup);
- return;
- }
-
- lexer.ungetToken();
- return;
- }
-
- /* if </table> infer end tag */
- if (node.tag == tt.tagTable && node.type == Node.EndTag)
- {
- lexer.ungetToken();
- Node.trimEmptyElement(lexer, rowgroup);
- return;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(rowgroup, node))
- continue;
-
- /* discard unknown tags */
- if (node.tag == null && node.type != Node.TextNode)
- {
- Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /*
- if TD or TH then infer <TR>
- if text or inline or block move before table
- if head content move to head
- */
-
- if (node.type != Node.EndTag)
- {
- if (node.tag == tt.tagTd || node.tag == tt.tagTh)
- {
- lexer.ungetToken();
- node = lexer.inferredTag("tr");
- Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
- }
- else if (node.type == Node.TextNode
- || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
- {
- Node.moveBeforeTable(rowgroup, node, tt);
- Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
- lexer.exiled = true;
-
- if (node.type != Node.TextNode)
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
-
- lexer.exiled = false;
- continue;
- }
- else if ((node.tag.model & Dict.CM_HEAD) != 0)
- {
- Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
- moveToHead(lexer, rowgroup, node);
- continue;
- }
- }
-
- /*
- if this is the end tag for ancestor element
- then infer end tag for this element
- */
- if (node.type == Node.EndTag)
- {
- if (node.tag == tt.tagForm)
- {
- lexer.badForm = 1;
- Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
- {
- Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = rowgroup.parent;
- parent != null; parent = parent.parent)
- {
- if (node.tag == parent.tag)
- {
- lexer.ungetToken();
- Node.trimEmptyElement(lexer, rowgroup);
- return;
- }
- }
- }
-
- /*
- if THEAD, TFOOT or TBODY then implied end tag
-
- */
- if ((node.tag.model & Dict.CM_ROWGRP) != 0)
- {
- if (node.type != Node.EndTag)
- lexer.ungetToken();
-
- Node.trimEmptyElement(lexer, rowgroup);
- return;
- }
-
- if (node.type == Node.EndTag)
- {
- Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (!(node.tag == tt.tagTr))
- {
- node = lexer.inferredTag( "tr");
- Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
- lexer.ungetToken();
- }
-
- /* node should be <TR> */
- Node.insertNodeAtEnd(rowgroup, node);
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- }
-
- Node.trimEmptyElement(lexer, rowgroup);
- }
-
- };
-
- public static class ParseRow implements Parser {
-
- public void parse( Lexer lexer, Node row, short mode )
- {
- Node node, parent;
- boolean exclude_state;
- TagTable tt = lexer.configuration.tt;
-
- if ((row.tag.model & Dict.CM_EMPTY) != 0)
- return;
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
- if (node.tag == row.tag)
- {
- if (node.type == Node.EndTag)
- {
- row.closed = true;
- Node.fixEmptyRow(lexer, row);
- return;
- }
-
- lexer.ungetToken();
- Node.fixEmptyRow(lexer, row);
- return;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node.type == Node.EndTag)
- {
- if (node.tag == tt.tagForm)
- {
- lexer.badForm = 1;
- Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node.tag == tt.tagTd || node.tag == tt.tagTh)
- {
- Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = row.parent;
- parent != null; parent = parent.parent)
- {
- if (node.tag == parent.tag)
- {
- lexer.ungetToken();
- Node.trimEmptyElement(lexer, row);
- return;
- }
- }
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(row, node))
- continue;
-
- /* discard unknown tags */
- if (node.tag == null && node.type != Node.TextNode)
- {
- Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* discard unexpected <table> element */
- if (node.tag == tt.tagTable)
- {
- Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* THEAD, TFOOT or TBODY */
- if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
- {
- lexer.ungetToken();
- Node.trimEmptyElement(lexer, row);
- return;
- }
-
- if (node.type == Node.EndTag)
- {
- Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
- continue;
- }
-
- /*
- if text or inline or block move before table
- if head content move to head
- */
-
- if (node.type != Node.EndTag)
- {
- if (node.tag == tt.tagForm)
- {
- lexer.ungetToken();
- node = lexer.inferredTag("td");
- Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
- }
- else if (node.type == Node.TextNode
- || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
- {
- Node.moveBeforeTable(row, node, tt);
- Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
- lexer.exiled = true;
-
- if (node.type != Node.TextNode)
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
-
- lexer.exiled = false;
- continue;
- }
- else if ((node.tag.model & Dict.CM_HEAD) != 0)
- {
- Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
- moveToHead(lexer, row, node);
- continue;
- }
- }
-
- if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
- {
- Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
- continue;
- }
-
- /* node should be <TD> or <TH> */
- Node.insertNodeAtEnd(row, node);
- exclude_state = lexer.excludeBlocks;
- lexer.excludeBlocks = false;
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- lexer.excludeBlocks = exclude_state;
-
- /* pop inline stack */
-
- while (lexer.istack.size() > lexer.istackbase)
- lexer.popInline( null);
- }
-
- Node.trimEmptyElement(lexer, row);
- }
-
- };
-
- /**
-  * Parser for the content of a NOFRAMES element.
-  * Records USING_NOFRAMES in lexer.badAccess, then routes the content into
-  * an explicit or inferred BODY child.
-  */
- public static class ParseNoFrames implements Parser {
-
-     public void parse(Lexer lexer, Node noframes, short mode)
-     {
-         Node token;
-         TagTable tt = lexer.configuration.tt;
-
-         lexer.badAccess |= Report.USING_NOFRAMES;
-         mode = Lexer.IgnoreWhitespace;   /* the caller's mode is ignored */
-
-         while ((token = lexer.getToken(mode)) != null)
-         {
-             /* matching end tag closes the element */
-             if (token.type == Node.EndTag && token.tag == noframes.tag)
-             {
-                 noframes.closed = true;
-                 Node.trimSpaces(lexer, noframes);
-                 return;
-             }
-
-             /* FRAME or FRAMESET implies a missing </noframes> */
-             if (token.tag == tt.tagFrame || token.tag == tt.tagFrameset)
-             {
-                 Report.warning(lexer, noframes, token, Report.MISSING_ENDTAG_BEFORE);
-                 Node.trimSpaces(lexer, noframes);
-                 lexer.ungetToken();
-                 return;
-             }
-
-             /* HTML tags are dropped; only start forms are reported */
-             if (token.tag == tt.tagHtml)
-             {
-                 boolean opening = token.type == Node.StartTag
-                                || token.type == Node.StartEndTag;
-                 if (opening)
-                     Report.warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
-
-                 continue;
-             }
-
-             /* deal with comments etc. */
-             if (Node.insertMisc(noframes, token))
-                 continue;
-
-             /* explicit <body> */
-             if (token.type == Node.StartTag && token.tag == tt.tagBody)
-             {
-                 Node.insertNodeAtEnd(noframes, token);
-                 parseTag(lexer, token, Lexer.IgnoreWhitespace);
-                 continue;
-             }
-
-             /* unknown non-text tokens are the only thing left to discard */
-             if (token.type != Node.TextNode && token.tag == null)
-             {
-                 Report.warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
-                 continue;
-             }
-
-             /* implicit body element inferred */
-             lexer.ungetToken();
-             token = lexer.inferredTag("body");
-             if (lexer.configuration.XmlOut)
-                 Report.warning(lexer, noframes, token, Report.INSERTING_TAG);
-             Node.insertNodeAtEnd(noframes, token);
-             parseTag(lexer, token, Lexer.IgnoreWhitespace);
-         }
-
-         /* ran out of input; token is null here, as in the original call */
-         Report.warning(lexer, noframes, token, Report.MISSING_ENDTAG_FOR);
-     }
-
- }
-
- public static class ParseSelect implements Parser {
-
- public void parse( Lexer lexer, Node field, short mode )
- {
- Node node;
- TagTable tt = lexer.configuration.tt;
-
- lexer.insert = -1; /* defer implicit inline start tags */
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
- if (node.tag == field.tag && node.type == Node.EndTag)
- {
- field.closed = true;
- Node.trimSpaces(lexer, field);
- return;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(field, node))
- continue;
-
- if (node.type == Node.StartTag &&
- (node.tag == tt.tagOption ||
- node.tag == tt.tagOptgroup ||
- node.tag == tt.tagScript))
- {
- Node.insertNodeAtEnd(field, node);
- parseTag(lexer, node, Lexer.IgnoreWhitespace);
- continue;
- }
-
- /* discard unexpected tags */
- Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
- }
-
- Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
- }
-
- };
-
- /**
-  * Parser for fields whose content is plain text.
-  * Text nodes are collected, FONT tags are discarded and any other tag
-  * terminates the field. TEXTAREA content is read in Preformatted mode.
-  */
- public static class ParseText implements Parser {
-
-     public void parse(Lexer lexer, Node field, short mode)
-     {
-         Node token;
-         TagTable tt = lexer.configuration.tt;
-
-         lexer.insert = -1; /* defer implicit inline start tags */
-
-         if (field.tag == tt.tagTextarea)
-             mode = Lexer.Preformatted;
-
-         while ((token = lexer.getToken(mode)) != null)
-         {
-             /* matching end tag closes the field */
-             if (token.type == Node.EndTag && token.tag == field.tag)
-             {
-                 field.closed = true;
-                 Node.trimSpaces(lexer, field);
-                 return;
-             }
-
-             /* deal with comments etc. */
-             if (Node.insertMisc(field, token))
-                 continue;
-
-             if (token.type == Node.TextNode)
-             {
-                 /* only called for 1st child */
-                 if (field.content == null && (mode & Lexer.Preformatted) == 0)
-                     Node.trimSpaces(lexer, field);
-
-                 /* empty text is skipped rather than inserted */
-                 if (token.start < token.end)
-                     Node.insertNodeAtEnd(field, token);
-
-                 continue;
-             }
-
-             if (token.tag == tt.tagFont)
-             {
-                 Report.warning(lexer, field, token, Report.DISCARDING_UNEXPECTED);
-                 continue;
-             }
-
-             /* terminate element on other tags; quiet when the CM_OPT bit is set */
-             if ((field.tag.model & Dict.CM_OPT) == 0)
-                 Report.warning(lexer, field, token, Report.MISSING_ENDTAG_BEFORE);
-
-             lexer.ungetToken();
-             Node.trimSpaces(lexer, field);
-             return;
-         }
-
-         /* ran out of input; token is null here, as in the original call */
-         if ((field.tag.model & Dict.CM_OPT) == 0)
-             Report.warning(lexer, field, token, Report.MISSING_ENDTAG_FOR);
-     }
-
- }
-
- public static class ParseOptGroup implements Parser {
-
- public void parse( Lexer lexer, Node field, short mode )
- {
- Node node;
- TagTable tt = lexer.configuration.tt;
-
- lexer.insert = -1; /* defer implicit inline start tags */
-
- while (true)
- {
- node = lexer.getToken(Lexer.IgnoreWhitespace);
- if (node == null) break;
- if (node.tag == field.tag && node.type == Node.EndTag)
- {
- field.closed = true;
- Node.trimSpaces(lexer, field);
- return;
- }
-
- /* deal with comments etc. */
- if (Node.insertMisc(field, node))
- continue;
-
- if (node.type == Node.StartTag &&
- (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
- {
- if (node.tag == tt.tagOptgroup)
- Report.warning(lexer, field, node, Report.CANT_BE_NESTED);
-
- Node.insertNodeAtEnd(field, node);
- parseTag(lexer, node, Lexer.MixedContent);
- continue;
- }
-
- /* discard unexpected tags */
- Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
- }
- }
-
- };
-
- /* Accessors for the shared parser instances, one per element category. */
-
- /** Returns the shared ParseHTML instance. */
- public static Parser getParseHTML() { return _parseHTML; }
-
- /** Returns the shared ParseHead instance. */
- public static Parser getParseHead() { return _parseHead; }
-
- /** Returns the shared ParseTitle instance. */
- public static Parser getParseTitle() { return _parseTitle; }
-
- /** Returns the shared ParseScript instance. */
- public static Parser getParseScript() { return _parseScript; }
-
- /** Returns the shared ParseBody instance. */
- public static Parser getParseBody() { return _parseBody; }
-
- /** Returns the shared ParseFrameSet instance. */
- public static Parser getParseFrameSet() { return _parseFrameSet; }
-
- /** Returns the shared ParseInline instance. */
- public static Parser getParseInline() { return _parseInline; }
-
- /** Returns the shared ParseList instance. */
- public static Parser getParseList() { return _parseList; }
-
- /** Returns the shared ParseDefList instance. */
- public static Parser getParseDefList() { return _parseDefList; }
-
- /** Returns the shared ParsePre instance. */
- public static Parser getParsePre() { return _parsePre; }
-
- /** Returns the shared ParseBlock instance. */
- public static Parser getParseBlock() { return _parseBlock; }
-
- /** Returns the shared ParseTableTag instance. */
- public static Parser getParseTableTag() { return _parseTableTag; }
-
- /** Returns the shared ParseColGroup instance. */
- public static Parser getParseColGroup() { return _parseColGroup; }
-
- /** Returns the shared ParseRowGroup instance. */
- public static Parser getParseRowGroup() { return _parseRowGroup; }
-
- /** Returns the shared ParseRow instance. */
- public static Parser getParseRow() { return _parseRow; }
-
- /** Returns the shared ParseNoFrames instance. */
- public static Parser getParseNoFrames() { return _parseNoFrames; }
-
- /** Returns the shared ParseSelect instance. */
- public static Parser getParseSelect() { return _parseSelect; }
-
- /** Returns the shared ParseText instance. */
- public static Parser getParseText() { return _parseText; }
-
- /** Returns the shared ParseOptGroup instance. */
- public static Parser getParseOptGroup() { return _parseOptGroup; }
-
-
- private static Parser _parseHTML = new ParseHTML();
- private static Parser _parseHead = new ParseHead();
- private static Parser _parseTitle = new ParseTitle();
- private static Parser _parseScript = new ParseScript();
- private static Parser _parseBody = new ParseBody();
- private static Parser _parseFrameSet = new ParseFrameSet();
- private static Parser _parseInline = new ParseInline();
- private static Parser _parseList = new ParseList();
- private static Parser _parseDefList = new ParseDefList();
- private static Parser _parsePre = new ParsePre();
- private static Parser _parseBlock = new ParseBlock();
- private static Parser _parseTableTag = new ParseTableTag();
- private static Parser _parseColGroup = new ParseColGroup();
- private static Parser _parseRowGroup = new ParseRowGroup();
- private static Parser _parseRow = new ParseRow();
- private static Parser _parseNoFrames = new ParseNoFrames();
- private static Parser _parseSelect = new ParseSelect();
- private static Parser _parseText = new ParseText();
- private static Parser _parseOptGroup = new ParseOptGroup();
-
- /**
-  * Parses an HTML document; HTML is the top level element.
-  * Comments and similar nodes plus the first DOCTYPE are attached to the
-  * root; stray end tags and further DOCTYPEs are reported and dropped. The
-  * first other token starts the HTML element, which is inferred unless the
-  * token is an html start tag.
-  *
-  * @param lexer token source and error sink
-  * @return the root node of the parsed document
-  */
- public static Node parseDocument(Lexer lexer)
- {
-     TagTable tt = lexer.configuration.tt;
-     Node seenDoctype = null;
-
-     Node document = lexer.newNode();
-     document.type = Node.RootNode;
-
-     Node token;
-     while ((token = lexer.getToken(Lexer.IgnoreWhitespace)) != null)
-     {
-         /* deal with comments etc. */
-         if (Node.insertMisc(document, token))
-             continue;
-
-         if (token.type == Node.DocTypeTag)
-         {
-             /* keep only the first DOCTYPE */
-             if (seenDoctype != null)
-                 Report.warning(lexer, document, token, Report.DISCARDING_UNEXPECTED);
-             else
-             {
-                 Node.insertNodeAtEnd(document, token);
-                 seenDoctype = token;
-             }
-             continue;
-         }
-
-         if (token.type == Node.EndTag)
-         {
-             Report.warning(lexer, document, token, Report.DISCARDING_UNEXPECTED); //TODO?
-             continue;
-         }
-
-         Node html;
-         if (token.type == Node.StartTag && token.tag == tt.tagHtml)
-             html = token;
-         else
-         {
-             /* anything else: push it back and infer the <html> element */
-             lexer.ungetToken();
-             html = lexer.inferredTag("html");
-         }
-
-         Node.insertNodeAtEnd(document, html);
-         getParseHTML().parse(lexer, html, (short)0); // TODO?
-         return document;
-     }
-
-     return document;
- }
-
- /**
-  * Indicates whether or not whitespace should be preserved for this element.
-  * If an <code>xml:space</code> attribute is found, the result is
-  * <code>true</code> when its value is <code>preserve</code> and
-  * <code>false</code> for any other value, including a missing value.
-  * If no <code>xml:space</code> attribute is found, the element names
-  * <code>pre</code>, <code>script</code>, <code>style</code> and
-  * <code>xsl:text</code> (compared with <code>Lexer.wstrcasecmp</code>)
-  * yield <code>true</code>, as does an element whose parser in the given
-  * <code>TagTable</code> is the "pre" parser. Otherwise <code>false</code>
-  * is returned.
-  * @param element The <code>Node</code> to test to see if whitespace should be
-  *                preserved.
-  * @param tt The <code>TagTable</code> used for the <code>getParsePre()</code>
-  *           test. This may be <code>null</code>, in which case that test
-  *           is bypassed.
-  * @return <code>true</code> or <code>false</code>, as explained above.
-  */
-
- public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
- {
-     AttVal attribute;
-
-     /* search attributes for xml:space */
-     for (attribute = element.attributes; attribute != null; attribute = attribute.next)
-     {
-         /*
-           Constant-first equals: an attribute without a name is skipped and
-           an xml:space without a value counts as "not preserve" instead of
-           raising a NullPointerException.
-         */
-         if ("xml:space".equals(attribute.attribute))
-             return "preserve".equals(attribute.value);
-     }
-
-     /* kludge for html docs without explicit xml:space attribute */
-     if (Lexer.wstrcasecmp(element.element, "pre") == 0
-         || Lexer.wstrcasecmp(element.element, "script") == 0
-         || Lexer.wstrcasecmp(element.element, "style") == 0)
-         return true;
-
-     if ( (tt != null) && (tt.findParser(element) == getParsePre()) )
-         return true;
-
-     /* kludge for XSL docs */
-     if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
-         return true;
-
-     return false;
- }
-
- /**
-  * Parses the content of one element of an XML document.
-  * Child tokens are appended to <code>element</code> until its matching
-  * end tag is seen; start tags are parsed recursively. Afterwards, unless
-  * the mode is Preformatted, one leading space of a first text child and
-  * one trailing space of a last text child are trimmed, and a text node
-  * emptied by that is discarded.
-  *
-  * @param lexer   token source and error sink
-  * @param element the element whose content is parsed
-  * @param mode    lexer mode; replaced by Preformatted when
-  *                XMLPreserveWhiteSpace says so
-  */
- public static void parseXMLElement(Lexer lexer, Node element, short mode)
- {
-     /* Jeff Young's kludge for XSL docs: xsl:text content is not parsed here */
-     if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
-         return;
-
-     /* if node is pre or has xml:space="preserve" then do so */
-     if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
-         mode = Lexer.Preformatted;
-
-     Node child;
-     while ((child = lexer.getToken(mode)) != null)
-     {
-         if (child.type == Node.EndTag)
-         {
-             if (child.element.equals(element.element))
-             {
-                 element.closed = true;
-                 break;
-             }
-
-             /* discard unexpected end tags */
-             Report.error(lexer, element, child, Report.UNEXPECTED_ENDTAG);
-             continue;
-         }
-
-         /* parse content on seeing start tag */
-         if (child.type == Node.StartTag)
-             parseXMLElement(lexer, child, mode);
-
-         Node.insertNodeAtEnd(element, child);
-     }
-
-     /*
-       if first child is text then trim initial space and
-       delete text node if it is empty.
-     */
-     Node first = element.content;
-
-     if (first != null && first.type == Node.TextNode && mode != Lexer.Preformatted
-         && first.textarray[first.start] == (byte)' ')
-     {
-         first.start++;
-
-         if (first.start >= first.end)
-             Node.discardElement(first);
-     }
-
-     /*
-       if last child is text then trim final space and
-       delete the text node if it is empty
-       (element.last is read only after the first child was handled)
-     */
-     Node last = element.last;
-
-     if (last != null && last.type == Node.TextNode && mode != Lexer.Preformatted
-         && last.textarray[last.end - 1] == (byte)' ')
-     {
-         last.end--;
-
-         if (last.start >= last.end)
-             Node.discardElement(last);
-     }
- }
-
- /**
-  * Parses a generic XML document.
-  * Sets <code>lexer.configuration.XmlTags</code>, then attaches comments
-  * and similar nodes, the first DOCTYPE and every root-level element to a
-  * new root node. Unexpected end tags and additional DOCTYPEs are reported
-  * and dropped. When <code>XmlPi</code> is configured the XML declaration
-  * is fixed up via <code>lexer.fixXMLPI</code>.
-  *
-  * @param lexer token source and error sink
-  * @return the root node of the parsed document
-  */
- public static Node parseXMLDocument(Lexer lexer)
- {
-     Node node, document, doctype;
-
-     document = lexer.newNode();
-     document.type = Node.RootNode;
-     doctype = null;
-     lexer.configuration.XmlTags = true;
-
-     while (true)
-     {
-         node = lexer.getToken(Lexer.IgnoreWhitespace);
-         if (node == null) break;
-         /* discard unexpected end tags */
-         if (node.type == Node.EndTag)
-         {
-             Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
-             continue;
-         }
-
-         /* deal with comments etc. */
-         if (Node.insertMisc(document, node))
-             continue;
-
-         if (node.type == Node.DocTypeTag)
-         {
-             if (doctype == null)
-             {
-                 Node.insertNodeAtEnd(document, node);
-                 doctype = node;
-             }
-             else
-                 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
-             continue;
-         }
-
-         /*
-           an empty element such as <root/> has no content to parse but
-           still belongs to the document; previously it was silently dropped
-         */
-         if (node.type == Node.StartEndTag)
-         {
-             Node.insertNodeAtEnd(document, node);
-             continue;
-         }
-
-         /* if start tag then parse element's content */
-         if (node.type == Node.StartTag)
-         {
-             Node.insertNodeAtEnd(document, node);
-             parseXMLElement(lexer, node, Lexer.IgnoreWhitespace);
-         }
-
-     }
-
-     /* the disabled "discard the document type" block (#if 0) was removed */
-
-     if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
-         Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
-
-     /* ensure presence of initial <?XML version="1.0"?> */
-     if (lexer.configuration.XmlPi)
-         lexer.fixXMLPI(document);
-
-     return document;
- }
-
- /**
-  * Tells whether a node is to be treated as JavaScript.
-  * A node without any attributes counts as JavaScript. Otherwise the result
-  * is true when at least one <code>language</code> or <code>type</code>
-  * attribute (names compared with Lexer.wstrcasecmp) has a value for which
-  * <code>Lexer.wsubstr(value, "javascript")</code> holds.
-  *
-  * @param node the node whose attributes are inspected
-  * @return true if the node is considered JavaScript
-  */
- public static boolean isJavaScript(Node node)
- {
-     if (node.attributes == null)
-         return true;
-
-     boolean found = false;
-     AttVal attr = node.attributes;
-
-     /* every attribute is visited, exactly as before; no early exit */
-     while (attr != null)
-     {
-         boolean namesLanguage = Lexer.wstrcasecmp(attr.attribute, "language") == 0
-                              || Lexer.wstrcasecmp(attr.attribute, "type") == 0;
-
-         if (namesLanguage && Lexer.wsubstr(attr.value, "javascript"))
-             found = true;
-
-         attr = attr.next;
-     }
-
-     return found;
- }
-
-}
+++ /dev/null
-/*
- * @(#)Report.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Error/informational message reporter.
- *
- * You should only need to edit the file TidyMessages.properties
- * to localize HTML tidy.
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-import java.io.PrintWriter;
-import java.text.MessageFormat;
-import java.util.Hashtable;
-import java.util.MissingResourceException;
-import java.util.ResourceBundle;
-
-import org.eclipse.core.resources.IMarker;
-import org.eclipse.core.runtime.CoreException;
-import org.eclipse.ui.texteditor.MarkerUtilities;
-
-public class Report {
-
- /* URL used to point readers to the Web Accessibility Guidelines */
- public static final String ACCESS_URL = "http://www.w3.org/WAI/GL";
-
- public static final String RELEASE_DATE = "4th August 2000";
-
- public static String currentFile; /* file name inserted into the "emacs_format" position message (sasdjb 01May00, for GNU Emacs error parsing) */
-
- /* error codes for entities: the 'code' argument of entityError() */
-
- public static final short MISSING_SEMICOLON = 1;
- public static final short UNKNOWN_ENTITY = 2;
- public static final short UNESCAPED_AMPERSAND = 3;
-
- /* error codes for element messages: the last argument the parser passes to Report.warning() and Report.error() */
-
- public static final short MISSING_ENDTAG_FOR = 1;
- public static final short MISSING_ENDTAG_BEFORE = 2;
- public static final short DISCARDING_UNEXPECTED = 3;
- public static final short NESTED_EMPHASIS = 4;
- public static final short NON_MATCHING_ENDTAG = 5;
- public static final short TAG_NOT_ALLOWED_IN = 6;
- public static final short MISSING_STARTTAG = 7;
- public static final short UNEXPECTED_ENDTAG = 8;
- public static final short USING_BR_INPLACE_OF = 9;
- public static final short INSERTING_TAG = 10;
- public static final short SUSPECTED_MISSING_QUOTE = 11;
- public static final short MISSING_TITLE_ELEMENT = 12;
- public static final short DUPLICATE_FRAMESET = 13;
- public static final short CANT_BE_NESTED = 14;
- public static final short OBSOLETE_ELEMENT = 15;
- public static final short PROPRIETARY_ELEMENT = 16;
- public static final short UNKNOWN_ELEMENT = 17;
- public static final short TRIM_EMPTY_ELEMENT = 18;
- public static final short COERCE_TO_ENDTAG = 19;
- public static final short ILLEGAL_NESTING = 20;
- public static final short NOFRAMES_CONTENT = 21;
- public static final short CONTENT_AFTER_BODY = 22;
- public static final short INCONSISTENT_VERSION = 23;
- public static final short MALFORMED_COMMENT = 24;
- public static final short BAD_COMMENT_CHARS = 25;
- public static final short BAD_XML_COMMENT = 26;
- public static final short BAD_CDATA_CONTENT = 27;
- public static final short INCONSISTENT_NAMESPACE = 28;
- public static final short DOCTYPE_AFTER_TAGS = 29;
- public static final short MALFORMED_DOCTYPE = 30;
- public static final short UNEXPECTED_END_OF_FILE = 31;
- public static final short DTYPE_NOT_UPPER_CASE = 32;
- public static final short TOO_MANY_ELEMENTS = 33;
-
- /* error codes used for attribute messages: the 'code' argument of attrError(); values overlap with the groups above, so a code is only meaningful together with the method it is passed to */
-
- public static final short UNKNOWN_ATTRIBUTE = 1;
- public static final short MISSING_ATTRIBUTE = 2;
- public static final short MISSING_ATTR_VALUE = 3;
- public static final short BAD_ATTRIBUTE_VALUE = 4;
- public static final short UNEXPECTED_GT = 5;
- public static final short PROPRIETARY_ATTR_VALUE = 6;
- public static final short REPEATED_ATTRIBUTE = 7;
- public static final short MISSING_IMAGEMAP = 8;
- public static final short XML_ATTRIBUTE_VALUE = 9;
- public static final short UNEXPECTED_QUOTEMARK = 10;
- public static final short ID_NAME_MISMATCH = 11;
-
- /* accessibility flaws: bit flags that are OR-ed into lexer.badAccess */
-
- public static final short MISSING_IMAGE_ALT = 1;
- public static final short MISSING_LINK_ALT = 2;
- public static final short MISSING_SUMMARY = 4;
- public static final short MISSING_IMAGE_MAP = 8;
- public static final short USING_FRAMES = 16;
- public static final short USING_NOFRAMES = 32;
-
- /* presentation flaws: distinct powers of two, presumably combined as bit flags like the accessibility flaws (TODO confirm where they are stored) */
-
- public static final short USING_SPACER = 1;
- public static final short USING_LAYER = 2;
- public static final short USING_NOBR = 4;
- public static final short USING_FONT = 8;
- public static final short USING_BODY = 16;
-
- /* character encoding errors: the 'code' argument of encodingError(); WINDOWS_CHARS is also OR-ed into lexer.badChars there */
- public static final short WINDOWS_CHARS = 1;
- public static final short NON_ASCII = 2;
- public static final short FOUND_UTF16 = 4;
-
- private static short optionerrors; /* incremented by unknownOption() and badArgument() */
-
- private static ResourceBundle res = null; /* message texts, loaded once by the static initializer below */
-
- static {
- try {
- res = ResourceBundle.getBundle("org/w3c/tidy/TidyMessages");
- } catch (MissingResourceException e) {
- throw new Error(e.toString()); /* a missing bundle aborts class initialization */
- }
- }
-
- public static void tidyPrint(PrintWriter p, String msg) {
- p.print(msg);
- }
-
- public static void tidyPrintln(PrintWriter p, String msg) {
- p.println(msg);
- }
-
- public static void tidyPrintln(PrintWriter p) {
- p.println();
- }
-
- public static void showVersion(PrintWriter p) {
- tidyPrintln(p, "Java HTML Tidy release date: " + RELEASE_DATE);
- tidyPrintln(p, "See http://www.w3.org/People/Raggett for details");
- }
-
- /**
-  * Prints a short description of a node to <code>lexer.errout</code>:
-  * the start or end tag in angle brackets, <code>&lt;!DOCTYPE&gt;</code>,
-  * the words "plain text", or the bare element name for any other node
-  * type. Does nothing when the node is null.
-  *
-  * @param lexer supplies the error writer
-  * @param tag   node to describe; may be null
-  */
- public static void tag(Lexer lexer, Node tag)
- {
-     if (tag == null)
-         return;
-
-     String label;
-
-     if (tag.type == Node.StartTag)
-         label = "<" + tag.element + ">";
-     else if (tag.type == Node.EndTag)
-         label = "</" + tag.element + ">";
-     else if (tag.type == Node.DocTypeTag)
-         label = "<!DOCTYPE>";
-     else if (tag.type == Node.TextNode)
-         label = "plain text";
-     else
-         label = tag.element;
-
-     tidyPrint(lexer.errout, label);
- }
-
- /**
-  * Prints a short description of a node to <code>lexer.errout</code> and
-  * appends the same text to <code>errorMessage</code>, which the callers
-  * use as the text of a problem marker.
-  * The description is the start or end tag in angle brackets,
-  * <code>&lt;!DOCTYPE&gt;</code>, the words "plain text", or the bare
-  * element name for any other node type. Does nothing when the node is
-  * null.
-  *
-  * @param errorMessage buffer receiving a copy of the printed text
-  * @param lexer        supplies the error writer
-  * @param tag          node to describe; may be null
-  */
- public static void tag(StringBuffer errorMessage, Lexer lexer, Node tag)
- {
-     if (tag == null)
-         return;
-
-     String label;
-
-     if (tag.type == Node.StartTag)
-         label = "<" + tag.element + ">";
-     else if (tag.type == Node.EndTag)
-         label = "</" + tag.element + ">";
-     else if (tag.type == Node.DocTypeTag)
-         /* the buffer used to receive "</" + element + ">" here, which
-            disagreed with what was printed */
-         label = "<!DOCTYPE>";
-     else if (tag.type == Node.TextNode)
-         label = "plain text";
-     else
-         label = tag.element;
-
-     /* one label for both sinks keeps console output and marker text in sync */
-     tidyPrint(lexer.errout, label);
-     errorMessage.append(label);
- }
-
- /**
-  * Counts and reports an unknown configuration option on System.err.
-  * No lexer exists yet when this is called, hence the use of System.err.
-  *
-  * @param option name of the unrecognized option
-  */
- public static void unknownOption(String option)
- {
-     optionerrors++;
-
-     String line;
-     try
-     {
-         String pattern = res.getString("unknown_option");
-         line = MessageFormat.format(pattern, new Object[] { option });
-     }
-     catch (MissingResourceException e)
-     {
-         /* no message text available: report the lookup failure instead */
-         line = e.toString();
-     }
-     System.err.println(line);
- }
-
- /**
-  * Counts and reports a bad argument for a configuration option on
-  * System.err. No lexer exists yet when this is called, hence the use of
-  * System.err.
-  *
-  * @param option name of the option whose argument was rejected
-  */
- public static void badArgument(String option)
- {
-     optionerrors++;
-
-     String line;
-     try
-     {
-         String pattern = res.getString("bad_argument");
-         line = MessageFormat.format(pattern, new Object[] { option });
-     }
-     catch (MissingResourceException e)
-     {
-         /* no message text available: report the lookup failure instead */
-         line = e.toString();
-     }
-     System.err.println(line);
- }
-
- /**
-  * Prints the current lexer position (line and column) to
-  * <code>lexer.errout</code>. With the Emacs option set, the
-  * "emacs_format" pattern is used, which also receives
-  * <code>currentFile</code>, and a single space is printed after it;
-  * otherwise the "line_column" pattern is used.
-  *
-  * @param lexer supplies position, configuration and error writer
-  */
- public static void position(Lexer lexer)
- {
-     try
-     {
-         boolean emacs = lexer.configuration.Emacs;
-         String pattern;
-         Object[] args;
-
-         if (emacs)
-         {
-             /* Change formatting to be parsable by GNU Emacs */
-             pattern = res.getString("emacs_format");
-             args = new Object[] { currentFile, new Integer(lexer.lines), new Integer(lexer.columns)};
-         }
-         else
-         {
-             /* traditional format */
-             pattern = res.getString("line_column");
-             args = new Object[] { new Integer(lexer.lines), new Integer(lexer.columns)};
-         }
-
-         tidyPrint(lexer.errout, MessageFormat.format(pattern, args));
-
-         if (emacs)
-             tidyPrint(lexer.errout, " ");
-     }
-     catch (MissingResourceException e)
-     {
-         lexer.errout.println(e.toString());
-     }
- }
-
- /**
-  * Records a character encoding warning.
-  * Always increments <code>lexer.warnings</code>. When warnings are shown,
-  * the position is printed; for WINDOWS_CHARS the flag is also OR-ed into
-  * <code>lexer.badChars</code>, the "illegal_char" message is printed and
-  * a problem marker carrying the same text is created on the lexer's
-  * file. Other codes only print the position.
-  *
-  * @param lexer supplies position, configuration, error writer and file
-  * @param code  one of the character encoding error codes
-  * @param c     the offending character code, inserted into the message
-  */
- public static void encodingError(Lexer lexer, short code, int c)
- {
-     lexer.warnings++;
-
-     if (!lexer.configuration.ShowWarnings)
-         return;
-
-     position(lexer);
-
-     if (code == WINDOWS_CHARS)
-     {
-         lexer.badChars |= WINDOWS_CHARS;
-         try
-         {
-             Hashtable attributes = new Hashtable();
-             StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
-             MarkerUtilities.setLineNumber(attributes, lexer.lines);
-
-             /* format once; console and marker get the same text */
-             String text = MessageFormat.format(res.getString("illegal_char"), new Object[] { new Integer(c)});
-             tidyPrint(lexer.errout, text);
-             errorMessage.append(text);
-
-             attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
-             try
-             {
-                 MarkerUtilities.setMessage(attributes, errorMessage.toString());
-                 MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
-             }
-             catch (CoreException e)
-             {
-                 /* marker creation is best effort, but a failure is no
-                    longer dropped silently */
-                 lexer.errout.println(e.toString());
-             }
-         }
-         catch (MissingResourceException e)
-         {
-             lexer.errout.println(e.toString());
-         }
-     }
-
-     tidyPrintln(lexer.errout);
- }
-
- /**
-  * Records an entity warning.
-  * Always increments <code>lexer.warnings</code>. When warnings are shown,
-  * the position and the message for the given code are printed and a
-  * problem marker with the same text (prefixed by the column) is created
-  * on the lexer's file. For an unrecognized code, or when the message text
-  * cannot be loaded, the marker carries the column prefix only.
-  *
-  * @param lexer  supplies position, configuration, error writer and file
-  * @param code   MISSING_SEMICOLON, UNKNOWN_ENTITY or UNESCAPED_AMPERSAND
-  * @param entity entity text inserted into the message
-  * @param c      not used by this method
-  */
- public static void entityError(Lexer lexer, short code, String entity, int c)
- {
-     lexer.warnings++;
-
-     if (!lexer.configuration.ShowWarnings)
-         return;
-
-     position(lexer);
-     Hashtable attributes = new Hashtable();
-     StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
-     MarkerUtilities.setLineNumber(attributes, lexer.lines);
-
-     /* build the message once; console and marker get the same text */
-     String text = null;
-     try
-     {
-         if (code == MISSING_SEMICOLON)
-             text = MessageFormat.format(res.getString("missing_semicolon"), new Object[] { entity });
-         else if (code == UNKNOWN_ENTITY)
-             text = MessageFormat.format(res.getString("unknown_entity"), new Object[] { entity });
-         else if (code == UNESCAPED_AMPERSAND)
-             text = res.getString("unescaped_ampersand");
-     }
-     catch (MissingResourceException e)
-     {
-         lexer.errout.println(e.toString());
-     }
-
-     if (text != null)
-     {
-         tidyPrint(lexer.errout, text);
-         errorMessage.append(text);
-     }
-
-     attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
-     try
-     {
-         MarkerUtilities.setMessage(attributes, errorMessage.toString());
-         MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
-     }
-     catch (CoreException e)
-     {
-         /* marker creation is best effort, but a failure is no longer
-            dropped silently */
-         lexer.errout.println(e.toString());
-     }
-     tidyPrintln(lexer.errout);
- }
-
- public static void attrError(Lexer lexer, Node node, String attr, short code) {
- lexer.warnings++;
-
- /* keep quiet after 6 errors */
- if (lexer.errors > 6)
- return;
-
- Hashtable attributes = new Hashtable();
- StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
-
- if (lexer.configuration.ShowWarnings) {
- /* on end of file adjust reported position to end of input */
- if (code == UNEXPECTED_END_OF_FILE) {
- lexer.lines = lexer.in.curline;
- lexer.columns = lexer.in.curcol;
- }
-
- position(lexer);
-
- MarkerUtilities.setLineNumber(attributes, lexer.lines);
-
- if (code == UNKNOWN_ATTRIBUTE) {
- try {
- tidyPrint(lexer.errout, MessageFormat.format(res.getString("unknown_attribute"), new Object[] { attr }));
- errorMessage.append(MessageFormat.format(res.getString("unknown_attribute"), new Object[] { attr }));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == MISSING_ATTRIBUTE) {
- try {
- tidyPrint(lexer.errout, res.getString("warning"));
- errorMessage.append(res.getString("warning"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attribute"), new Object[] { attr }));
- errorMessage.append(MessageFormat.format(res.getString("missing_attribute"), new Object[] { attr }));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == MISSING_ATTR_VALUE) {
- try {
- tidyPrint(lexer.errout, res.getString("warning"));
- errorMessage.append(res.getString("warning"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, MessageFormat.format(res.getString("missing_attr_value"), new Object[] { attr }));
- errorMessage.append(MessageFormat.format(res.getString("missing_attr_value"), new Object[] { attr }));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == MISSING_IMAGEMAP) {
- try {
- tidyPrint(lexer.errout, res.getString("warning"));
- errorMessage.append(res.getString("warning"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, res.getString("missing_imagemap"));
- errorMessage.append(res.getString("missing_imagemap"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- lexer.badAccess |= MISSING_IMAGE_MAP;
- } else if (code == BAD_ATTRIBUTE_VALUE) {
- try {
- tidyPrint(lexer.errout, res.getString("warning"));
- errorMessage.append(res.getString("warning"));
- tag(lexer, node);
- tidyPrint(lexer.errout, MessageFormat.format(res.getString("bad_attribute_value"), new Object[] { attr }));
- errorMessage.append(MessageFormat.format(res.getString("bad_attribute_value"), new Object[] { attr }));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == XML_ATTRIBUTE_VALUE) {
- try {
- tidyPrint(lexer.errout, res.getString("warning"));
- errorMessage.append(res.getString("warning"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, MessageFormat.format(res.getString("xml_attribute_value"), new Object[] { attr }));
- errorMessage.append(MessageFormat.format(res.getString("xml_attribute_value"), new Object[] { attr }));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == UNEXPECTED_GT) {
- try {
- tidyPrint(lexer.errout, res.getString("error"));
- errorMessage.append(res.getString("error"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, res.getString("unexpected_gt"));
- errorMessage.append(res.getString("unexpected_gt"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- lexer.errors++;
- ;
- } else if (code == UNEXPECTED_QUOTEMARK) {
- try {
- tidyPrint(lexer.errout, res.getString("warning"));
- errorMessage.append(res.getString("warning"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, res.getString("unexpected_quotemark"));
- errorMessage.append(res.getString("unexpected_quotemark"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == REPEATED_ATTRIBUTE) {
- try {
- tidyPrint(lexer.errout, res.getString("warning"));
- errorMessage.append(res.getString("warning"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, res.getString("repeated_attribute"));
- errorMessage.append(res.getString("repeated_attribute"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == PROPRIETARY_ATTR_VALUE) {
- try {
- tidyPrint(lexer.errout, res.getString("warning"));
- errorMessage.append(res.getString("warning"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, MessageFormat.format(res.getString("proprietary_attr_value"), new Object[] { attr }));
- errorMessage.append(MessageFormat.format(res.getString("proprietary_attr_value"), new Object[] { attr }));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == UNEXPECTED_END_OF_FILE) {
- try {
- tidyPrint(lexer.errout, res.getString("unexpected_end_of_file"));
- errorMessage.append(res.getString("unexpected_end_of_file"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == ID_NAME_MISMATCH) {
- try {
- tidyPrint(lexer.errout, res.getString("warning"));
- errorMessage.append(res.getString("warning"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, res.getString("id_name_mismatch"));
- errorMessage.append(res.getString("id_name_mismatch"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
- attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
- // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
- try {
- MarkerUtilities.setMessage(attributes, errorMessage.toString());
- MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
- } catch (CoreException e) {
- }
- tidyPrintln(lexer.errout);
- } else if (code == UNEXPECTED_GT) {
- position(lexer);
- MarkerUtilities.setLineNumber(attributes, lexer.lines);
- try {
- tidyPrint(lexer.errout, res.getString("error"));
- errorMessage.append(res.getString("error"));
- tag(errorMessage, lexer, node);
- tidyPrint(lexer.errout, res.getString("unexpected_gt"));
- errorMessage.append(res.getString("unexpected_gt"));
- attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
- // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
- // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
- try {
- MarkerUtilities.setMessage(attributes, errorMessage.toString());
- MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
- } catch (CoreException e) {
- }
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- tidyPrintln(lexer.errout);
- lexer.errors++;
- ;
- }
-
- }
-
- /**
-  * Reports a parser warning identified by <code>code</code>.
-  *
-  * <p>The warning counter on the lexer is always incremented. Nothing is
-  * reported once more than 6 errors have been counted, or when
-  * <code>lexer.configuration.ShowWarnings</code> is off. Otherwise the
-  * localized text is written to <code>lexer.errout</code> and, in parallel,
-  * collected into a message that is attached to a warning-severity problem
-  * marker on <code>lexer.getIFile()</code>.</p>
-  *
-  * <p>Unlike the previous revision, the "_suffix" texts of
-  * UNEXPECTED_ENDTAG and TOO_MANY_ELEMENTS are also appended to the marker
-  * message (as <code>error()</code> already does for UNEXPECTED_ENDTAG), and
-  * a missing suffix resource no longer drops the main text from the marker.</p>
-  *
-  * @param lexer   lexer providing position, configuration and error output
-  * @param element element used by some messages (may be null for codes
-  *                that only optionally use it)
-  * @param node    node used by some messages
-  * @param code    one of the warning code constants of this class
-  */
- public static void warning(Lexer lexer, Node element, Node node, short code) {
-
-   TagTable tt = lexer.configuration.tt;
-
-   lexer.warnings++;
-
-   /* keep quiet after 6 errors */
-   if (lexer.errors > 6)
-     return;
-
-   if (!lexer.configuration.ShowWarnings)
-     return;
-
-   /* on end of file adjust reported position to end of input */
-   if (code == UNEXPECTED_END_OF_FILE) {
-     lexer.lines = lexer.in.curline;
-     lexer.columns = lexer.in.curcol;
-   }
-
-   position(lexer);
-   Hashtable attributes = new Hashtable();
-   StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
-
-   MarkerUtilities.setLineNumber(attributes, lexer.lines);
-
-   /* the chain order is kept exactly as before: the first matching code wins */
-   if (code == MISSING_ENDTAG_FOR) {
-     appendWarningText(lexer, errorMessage, "missing_endtag_for", new Object[] { element.element });
-   } else if (code == MISSING_ENDTAG_BEFORE) {
-     appendWarningText(lexer, errorMessage, "missing_endtag_before", new Object[] { element.element });
-     tag(errorMessage, lexer, node);
-   } else if (code == DISCARDING_UNEXPECTED) {
-     appendWarningText(lexer, errorMessage, "discarding_unexpected", null);
-     tag(errorMessage, lexer, node);
-   } else if (code == NESTED_EMPHASIS) {
-     appendWarningText(lexer, errorMessage, "nested_emphasis", null);
-     tag(errorMessage, lexer, node);
-   } else if (code == COERCE_TO_ENDTAG) {
-     appendWarningText(lexer, errorMessage, "coerce_to_endtag", new Object[] { element.element });
-   } else if (code == NON_MATCHING_ENDTAG) {
-     appendWarningText(lexer, errorMessage, "non_matching_endtag_1", null);
-     tag(errorMessage, lexer, node);
-     appendWarningText(lexer, errorMessage, "non_matching_endtag_2", new Object[] { element.element });
-   } else if (code == TAG_NOT_ALLOWED_IN) {
-     appendWarningText(lexer, errorMessage, "warning", null);
-     tag(errorMessage, lexer, node);
-     appendWarningText(lexer, errorMessage, "tag_not_allowed_in", new Object[] { element.element });
-   } else if (code == DOCTYPE_AFTER_TAGS) {
-     appendWarningText(lexer, errorMessage, "doctype_after_tags", null);
-   } else if (code == MISSING_STARTTAG) {
-     appendWarningText(lexer, errorMessage, "missing_starttag", new Object[] { node.element });
-   } else if (code == UNEXPECTED_ENDTAG) {
-     /* the suffix is only attempted when the main text could be produced */
-     if (appendWarningText(lexer, errorMessage, "unexpected_endtag", new Object[] { node.element })
-       && element != null)
-       appendWarningText(lexer, errorMessage, "unexpected_endtag_suffix", new Object[] { element.element });
-   } else if (code == TOO_MANY_ELEMENTS) {
-     if (appendWarningText(lexer, errorMessage, "too_many_elements", new Object[] { node.element })
-       && element != null)
-       appendWarningText(lexer, errorMessage, "too_many_elements_suffix", new Object[] { element.element });
-   } else if (code == USING_BR_INPLACE_OF) {
-     appendWarningText(lexer, errorMessage, "using_br_inplace_of", null);
-     tag(errorMessage, lexer, node);
-   } else if (code == INSERTING_TAG) {
-     appendWarningText(lexer, errorMessage, "inserting_tag", new Object[] { node.element });
-   } else if (code == CANT_BE_NESTED) {
-     appendWarningText(lexer, errorMessage, "warning", null);
-     tag(errorMessage, lexer, node);
-     appendWarningText(lexer, errorMessage, "cant_be_nested", null);
-   } else if (code == PROPRIETARY_ELEMENT) {
-     appendWarningText(lexer, errorMessage, "warning", null);
-     tag(errorMessage, lexer, node);
-     appendWarningText(lexer, errorMessage, "proprietary_element", null);
-
-     /* remember which proprietary layout tag was used for the summary */
-     if (node.tag == tt.tagLayer)
-       lexer.badLayout |= USING_LAYER;
-     else if (node.tag == tt.tagSpacer)
-       lexer.badLayout |= USING_SPACER;
-     else if (node.tag == tt.tagNobr)
-       lexer.badLayout |= USING_NOBR;
-   } else if (code == OBSOLETE_ELEMENT) {
-     boolean obsolete = element.tag != null && (element.tag.model & Dict.CM_OBSOLETE) != 0;
-     appendWarningText(lexer, errorMessage, obsolete ? "obsolete_element" : "replacing_element", null);
-     tag(errorMessage, lexer, element);
-     appendWarningText(lexer, errorMessage, "by", null);
-     tag(errorMessage, lexer, node);
-   } else if (code == TRIM_EMPTY_ELEMENT) {
-     appendWarningText(lexer, errorMessage, "trim_empty_element", null);
-     tag(errorMessage, lexer, element);
-   } else if (code == MISSING_TITLE_ELEMENT) {
-     appendWarningText(lexer, errorMessage, "missing_title_element", null);
-   } else if (code == ILLEGAL_NESTING) {
-     appendWarningText(lexer, errorMessage, "warning", null);
-     tag(errorMessage, lexer, element);
-     appendWarningText(lexer, errorMessage, "illegal_nesting", null);
-   } else if (code == NOFRAMES_CONTENT) {
-     appendWarningText(lexer, errorMessage, "warning", null);
-     tag(errorMessage, lexer, node);
-     appendWarningText(lexer, errorMessage, "noframes_content", null);
-   } else if (code == INCONSISTENT_VERSION) {
-     appendWarningText(lexer, errorMessage, "inconsistent_version", null);
-   } else if (code == MALFORMED_DOCTYPE) {
-     appendWarningText(lexer, errorMessage, "malformed_doctype", null);
-   } else if (code == CONTENT_AFTER_BODY) {
-     appendWarningText(lexer, errorMessage, "content_after_body", null);
-   } else if (code == MALFORMED_COMMENT) {
-     appendWarningText(lexer, errorMessage, "malformed_comment", null);
-   } else if (code == BAD_COMMENT_CHARS) {
-     appendWarningText(lexer, errorMessage, "bad_comment_chars", null);
-   } else if (code == BAD_XML_COMMENT) {
-     appendWarningText(lexer, errorMessage, "bad_xml_comment", null);
-   } else if (code == BAD_CDATA_CONTENT) {
-     appendWarningText(lexer, errorMessage, "bad_cdata_content", null);
-   } else if (code == INCONSISTENT_NAMESPACE) {
-     appendWarningText(lexer, errorMessage, "inconsistent_namespace", null);
-   } else if (code == DTYPE_NOT_UPPER_CASE) {
-     appendWarningText(lexer, errorMessage, "dtype_not_upper_case", null);
-   } else if (code == UNEXPECTED_END_OF_FILE) {
-     appendWarningText(lexer, errorMessage, "unexpected_end_of_file", null);
-     tag(errorMessage, lexer, element);
-   }
-
-   attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
-   try {
-     MarkerUtilities.setMessage(attributes, errorMessage.toString());
-     MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
-   } catch (CoreException ignored) {
-     /* marker creation is best effort; the warning has already been written to errout */
-   }
-   tidyPrintln(lexer.errout);
- }
-
- /**
-  * Writes one localized warning fragment to <code>lexer.errout</code> and
-  * appends the same text to <code>message</code>.
-  *
-  * @param lexer   lexer whose <code>errout</code> receives the text
-  * @param message buffer collecting the marker message
-  * @param key     resource bundle key of the fragment
-  * @param args    MessageFormat arguments, or null to use the resource
-  *                text verbatim (no MessageFormat processing)
-  * @return true if the text was emitted; false if the resource was missing,
-  *         in which case the exception text is printed to errout instead
-  */
- private static boolean appendWarningText(Lexer lexer, StringBuffer message, String key, Object[] args) {
-   try {
-     String text = res.getString(key);
-     if (args != null)
-       text = MessageFormat.format(text, args);
-     tidyPrint(lexer.errout, text);
-     message.append(text);
-     return true;
-   } catch (MissingResourceException e) {
-     lexer.errout.println(e.toString());
-     return false;
-   }
- }
-
- public static void error(Lexer lexer, Node element, Node node, short code) {
- lexer.warnings++;
-
- /* keep quiet after 6 errors */
- if (lexer.errors > 6)
- return;
-
- lexer.errors++;
-
- position(lexer);
-
- Hashtable attributes = new Hashtable();
- StringBuffer errorMessage = new StringBuffer("Column " + lexer.columns + ": ");
-
- MarkerUtilities.setLineNumber(attributes, lexer.lines);
-
- if (code == SUSPECTED_MISSING_QUOTE) {
- try {
- tidyPrint(lexer.errout, res.getString("suspected_missing_quote"));
- errorMessage.append(res.getString("suspected_missing_quote"));
-
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == DUPLICATE_FRAMESET) {
- try {
- tidyPrint(lexer.errout, res.getString("duplicate_frameset"));
- errorMessage.append(res.getString("duplicate_frameset"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == UNKNOWN_ELEMENT) {
- try {
- tidyPrint(lexer.errout, res.getString("error"));
- errorMessage.append(res.getString("error"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- tag(errorMessage, lexer, node);
- try {
- tidyPrint(lexer.errout, res.getString("unknown_element"));
- errorMessage.append(res.getString("unknown_element"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- } else if (code == UNEXPECTED_ENDTAG) {
- try {
- tidyPrint(lexer.errout, MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element }));
- errorMessage.append(MessageFormat.format(res.getString("unexpected_endtag"), new Object[] { node.element }));
- if (element != null) {
- tidyPrint(
- lexer.errout,
- MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element }));
- errorMessage.append(MessageFormat.format(res.getString("unexpected_endtag_suffix"), new Object[] { element.element }));
- }
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
- attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_ERROR));
- // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_WARNING));
- // attributes.put(IMarker.SEVERITY, new Integer(IMarker.SEVERITY_INFO));
- try {
- MarkerUtilities.setMessage(attributes, errorMessage.toString());
- MarkerUtilities.createMarker(lexer.getIFile(), attributes, IMarker.PROBLEM);
- } catch (CoreException e) {
- }
- tidyPrintln(lexer.errout);
- }
-
- public static void errorSummary(Lexer lexer) {
- /* adjust badAccess to that its null if frames are ok */
- if ((lexer.badAccess & (USING_FRAMES | USING_NOFRAMES)) != 0) {
- if (!(((lexer.badAccess & USING_FRAMES) != 0) && ((lexer.badAccess & USING_NOFRAMES) == 0)))
- lexer.badAccess &= ~(USING_FRAMES | USING_NOFRAMES);
- }
-
- if (lexer.badChars != 0) {
- if ((lexer.badChars & WINDOWS_CHARS) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badchars_summary"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
- }
-
- if (lexer.badForm != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badform_summary"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if (lexer.badAccess != 0) {
- if ((lexer.badAccess & MISSING_SUMMARY) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badaccess_missing_summary"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if ((lexer.badAccess & MISSING_IMAGE_ALT) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badaccess_missing_image_alt"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if ((lexer.badAccess & MISSING_IMAGE_MAP) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badaccess_missing_image_map"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if ((lexer.badAccess & MISSING_LINK_ALT) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badaccess_missing_link_alt"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if (((lexer.badAccess & USING_FRAMES) != 0) && ((lexer.badAccess & USING_NOFRAMES) == 0)) {
- try {
- tidyPrint(lexer.errout, res.getString("badaccess_frames"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- try {
- tidyPrint(lexer.errout, MessageFormat.format(res.getString("badaccess_summary"), new Object[] { ACCESS_URL }));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if (lexer.badLayout != 0) {
- if ((lexer.badLayout & USING_LAYER) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badlayout_using_layer"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if ((lexer.badLayout & USING_SPACER) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badlayout_using_spacer"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if ((lexer.badLayout & USING_FONT) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badlayout_using_font"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if ((lexer.badLayout & USING_NOBR) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badlayout_using_nobr"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
-
- if ((lexer.badLayout & USING_BODY) != 0) {
- try {
- tidyPrint(lexer.errout, res.getString("badlayout_using_body"));
- } catch (MissingResourceException e) {
- lexer.errout.println(e.toString());
- }
- }
- }
- }
-
- public static void unknownOption(PrintWriter errout, char c) {
- try {
- tidyPrintln(
- errout,
- MessageFormat.format(res.getString("unrecognized_option"), new Object[] { new String(new char[] { c })
- }));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
-
- public static void unknownFile(PrintWriter errout, String program, String file) {
- try {
- tidyPrintln(errout, MessageFormat.format(res.getString("unknown_file"), new Object[] { program, file }));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
-
- public static void needsAuthorIntervention(PrintWriter errout) {
- try {
- tidyPrintln(errout, res.getString("needs_author_intervention"));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
-
- public static void missingBody(PrintWriter errout) {
- try {
- tidyPrintln(errout, res.getString("missing_body"));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
-
- public static void reportNumberOfSlides(PrintWriter errout, int count) {
- try {
- tidyPrintln(errout, MessageFormat.format(res.getString("slides_found"), new Object[] { new Integer(count)}));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
-
- public static void generalInfo(PrintWriter errout) {
- try {
- tidyPrintln(errout, res.getString("general_info"));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
-
- public static void helloMessage(PrintWriter errout, String date, String filename) {
- currentFile = filename; /* for use with Gnu Emacs */
-
- try {
- tidyPrintln(errout, MessageFormat.format(res.getString("hello_message"), new Object[] { date, filename }));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
-
- public static void reportVersion(PrintWriter errout, Lexer lexer, String filename, Node doctype) {
- int i, c;
- int state = 0;
- String vers = lexer.HTMLVersionName();
- MutableInteger cc = new MutableInteger();
-
- try {
- if (doctype != null) {
- tidyPrint(errout, MessageFormat.format(res.getString("doctype_given"), new Object[] { filename }));
-
- for (i = doctype.start; i < doctype.end; ++i) {
- c = (int) doctype.textarray[i];
-
- /* look for UTF-8 multibyte character */
- if (c < 0) {
- i += PPrint.getUTF8(doctype.textarray, i, cc);
- c = cc.value;
- }
-
- if (c == (char) '"')
- ++state;
- else if (state == 1)
- errout.print((char) c);
- }
-
- errout.print('"');
- }
-
- tidyPrintln(
- errout,
- MessageFormat.format(
- res.getString("report_version"),
- new Object[] { filename, (vers != null ? vers : "HTML proprietary")}));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
-
- public static void reportNumWarnings(PrintWriter errout, Lexer lexer) {
- if (lexer.warnings > 0) {
- try {
- tidyPrintln(errout, MessageFormat.format(res.getString("num_warnings"), new Object[] { new Integer(lexer.warnings)}));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- } else {
- try {
- tidyPrintln(errout, res.getString("no_warnings"));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
- }
-
- public static void helpText(PrintWriter out, String prog) {
- try {
- tidyPrintln(out, MessageFormat.format(res.getString("help_text"), new Object[] { prog, RELEASE_DATE }));
- } catch (MissingResourceException e) {
- out.println(e.toString());
- }
- }
-
- public static void badTree(PrintWriter errout) {
- try {
- tidyPrintln(errout, res.getString("bad_tree"));
- } catch (MissingResourceException e) {
- errout.println(e.toString());
- }
- }
-
-}
+++ /dev/null
-/*
- * @(#)StreamIn.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Input Stream
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-import java.io.InputStream;
-
-/**
- * Abstract character source used by the lexer. It holds the underlying
- * byte stream together with position and encoding state; subclasses
- * supply the actual reading logic.
- */
-public abstract class StreamIn {
-
- /* value returned by the read methods when the input is exhausted */
- public static final int EndOfStream = -1; // EOF
-
- /* states for ISO 2022
-
- A document in ISO-2022 based encoding uses some ESC sequences called
- "designator" to switch character sets. The designators defined and
- used in ISO-2022-JP are:
-
- "ESC" + "(" + ? for ISO646 variants
-
- "ESC" + "$" + ? and
- "ESC" + "$" + "(" + ? for multibyte character sets
-
- The state names below follow those sequences: ESC seen, ESC "$" seen,
- ESC "$" "(" seen, ESC "(" seen, and the ASCII / non-ASCII modes that
- result once a designator is complete.
- */
-
- public static final int FSM_ASCII = 0;
- public static final int FSM_ESC = 1;
- public static final int FSM_ESCD = 2;
- public static final int FSM_ESCDP = 3;
- public static final int FSM_ESCP = 4;
- public static final int FSM_NONASCII = 5;
-
- /* non-raw input is cleaned up*/
- public int state; /* FSM for ISO2022; one of the FSM_* constants */
- // presumably true while a character pushed back via ungetChar is pending -- TODO confirm
- public boolean pushed;
- // NOTE(review): looks like the most recently read character -- confirm against subclass
- public int c;
- // tabs/tabsize/lastcol: presumably tab-expansion and column bookkeeping -- TODO confirm
- public int tabs;
- public int tabsize;
- public int lastcol;
- /* current column and line of the input position */
- public int curcol;
- public int curline;
- /* character encoding of the input, an encoding constant from Configuration */
- public int encoding;
- /* underlying byte stream */
- public InputStream stream;
- /* set once the underlying stream has reported EndOfStream */
- public boolean endOfStream;
- public Object lexer; /* needed for error reporting */
-
- /* read char from stream */
- public abstract int readCharFromStream();
-
- // presumably the cleaned-up ("non-raw") read used by the lexer -- TODO confirm
- public abstract int readChar();
-
- // presumably pushes c back so that the next readChar returns it -- TODO confirm
- public abstract void ungetChar(int c);
-
- /* reports whether the end of the input has been reached */
- public abstract boolean isEndOfStream();
-
-}
+++ /dev/null
-/*
- * @(#)StreamInImpl.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Input Stream Implementation
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-import java.io.InputStream;
-import java.io.IOException;
-
-public class StreamInImpl extends StreamIn {
-
- /*
-  Mapping for Windows Western character set (128-159) to Unicode.
-  The table has 32 entries, one per byte value 128..159 (index = byte - 128).
-  Entries of 0x0000 presumably mark byte values that have no Unicode
-  mapping in this character set -- TODO confirm.
- */
- private static int[] Win2Unicode =
- {
- 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
- 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000,
- 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
- 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178
- };
-
- /*
- John Love-Jensen contributed this table for mapping MacRoman
- character set to Unicode.
-
- The table has 256 entries, indexed by the MacRoman byte value. The
- first 128 entries (0x00-0x7F) map each value to itself; the upper half
- (0x80-0xFF) holds the MacRoman-specific code points.
- */
-
- private static int[] Mac2Unicode =
- {
-
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
- 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
-
- 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
- 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
-
- 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
- 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
-
- 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
- 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
-
- 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
- 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
-
- 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
- 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
-
- 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
- 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
-
- 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
- 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
- /* x7F = DEL; MacRoman-specific upper half (0x80-0xFF) starts here */
- 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
- 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
-
- 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
- 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
-
- 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
- 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
-
- 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
- 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
-
- 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
- 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
-
- 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
- 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
-
- 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
- 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
- /* xF0 = Apple Logo */
- 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
- 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7
- };
-
- /**
-  * Creates a character reader over a raw byte stream.
-  *
-  * The reader starts at line 1, column 1, with no pushed-back character,
-  * no pending tab expansion, the end-of-stream flag cleared and the
-  * ISO-2022 escape tracker in its ASCII state.
-  *
-  * @param stream   the byte source to read from
-  * @param encoding the encoding code; it is compared against the
-  *                 Configuration encoding constants while decoding
-  * @param tabsize  the tab width used when a tab is expanded to spaces
-  */
- public StreamInImpl(InputStream stream, int encoding, int tabsize)
- {
-     /* where the bytes come from and how to interpret them */
-     this.stream = stream;
-     this.encoding = encoding;
-     this.tabsize = tabsize;
-
-     /* position tracking starts at the top-left of the document */
-     this.curline = 1;
-     this.curcol = 1;
-
-     /* nothing pushed back, no tab being expanded, stream not exhausted */
-     this.pushed = false;
-     this.c = 0;
-     this.tabs = 0;
-     this.endOfStream = false;
-
-     /* ISO-2022 escape tracking begins in plain ASCII */
-     this.state = FSM_ASCII;
- }
-
- /**
-  * Reads the next character from the underlying stream, decoding it
-  * according to the configured encoding.
-  *
-  * <ul>
-  * <li>ISO2022: bytes are returned one at a time; escape sequences are
-  *     tracked and the top bit is set on bytes read while a multibyte
-  *     character set is selected.</li>
-  * <li>UTF8: a multi-byte sequence is folded into a single code value.</li>
-  * <li>Any other encoding: the byte is returned unchanged.</li>
-  * </ul>
-  *
-  * No line-ending, tab or control-character handling happens here.
-  *
-  * @return the decoded character, or EndOfStream when the stream is
-  *         exhausted or an IOException occurs; in both cases
-  *         isEndOfStream() reports true afterwards
-  */
- public int readCharFromStream()
- {
-     int n, c, i, count;
-
-     try {
-         c = this.stream.read();
-
-         if (c == EndOfStream) {
-             this.endOfStream = true;
-             return c;
-         }
-
-         /*
-            A document in ISO-2022 based encoding uses some ESC sequences
-            called "designator" to switch character sets. The designators
-            defined and used in ISO-2022-JP are:
-
-              "ESC" + "(" + ?       for ISO646 variants
-
-              "ESC" + "$" + ?       and
-              "ESC" + "$" + "(" + ? for multibyte character sets
-
-            Where ? stands for a single character used to indicate the
-            character set for multibyte characters.
-
-            Tidy handles this by preserving the escape sequence and
-            setting the top bit of each byte for non-ascii chars. This
-            bit is then cleared on output. The input stream keeps track
-            of the state to determine when to set/clear the bit.
-         */
-
-         if (this.encoding == Configuration.ISO2022)
-         {
-             if (c == 0x1b) /* ESC */
-             {
-                 this.state = FSM_ESC;
-                 return c;
-             }
-
-             switch (this.state)
-             {
-             case FSM_ESC:
-                 if (c == '$')
-                     this.state = FSM_ESCD;
-                 else if (c == '(')
-                     this.state = FSM_ESCP;
-                 else
-                     this.state = FSM_ASCII;
-                 break;
-
-             case FSM_ESCD:
-                 if (c == '(')
-                     this.state = FSM_ESCDP;
-                 else
-                     this.state = FSM_NONASCII;
-                 break;
-
-             case FSM_ESCDP:
-                 this.state = FSM_NONASCII;
-                 break;
-
-             case FSM_ESCP:
-                 this.state = FSM_ASCII;
-                 break;
-
-             case FSM_NONASCII:
-                 /* mark the byte as belonging to a multibyte character set */
-                 c |= 0x80;
-                 break;
-             }
-
-             return c;
-         }
-
-         if (this.encoding != Configuration.UTF8)
-             return c;
-
-         /* deal with UTF-8 encoded char: the lead byte gives the payload
-            bits and the number of successor bytes that follow */
-
-         if ((c & 0xE0) == 0xC0)  /* 110X XXXX  two bytes */
-         {
-             n = c & 31;
-             count = 1;
-         }
-         else if ((c & 0xF0) == 0xE0)  /* 1110 XXXX  three bytes */
-         {
-             n = c & 15;
-             count = 2;
-         }
-         else if ((c & 0xF8) == 0xF0)  /* 1111 0XXX  four bytes */
-         {
-             n = c & 7;
-             count = 3;
-         }
-         else if ((c & 0xFC) == 0xF8)  /* 1111 10XX  five bytes */
-         {
-             n = c & 3;
-             count = 4;
-         }
-         else if ((c & 0xFE) == 0xFC)  /* 1111 110X  six bytes */
-         {
-             n = c & 1;
-             count = 5;
-         }
-         else  /* 0XXX XXXX one byte (any other byte is also passed through) */
-             return c;
-
-         /* successor bytes should have the form 10XX XXXX */
-         /* NOTE(review): the top two bits are not checked, so a malformed
-            sequence is folded in silently rather than rejected */
-         for (i = 1; i <= count; ++i)
-         {
-             c = this.stream.read();
-
-             if (c == EndOfStream) {
-                 /* stream ended in the middle of a sequence */
-                 this.endOfStream = true;
-                 return c;
-             }
-
-             n = (n << 6) | (c & 0x3F);
-         }
-     }
-     catch (IOException e) {
-         System.err.println("StreamInImpl.readCharFromStream: " + e.toString());
-         /* a failed read ends the input: keep the flag consistent with
-            the EndOfStream value handed back to the caller */
-         this.endOfStream = true;
-         n = EndOfStream;
-     }
-
-     return n;
- }
-
- /**
-  * Returns the next character for the lexer, after normalisation:
-  * "\r\n" and a lone "\r" become '\n', a tab becomes a run of spaces up to
-  * the next tab stop, control characters below 32 (other than the ones
-  * handled explicitly and ESC) are dropped, MacRoman bytes are mapped
-  * through Mac2Unicode and values 128..159 are mapped through Win2Unicode.
-  * The current line and column counters are updated as characters are
-  * consumed.
-  *
-  * @return the next character, or EndOfStream when no more input is
-  *         available
-  */
- public int readChar()
- {
- int c;
-
- /* a character handed back through ungetChar() is returned as stored;
-    only the line/column counters are adjusted for it */
- if (this.pushed)
- {
- this.pushed = false;
- c = this.c;
-
- if (c == '\n')
- {
- this.curcol = 1;
- this.curline++;
- return c;
- }
-
- this.curcol++;
- return c;
- }
-
- /* remember the column so ungetChar() can restore it */
- this.lastcol = this.curcol;
-
- /* still inside an expanded tab: emit the next owed space */
- if (this.tabs > 0)
- {
- this.curcol++;
- this.tabs--;
- return ' ';
- }
-
- /* loop until a character survives filtering */
- for (;;)
- {
- c = readCharFromStream();
-
- if (c < 0)
- return EndOfStream;
-
- if (c == '\n')
- {
- this.curcol = 1;
- this.curline++;
- break;
- }
-
- /* CR LF and a lone CR are both reported as a single '\n' */
- if (c == '\r')
- {
- c = readCharFromStream();
- if (c != '\n')
- {
- /* NOTE(review): the look-ahead character is pushed back unfiltered,
-    so the next call returns it without the tab/CR/control-character
-    handling below; an end-of-stream value can be pushed back here
-    too - confirm this is acceptable */
- ungetChar(c);
- c = '\n';
- }
- this.curcol = 1;
- this.curline++;
- break;
- }
-
- if (c == '\t')
- {
- /* spaces still owed after the one returned now, so that the column
-    ends up on the next tab stop */
- this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1;
- this.curcol++;
- c = ' ';
- break;
- }
-
- /* strip control characters, except for Esc */
-
- /* ESC is returned as-is; the column counter is not advanced for it */
- if (c == '\033')
- break;
-
- if (0 < c && c < 32)
- continue;
-
- /* watch out for ISO2022 */
-
- /* RAW and ISO2022 input is passed through without any remapping */
- if (this.encoding == Configuration.RAW ||
- this.encoding == Configuration.ISO2022)
- {
- this.curcol++;
- break;
- }
-
- if (this.encoding == Configuration.MACROMAN)
- c = Mac2Unicode[c];
-
- /* produced e.g. as a side-effect of smart quotes in Word */
-
- /* values 128..159 are reported and replaced by their Win2Unicode
-    entry; an entry of 0 means the character is dropped */
- if (127 < c && c < 160)
- {
- Report.encodingError((Lexer)this.lexer, Report.WINDOWS_CHARS, c);
-
- c = Win2Unicode[c - 128];
-
- if (c == 0)
- continue;
- }
-
- this.curcol++;
- break;
- }
-
- return c;
- }
-
- /**
-  * Hands one character back so that the next readChar() call returns it.
-  * Only a single character is remembered; a later call overwrites an
-  * earlier one. The column is restored from lastcol and the line counter
-  * is stepped back when the character is a newline.
-  *
-  * @param c the character to hand back
-  */
- public void ungetChar(int c)
- {
-     if (c == '\n')
-         this.curline -= 1;
-
-     this.curcol = this.lastcol;
-     this.c = c;
-     this.pushed = true;
- }
-
- /**
-  * Reports whether the input has been exhausted.
-  *
-  * @return the current value of the endOfStream flag, which
-  *         readCharFromStream() sets once a read returns EndOfStream
-  */
- public boolean isEndOfStream()
- {
-     return endOfStream;
- }
-
-}
+++ /dev/null
-/*
- * @(#)Style.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Linked list of class names and styles
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
- /**
-  * One node of a singly linked list pairing a tag and class name with a
-  * block of style properties. All members are public and mutable.
-  */
- public class Style {
-
-     /** element name this style applies to */
-     public String tag;
-     /** class name associated with the style */
-     public String tagClass;
-     /** the style properties text */
-     public String properties;
-     /** following node in the list, or null at the end */
-     public Style next;
-
-     /** Creates a node with every member set to null. */
-     public Style()
-     {
-         this(null, null, null, null);
-     }
-
-     /** Creates a node that has no successor. */
-     public Style(String tag, String tagClass, String properties)
-     {
-         this(tag, tagClass, properties, null);
-     }
-
-     /** Creates a node and links it in front of <code>next</code>. */
-     public Style(String tag, String tagClass, String properties, Style next)
-     {
-         this.next = next;
-         this.properties = properties;
-         this.tagClass = tagClass;
-         this.tag = tag;
-     }
-
- }
+++ /dev/null
-/*
- * @(#)StyleProp.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Linked list of style properties
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
- /**
-  * One node of a singly linked list of style properties, each a
-  * name/value pair. All members are public and mutable.
-  */
- public class StyleProp {
-
-     /** property name */
-     public String name;
-     /** property value */
-     public String value;
-     /** following node in the list, or null at the end */
-     public StyleProp next;
-
-     /** Creates a node with every member set to null. */
-     public StyleProp()
-     {
-         this(null, null, null);
-     }
-
-     /** Creates a node that has no successor. */
-     public StyleProp(String name, String value)
-     {
-         this(name, value, null);
-     }
-
-     /** Creates a node and links it in front of <code>next</code>. */
-     public StyleProp(String name, String value, StyleProp next)
-     {
-         this.next = next;
-         this.value = value;
-         this.name = name;
-     }
-
- }
+++ /dev/null
-/*
- * @(#)TagTable.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * Tag dictionary node hash table
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- * Modified from a Singleton to a non-Singleton.
- */
-
-import java.util.Hashtable;
-import java.util.Enumeration;
-
- /**
-  * Tag dictionary: a hash table from element name to its Dict entry.
-  *
-  * Each instance installs the built-in entries of the static tags table
-  * when it is constructed and caches frequently used entries in the public
-  * tagXxx fields.
-  *
-  * NOTE(review): the built-in Dict objects live in a static array and are
-  * therefore shared by every TagTable instance, while install() updates an
-  * already-known entry in place. Redefining a built-in name through one of
-  * the define*Tag() methods on one instance is visible through all
-  * instances.
-  */
- public class TagTable {
-
-     /* optional settings; only the XmlTags flag is read, in findTag() */
-     private Configuration configuration = null;
-
-     /**
-      * Builds the table from the built-in tag list and resolves the
-      * shortcut fields (tagHtml, tagHead, ...) by name.
-      */
-     public TagTable()
-     {
-         for ( int i = 0; i < tags.length; i++ ) {
-             install( tags[i] );
-         }
-         tagHtml = lookup("html");
-         tagHead = lookup("head");
-         tagBody = lookup("body");
-         tagFrameset = lookup("frameset");
-         tagFrame = lookup("frame");
-         tagNoframes = lookup("noframes");
-         tagMeta = lookup("meta");
-         tagTitle = lookup("title");
-         tagBase = lookup("base");
-         tagHr = lookup("hr");
-         tagPre = lookup("pre");
-         tagListing = lookup("listing");
-         tagH1 = lookup("h1");
-         tagH2 = lookup("h2");
-         tagP = lookup("p");
-         tagUl = lookup("ul");
-         tagOl = lookup("ol");
-         tagDir = lookup("dir");
-         tagLi = lookup("li");
-         tagDt = lookup("dt");
-         tagDd = lookup("dd");
-         tagDl = lookup("dl");
-         tagTd = lookup("td");
-         tagTh = lookup("th");
-         tagTr = lookup("tr");
-         tagCol = lookup("col");
-         tagBr = lookup("br");
-         tagA = lookup("a");
-         tagLink = lookup("link");
-         tagB = lookup("b");
-         tagI = lookup("i");
-         tagStrong = lookup("strong");
-         tagEm = lookup("em");
-         tagBig = lookup("big");
-         tagSmall = lookup("small");
-         tagParam = lookup("param");
-         tagOption = lookup("option");
-         tagOptgroup = lookup("optgroup");
-         tagImg = lookup("img");
-         tagMap = lookup("map");
-         tagArea = lookup("area");
-         tagNobr = lookup("nobr");
-         tagWbr = lookup("wbr");
-         tagFont = lookup("font");
-         tagSpacer = lookup("spacer");
-         tagLayer = lookup("layer");
-         tagCenter = lookup("center");
-         tagStyle = lookup("style");
-         tagScript = lookup("script");
-         tagNoscript = lookup("noscript");
-         tagTable = lookup("table");
-         tagCaption = lookup("caption");
-         tagForm = lookup("form");
-         tagTextarea = lookup("textarea");
-         tagBlockquote = lookup("blockquote");
-         tagApplet = lookup("applet");
-         tagObject = lookup("object");
-         tagDiv = lookup("div");
-         tagSpan = lookup("span");
-     }
-
-     /**
-      * Sets the configuration consulted by findTag().
-      *
-      * @param configuration the settings to use; may be null
-      */
-     public void setConfiguration(Configuration configuration)
-     {
-         this.configuration = configuration;
-     }
-
-     /**
-      * Finds the dictionary entry registered under a tag name.
-      *
-      * @param name the tag name exactly as it was installed
-      * @return the entry, or null when the name is unknown or null
-      */
-     public Dict lookup( String name )
-     {
-         /* Hashtable rejects null keys with a NullPointerException;
-            treat a missing name as "not found" instead */
-         if ( name == null ) {
-             return null;
-         }
-         return (Dict)tagHashtable.get( name );
-     }
-
-     /**
-      * Registers a dictionary entry. When an entry with the same name is
-      * already present, that existing object is updated in place: versions,
-      * parser and chkattrs are overwritten and the model bits are OR-ed in.
-      *
-      * @param dict the entry to register
-      * @return the entry now stored under dict.name (the pre-existing
-      *         object when the name was already known, otherwise dict)
-      */
-     public Dict install( Dict dict )
-     {
-         Dict d = (Dict)tagHashtable.get(dict.name);
-         if (d != null)
-         {
-             d.versions = dict.versions;
-             d.model |= dict.model;
-             d.parser = dict.parser;
-             d.chkattrs = dict.chkattrs;
-             return d;
-         }
-         else
-         {
-             tagHashtable.put(dict.name, dict);
-             return dict;
-         }
-     }
-
-     /* public interface for finding tag by name */
-     /**
-      * Attaches the matching dictionary entry to a node.
-      *
-      * When a configuration is set and its XmlTags flag is true, every node
-      * receives the xmlTags dummy entry. Otherwise the node's element name
-      * is looked up in the table.
-      *
-      * @param node the node whose tag field is to be filled in
-      * @return true when node.tag was assigned, false when the element
-      *         name is null or unknown
-      */
-     public boolean findTag( Node node )
-     {
-         Dict np;
-
-         if ( configuration != null && configuration.XmlTags ) {
-             node.tag = xmlTags;
-             return true;
-         }
-
-         if ( node.element != null ) {
-             np = lookup( node.element );
-             if ( np != null ) {
-                 node.tag = np;
-                 return true;
-             }
-         }
-
-         return false;
-     }
-
-     /**
-      * Returns the parser registered for a node's element name.
-      *
-      * @param node the node to find a parser for
-      * @return the parser of the matching entry (which may itself be
-      *         null), or null when the element name is null or unknown
-      */
-     public Parser findParser(Node node)
-     {
-         Dict np;
-
-         if (node.element != null) {
-             np = lookup(node.element);
-             if (np != null) {
-                 return np.parser;
-             }
-         }
-
-         return null;
-     }
-
-     /* name -> Dict, private to this instance */
-     private Hashtable tagHashtable = new Hashtable();
-
-     /* built-in entries installed by the constructor; every name appears
-        exactly once */
-     private static Dict[] tags = {
-
-         new Dict( "html", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseHTML(), CheckAttribsImpl.getCheckHTML() ),
-
-         new Dict( "head", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseHead(), null ),
-
-         new Dict( "title", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), Dict.CM_HEAD, ParserImpl.getParseTitle(), null ),
-         new Dict( "base", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ),
-         new Dict( "link", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckLINK() ),
-         new Dict( "meta", (short)(Dict.VERS_ALL|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ),
-         new Dict( "style", (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES), Dict.CM_HEAD, ParserImpl.getParseScript(), CheckAttribsImpl.getCheckSTYLE() ),
-         new Dict( "script", (short)(Dict.VERS_FROM32|Dict.VERS_FRAMES), (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), CheckAttribsImpl.getCheckSCRIPT() ),
-         new Dict( "server", Dict.VERS_NETSCAPE, (Dict.CM_HEAD|Dict.CM_MIXED|Dict.CM_BLOCK|Dict.CM_INLINE), ParserImpl.getParseScript(), null ),
-
-         new Dict( "body", Dict.VERS_ALL, (Dict.CM_HTML|Dict.CM_OPT|Dict.CM_OMITST), ParserImpl.getParseBody(), null ),
-         new Dict( "frameset", Dict.VERS_FRAMES, (Dict.CM_HTML|Dict.CM_FRAMES), ParserImpl.getParseFrameSet(), null ),
-
-         new Dict( "p", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OPT), ParserImpl.getParseInline(), null ),
-         new Dict( "h1", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
-         new Dict( "h2", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
-         new Dict( "h3", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
-         new Dict( "h4", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
-         new Dict( "h5", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
-         new Dict( "h6", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_HEADING), ParserImpl.getParseInline(), null ),
-         new Dict( "ul", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseList(), null ),
-         new Dict( "ol", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseList(), null ),
-         new Dict( "dl", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseDefList(), null ),
-         new Dict( "dir", Dict.VERS_LOOSE, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParseList(), null ),
-         new Dict( "menu", Dict.VERS_LOOSE, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParseList(), null ),
-         new Dict( "pre", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParsePre(), null ),
-         new Dict( "listing", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ),
-         new Dict( "xmp", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ),
-         new Dict( "plaintext", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_OBSOLETE), ParserImpl.getParsePre(), null ),
-         new Dict( "address", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "blockquote", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "form", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "isindex", Dict.VERS_LOOSE, (Dict.CM_BLOCK|Dict.CM_EMPTY), null, null ),
-         new Dict( "fieldset", Dict.VERS_HTML40, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "table", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.getParseTableTag(), CheckAttribsImpl.getCheckTABLE() ),
-         new Dict( "hr", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckHR() ),
-         new Dict( "div", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "multicol", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "nosave", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "layer", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "nolayer", Dict.VERS_NETSCAPE, (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ),
-         new Dict( "align", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "center", Dict.VERS_LOOSE, Dict.CM_BLOCK, ParserImpl.getParseBlock(), null ),
-         new Dict( "ins", Dict.VERS_HTML40, (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(), null ),
-         new Dict( "del", Dict.VERS_HTML40, (Dict.CM_INLINE|Dict.CM_BLOCK|Dict.CM_MIXED), ParserImpl.getParseInline(), null ),
-
-         new Dict( "li", Dict.VERS_ALL, (Dict.CM_LIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), null ),
-         new Dict( "dt", Dict.VERS_ALL, (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseInline(), null ),
-         new Dict( "dd", Dict.VERS_ALL, (Dict.CM_DEFLIST|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), null ),
-
-         new Dict( "caption", Dict.VERS_FROM32, Dict.CM_TABLE, ParserImpl.getParseInline(), CheckAttribsImpl.getCheckCaption() ),
-         new Dict( "colgroup", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_OPT), ParserImpl.getParseColGroup(), null ),
-         new Dict( "col", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_EMPTY), null, null ),
-         new Dict( "thead", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ),
-         new Dict( "tfoot", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ),
-         new Dict( "tbody", Dict.VERS_HTML40, (Dict.CM_TABLE|Dict.CM_ROWGRP|Dict.CM_OPT), ParserImpl.getParseRowGroup(), null ),
-         new Dict( "tr", Dict.VERS_FROM32, (Dict.CM_TABLE|Dict.CM_OPT), ParserImpl.getParseRow(), null ),
-         new Dict( "td", Dict.VERS_FROM32, (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckTableCell() ),
-         new Dict( "th", Dict.VERS_FROM32, (Dict.CM_ROW|Dict.CM_OPT|Dict.CM_NO_INDENT), ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckTableCell() ),
-
-         new Dict( "q", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "a", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), CheckAttribsImpl.getCheckAnchor() ),
-         new Dict( "br", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
-         new Dict( "img", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckIMG() ),
-         new Dict( "object", Dict.VERS_HTML40, (Dict.CM_OBJECT|Dict.CM_HEAD|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
-         new Dict( "applet", Dict.VERS_LOOSE, (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
-         new Dict( "servlet", Dict.VERS_SUN, (Dict.CM_OBJECT|Dict.CM_IMG|Dict.CM_INLINE|Dict.CM_PARAM), ParserImpl.getParseBlock(), null ),
-         new Dict( "param", Dict.VERS_FROM32, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
-         new Dict( "embed", Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, null ),
-         new Dict( "noembed", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "iframe", Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE, ParserImpl.getParseBlock(), null ),
-         new Dict( "frame", Dict.VERS_FRAMES, (Dict.CM_FRAMES|Dict.CM_EMPTY), null, null ),
-         new Dict( "noframes", Dict.VERS_IFRAMES, (Dict.CM_BLOCK|Dict.CM_FRAMES), ParserImpl.getParseNoFrames(), null ),
-         new Dict( "noscript", (short)(Dict.VERS_FRAMES|Dict.VERS_HTML40), (Dict.CM_BLOCK|Dict.CM_INLINE|Dict.CM_MIXED), ParserImpl.getParseBlock(), null ),
-         new Dict( "b", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "i", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "u", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "tt", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "s", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "strike", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "big", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "small", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "sub", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "sup", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "em", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "strong", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "dfn", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "code", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "samp", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "kbd", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "var", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "cite", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "abbr", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "acronym", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "span", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "blink", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "nobr", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "wbr", Dict.VERS_PROPRIETARY, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
-         new Dict( "marquee", Dict.VERS_MICROSOFT, (Dict.CM_INLINE|Dict.CM_OPT), ParserImpl.getParseInline(), null ),
-         new Dict( "bgsound", Dict.VERS_MICROSOFT, (Dict.CM_HEAD|Dict.CM_EMPTY), null, null ),
-         new Dict( "comment", Dict.VERS_MICROSOFT, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "spacer", Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
-         new Dict( "keygen", Dict.VERS_NETSCAPE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
-         new Dict( "map", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.getParseBlock(), CheckAttribsImpl.getCheckMap() ),
-         new Dict( "area", Dict.VERS_ALL, (Dict.CM_BLOCK|Dict.CM_EMPTY), null, CheckAttribsImpl.getCheckAREA() ),
-         new Dict( "input", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_IMG|Dict.CM_EMPTY), null, null ),
-         new Dict( "select", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_FIELD), ParserImpl.getParseSelect(), null ),
-         new Dict( "option", Dict.VERS_ALL, (Dict.CM_FIELD|Dict.CM_OPT), ParserImpl.getParseText(), null ),
-         new Dict( "optgroup", Dict.VERS_HTML40, (Dict.CM_FIELD|Dict.CM_OPT), ParserImpl.getParseOptGroup(), null ),
-         new Dict( "textarea", Dict.VERS_ALL, (Dict.CM_INLINE|Dict.CM_FIELD), ParserImpl.getParseText(), null ),
-         new Dict( "label", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "legend", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "button", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "basefont", Dict.VERS_LOOSE, (Dict.CM_INLINE|Dict.CM_EMPTY), null, null ),
-         new Dict( "font", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-         new Dict( "bdo", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.getParseInline(), null ),
-
-     };
-
-     /* create dummy entry for all xml tags */
-     public Dict xmlTags = new Dict( null, Dict.VERS_ALL, Dict.CM_BLOCK, null, null );
-
-     /* shortcuts to frequently used entries, resolved in the constructor */
-     public Dict tagHtml = null;
-     public Dict tagHead = null;
-     public Dict tagBody = null;
-     public Dict tagFrameset = null;
-     public Dict tagFrame = null;
-     public Dict tagNoframes = null;
-     public Dict tagMeta = null;
-     public Dict tagTitle = null;
-     public Dict tagBase = null;
-     public Dict tagHr = null;
-     public Dict tagPre = null;
-     public Dict tagListing = null;
-     public Dict tagH1 = null;
-     public Dict tagH2 = null;
-     public Dict tagP = null;
-     public Dict tagUl = null;
-     public Dict tagOl = null;
-     public Dict tagDir = null;
-     public Dict tagLi = null;
-     public Dict tagDt = null;
-     public Dict tagDd = null;
-     public Dict tagDl = null;
-     public Dict tagTd = null;
-     public Dict tagTh = null;
-     public Dict tagTr = null;
-     public Dict tagCol = null;
-     public Dict tagBr = null;
-     public Dict tagA = null;
-     public Dict tagLink = null;
-     public Dict tagB = null;
-     public Dict tagI = null;
-     public Dict tagStrong = null;
-     public Dict tagEm = null;
-     public Dict tagBig = null;
-     public Dict tagSmall = null;
-     public Dict tagParam = null;
-     public Dict tagOption = null;
-     public Dict tagOptgroup = null;
-     public Dict tagImg = null;
-     public Dict tagMap = null;
-     public Dict tagArea = null;
-     public Dict tagNobr = null;
-     public Dict tagWbr = null;
-     public Dict tagFont = null;
-     public Dict tagSpacer = null;
-     public Dict tagLayer = null;
-     public Dict tagCenter = null;
-     public Dict tagStyle = null;
-     public Dict tagScript = null;
-     public Dict tagNoscript = null;
-     public Dict tagTable = null;
-     public Dict tagCaption = null;
-     public Dict tagForm = null;
-     public Dict tagTextarea = null;
-     public Dict tagBlockquote = null;
-     public Dict tagApplet = null;
-     public Dict tagObject = null;
-     public Dict tagDiv = null;
-     public Dict tagSpan = null;
-
-     /**
-      * Registers a proprietary inline tag (CM_INLINE|CM_NO_INDENT|CM_NEW)
-      * handled by the block parser.
-      *
-      * @param name the tag name to define
-      */
-     public void defineInlineTag( String name )
-     {
-         install( new Dict( name, Dict.VERS_PROPRIETARY,
-                            (Dict.CM_INLINE|Dict.CM_NO_INDENT|Dict.CM_NEW),
-                            ParserImpl.getParseBlock(), null ) );
-     }
-
-     /**
-      * Registers a proprietary block tag (CM_BLOCK|CM_NO_INDENT|CM_NEW)
-      * handled by the block parser.
-      *
-      * @param name the tag name to define
-      */
-     public void defineBlockTag( String name )
-     {
-         install( new Dict( name, Dict.VERS_PROPRIETARY,
-                            (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW),
-                            ParserImpl.getParseBlock(), null ) );
-     }
-
-     /**
-      * Registers a proprietary empty tag (CM_EMPTY|CM_NO_INDENT|CM_NEW)
-      * handled by the block parser.
-      *
-      * @param name the tag name to define
-      */
-     public void defineEmptyTag(String name)
-     {
-         install(new Dict(name, Dict.VERS_PROPRIETARY,
-                          (Dict.CM_EMPTY|Dict.CM_NO_INDENT|Dict.CM_NEW),
-                          ParserImpl.getParseBlock(), null));
-     }
-
-     /**
-      * Registers a proprietary preformatted tag
-      * (CM_BLOCK|CM_NO_INDENT|CM_NEW) handled by the pre parser.
-      *
-      * @param name the tag name to define
-      */
-     public void definePreTag(String name)
-     {
-         install(new Dict(name, Dict.VERS_PROPRIETARY,
-                          (Dict.CM_BLOCK|Dict.CM_NO_INDENT|Dict.CM_NEW),
-                          ParserImpl.getParsePre(), null));
-     }
- }
+++ /dev/null
-/*
- * @(#)Tidy.java 1.11 2000/08/16
- *
- */
-
-/*
- HTML parser and pretty printer
-
- Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
- Institute of Technology, Institut National de Recherche en
- Informatique et en Automatique, Keio University). All Rights
- Reserved.
-
- Contributing Author(s):
-
- Dave Raggett <dsr@w3.org>
- Andy Quick <ac.quick@sympatico.ca> (translation to Java)
-
- The contributing author(s) would like to thank all those who
- helped with testing, bug fixes, and patience. This wouldn't
- have been possible without all of you.
-
- COPYRIGHT NOTICE:
-
- This software and documentation is provided "as is," and
- the copyright holders and contributing author(s) make no
- representations or warranties, express or implied, including
- but not limited to, warranties of merchantability or fitness
- for any particular purpose or that the use of the software or
- documentation will not infringe any third party patents,
- copyrights, trademarks or other rights.
-
- The copyright holders and contributing author(s) will not be
- liable for any direct, indirect, special or consequential damages
- arising out of any use of the software or documentation, even if
- advised of the possibility of such damage.
-
- Permission is hereby granted to use, copy, modify, and distribute
- this source code, or portions hereof, documentation and executables,
- for any purpose, without fee, subject to the following restrictions:
-
- 1. The origin of this source code must not be misrepresented.
- 2. Altered versions must be plainly marked as such and must
- not be misrepresented as being the original source.
- 3. This Copyright notice may not be removed or altered from any
- source or altered source distribution.
-
- The copyright holders and contributing author(s) specifically
- permit, without fee, and encourage the use of this source code
- as a component for supporting the Hypertext Markup Language in
- commercial products. If you use this source code in a product,
- acknowledgment is not required but would be appreciated.
-*/
-
-package org.w3c.tidy;
-
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.PrintWriter;
-import java.util.Properties;
-
-import org.eclipse.core.resources.IFile;
-import org.eclipse.core.resources.IMarker;
-import org.eclipse.core.runtime.CoreException;
-
-/**
- *
- * <p>HTML parser and pretty printer</p>
- *
- * <p>
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * See Tidy.java for the copyright notice.
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- * </p>
- *
- * <p>
- * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
- * Institute of Technology, Institut National de Recherche en
- * Informatique et en Automatique, Keio University). All Rights
- * Reserved.
- * </p>
- *
- * <p>
- * Contributing Author(s):<br>
- * <a href="mailto:dsr@w3.org">Dave Raggett</a><br>
- * <a href="mailto:ac.quick@sympatico.ca">Andy Quick</a> (translation to Java)
- * </p>
- *
- * <p>
- * The contributing author(s) would like to thank all those who
- * helped with testing, bug fixes, and patience. This wouldn't
- * have been possible without all of you.
- * </p>
- *
- * <p>
- * COPYRIGHT NOTICE:<br>
- *
- * This software and documentation is provided "as is," and
- * the copyright holders and contributing author(s) make no
- * representations or warranties, express or implied, including
- * but not limited to, warranties of merchantability or fitness
- * for any particular purpose or that the use of the software or
- * documentation will not infringe any third party patents,
- * copyrights, trademarks or other rights.
- * </p>
- *
- * <p>
- * The copyright holders and contributing author(s) will not be
- * liable for any direct, indirect, special or consequential damages
- * arising out of any use of the software or documentation, even if
- * advised of the possibility of such damage.
- * </p>
- *
- * <p>
- * Permission is hereby granted to use, copy, modify, and distribute
- * this source code, or portions hereof, documentation and executables,
- * for any purpose, without fee, subject to the following restrictions:
- * </p>
- *
- * <p>
- * <ol>
- * <li>The origin of this source code must not be misrepresented.</li>
- * <li>Altered versions must be plainly marked as such and must
- * not be misrepresented as being the original source.</li>
- * <li>This Copyright notice may not be removed or altered from any
- * source or altered source distribution.</li>
- * </ol>
- * </p>
- *
- * <p>
- * The copyright holders and contributing author(s) specifically
- * permit, without fee, and encourage the use of this source code
- * as a component for supporting the Hypertext Markup Language in
- * commercial products. If you use this source code in a product,
- * acknowledgment is not required but would be appreciated.
- * </p>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.0, 1999/05/22
- * @version 1.0.1, 1999/05/29
- * @version 1.1, 1999/06/18 Java Bean
- * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- *
- */
-
-public class Tidy implements java.io.Serializable {
-
- /* version id for java.io.Serializable, which this class implements */
- static final long serialVersionUID = -2794371560623987718L;
-
- private boolean initialized = false; /* NOTE(review): presumably set once setup has run - confirm */
- private PrintWriter errout = null; /* error output stream */
- private PrintWriter stderr = null; /* returned by getStderr() */
- private Configuration configuration = null; /* settings behind the spaces/wraplen/CharEncoding/tabsize accessors */
- private String inputStreamName = "InputStream"; /* NOTE(review): looks like a label for the input being parsed - confirm */
- private int parseErrors = 0; /* errors in the most recent parse operation */
- private int parseWarnings = 0; /* warnings in the most recent parse operation */
-
- public Tidy() {
- init();
- }
-
- public Configuration getConfiguration() {
- return configuration;
- }
-
- public PrintWriter getStderr() {
- return stderr;
- }
-
- /**
- * ParseErrors - the number of errors that occurred in the most
- * recent parse operation
- */
-
- public int getParseErrors() {
- return parseErrors;
- }
-
- /**
- * ParseWarnings - the number of warnings that occurred in the most
- * recent parse operation
- */
-
- public int getParseWarnings() {
- return parseWarnings;
- }
-
- /**
- * Errout - the error output stream
- */
-
- public PrintWriter getErrout() {
- return errout;
- }
-
- public void setErrout(PrintWriter errout) {
- this.errout = errout;
- }
-
- /**
- * Spaces - default indentation
- * @see org.w3c.tidy.Configuration#spaces
- */
-
- public void setSpaces(int spaces) {
- configuration.spaces = spaces;
- }
-
- public int getSpaces() {
- return configuration.spaces;
- }
-
- /**
- * Wraplen - default wrap margin
- * @see org.w3c.tidy.Configuration#wraplen
- */
-
- public void setWraplen(int wraplen) {
- configuration.wraplen = wraplen;
- }
-
- public int getWraplen() {
- return configuration.wraplen;
- }
-
- /**
- * CharEncoding
- * @see org.w3c.tidy.Configuration#CharEncoding
- */
-
- public void setCharEncoding(int charencoding) {
- configuration.CharEncoding = charencoding;
- }
-
- public int getCharEncoding() {
- return configuration.CharEncoding;
- }
-
- /**
- * Tabsize
- * @see org.w3c.tidy.Configuration#tabsize
- */
-
- public void setTabsize(int tabsize) {
- configuration.tabsize = tabsize;
- }
-
- public int getTabsize() {
- return configuration.tabsize;
- }
-
- /**
- * Errfile - file name to write errors to
- * @see org.w3c.tidy.Configuration#errfile
- */
-
- public void setErrfile(String errfile) {
- configuration.errfile = errfile;
- }
-
- public String getErrfile() {
- return configuration.errfile;
- }
-
- /**
- * Writeback - if true then output tidied markup
- * NOTE: this property is ignored when parsing from an InputStream.
- * @see org.w3c.tidy.Configuration#writeback
- */
-
- public void setWriteback(boolean writeback) {
- configuration.writeback = writeback;
- }
-
- public boolean getWriteback() {
- return configuration.writeback;
- }
-
- /**
- * OnlyErrors - if true normal output is suppressed
- * @see org.w3c.tidy.Configuration#OnlyErrors
- */
-
- public void setOnlyErrors(boolean OnlyErrors) {
- configuration.OnlyErrors = OnlyErrors;
- }
-
- public boolean getOnlyErrors() {
- return configuration.OnlyErrors;
- }
-
- /**
- * ShowWarnings - however errors are always shown
- * @see org.w3c.tidy.Configuration#ShowWarnings
- */
-
- public void setShowWarnings(boolean ShowWarnings) {
- configuration.ShowWarnings = ShowWarnings;
- }
-
- public boolean getShowWarnings() {
- return configuration.ShowWarnings;
- }
-
- /**
- * Quiet - no 'Parsing X', guessed DTD or summary
- * @see org.w3c.tidy.Configuration#Quiet
- */
-
- public void setQuiet(boolean Quiet) {
- configuration.Quiet = Quiet;
- }
-
- public boolean getQuiet() {
- return configuration.Quiet;
- }
-
- /**
- * IndentContent - indent content of appropriate tags
- * @see org.w3c.tidy.Configuration#IndentContent
- */
-
- public void setIndentContent(boolean IndentContent) {
- configuration.IndentContent = IndentContent;
- }
-
- public boolean getIndentContent() {
- return configuration.IndentContent;
- }
-
- /**
- * SmartIndent - does text/block level content effect indentation
- * @see org.w3c.tidy.Configuration#SmartIndent
- */
-
- public void setSmartIndent(boolean SmartIndent) {
- configuration.SmartIndent = SmartIndent;
- }
-
- public boolean getSmartIndent() {
- return configuration.SmartIndent;
- }
-
- /**
- * HideEndTags - suppress optional end tags
- * @see org.w3c.tidy.Configuration#HideEndTags
- */
-
- public void setHideEndTags(boolean HideEndTags) {
- configuration.HideEndTags = HideEndTags;
- }
-
- public boolean getHideEndTags() {
- return configuration.HideEndTags;
- }
-
- /**
- * XmlTags - treat input as XML
- * @see org.w3c.tidy.Configuration#XmlTags
- */
-
- public void setXmlTags(boolean XmlTags) {
- configuration.XmlTags = XmlTags;
- }
-
- public boolean getXmlTags() {
- return configuration.XmlTags;
- }
-
- /**
- * XmlOut - create output as XML
- * @see org.w3c.tidy.Configuration#XmlOut
- */
-
- public void setXmlOut(boolean XmlOut) {
- configuration.XmlOut = XmlOut;
- }
-
- public boolean getXmlOut() {
- return configuration.XmlOut;
- }
-
- /**
- * XHTML - output extensible HTML
- * @see org.w3c.tidy.Configuration#xHTML
- */
-
- public void setXHTML(boolean xHTML) {
- configuration.xHTML = xHTML;
- }
-
- public boolean getXHTML() {
- return configuration.xHTML;
- }
-
- /**
- * RawOut - avoid mapping values > 127 to entities
- * @see org.w3c.tidy.Configuration#RawOut
- */
-
- public void setRawOut(boolean RawOut) {
- configuration.RawOut = RawOut;
- }
-
- public boolean getRawOut() {
- return configuration.RawOut;
- }
-
- /**
- * UpperCaseTags - output tags in upper not lower case
- * @see org.w3c.tidy.Configuration#UpperCaseTags
- */
-
- public void setUpperCaseTags(boolean UpperCaseTags) {
- configuration.UpperCaseTags = UpperCaseTags;
- }
-
- public boolean getUpperCaseTags() {
- return configuration.UpperCaseTags;
- }
-
- /**
- * UpperCaseAttrs - output attributes in upper not lower case
- * @see org.w3c.tidy.Configuration#UpperCaseAttrs
- */
-
- public void setUpperCaseAttrs(boolean UpperCaseAttrs) {
- configuration.UpperCaseAttrs = UpperCaseAttrs;
- }
-
- public boolean getUpperCaseAttrs() {
- return configuration.UpperCaseAttrs;
- }
-
- /**
- * MakeClean - remove presentational clutter
- * @see org.w3c.tidy.Configuration#MakeClean
- */
-
- public void setMakeClean(boolean MakeClean) {
- configuration.MakeClean = MakeClean;
- }
-
- public boolean getMakeClean() {
- return configuration.MakeClean;
- }
-
- /**
- * BreakBeforeBR - o/p newline before <br> or not?
- * @see org.w3c.tidy.Configuration#BreakBeforeBR
- */
-
- public void setBreakBeforeBR(boolean BreakBeforeBR) {
- configuration.BreakBeforeBR = BreakBeforeBR;
- }
-
- public boolean getBreakBeforeBR() {
- return configuration.BreakBeforeBR;
- }
-
- /**
- * BurstSlides - create slides on each h2 element
- * @see org.w3c.tidy.Configuration#BurstSlides
- */
-
- public void setBurstSlides(boolean BurstSlides) {
- configuration.BurstSlides = BurstSlides;
- }
-
- public boolean getBurstSlides() {
- return configuration.BurstSlides;
- }
-
- /**
- * NumEntities - use numeric entities
- * @see org.w3c.tidy.Configuration#NumEntities
- */
-
- public void setNumEntities(boolean NumEntities) {
- configuration.NumEntities = NumEntities;
- }
-
- public boolean getNumEntities() {
- return configuration.NumEntities;
- }
-
- /**
- * QuoteMarks - output " marks as &quot;
- * @see org.w3c.tidy.Configuration#QuoteMarks
- */
-
- public void setQuoteMarks(boolean QuoteMarks) {
- configuration.QuoteMarks = QuoteMarks;
- }
-
- public boolean getQuoteMarks() {
- return configuration.QuoteMarks;
- }
-
- /**
- * QuoteNbsp - output non-breaking space as entity
- * @see org.w3c.tidy.Configuration#QuoteNbsp
- */
-
- public void setQuoteNbsp(boolean QuoteNbsp) {
- configuration.QuoteNbsp = QuoteNbsp;
- }
-
- public boolean getQuoteNbsp() {
- return configuration.QuoteNbsp;
- }
-
- /**
- * QuoteAmpersand - output naked ampersand as &
- * @see org.w3c.tidy.Configuration#QuoteAmpersand
- */
-
- public void setQuoteAmpersand(boolean QuoteAmpersand) {
- configuration.QuoteAmpersand = QuoteAmpersand;
- }
-
- public boolean getQuoteAmpersand() {
- return configuration.QuoteAmpersand;
- }
-
- /**
- * WrapAttVals - wrap within attribute values
- * @see org.w3c.tidy.Configuration#WrapAttVals
- */
-
- public void setWrapAttVals(boolean WrapAttVals) {
- configuration.WrapAttVals = WrapAttVals;
- }
-
- public boolean getWrapAttVals() {
- return configuration.WrapAttVals;
- }
-
- /**
- * WrapScriptlets - wrap within JavaScript string literals
- * @see org.w3c.tidy.Configuration#WrapScriptlets
- */
-
- public void setWrapScriptlets(boolean WrapScriptlets) {
- configuration.WrapScriptlets = WrapScriptlets;
- }
-
- public boolean getWrapScriptlets() {
- return configuration.WrapScriptlets;
- }
-
- /**
- * WrapSection - wrap within <![ ... ]> section tags
- * @see org.w3c.tidy.Configuration#WrapSection
- */
-
- public void setWrapSection(boolean WrapSection) {
- configuration.WrapSection = WrapSection;
- }
-
- public boolean getWrapSection() {
- return configuration.WrapSection;
- }
-
- /**
- * AltText - default text for alt attribute
- * @see org.w3c.tidy.Configuration#altText
- */
-
- public void setAltText(String altText) {
- configuration.altText = altText;
- }
-
- public String getAltText() {
- return configuration.altText;
- }
-
- /**
- * Slidestyle - style sheet for slides
- * @see org.w3c.tidy.Configuration#slidestyle
- */
-
- public void setSlidestyle(String slidestyle) {
- configuration.slidestyle = slidestyle;
- }
-
- public String getSlidestyle() {
- return configuration.slidestyle;
- }
-
- /**
- * XmlPi - add <?xml?> for XML docs
- * @see org.w3c.tidy.Configuration#XmlPi
- */
-
- public void setXmlPi(boolean XmlPi) {
- configuration.XmlPi = XmlPi;
- }
-
- public boolean getXmlPi() {
- return configuration.XmlPi;
- }
-
- /**
- * DropFontTags - discard presentation tags
- * @see org.w3c.tidy.Configuration#DropFontTags
- */
-
- public void setDropFontTags(boolean DropFontTags) {
- configuration.DropFontTags = DropFontTags;
- }
-
- public boolean getDropFontTags() {
- return configuration.DropFontTags;
- }
-
- /**
- * DropEmptyParas - discard empty p elements
- * @see org.w3c.tidy.Configuration#DropEmptyParas
- */
-
- public void setDropEmptyParas(boolean DropEmptyParas) {
- configuration.DropEmptyParas = DropEmptyParas;
- }
-
- public boolean getDropEmptyParas() {
- return configuration.DropEmptyParas;
- }
-
- /**
- * FixComments - fix comments with adjacent hyphens
- * @see org.w3c.tidy.Configuration#FixComments
- */
-
- public void setFixComments(boolean FixComments) {
- configuration.FixComments = FixComments;
- }
-
- public boolean getFixComments() {
- return configuration.FixComments;
- }
-
- /**
- * WrapAsp - wrap within ASP pseudo elements
- * @see org.w3c.tidy.Configuration#WrapAsp
- */
-
- public void setWrapAsp(boolean WrapAsp) {
- configuration.WrapAsp = WrapAsp;
- }
-
- public boolean getWrapAsp() {
- return configuration.WrapAsp;
- }
-
- /**
- * WrapJste - wrap within JSTE pseudo elements
- * @see org.w3c.tidy.Configuration#WrapJste
- */
-
- public void setWrapJste(boolean WrapJste) {
- configuration.WrapJste = WrapJste;
- }
-
- public boolean getWrapJste() {
- return configuration.WrapJste;
- }
-
- /**
- * WrapPhp - wrap within PHP pseudo elements
- * @see org.w3c.tidy.Configuration#WrapPhp
- */
-
- public void setWrapPhp(boolean WrapPhp) {
- configuration.WrapPhp = WrapPhp;
- }
-
- public boolean getWrapPhp() {
- return configuration.WrapPhp;
- }
-
- /**
- * FixBackslash - fix URLs by replacing \ with /
- * @see org.w3c.tidy.Configuration#FixBackslash
- */
-
- public void setFixBackslash(boolean FixBackslash) {
- configuration.FixBackslash = FixBackslash;
- }
-
- public boolean getFixBackslash() {
- return configuration.FixBackslash;
- }
-
- /**
- * IndentAttributes - newline+indent before each attribute
- * @see org.w3c.tidy.Configuration#IndentAttributes
- */
-
- public void setIndentAttributes(boolean IndentAttributes) {
- configuration.IndentAttributes = IndentAttributes;
- }
-
- public boolean getIndentAttributes() {
- return configuration.IndentAttributes;
- }
-
- /**
- * DocType - user specified doctype
- * omit | auto | strict | loose | <i>fpi</i>
- * where the <i>fpi</i> is a string similar to
- * "-//ACME//DTD HTML 3.14159//EN"
- * Note: for <i>fpi</i> include the double-quotes in the string.
- * @see org.w3c.tidy.Configuration#docTypeStr
- * @see org.w3c.tidy.Configuration#docTypeMode
- */
-
- public void setDocType(String doctype) {
- if (doctype != null)
- configuration.docTypeStr = configuration.parseDocType(doctype, "doctype");
- }
-
- public String getDocType() {
- String result = null;
- switch (configuration.docTypeMode) {
- case Configuration.DOCTYPE_OMIT :
- result = "omit";
- break;
- case Configuration.DOCTYPE_AUTO :
- result = "auto";
- break;
- case Configuration.DOCTYPE_STRICT :
- result = "strict";
- break;
- case Configuration.DOCTYPE_LOOSE :
- result = "loose";
- break;
- case Configuration.DOCTYPE_USER :
- result = configuration.docTypeStr;
- break;
- }
- return result;
- }
-
- /**
- * LogicalEmphasis - replace i by em and b by strong
- * @see org.w3c.tidy.Configuration#LogicalEmphasis
- */
-
- public void setLogicalEmphasis(boolean LogicalEmphasis) {
- configuration.LogicalEmphasis = LogicalEmphasis;
- }
-
- public boolean getLogicalEmphasis() {
- return configuration.LogicalEmphasis;
- }
-
- /**
- * XmlPIs - if set to true PIs must end with ?>
- * @see org.w3c.tidy.Configuration#XmlPIs
- */
-
- public void setXmlPIs(boolean XmlPIs) {
- configuration.XmlPIs = XmlPIs;
- }
-
- public boolean getXmlPIs() {
- return configuration.XmlPIs;
- }
-
- /**
- * EncloseText - if true text at body is wrapped in <p>'s
- * @see org.w3c.tidy.Configuration#EncloseBodyText
- */
-
- public void setEncloseText(boolean EncloseText) {
- configuration.EncloseBodyText = EncloseText;
- }
-
- public boolean getEncloseText() {
- return configuration.EncloseBodyText;
- }
-
- /**
- * EncloseBlockText - if true text in blocks is wrapped in <p>'s
- * @see org.w3c.tidy.Configuration#EncloseBlockText
- */
-
- public void setEncloseBlockText(boolean EncloseBlockText) {
- configuration.EncloseBlockText = EncloseBlockText;
- }
-
- public boolean getEncloseBlockText() {
- return configuration.EncloseBlockText;
- }
-
- /**
- * KeepFileTimes - if true last modified time is preserved<br>
- * <b>this is NOT supported at this time.</b>
- * @see org.w3c.tidy.Configuration#KeepFileTimes
- */
-
- public void setKeepFileTimes(boolean KeepFileTimes) {
- configuration.KeepFileTimes = KeepFileTimes;
- }
-
- public boolean getKeepFileTimes() {
- return configuration.KeepFileTimes;
- }
-
- /**
- * Word2000 - draconian cleaning for Word2000
- * @see org.w3c.tidy.Configuration#Word2000
- */
-
- public void setWord2000(boolean Word2000) {
- configuration.Word2000 = Word2000;
- }
-
- public boolean getWord2000() {
- return configuration.Word2000;
- }
-
- /**
- * TidyMark - add meta element indicating tidied doc
- * @see org.w3c.tidy.Configuration#TidyMark
- */
-
- public void setTidyMark(boolean TidyMark) {
- configuration.TidyMark = TidyMark;
- }
-
- public boolean getTidyMark() {
- return configuration.TidyMark;
- }
-
- /**
- * XmlSpace - if set to yes adds xml:space attr as needed
- * @see org.w3c.tidy.Configuration#XmlSpace
- */
-
- public void setXmlSpace(boolean XmlSpace) {
- configuration.XmlSpace = XmlSpace;
- }
-
- public boolean getXmlSpace() {
- return configuration.XmlSpace;
- }
-
- /**
- * Emacs - if true format error output for GNU Emacs
- * @see org.w3c.tidy.Configuration#Emacs
- */
-
- public void setEmacs(boolean Emacs) {
- configuration.Emacs = Emacs;
- }
-
- public boolean getEmacs() {
- return configuration.Emacs;
- }
-
- /**
- * LiteralAttribs - if true attributes may use newlines
- * @see org.w3c.tidy.Configuration#LiteralAttribs
- */
-
- public void setLiteralAttribs(boolean LiteralAttribs) {
- configuration.LiteralAttribs = LiteralAttribs;
- }
-
- public boolean getLiteralAttribs() {
- return configuration.LiteralAttribs;
- }
-
- /**
- * InputStreamName - the name of the input stream (printed in the
- * header information).
- */
- public void setInputStreamName(String name) {
- if (name != null)
- inputStreamName = name;
- }
-
- public String getInputStreamName() {
- return inputStreamName;
- }
-
- /**
- * Sets the configuration from a configuration file.
- */
-
- public void setConfigurationFromFile(String filename) {
- configuration.parseFile(filename);
- }
-
- /**
- * Sets the configuration from a properties object.
- */
-
- public void setConfigurationFromProps(Properties props) {
- configuration.addProps(props);
- }
-
- /**
- * first time initialization which should
- * precede reading the command line
- */
-
- private void init() {
- configuration = new Configuration();
- if (configuration == null)
- return;
-
- AttributeTable at = AttributeTable.getDefaultAttributeTable();
- if (at == null)
- return;
- TagTable tt = new TagTable();
- if (tt == null)
- return;
- tt.setConfiguration(configuration);
- configuration.tt = tt;
- EntityTable et = EntityTable.getDefaultEntityTable();
- if (et == null)
- return;
-
- /* Unnecessary - same initial values in Configuration
- Configuration.XmlTags = false;
- Configuration.XmlOut = false;
- Configuration.HideEndTags = false;
- Configuration.UpperCaseTags = false;
- Configuration.MakeClean = false;
- Configuration.writeback = false;
- Configuration.OnlyErrors = false;
- */
-
- configuration.errfile = null;
- stderr = new PrintWriter(System.err, true);
- errout = stderr;
- initialized = true;
- }
-
- /**
- * Parses InputStream in and returns the root Node.
- * If out is non-null, pretty prints to OutputStream out.
- */
-
- public Node parse(IFile iFile, InputStream in, OutputStream out) {
- Node document = null;
-
- try {
- iFile.deleteMarkers(IMarker.PROBLEM, false, 0);
- document = parse(iFile, in, null, out);
- } catch (CoreException e) {
- } catch (FileNotFoundException fnfe) {
- } catch (IOException e) {
- }
-
- return document;
- }
-
- /**
- * Internal routine that actually does the parsing. The caller
- * can pass either an InputStream or file name. If both are passed,
- * the file name is preferred.
- */
-
- private Node parse(IFile iFile, InputStream in, String file, OutputStream out) throws FileNotFoundException, IOException {
- Lexer lexer;
- Node document = null;
- Node doctype;
- Out o = new OutImpl(); /* normal output stream */
- PPrint pprint;
-
- if (!initialized)
- return null;
-
- if (errout == null)
- return null;
-
- parseErrors = 0;
- parseWarnings = 0;
-
- /* ensure config is self-consistent */
- configuration.adjust();
-
- if (file != null) {
- in = new FileInputStream(file);
- inputStreamName = file;
- } else if (in == null) {
- in = System.in;
- inputStreamName = "stdin";
- }
-
- if (in != null) {
- lexer = new Lexer(iFile,new StreamInImpl(in, configuration.CharEncoding, configuration.tabsize), configuration);
- lexer.errout = errout;
-
- /*
- store pointer to lexer in input stream
- to allow character encoding errors to be
- reported
- */
- lexer.in.lexer = lexer;
-
- /* Tidy doesn't alter the doctype for generic XML docs */
- if (configuration.XmlTags)
- document = ParserImpl.parseXMLDocument(lexer);
- else {
- lexer.warnings = 0;
- if (!configuration.Quiet)
- Report.helloMessage(errout, Report.RELEASE_DATE, inputStreamName);
-
- document = ParserImpl.parseDocument(lexer);
-
- if (!document.checkNodeIntegrity()) {
- Report.badTree(errout);
- return null;
- }
-
- Clean cleaner = new Clean(configuration.tt);
-
- /* simplifies <b><b> ... </b> ...</b> etc. */
- cleaner.nestedEmphasis(document);
-
- /* cleans up <dir>indented text</dir> etc. */
- cleaner.list2BQ(document);
- cleaner.bQ2Div(document);
-
- /* replaces i by em and b by strong */
- if (configuration.LogicalEmphasis)
- cleaner.emFromI(document);
-
- if (configuration.Word2000 && cleaner.isWord2000(document, configuration.tt)) {
- /* prune Word2000's <![if ...]> ... <![endif]> */
- cleaner.dropSections(lexer, document);
-
- /* drop style & class attributes and empty p, span elements */
- cleaner.cleanWord2000(lexer, document);
- }
-
- /* replaces presentational markup by style rules */
- if (configuration.MakeClean || configuration.DropFontTags)
- cleaner.cleanTree(lexer, document);
-
- if (!document.checkNodeIntegrity()) {
- Report.badTree(errout);
- return null;
- }
- doctype = document.findDocType();
- if (document.content != null) {
- if (configuration.xHTML)
- lexer.setXHTMLDocType(document);
- else
- lexer.fixDocType(document);
-
- if (configuration.TidyMark)
- lexer.addGenerator(document);
- }
-
- /* ensure presence of initial <?XML version="1.0"?> */
- if (configuration.XmlOut && configuration.XmlPi)
- lexer.fixXMLPI(document);
-
- if (!configuration.Quiet && document.content != null) {
- Report.reportVersion(errout, lexer, inputStreamName, doctype);
- Report.reportNumWarnings(errout, lexer);
- }
- }
-
- parseWarnings = lexer.warnings;
- parseErrors = lexer.errors;
-
- // Try to close the InputStream but only if if we created it.
-
- if ((file != null) && (in != System.in)) {
- try {
- in.close();
- } catch (IOException e) {
- }
- }
-
- if (lexer.errors > 0)
- Report.needsAuthorIntervention(errout);
-
- o.state = StreamIn.FSM_ASCII;
- o.encoding = configuration.CharEncoding;
-
- if (!configuration.OnlyErrors && lexer.errors == 0) {
- if (configuration.BurstSlides) {
- Node body;
-
- body = null;
- /*
- remove doctype to avoid potential clash with
- markup introduced when bursting into slides
- */
- /* discard the document type */
- doctype = document.findDocType();
-
- if (doctype != null)
- Node.discardElement(doctype);
-
- /* slides use transitional features */
- lexer.versions |= Dict.VERS_HTML40_LOOSE;
-
- /* and patch up doctype to match */
- if (configuration.xHTML)
- lexer.setXHTMLDocType(document);
- else
- lexer.fixDocType(document);
-
- /* find the body element which may be implicit */
- body = document.findBody(configuration.tt);
-
- if (body != null) {
- pprint = new PPrint(configuration);
- Report.reportNumberOfSlides(errout, pprint.countSlides(body));
- pprint.createSlides(lexer, document);
- } else
- Report.missingBody(errout);
- } else if (configuration.writeback && (file != null)) {
- try {
- pprint = new PPrint(configuration);
- o.out = new FileOutputStream(file);
-
- if (configuration.XmlTags)
- pprint.printXMLTree(o, (short) 0, 0, lexer, document);
- else
- pprint.printTree(o, (short) 0, 0, lexer, document);
-
- pprint.flushLine(o, 0);
- o.out.close();
- } catch (IOException e) {
- errout.println(file + e.toString());
- }
- } else if (out != null) {
- pprint = new PPrint(configuration);
- o.out = out;
-
- if (configuration.XmlTags)
- pprint.printXMLTree(o, (short) 0, 0, lexer, document);
- else
- pprint.printTree(o, (short) 0, 0, lexer, document);
-
- pprint.flushLine(o, 0);
- }
-
- }
-
- Report.errorSummary(lexer);
- }
- return document;
- }
-
- /**
- * Parses InputStream in and returns a DOM Document node.
- * If out is non-null, pretty prints to OutputStream out.
- */
-
- public org.w3c.dom.Document parseDOM(IFile file, InputStream in, OutputStream out) {
- Node document = parse(file, in, out);
- if (document != null)
- return (org.w3c.dom.Document) document.getAdapter();
- else
- return null;
- }
-
- /**
- * Creates an empty DOM Document.
- */
-
- public static org.w3c.dom.Document createEmptyDocument() {
- Node document = new Node(Node.RootNode, new byte[0], 0, 0);
- Node node = new Node(Node.StartTag, new byte[0], 0, 0, "html", new TagTable());
- if (document != null && node != null) {
- Node.insertNodeAtStart(document, node);
- return (org.w3c.dom.Document) document.getAdapter();
- } else {
- return null;
- }
- }
-
- /**
- * Pretty-prints a DOM Document.
- */
-
- public void pprint(org.w3c.dom.Document doc, OutputStream out) {
- Out o = new OutImpl();
- PPrint pprint;
- Node document;
-
- if (!(doc instanceof DOMDocumentImpl)) {
- return;
- }
- document = ((DOMDocumentImpl) doc).adaptee;
-
- o.state = StreamIn.FSM_ASCII;
- o.encoding = configuration.CharEncoding;
-
- if (out != null) {
- pprint = new PPrint(configuration);
- o.out = out;
-
- if (configuration.XmlTags)
- pprint.printXMLTree(o, (short) 0, 0, null, document);
- else
- pprint.printTree(o, (short) 0, 0, null, document);
-
- pprint.flushLine(o, 0);
- }
- }
-
- /**
- * Command line interface to parser and pretty printer.
- */
-
- public static void main(String[] argv) {
- int totalerrors = 0;
- int totalwarnings = 0;
- String file;
- InputStream in;
- String prog = "Tidy";
- Node document;
- Node doctype;
- Lexer lexer;
- String s;
- Out out = new OutImpl(); /* normal output stream */
- PPrint pprint;
- int argc = argv.length + 1;
- int argIndex = 0;
- Tidy tidy;
- Configuration configuration;
- String arg;
- String current_errorfile = "stderr";
-
- tidy = new Tidy();
- configuration = tidy.getConfiguration();
-
- /* read command line */
-
- while (argc > 0) {
- if (argc > 1 && argv[argIndex].startsWith("-")) {
- /* support -foo and --foo */
- arg = argv[argIndex].substring(1);
-
- if (arg.length() > 0 && arg.charAt(0) == '-')
- arg = arg.substring(1);
-
- if (arg.equals("xml"))
- configuration.XmlTags = true;
- else if (arg.equals("asxml") || arg.equals("asxhtml"))
- configuration.xHTML = true;
- else if (arg.equals("indent")) {
- configuration.IndentContent = true;
- configuration.SmartIndent = true;
- } else if (arg.equals("omit"))
- configuration.HideEndTags = true;
- else if (arg.equals("upper"))
- configuration.UpperCaseTags = true;
- else if (arg.equals("clean"))
- configuration.MakeClean = true;
- else if (arg.equals("raw"))
- configuration.CharEncoding = Configuration.RAW;
- else if (arg.equals("ascii"))
- configuration.CharEncoding = Configuration.ASCII;
- else if (arg.equals("latin1"))
- configuration.CharEncoding = Configuration.LATIN1;
- else if (arg.equals("utf8"))
- configuration.CharEncoding = Configuration.UTF8;
- else if (arg.equals("iso2022"))
- configuration.CharEncoding = Configuration.ISO2022;
- else if (arg.equals("mac"))
- configuration.CharEncoding = Configuration.MACROMAN;
- else if (arg.equals("numeric"))
- configuration.NumEntities = true;
- else if (arg.equals("modify"))
- configuration.writeback = true;
- else if (arg.equals("change")) /* obsolete */
- configuration.writeback = true;
- else if (arg.equals("update")) /* obsolete */
- configuration.writeback = true;
- else if (arg.equals("errors"))
- configuration.OnlyErrors = true;
- else if (arg.equals("quiet"))
- configuration.Quiet = true;
- else if (arg.equals("slides"))
- configuration.BurstSlides = true;
- else if (arg.equals("help") || argv[argIndex].charAt(1) == '?' || argv[argIndex].charAt(1) == 'h') {
- Report.helpText(new PrintWriter(System.out, true), prog);
- System.exit(1);
- } else if (arg.equals("config")) {
- if (argc >= 3) {
- configuration.parseFile(argv[argIndex + 1]);
- --argc;
- ++argIndex;
- }
- } else if (argv[argIndex].equals("-file") || argv[argIndex].equals("--file") || argv[argIndex].equals("-f")) {
- if (argc >= 3) {
- configuration.errfile = argv[argIndex + 1];
- --argc;
- ++argIndex;
- }
- } else if (argv[argIndex].equals("-wrap") || argv[argIndex].equals("--wrap") || argv[argIndex].equals("-w")) {
- if (argc >= 3) {
- configuration.wraplen = Integer.parseInt(argv[argIndex + 1]);
- --argc;
- ++argIndex;
- }
- } else if (argv[argIndex].equals("-version") || argv[argIndex].equals("--version") || argv[argIndex].equals("-v")) {
- Report.showVersion(tidy.getErrout());
- System.exit(0);
- } else {
- s = argv[argIndex];
-
- for (int i = 1; i < s.length(); i++) {
- if (s.charAt(i) == 'i') {
- configuration.IndentContent = true;
- configuration.SmartIndent = true;
- } else if (s.charAt(i) == 'o')
- configuration.HideEndTags = true;
- else if (s.charAt(i) == 'u')
- configuration.UpperCaseTags = true;
- else if (s.charAt(i) == 'c')
- configuration.MakeClean = true;
- else if (s.charAt(i) == 'n')
- configuration.NumEntities = true;
- else if (s.charAt(i) == 'm')
- configuration.writeback = true;
- else if (s.charAt(i) == 'e')
- configuration.OnlyErrors = true;
- else if (s.charAt(i) == 'q')
- configuration.Quiet = true;
- else
- Report.unknownOption(tidy.getErrout(), s.charAt(i));
- }
- }
-
- --argc;
- ++argIndex;
- continue;
- }
-
- /* ensure config is self-consistent */
- configuration.adjust();
-
- /* user specified error file */
- if (configuration.errfile != null) {
- /* is it same as the currently opened file? */
- if (!configuration.errfile.equals(current_errorfile)) {
- /* no so close previous error file */
-
- if (tidy.getErrout() != tidy.getStderr())
- tidy.getErrout().close();
-
- /* and try to open the new error file */
- try {
- tidy.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true));
- current_errorfile = configuration.errfile;
- } catch (IOException e) {
- /* can't be opened so fall back to stderr */
- current_errorfile = "stderr";
- tidy.setErrout(tidy.getStderr());
- }
- }
- }
-
- if (argc > 1) {
- file = argv[argIndex];
- } else {
- file = "stdin";
- }
-
- try {
- document = tidy.parse(null, null, file, System.out);
- totalwarnings += tidy.parseWarnings;
- totalerrors += tidy.parseErrors;
- } catch (FileNotFoundException fnfe) {
- Report.unknownFile(tidy.getErrout(), prog, file);
- } catch (IOException ioe) {
- Report.unknownFile(tidy.getErrout(), prog, file);
- }
-
- --argc;
- ++argIndex;
-
- if (argc <= 1)
- break;
- }
-
- if (totalerrors + totalwarnings > 0)
- Report.generalInfo(tidy.getErrout());
-
- if (tidy.getErrout() != tidy.getStderr())
- tidy.getErrout().close();
-
- /* return status can be used by scripts */
-
- if (totalerrors > 0)
- System.exit(2);
-
- if (totalwarnings > 0)
- System.exit(1);
-
- /* 0 signifies all is ok */
- System.exit(0);
- }
-}
+++ /dev/null
-/*
- * @(#)TidyBeanInfo.java 1.11 2000/08/16
- *
- */
-
-package org.w3c.tidy;
-
-/**
- *
- * BeanInfo for Tidy
- *
- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
- * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
- * HTML Tidy Release 4 Aug 2000</a>
- *
- * @author Dave Raggett <dsr@w3.org>
- * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
- * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
- * @version 1.4, 1999/09/04 DOM support
- * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
- * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
- * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
- * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
- * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
- * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
- * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
- */
-
-import java.beans.SimpleBeanInfo;
-import java.awt.Image;
-
-public class TidyBeanInfo extends SimpleBeanInfo {
-
- public Image getIcon(int kind)
- {
- return loadImage("tidy.gif");
- }
-
-}
+++ /dev/null
-error=Error:
-warning=Warning:
-line_column=line {0,number} column {1,number} -
-emacs_format={0}:{1,number}:{2,number}:
-illegal_char=Warning: replacing illegal character code {0,number}
-missing_semicolon=Warning: entity "{0}" doesn''t end in '';''
-unknown_entity=Warning: unescaped & or unknown entity "{0}"
-unescaped_ampersand=Warning: unescaped & which should be written as &
-unknown_attribute=Warning: unknown attribute "{0}"
-missing_attribute=\ lacks "{0}" attribute
-missing_attr_value=\ attribute "{0}" lacks value
-missing_imagemap=\ should use client-side image map
-bad_attribute_value=\ unknown attribute value "{0}"
-xml_attribute_value=\ has XML attribute "{0}"
-unexpected_gt=\ missing '>' for end of tag
-unexpected_quotemark=\ unexpected or duplicate quote mark
-repeated_attribute=\ repeated attribute
-nested_emphasis=\ nested emphasis
-coerce_to_endtag=\ <{0}> is probably intended as </{0}>
-proprietary_attr_value=\ proprietary attribute value "{0}"
-missing_endtag_for=Warning: missing </{0}>
-missing_endtag_before=Warning: missing </{0}> before
-discarding_unexpected=Warning: discarding unexpected
-forced_end_anchor=Warning: <a> is probably intended as </a>
-non_matching_endtag_1=Warning: replacing unexpected
-non_matching_endtag_2=\ by </{0}>
-tag_not_allowed_in=\ isn''t allowed in <{0}> elements
-doctype_after_tags=Warning: <!DOCTYPE> isn't allowed after elements
-missing_starttag=Warning: missing <{0}>
-unexpected_endtag=Warning: unexpected </{0}>
-unexpected_endtag_suffix=\ in <{0}>
-too_many_elements=Warning: too many {0} elements
-too_many_elements_suffix=\ in <{0}>
-using_br_inplace_of=Warning: using <br> in place of
-inserting_tag=Warning: inserting implicit <{0}>
-cant_be_nested=\ can't be nested
-proprietary_element=\ is not approved by W3C
-obsolete_element=Warning: replacing obsolete element
-replacing_element=Warning: replacing element
-by=\ by
-trim_empty_element=Warning: trimming empty
-missing_title_element=Warning: inserting missing 'title' element
-illegal_nesting=\ shouldn't be nested
-noframes_content=\ not inside 'noframes' element
-inconsistent_version=Warning: html doctype doesn't match content
-content_after_body=Warning: content occurs after end of body
-malformed_comment=Warning: adjacent hyphens within comment
-bad_comment_chars=Warning: expecting -- or >
-bad_xml_comment=Warning: XML comments can't contain --
-bad_cdata_content=Warning: '<' + '/' + letter not allowed here
-inconsistent_namespace=Warning: html namespace doesn't match content
-suspected_missing_quote=Error: missing quotemark for attribute value
-duplicate_frameset=Error: repeated FRAMESET element
-unknown_element=\ is not recognized!
-dtype_not_upper_case=Warning: SYSTEM, PUBLIC, W3C, DTD, EN must be upper case
-unexpected_end_of_file=Warning: end of file while parsing attributes
-malformed_doctype=Warning: expected "html PUBLIC" or "html SYSTEM"
-id_name_mismatch=\ id and name attribute value mismatch
-
-badchars_summary=Character codes for the Microsoft Windows fonts in the range\n\
-128 - 159 may not be recognized on other platforms. You are\n\
-instead recommended to use named entities, e.g. &trade; rather\n\
-than Windows character code 153 (0x2122 in Unicode). Note that\n\
-as of February 1998 few browsers support the new entities.\n\n
-
-badform_summary=You may need to move one or both of the <form> and </form>\n\
-tags. HTML elements should be properly nested and form elements\n\
-are no exception. For instance you should not place the <form>\n\
-in one table cell and the </form> in another. If the <form> is\n\
-placed before a table, the </form> cannot be placed inside the\n\
-table! Note that one form can't be nested inside another!\n\n
-
-badaccess_missing_summary=The table summary attribute should be used to describe\n\
-the table structure. It is very helpful for people using\n\
-non-visual browsers. The scope and headers attributes for\n\
-table cells are useful for specifying which headers apply\n\
-to each table cell, enabling non-visual browsers to provide\n\
-a meaningful context for each cell.\n\n
-
-badaccess_missing_image_alt=The alt attribute should be used to give a short description\n\
-of an image; longer descriptions should be given with the\n\
-longdesc attribute which takes a URL linked to the description.\n\
-These measures are needed for people using non-graphical browsers.\n\n
-
-badaccess_missing_image_map=Use client-side image maps in preference to server-side image\n\
-maps as the latter are inaccessible to people using non-\n\
-graphical browsers. In addition, client-side maps are easier\n\
-to set up and provide immediate feedback to users.\n\n
-
-badaccess_missing_link_alt=For hypertext links defined using a client-side image map, you\n\
-need to use the alt attribute to provide a textual description\n\
-of the link for people using non-graphical browsers.\n\n
-
-badaccess_frames=Pages designed using frames present problems for\n\
-people who are either blind or using a browser that\n\
-doesn't support frames. A frames-based page should always\n\
-include an alternative layout inside a NOFRAMES element.\n\n
-
-badaccess_summary=For further advice on how to make your pages accessible\n\
-see "{0}". You may also want to try\n\
-"http://www.cast.org/bobby/" which is a free Web-based\n\
-service for checking URLs for accessibility.\n\n
-
-badlayout_using_layer=The Cascading Style Sheets (CSS) Positioning mechanism\n\
-is recommended in preference to the proprietary <LAYER>\n\
-element due to limited vendor support for LAYER.\n\n
-
-badlayout_using_spacer=You are recommended to use CSS for controlling white\n\
-space (e.g. for indentation, margins and line spacing).\n\
-The proprietary <SPACER> element has limited vendor support.\n\n
-
-badlayout_using_font=You are recommended to use CSS to specify the font and\n\
-properties such as its size and color. This will reduce\n\
-the size of HTML files and make them easier to maintain\n\
-compared with using <FONT> elements.\n\n
-
-badlayout_using_nobr=You are recommended to use CSS to control line wrapping.\n\
-Use \"white-space: nowrap\" to inhibit wrapping in place\n\
-of inserting <NOBR>...</NOBR> into the markup.\n\n
-
-badlayout_using_body=You are recommended to use CSS to specify page and link colors\n\n
-
-unrecognized_option=unrecognized option -{0} use -help to list options
-unknown_file={0}: can''t open file "{1}"
-unknown_option=Warning - unknown option: {0}
-bad_argument=Warning - missing or malformed argument for option: {0}
-
-needs_author_intervention=This document has errors that must be fixed before\n\
-using HTML Tidy to generate a tidied up version.\n\n
-
-missing_body=Can't create slides - document is missing a body element.
-slides_found={0,number} Slides found
-
-general_info=HTML & CSS specifications are available from http://www.w3.org/\n\
-To learn more about Tidy see http://www.w3.org/People/Raggett/tidy/\n\
-Please send bug reports to Dave Raggett care of <html-tidy@w3.org>\n\
-Lobby your company to join W3C, see http://www.w3.org/Consortium\n
-
-hello_message=\nTidy (vers {0}) Parsing "{1}"
-
-report_version=\n{0}: Document content looks like {1}
-
-doctype_given=\n{0}: Doctype given is "
-
-num_warnings={0,number} warnings/errors were found!\n
-no_warnings=no warnings or errors were found\n
-
-help_text={0}: file1 file2 ...\n\
-Utility to clean up & pretty print html files\n\
-see http://www.w3.org/People/Raggett/tidy/\n\
-options for tidy released on {1}\n\
-\n\
-Processing directives\n\
---------------------\n\
- -indent or -i indent element content\n\
- -omit or -o omit optional endtags\n\
- -wrap 72 wrap text at column 72 (default is 68)\n\
- -upper or -u force tags to upper case (default is lower)\n\
- -clean or -c replace font, nobr & center tags by CSS\n\
- -numeric or -n output numeric rather than named entities\n\
- -errors or -e only show errors\n\
- -quiet or -q suppress nonessential output\n\
- -xml use this when input is wellformed xml\n\
- -asxml to convert html to wellformed xml\n\
- -slides to burst into slides on h2 elements\n\
-\n\
-Character encodings\n\
-------------------\n\
- -raw leave chars > 128 unchanged upon output\n\
- -ascii use ASCII for output, Latin-1 for input\n\
- -latin1 use Latin-1 for both input and output\n\
- -iso2022 use ISO2022 for both input and output\n\
- -utf8 use UTF-8 for both input and output\n\
- -mac use the Apple MacRoman character set\n\
-\n\
-File manipulation\n\
----------------\n\
- -config <file> set options from config file\n\
- -f <file> write errors to named <file>\n\
- -modify or -m to modify original files\n\
-\n\
-Miscellaneous\n\
-------------\n\
- -version or -v show version\n\
- -help or -h list command line options\n\
-You can also use --blah for any config file option blah\n\
-\n\
-Input/Output default to stdin/stdout respectively\n\
-Single letter options apart from -f may be combined\n\
-as in: tidy -f errs.txt -imu foo.html\n\
-For further info on HTML see http://www.w3.org/MarkUp\n\
-\n
-
-bad_tree=\nPanic - tree has lost its integrity\n
+++ /dev/null
-# sample config file for Java HTML tidy
-
-indent=auto
-indent-spaces=2
-wrap=72
-markup=yes
-clean=yes
-output-xml=no
-input-xml=no
-show-warnings=yes
-numeric-entities=yes
-quote-marks=yes
-quote-nbsp=yes
-quote-ampersand=no
-break-before-br=no
-uppercase-tags=yes
-uppercase-attributes=yes
-smart-indent=no
-output-xhtml=yes
-char-encoding=latin1
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<templates>
-<template name="class" description="class template with constructor" context="php" enabled="true">class ${class_name} {
- function ${class_name}() {
- ${cursor}
- }
-}</template>
-<template name="class" description="class with attribute" context="php" enabled="true">class ${class_name} {
- var $$${attribute};
- function ${class_name}() {
- ${cursor}
- }
-
- function set_${attribute}( $$${attr} ) {
- $$this->${attribute} = $$${attr};
- }
-
- function get_${attribute}() {
- return $$this->${attribute};
- }
-}</template>
-<template name="for" description="iterate over array" context="php" enabled="true">for ($$${index} = 0; $$${index} &lt; sizeof($$${array}); $$${index}++) {
- ${cursor}
-}</template>
-<template name="for" description="iterate over array w/ temporary variable" context="php" enabled="true">for ($$${index} = 0; $$${index} &lt; sizeof($$${array}); $$${index}++) {
- $$${array_element} = $$${array}[$$${index}];
- ${cursor}
-}</template>
-<template name="function" description="function template" context="php" enabled="true">function ${function_name} () {
- ${cursor}
-}</template>
-<template name="function" description="function template with return" context="php" enabled="true">function ${function_name} () {
- return (${cursor});
-}</template>
-<template name="while" description="while iteration" context="php" enabled="true">while (${condition}) {
- ${cursor}
-}</template>
-<template name="switch" description="switch case statement" context="php" enabled="true">switch (${key}) {
- case ${value}:
- ${cursor}
- break;
-
- default:
- break;
-}</template><template name="if" description="if statement" context="php" enabled="true">if (${condition}) {
- ${cursor}
-}</template><template name="ifelse" description="if else statement" context="php" enabled="true">if (${condition}) {
- ${cursor}
-} else {
-
-}</template><template name="elseif" description="else if block" context="php" enabled="true">elseif (${condition}) {
- ${cursor}
-}</template><template name="else" description="else block" context="php" enabled="true">else {
- ${cursor}
-}</template>
-<template name="filecomment" description="file comment used by the class and interface wizards" context="php" enabled="true">/**
- * Created on ${date} by ${user}
- *
- */</template><template name="functioncomment" description="function comment" context="php" enabled="true">/**
- * @author ${user}
- *
- */</template>
- <template name="echo" description="echo a string" context="php" enabled="true">echo "${string}";
- ${cursor}</template>
- </templates>
\ No newline at end of file