+ */ + + private boolean center2Div(Lexer lexer, Node node, MutableObject pnode) + { + if (node.tag == tt.tagCenter) + { + if (lexer.configuration.DropFontTags) + { + if (node.content != null) + { + Node last = node.last; + Node parent = node.parent; + + discardContainer(node, pnode); + + node = lexer.inferredTag("br"); + + if (last.next != null) + last.next.prev = node; + + node.next = last.next; + last.next = node; + node.prev = last; + + if (parent.last == last) + parent.last = node; + + node.parent = parent; + } + else + { + Node prev = node.prev; + Node next = node.next; + Node parent = node.parent; + discardContainer(node, pnode); + + node = lexer.inferredTag("br"); + node.next = next; + node.prev = prev; + node.parent = parent; + + if (next != null) + next.prev = node; + else + parent.last = node; + + if (prev != null) + prev.next = node; + else + parent.content = node; + } + + return true; + } + node.tag = tt.tagDiv; + node.element = "div"; + addStyleProperty(node, "text-align: center"); + return true; + } + + return false; + } + + /* + Symptom

<div><div>...</div></div>

+ Action: merge the two divs + + This is useful after nested <dir>s used by Word + for indenting have been converted to

s + */ + private boolean mergeDivs(Lexer lexer, Node node, MutableObject pnode) + { + Node child; + + if (node.tag != tt.tagDiv) + return false; + + child = node.content; + + if (child == null) + return false; + + if (child.tag != tt.tagDiv) + return false; + + if (child.next != null) + return false; + + mergeStyles(node, child); + stripOnlyChild(node); + return true; + } + + /* + Symptom:

+ Action: discard outer list + */ + + private boolean nestedList(Lexer lexer, Node node, MutableObject pnode) + { + Node child, list; + + if (node.tag == tt.tagUl || node.tag == tt.tagOl) + { + child = node.content; + + if (child == null) + return false; + + /* check child has no peers */ + + if (child.next != null) + return false; + + list = child.content; + + if (list == null) + return false; + + if (list.tag != node.tag) + return false; + + pnode.setObject(node.next); + + /* move inner list node into position of outer node */ + list.prev = node.prev; + list.next = node.next; + list.parent = node.parent; + fixNodeLinks(list); + + /* get rid of outer ul and its li */ + child.content = null; + node.content = null; + node.next = null; + + /* + If prev node was a list the chances are this node + should be appended to that list. Word has no way of + recognizing nested lists and just uses indents + */ + + if (list.prev != null) + { + node = list; + list = node.prev; + + if (list.tag == tt.tagUl || list.tag == tt.tagOl) + { + list.next = node.next; + + if (list.next != null) + list.next.prev = list; + + child = list.last; /*

*/ + + node.parent = child; + node.next = null; + node.prev = child.last; + fixNodeLinks(node); + } + } + + cleanNode(lexer, node); + return true; + } + + return false; + } + + /* + Symptom: the only child of a block-level element is a + presentation element such as B, I or FONT + + Action: add style "font-weight: bold" to the block and + strip the element, leaving its children. + + example: + +
+ <p><b><font face="Arial" size="6">Draft Recommended Practice</font></b></p> +
+ + becomes: + +
+ <p style="font-weight: bold; font-family: Arial; font-size: 6">Draft Recommended Practice</p> +
+ + This code also replaces the align attribute by a style attribute. + However, to avoid CSS problems with Navigator 4, this isn't done + for the elements: caption, tr and table + */ + private boolean blockStyle(Lexer lexer, Node node, MutableObject pnode) + { + Node child; + + if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) != 0) + { + if (node.tag != tt.tagTable + && node.tag != tt.tagTr + && node.tag != tt.tagLi) + { + /* check for align attribute */ + if (node.tag != tt.tagCaption) + textAlign(lexer, node); + + child = node.content; + + if (child == null) + return false; + + /* check child has no peers */ + + if (child.next != null) + return false; + + if (child.tag == tt.tagB) + { + mergeStyles(node, child); + addStyleProperty(node, "font-weight: bold"); + stripOnlyChild(node); + return true; + } + + if (child.tag == tt.tagI) + { + mergeStyles(node, child); + addStyleProperty(node, "font-style: italic"); + stripOnlyChild(node); + return true; + } + + if (child.tag == tt.tagFont) + { + mergeStyles(node, child); + addFontStyles(node, child.attributes); + stripOnlyChild(node); + return true; + } + } + } + + return false; + } + + /* the only child of table cell or an inline element such as em */ + private boolean inlineStyle(Lexer lexer, Node node, MutableObject pnode) + { + Node child; + + if (node.tag != tt.tagFont && (node.tag.model & (Dict.CM_INLINE|Dict.CM_ROW)) != 0) + { + child = node.content; + + if (child == null) + return false; + + /* check child has no peers */ + + if (child.next != null) + return false; + + if (child.tag == tt.tagB && lexer.configuration.LogicalEmphasis) + { + mergeStyles(node, child); + addStyleProperty(node, "font-weight: bold"); + stripOnlyChild(node); + return true; + } + + if (child.tag == tt.tagI && lexer.configuration.LogicalEmphasis) + { + mergeStyles(node, child); + addStyleProperty(node, "font-style: italic"); + stripOnlyChild(node); + return true; + } + + if (child.tag == tt.tagFont) 
+ { + mergeStyles(node, child); + addFontStyles(node, child.attributes); + stripOnlyChild(node); + return true; + } + } + + return false; + } + + /* + Replace font elements by span elements, deleting + the font element's attributes and replacing them + by a single style attribute. + */ + private boolean font2Span(Lexer lexer, Node node, MutableObject pnode) + { + AttVal av, style, next; + + if (node.tag == tt.tagFont) + { + if (lexer.configuration.DropFontTags) + { + discardContainer(node, pnode); + return false; + } + + /* if FONT is only child of parent element then leave alone */ + if (node.parent.content == node + && node.next == null) + return false; + + addFontStyles(node, node.attributes); + + /* extract style attribute and free the rest */ + av = node.attributes; + style = null; + + while (av != null) + { + next = av.next; + + if (av.attribute.equals("style")) + { + av.next = null; + style = av; + } + + av = next; + } + + node.attributes = style; + + node.tag = tt.tagSpan; + node.element = "span"; + + return true; + } + + return false; + } + + /* + Applies all matching rules to a node. 
+ */ + private Node cleanNode(Lexer lexer, Node node) + { + Node next = null; + MutableObject o = new MutableObject(); + boolean b = false; + + for (next = node; node.isElement(); node = next) + { + o.setObject(next); + + b = dir2Div(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = nestedList(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = center2Div(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = mergeDivs(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = blockStyle(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = inlineStyle(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + b = font2Span(lexer, node, o); + next = (Node)o.getObject(); + if (b) + continue; + + break; + } + + return next; + } + + private Node createStyleProperties(Lexer lexer, Node node) + { + Node child; + + if (node.content != null) + { + for (child = node.content; child != null; child = child.next) + { + child = createStyleProperties(lexer, child); + } + } + + return cleanNode(lexer, node); + } + + private void defineStyleRules(Lexer lexer, Node node) + { + Node child; + + if (node.content != null) + { + for (child = node.content; + child != null; child = child.next) + { + defineStyleRules(lexer, child); + } + } + + style2Rule(lexer, node); + } + + public void cleanTree(Lexer lexer, Node doc) + { + doc = createStyleProperties(lexer, doc); + + if (!lexer.configuration.MakeClean) + { + defineStyleRules(lexer, doc); + createStyleElement(lexer, doc); + } + } + + /* simplifies ... ... etc. 
*/ + public void nestedEmphasis(Node node) + { + MutableObject o = new MutableObject(); + Node next; + + while (node != null) + { + next = node.next; + + if ((node.tag == tt.tagB || node.tag == tt.tagI) + && node.parent != null && node.parent.tag == node.tag) + { + /* strip redundant inner element */ + o.setObject(next); + discardContainer(node, o); + next = (Node)o.getObject(); + node = next; + continue; + } + + if (node.content != null) + nestedEmphasis(node.content); + + node = next; + } + } + + /* replace i by em and b by strong */ + public void emFromI(Node node) + { + while (node != null) + { + if (node.tag == tt.tagI) + { + node.element = tt.tagEm.name; + node.tag = tt.tagEm; + } + else if (node.tag == tt.tagB) + { + node.element = tt.tagStrong.name; + node.tag = tt.tagStrong; + } + + if (node.content != null) + emFromI(node.content); + + node = node.next; + } + } + + /* + Some people use dir or ul without an li + to indent the content. The pattern to + look for is a list with a single implicit + li. This is recursively replaced by an + implicit blockquote. 
+ */ + public void list2BQ(Node node) + { + while (node != null) + { + if (node.content != null) + list2BQ(node.content); + + if (node.tag != null && node.tag.parser == ParserImpl.getParseList() && + node.hasOneChild() && node.content.implicit) + { + stripOnlyChild(node); + node.element = tt.tagBlockquote.name; + node.tag = tt.tagBlockquote; + node.implicit = true; + } + + node = node.next; + } + } + + /* + Replace implicit blockquote by div with an indent + taking care to reduce nested blockquotes to a single + div with the indent set to match the nesting depth + */ + public void bQ2Div(Node node) + { + int indent; + String indent_buf; + + while (node != null) + { + if (node.tag == tt.tagBlockquote && node.implicit) + { + indent = 1; + + while(node.hasOneChild() && + node.content.tag == tt.tagBlockquote && + node.implicit) + { + ++indent; + stripOnlyChild(node); + } + + if (node.content != null) + bQ2Div(node.content); + + indent_buf = "margin-left: " + + (new Integer(2*indent)).toString() + "em"; + + node.element = tt.tagDiv.name; + node.tag = tt.tagDiv; + node.addAttribute("style", indent_buf); + } + else if (node.content != null) + bQ2Div(node.content); + + + node = node.next; + } + } + + /* node is prune up to */ + public Node pruneSection(Lexer lexer, Node node) + { + for (;;) + { + /* discard node and returns next */ + node = Node.discardElement(node); + + if (node == null) + return null; + + if (node.type == Node.SectionTag) + { + if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) + { + node = pruneSection(lexer, node); + continue; + } + + if ((Lexer.getString(node.textarray, node.start, 5)).equals("endif")) + { + node = Node.discardElement(node); + break; + } + } + } + + return node; + } + + public void dropSections(Lexer lexer, Node node) + { + while (node != null) + { + if (node.type == Node.SectionTag) + { + /* prune up to matching endif */ + if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) + { + node = 
pruneSection(lexer, node); + continue; + } + + /* discard others as well */ + node = Node.discardElement(node); + continue; + } + + if (node.content != null) + dropSections(lexer, node.content); + + node = node.next; + } + } + + public void purgeAttributes(Node node) + { + AttVal attr = node.attributes; + AttVal next = null; + AttVal prev = null; + + while (attr != null) + { + next = attr.next; + + /* special check for class="Code" denoting pre text */ + if (attr.attribute != null && + attr.value != null && + attr.attribute.equals("class") && + attr.value.equals("Code")) + { + prev = attr; + } + else if (attr.attribute != null && + (attr.attribute.equals("class") || + attr.attribute.equals("style") || + attr.attribute.equals("lang") || + attr.attribute.startsWith("x:") || + ((attr.attribute.equals("height") || attr.attribute.equals("width")) && + (node.tag == tt.tagTd || node.tag == tt.tagTr || node.tag == tt.tagTh)))) + { + if (prev != null) + prev.next = next; + else + node.attributes = next; + + } + else + prev = attr; + + attr = next; + } + } + + /* Word2000 uses span excessively, so we strip span out */ + public Node stripSpan(Lexer lexer, Node span) + { + Node node; + Node prev = null; + Node content; + + /* + deal with span elements that have content + by splicing the content in place of the span + after having processed it + */ + + cleanWord2000(lexer, span.content); + content = span.content; + + if (span.prev != null) + prev = span.prev; + else if (content != null) + { + node = content; + content = content.next; + Node.removeNode(node); + Node.insertNodeBeforeElement(span, node); + prev = node; + } + + while (content != null) + { + node = content; + content = content.next; + Node.removeNode(node); + Node.insertNodeAfterElement(prev, node); + prev = node; + } + + if (span.next == null) + span.parent.last = prev; + + node = span.next; + span.content = null; + Node.discardElement(span); + return node; + } + + /* map non-breaking spaces to regular spaces */ + 
private void normalizeSpaces(Lexer lexer, Node node) + { + while (node != null) + { + if (node.content != null) + normalizeSpaces(lexer, node.content); + + if (node.type == Node.TextNode) + { + int i; + MutableInteger c = new MutableInteger(); + int p = node.start; + + for (i = node.start; i < node.end; ++i) + { + c.value = (int)node.textarray[i]; + + /* look for UTF-8 multibyte character */ + if (c.value > 0x7F) + i += PPrint.getUTF8(node.textarray, i, c); + + if (c.value == 160) + c.value = ' '; + + p = PPrint.putUTF8(node.textarray, p, c.value); + } + } + + node = node.next; + } + } + + /* + This is a major clean up to strip out all the extra stuff you get + when you save as web page from Word 2000. It doesn't yet know what + to do with VML tags, but these will appear as errors unless you + declare them as new tags, such as o:p which needs to be declared + as inline. + */ + public void cleanWord2000(Lexer lexer, Node node) + { + /* used to a list from a sequence of bulletted p's */ + Node list = null; + + while (node != null) + { + /* discard Word's style verbiage */ + if (node.tag == tt.tagStyle || + node.tag == tt.tagMeta || + node.type == Node.CommentTag) + { + node = Node.discardElement(node); + continue; + } + + /* strip out all span tags Word scatters so liberally! 
*/ + if (node.tag == tt.tagSpan) + { + node = stripSpan(lexer, node); + continue; + } + + /* get rid of Word's xmlns attributes */ + if (node.tag == tt.tagHtml) + { + /* check that it's a Word 2000 document */ + if (node.getAttrByName("xmlns:o") == null) + return; + } + + if (node.tag == tt.tagLink) + { + AttVal attr = node.getAttrByName("rel"); + + if (attr != null && attr.value != null && + attr.value.equals("File-List")) + { + node = Node.discardElement(node); + continue; + } + } + + /* discard empty paragraphs */ + if (node.content == null && node.tag == tt.tagP) + { + node = Node.discardElement(node); + continue; + } + + if (node.tag == tt.tagP) + { + AttVal attr = node.getAttrByName("class"); + + /* map sequence of
to
...
*/ + if (attr != null && attr.value != null && + attr.value.equals("MsoListBullet")) + { + Node.coerceNode(lexer, node, tt.tagLi); + + if (list == null || list.tag != tt.tagUl) + { + list = lexer.inferredTag("ul"); + Node.insertNodeBeforeElement(node, list); + } + + purgeAttributes(node); + + if (node.content != null) + cleanWord2000(lexer, node.content); + + /* remove node and append to contents of list */ + Node.removeNode(node); + Node.insertNodeAtEnd(list, node); + node = list.next; + } + /* map sequence of
to
...
*/ + else if (attr != null && attr.value != null && + attr.value.equals("Code")) + { + Node br = lexer.newLineNode(); + normalizeSpaces(lexer, node); + + if (list == null || list.tag != tt.tagPre) + { + list = lexer.inferredTag("pre"); + Node.insertNodeBeforeElement(node, list); + } + + /* remove node and append to contents of list */ + Node.removeNode(node); + Node.insertNodeAtEnd(list, node); + stripSpan(lexer, node); + Node.insertNodeAtEnd(list, br); + node = list.next; + } + else + list = null; + } + else + list = null; + + /* strip out style and class attributes */ + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + purgeAttributes(node); + + if (node.content != null) + cleanWord2000(lexer, node.content); + + node = node.next; + } + } + + public boolean isWord2000(Node root, TagTable tt) + { + Node html = root.findHTML(tt); + + return (html != null && html.getAttrByName("xmlns:o") != null); + } +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java new file mode 100644 index 0000000..b2d8dfd --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Configuration.java @@ -0,0 +1,600 @@ +/* + * @(#)Configuration.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Read configuration file and manage configuration properties. + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/* + Configuration files associate a property name with a value. + The format is that of a Java .properties file. +*/ + +import java.util.Enumeration; +import java.util.Properties; +import java.util.StringTokenizer; +import java.io.FileInputStream; +import java.io.IOException; + +public class Configuration implements java.io.Serializable { + + /* character encodings */ + public static final int RAW = 0; + public static final int ASCII = 1; + public static final int LATIN1 = 2; + public static final int UTF8 = 3; + public static final int ISO2022 = 4; + public static final int MACROMAN = 5; + + /* mode controlling treatment of doctype */ + public static final int DOCTYPE_OMIT = 0; + public static final int DOCTYPE_AUTO = 1; + public static final int DOCTYPE_STRICT= 2; + public static final int DOCTYPE_LOOSE = 3; + public static final int DOCTYPE_USER = 4; + + protected int spaces = 2; /* default indentation */ + protected int wraplen = 68; /* default wrap margin */ + protected int CharEncoding = ASCII; + protected int tabsize = 4; + + protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */ + protected String altText = null; /* default text for alt attribute */ + protected String slidestyle = null; /* style 
sheet for slides */ + protected String docTypeStr = null; /* user specified doctype */ + protected String errfile = null; /* file name to write errors to */ + protected boolean writeback = false; /* if true then output tidied markup */ + + protected boolean OnlyErrors = false; /* if true normal output is suppressed */ + protected boolean ShowWarnings = true; /* however errors are always shown */ + protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */ + protected boolean IndentContent = false; /* indent content of appropriate tags */ + protected boolean SmartIndent = false; /* does text/block level content effect indentation */ + protected boolean HideEndTags = false; /* suppress optional end tags */ + protected boolean XmlTags = false; /* treat input as XML */ + protected boolean XmlOut = false; /* create output as XML */ + protected boolean xHTML = false; /* output extensible HTML */ + protected boolean XmlPi = false; /* add for XML docs */ + protected boolean RawOut = false; /* avoid mapping values > 127 to entities */ + protected boolean UpperCaseTags = false; /* output tags in upper not lower case */ + protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */ + protected boolean MakeClean = false; /* remove presentational clutter */ + protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */ + protected boolean DropFontTags = false; /* discard presentation tags */ + protected boolean DropEmptyParas = true; /* discard empty p elements */ + protected boolean FixComments = true; /* fix comments with adjacent hyphens */ + protected boolean BreakBeforeBR = false; /* o/p newline before
or not? */ + protected boolean BurstSlides = false; /* create slides on each h2 element */ + protected boolean NumEntities = false; /* use numeric entities */ + protected boolean QuoteMarks = false; /* output " marks as " */ + protected boolean QuoteNbsp = true; /* output non-breaking space as entity */ + protected boolean QuoteAmpersand = true; /* output naked ampersand as & */ + protected boolean WrapAttVals = false; /* wrap within attribute values */ + protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */ + protected boolean WrapSection = true; /* wrap within section tags */ + protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */ + protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */ + protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */ + protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */ + protected boolean IndentAttributes = false; /* newline+indent before each attribute */ + protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */ + protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */ + protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in
's */ + protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in
's */ + protected boolean KeepFileTimes = true; /* if yes last modied time is preserved */ + protected boolean Word2000 = false; /* draconian cleaning for Word2000 */ + protected boolean TidyMark = true; /* add meta element indicating tidied doc */ + protected boolean Emacs = false; /* if true format error output for GNU Emacs */ + protected boolean LiteralAttribs = false; /* if true attributes may use newlines */ + + protected TagTable tt; /* TagTable associated with this Configuration */ + + private transient Properties _properties = new Properties(); + + public Configuration() + { + } + + public void addProps( Properties p ) + { + Enumeration enum = p.propertyNames(); + while (enum.hasMoreElements()) + { + String key = (String) enum.nextElement(); + String value = p.getProperty(key); + _properties.put(key, value); + } + parseProps(); + } + + public void parseFile( String filename ) + { + try + { + _properties.load( new FileInputStream( filename ) ); + } + catch (IOException e) + { + System.err.println(filename + e.toString()); + return; + } + parseProps(); + } + + private void parseProps() + { + String value; + + value = _properties.getProperty("indent-spaces"); + if (value != null) + spaces = parseInt(value, "indent-spaces"); + + value = _properties.getProperty("wrap"); + if (value != null) + wraplen = parseInt(value, "wrap"); + + value = _properties.getProperty("wrap-attributes"); + if (value != null) + WrapAttVals = parseBool(value, "wrap-attributes"); + + value = _properties.getProperty("wrap-script-literals"); + if (value != null) + WrapScriptlets = parseBool(value, "wrap-script-literals"); + + value = _properties.getProperty("wrap-sections"); + if (value != null) + WrapSection = parseBool(value, "wrap-sections"); + + value = _properties.getProperty("wrap-asp"); + if (value != null) + WrapAsp = parseBool(value, "wrap-asp"); + + value = _properties.getProperty("wrap-jste"); + if (value != null) + WrapJste = parseBool(value, "wrap-jste"); + + value = 
_properties.getProperty("wrap-php"); + if (value != null) + WrapPhp = parseBool(value, "wrap-php"); + + value = _properties.getProperty("literal-attributes"); + if (value != null) + LiteralAttribs = parseBool(value, "literal-attributes"); + + value = _properties.getProperty("tab-size"); + if (value != null) + tabsize = parseInt(value, "tab-size"); + + value = _properties.getProperty("markup"); + if (value != null) + OnlyErrors = parseInvBool(value, "markup"); + + value = _properties.getProperty("quiet"); + if (value != null) + Quiet = parseBool(value, "quiet"); + + value = _properties.getProperty("tidy-mark"); + if (value != null) + TidyMark = parseBool(value, "tidy-mark"); + + value = _properties.getProperty("indent"); + if (value != null) + IndentContent = parseIndent(value, "indent"); + + value = _properties.getProperty("indent-attributes"); + if (value != null) + IndentAttributes = parseBool(value, "ident-attributes"); + + value = _properties.getProperty("hide-endtags"); + if (value != null) + HideEndTags = parseBool(value, "hide-endtags"); + + value = _properties.getProperty("input-xml"); + if (value != null) + XmlTags = parseBool(value, "input-xml"); + + value = _properties.getProperty("output-xml"); + if (value != null) + XmlOut = parseBool(value, "output-xml"); + + value = _properties.getProperty("output-xhtml"); + if (value != null) + xHTML = parseBool(value, "output-xhtml"); + + value = _properties.getProperty("add-xml-pi"); + if (value != null) + XmlPi = parseBool(value, "add-xml-pi"); + + value = _properties.getProperty("add-xml-decl"); + if (value != null) + XmlPi = parseBool(value, "add-xml-decl"); + + value = _properties.getProperty("assume-xml-procins"); + if (value != null) + XmlPIs = parseBool(value, "assume-xml-procins"); + + value = _properties.getProperty("raw"); + if (value != null) + RawOut = parseBool(value, "raw"); + + value = _properties.getProperty("uppercase-tags"); + if (value != null) + UpperCaseTags = parseBool(value, 
"uppercase-tags"); + + value = _properties.getProperty("uppercase-attributes"); + if (value != null) + UpperCaseAttrs = parseBool(value, "uppercase-attributes"); + + value = _properties.getProperty("clean"); + if (value != null) + MakeClean = parseBool(value, "clean"); + + value = _properties.getProperty("logical-emphasis"); + if (value != null) + LogicalEmphasis = parseBool(value, "logical-emphasis"); + + value = _properties.getProperty("word-2000"); + if (value != null) + Word2000 = parseBool(value, "word-2000"); + + value = _properties.getProperty("drop-empty-paras"); + if (value != null) + DropEmptyParas = parseBool(value, "drop-empty-paras"); + + value = _properties.getProperty("drop-font-tags"); + if (value != null) + DropFontTags = parseBool(value, "drop-font-tags"); + + value = _properties.getProperty("enclose-text"); + if (value != null) + EncloseBodyText = parseBool(value, "enclose-text"); + + value = _properties.getProperty("enclose-block-text"); + if (value != null) + EncloseBlockText = parseBool(value, "enclose-block-text"); + + value = _properties.getProperty("alt-text"); + if (value != null) + altText = value; + + value = _properties.getProperty("add-xml-space"); + if (value != null) + XmlSpace = parseBool(value, "add-xml-space"); + + value = _properties.getProperty("fix-bad-comments"); + if (value != null) + FixComments = parseBool(value, "fix-bad-comments"); + + value = _properties.getProperty("split"); + if (value != null) + BurstSlides = parseBool(value, "split"); + + value = _properties.getProperty("break-before-br"); + if (value != null) + BreakBeforeBR = parseBool(value, "break-before-br"); + + value = _properties.getProperty("numeric-entities"); + if (value != null) + NumEntities = parseBool(value, "numeric-entities"); + + value = _properties.getProperty("quote-marks"); + if (value != null) + QuoteMarks = parseBool(value, "quote-marks"); + + value = _properties.getProperty("quote-nbsp"); + if (value != null) + QuoteNbsp = parseBool(value, 
"quote-nbsp"); + + value = _properties.getProperty("quote-ampersand"); + if (value != null) + QuoteAmpersand = parseBool(value, "quote-ampersand"); + + value = _properties.getProperty("write-back"); + if (value != null) + writeback = parseBool(value, "write-back"); + + value = _properties.getProperty("keep-time"); + if (value != null) + KeepFileTimes = parseBool(value, "keep-time"); + + value = _properties.getProperty("show-warnings"); + if (value != null) + ShowWarnings = parseBool(value, "show-warnings"); + + value = _properties.getProperty("error-file"); + if (value != null) + errfile = parseName(value, "error-file"); + + value = _properties.getProperty("slide-style"); + if (value != null) + slidestyle = parseName(value, "slide-style"); + + value = _properties.getProperty("new-inline-tags"); + if (value != null) + parseInlineTagNames(value, "new-inline-tags"); + + value = _properties.getProperty("new-blocklevel-tags"); + if (value != null) + parseBlockTagNames(value, "new-blocklevel-tags"); + + value = _properties.getProperty("new-empty-tags"); + if (value != null) + parseEmptyTagNames(value, "new-empty-tags"); + + value = _properties.getProperty("new-pre-tags"); + if (value != null) + parsePreTagNames(value, "new-pre-tags"); + + value = _properties.getProperty("char-encoding"); + if (value != null) + CharEncoding = parseCharEncoding(value, "char-encoding"); + + value = _properties.getProperty("doctype"); + if (value != null) + docTypeStr = parseDocType(value, "doctype"); + + value = _properties.getProperty("fix-backslash"); + if (value != null) + FixBackslash = parseBool(value, "fix-backslash"); + + value = _properties.getProperty("gnu-emacs"); + if (value != null) + Emacs = parseBool(value, "gnu-emacs"); + } + + /* ensure that config is self consistent */ + public void adjust() + { + if (EncloseBlockText) + EncloseBodyText = true; + + /* avoid the need to set IndentContent when SmartIndent is set */ + + if (SmartIndent) + IndentContent = true; + + /* disable 
wrapping */ + if (wraplen == 0) + wraplen = 0x7FFFFFFF; + + /* Word 2000 needs o:p to be declared as inline */ + if (Word2000) + { + tt.defineInlineTag("o:p"); + } + + /* XHTML is written in lower case */ + if (xHTML) + { + XmlOut = true; + UpperCaseTags = false; + UpperCaseAttrs = false; + } + + /* if XML in, then XML out */ + if (XmlTags) + { + XmlOut = true; + XmlPIs = true; + } + + /* XML requires end tags */ + if (XmlOut) + { + QuoteAmpersand = true; + HideEndTags = false; + } + } + + private static int parseInt( String s, String option ) + { + int i = 0; + try { + i = Integer.parseInt( s ); + } + catch ( NumberFormatException e ) { + Report.badArgument(option); + i = -1; + } + return i; + } + + private static boolean parseBool( String s, String option ) + { + boolean b = false; + if ( s != null && s.length() > 0 ) { + char c = s.charAt(0); + if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1')) + b = true; + else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0')) + b = false; + else + Report.badArgument(option); + } + return b; + } + + private static boolean parseInvBool( String s, String option ) + { + boolean b = false; + if ( s != null && s.length() > 0 ) { + char c = s.charAt(0); + if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y')) + b = true; + else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n')) + b = false; + else + Report.badArgument(option); + } + return !b; + } + + private static String parseName( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s ); + String rs = null; + if ( t.countTokens() >= 1 ) + rs = t.nextToken(); + else + Report.badArgument(option); + return rs; + } + + private static int parseCharEncoding( String s, String option ) + { + int result = ASCII; + + if (Lexer.wstrcasecmp(s, "ascii") == 0) + result = ASCII; + else if (Lexer.wstrcasecmp(s, "latin1") == 0) + result = LATIN1; + else if (Lexer.wstrcasecmp(s, "raw") == 0) + result = RAW; + else if 
(Lexer.wstrcasecmp(s, "utf8") == 0) + result = UTF8; + else if (Lexer.wstrcasecmp(s, "iso2022") == 0) + result = ISO2022; + else if (Lexer.wstrcasecmp(s, "mac") == 0) + result = MACROMAN; + else + Report.badArgument(option); + + return result; + } + + /* slight hack to avoid changes to pprint.c */ + private boolean parseIndent( String s, String option ) + { + boolean b = IndentContent; + + if (Lexer.wstrcasecmp(s, "yes") == 0) + { + b = true; + SmartIndent = false; + } + else if (Lexer.wstrcasecmp(s, "true") == 0) + { + b = true; + SmartIndent = false; + } + else if (Lexer.wstrcasecmp(s, "no") == 0) + { + b = false; + SmartIndent = false; + } + else if (Lexer.wstrcasecmp(s, "false") == 0) + { + b = false; + SmartIndent = false; + } + else if (Lexer.wstrcasecmp(s, "auto") == 0) + { + b = true; + SmartIndent = true; + } + else + Report.badArgument(option); + return b; + } + + private void parseInlineTagNames( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + while ( t.hasMoreTokens() ) { + tt.defineInlineTag( t.nextToken() ); + } + } + + private void parseBlockTagNames( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + while ( t.hasMoreTokens() ) { + tt.defineBlockTag( t.nextToken() ); + } + } + + private void parseEmptyTagNames( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + while ( t.hasMoreTokens() ) { + tt.defineEmptyTag( t.nextToken() ); + } + } + + private void parsePreTagNames( String s, String option ) + { + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + while ( t.hasMoreTokens() ) { + tt.definePreTag( t.nextToken() ); + } + } + + /* + doctype: omit | auto | strict | loose | + + where the fpi is a string similar to + + "-//ACME//DTD HTML 3.14159//EN" + */ + protected String parseDocType( String s, String option ) + { + s = s.trim(); + + /* "-//ACME//DTD HTML 3.14159//EN" or similar */ + + if (s.startsWith("\"")) + { + 
docTypeMode = DOCTYPE_USER; + return s; + } + + /* read first word */ + String word = ""; + StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); + if (t.hasMoreTokens()) + word = t.nextToken(); + + if (Lexer.wstrcasecmp(word, "omit") == 0) + docTypeMode = DOCTYPE_OMIT; + else if (Lexer.wstrcasecmp(word, "strict") == 0) + docTypeMode = DOCTYPE_STRICT; + else if (Lexer.wstrcasecmp(word, "loose") == 0 || + Lexer.wstrcasecmp(word, "transitional") == 0) + docTypeMode = DOCTYPE_LOOSE; + else if (Lexer.wstrcasecmp(word, "auto") == 0) + docTypeMode = DOCTYPE_AUTO; + else + { + docTypeMode = DOCTYPE_AUTO; + Report.badArgument(option); + } + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java new file mode 100644 index 0000000..ebc8386 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrImpl.java @@ -0,0 +1,190 @@ +/* + * @(#)DOMAttrImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMAttrImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMAttrImpl extends DOMNodeImpl implements org.w3c.dom.Attr { + + protected AttVal avAdaptee; + + protected DOMAttrImpl(AttVal adaptee) + { + super(null); // must override all methods of DOMNodeImpl + this.avAdaptee = adaptee; + } + + + /* --------------------- DOM ---------------------------- */ + + public String getNodeValue() throws DOMException + { + return getValue(); + } + + public void setNodeValue(String nodeValue) throws DOMException + { + setValue(nodeValue); + } + + public String getNodeName() + { + return getName(); + } + + public short getNodeType() + { + return org.w3c.dom.Node.ATTRIBUTE_NODE; + } + + public org.w3c.dom.Node getParentNode() + { + return null; + } + + public org.w3c.dom.NodeList getChildNodes() + { + // NOT SUPPORTED + return null; + } + + public org.w3c.dom.Node getFirstChild() + { + // NOT SUPPORTED + return null; + } + + public org.w3c.dom.Node getLastChild() + { + // NOT SUPPORTED + return null; + } + + public org.w3c.dom.Node getPreviousSibling() + { + return null; + } + + public org.w3c.dom.Node getNextSibling() + { + return null; + } + + public org.w3c.dom.NamedNodeMap getAttributes() + { + return null; + } + + public org.w3c.dom.Document getOwnerDocument() + { + return null; + } + + public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, + org.w3c.dom.Node refChild) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + 
"Not supported"); + } + + public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, + org.w3c.dom.Node oldChild) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + public boolean hasChildNodes() + { + return false; + } + + public org.w3c.dom.Node cloneNode(boolean deep) + { + return null; + } + + /** + * @see org.w3c.dom.Attr#getName + */ + public String getName() + { + return avAdaptee.attribute; + } + + /** + * @see org.w3c.dom.Attr#getSpecified + */ + public boolean getSpecified() + { + return true; + } + + /** + * Returns value of this attribute. If this attribute has a null value, + * then the attribute name is returned instead. + * Thanks to Brett Knights for this fix. + * @see org.w3c.dom.Attr#getValue + * + */ + public String getValue() + { + return (avAdaptee.value == null) ? avAdaptee.attribute : avAdaptee.value ; + } + + /** + * @see org.w3c.dom.Attr#setValue + */ + public void setValue(String value) + { + avAdaptee.value = value; + } + + /** + * DOM2 - not implemented. 
+ */ + public org.w3c.dom.Element getOwnerElement() { + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java new file mode 100644 index 0000000..2b1ca08 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMAttrMapImpl.java @@ -0,0 +1,138 @@ +/* + * @(#)DOMAttrMapImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMAttrMapImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMAttrMapImpl implements org.w3c.dom.NamedNodeMap { + + private AttVal first = null; + + protected DOMAttrMapImpl(AttVal first) + { + this.first = first; + } + + /** + * @see org.w3c.dom.NamedNodeMap#getNamedItem + */ + public org.w3c.dom.Node getNamedItem(String name) + { + AttVal att = this.first; + while (att != null) { + if (att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) + return att.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.NamedNodeMap#setNamedItem + */ + public org.w3c.dom.Node setNamedItem(org.w3c.dom.Node arg) + throws DOMException + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.NamedNodeMap#removeNamedItem + */ + public org.w3c.dom.Node removeNamedItem(String name) + throws DOMException + { + // NOT SUPPORTED + return 
null; + } + + /** + * @see org.w3c.dom.NamedNodeMap#item + */ + public org.w3c.dom.Node item(int index) + { + int i = 0; + AttVal att = this.first; + while (att != null) { + if (i >= index) break; + i++; + att = att.next; + } + if (att != null) + return att.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.NamedNodeMap#getLength + */ + public int getLength() + { + int len = 0; + AttVal att = this.first; + while (att != null) { + len++; + att = att.next; + } + return len; + } + + /** + * DOM2 - not implemented. + */ + public org.w3c.dom.Node getNamedItemNS(String namespaceURI, + String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Node setNamedItemNS(org.w3c.dom.Node arg) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Node removeNamedItemNS(String namespaceURI, + String localName) + throws org.w3c.dom.DOMException + { + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java new file mode 100644 index 0000000..5e150db --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCDATASectionImpl.java @@ -0,0 +1,51 @@ +/* + * @(#)DOMCDATASectionImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMCDATASectionImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @author Gary L Peskin + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMCDATASectionImpl extends DOMTextImpl + implements org.w3c.dom.CDATASection { + + protected DOMCDATASectionImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return "#cdata-section"; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.CDATA_SECTION_NODE; + } +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java new file mode 100644 index 0000000..37245fb --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCharacterDataImpl.java @@ -0,0 +1,143 @@ +/* + * @(#)DOMCharacterDataImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMCharacterDataImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMCharacterDataImpl extends DOMNodeImpl + implements org.w3c.dom.CharacterData { + + protected DOMCharacterDataImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.CharacterData#getData + */ + public String getData() throws DOMException + { + return getNodeValue(); + } + + /** + * @see org.w3c.dom.CharacterData#setData + */ + public void setData(String data) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + /** + * @see org.w3c.dom.CharacterData#getLength + */ + public int getLength() + { + int len = 0; + if (adaptee.textarray != null && adaptee.start < adaptee.end) + len = adaptee.end - adaptee.start; + return len; + } + + /** + * @see org.w3c.dom.CharacterData#substringData + */ + public String substringData(int offset, + int count) throws DOMException + { + int len; + String value = null; + if (count < 0) + { + throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR, + "Invalid length"); + } + if (adaptee.textarray != null && adaptee.start < adaptee.end) + { + if (adaptee.start + offset >= adaptee.end) + { + throw new DOMExceptionImpl(DOMException.INDEX_SIZE_ERR, + "Invalid offset"); + } + len = count; + if (adaptee.start + offset + len - 1 >= adaptee.end) + len = adaptee.end - adaptee.start - offset; + + value = Lexer.getString(adaptee.textarray, + adaptee.start + offset, + len); + } + return value; + } + + /** + * @see org.w3c.dom.CharacterData#appendData + */ + public void 
appendData(String arg) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + /** + * @see org.w3c.dom.CharacterData#insertData + */ + public void insertData(int offset, + String arg) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + /** + * @see org.w3c.dom.CharacterData#deleteData + */ + public void deleteData(int offset, + int count) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + + /** + * @see org.w3c.dom.CharacterData#replaceData + */ + public void replaceData(int offset, + int count, + String arg) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java new file mode 100644 index 0000000..2491714 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMCommentImpl.java @@ -0,0 +1,55 @@ +/* + * @(#)DOMCommentImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMCommentImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMCommentImpl extends DOMCharacterDataImpl + implements org.w3c.dom.Comment { + + protected DOMCommentImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return "#comment"; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.COMMENT_NODE; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java new file mode 100644 index 0000000..52f4f73 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentImpl.java @@ -0,0 +1,261 @@ +/* + * @(#)DOMDocumentImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMDocumentImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMDocumentImpl extends DOMNodeImpl implements org.w3c.dom.Document { + + private TagTable tt; // a DOM Document has its own TagTable. + + protected DOMDocumentImpl(Node adaptee) + { + super(adaptee); + tt = new TagTable(); + } + + public void setTagTable(TagTable tt) + { + this.tt = tt; + } + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return "#document"; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.DOCUMENT_NODE; + } + + /** + * @see org.w3c.dom.Document#getDoctype + */ + public org.w3c.dom.DocumentType getDoctype() + { + Node node = adaptee.content; + while (node != null) { + if (node.type == Node.DocTypeTag) break; + node = node.next; + } + if (node != null) + return (org.w3c.dom.DocumentType)node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Document#getImplementation + */ + public org.w3c.dom.DOMImplementation getImplementation() + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.Document#getDocumentElement + */ + public org.w3c.dom.Element getDocumentElement() + { + Node node = adaptee.content; + while (node != null) { + if (node.type == Node.StartTag || + node.type == Node.StartEndTag) break; + node = node.next; + } + if (node != null) + return (org.w3c.dom.Element)node.getAdapter(); + else 
+ return null; + } + + /** + * @see org.w3c.dom.Document#createElement + */ + public org.w3c.dom.Element createElement(String tagName) + throws DOMException + { + Node node = new Node(Node.StartEndTag, null, 0, 0, tagName, tt); + if (node != null) { + if (node.tag == null) // Fix Bug 121206 + node.tag = tt.xmlTags; + return (org.w3c.dom.Element)node.getAdapter(); + } + else + return null; + } + + /** + * @see org.w3c.dom.Document#createDocumentFragment + */ + public org.w3c.dom.DocumentFragment createDocumentFragment() + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.Document#createTextNode + */ + public org.w3c.dom.Text createTextNode(String data) + { + byte[] textarray = Lexer.getBytes(data); + Node node = new Node(Node.TextNode, textarray, 0, textarray.length); + if (node != null) + return (org.w3c.dom.Text)node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Document#createComment + */ + public org.w3c.dom.Comment createComment(String data) + { + byte[] textarray = Lexer.getBytes(data); + Node node = new Node(Node.CommentTag, textarray, 0, textarray.length); + if (node != null) + return (org.w3c.dom.Comment)node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Document#createCDATASection + */ + public org.w3c.dom.CDATASection createCDATASection(String data) + throws DOMException + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.Document#createProcessingInstruction + */ + public org.w3c.dom.ProcessingInstruction createProcessingInstruction(String target, + String data) + throws DOMException + { + throw new DOMExceptionImpl(DOMException.NOT_SUPPORTED_ERR, + "HTML document"); + } + + /** + * @see org.w3c.dom.Document#createAttribute + */ + public org.w3c.dom.Attr createAttribute(String name) + throws DOMException + { + AttVal av = new AttVal(null, null, (int)'"', name, null); + if (av != null) { + av.dict = + AttributeTable.getDefaultAttributeTable().findAttribute(av); + return 
(org.w3c.dom.Attr)av.getAdapter(); + } else { + return null; + } + } + + /** + * @see org.w3c.dom.Document#createEntityReference + */ + public org.w3c.dom.EntityReference createEntityReference(String name) + throws DOMException + { + // NOT SUPPORTED + return null; + } + + /** + * @see org.w3c.dom.Document#getElementsByTagName + */ + public org.w3c.dom.NodeList getElementsByTagName(String tagname) + { + return new DOMNodeListByTagNameImpl(this.adaptee, tagname); + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Node importNode(org.w3c.dom.Node importedNode, boolean deep) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Attr createAttributeNS(String namespaceURI, + String qualifiedName) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Element createElementNS(String namespaceURI, + String qualifiedName) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, + String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public org.w3c.dom.Element getElementById(String elementId) + { + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java new file mode 100644 index 0000000..3e9fb8f --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMDocumentTypeImpl.java @@ -0,0 +1,107 @@ +/* + * @(#)DOMDocumentTypeImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMDocumentTypeImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMDocumentTypeImpl extends DOMNodeImpl + implements org.w3c.dom.DocumentType { + + protected DOMDocumentTypeImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.DOCUMENT_TYPE_NODE; + } + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return getName(); + } + + /** + * @see org.w3c.dom.DocumentType#getName + */ + public String getName() + { + String value = null; + if (adaptee.type == Node.DocTypeTag) + { + + if (adaptee.textarray != null && adaptee.start < adaptee.end) + { + value = Lexer.getString(adaptee.textarray, + adaptee.start, + adaptee.end - adaptee.start); + } + } + return value; + } + + public org.w3c.dom.NamedNodeMap getEntities() + { + // NOT SUPPORTED + return null; + } + + public org.w3c.dom.NamedNodeMap getNotations() + { + // NOT SUPPORTED + return null; + } + + /** + * DOM2 - not implemented. + */ + public String getPublicId() { + return null; + } + + /** + * DOM2 - not implemented. + */ + public String getSystemId() { + return null; + } + + /** + * DOM2 - not implemented. 
+ */ + public String getInternalSubset() { + return null; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java new file mode 100644 index 0000000..f9a367f --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMElementImpl.java @@ -0,0 +1,307 @@ +/* + * @(#)DOMElementImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMElementImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMElementImpl extends DOMNodeImpl + implements org.w3c.dom.Element { + + protected DOMElementImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.ELEMENT_NODE; + } + + /** + * @see org.w3c.dom.Element#getTagName + */ + public String getTagName() + { + return super.getNodeName(); + } + + /** + * @see org.w3c.dom.Element#getAttribute + */ + public String getAttribute(String name) + { + if (this.adaptee == null) + return null; + + AttVal att = this.adaptee.attributes; + while (att != null) { + if (att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) + return att.value; + else + return ""; + } + + /** + * @see 
org.w3c.dom.Element#setAttribute + */ + public void setAttribute(String name, + String value) + throws DOMException + { + if (this.adaptee == null) + return; + + AttVal att = this.adaptee.attributes; + while (att != null) { + if (att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) { + att.value = value; + } else { + att = new AttVal(null, null, (int)'"', name, value); + att.dict = + AttributeTable.getDefaultAttributeTable().findAttribute(att); + if (this.adaptee.attributes == null) { + this.adaptee.attributes = att; + } else { + att.next = this.adaptee.attributes; + this.adaptee.attributes = att; + } + } + } + + /** + * @see org.w3c.dom.Element#removeAttribute + */ + public void removeAttribute(String name) + throws DOMException + { + if (this.adaptee == null) + return; + + AttVal att = this.adaptee.attributes; + AttVal pre = null; + while (att != null) { + if (att.attribute.equals(name)) break; + pre = att; + att = att.next; + } + if (att != null) { + if (pre == null) { + this.adaptee.attributes = att.next; + } else { + pre.next = att.next; + } + } + } + + /** + * @see org.w3c.dom.Element#getAttributeNode + */ + public org.w3c.dom.Attr getAttributeNode(String name) + { + if (this.adaptee == null) + return null; + + AttVal att = this.adaptee.attributes; + while (att != null) { + if (att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) + return att.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Element#setAttributeNode + */ + public org.w3c.dom.Attr setAttributeNode(org.w3c.dom.Attr newAttr) + throws DOMException + { + if (newAttr == null) + return null; + if (!(newAttr instanceof DOMAttrImpl)) { + throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, + "newAttr not instanceof DOMAttrImpl"); + } + + DOMAttrImpl newatt = (DOMAttrImpl)newAttr; + String name = newatt.avAdaptee.attribute; + org.w3c.dom.Attr result = null; + + AttVal att = this.adaptee.attributes; + while (att != null) { + if 
(att.attribute.equals(name)) break; + att = att.next; + } + if (att != null) { + result = att.getAdapter(); + att.adapter = newAttr; + } else { + if (this.adaptee.attributes == null) { + this.adaptee.attributes = newatt.avAdaptee; + } else { + newatt.avAdaptee.next = this.adaptee.attributes; + this.adaptee.attributes = newatt.avAdaptee; + } + } + return result; + } + + /** + * @see org.w3c.dom.Element#removeAttributeNode + */ + public org.w3c.dom.Attr removeAttributeNode(org.w3c.dom.Attr oldAttr) + throws DOMException + { + if (oldAttr == null) + return null; + + org.w3c.dom.Attr result = null; + AttVal att = this.adaptee.attributes; + AttVal pre = null; + while (att != null) { + if (att.getAdapter() == oldAttr) break; + pre = att; + att = att.next; + } + if (att != null) { + if (pre == null) { + this.adaptee.attributes = att.next; + } else { + pre.next = att.next; + } + result = oldAttr; + } else { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "oldAttr not found"); + } + return result; + } + + /** + * @see org.w3c.dom.Element#getElementsByTagName + */ + public org.w3c.dom.NodeList getElementsByTagName(String name) + { + return new DOMNodeListByTagNameImpl(this.adaptee, name); + } + + /** + * @see org.w3c.dom.Element#normalize + */ + public void normalize() + { + // NOT SUPPORTED + } + + /** + * DOM2 - not implemented. + */ + public String getAttributeNS(String namespaceURI, String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public void setAttributeNS(String namespaceURI, + String qualifiedName, + String value) + throws org.w3c.dom.DOMException + { + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public void removeAttributeNS(String namespaceURI, String localName) + throws org.w3c.dom.DOMException + { + } + + /** + * DOM2 - not implemented. 
+ */ + public org.w3c.dom.Attr getAttributeNodeNS(String namespaceURI, + String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + * @exception org.w3c.dom.DOMException + */ + public org.w3c.dom.Attr setAttributeNodeNS(org.w3c.dom.Attr newAttr) + throws org.w3c.dom.DOMException + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, + String localName) + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public boolean hasAttribute(String name) + { + return false; + } + + /** + * DOM2 - not implemented. + */ + public boolean hasAttributeNS(String namespaceURI, + String localName) + { + return false; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java new file mode 100644 index 0000000..75c5337 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMExceptionImpl.java @@ -0,0 +1,37 @@ +/* + * @(#)DOMExceptionImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMExceptionImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMExceptionImpl extends org.w3c.dom.DOMException { + + public DOMExceptionImpl(short code, String message) { + super(code, message); + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java new file mode 100644 index 0000000..d0b14e2 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeImpl.java @@ -0,0 +1,488 @@ +/* + * @(#)DOMNodeImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMNodeImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM Support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMNodeImpl implements org.w3c.dom.Node { + + protected Node adaptee; + + protected DOMNodeImpl(Node adaptee) + { + this.adaptee = adaptee; + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeValue + */ + public String getNodeValue() throws DOMException + { + String value = ""; //BAK 10/10/2000 replaced null + if (adaptee.type == Node.TextNode || + adaptee.type == Node.CDATATag || + adaptee.type == Node.CommentTag || + adaptee.type == Node.ProcInsTag) + { + + if (adaptee.textarray != null && adaptee.start < adaptee.end) + { + value = Lexer.getString(adaptee.textarray, + adaptee.start, + adaptee.end - adaptee.start); + } + } + return value; + } + + /** + * @see org.w3c.dom.Node#setNodeValue + */ + public void setNodeValue(String nodeValue) throws DOMException + { + if (adaptee.type == Node.TextNode || + adaptee.type == Node.CDATATag || + adaptee.type == Node.CommentTag || + adaptee.type == Node.ProcInsTag) + { + byte[] textarray = Lexer.getBytes(nodeValue); + adaptee.textarray = textarray; + adaptee.start = 0; + adaptee.end = textarray.length; + } + } + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return adaptee.element; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + short result = -1; + switch (adaptee.type) { + case Node.RootNode: + result = 
org.w3c.dom.Node.DOCUMENT_NODE; + break; + case Node.DocTypeTag: + result = org.w3c.dom.Node.DOCUMENT_TYPE_NODE; + break; + case Node.CommentTag: + result = org.w3c.dom.Node.COMMENT_NODE; + break; + case Node.ProcInsTag: + result = org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE; + break; + case Node.TextNode: + result = org.w3c.dom.Node.TEXT_NODE; + break; + case Node.CDATATag: + result = org.w3c.dom.Node.CDATA_SECTION_NODE; + break; + case Node.StartTag: + case Node.StartEndTag: + result = org.w3c.dom.Node.ELEMENT_NODE; + break; + } + return result; + } + + /** + * @see org.w3c.dom.Node#getParentNode + */ + public org.w3c.dom.Node getParentNode() + { + if (adaptee.parent != null) + return adaptee.parent.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getChildNodes + */ + public org.w3c.dom.NodeList getChildNodes() + { + return new DOMNodeListImpl(adaptee); + } + + /** + * @see org.w3c.dom.Node#getFirstChild + */ + public org.w3c.dom.Node getFirstChild() + { + if (adaptee.content != null) + return adaptee.content.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getLastChild + */ + public org.w3c.dom.Node getLastChild() + { + if (adaptee.last != null) + return adaptee.last.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getPreviousSibling + */ + public org.w3c.dom.Node getPreviousSibling() + { + if (adaptee.prev != null) + return adaptee.prev.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getNextSibling + */ + public org.w3c.dom.Node getNextSibling() + { + if (adaptee.next != null) + return adaptee.next.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#getAttributes + */ + public org.w3c.dom.NamedNodeMap getAttributes() + { + return new DOMAttrMapImpl(adaptee.attributes); + } + + /** + * @see org.w3c.dom.Node#getOwnerDocument + */ + public org.w3c.dom.Document getOwnerDocument() + { + Node node; + + node = this.adaptee; + if (node != null && 
node.type == Node.RootNode) + return null; + + for (node = this.adaptee; + node != null && node.type != Node.RootNode; node = node.parent); + + if (node != null) + return (org.w3c.dom.Document)node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.Node#insertBefore + */ + public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, + org.w3c.dom.Node refChild) + throws DOMException + { + // TODO - handle newChild already in tree + + if (newChild == null) + return null; + if (!(newChild instanceof DOMNodeImpl)) { + throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, + "newChild not instanceof DOMNodeImpl"); + } + DOMNodeImpl newCh = (DOMNodeImpl)newChild; + + if (this.adaptee.type == Node.RootNode) { + if (newCh.adaptee.type != Node.DocTypeTag && + newCh.adaptee.type != Node.ProcInsTag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } else if (this.adaptee.type == Node.StartTag) { + if (newCh.adaptee.type != Node.StartTag && + newCh.adaptee.type != Node.StartEndTag && + newCh.adaptee.type != Node.CommentTag && + newCh.adaptee.type != Node.TextNode && + newCh.adaptee.type != Node.CDATATag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } + if (refChild == null) { + Node.insertNodeAtEnd(this.adaptee, newCh.adaptee); + if (this.adaptee.type == Node.StartEndTag) { + this.adaptee.setType(Node.StartTag); + } + } else { + Node ref = this.adaptee.content; + while (ref != null) { + if (ref.getAdapter() == refChild) break; + ref = ref.next; + } + if (ref == null) { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "refChild not found"); + } + Node.insertNodeBeforeElement(ref, newCh.adaptee); + } + return newChild; + } + + /** + * @see org.w3c.dom.Node#replaceChild + */ + public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, + org.w3c.dom.Node oldChild) + throws DOMException + { + // 
TODO - handle newChild already in tree + + if (newChild == null) + return null; + if (!(newChild instanceof DOMNodeImpl)) { + throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, + "newChild not instanceof DOMNodeImpl"); + } + DOMNodeImpl newCh = (DOMNodeImpl)newChild; + + if (this.adaptee.type == Node.RootNode) { + if (newCh.adaptee.type != Node.DocTypeTag && + newCh.adaptee.type != Node.ProcInsTag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } else if (this.adaptee.type == Node.StartTag) { + if (newCh.adaptee.type != Node.StartTag && + newCh.adaptee.type != Node.StartEndTag && + newCh.adaptee.type != Node.CommentTag && + newCh.adaptee.type != Node.TextNode && + newCh.adaptee.type != Node.CDATATag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } + if (oldChild == null) { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "oldChild not found"); + } else { + Node n; + Node ref = this.adaptee.content; + while (ref != null) { + if (ref.getAdapter() == oldChild) break; + ref = ref.next; + } + if (ref == null) { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "oldChild not found"); + } + newCh.adaptee.next = ref.next; + newCh.adaptee.prev = ref.prev; + newCh.adaptee.last = ref.last; + newCh.adaptee.parent = ref.parent; + newCh.adaptee.content = ref.content; + if (ref.parent != null) { + if (ref.parent.content == ref) + ref.parent.content = newCh.adaptee; + if (ref.parent.last == ref) + ref.parent.last = newCh.adaptee; + } + if (ref.prev != null) { + ref.prev.next = newCh.adaptee; + } + if (ref.next != null) { + ref.next.prev = newCh.adaptee; + } + for (n = ref.content; n != null; n = n.next) { + if (n.parent == ref) + n.parent = newCh.adaptee; + } + } + return oldChild; + } + + /** + * @see org.w3c.dom.Node#removeChild + */ + public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild) + throws 
DOMException + { + if (oldChild == null) + return null; + + Node ref = this.adaptee.content; + while (ref != null) { + if (ref.getAdapter() == oldChild) break; + ref = ref.next; + } + if (ref == null) { + throw new DOMExceptionImpl(DOMException.NOT_FOUND_ERR, + "refChild not found"); + } + Node.discardElement(ref); + + if (this.adaptee.content == null + && this.adaptee.type == Node.StartTag) { + this.adaptee.setType(Node.StartEndTag); + } + + return oldChild; + } + + /** + * @see org.w3c.dom.Node#appendChild + */ + public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild) + throws DOMException + { + // TODO - handle newChild already in tree + + if (newChild == null) + return null; + if (!(newChild instanceof DOMNodeImpl)) { + throw new DOMExceptionImpl(DOMException.WRONG_DOCUMENT_ERR, + "newChild not instanceof DOMNodeImpl"); + } + DOMNodeImpl newCh = (DOMNodeImpl)newChild; + + if (this.adaptee.type == Node.RootNode) { + if (newCh.adaptee.type != Node.DocTypeTag && + newCh.adaptee.type != Node.ProcInsTag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } else if (this.adaptee.type == Node.StartTag) { + if (newCh.adaptee.type != Node.StartTag && + newCh.adaptee.type != Node.StartEndTag && + newCh.adaptee.type != Node.CommentTag && + newCh.adaptee.type != Node.TextNode && + newCh.adaptee.type != Node.CDATATag) { + throw new DOMExceptionImpl(DOMException.HIERARCHY_REQUEST_ERR, + "newChild cannot be a child of this node"); + } + } + Node.insertNodeAtEnd(this.adaptee, newCh.adaptee); + + if (this.adaptee.type == Node.StartEndTag) { + this.adaptee.setType(Node.StartTag); + } + + return newChild; + } + + /** + * @see org.w3c.dom.Node#hasChildNodes + */ + public boolean hasChildNodes() + { + return (adaptee.content != null); + } + + /** + * @see org.w3c.dom.Node#cloneNode + */ + public org.w3c.dom.Node cloneNode(boolean deep) + { + Node node = adaptee.cloneNode(deep); + node.parent = null; + return 
node.getAdapter(); + } + + /** + * DOM2 - not implemented. + */ + public void normalize() + { + } + + /** + * DOM2 - not implemented. + */ + public boolean supports(String feature, String version) + { + return isSupported(feature, version); + } + + /** + * DOM2 - not implemented. + */ + public String getNamespaceURI() + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public String getPrefix() + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public void setPrefix(String prefix) + throws DOMException + { + } + + /** + * DOM2 - not implemented. + */ + public String getLocalName() + { + return null; + } + + /** + * DOM2 - not implemented. + */ + public boolean isSupported(String feature,String version) { + return false; + } + + /** + * DOM2 - @see org.w3c.dom.Node#hasAttributes + * contributed by dlp@users.sourceforge.net + */ + public boolean hasAttributes() + { + return adaptee.attributes != null; + } +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java new file mode 100644 index 0000000..d69feb3 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListByTagNameImpl.java @@ -0,0 +1,99 @@ +/* + * @(#)DOMNodeListByTagNameImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * DOMNodeListByTagNameImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/** + *
The items in the NodeList are accessible via an integral + * index, starting from 0. + * + */ +public class DOMNodeListByTagNameImpl implements org.w3c.dom.NodeList { + + private Node first = null; + private String tagName = "*"; + private int currIndex = 0; + private int maxIndex = 0; + private Node currNode = null; + + protected DOMNodeListByTagNameImpl(Node first, String tagName) + { + this.first = first; + this.tagName = tagName; + } + + /** + * @see org.w3c.dom.NodeList#item + */ + public org.w3c.dom.Node item(int index) + { + currIndex = 0; + maxIndex = index; + preTraverse(first); + + if (currIndex > maxIndex && currNode != null) + return currNode.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.NodeList#getLength + */ + public int getLength() + { + currIndex = 0; + maxIndex = Integer.MAX_VALUE; + preTraverse(first); + return currIndex; + } + + protected void preTraverse(Node node) + { + if (node == null) + return; + + if (node.type == Node.StartTag || node.type == Node.StartEndTag) + { + if (currIndex <= maxIndex && + (tagName.equals("*") || tagName.equals(node.element))) + { + currIndex += 1; + currNode = node; + } + } + if (currIndex > maxIndex) + return; + + node = node.content; + while (node != null) + { + preTraverse(node); + node = node.next; + } + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java new file mode 100644 index 0000000..726f007 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMNodeListImpl.java @@ -0,0 +1,75 @@ +/* + * @(#)DOMNodeListImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * DOMNodeListImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/** + *
The items in the NodeList are accessible via an integral + * index, starting from 0. + * + */ +public class DOMNodeListImpl implements org.w3c.dom.NodeList { + + private Node parent = null; + + protected DOMNodeListImpl(Node parent) + { + this.parent = parent; + } + + /** + * @see org.w3c.dom.NodeList#item + */ + public org.w3c.dom.Node item(int index) + { + int i = 0; + Node node = parent.content; + while (node != null) { + if (i >= index) break; + i++; + node = node.next; + } + if (node != null) + return node.getAdapter(); + else + return null; + } + + /** + * @see org.w3c.dom.NodeList#getLength + */ + public int getLength() + { + int len = 0; + Node node = parent.content; + while (node != null) { + len++; + node = node.next; + } + return len; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java new file mode 100644 index 0000000..1eefeca --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMProcessingInstructionImpl.java @@ -0,0 +1,74 @@ +/* + * @(#)DOMProcessingInstructionImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMProcessingInstructionImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMProcessingInstructionImpl extends DOMNodeImpl + implements org.w3c.dom.ProcessingInstruction { + + protected DOMProcessingInstructionImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE; + } + + /** + * @see org.w3c.dom.ProcessingInstruction#getTarget + */ + public String getTarget() + { + // TODO + return null; + } + + /** + * @see org.w3c.dom.ProcessingInstruction#getData + */ + public String getData() + { + return getNodeValue(); + } + + /** + * @see org.w3c.dom.ProcessingInstruction#setData + */ + public void setData(String data) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java new file mode 100644 index 0000000..06ec997 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/DOMTextImpl.java @@ -0,0 +1,65 @@ +/* + * @(#)DOMTextImpl.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +import org.w3c.dom.DOMException; + +/** + * + * DOMTextImpl + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class DOMTextImpl extends DOMCharacterDataImpl + implements org.w3c.dom.Text { + + protected DOMTextImpl(Node adaptee) + { + super(adaptee); + } + + + /* --------------------- DOM ---------------------------- */ + + /** + * @see org.w3c.dom.Node#getNodeName + */ + public String getNodeName() + { + return "#text"; + } + + /** + * @see org.w3c.dom.Node#getNodeType + */ + public short getNodeType() + { + return org.w3c.dom.Node.TEXT_NODE; + } + + /** + * @see org.w3c.dom.Text#splitText + */ + public org.w3c.dom.Text splitText(int offset) throws DOMException + { + // NOT SUPPORTED + throw new DOMExceptionImpl(DOMException.NO_MODIFICATION_ALLOWED_ERR, + "Not supported"); + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java new file mode 100644 index 0000000..4d05767 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Dict.java @@ -0,0 +1,110 @@ +/* + * @(#)Dict.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Tag dictionary node + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class Dict { + + /* content model shortcut encoding */ + + public static final int CM_UNKNOWN = 0; + public static final int CM_EMPTY = (1 << 0); + public static final int CM_HTML = (1 << 1); + public static final int CM_HEAD = (1 << 2); + public static final int CM_BLOCK = (1 << 3); + public static final int CM_INLINE = (1 << 4); + public static final int CM_LIST = (1 << 5); + public static final int CM_DEFLIST = (1 << 6); + public static final int CM_TABLE = (1 << 7); + public static final int CM_ROWGRP = (1 << 8); + public static final int CM_ROW = (1 << 9); + public static final int CM_FIELD = (1 << 10); + public static final int CM_OBJECT = (1 << 11); + public static final int CM_PARAM = (1 << 12); + public static final int CM_FRAMES = (1 << 13); + public static final int CM_HEADING = (1 << 14); + public static final int CM_OPT = (1 << 15); + public static final int CM_IMG = (1 << 16); + public static final int CM_MIXED = (1 << 17); + public static final int CM_NO_INDENT = (1 << 18); + public static final int CM_OBSOLETE = (1 << 19); + public static final int CM_NEW = (1 << 20); + public static final int CM_OMITST = (1 << 21); + + /* + + If the document uses just HTML 2.0 tags and attributes described it as 
HTML 2.0 + Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary + tags and attributes then describe it as HTML Proprietary. If it includes the + xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe + it as one of the flavors of Voyager (strict, loose or frameset). + */ + + public static final short VERS_UNKNOWN = 0; + + public static final short VERS_HTML20 = 1; + public static final short VERS_HTML32 = 2; + public static final short VERS_HTML40_STRICT = 4; + public static final short VERS_HTML40_LOOSE = 8; + public static final short VERS_FRAMES = 16; + public static final short VERS_XML = 32; + + public static final short VERS_NETSCAPE = 64; + public static final short VERS_MICROSOFT = 128; + public static final short VERS_SUN = 256; + + public static final short VERS_MALFORMED = 512; + + public static final short VERS_ALL = (VERS_HTML20|VERS_HTML32|VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES); + public static final short VERS_HTML40 = (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMES); + public static final short VERS_LOOSE = (VERS_HTML32|VERS_HTML40_LOOSE|VERS_FRAMES); + public static final short VERS_IFRAMES = (VERS_HTML40_LOOSE|VERS_FRAMES); + public static final short VERS_FROM32 = (VERS_HTML40_STRICT|VERS_LOOSE); + public static final short VERS_PROPRIETARY = (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN); + + public static final short VERS_EVERYTHING = (VERS_ALL|VERS_PROPRIETARY); + + public Dict( String name, short versions, int model, + Parser parser, CheckAttribs chkattrs ) + { + this.name = name; + this.versions = versions; + this.model = model; + this.parser = parser; + this.chkattrs = chkattrs; + } + + public String name; + public short versions; + public int model; + public Parser parser; + public CheckAttribs chkattrs; +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java new file mode 100644 index 0000000..fea6e51 --- 
/dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Entity.java @@ -0,0 +1,51 @@ +/* + * @(#)Entity.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * HTML ISO entity + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class Entity { + + public Entity( String name, short code ) + { + this.name = name; + this.code = code; + } + + public Entity( String name, int code ) + { + this.name = name; + this.code = (short)code; + } + + public String name; + public short code; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java new file mode 100644 index 0000000..aeec74d --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/EntityTable.java @@ -0,0 +1,386 @@ +/* + * @(#)EntityTable.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Entity hash table + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +import java.util.Hashtable; +import java.util.Enumeration; + /* Table of character entities keyed by entity name; lookups by name use the hash table, lookups by code scan it. */+public class EntityTable { + + public EntityTable() + { + } + /* Returns the entry stored under name, or null when there is none. */+ public Entity lookup( String name ) + { + return (Entity)entityHashtable.get( name ); + } + /* Registers name with code: creates a new entry when the name is unknown, otherwise overwrites the code of the existing entry. Returns the entry now held by the table. */+ public Entity install( String name, short code ) + { + Entity ent = lookup( name ); + if ( ent == null ) { + ent = new Entity( name, code ); + entityHashtable.put( name, ent ); + } else { + ent.code = code; + } + return ent; + } + /* Stores ent under ent.name. NOTE(review): the result is whatever Hashtable.put returns, i.e. the entry previously stored under that name (null if none), not ent itself -- confirm that callers expect this. */+ public Entity install( Entity ent ) + { + return (Entity)entityHashtable.put( ent.name, ent ); + } + + /* entity starting with "&" returns zero on error */ + public short entityCode( String name ) + { + int c; + + if (name.length() <= 1) + return 0; + + /* numeric entity: name = "&#" followed by number */ + if ( name.charAt(1) == '#' ) { + c = 0; /* zero on missing/bad number */ + + /* 'x' prefix denotes hexadecimal number format */ + try { + if (name.length() >= 4 && name.charAt(2) == 'x') { + c = Integer.parseInt( name.substring(3), 16 ); + } else if (name.length() >= 3) { + c = Integer.parseInt( name.substring(2) ); + } + } + catch ( NumberFormatException e ) {} /* deliberate: a malformed number leaves c at 0 */ + /* the int is narrowed to short here, so values outside the short range wrap */+ return (short)c; + } + + /* Named entity: name ="&" followed by a name */ + Entity ent = lookup( 
name.substring(1) ); + if ( ent != null ) { + return ent.code; + } + + return 0; /* zero signifies unknown entity name */ + } + /* Reverse lookup by linear scan of the table: returns the name of an entry whose code equals the argument, or null when no entry matches. */+ public String entityName( short code ) + { + String name = null; + Entity ent; + Enumeration en = entityHashtable.elements(); + while ( en.hasMoreElements() ) { + ent = (Entity)en.nextElement(); + if ( ent.code == code ) { + name = ent.name; + break; + } + } + return name; + } + /* entity name -> Entity */+ private Hashtable entityHashtable = new Hashtable(); + /* shared table handed out by getDefaultEntityTable(); created on first use */+ private static EntityTable defaultEntityTable = null; + /* built-in entities installed into the default table */+ private static Entity[] entities = { + + new Entity( "nbsp", 160 ), + new Entity( "iexcl", 161 ), + new Entity( "cent", 162 ), + new Entity( "pound", 163 ), + new Entity( "curren", 164 ), + new Entity( "yen", 165 ), + new Entity( "brvbar", 166 ), + new Entity( "sect", 167 ), + new Entity( "uml", 168 ), + new Entity( "copy", 169 ), + new Entity( "ordf", 170 ), + new Entity( "laquo", 171 ), + new Entity( "not", 172 ), + new Entity( "shy", 173 ), + new Entity( "reg", 174 ), + new Entity( "macr", 175 ), + new Entity( "deg", 176 ), + new Entity( "plusmn", 177 ), + new Entity( "sup2", 178 ), + new Entity( "sup3", 179 ), + new Entity( "acute", 180 ), + new Entity( "micro", 181 ), + new Entity( "para", 182 ), + new Entity( "middot", 183 ), + new Entity( "cedil", 184 ), + new Entity( "sup1", 185 ), + new Entity( "ordm", 186 ), + new Entity( "raquo", 187 ), + new Entity( "frac14", 188 ), + new Entity( "frac12", 189 ), + new Entity( "frac34", 190 ), + new Entity( "iquest", 191 ), + new Entity( "Agrave", 192 ), + new Entity( "Aacute", 193 ), + new Entity( "Acirc", 194 ), + new Entity( "Atilde", 195 ), + new Entity( "Auml", 196 ), + new Entity( "Aring", 197 ), + new Entity( "AElig", 198 ), + new Entity( "Ccedil", 199 ), + new Entity( "Egrave", 200 ), + new Entity( "Eacute", 201 ), + new Entity( "Ecirc", 202 ), + new Entity( "Euml", 203 ), + new Entity( "Igrave", 204 ), + new Entity( "Iacute", 205 ), + new Entity( "Icirc", 206 ), + new Entity( "Iuml", 207 ), + 
new Entity( "ETH", 208 ), + new Entity( "Ntilde", 209 ), + new Entity( "Ograve", 210 ), + new Entity( "Oacute", 211 ), + new Entity( "Ocirc", 212 ), + new Entity( "Otilde", 213 ), + new Entity( "Ouml", 214 ), + new Entity( "times", 215 ), + new Entity( "Oslash", 216 ), + new Entity( "Ugrave", 217 ), + new Entity( "Uacute", 218 ), + new Entity( "Ucirc", 219 ), + new Entity( "Uuml", 220 ), + new Entity( "Yacute", 221 ), + new Entity( "THORN", 222 ), + new Entity( "szlig", 223 ), + new Entity( "agrave", 224 ), + new Entity( "aacute", 225 ), + new Entity( "acirc", 226 ), + new Entity( "atilde", 227 ), + new Entity( "auml", 228 ), + new Entity( "aring", 229 ), + new Entity( "aelig", 230 ), + new Entity( "ccedil", 231 ), + new Entity( "egrave", 232 ), + new Entity( "eacute", 233 ), + new Entity( "ecirc", 234 ), + new Entity( "euml", 235 ), + new Entity( "igrave", 236 ), + new Entity( "iacute", 237 ), + new Entity( "icirc", 238 ), + new Entity( "iuml", 239 ), + new Entity( "eth", 240 ), + new Entity( "ntilde", 241 ), + new Entity( "ograve", 242 ), + new Entity( "oacute", 243 ), + new Entity( "ocirc", 244 ), + new Entity( "otilde", 245 ), + new Entity( "ouml", 246 ), + new Entity( "divide", 247 ), + new Entity( "oslash", 248 ), + new Entity( "ugrave", 249 ), + new Entity( "uacute", 250 ), + new Entity( "ucirc", 251 ), + new Entity( "uuml", 252 ), + new Entity( "yacute", 253 ), + new Entity( "thorn", 254 ), + new Entity( "yuml", 255 ), + new Entity( "fnof", 402 ), + new Entity( "Alpha", 913 ), + new Entity( "Beta", 914 ), + new Entity( "Gamma", 915 ), + new Entity( "Delta", 916 ), + new Entity( "Epsilon", 917 ), + new Entity( "Zeta", 918 ), + new Entity( "Eta", 919 ), + new Entity( "Theta", 920 ), + new Entity( "Iota", 921 ), + new Entity( "Kappa", 922 ), + new Entity( "Lambda", 923 ), + new Entity( "Mu", 924 ), + new Entity( "Nu", 925 ), + new Entity( "Xi", 926 ), + new Entity( "Omicron", 927 ), + new Entity( "Pi", 928 ), + new Entity( "Rho", 929 ), + new Entity( "Sigma", 
931 ), + new Entity( "Tau", 932 ), + new Entity( "Upsilon", 933 ), + new Entity( "Phi", 934 ), + new Entity( "Chi", 935 ), + new Entity( "Psi", 936 ), + new Entity( "Omega", 937 ), + new Entity( "alpha", 945 ), + new Entity( "beta", 946 ), + new Entity( "gamma", 947 ), + new Entity( "delta", 948 ), + new Entity( "epsilon", 949 ), + new Entity( "zeta", 950 ), + new Entity( "eta", 951 ), + new Entity( "theta", 952 ), + new Entity( "iota", 953 ), + new Entity( "kappa", 954 ), + new Entity( "lambda", 955 ), + new Entity( "mu", 956 ), + new Entity( "nu", 957 ), + new Entity( "xi", 958 ), + new Entity( "omicron", 959 ), + new Entity( "pi", 960 ), + new Entity( "rho", 961 ), + new Entity( "sigmaf", 962 ), + new Entity( "sigma", 963 ), + new Entity( "tau", 964 ), + new Entity( "upsilon", 965 ), + new Entity( "phi", 966 ), + new Entity( "chi", 967 ), + new Entity( "psi", 968 ), + new Entity( "omega", 969 ), + new Entity( "thetasym", 977 ), + new Entity( "upsih", 978 ), + new Entity( "piv", 982 ), + new Entity( "bull", 8226 ), + new Entity( "hellip", 8230 ), + new Entity( "prime", 8242 ), + new Entity( "Prime", 8243 ), + new Entity( "oline", 8254 ), + new Entity( "frasl", 8260 ), + new Entity( "weierp", 8472 ), + new Entity( "image", 8465 ), + new Entity( "real", 8476 ), + new Entity( "trade", 8482 ), + new Entity( "alefsym", 8501 ), + new Entity( "larr", 8592 ), + new Entity( "uarr", 8593 ), + new Entity( "rarr", 8594 ), + new Entity( "darr", 8595 ), + new Entity( "harr", 8596 ), + new Entity( "crarr", 8629 ), + new Entity( "lArr", 8656 ), + new Entity( "uArr", 8657 ), + new Entity( "rArr", 8658 ), + new Entity( "dArr", 8659 ), + new Entity( "hArr", 8660 ), + new Entity( "forall", 8704 ), + new Entity( "part", 8706 ), + new Entity( "exist", 8707 ), + new Entity( "empty", 8709 ), + new Entity( "nabla", 8711 ), + new Entity( "isin", 8712 ), + new Entity( "notin", 8713 ), + new Entity( "ni", 8715 ), + new Entity( "prod", 8719 ), + new Entity( "sum", 8721 ), + new Entity( 
"minus", 8722 ), + new Entity( "lowast", 8727 ), + new Entity( "radic", 8730 ), + new Entity( "prop", 8733 ), + new Entity( "infin", 8734 ), + new Entity( "ang", 8736 ), + new Entity( "and", 8743 ), + new Entity( "or", 8744 ), + new Entity( "cap", 8745 ), + new Entity( "cup", 8746 ), + new Entity( "int", 8747 ), + new Entity( "there4", 8756 ), + new Entity( "sim", 8764 ), + new Entity( "cong", 8773 ), + new Entity( "asymp", 8776 ), + new Entity( "ne", 8800 ), + new Entity( "equiv", 8801 ), + new Entity( "le", 8804 ), + new Entity( "ge", 8805 ), + new Entity( "sub", 8834 ), + new Entity( "sup", 8835 ), + new Entity( "nsub", 8836 ), + new Entity( "sube", 8838 ), + new Entity( "supe", 8839 ), + new Entity( "oplus", 8853 ), + new Entity( "otimes", 8855 ), + new Entity( "perp", 8869 ), + new Entity( "sdot", 8901 ), + new Entity( "lceil", 8968 ), + new Entity( "rceil", 8969 ), + new Entity( "lfloor", 8970 ), + new Entity( "rfloor", 8971 ), + new Entity( "lang", 9001 ), + new Entity( "rang", 9002 ), + new Entity( "loz", 9674 ), + new Entity( "spades", 9824 ), + new Entity( "clubs", 9827 ), + new Entity( "hearts", 9829 ), + new Entity( "diams", 9830 ), + new Entity( "quot", 34 ), + new Entity( "amp", 38 ), + new Entity( "lt", 60 ), + new Entity( "gt", 62 ), + new Entity( "OElig", 338 ), + new Entity( "oelig", 339 ), + new Entity( "Scaron", 352 ), + new Entity( "scaron", 353 ), + new Entity( "Yuml", 376 ), + new Entity( "circ", 710 ), + new Entity( "tilde", 732 ), + new Entity( "ensp", 8194 ), + new Entity( "emsp", 8195 ), + new Entity( "thinsp", 8201 ), + new Entity( "zwnj", 8204 ), + new Entity( "zwj", 8205 ), + new Entity( "lrm", 8206 ), + new Entity( "rlm", 8207 ), + new Entity( "ndash", 8211 ), + new Entity( "mdash", 8212 ), + new Entity( "lsquo", 8216 ), + new Entity( "rsquo", 8217 ), + new Entity( "sbquo", 8218 ), + new Entity( "ldquo", 8220 ), + new Entity( "rdquo", 8221 ), + new Entity( "bdquo", 8222 ), + new Entity( "dagger", 8224 ), + new Entity( "Dagger", 8225 
), + new Entity( "permil", 8240 ), + new Entity( "lsaquo", 8249 ), + new Entity( "rsaquo", 8250 ), + new Entity( "euro", 8364 ) + + }; + /* Returns the shared default table, creating it and installing the built-in entities on the first call. NOTE(review): defaultEntityTable is assigned before the install loop runs and nothing here synchronizes, so a concurrent first call could presumably observe a partly filled table -- confirm whether callers are single-threaded. */+ public static EntityTable getDefaultEntityTable() + { + if ( defaultEntityTable == null ) { + defaultEntityTable = new EntityTable(); + for ( int i = 0; i < entities.length; i++ ) { + defaultEntityTable.install( entities[i] ); + } + } + return defaultEntityTable; + } + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java new file mode 100644 index 0000000..8561a43 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/IStack.java @@ -0,0 +1,65 @@ +/* + * @(#)IStack.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Inline stack node + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. + * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class IStack { + + /* + Mosaic handles inlines via a separate stack from other elements + We duplicate this to recover from inline markup errors such as: + + italic text +

more italic text normal text + + which for compatibility with Mosaic is mapped to: + + italic text +

text +

more text + + Shouldn't be mapped to + +

text

+ more text + */ + public void pushInline( Node node ) + { + IStack is; + + if (node.implicit) + return; + + if (node.tag == null) + return; + + if ((node.tag.model & Dict.CM_INLINE) == 0 ) + return; + + if ((node.tag.model & Dict.CM_OBJECT) != 0) + return; + + if (node.tag != configuration.tt.tagFont && isPushed(node)) + return; + + // make sure there is enough space for the stack + is = new IStack(); + is.tag = node.tag; + is.element = node.element; + if (node.attributes != null) + is.attributes = cloneAttributes(node.attributes); + this.istack.push( is ); + } + + /* pop inline stack */ + public void popInline( Node node ) + { + AttVal av; + IStack is; + + if (node != null) { + + if (node.tag == null) + return; + + if ((node.tag.model & Dict.CM_INLINE) == 0) + return; + + if ((node.tag.model & Dict.CM_OBJECT) != 0) + return; + + // if node is then pop until we find an + if (node.tag == configuration.tt.tagA) { + + while (this.istack.size() > 0) { + is = (IStack)this.istack.pop(); + if (is.tag == configuration.tt.tagA) { + break; + } + } + + if (this.insert >= this.istack.size()) + this.insert = -1; + return; + } + } + + if (this.istack.size() > 0) { + is = (IStack)this.istack.pop(); + if (this.insert >= this.istack.size()) + this.insert = -1; + } + } + + public boolean isPushed( Node node ) + { + int i; + IStack is; + + for (i = this.istack.size() - 1; i >= 0; --i) { + is = (IStack)this.istack.elementAt(i); + if (is.tag == node.tag) + return true; + } + + return false; + } + + /* + This has the effect of inserting "missing" inline + elements around the contents of blocklevel elements + such as P, TD, TH, DIV, PRE etc. This procedure is + called at the start of ParseBlock. when the inline + stack is not empty, as will be the case in: + +
<i><h1>italic heading</h1></i>
+ + which is then treated as equivalent to + +
<h1><i>italic heading</i></h1>
+ + This is implemented by setting the lexer into a mode + where it gets tokens from the inline stack rather than + from the input stream. + */ + public int inlineDup( Node node ) + { + int n; + + n = this.istack.size() - this.istackbase; + if ( n > 0 ) { + this.insert = this.istackbase; + this.inode = node; + } + + return n; + } + + public Node insertedToken() + { + Node node; + IStack is; + int n; + + // this will only be null if inode != null + if (this.insert == -1) { + node = this.inode; + this.inode = null; + return node; + } + + // is this is the "latest" node then update + // the position, otherwise use current values + + if (this.inode == null) { + this.lines = this.in.curline; + this.columns = this.in.curcol; + } + + node = newNode(Node.StartTag, + this.lexbuf, + this.txtstart, + this.txtend); // GLP: Bugfix 126261. Remove when this change + // is fixed in istack.c in the original Tidy + node.implicit = true; + is = (IStack)this.istack.elementAt( this.insert ); + node.element = is.element; + node.tag = is.tag; + if (is.attributes != null) + node.attributes = cloneAttributes(is.attributes); + + // advance lexer to next item on the stack + n = this.insert; + + // and recover state if we have reached the end + if (++n < this.istack.size() ) { + this.insert = n; + } else { + this.insert = -1; + } + + return node; + } + + /* AQ: Try this for speed optimization */ + public static int wstrcasecmp(String s1, String s2) + { + return (s1.equalsIgnoreCase(s2) ? 0 : 1); + } + + public static int wstrcaselexcmp(String s1, String s2) + { + char c; + int i = 0; + + while ( i < s1.length() && i < s2.length() ) { + c = s1.charAt(i); + if ( toLower(c) != toLower( s2.charAt(i) ) ) { + break; + } + i += 1; + } + if ( i == s1.length() && i == s2.length() ) { + return 0; + } else if ( i == s1.length() ) { + return -1; + } else if ( i == s2.length() ) { + return 1; + } else { + return ( s1.charAt(i) > s2.charAt(i) ? 
1 : -1 ); + } + } + + /* + Case-insensitive substring test: returns true when s2 occurs anywhere + in s1. Only len2 characters starting at offset i are compared, so a + match is not limited to the tail of s1. An empty s2 always matches; + an s2 longer than s1 never does. + */ + public static boolean wsubstr(String s1, String s2) + { + int i; + int len1 = s1.length(); + int len2 = s2.length(); + + for (i = 0; i <= len1 - len2; ++i) + { + if (s1.regionMatches(true, i, s2, 0, len2)) + return true; + } + + return false; + } + + /* + Reports whether a node may be discarded. Text nodes always qualify. + Elements are kept when they have content, are an A with attributes, + are a P while DropEmptyParas is off, have no dictionary entry, + belong to the CM_ROW model, are APPLET or OBJECT, or carry an + id or name attribute. + */ + public boolean canPrune(Node element) + { + if (element.type == Node.TextNode) + return true; + + if (element.content != null) + return false; + + if (element.tag == configuration.tt.tagA && element.attributes != null) + return false; + + if (element.tag == configuration.tt.tagP && !this.configuration.DropEmptyParas) + return false; + + if (element.tag == null) + return false; + + if ((element.tag.model & Dict.CM_ROW) != 0) + return false; + + if (element.tag == configuration.tt.tagApplet) + return false; + + if (element.tag == configuration.tt.tagObject) + return false; + + if (element.attributes != null && + (element.getAttrByName("id") != null || + element.getAttrByName("name") != null) ) + return false; + + return true; + } + + /* duplicate name attribute as an id */ + public void fixId(Node node) + { + AttVal name = node.getAttrByName("name"); + AttVal id = node.getAttrByName("id"); + + if (name != null) + { + if (id != null) + { + if (!id.value.equals(name.value)) + Report.attrError(this, node, "name", Report.ID_NAME_MISMATCH); + } + else if (this.configuration.XmlOut) + node.addAttribute("id", name.value); + } + } + + /* + defer duplicates when entering a table or other + element where the inlines shouldn't be duplicated + */ + public void deferDup() + { + this.insert = -1; + this.inode = null; + } + + /* Private methods and fields */ + + /* lexer char types */ + private static final short DIGIT = 1; + private static final short LETTER = 2; + private static final short NAMECHAR = 4; + private static final short WHITE = 8; + private static final short NEWLINE = 16; + private static final short LOWERCASE = 32; + private static final short UPPERCASE = 64; + + /* lexer GetToken states */ + + 
private static final short LEX_CONTENT = 0; + private static final short LEX_GT = 1; + private static final short LEX_ENDTAG = 2; + private static final short LEX_STARTTAG = 3; + private static final short LEX_COMMENT = 4; + private static final short LEX_DOCTYPE = 5; + private static final short LEX_PROCINSTR = 6; + private static final short LEX_ENDCOMMENT = 7; + private static final short LEX_CDATA = 8; + private static final short LEX_SECTION = 9; + private static final short LEX_ASP = 10; + private static final short LEX_JSTE = 11; + private static final short LEX_PHP = 12; + + /* used to classify chars for lexical purposes */ + private static short[] lexmap = new short[128]; + + private static void mapStr(String str, short code) + { + int j; + + for ( int i = 0; i < str.length(); i++ ) { + j = (int)str.charAt(i); + lexmap[j] |= code; + } + } + + static { + mapStr("\r\n\f", (short)(NEWLINE|WHITE)); + mapStr(" \t", WHITE); + mapStr("-.:_", NAMECHAR); + mapStr("0123456789", (short)(DIGIT|NAMECHAR)); + mapStr("abcdefghijklmnopqrstuvwxyz", (short)(LOWERCASE|LETTER|NAMECHAR)); + mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short)(UPPERCASE|LETTER|NAMECHAR)); + } + + private static short MAP( char c ) + { + return ((int)c < 128 ? 
lexmap[(int)c] : 0); + } + + private static boolean isWhite(char c) + { + short m = MAP(c); + + return (m & WHITE) != 0; + } + + private static boolean isDigit(char c) + { + short m; + + m = MAP(c); + + return (m & DIGIT) != 0; + } + + private static boolean isLetter(char c) + { + short m; + + m = MAP(c); + + return (m & LETTER) != 0; + } + + private static char toLower(char c) + { + short m = MAP(c); + + if ((m & UPPERCASE) != 0) + c = (char)( (int)c + (int)'a' - (int)'A' ); + + return c; + } + + private static char toUpper(char c) + { + short m = MAP(c); + + if ((m & LOWERCASE) != 0) + c = (char)( (int)c + (int)'A' - (int)'a' ); + + return c; + } + + public static char foldCase(char c, boolean tocaps, boolean xmlTags) + { + short m; + + if (!xmlTags) + { + m = MAP(c); + + if (tocaps) + { + if ((m & LOWERCASE) != 0) + c = (char)( (int)c + (int)'A' - (int)'a' ); + } + else /* force to lower case */ + { + if ((m & UPPERCASE) != 0) + c = (char)( (int)c + (int)'a' - (int)'A' ); + } + } + + return c; + } + + + private static class W3CVersionInfo + { + String name; + String voyagerName; + String profile; + short code; + + public W3CVersionInfo( String name, + String voyagerName, + String profile, + short code ) + { + this.name = name; + this.voyagerName = voyagerName; + this.profile = profile; + this.code = code; + } + } + + /* the 3 URIs for the XHTML 1.0 DTDs */ + private static final String voyager_loose = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"; + private static final String voyager_strict = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; + private static final String voyager_frameset = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"; + + private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; + + private static Lexer.W3CVersionInfo[] W3CVersion = + { + new W3CVersionInfo("HTML 4.01", + "XHTML 1.0 Strict", + voyager_strict, + Dict.VERS_HTML40_STRICT), + new W3CVersionInfo("HTML 4.01 Transitional", + "XHTML 1.0 
Transitional", + voyager_loose, + Dict.VERS_HTML40_LOOSE), + new W3CVersionInfo("HTML 4.01 Frameset", + "XHTML 1.0 Frameset", + voyager_frameset, + Dict.VERS_FRAMES), + new W3CVersionInfo("HTML 4.0", + "XHTML 1.0 Strict", + voyager_strict, + Dict.VERS_HTML40_STRICT), + new W3CVersionInfo("HTML 4.0 Transitional", + "XHTML 1.0 Transitional", + voyager_loose, + Dict.VERS_HTML40_LOOSE), + new W3CVersionInfo("HTML 4.0 Frameset", + "XHTML 1.0 Frameset", + voyager_frameset, + Dict.VERS_FRAMES), + new W3CVersionInfo("HTML 3.2", + "XHTML 1.0 Transitional", + voyager_loose, + Dict.VERS_HTML32), + new W3CVersionInfo("HTML 2.0", + "XHTML 1.0 Strict", + voyager_strict, + Dict.VERS_HTML20) + }; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java new file mode 100644 index 0000000..de0e64e --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableBoolean.java @@ -0,0 +1,38 @@ +/* + * @(#)MutableBoolean.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Mutable Boolean + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class MutableBoolean { + + public boolean value; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java new file mode 100644 index 0000000..00ef347 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableInteger.java @@ -0,0 +1,38 @@ +/* + * @(#)MutableInteger.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Mutable Integer + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class MutableInteger { + + public int value; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java new file mode 100644 index 0000000..a66fa73 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/MutableObject.java @@ -0,0 +1,58 @@ +/* + * @(#)MutableObject.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Mutable Object + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +public class MutableObject { + + public MutableObject() + { + this(null); + } + + public MutableObject(Object o) + { + this.value = o; + } + + public void setObject(Object o) + { + value = o; + } + + public Object getObject() + { + return value; + } + + private Object value; + +} diff --git a/net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java new file mode 100644 index 0000000..e502702 --- /dev/null +++ b/net.sourceforge.phpeclipse/src/org/w3c/tidy/Node.java @@ -0,0 +1,917 @@ +/* + * @(#)Node.java 1.11 2000/08/16 + * + */ + +package org.w3c.tidy; + +/** + * + * Node + * + * (c) 1998-2000 (W3C) MIT, INRIA, Keio University + * See Tidy.java for the copyright notice. 
+ * Derived from + * HTML Tidy Release 4 Aug 2000 + * + * @author Dave Raggett + * @author Andy Quick (translation to Java) + * @version 1.0, 1999/05/22 + * @version 1.0.1, 1999/05/29 + * @version 1.1, 1999/06/18 Java Bean + * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 + * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 + * @version 1.4, 1999/09/04 DOM support + * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 + * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 + * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 + * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 + * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 + * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 + * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 + */ + +/* + Used for elements and text nodes + element name is null for text nodes + start and end are offsets into lexbuf + which contains the textual content of + all elements in the parse tree. + + parent and content allow traversal + of the parse tree in any direction. + attributes are represented as a linked + list of AttVal nodes which hold the + strings for attribute/value pairs. 
+*/ + +public class Node { + + public static final short RootNode = 0; + public static final short DocTypeTag = 1; + public static final short CommentTag = 2; + public static final short ProcInsTag = 3; + public static final short TextNode = 4; + public static final short StartTag = 5; + public static final short EndTag = 6; + public static final short StartEndTag = 7; + public static final short CDATATag = 8; + public static final short SectionTag = 9; + public static final short AspTag = 10; + public static final short JsteTag = 11; + public static final short PhpTag = 12; + + protected Node parent; + protected Node prev; + protected Node next; + protected Node last; + protected int start; /* start of span onto text array */ + protected int end; /* end of span onto text array */ + protected byte[] textarray; /* the text array */ + protected short type; /* TextNode, StartTag, EndTag etc. */ + protected boolean closed; /* true if closed by explicit end tag */ + protected boolean implicit; /* true if inferred */ + protected boolean linebreak; /* true if followed by a line break */ + protected Dict was; /* old tag when it was changed */ + protected Dict tag; /* tag's dictionary definition */ + protected String element; /* name (null for text nodes) */ + protected AttVal attributes; + protected Node content; + + public Node() + { + this(TextNode, null, 0, 0); + } + + public Node(short type, byte[] textarray, int start, int end) + { + this.parent = null; + this.prev = null; + this.next = null; + this.last = null; + this.start = start; + this.end = end; + this.textarray = textarray; + this.type = type; + this.closed = false; + this.implicit = false; + this.linebreak = false; + this.was = null; + this.tag = null; + this.element = null; + this.attributes = null; + this.content = null; + } + + public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt) + { + this.parent = null; + this.prev = null; + this.next = null; + this.last = null; + 
this.start = start; + this.end = end; + this.textarray = textarray; + this.type = type; + this.closed = false; + this.implicit = false; + this.linebreak = false; + this.was = null; + this.tag = null; + this.element = element; + this.attributes = null; + this.content = null; + if (type == StartTag || type == StartEndTag || type == EndTag) + tt.findTag(this); + } + + /* used to clone heading nodes when split by an
*/ + protected Object clone() + { + Node node = new Node(); + + node.parent = this.parent; + if (this.textarray != null) + { + node.textarray = new byte[this.end - this.start]; + node.start = 0; + node.end = this.end - this.start; + if (node.end > 0) + System.arraycopy(this.textarray, this.start, + node.textarray, node.start, node.end); + } + node.type = this.type; + node.closed = this.closed; + node.implicit = this.implicit; + node.linebreak = this.linebreak; + node.was = this.was; + node.tag = this.tag; + if (this.element != null) + node.element = this.element; + if (this.attributes != null) + node.attributes = (AttVal)this.attributes.clone(); + return node; + } + + public AttVal getAttrByName(String name) + { + AttVal attr; + + for (attr = this.attributes; attr != null; attr = attr.next) + { + if (name != null && + attr.attribute != null && + attr.attribute.equals(name)) + break; + } + + return attr; + } + + /* default method for checking an element's attributes */ + public void checkAttributes( Lexer lexer ) + { + AttVal attval; + + for (attval = this.attributes; attval != null; attval = attval.next) + attval.checkAttribute( lexer, this ); + } + + public void checkUniqueAttributes(Lexer lexer) + { + AttVal attval; + + for (attval = this.attributes; attval != null; attval = attval.next) { + if (attval.asp == null && attval.php == null) + attval.checkUniqueAttribute(lexer, this); + } + } + + public void addAttribute(String name, String value) + { + AttVal av = new AttVal(null, null, null, null, + '"', name, value); + av.dict = + AttributeTable.getDefaultAttributeTable().findAttribute(av); + + if (this.attributes == null) + this.attributes = av; + else /* append to end of attributes */ + { + AttVal here = this.attributes; + + while (here.next != null) + here = here.next; + + here.next = av; + } + } + + /* remove attribute from node then free it */ + public void removeAttribute(AttVal attr) + { + AttVal av; + AttVal prev = null; + AttVal next; + + for (av = 
this.attributes; av != null; av = next) + { + next = av.next; + + if (av == attr) + { + if (prev != null) + prev.next = next; + else + this.attributes = next; + } + else + prev = av; + } + } + + /* find doctype element */ + public Node findDocType() + { + Node node; + + for (node = this.content; + node != null && node.type != DocTypeTag; node = node.next); + + return node; + } + + public void discardDocType() + { + Node node; + + node = findDocType(); + if (node != null) + { + if (node.prev != null) + node.prev.next = node.next; + else + node.parent.content = node.next; + + if (node.next != null) + node.next.prev = node.prev; + + node.next = null; + } + } + + /* remove node from markup tree and discard it */ + public static Node discardElement(Node element) + { + Node next = null; + + if (element != null) + { + next = element.next; + removeNode(element); + } + + return next; + } + + /* insert node into markup tree */ + public static void insertNodeAtStart(Node element, Node node) + { + node.parent = element; + + if (element.content == null) + element.last = node; + else + element.content.prev = node; // AQ added 13 Apr 2000 + + node.next = element.content; + node.prev = null; + element.content = node; + } + + /* insert node into markup tree */ + public static void insertNodeAtEnd(Node element, Node node) + { + node.parent = element; + node.prev = element.last; + + if (element.last != null) + element.last.next = node; + else + element.content = node; + + element.last = node; + } + + /* + insert node into markup tree in pace of element + which is moved to become the child of the node + */ + public static void insertNodeAsParent(Node element, Node node) + { + node.content = element; + node.last = element; + node.parent = element.parent; + element.parent = node; + + if (node.parent.content == element) + node.parent.content = node; + + if (node.parent.last == element) + node.parent.last = node; + + node.prev = element.prev; + element.prev = null; + + if (node.prev != 
null) + node.prev.next = node; + + node.next = element.next; + element.next = null; + + if (node.next != null) + node.next.prev = node; + } + + /* insert node into markup tree before element */ + public static void insertNodeBeforeElement(Node element, Node node) + { + Node parent; + + parent = element.parent; + node.parent = parent; + node.next = element; + node.prev = element.prev; + element.prev = node; + + if (node.prev != null) + node.prev.next = node; + + if (parent.content == element) + parent.content = node; + } + + /* insert node into markup tree after element */ + public static void insertNodeAfterElement(Node element, Node node) + { + Node parent; + + parent = element.parent; + node.parent = parent; + + // AQ - 13Jan2000 fix for parent == null + if (parent != null && parent.last == element) + parent.last = node; + else + { + node.next = element.next; + // AQ - 13Jan2000 fix for node.next == null + if (node.next != null) + node.next.prev = node; + } + + element.next = node; + node.prev = element; + } + + public static void trimEmptyElement(Lexer lexer, Node element) + { + TagTable tt = lexer.configuration.tt; + + if (lexer.canPrune(element)) + { + if (element.type != TextNode) + Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT); + + discardElement(element); + } + else if (element.tag == tt.tagP && element.content == null) + { + /* replace
by

to preserve formatting */ + Node node = lexer.inferredTag("br"); + Node.coerceNode(lexer, element, tt.tagBr); + Node.insertNodeAfterElement(element, node); + } + } + + /* + This maps + hello world + to + hello world + + If last child of element is a text node + then trim trailing white space character + moving it to after element's end tag. + */ + public static void trimTrailingSpace(Lexer lexer, Node element, Node last) + { + byte c; + TagTable tt = lexer.configuration.tt; + + if (last != null && last.type == Node.TextNode && + last.end > last.start) + { + c = lexer.lexbuf[last.end - 1]; + + if (c == 160 || c == (byte)' ') + { + /* take care with