...
->...
- - Such rules are applied to the element's content and then - to the element itself until none of the rules more apply. - Having applied all the rules to an element, it will have - a style attribute with one or more properties. - - Other rules strip the element they apply to, replacing - it by style properties on the contents, e.g. - -...
... - - These rules are applied to an element before processing - its content and replace the current element by the first - element in the exposed content. - - After applying both sets of rules, you can replace the - style attribute by a class value and style rule in the - document head. To support this, an association of styles - and class names is built. - - A naive approach is to rely on string matching to test - when two property lists are the same. A better approach - would be to first sort the properties before matching. -*/ - -public class Clean { - - private int classNum = 1; - - private TagTable tt; - - public Clean(TagTable tt) - { - this.tt = tt; - } - - private StyleProp insertProperty(StyleProp props, String name, - String value) - { - StyleProp first, prev, prop; - int cmp; - - prev = null; - first = props; - - while (props != null) - { - cmp = props.name.compareTo(name); - - if (cmp == 0) - { - /* this property is already defined, ignore new value */ - return first; - } - - if (cmp > 0) // props.name > name - { - /* insert before this */ - - prop = new StyleProp(name, value, props); - - if (prev != null) - prev.next = prop; - else - first = prop; - - return first; - } - - prev = props; - props = props.next; - } - - prop = new StyleProp(name, value); - - if (prev != null) - prev.next = prop; - else - first = prop; - - return first; - } - - /* - Create sorted linked list of properties from style string - It temporarily places nulls in place of ':' and ';' to - delimit the strings for the property name and value. - Some systems don't allow you to null literal strings, - so to avoid this, a copy is made first. - */ - private StyleProp createProps(StyleProp prop, String style) - { - int name_end; - int value_end; - int value_start = 0; - int name_start = 0; - boolean more; - - name_start = 0; - while (name_start < style.length()) - { - while (name_start < style.length() && - style.charAt(name_start) == ' ') - ++name_start; - - name_end = name_start; - - while (name_end < style.length()) - { - if (style.charAt(name_end) == ':') - { - value_start = name_end + 1; - break; - } - - ++name_end; - } - - if (name_end >= style.length() || style.charAt(name_end) != ':') - break; - - while (value_start < style.length() && - style.charAt(value_start) == ' ') - ++value_start; - - value_end = value_start; - more = false; - - while (value_end < style.length()) - { - if (style.charAt(value_end) == ';') - { - more = true; - break; - } - - ++value_end; - } - - prop = insertProperty(prop, - style.substring(name_start, name_end), - style.substring(value_start, value_end)); - - if (more) - { - name_start = value_end + 1; - continue; - } - - break; - } - - return prop; - } - - private String createPropString(StyleProp props) - { - String style = ""; - int len; - StyleProp prop; - - /* compute length */ - - for (len = 0, prop = props; prop != null; prop = prop.next) - { - len += prop.name.length() + 2; - len += prop.value.length() + 2; - } - - for (prop = props; prop != null; prop = prop.next) - { - style = style.concat(prop.name); - style = style.concat(": "); - - style = style.concat(prop.value); - - if (prop.next == null) - break; - - style = style.concat("; "); - } - - return style; - } - - /* - create string with merged properties - */ - private String addProperty(String style, String property) - { - StyleProp prop; - - prop = createProps(null, style); - prop = createProps(prop, property); - style = createPropString(prop); - return style; - } - - private String gensymClass(String tag) - { - String str; - - str = "c" + classNum; - classNum++; - return str; - } - - private String findStyle(Lexer lexer, String tag, String properties) - { - Style style; - - for (style = lexer.styles; style != null; style=style.next) - { - if (style.tag.equals(tag) && - style.properties.equals(properties)) - return style.tagClass; - } - - style = new Style(tag, gensymClass(tag), properties, lexer.styles); - lexer.styles = style; - return style.tagClass; - } - - /* - Find style attribute in node, and replace it - by corresponding class attribute. Search for - class in style dictionary otherwise gensym - new class and add to dictionary. - - Assumes that node doesn't have a class attribute - */ - private void style2Rule(Lexer lexer, Node node) - { - AttVal styleattr, classattr; - String classname; - - styleattr = node.getAttrByName("style"); - - if (styleattr != null) - { - classname = findStyle(lexer, node.element, styleattr.value); - classattr = node.getAttrByName("class"); - - /* - if there already is a class attribute - then append class name after a space - */ - if (classattr != null) - { - classattr.value = classattr.value + " " + classname; - node.removeAttribute(styleattr); - } - else /* reuse style attribute for class attribute */ - { - styleattr.attribute = "class"; - styleattr.value = classname; - } - } - } - - private void addColorRule(Lexer lexer, String selector, String color) - { - if (color != null) - { - lexer.addStringLiteral(selector); - lexer.addStringLiteral(" { color: "); - lexer.addStringLiteral(color); - lexer.addStringLiteral(" }\n"); - } - } - - /* - move presentation attribs from body to style element - - background="foo" -> body { background-image: url(foo) } - bgcolor="foo" -> body { background-color: foo } - text="foo" -> body { color: foo } - link="foo" -> :link { color: foo } - vlink="foo" -> :visited { color: foo } - alink="foo" -> :active { color: foo } - */ - private void cleanBodyAttrs(Lexer lexer, Node body) - { - AttVal attr; - String bgurl = null; - String bgcolor = null; - String color = null; - - attr = body.getAttrByName("background"); - - if (attr != null) - { - bgurl = attr.value; - attr.value = null; - body.removeAttribute(attr); - } - - attr = body.getAttrByName("bgcolor"); - - if (attr != null) - { - bgcolor = attr.value; - attr.value = null; - body.removeAttribute(attr); - } - - attr = body.getAttrByName("text"); - - if (attr != null) - { - color = attr.value; - attr.value = null; - body.removeAttribute(attr); - } - - if (bgurl != null || bgcolor != null || color != null) - { - lexer.addStringLiteral(" body {\n"); - - if (bgurl != null) - { - lexer.addStringLiteral(" background-image: url("); - lexer.addStringLiteral(bgurl); - lexer.addStringLiteral(");\n"); - } - - if (bgcolor != null) - { - lexer.addStringLiteral(" background-color: "); - lexer.addStringLiteral(bgcolor); - lexer.addStringLiteral(";\n"); - } - - if (color != null) - { - lexer.addStringLiteral(" color: "); - lexer.addStringLiteral(color); - lexer.addStringLiteral(";\n"); - } - - lexer.addStringLiteral(" }\n"); - } - - attr = body.getAttrByName("link"); - - if (attr != null) - { - addColorRule(lexer, " :link", attr.value); - body.removeAttribute(attr); - } - - attr = body.getAttrByName("vlink"); - - if (attr != null) - { - addColorRule(lexer, " :visited", attr.value); - body.removeAttribute(attr); - } - - attr = body.getAttrByName("alink"); - - if (attr != null) - { - addColorRule(lexer, " :active", attr.value); - body.removeAttribute(attr); - } - } - - private boolean niceBody(Lexer lexer, Node doc) - { - Node body = doc.findBody(lexer.configuration.tt); - - if (body != null) - { - if ( - body.getAttrByName("background") != null || - body.getAttrByName("bgcolor") != null || - body.getAttrByName("text") != null || - body.getAttrByName("link") != null || - body.getAttrByName("vlink") != null || - body.getAttrByName("alink") != null - ) - { - lexer.badLayout |= Report.USING_BODY; - return false; - } - } - - return true; - } - - /* create style element using rules from dictionary */ - private void createStyleElement(Lexer lexer, Node doc) - { - Node node, head, body; - Style style; - AttVal av; - - if (lexer.styles == null && niceBody(lexer, doc)) - return; - - node = lexer.newNode(Node.StartTag, null, 0, 0, "style"); - node.implicit = true; - - /* insert type attribute */ - av = new AttVal(null, null, '"', "type", "text/css"); - av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); - node.attributes = av; - - body = doc.findBody(lexer.configuration.tt); - - lexer.txtstart = lexer.lexsize; - - if (body != null) - cleanBodyAttrs(lexer, body); - - for (style = lexer.styles; style != null; style = style.next) - { - lexer.addCharToLexer(' '); - lexer.addStringLiteral(style.tag); - lexer.addCharToLexer('.'); - lexer.addStringLiteral(style.tagClass); - lexer.addCharToLexer(' '); - lexer.addCharToLexer('{'); - lexer.addStringLiteral(style.properties); - lexer.addCharToLexer('}'); - lexer.addCharToLexer('\n'); - } - - lexer.txtend = lexer.lexsize; - - Node.insertNodeAtEnd(node, - lexer.newNode(Node.TextNode, - lexer.lexbuf, - lexer.txtstart, - lexer.txtend)); - - /* - now insert style element into document head - - doc is root node. search its children for html node - the head node should be first child of html node - */ - - head = doc.findHEAD(lexer.configuration.tt); - - if (head != null) - Node.insertNodeAtEnd(head, node); - } - - /* ensure bidirectional links are consistent */ - private void fixNodeLinks(Node node) - { - Node child; - - if (node.prev != null) - node.prev.next = node; - else - node.parent.content = node; - - if (node.next != null) - node.next.prev = node; - else - node.parent.last = node; - - for (child = node.content; child != null; child = child.next) - child.parent = node; - } - - /* - used to strip child of node when - the node has one and only one child - */ - private void stripOnlyChild(Node node) - { - Node child; - - child = node.content; - node.content = child.content; - node.last = child.last; - child.content = null; - - for (child = node.content; child != null; child = child.next) - child.parent = node; - } - - /* used to strip font start and end tags */ - private void discardContainer(Node element, MutableObject pnode) - { - Node node; - Node parent = element.parent; - - if (element.content != null) - { - element.last.next = element.next; - - if (element.next != null) - { - element.next.prev = element.last; - element.last.next = element.next; - } - else - parent.last = element.last; - - if (element.prev != null) - { - element.content.prev = element.prev; - element.prev.next = element.content; - } - else - parent.content = element.content; - - for (node = element.content; node != null; node = node.next) - node.parent = parent; - - pnode.setObject(element.content); - } - else - { - if (element.next != null) - element.next.prev = element.prev; - else - parent.last = element.prev; - - if (element.prev != null) - element.prev.next = element.next; - else - parent.content = element.next; - - pnode.setObject(element.next); - } - - element.next = null; - element.content = null; - } - - /* - Add style property to element, creating style - attribute as needed and adding ; delimiter - */ - private void addStyleProperty(Node node, String property) - { - AttVal av; - - for (av = node.attributes; av != null; av = av.next) - { - if (av.attribute.equals("style")) - break; - } - - /* if style attribute already exists then insert property */ - - if (av != null) - { - String s; - - s = addProperty(av.value, property); - av.value = s; - } - else /* else create new style attribute */ - { - av = new AttVal(node.attributes, null, '"', "style", property); - av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); - node.attributes = av; - } - } - - /* - Create new string that consists of the - combined style properties in s1 and s2 - - To merge property lists, we build a linked - list of property/values and insert properties - into the list in order, merging values for - the same property name. - */ - private String mergeProperties(String s1, String s2) - { - String s; - StyleProp prop; - - prop = createProps(null, s1); - prop = createProps(prop, s2); - s = createPropString(prop); - return s; - } - - private void mergeStyles(Node node, Node child) - { - AttVal av; - String s1, s2, style; - - for (s2 = null, av = child.attributes; av != null; av = av.next) - { - if (av.attribute.equals("style")) - { - s2 = av.value; - break; - } - } - - for (s1 = null, av = node.attributes; av != null; av = av.next) - { - if (av.attribute.equals("style")) - { - s1 = av.value; - break; - } - } - - if (s1 != null) - { - if (s2 != null) /* merge styles from both */ - { - style = mergeProperties(s1, s2); - av.value = style; - } - } - else if (s2 != null) /* copy style of child */ - { - av = new AttVal(node.attributes, null, '"', "style", s2); - av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); - node.attributes = av; - } - } - - private String fontSize2Name(String size) - { - /* - String[] sizes = - { - "50%", - "60%", - "80%", - null, - "120%", - "150%", - "200%" - }; - */ - - String[] sizes = - { - "60%", - "70%", - "80%", - null, - "120%", - "150%", - "200%" - }; - String buf; - - if (size.length() > 0 && - '0' <= size.charAt(0) && size.charAt(0) <= '6') - { - int n = size.charAt(0) - '0'; - return sizes[n]; - } - - if (size.length() > 0 && size.charAt(0) == '-') - { - if (size.length() > 1 && - '0' <= size.charAt(1) && size.charAt(1) <= '6') - { - int n = size.charAt(1) - '0'; - double x; - - for (x = 1.0; n > 0; --n) - x *= 0.8; - - x *= 100.0; - buf = "" + (int)x + "%"; - - return buf; - } - - return "smaller"; /*"70%"; */ - } - - if (size.length() > 1 && - '0' <= size.charAt(1) && size.charAt(1) <= '6') - { - int n = size.charAt(1) - '0'; - double x; - - for (x = 1.0; n > 0; --n) - x *= 1.2; - - x *= 100.0; - buf = "" + (int)x + "%"; - - return buf; - } - - return "larger"; /* "140%" */ - } - - private void addFontFace(Node node, String face) - { - addStyleProperty(node, "font-family: " + face); - } - - private void addFontSize(Node node, String size) - { - String value; - - if (size.equals("6") && node.tag == tt.tagP) - { - node.element = "h1"; - tt.findTag(node); - return; - } - - if (size.equals("5") && node.tag == tt.tagP) - { - node.element = "h2"; - tt.findTag(node); - return; - } - - if (size.equals("4") && node.tag == tt.tagP) - { - node.element = "h3"; - tt.findTag(node); - return; - } - - value = fontSize2Name(size); - - if (value != null) - { - addStyleProperty(node, "font-size: " + value); - } - } - - private void addFontColor(Node node, String color) - { - addStyleProperty(node, "color: " + color); - } - - private void addAlign(Node node, String align) - { - /* force alignment value to lower case */ - addStyleProperty(node, "text-align: " + align.toLowerCase()); - } - - /* - add style properties to node corresponding to - the font face, size and color attributes - */ - private void addFontStyles(Node node, AttVal av) - { - while (av != null) - { - if (av.attribute.equals("face")) - addFontFace(node, av.value); - else if (av.attribute.equals("size")) - addFontSize(node, av.value); - else if (av.attribute.equals("color")) - addFontColor(node, av.value); - - av = av.next; - } - } - - /* - Symptom:
- Action:
- */ - private void textAlign(Lexer lexer, Node node) - { - AttVal av, prev; - - prev = null; - - for (av = node.attributes; av != null; av = av.next) - { - if (av.attribute.equals("align")) - { - if (prev != null) - prev.next = av.next; - else - node.attributes = av.next; - - if (av.value != null) - { - addAlign(node, av.value); - } - - break; - } - - prev = av; - } - } - - /* - The clean up rules use the pnode argument to return the - next node when the orignal node has been deleted - */ - - /* - Symptom:
- Draft Recommended Practice -
- - becomes: - -- Draft Recommended Practice -
- - This code also replaces the align attribute by a style attribute. - However, to avoid CSS problems with Navigator 4, this isn't done - for the elements: caption, tr and table - */ - private boolean blockStyle(Lexer lexer, Node node, MutableObject pnode) - { - Node child; - - if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) != 0) - { - if (node.tag != tt.tagTable - && node.tag != tt.tagTr - && node.tag != tt.tagLi) - { - /* check for align attribute */ - if (node.tag != tt.tagCaption) - textAlign(lexer, node); - - child = node.content; - - if (child == null) - return false; - - /* check child has no peers */ - - if (child.next != null) - return false; - - if (child.tag == tt.tagB) - { - mergeStyles(node, child); - addStyleProperty(node, "font-weight: bold"); - stripOnlyChild(node); - return true; - } - - if (child.tag == tt.tagI) - { - mergeStyles(node, child); - addStyleProperty(node, "font-style: italic"); - stripOnlyChild(node); - return true; - } - - if (child.tag == tt.tagFont) - { - mergeStyles(node, child); - addFontStyles(node, child.attributes); - stripOnlyChild(node); - return true; - } - } - } - - return false; - } - - /* the only child of table cell or an inline element such as em */ - private boolean inlineStyle(Lexer lexer, Node node, MutableObject pnode) - { - Node child; - - if (node.tag != tt.tagFont && (node.tag.model & (Dict.CM_INLINE|Dict.CM_ROW)) != 0) - { - child = node.content; - - if (child == null) - return false; - - /* check child has no peers */ - - if (child.next != null) - return false; - - if (child.tag == tt.tagB && lexer.configuration.LogicalEmphasis) - { - mergeStyles(node, child); - addStyleProperty(node, "font-weight: bold"); - stripOnlyChild(node); - return true; - } - - if (child.tag == tt.tagI && lexer.configuration.LogicalEmphasis) - { - mergeStyles(node, child); - addStyleProperty(node, "font-style: italic"); - stripOnlyChild(node); - return true; - } - - if (child.tag == tt.tagFont) - { - mergeStyles(node, child); - addFontStyles(node, child.attributes); - stripOnlyChild(node); - return true; - } - } - - return false; - } - - /* - Replace font elements by span elements, deleting - the font element's attributes and replacing them - by a single style attribute. - */ - private boolean font2Span(Lexer lexer, Node node, MutableObject pnode) - { - AttVal av, style, next; - - if (node.tag == tt.tagFont) - { - if (lexer.configuration.DropFontTags) - { - discardContainer(node, pnode); - return false; - } - - /* if FONT is only child of parent element then leave alone */ - if (node.parent.content == node - && node.next == null) - return false; - - addFontStyles(node, node.attributes); - - /* extract style attribute and free the rest */ - av = node.attributes; - style = null; - - while (av != null) - { - next = av.next; - - if (av.attribute.equals("style")) - { - av.next = null; - style = av; - } - - av = next; - } - - node.attributes = style; - - node.tag = tt.tagSpan; - node.element = "span"; - - return true; - } - - return false; - } - - /* - Applies all matching rules to a node. - */ - private Node cleanNode(Lexer lexer, Node node) - { - Node next = null; - MutableObject o = new MutableObject(); - boolean b = false; - - for (next = node; node.isElement(); node = next) - { - o.setObject(next); - - b = dir2Div(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = nestedList(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = center2Div(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = mergeDivs(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = blockStyle(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = inlineStyle(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - b = font2Span(lexer, node, o); - next = (Node)o.getObject(); - if (b) - continue; - - break; - } - - return next; - } - - private Node createStyleProperties(Lexer lexer, Node node) - { - Node child; - - if (node.content != null) - { - for (child = node.content; child != null; child = child.next) - { - child = createStyleProperties(lexer, child); - } - } - - return cleanNode(lexer, node); - } - - private void defineStyleRules(Lexer lexer, Node node) - { - Node child; - - if (node.content != null) - { - for (child = node.content; - child != null; child = child.next) - { - defineStyleRules(lexer, child); - } - } - - style2Rule(lexer, node); - } - - public void cleanTree(Lexer lexer, Node doc) - { - doc = createStyleProperties(lexer, doc); - - if (!lexer.configuration.MakeClean) - { - defineStyleRules(lexer, doc); - createStyleElement(lexer, doc); - } - } - - /* simplifies ... ... etc. */ - public void nestedEmphasis(Node node) - { - MutableObject o = new MutableObject(); - Node next; - - while (node != null) - { - next = node.next; - - if ((node.tag == tt.tagB || node.tag == tt.tagI) - && node.parent != null && node.parent.tag == node.tag) - { - /* strip redundant inner element */ - o.setObject(next); - discardContainer(node, o); - next = (Node)o.getObject(); - node = next; - continue; - } - - if (node.content != null) - nestedEmphasis(node.content); - - node = next; - } - } - - /* replace i by em and b by strong */ - public void emFromI(Node node) - { - while (node != null) - { - if (node.tag == tt.tagI) - { - node.element = tt.tagEm.name; - node.tag = tt.tagEm; - } - else if (node.tag == tt.tagB) - { - node.element = tt.tagStrong.name; - node.tag = tt.tagStrong; - } - - if (node.content != null) - emFromI(node.content); - - node = node.next; - } - } - - /* - Some people use dir or ul without an li - to indent the content. The pattern to - look for is a list with a single implicit - li. This is recursively replaced by an - implicit blockquote. - */ - public void list2BQ(Node node) - { - while (node != null) - { - if (node.content != null) - list2BQ(node.content); - - if (node.tag != null && node.tag.parser == ParserImpl.getParseList() && - node.hasOneChild() && node.content.implicit) - { - stripOnlyChild(node); - node.element = tt.tagBlockquote.name; - node.tag = tt.tagBlockquote; - node.implicit = true; - } - - node = node.next; - } - } - - /* - Replace implicit blockquote by div with an indent - taking care to reduce nested blockquotes to a single - div with the indent set to match the nesting depth - */ - public void bQ2Div(Node node) - { - int indent; - String indent_buf; - - while (node != null) - { - if (node.tag == tt.tagBlockquote && node.implicit) - { - indent = 1; - - while(node.hasOneChild() && - node.content.tag == tt.tagBlockquote && - node.implicit) - { - ++indent; - stripOnlyChild(node); - } - - if (node.content != null) - bQ2Div(node.content); - - indent_buf = "margin-left: " + - (new Integer(2*indent)).toString() + "em"; - - node.element = tt.tagDiv.name; - node.tag = tt.tagDiv; - node.addAttribute("style", indent_buf); - } - else if (node.content != null) - bQ2Div(node.content); - - - node = node.next; - } - } - - /* node is prune up to */ - public Node pruneSection(Lexer lexer, Node node) - { - for (;;) - { - /* discard node and returns next */ - node = Node.discardElement(node); - - if (node == null) - return null; - - if (node.type == Node.SectionTag) - { - if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) - { - node = pruneSection(lexer, node); - continue; - } - - if ((Lexer.getString(node.textarray, node.start, 5)).equals("endif")) - { - node = Node.discardElement(node); - break; - } - } - } - - return node; - } - - public void dropSections(Lexer lexer, Node node) - { - while (node != null) - { - if (node.type == Node.SectionTag) - { - /* prune up to matching endif */ - if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) - { - node = pruneSection(lexer, node); - continue; - } - - /* discard others as well */ - node = Node.discardElement(node); - continue; - } - - if (node.content != null) - dropSections(lexer, node.content); - - node = node.next; - } - } - - public void purgeAttributes(Node node) - { - AttVal attr = node.attributes; - AttVal next = null; - AttVal prev = null; - - while (attr != null) - { - next = attr.next; - - /* special check for class="Code" denoting pre text */ - if (attr.attribute != null && - attr.value != null && - attr.attribute.equals("class") && - attr.value.equals("Code")) - { - prev = attr; - } - else if (attr.attribute != null && - (attr.attribute.equals("class") || - attr.attribute.equals("style") || - attr.attribute.equals("lang") || - attr.attribute.startsWith("x:") || - ((attr.attribute.equals("height") || attr.attribute.equals("width")) && - (node.tag == tt.tagTd || node.tag == tt.tagTr || node.tag == tt.tagTh)))) - { - if (prev != null) - prev.next = next; - else - node.attributes = next; - - } - else - prev = attr; - - attr = next; - } - } - - /* Word2000 uses span excessively, so we strip span out */ - public Node stripSpan(Lexer lexer, Node span) - { - Node node; - Node prev = null; - Node content; - - /* - deal with span elements that have content - by splicing the content in place of the span - after having processed it - */ - - cleanWord2000(lexer, span.content); - content = span.content; - - if (span.prev != null) - prev = span.prev; - else if (content != null) - { - node = content; - content = content.next; - Node.removeNode(node); - Node.insertNodeBeforeElement(span, node); - prev = node; - } - - while (content != null) - { - node = content; - content = content.next; - Node.removeNode(node); - Node.insertNodeAfterElement(prev, node); - prev = node; - } - - if (span.next == null) - span.parent.last = prev; - - node = span.next; - span.content = null; - Node.discardElement(span); - return node; - } - - /* map non-breaking spaces to regular spaces */ - private void normalizeSpaces(Lexer lexer, Node node) - { - while (node != null) - { - if (node.content != null) - normalizeSpaces(lexer, node.content); - - if (node.type == Node.TextNode) - { - int i; - MutableInteger c = new MutableInteger(); - int p = node.start; - - for (i = node.start; i < node.end; ++i) - { - c.value = (int)node.textarray[i]; - - /* look for UTF-8 multibyte character */ - if (c.value > 0x7F) - i += PPrint.getUTF8(node.textarray, i, c); - - if (c.value == 160) - c.value = ' '; - - p = PPrint.putUTF8(node.textarray, p, c.value); - } - } - - node = node.next; - } - } - - /* - This is a major clean up to strip out all the extra stuff you get - when you save as web page from Word 2000. It doesn't yet know what - to do with VML tags, but these will appear as errors unless you - declare them as new tags, such as o:p which needs to be declared - as inline. - */ - public void cleanWord2000(Lexer lexer, Node node) - { - /* used to a list from a sequence of bulletted p's */ - Node list = null; - - while (node != null) - { - /* discard Word's style verbiage */ - if (node.tag == tt.tagStyle || - node.tag == tt.tagMeta || - node.type == Node.CommentTag) - { - node = Node.discardElement(node); - continue; - } - - /* strip out all span tags Word scatters so liberally! */ - if (node.tag == tt.tagSpan) - { - node = stripSpan(lexer, node); - continue; - } - - /* get rid of Word's xmlns attributes */ - if (node.tag == tt.tagHtml) - { - /* check that it's a Word 2000 document */ - if (node.getAttrByName("xmlns:o") == null) - return; - } - - if (node.tag == tt.tagLink) - { - AttVal attr = node.getAttrByName("rel"); - - if (attr != null && attr.value != null && - attr.value.equals("File-List")) - { - node = Node.discardElement(node); - continue; - } - } - - /* discard empty paragraphs */ - if (node.content == null && node.tag == tt.tagP) - { - node = Node.discardElement(node); - continue; - } - - if (node.tag == tt.tagP) - { - AttVal attr = node.getAttrByName("class"); - - /* map sequence ofto
to
...*/ - else if (attr != null && attr.value != null && - attr.value.equals("Code")) - { - Node br = lexer.newLineNode(); - normalizeSpaces(lexer, node); - - if (list == null || list.tag != tt.tagPre) - { - list = lexer.inferredTag("pre"); - Node.insertNodeBeforeElement(node, list); - } - - /* remove node and append to contents of list */ - Node.removeNode(node); - Node.insertNodeAtEnd(list, node); - stripSpan(lexer, node); - Node.insertNodeAtEnd(list, br); - node = list.next; - } - else - list = null; - } - else - list = null; - - /* strip out style and class attributes */ - if (node.type == Node.StartTag || node.type == Node.StartEndTag) - purgeAttributes(node); - - if (node.content != null) - cleanWord2000(lexer, node.content); - - node = node.next; - } - } - - public boolean isWord2000(Node root, TagTable tt) - { - Node html = root.findHTML(tt); - - return (html != null && html.getAttrByName("xmlns:o") != null); - } -}