text

more text Shouldn't be mapped to

text

more text */ public void pushInline( Node node ) { IStack is; if (node.implicit) return; if (node.tag == null) return; if ((node.tag.model & Dict.CM_INLINE) == 0 ) return; if ((node.tag.model & Dict.CM_OBJECT) != 0) return; if (node.tag != configuration.tt.tagFont && isPushed(node)) return; // make sure there is enough space for the stack is = new IStack(); is.tag = node.tag; is.element = node.element; if (node.attributes != null) is.attributes = cloneAttributes(node.attributes); this.istack.push( is ); } /* pop inline stack */ public void popInline( Node node ) { AttVal av; IStack is; if (node != null) { if (node.tag == null) return; if ((node.tag.model & Dict.CM_INLINE) == 0) return; if ((node.tag.model & Dict.CM_OBJECT) != 0) return; // if node is then pop until we find an if (node.tag == configuration.tt.tagA) { while (this.istack.size() > 0) { is = (IStack)this.istack.pop(); if (is.tag == configuration.tt.tagA) { break; } } if (this.insert >= this.istack.size()) this.insert = -1; return; } } if (this.istack.size() > 0) { is = (IStack)this.istack.pop(); if (this.insert >= this.istack.size()) this.insert = -1; } } public boolean isPushed( Node node ) { int i; IStack is; for (i = this.istack.size() - 1; i >= 0; --i) { is = (IStack)this.istack.elementAt(i); if (is.tag == node.tag) return true; } return false; } /* This has the effect of inserting "missing" inline elements around the contents of blocklevel elements such as P, TD, TH, DIV, PRE etc. This procedure is called at the start of ParseBlock. when the inline stack is not empty, as will be the case in:
        <i><h1>italic heading</h1></i>
      which is then treated as equivalent to
        <h1><i>italic heading</i></h1>
This is implemented by setting the lexer into a mode
      where it gets tokens from the inline stack rather than
      from the input stream.
    */
    public int inlineDup(Node node)
    {
        // number of inline stack entries above the current base
        int n = this.istack.size() - this.istackbase;

        if (n > 0)
        {
            // start replaying from the base; node is handed back once replay ends
            this.insert = this.istackbase;
            this.inode = node;
        }

        return n;
    }

    /*
      Returns the next token to be replayed from the inline stack.

      When the insertion cursor is -1 the deferred node stored by
      inlineDup is returned (and cleared). Otherwise a new implicit
      start tag is built from the stack entry at the cursor and the
      cursor is advanced, or reset to -1 after the last entry.
    */
    public Node insertedToken()
    {
        Node node;
        IStack is;
        int n;

        // this will only be null if inode != null
        if (this.insert == -1)
        {
            node = this.inode;
            this.inode = null;
            return node;
        }

        // if this is the "latest" node then update
        // the position, otherwise use current values
        if (this.inode == null)
        {
            this.lines = this.in.curline;
            this.columns = this.in.curcol;
        }

        node = newNode(Node.StartTag, this.lexbuf, this.txtstart, this.txtend);
        // GLP: Bugfix 126261. Remove when this change
        // is fixed in istack.c in the original Tidy
        node.implicit = true;

        is = (IStack) this.istack.elementAt(this.insert);
        node.element = is.element;
        node.tag = is.tag;
        if (is.attributes != null)
        {
            node.attributes = cloneAttributes(is.attributes);
        }

        // advance lexer to next item on the stack
        n = this.insert;

        // and recover state if we have reached the end
        if (++n < this.istack.size())
        {
            this.insert = n;
        }
        else
        {
            this.insert = -1;
        }

        return node;
    }

    /* AQ: Try this for speed optimization */
    /* Returns 0 when the strings are equal ignoring case, 1 otherwise. */
    public static int wstrcasecmp(String s1, String s2)
    {
        return (s1.equalsIgnoreCase(s2) ? 0 : 1);
    }

    /*
      Compares two strings, skipping over the common prefix using the
      ASCII-only toLower folding. Returns 0 if both strings end together,
      -1 if s1 is a proper prefix of s2, 1 if s2 is a proper prefix of s1.

      NOTE(review): at the first differing position the result is decided
      by the raw (unfolded) characters, so e.g. "a" vs "B" yields 1.
      Confirm callers expect this before changing it.
    */
    public static int wstrcaselexcmp(String s1, String s2)
    {
        char c;
        int i = 0;

        while (i < s1.length() && i < s2.length())
        {
            c = s1.charAt(i);
            if (toLower(c) != toLower(s2.charAt(i)))
            {
                break;
            }
            i += 1;
        }

        if (i == s1.length() && i == s2.length())
        {
            return 0;
        }
        else if (i == s1.length())
        {
            return -1;
        }
        else if (i == s2.length())
        {
            return 1;
        }
        else
        {
            return (s1.charAt(i) > s2.charAt(i) ? 1 : -1);
        }
    }

    /*
      Returns true if s2 occurs anywhere in s1, ignoring case.

      Each candidate offset compares exactly s2.length() characters.
      The previous code compared s2 with the whole remaining suffix of
      s1, so it only matched when s1 ended with s2.
    */
    public static boolean wsubstr(String s1, String s2)
    {
        int len1 = s1.length();
        int len2 = s2.length();

        for (int i = 0; i <= len1 - len2; ++i)
        {
            if (s1.regionMatches(true, i, s2, 0, len2))
            {
                return true;
            }
        }

        return false;
    }

    /*
      Decides whether an element may be discarded. Text nodes always
      qualify; otherwise the element must have no content and must not
      fall into any of the exclusions tested below.
    */
    public boolean canPrune(Node element)
    {
        if (element.type == Node.TextNode)
            return true;

        if (element.content != null)
            return false;

        // anchors carrying attributes are kept
        if (element.tag == configuration.tt.tagA && element.attributes != null)
            return false;

        // empty paragraphs are kept unless DropEmptyParas is set
        if (element.tag == configuration.tt.tagP && !this.configuration.DropEmptyParas)
            return false;

        if (element.tag == null)
            return false;

        if ((element.tag.model & Dict.CM_ROW) != 0)
            return false;

        if (element.tag == configuration.tt.tagApplet)
            return false;

        if (element.tag == configuration.tt.tagObject)
            return false;

        // elements carrying an id or name attribute are kept
        if (element.attributes != null &&
            (element.getAttrByName("id") != null ||
             element.getAttrByName("name") != null))
            return false;

        return true;
    }

    /* duplicate name attribute as an id */
    public void fixId(Node node)
    {
        AttVal name = node.getAttrByName("name");
        AttVal id = node.getAttrByName("id");

        if (name != null)
        {
            if (id != null)
            {
                // both present: report when the values disagree
                if (!id.value.equals(name.value))
                    Report.attrError(this, node, "name", Report.ID_NAME_MISMATCH);
            }
            else if (this.configuration.XmlOut)
            {
                // only name present: copy it to id when XmlOut is set
                node.addAttribute("id", name.value);
            }
        }
    }

    /*
      defer duplicates when entering a table or other
      element where the inlines shouldn't be duplicated
    */
    public void deferDup()
    {
        this.insert = -1;
        this.inode = null;
    }

    /* Private methods and fields */

    /* lexer char types */
    private static final short DIGIT        = 1;
    private static final short LETTER       = 2;
    private static final short NAMECHAR     = 4;
    private static final short WHITE        = 8;
    private static final short NEWLINE      = 16;
    private static final short LOWERCASE    = 32;
    private static final short UPPERCASE    = 64;

    /* lexer GetToken states */
    private static final short LEX_CONTENT     = 0;
    private static final short LEX_GT          = 1;
    private static final short LEX_ENDTAG      = 2;
    private static final short LEX_STARTTAG    = 3;
    private static final short LEX_COMMENT     = 4;
    private static final short LEX_DOCTYPE     = 5;
    private static final short LEX_PROCINSTR   = 6;
    private static final short LEX_ENDCOMMENT  = 7;
    private static final short LEX_CDATA       = 8;
    private static final short LEX_SECTION     = 9;
    private static final short LEX_ASP         = 10;
    private static final short LEX_JSTE        = 11;
    private static final short LEX_PHP         = 12;

    /* used to classify chars for lexical purposes; indexed by char code 0..127 */
    private static short[] lexmap = new short[128];

    /* ORs the given class bits into the lexmap entry of every char in str */
    private static void mapStr(String str, short code)
    {
        int j;

        for (int i = 0; i < str.length(); i++)
        {
            j = (int) str.charAt(i);
            lexmap[j] |= code;
        }
    }

    static
    {
        mapStr("\r\n\f", (short)(NEWLINE|WHITE));
        mapStr(" \t", WHITE);
        mapStr("-.:_", NAMECHAR);
        mapStr("0123456789", (short)(DIGIT|NAMECHAR));
        mapStr("abcdefghijklmnopqrstuvwxyz", (short)(LOWERCASE|LETTER|NAMECHAR));
        mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short)(UPPERCASE|LETTER|NAMECHAR));
    }

    /* class bits for c; chars with code 128 or above have no bits set */
    private static short MAP(char c)
    {
        return ((int) c < 128 ? lexmap[(int) c] : 0);
    }

    private static boolean isWhite(char c)
    {
        short m = MAP(c);

        return (m & WHITE) != 0;
    }

    private static boolean isDigit(char c)
    {
        short m;

        m = MAP(c);

        return (m & DIGIT) != 0;
    }

    private static boolean isLetter(char c)
    {
        short m;

        m = MAP(c);

        return (m & LETTER) != 0;
    }

    /* lower-cases A-Z only; every other char is returned unchanged */
    private static char toLower(char c)
    {
        short m = MAP(c);

        if ((m & UPPERCASE) != 0)
            c = (char)((int) c + (int) 'a' - (int) 'A');

        return c;
    }

    /* upper-cases a-z only; every other char is returned unchanged */
    private static char toUpper(char c)
    {
        short m = MAP(c);

        if ((m & LOWERCASE) != 0)
            c = (char)((int) c + (int) 'A' - (int) 'a');

        return c;
    }

    /*
      Folds the case of c: to upper case when tocaps is true, otherwise
      to lower case. When xmlTags is true the char is returned unchanged.
    */
    public static char foldCase(char c, boolean tocaps, boolean xmlTags)
    {
        short m;

        if (!xmlTags)
        {
            m = MAP(c);

            if (tocaps)
            {
                if ((m & LOWERCASE) != 0)
                    c = (char)((int) c + (int) 'A' - (int) 'a');
            }
            else /* force to lower case */
            {
                if ((m & UPPERCASE) != 0)
                    c = (char)((int) c + (int) 'a' - (int) 'A');
            }
        }

        return c;
    }

    /* one row of the W3CVersion table below */
    private static class W3CVersionInfo
    {
        String name;
        String voyagerName;
        String profile;
        short code;

        public W3CVersionInfo(String name,
                              String voyagerName,
                              String profile,
                              short code)
        {
            this.name = name;
            this.voyagerName = voyagerName;
            this.profile = profile;
            this.code = code;
        }
    }

    /* the 3 URIs for the XHTML 1.0 DTDs */
    private static final String voyager_loose    = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
    private static final String voyager_strict   = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
    private static final String voyager_frameset = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd";

    private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";

    /* HTML version name, matching XHTML name, XHTML DTD URI and Dict version code */
    private static Lexer.W3CVersionInfo[] W3CVersion =
    {
        new W3CVersionInfo("HTML 4.01",
                           "XHTML 1.0 Strict",
                           voyager_strict,
                           Dict.VERS_HTML40_STRICT),
        new W3CVersionInfo("HTML 4.01 Transitional",
                           "XHTML 1.0 Transitional",
                           voyager_loose,
                           Dict.VERS_HTML40_LOOSE),
        new W3CVersionInfo("HTML 4.01 Frameset",
                           "XHTML 1.0 Frameset",
                           voyager_frameset,
                           Dict.VERS_FRAMES),
        new W3CVersionInfo("HTML 4.0",
                           "XHTML 1.0 Strict",
                           voyager_strict,
                           Dict.VERS_HTML40_STRICT),
        new W3CVersionInfo("HTML 4.0 Transitional",
                           "XHTML 1.0 Transitional",
                           voyager_loose,
                           Dict.VERS_HTML40_LOOSE),
        new W3CVersionInfo("HTML 4.0 Frameset",
                           "XHTML 1.0 Frameset",
                           voyager_frameset,
                           Dict.VERS_FRAMES),
        new W3CVersionInfo("HTML 3.2",
                           "XHTML 1.0 Transitional",
                           voyager_loose,
                           Dict.VERS_HTML32),
        new W3CVersionInfo("HTML 2.0",
                           "XHTML 1.0 Strict",
                           voyager_strict,
                           Dict.VERS_HTML20)
    };

}