X-Git-Url: http://secure.phpeclipse.com diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Tidy.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Tidy.java deleted file mode 100644 index 20862c1..0000000 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Tidy.java +++ /dev/null @@ -1,1424 +0,0 @@ -/* - * @(#)Tidy.java 1.11 2000/08/16 - * - */ - -/* - HTML parser and pretty printer - - Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts - Institute of Technology, Institut National de Recherche en - Informatique et en Automatique, Keio University). All Rights - Reserved. - - Contributing Author(s): - - Dave Raggett - Andy Quick (translation to Java) - - The contributing author(s) would like to thank all those who - helped with testing, bug fixes, and patience. This wouldn't - have been possible without all of you. - - COPYRIGHT NOTICE: - - This software and documentation is provided "as is," and - the copyright holders and contributing author(s) make no - representations or warranties, express or implied, including - but not limited to, warranties of merchantability or fitness - for any particular purpose or that the use of the software or - documentation will not infringe any third party patents, - copyrights, trademarks or other rights. - - The copyright holders and contributing author(s) will not be - liable for any direct, indirect, special or consequential damages - arising out of any use of the software or documentation, even if - advised of the possibility of such damage. - - Permission is hereby granted to use, copy, modify, and distribute - this source code, or portions hereof, documentation and executables, - for any purpose, without fee, subject to the following restrictions: - - 1. The origin of this source code must not be misrepresented. - 2. Altered versions must be plainly marked as such and must - not be misrepresented as being the original source. - 3. This Copyright notice may not be removed or altered from any - source or altered source distribution. - - The copyright holders and contributing author(s) specifically - permit, without fee, and encourage the use of this source code - as a component for supporting the Hypertext Markup Language in - commercial products. If you use this source code in a product, - acknowledgment is not required but would be appreciated. -*/ - -package net.sourceforge.phpdt.tidy; - -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.PrintWriter; -import java.util.Properties; - -import org.eclipse.core.resources.IFile; -import org.eclipse.core.resources.IMarker; -import org.eclipse.core.runtime.CoreException; - -/** - * - *

HTML parser and pretty printer

- * - *

- * (c) 1998-2000 (W3C) MIT, INRIA, Keio University - * See Tidy.java for the copyright notice. - * Derived from - * HTML Tidy Release 4 Aug 2000 - *

- * - *

- * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts - * Institute of Technology, Institut National de Recherche en - * Informatique et en Automatique, Keio University). All Rights - * Reserved. - *

- * - *

- * Contributing Author(s):
- * Dave Raggett
- * Andy Quick (translation to Java) - *

- * - *

- * The contributing author(s) would like to thank all those who - * helped with testing, bug fixes, and patience. This wouldn't - * have been possible without all of you. - *

- * - *

- * COPYRIGHT NOTICE:
- * - * This software and documentation is provided "as is," and - * the copyright holders and contributing author(s) make no - * representations or warranties, express or implied, including - * but not limited to, warranties of merchantability or fitness - * for any particular purpose or that the use of the software or - * documentation will not infringe any third party patents, - * copyrights, trademarks or other rights. - *

- * - *

- * The copyright holders and contributing author(s) will not be - * liable for any direct, indirect, special or consequential damages - * arising out of any use of the software or documentation, even if - * advised of the possibility of such damage. - *

- * - *

- * Permission is hereby granted to use, copy, modify, and distribute - * this source code, or portions hereof, documentation and executables, - * for any purpose, without fee, subject to the following restrictions: - *

- * - *

- *

    - *
  1. The origin of this source code must not be misrepresented.
  2. - *
  3. Altered versions must be plainly marked as such and must - * not be misrepresented as being the original source.
  4. - *
  5. This Copyright notice may not be removed or altered from any - * source or altered source distribution.
  6. - *
- *

- * - *

- * The copyright holders and contributing author(s) specifically - * permit, without fee, and encourage the use of this source code - * as a component for supporting the Hypertext Markup Language in - * commercial products. If you use this source code in a product, - * acknowledgment is not required but would be appreciated. - *

- * - * @author Dave Raggett - * @author Andy Quick (translation to Java) - * @version 1.0, 1999/05/22 - * @version 1.0.1, 1999/05/29 - * @version 1.1, 1999/06/18 Java Bean - * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 - * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 - * @version 1.4, 1999/09/04 DOM support - * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 - * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 - * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 - * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 - * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 - * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 - * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 - * - */ - -public class Tidy implements java.io.Serializable { - - static final long serialVersionUID = -2794371560623987718L; - - private boolean initialized = false; - private PrintWriter errout = null; /* error output stream */ - private PrintWriter stderr = null; - private Configuration configuration = null; - private String inputStreamName = "InputStream"; - private int parseErrors = 0; - private int parseWarnings = 0; - - public Tidy() { - init(); - } - - public Configuration getConfiguration() { - return configuration; - } - - public PrintWriter getStderr() { - return stderr; - } - - /** - * ParseErrors - the number of errors that occurred in the most - * recent parse operation - */ - - public int getParseErrors() { - return parseErrors; - } - - /** - * ParseWarnings - the number of warnings that occurred in the most - * recent parse operation - */ - - public int getParseWarnings() { - return parseWarnings; - } - - /** - * Errout - the error output stream - */ - - public PrintWriter getErrout() { - return errout; - } - - public void setErrout(PrintWriter errout) { - this.errout = errout; - } - - /** - * Spaces - default indentation - * @see org.w3c.tidy.Configuration#spaces - */ - - public void setSpaces(int spaces) { - configuration.spaces = spaces; - } - - public int getSpaces() { - return configuration.spaces; - } - - /** - * Wraplen - default wrap margin - * @see org.w3c.tidy.Configuration#wraplen - */ - - public void setWraplen(int wraplen) { - configuration.wraplen = wraplen; - } - - public int getWraplen() { - return configuration.wraplen; - } - - /** - * CharEncoding - * @see org.w3c.tidy.Configuration#CharEncoding - */ - - public void setCharEncoding(int charencoding) { - configuration.CharEncoding = charencoding; - } - - public int getCharEncoding() { - return configuration.CharEncoding; - } - - /** - * Tabsize - * @see org.w3c.tidy.Configuration#tabsize - */ - - public void setTabsize(int tabsize) { - configuration.tabsize = tabsize; - } - - public int getTabsize() { - return configuration.tabsize; - } - - /** - * Errfile - file name to write errors to - * @see org.w3c.tidy.Configuration#errfile - */ - - public void setErrfile(String errfile) { - configuration.errfile = errfile; - } - - public String getErrfile() { - return configuration.errfile; - } - - /** - * Writeback - if true then output tidied markup - * NOTE: this property is ignored when parsing from an InputStream. - * @see org.w3c.tidy.Configuration#writeback - */ - - public void setWriteback(boolean writeback) { - configuration.writeback = writeback; - } - - public boolean getWriteback() { - return configuration.writeback; - } - - /** - * OnlyErrors - if true normal output is suppressed - * @see org.w3c.tidy.Configuration#OnlyErrors - */ - - public void setOnlyErrors(boolean OnlyErrors) { - configuration.OnlyErrors = OnlyErrors; - } - - public boolean getOnlyErrors() { - return configuration.OnlyErrors; - } - - /** - * ShowWarnings - however errors are always shown - * @see org.w3c.tidy.Configuration#ShowWarnings - */ - - public void setShowWarnings(boolean ShowWarnings) { - configuration.ShowWarnings = ShowWarnings; - } - - public boolean getShowWarnings() { - return configuration.ShowWarnings; - } - - /** - * Quiet - no 'Parsing X', guessed DTD or summary - * @see org.w3c.tidy.Configuration#Quiet - */ - - public void setQuiet(boolean Quiet) { - configuration.Quiet = Quiet; - } - - public boolean getQuiet() { - return configuration.Quiet; - } - - /** - * IndentContent - indent content of appropriate tags - * @see org.w3c.tidy.Configuration#IndentContent - */ - - public void setIndentContent(boolean IndentContent) { - configuration.IndentContent = IndentContent; - } - - public boolean getIndentContent() { - return configuration.IndentContent; - } - - /** - * SmartIndent - does text/block level content effect indentation - * @see org.w3c.tidy.Configuration#SmartIndent - */ - - public void setSmartIndent(boolean SmartIndent) { - configuration.SmartIndent = SmartIndent; - } - - public boolean getSmartIndent() { - return configuration.SmartIndent; - } - - /** - * HideEndTags - suppress optional end tags - * @see org.w3c.tidy.Configuration#HideEndTags - */ - - public void setHideEndTags(boolean HideEndTags) { - configuration.HideEndTags = HideEndTags; - } - - public boolean getHideEndTags() { - return configuration.HideEndTags; - } - - /** - * XmlTags - treat input as XML - * @see org.w3c.tidy.Configuration#XmlTags - */ - - public void setXmlTags(boolean XmlTags) { - configuration.XmlTags = XmlTags; - } - - public boolean getXmlTags() { - return configuration.XmlTags; - } - - /** - * XmlOut - create output as XML - * @see org.w3c.tidy.Configuration#XmlOut - */ - - public void setXmlOut(boolean XmlOut) { - configuration.XmlOut = XmlOut; - } - - public boolean getXmlOut() { - return configuration.XmlOut; - } - - /** - * XHTML - output extensible HTML - * @see org.w3c.tidy.Configuration#xHTML - */ - - public void setXHTML(boolean xHTML) { - configuration.xHTML = xHTML; - } - - public boolean getXHTML() { - return configuration.xHTML; - } - - /** - * RawOut - avoid mapping values > 127 to entities - * @see org.w3c.tidy.Configuration#RawOut - */ - - public void setRawOut(boolean RawOut) { - configuration.RawOut = RawOut; - } - - public boolean getRawOut() { - return configuration.RawOut; - } - - /** - * UpperCaseTags - output tags in upper not lower case - * @see org.w3c.tidy.Configuration#UpperCaseTags - */ - - public void setUpperCaseTags(boolean UpperCaseTags) { - configuration.UpperCaseTags = UpperCaseTags; - } - - public boolean getUpperCaseTags() { - return configuration.UpperCaseTags; - } - - /** - * UpperCaseAttrs - output attributes in upper not lower case - * @see org.w3c.tidy.Configuration#UpperCaseAttrs - */ - - public void setUpperCaseAttrs(boolean UpperCaseAttrs) { - configuration.UpperCaseAttrs = UpperCaseAttrs; - } - - public boolean getUpperCaseAttrs() { - return configuration.UpperCaseAttrs; - } - - /** - * MakeClean - remove presentational clutter - * @see org.w3c.tidy.Configuration#MakeClean - */ - - public void setMakeClean(boolean MakeClean) { - configuration.MakeClean = MakeClean; - } - - public boolean getMakeClean() { - return configuration.MakeClean; - } - - /** - * BreakBeforeBR - o/p newline before <br> or not? - * @see org.w3c.tidy.Configuration#BreakBeforeBR - */ - - public void setBreakBeforeBR(boolean BreakBeforeBR) { - configuration.BreakBeforeBR = BreakBeforeBR; - } - - public boolean getBreakBeforeBR() { - return configuration.BreakBeforeBR; - } - - /** - * BurstSlides - create slides on each h2 element - * @see org.w3c.tidy.Configuration#BurstSlides - */ - - public void setBurstSlides(boolean BurstSlides) { - configuration.BurstSlides = BurstSlides; - } - - public boolean getBurstSlides() { - return configuration.BurstSlides; - } - - /** - * NumEntities - use numeric entities - * @see org.w3c.tidy.Configuration#NumEntities - */ - - public void setNumEntities(boolean NumEntities) { - configuration.NumEntities = NumEntities; - } - - public boolean getNumEntities() { - return configuration.NumEntities; - } - - /** - * QuoteMarks - output " marks as &quot; - * @see org.w3c.tidy.Configuration#QuoteMarks - */ - - public void setQuoteMarks(boolean QuoteMarks) { - configuration.QuoteMarks = QuoteMarks; - } - - public boolean getQuoteMarks() { - return configuration.QuoteMarks; - } - - /** - * QuoteNbsp - output non-breaking space as entity - * @see org.w3c.tidy.Configuration#QuoteNbsp - */ - - public void setQuoteNbsp(boolean QuoteNbsp) { - configuration.QuoteNbsp = QuoteNbsp; - } - - public boolean getQuoteNbsp() { - return configuration.QuoteNbsp; - } - - /** - * QuoteAmpersand - output naked ampersand as & - * @see org.w3c.tidy.Configuration#QuoteAmpersand - */ - - public void setQuoteAmpersand(boolean QuoteAmpersand) { - configuration.QuoteAmpersand = QuoteAmpersand; - } - - public boolean getQuoteAmpersand() { - return configuration.QuoteAmpersand; - } - - /** - * WrapAttVals - wrap within attribute values - * @see org.w3c.tidy.Configuration#WrapAttVals - */ - - public void setWrapAttVals(boolean WrapAttVals) { - configuration.WrapAttVals = WrapAttVals; - } - - public boolean getWrapAttVals() { - return configuration.WrapAttVals; - } - - /** - * WrapScriptlets - wrap within JavaScript string literals - * @see org.w3c.tidy.Configuration#WrapScriptlets - */ - - public void setWrapScriptlets(boolean WrapScriptlets) { - configuration.WrapScriptlets = WrapScriptlets; - } - - public boolean getWrapScriptlets() { - return configuration.WrapScriptlets; - } - - /** - * WrapSection - wrap within <![ ... ]> section tags - * @see org.w3c.tidy.Configuration#WrapSection - */ - - public void setWrapSection(boolean WrapSection) { - configuration.WrapSection = WrapSection; - } - - public boolean getWrapSection() { - return configuration.WrapSection; - } - - /** - * AltText - default text for alt attribute - * @see org.w3c.tidy.Configuration#altText - */ - - public void setAltText(String altText) { - configuration.altText = altText; - } - - public String getAltText() { - return configuration.altText; - } - - /** - * Slidestyle - style sheet for slides - * @see org.w3c.tidy.Configuration#slidestyle - */ - - public void setSlidestyle(String slidestyle) { - configuration.slidestyle = slidestyle; - } - - public String getSlidestyle() { - return configuration.slidestyle; - } - - /** - * XmlPi - add <?xml?> for XML docs - * @see org.w3c.tidy.Configuration#XmlPi - */ - - public void setXmlPi(boolean XmlPi) { - configuration.XmlPi = XmlPi; - } - - public boolean getXmlPi() { - return configuration.XmlPi; - } - - /** - * DropFontTags - discard presentation tags - * @see org.w3c.tidy.Configuration#DropFontTags - */ - - public void setDropFontTags(boolean DropFontTags) { - configuration.DropFontTags = DropFontTags; - } - - public boolean getDropFontTags() { - return configuration.DropFontTags; - } - - /** - * DropEmptyParas - discard empty p elements - * @see org.w3c.tidy.Configuration#DropEmptyParas - */ - - public void setDropEmptyParas(boolean DropEmptyParas) { - configuration.DropEmptyParas = DropEmptyParas; - } - - public boolean getDropEmptyParas() { - return configuration.DropEmptyParas; - } - - /** - * FixComments - fix comments with adjacent hyphens - * @see org.w3c.tidy.Configuration#FixComments - */ - - public void setFixComments(boolean FixComments) { - configuration.FixComments = FixComments; - } - - public boolean getFixComments() { - return configuration.FixComments; - } - - /** - * WrapAsp - wrap within ASP pseudo elements - * @see org.w3c.tidy.Configuration#WrapAsp - */ - - public void setWrapAsp(boolean WrapAsp) { - configuration.WrapAsp = WrapAsp; - } - - public boolean getWrapAsp() { - return configuration.WrapAsp; - } - - /** - * WrapJste - wrap within JSTE pseudo elements - * @see org.w3c.tidy.Configuration#WrapJste - */ - - public void setWrapJste(boolean WrapJste) { - configuration.WrapJste = WrapJste; - } - - public boolean getWrapJste() { - return configuration.WrapJste; - } - - /** - * WrapPhp - wrap within PHP pseudo elements - * @see org.w3c.tidy.Configuration#WrapPhp - */ - - public void setWrapPhp(boolean WrapPhp) { - configuration.WrapPhp = WrapPhp; - } - - public boolean getWrapPhp() { - return configuration.WrapPhp; - } - - /** - * FixBackslash - fix URLs by replacing \ with / - * @see org.w3c.tidy.Configuration#FixBackslash - */ - - public void setFixBackslash(boolean FixBackslash) { - configuration.FixBackslash = FixBackslash; - } - - public boolean getFixBackslash() { - return configuration.FixBackslash; - } - - /** - * IndentAttributes - newline+indent before each attribute - * @see org.w3c.tidy.Configuration#IndentAttributes - */ - - public void setIndentAttributes(boolean IndentAttributes) { - configuration.IndentAttributes = IndentAttributes; - } - - public boolean getIndentAttributes() { - return configuration.IndentAttributes; - } - - /** - * DocType - user specified doctype - * omit | auto | strict | loose | fpi - * where the fpi is a string similar to - * "-//ACME//DTD HTML 3.14159//EN" - * Note: for fpi include the double-quotes in the string. - * @see org.w3c.tidy.Configuration#docTypeStr - * @see org.w3c.tidy.Configuration#docTypeMode - */ - - public void setDocType(String doctype) { - if (doctype != null) - configuration.docTypeStr = configuration.parseDocType(doctype, "doctype"); - } - - public String getDocType() { - String result = null; - switch (configuration.docTypeMode) { - case Configuration.DOCTYPE_OMIT : - result = "omit"; - break; - case Configuration.DOCTYPE_AUTO : - result = "auto"; - break; - case Configuration.DOCTYPE_STRICT : - result = "strict"; - break; - case Configuration.DOCTYPE_LOOSE : - result = "loose"; - break; - case Configuration.DOCTYPE_USER : - result = configuration.docTypeStr; - break; - } - return result; - } - - /** - * LogicalEmphasis - replace i by em and b by strong - * @see org.w3c.tidy.Configuration#LogicalEmphasis - */ - - public void setLogicalEmphasis(boolean LogicalEmphasis) { - configuration.LogicalEmphasis = LogicalEmphasis; - } - - public boolean getLogicalEmphasis() { - return configuration.LogicalEmphasis; - } - - /** - * XmlPIs - if set to true PIs must end with ?> - * @see org.w3c.tidy.Configuration#XmlPIs - */ - - public void setXmlPIs(boolean XmlPIs) { - configuration.XmlPIs = XmlPIs; - } - - public boolean getXmlPIs() { - return configuration.XmlPIs; - } - - /** - * EncloseText - if true text at body is wrapped in <p>'s - * @see org.w3c.tidy.Configuration#EncloseBodyText - */ - - public void setEncloseText(boolean EncloseText) { - configuration.EncloseBodyText = EncloseText; - } - - public boolean getEncloseText() { - return configuration.EncloseBodyText; - } - - /** - * EncloseBlockText - if true text in blocks is wrapped in <p>'s - * @see org.w3c.tidy.Configuration#EncloseBlockText - */ - - public void setEncloseBlockText(boolean EncloseBlockText) { - configuration.EncloseBlockText = EncloseBlockText; - } - - public boolean getEncloseBlockText() { - return configuration.EncloseBlockText; - } - - /** - * KeepFileTimes - if true last modified time is preserved
- * this is NOT supported at this time. - * @see org.w3c.tidy.Configuration#KeepFileTimes - */ - - public void setKeepFileTimes(boolean KeepFileTimes) { - configuration.KeepFileTimes = KeepFileTimes; - } - - public boolean getKeepFileTimes() { - return configuration.KeepFileTimes; - } - - /** - * Word2000 - draconian cleaning for Word2000 - * @see org.w3c.tidy.Configuration#Word2000 - */ - - public void setWord2000(boolean Word2000) { - configuration.Word2000 = Word2000; - } - - public boolean getWord2000() { - return configuration.Word2000; - } - - /** - * TidyMark - add meta element indicating tidied doc - * @see org.w3c.tidy.Configuration#TidyMark - */ - - public void setTidyMark(boolean TidyMark) { - configuration.TidyMark = TidyMark; - } - - public boolean getTidyMark() { - return configuration.TidyMark; - } - - /** - * XmlSpace - if set to yes adds xml:space attr as needed - * @see org.w3c.tidy.Configuration#XmlSpace - */ - - public void setXmlSpace(boolean XmlSpace) { - configuration.XmlSpace = XmlSpace; - } - - public boolean getXmlSpace() { - return configuration.XmlSpace; - } - - /** - * Emacs - if true format error output for GNU Emacs - * @see org.w3c.tidy.Configuration#Emacs - */ - - public void setEmacs(boolean Emacs) { - configuration.Emacs = Emacs; - } - - public boolean getEmacs() { - return configuration.Emacs; - } - - /** - * LiteralAttribs - if true attributes may use newlines - * @see org.w3c.tidy.Configuration#LiteralAttribs - */ - - public void setLiteralAttribs(boolean LiteralAttribs) { - configuration.LiteralAttribs = LiteralAttribs; - } - - public boolean getLiteralAttribs() { - return configuration.LiteralAttribs; - } - - /** - * InputStreamName - the name of the input stream (printed in the - * header information). - */ - public void setInputStreamName(String name) { - if (name != null) - inputStreamName = name; - } - - public String getInputStreamName() { - return inputStreamName; - } - - /** - * Sets the configuration from a configuration file. - */ - - public void setConfigurationFromFile(String filename) { - configuration.parseFile(filename); - } - - /** - * Sets the configuration from a properties object. - */ - - public void setConfigurationFromProps(Properties props) { - configuration.addProps(props); - } - - /** - * first time initialization which should - * precede reading the command line - */ - - private void init() { - configuration = new Configuration(); - if (configuration == null) - return; - - AttributeTable at = AttributeTable.getDefaultAttributeTable(); - if (at == null) - return; - TagTable tt = new TagTable(); - if (tt == null) - return; - tt.setConfiguration(configuration); - configuration.tt = tt; - EntityTable et = EntityTable.getDefaultEntityTable(); - if (et == null) - return; - - /* Unnecessary - same initial values in Configuration - Configuration.XmlTags = false; - Configuration.XmlOut = false; - Configuration.HideEndTags = false; - Configuration.UpperCaseTags = false; - Configuration.MakeClean = false; - Configuration.writeback = false; - Configuration.OnlyErrors = false; - */ - - configuration.errfile = null; - stderr = new PrintWriter(System.err, true); - errout = stderr; - initialized = true; - } - - /** - * Parses InputStream in and returns the root Node. - * If out is non-null, pretty prints to OutputStream out. - */ - - public Node parse(IFile iFile, InputStream in, OutputStream out) { - Node document = null; - - try { - iFile.deleteMarkers(IMarker.PROBLEM, false, 0); - document = parse(iFile, in, null, out); - } catch (CoreException e) { - } catch (FileNotFoundException fnfe) { - } catch (IOException e) { - } - - return document; - } - - /** - * Internal routine that actually does the parsing. The caller - * can pass either an InputStream or file name. If both are passed, - * the file name is preferred. - */ - - private Node parse(IFile iFile, InputStream in, String file, OutputStream out) throws FileNotFoundException, IOException { - Lexer lexer; - Node document = null; - Node doctype; - Out o = new OutImpl(); /* normal output stream */ - PPrint pprint; - - if (!initialized) - return null; - - if (errout == null) - return null; - - parseErrors = 0; - parseWarnings = 0; - - /* ensure config is self-consistent */ - configuration.adjust(); - - if (file != null) { - in = new FileInputStream(file); - inputStreamName = file; - } else if (in == null) { - in = System.in; - inputStreamName = "stdin"; - } - - if (in != null) { - lexer = new Lexer(iFile,new StreamInImpl(in, configuration.CharEncoding, configuration.tabsize), configuration); - lexer.errout = errout; - - /* - store pointer to lexer in input stream - to allow character encoding errors to be - reported - */ - lexer.in.lexer = lexer; - - /* Tidy doesn't alter the doctype for generic XML docs */ - if (configuration.XmlTags) - document = ParserImpl.parseXMLDocument(lexer); - else { - lexer.warnings = 0; - if (!configuration.Quiet) - Report.helloMessage(errout, Report.RELEASE_DATE, inputStreamName); - - document = ParserImpl.parseDocument(lexer); - - if (!document.checkNodeIntegrity()) { - Report.badTree(errout); - return null; - } - - Clean cleaner = new Clean(configuration.tt); - - /* simplifies ... ... etc. */ - cleaner.nestedEmphasis(document); - - /* cleans up indented text etc. */ - cleaner.list2BQ(document); - cleaner.bQ2Div(document); - - /* replaces i by em and b by strong */ - if (configuration.LogicalEmphasis) - cleaner.emFromI(document); - - if (configuration.Word2000 && cleaner.isWord2000(document, configuration.tt)) { - /* prune Word2000's ... */ - cleaner.dropSections(lexer, document); - - /* drop style & class attributes and empty p, span elements */ - cleaner.cleanWord2000(lexer, document); - } - - /* replaces presentational markup by style rules */ - if (configuration.MakeClean || configuration.DropFontTags) - cleaner.cleanTree(lexer, document); - - if (!document.checkNodeIntegrity()) { - Report.badTree(errout); - return null; - } - doctype = document.findDocType(); - if (document.content != null) { - if (configuration.xHTML) - lexer.setXHTMLDocType(document); - else - lexer.fixDocType(document); - - if (configuration.TidyMark) - lexer.addGenerator(document); - } - - /* ensure presence of initial */ - if (configuration.XmlOut && configuration.XmlPi) - lexer.fixXMLPI(document); - - if (!configuration.Quiet && document.content != null) { - Report.reportVersion(errout, lexer, inputStreamName, doctype); - Report.reportNumWarnings(errout, lexer); - } - } - - parseWarnings = lexer.warnings; - parseErrors = lexer.errors; - - // Try to close the InputStream but only if if we created it. - - if ((file != null) && (in != System.in)) { - try { - in.close(); - } catch (IOException e) { - } - } - - if (lexer.errors > 0) - Report.needsAuthorIntervention(errout); - - o.state = StreamIn.FSM_ASCII; - o.encoding = configuration.CharEncoding; - - if (!configuration.OnlyErrors && lexer.errors == 0) { - if (configuration.BurstSlides) { - Node body; - - body = null; - /* - remove doctype to avoid potential clash with - markup introduced when bursting into slides - */ - /* discard the document type */ - doctype = document.findDocType(); - - if (doctype != null) - Node.discardElement(doctype); - - /* slides use transitional features */ - lexer.versions |= Dict.VERS_HTML40_LOOSE; - - /* and patch up doctype to match */ - if (configuration.xHTML) - lexer.setXHTMLDocType(document); - else - lexer.fixDocType(document); - - /* find the body element which may be implicit */ - body = document.findBody(configuration.tt); - - if (body != null) { - pprint = new PPrint(configuration); - Report.reportNumberOfSlides(errout, pprint.countSlides(body)); - pprint.createSlides(lexer, document); - } else - Report.missingBody(errout); - } else if (configuration.writeback && (file != null)) { - try { - pprint = new PPrint(configuration); - o.out = new FileOutputStream(file); - - if (configuration.XmlTags) - pprint.printXMLTree(o, (short) 0, 0, lexer, document); - else - pprint.printTree(o, (short) 0, 0, lexer, document); - - pprint.flushLine(o, 0); - o.out.close(); - } catch (IOException e) { - errout.println(file + e.toString()); - } - } else if (out != null) { - pprint = new PPrint(configuration); - o.out = out; - - if (configuration.XmlTags) - pprint.printXMLTree(o, (short) 0, 0, lexer, document); - else - pprint.printTree(o, (short) 0, 0, lexer, document); - - pprint.flushLine(o, 0); - } - - } - - Report.errorSummary(lexer); - } - return document; - } - - /** - * Parses InputStream in and returns a DOM Document node. - * If out is non-null, pretty prints to OutputStream out. - */ - - public org.w3c.dom.Document parseDOM(IFile file, InputStream in, OutputStream out) { - Node document = parse(file, in, out); - if (document != null) - return (org.w3c.dom.Document) document.getAdapter(); - else - return null; - } - - /** - * Creates an empty DOM Document. - */ - - public static org.w3c.dom.Document createEmptyDocument() { - Node document = new Node(Node.RootNode, new byte[0], 0, 0); - Node node = new Node(Node.StartTag, new byte[0], 0, 0, "html", new TagTable()); - if (document != null && node != null) { - Node.insertNodeAtStart(document, node); - return (org.w3c.dom.Document) document.getAdapter(); - } else { - return null; - } - } - - /** - * Pretty-prints a DOM Document. - */ - - public void pprint(org.w3c.dom.Document doc, OutputStream out) { - Out o = new OutImpl(); - PPrint pprint; - Node document; - - if (!(doc instanceof DOMDocumentImpl)) { - return; - } - document = ((DOMDocumentImpl) doc).adaptee; - - o.state = StreamIn.FSM_ASCII; - o.encoding = configuration.CharEncoding; - - if (out != null) { - pprint = new PPrint(configuration); - o.out = out; - - if (configuration.XmlTags) - pprint.printXMLTree(o, (short) 0, 0, null, document); - else - pprint.printTree(o, (short) 0, 0, null, document); - - pprint.flushLine(o, 0); - } - } - - /** - * Command line interface to parser and pretty printer. - */ - - public static void main(String[] argv) { - int totalerrors = 0; - int totalwarnings = 0; - String file; - InputStream in; - String prog = "Tidy"; - Node document; - Node doctype; - Lexer lexer; - String s; - Out out = new OutImpl(); /* normal output stream */ - PPrint pprint; - int argc = argv.length + 1; - int argIndex = 0; - Tidy tidy; - Configuration configuration; - String arg; - String current_errorfile = "stderr"; - - tidy = new Tidy(); - configuration = tidy.getConfiguration(); - - /* read command line */ - - while (argc > 0) { - if (argc > 1 && argv[argIndex].startsWith("-")) { - /* support -foo and --foo */ - arg = argv[argIndex].substring(1); - - if (arg.length() > 0 && arg.charAt(0) == '-') - arg = arg.substring(1); - - if (arg.equals("xml")) - configuration.XmlTags = true; - else if (arg.equals("asxml") || arg.equals("asxhtml")) - configuration.xHTML = true; - else if (arg.equals("indent")) { - configuration.IndentContent = true; - configuration.SmartIndent = true; - } else if (arg.equals("omit")) - configuration.HideEndTags = true; - else if (arg.equals("upper")) - configuration.UpperCaseTags = true; - else if (arg.equals("clean")) - configuration.MakeClean = true; - else if (arg.equals("raw")) - configuration.CharEncoding = Configuration.RAW; - else if (arg.equals("ascii")) - configuration.CharEncoding = Configuration.ASCII; - else if (arg.equals("latin1")) - configuration.CharEncoding = Configuration.LATIN1; - else if (arg.equals("utf8")) - configuration.CharEncoding = Configuration.UTF8; - else if (arg.equals("iso2022")) - configuration.CharEncoding = Configuration.ISO2022; - else if (arg.equals("mac")) - configuration.CharEncoding = Configuration.MACROMAN; - else if (arg.equals("numeric")) - configuration.NumEntities = true; - else if (arg.equals("modify")) - configuration.writeback = true; - else if (arg.equals("change")) /* obsolete */ - configuration.writeback = true; - else if (arg.equals("update")) /* obsolete */ - configuration.writeback = true; - else if (arg.equals("errors")) - configuration.OnlyErrors = true; - else if (arg.equals("quiet")) - configuration.Quiet = true; - else if (arg.equals("slides")) - configuration.BurstSlides = true; - else if (arg.equals("help") || argv[argIndex].charAt(1) == '?' || argv[argIndex].charAt(1) == 'h') { - Report.helpText(new PrintWriter(System.out, true), prog); - System.exit(1); - } else if (arg.equals("config")) { - if (argc >= 3) { - configuration.parseFile(argv[argIndex + 1]); - --argc; - ++argIndex; - } - } else if (argv[argIndex].equals("-file") || argv[argIndex].equals("--file") || argv[argIndex].equals("-f")) { - if (argc >= 3) { - configuration.errfile = argv[argIndex + 1]; - --argc; - ++argIndex; - } - } else if (argv[argIndex].equals("-wrap") || argv[argIndex].equals("--wrap") || argv[argIndex].equals("-w")) { - if (argc >= 3) { - configuration.wraplen = Integer.parseInt(argv[argIndex + 1]); - --argc; - ++argIndex; - } - } else if (argv[argIndex].equals("-version") || argv[argIndex].equals("--version") || argv[argIndex].equals("-v")) { - Report.showVersion(tidy.getErrout()); - System.exit(0); - } else { - s = argv[argIndex]; - - for (int i = 1; i < s.length(); i++) { - if (s.charAt(i) == 'i') { - configuration.IndentContent = true; - configuration.SmartIndent = true; - } else if (s.charAt(i) == 'o') - configuration.HideEndTags = true; - else if (s.charAt(i) == 'u') - configuration.UpperCaseTags = true; - else if (s.charAt(i) == 'c') - configuration.MakeClean = true; - else if (s.charAt(i) == 'n') - configuration.NumEntities = true; - else if (s.charAt(i) == 'm') - configuration.writeback = true; - else if (s.charAt(i) == 'e') - configuration.OnlyErrors = true; - else if (s.charAt(i) == 'q') - configuration.Quiet = true; - else - Report.unknownOption(tidy.getErrout(), s.charAt(i)); - } - } - - --argc; - ++argIndex; - continue; - } - - /* ensure config is self-consistent */ - configuration.adjust(); - - /* user specified error file */ - if (configuration.errfile != null) { - /* is it same as the currently opened file? */ - if (!configuration.errfile.equals(current_errorfile)) { - /* no so close previous error file */ - - if (tidy.getErrout() != tidy.getStderr()) - tidy.getErrout().close(); - - /* and try to open the new error file */ - try { - tidy.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true)); - current_errorfile = configuration.errfile; - } catch (IOException e) { - /* can't be opened so fall back to stderr */ - current_errorfile = "stderr"; - tidy.setErrout(tidy.getStderr()); - } - } - } - - if (argc > 1) { - file = argv[argIndex]; - } else { - file = "stdin"; - } - - try { - document = tidy.parse(null, null, file, System.out); - totalwarnings += tidy.parseWarnings; - totalerrors += tidy.parseErrors; - } catch (FileNotFoundException fnfe) { - Report.unknownFile(tidy.getErrout(), prog, file); - } catch (IOException ioe) { - Report.unknownFile(tidy.getErrout(), prog, file); - } - - --argc; - ++argIndex; - - if (argc <= 1) - break; - } - - if (totalerrors + totalwarnings > 0) - Report.generalInfo(tidy.getErrout()); - - if (tidy.getErrout() != tidy.getStderr()) - tidy.getErrout().close(); - - /* return status can be used by scripts */ - - if (totalerrors > 0) - System.exit(2); - - if (totalwarnings > 0) - System.exit(1); - - /* 0 signifies all is ok */ - System.exit(0); - } -}