X-Git-Url: http://secure.phpeclipse.com
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Tidy.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Tidy.java
new file mode 100644
index 0000000..20862c1
--- /dev/null
+++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/tidy/Tidy.java
@@ -0,0 +1,1424 @@
+/*
+ * @(#)Tidy.java 1.11 2000/08/16
+ *
+ */
+
+/*
+ HTML parser and pretty printer
+
+ Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ Institute of Technology, Institut National de Recherche en
+ Informatique et en Automatique, Keio University). All Rights
+ Reserved.
+
+ Contributing Author(s):
+
+ Dave Raggett
+ Andy Quick (translation to Java)
+
+ The contributing author(s) would like to thank all those who
+ helped with testing, bug fixes, and patience. This wouldn't
+ have been possible without all of you.
+
+ COPYRIGHT NOTICE:
+
+ This software and documentation is provided "as is," and
+ the copyright holders and contributing author(s) make no
+ representations or warranties, express or implied, including
+ but not limited to, warranties of merchantability or fitness
+ for any particular purpose or that the use of the software or
+ documentation will not infringe any third party patents,
+ copyrights, trademarks or other rights.
+
+ The copyright holders and contributing author(s) will not be
+ liable for any direct, indirect, special or consequential damages
+ arising out of any use of the software or documentation, even if
+ advised of the possibility of such damage.
+
+ Permission is hereby granted to use, copy, modify, and distribute
+ this source code, or portions hereof, documentation and executables,
+ for any purpose, without fee, subject to the following restrictions:
+
+ 1. The origin of this source code must not be misrepresented.
+ 2. Altered versions must be plainly marked as such and must
+ not be misrepresented as being the original source.
+ 3. This Copyright notice may not be removed or altered from any
+ source or altered source distribution.
+
+ The copyright holders and contributing author(s) specifically
+ permit, without fee, and encourage the use of this source code
+ as a component for supporting the Hypertext Markup Language in
+ commercial products. If you use this source code in a product,
+ acknowledgment is not required but would be appreciated.
+*/
+
+package net.sourceforge.phpdt.tidy;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.util.Properties;
+
+import org.eclipse.core.resources.IFile;
+import org.eclipse.core.resources.IMarker;
+import org.eclipse.core.runtime.CoreException;
+
+/**
+ *
+ * HTML parser and pretty printer
+ *
+ *
+ * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
+ * See Tidy.java for the copyright notice.
+ * Derived from
+ * HTML Tidy Release 4 Aug 2000
+ *
+ *
+ *
+ * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+ * Institute of Technology, Institut National de Recherche en
+ * Informatique et en Automatique, Keio University). All Rights
+ * Reserved.
+ *
+ *
+ *
+ * Contributing Author(s):
+ * Dave Raggett
+ * Andy Quick (translation to Java)
+ *
+ *
+ *
+ * The contributing author(s) would like to thank all those who
+ * helped with testing, bug fixes, and patience. This wouldn't
+ * have been possible without all of you.
+ *
+ *
+ *
+ * COPYRIGHT NOTICE:
+ *
+ * This software and documentation is provided "as is," and
+ * the copyright holders and contributing author(s) make no
+ * representations or warranties, express or implied, including
+ * but not limited to, warranties of merchantability or fitness
+ * for any particular purpose or that the use of the software or
+ * documentation will not infringe any third party patents,
+ * copyrights, trademarks or other rights.
+ *
+ *
+ *
+ * The copyright holders and contributing author(s) will not be
+ * liable for any direct, indirect, special or consequential damages
+ * arising out of any use of the software or documentation, even if
+ * advised of the possibility of such damage.
+ *
+ *
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this source code, or portions hereof, documentation and executables,
+ * for any purpose, without fee, subject to the following restrictions:
+ *
+ *
+ *
+ *
+ * - The origin of this source code must not be misrepresented.
+ * - Altered versions must be plainly marked as such and must
+ * not be misrepresented as being the original source.
+ * - This Copyright notice may not be removed or altered from any
+ * source or altered source distribution.
+ *
+ *
+ *
+ *
+ * The copyright holders and contributing author(s) specifically
+ * permit, without fee, and encourage the use of this source code
+ * as a component for supporting the Hypertext Markup Language in
+ * commercial products. If you use this source code in a product,
+ * acknowledgment is not required but would be appreciated.
+ *
+ *
+ * @author Dave Raggett
+ * @author Andy Quick (translation to Java)
+ * @version 1.0, 1999/05/22
+ * @version 1.0.1, 1999/05/29
+ * @version 1.1, 1999/06/18 Java Bean
+ * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
+ * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
+ * @version 1.4, 1999/09/04 DOM support
+ * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
+ * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
+ * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
+ * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
+ * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
+ * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
+ * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
+ *
+ */
+
+public class Tidy implements java.io.Serializable {
+
+ static final long serialVersionUID = -2794371560623987718L; /* explicit stream version for java.io.Serializable */
+ /* NOTE(review): java.io.PrintWriter is not Serializable, so the two non-transient writer fields below presumably make default serialization of a Tidy fail - confirm */
+ private boolean initialized = false; /* set to true only by the last statement of init() */
+ private PrintWriter errout = null; /* error output stream */
+ private PrintWriter stderr = null; /* writer around System.err created in init(); the initial errout */
+ private Configuration configuration = null; /* option set read and written by the accessors of this class */
+ private String inputStreamName = "InputStream"; /* name passed to the report header; replaced by the file name or "stdin" during a parse */
+ private int parseErrors = 0; /* copy of lexer.errors from the most recent parse */
+ private int parseWarnings = 0; /* copy of lexer.warnings from the most recent parse */
+
+ public Tidy() {
+ init(); /* creates the Configuration and the default error writers */
+ }
+ /** Returns the Configuration object held by this instance (the object itself, not a copy). */
+ public Configuration getConfiguration() {
+ return configuration;
+ }
+ /** Returns the writer that init() created around System.err. */
+ public PrintWriter getStderr() {
+ return stderr;
+ }
+
+ /**
+ * ParseErrors - the number of errors that occurred in the most
+ * recent parse operation
+ */
+
+ public int getParseErrors() {
+ return parseErrors;
+ }
+
+ /**
+ * ParseWarnings - the number of warnings that occurred in the most
+ * recent parse operation
+ */
+
+ public int getParseWarnings() {
+ return parseWarnings;
+ }
+
+ /**
+ * Errout - the error output stream
+ */
+
+ public PrintWriter getErrout() {
+ return errout;
+ }
+ /** Replaces the error output stream; the private parse routine returns null while it is null. */
+ public void setErrout(PrintWriter errout) {
+ this.errout = errout;
+ }
+
+ /**
+ * Spaces - default indentation
+ * @see Configuration#spaces
+ */
+
+ public void setSpaces(int spaces) {
+ configuration.spaces = spaces;
+ }
+
+ public int getSpaces() {
+ return configuration.spaces;
+ }
+
+ /**
+ * Wraplen - default wrap margin
+ * @see Configuration#wraplen
+ */
+
+ public void setWraplen(int wraplen) {
+ configuration.wraplen = wraplen;
+ }
+
+ public int getWraplen() {
+ return configuration.wraplen;
+ }
+
+ /**
+ * CharEncoding - an encoding code such as Configuration.RAW, ASCII, LATIN1, UTF8, ISO2022 or MACROMAN
+ * @see Configuration#CharEncoding
+ */
+
+ public void setCharEncoding(int charencoding) {
+ configuration.CharEncoding = charencoding;
+ }
+
+ public int getCharEncoding() {
+ return configuration.CharEncoding;
+ }
+
+ /**
+ * Tabsize
+ * @see Configuration#tabsize
+ */
+
+ public void setTabsize(int tabsize) {
+ configuration.tabsize = tabsize;
+ }
+
+ public int getTabsize() {
+ return configuration.tabsize;
+ }
+
+ /**
+ * Errfile - file name to write errors to
+ * @see Configuration#errfile
+ */
+
+ public void setErrfile(String errfile) {
+ configuration.errfile = errfile;
+ }
+
+ public String getErrfile() {
+ return configuration.errfile;
+ }
+
+ /**
+ * Writeback - if true then output tidied markup
+ * NOTE: this property is ignored when parsing from an InputStream.
+ * @see Configuration#writeback
+ */
+
+ public void setWriteback(boolean writeback) {
+ configuration.writeback = writeback;
+ }
+
+ public boolean getWriteback() {
+ return configuration.writeback;
+ }
+
+ /**
+ * OnlyErrors - if true normal output is suppressed
+ * @see Configuration#OnlyErrors
+ */
+
+ public void setOnlyErrors(boolean OnlyErrors) {
+ configuration.OnlyErrors = OnlyErrors;
+ }
+
+ public boolean getOnlyErrors() {
+ return configuration.OnlyErrors;
+ }
+
+ /**
+ * ShowWarnings - however errors are always shown
+ * @see Configuration#ShowWarnings
+ */
+
+ public void setShowWarnings(boolean ShowWarnings) {
+ configuration.ShowWarnings = ShowWarnings;
+ }
+
+ public boolean getShowWarnings() {
+ return configuration.ShowWarnings;
+ }
+
+ /**
+ * Quiet - no 'Parsing X', guessed DTD or summary
+ * @see Configuration#Quiet
+ */
+
+ public void setQuiet(boolean Quiet) {
+ configuration.Quiet = Quiet;
+ }
+
+ public boolean getQuiet() {
+ return configuration.Quiet;
+ }
+
+ /**
+ * IndentContent - indent content of appropriate tags
+ * @see Configuration#IndentContent
+ */
+
+ public void setIndentContent(boolean IndentContent) {
+ configuration.IndentContent = IndentContent;
+ }
+
+ public boolean getIndentContent() {
+ return configuration.IndentContent;
+ }
+
+ /**
+ * SmartIndent - does text/block level content affect indentation
+ * @see Configuration#SmartIndent
+ */
+
+ public void setSmartIndent(boolean SmartIndent) {
+ configuration.SmartIndent = SmartIndent;
+ }
+
+ public boolean getSmartIndent() {
+ return configuration.SmartIndent;
+ }
+
+ /**
+ * HideEndTags - suppress optional end tags
+ * @see Configuration#HideEndTags
+ */
+
+ public void setHideEndTags(boolean HideEndTags) {
+ configuration.HideEndTags = HideEndTags;
+ }
+
+ public boolean getHideEndTags() {
+ return configuration.HideEndTags;
+ }
+
+ /**
+ * XmlTags - treat input as XML
+ * @see Configuration#XmlTags
+ */
+
+ public void setXmlTags(boolean XmlTags) {
+ configuration.XmlTags = XmlTags;
+ }
+
+ public boolean getXmlTags() {
+ return configuration.XmlTags;
+ }
+
+ /**
+ * XmlOut - create output as XML
+ * @see Configuration#XmlOut
+ */
+
+ public void setXmlOut(boolean XmlOut) {
+ configuration.XmlOut = XmlOut;
+ }
+
+ public boolean getXmlOut() {
+ return configuration.XmlOut;
+ }
+
+ /**
+ * XHTML - output extensible HTML
+ * @see Configuration#xHTML
+ */
+
+ public void setXHTML(boolean xHTML) {
+ configuration.xHTML = xHTML;
+ }
+
+ public boolean getXHTML() {
+ return configuration.xHTML;
+ }
+
+ /**
+ * RawOut - avoid mapping values > 127 to entities
+ * @see Configuration#RawOut
+ */
+
+ public void setRawOut(boolean RawOut) {
+ configuration.RawOut = RawOut;
+ }
+
+ public boolean getRawOut() {
+ return configuration.RawOut;
+ }
+
+ /**
+ * UpperCaseTags - output tags in upper not lower case
+ * @see Configuration#UpperCaseTags
+ */
+
+ public void setUpperCaseTags(boolean UpperCaseTags) {
+ configuration.UpperCaseTags = UpperCaseTags;
+ }
+
+ public boolean getUpperCaseTags() {
+ return configuration.UpperCaseTags;
+ }
+
+ /**
+ * UpperCaseAttrs - output attributes in upper not lower case
+ * @see Configuration#UpperCaseAttrs
+ */
+
+ public void setUpperCaseAttrs(boolean UpperCaseAttrs) {
+ configuration.UpperCaseAttrs = UpperCaseAttrs;
+ }
+
+ public boolean getUpperCaseAttrs() {
+ return configuration.UpperCaseAttrs;
+ }
+
+ /**
+ * MakeClean - remove presentational clutter
+ * @see Configuration#MakeClean
+ */
+
+ public void setMakeClean(boolean MakeClean) {
+ configuration.MakeClean = MakeClean;
+ }
+
+ public boolean getMakeClean() {
+ return configuration.MakeClean;
+ }
+
+ /**
+ * BreakBeforeBR - o/p newline before <br> or not?
+ * @see Configuration#BreakBeforeBR
+ */
+
+ public void setBreakBeforeBR(boolean BreakBeforeBR) {
+ configuration.BreakBeforeBR = BreakBeforeBR;
+ }
+
+ public boolean getBreakBeforeBR() {
+ return configuration.BreakBeforeBR;
+ }
+
+ /**
+ * BurstSlides - create slides on each h2 element
+ * @see Configuration#BurstSlides
+ */
+
+ public void setBurstSlides(boolean BurstSlides) {
+ configuration.BurstSlides = BurstSlides;
+ }
+
+ public boolean getBurstSlides() {
+ return configuration.BurstSlides;
+ }
+
+ /**
+ * NumEntities - use numeric entities
+ * @see Configuration#NumEntities
+ */
+
+ public void setNumEntities(boolean NumEntities) {
+ configuration.NumEntities = NumEntities;
+ }
+
+ public boolean getNumEntities() {
+ return configuration.NumEntities;
+ }
+
+ /**
+ * QuoteMarks - output " marks as &amp;quot;
+ * @see Configuration#QuoteMarks
+ */
+
+ public void setQuoteMarks(boolean QuoteMarks) {
+ configuration.QuoteMarks = QuoteMarks;
+ }
+
+ public boolean getQuoteMarks() {
+ return configuration.QuoteMarks;
+ }
+
+ /**
+ * QuoteNbsp - output non-breaking space as entity
+ * @see Configuration#QuoteNbsp
+ */
+
+ public void setQuoteNbsp(boolean QuoteNbsp) {
+ configuration.QuoteNbsp = QuoteNbsp;
+ }
+
+ public boolean getQuoteNbsp() {
+ return configuration.QuoteNbsp;
+ }
+
+ /**
+ * QuoteAmpersand - output naked ampersand as &amp;amp;
+ * @see Configuration#QuoteAmpersand
+ */
+
+ public void setQuoteAmpersand(boolean QuoteAmpersand) {
+ configuration.QuoteAmpersand = QuoteAmpersand;
+ }
+
+ public boolean getQuoteAmpersand() {
+ return configuration.QuoteAmpersand;
+ }
+
+ /**
+ * WrapAttVals - wrap within attribute values
+ * @see Configuration#WrapAttVals
+ */
+
+ public void setWrapAttVals(boolean WrapAttVals) {
+ configuration.WrapAttVals = WrapAttVals;
+ }
+
+ public boolean getWrapAttVals() {
+ return configuration.WrapAttVals;
+ }
+
+ /**
+ * WrapScriptlets - wrap within JavaScript string literals
+ * @see Configuration#WrapScriptlets
+ */
+
+ public void setWrapScriptlets(boolean WrapScriptlets) {
+ configuration.WrapScriptlets = WrapScriptlets;
+ }
+
+ public boolean getWrapScriptlets() {
+ return configuration.WrapScriptlets;
+ }
+
+ /**
+ * WrapSection - wrap within <![ ... ]> section tags
+ * @see Configuration#WrapSection
+ */
+
+ public void setWrapSection(boolean WrapSection) {
+ configuration.WrapSection = WrapSection;
+ }
+
+ public boolean getWrapSection() {
+ return configuration.WrapSection;
+ }
+
+ /**
+ * AltText - default text for alt attribute
+ * @see Configuration#altText
+ */
+
+ public void setAltText(String altText) {
+ configuration.altText = altText;
+ }
+
+ public String getAltText() {
+ return configuration.altText;
+ }
+
+ /**
+ * Slidestyle - style sheet for slides
+ * @see Configuration#slidestyle
+ */
+
+ public void setSlidestyle(String slidestyle) {
+ configuration.slidestyle = slidestyle;
+ }
+
+ public String getSlidestyle() {
+ return configuration.slidestyle;
+ }
+
+ /**
+ * XmlPi - add <?xml?> for XML docs
+ * @see Configuration#XmlPi
+ */
+
+ public void setXmlPi(boolean XmlPi) {
+ configuration.XmlPi = XmlPi;
+ }
+
+ public boolean getXmlPi() {
+ return configuration.XmlPi;
+ }
+
+ /**
+ * DropFontTags - discard presentation tags
+ * @see Configuration#DropFontTags
+ */
+
+ public void setDropFontTags(boolean DropFontTags) {
+ configuration.DropFontTags = DropFontTags;
+ }
+
+ public boolean getDropFontTags() {
+ return configuration.DropFontTags;
+ }
+
+ /**
+ * DropEmptyParas - discard empty p elements
+ * @see Configuration#DropEmptyParas
+ */
+
+ public void setDropEmptyParas(boolean DropEmptyParas) {
+ configuration.DropEmptyParas = DropEmptyParas;
+ }
+
+ public boolean getDropEmptyParas() {
+ return configuration.DropEmptyParas;
+ }
+
+ /**
+ * FixComments - fix comments with adjacent hyphens
+ * @see Configuration#FixComments
+ */
+
+ public void setFixComments(boolean FixComments) {
+ configuration.FixComments = FixComments;
+ }
+
+ public boolean getFixComments() {
+ return configuration.FixComments;
+ }
+
+ /**
+ * WrapAsp - wrap within ASP pseudo elements
+ * @see Configuration#WrapAsp
+ */
+
+ public void setWrapAsp(boolean WrapAsp) {
+ configuration.WrapAsp = WrapAsp;
+ }
+
+ public boolean getWrapAsp() {
+ return configuration.WrapAsp;
+ }
+
+ /**
+ * WrapJste - wrap within JSTE pseudo elements
+ * @see Configuration#WrapJste
+ */
+
+ public void setWrapJste(boolean WrapJste) {
+ configuration.WrapJste = WrapJste;
+ }
+
+ public boolean getWrapJste() {
+ return configuration.WrapJste;
+ }
+
+ /**
+ * WrapPhp - wrap within PHP pseudo elements
+ * @see Configuration#WrapPhp
+ */
+
+ public void setWrapPhp(boolean WrapPhp) {
+ configuration.WrapPhp = WrapPhp;
+ }
+
+ public boolean getWrapPhp() {
+ return configuration.WrapPhp;
+ }
+
+ /**
+ * FixBackslash - fix URLs by replacing \ with /
+ * @see Configuration#FixBackslash
+ */
+
+ public void setFixBackslash(boolean FixBackslash) {
+ configuration.FixBackslash = FixBackslash;
+ }
+
+ public boolean getFixBackslash() {
+ return configuration.FixBackslash;
+ }
+
+ /**
+ * IndentAttributes - newline+indent before each attribute
+ * @see Configuration#IndentAttributes
+ */
+
+ public void setIndentAttributes(boolean IndentAttributes) {
+ configuration.IndentAttributes = IndentAttributes;
+ }
+
+ public boolean getIndentAttributes() {
+ return configuration.IndentAttributes;
+ }
+
+ /**
+ * DocType - user specified doctype
+ * omit | auto | strict | loose | fpi
+ * where the fpi is a string similar to
+ * "-//ACME//DTD HTML 3.14159//EN"
+ * Note: for fpi include the double-quotes in the string.
+ * @see Configuration#docTypeStr
+ * @see Configuration#docTypeMode
+ */
+
+ public void setDocType(String doctype) {
+ if (doctype != null) /* a null argument leaves the current setting untouched */
+ configuration.docTypeStr = configuration.parseDocType(doctype, "doctype");
+ }
+
+ /** Returns the doctype mode as its keyword, the stored docTypeStr for DOCTYPE_USER, or null for any other mode. */
+ public String getDocType() {
+     final int mode = configuration.docTypeMode;
+     if (mode == Configuration.DOCTYPE_OMIT) {
+         return "omit";
+     }
+     if (mode == Configuration.DOCTYPE_AUTO) {
+         return "auto";
+     }
+     if (mode == Configuration.DOCTYPE_STRICT) {
+         return "strict";
+     }
+     if (mode == Configuration.DOCTYPE_LOOSE) {
+         return "loose";
+     }
+     if (mode == Configuration.DOCTYPE_USER) {
+         return configuration.docTypeStr;
+     }
+     /* no textual form exists for any other mode */
+     return null;
+ }
+
+ /**
+ * LogicalEmphasis - replace i by em and b by strong
+ * @see Configuration#LogicalEmphasis
+ */
+
+ public void setLogicalEmphasis(boolean LogicalEmphasis) {
+ configuration.LogicalEmphasis = LogicalEmphasis;
+ }
+
+ public boolean getLogicalEmphasis() {
+ return configuration.LogicalEmphasis;
+ }
+
+ /**
+ * XmlPIs - if set to true PIs must end with ?>
+ * @see Configuration#XmlPIs
+ */
+
+ public void setXmlPIs(boolean XmlPIs) {
+ configuration.XmlPIs = XmlPIs;
+ }
+
+ public boolean getXmlPIs() {
+ return configuration.XmlPIs;
+ }
+
+ /**
+ * EncloseText - if true text at body is wrapped in <p>'s
+ * @see Configuration#EncloseBodyText
+ */
+
+ public void setEncloseText(boolean EncloseText) {
+ configuration.EncloseBodyText = EncloseText;
+ }
+
+ public boolean getEncloseText() {
+ return configuration.EncloseBodyText;
+ }
+
+ /**
+ * EncloseBlockText - if true text in blocks is wrapped in <p>'s
+ * @see Configuration#EncloseBlockText
+ */
+
+ public void setEncloseBlockText(boolean EncloseBlockText) {
+ configuration.EncloseBlockText = EncloseBlockText;
+ }
+
+ public boolean getEncloseBlockText() {
+ return configuration.EncloseBlockText;
+ }
+
+ /**
+ * KeepFileTimes - if true last modified time is preserved
+ * this is NOT supported at this time.
+ * @see Configuration#KeepFileTimes
+ */
+
+ public void setKeepFileTimes(boolean KeepFileTimes) {
+ configuration.KeepFileTimes = KeepFileTimes;
+ }
+
+ public boolean getKeepFileTimes() {
+ return configuration.KeepFileTimes;
+ }
+
+ /**
+ * Word2000 - draconian cleaning for Word2000
+ * @see Configuration#Word2000
+ */
+
+ public void setWord2000(boolean Word2000) {
+ configuration.Word2000 = Word2000;
+ }
+
+ public boolean getWord2000() {
+ return configuration.Word2000;
+ }
+
+ /**
+ * TidyMark - add meta element indicating tidied doc
+ * @see Configuration#TidyMark
+ */
+
+ public void setTidyMark(boolean TidyMark) {
+ configuration.TidyMark = TidyMark;
+ }
+
+ public boolean getTidyMark() {
+ return configuration.TidyMark;
+ }
+
+ /**
+ * XmlSpace - if set to yes adds xml:space attr as needed
+ * @see Configuration#XmlSpace
+ */
+
+ public void setXmlSpace(boolean XmlSpace) {
+ configuration.XmlSpace = XmlSpace;
+ }
+
+ public boolean getXmlSpace() {
+ return configuration.XmlSpace;
+ }
+
+ /**
+ * Emacs - if true format error output for GNU Emacs
+ * @see Configuration#Emacs
+ */
+
+ public void setEmacs(boolean Emacs) {
+ configuration.Emacs = Emacs;
+ }
+
+ public boolean getEmacs() {
+ return configuration.Emacs;
+ }
+
+ /**
+ * LiteralAttribs - if true attributes may use newlines
+ * @see Configuration#LiteralAttribs
+ */
+
+ public void setLiteralAttribs(boolean LiteralAttribs) {
+ configuration.LiteralAttribs = LiteralAttribs;
+ }
+
+ public boolean getLiteralAttribs() {
+ return configuration.LiteralAttribs;
+ }
+
+ /**
+ * InputStreamName - the name of the input stream (printed in the
+ * header information). A null name is ignored and the current name kept.
+ */
+ public void setInputStreamName(String name) {
+ if (name != null)
+ inputStreamName = name;
+ }
+
+ public String getInputStreamName() {
+ return inputStreamName;
+ }
+
+ /**
+ * Sets the configuration from a configuration file (delegates to Configuration.parseFile).
+ */
+
+ public void setConfigurationFromFile(String filename) {
+ configuration.parseFile(filename);
+ }
+
+ /**
+ * Sets the configuration from a properties object (delegates to Configuration.addProps).
+ */
+
+ public void setConfigurationFromProps(Properties props) {
+ configuration.addProps(props);
+ }
+
+ /**
+  * First time initialization, which should precede reading the command
+  * line. Run once from the constructor: creates the Configuration,
+  * attaches a new TagTable to it and points both error writers at
+  * System.err.
+  *
+  * The initialized flag is raised only after every step has succeeded;
+  * the private parse routine returns null while it is still false.
+  */
+ private void init() {
+     /*
+      "new" never yields null, so the former null checks on the freshly
+      created Configuration and TagTable could not fire and were removed.
+      */
+     configuration = new Configuration();
+
+     /*
+      The default attribute and entity tables are requested only so that a
+      missing table aborts initialization; the references are not kept.
+      */
+     if (AttributeTable.getDefaultAttributeTable() == null)
+         return;
+
+     TagTable tt = new TagTable();
+     tt.setConfiguration(configuration);
+     configuration.tt = tt;
+
+     if (EntityTable.getDefaultEntityTable() == null)
+         return;
+
+     /* per the original note, the boolean options need no reset: Configuration starts with the same values */
+     configuration.errfile = null;
+
+     stderr = new PrintWriter(System.err, true);
+     errout = stderr;
+     initialized = true;
+ }
+
+ /**
+  * Deletes the PROBLEM markers of iFile (when one is given) and parses in;
+  * if out is non-null, pretty prints to OutputStream out. Failures are now
+  * reported on errout instead of being silently discarded.
+  * @return the root Node, or null if marker deletion or parsing failed
+  */
+ public Node parse(IFile iFile, InputStream in, OutputStream out) {
+     try {
+         if (iFile != null) /* a null file used to end in a NullPointerException */
+             iFile.deleteMarkers(IMarker.PROBLEM, false, 0);
+         return parse(iFile, in, null, out);
+     } catch (CoreException e) { /* the markers could not be deleted */
+         if (errout != null) errout.println(inputStreamName + ": " + e);
+     } catch (IOException e) { /* includes FileNotFoundException */
+         if (errout != null) errout.println(inputStreamName + ": " + e);
+     }
+     return null;
+ }
+
+ /**
+  * Internal routine that actually does the parsing. The caller can pass
+  * either an InputStream or a file name. If both are passed the file name
+  * is preferred; if neither is passed System.in is read.
+  *
+  * @param iFile handed to the Lexer unchanged; main() passes null
+  * @param in stream to parse, ignored when file is non-null
+  * @param file name of the file to parse, or null to parse in
+  * @param out receives the pretty printed markup when non-null
+  * @return the root Node; null if this instance is not initialized, errout
+  *         is null or the parsed tree fails its integrity check
+  * @throws FileNotFoundException if file cannot be opened for reading
+  * @throws IOException kept from the original signature for callers
+  */
+ private Node parse(IFile iFile, InputStream in, String file, OutputStream out) throws FileNotFoundException, IOException {
+     Lexer lexer;
+     Node document = null;
+     Node doctype;
+     Out o = new OutImpl(); /* normal output stream */
+     PPrint pprint;
+
+     /* init() did not complete, or the caller cleared the error stream */
+     if (!initialized || errout == null)
+         return null;
+
+     parseErrors = 0;
+     parseWarnings = 0;
+
+     /* ensure config is self-consistent */
+     configuration.adjust();
+
+     if (file != null) {
+         in = new FileInputStream(file);
+         inputStreamName = file;
+     } else if (in == null) {
+         in = System.in;
+         inputStreamName = "stdin";
+     }
+
+     try {
+         lexer = new Lexer(iFile, new StreamInImpl(in, configuration.CharEncoding, configuration.tabsize), configuration);
+         lexer.errout = errout;
+
+         /* store pointer to lexer in input stream to allow
+            character encoding errors to be reported */
+         lexer.in.lexer = lexer;
+
+         /* Tidy doesn't alter the doctype for generic XML docs */
+         if (configuration.XmlTags)
+             document = ParserImpl.parseXMLDocument(lexer);
+         else {
+             lexer.warnings = 0;
+             if (!configuration.Quiet)
+                 Report.helloMessage(errout, Report.RELEASE_DATE, inputStreamName);
+
+             document = ParserImpl.parseDocument(lexer);
+
+             if (!document.checkNodeIntegrity()) {
+                 Report.badTree(errout);
+                 return null;
+             }
+
+             Clean cleaner = new Clean(configuration.tt);
+
+             /* simplifies <b><b> ... </b> ... </b> etc. */
+             cleaner.nestedEmphasis(document);
+
+             /* cleans up indented text etc. */
+             cleaner.list2BQ(document);
+             cleaner.bQ2Div(document);
+
+             /* replaces i by em and b by strong */
+             if (configuration.LogicalEmphasis)
+                 cleaner.emFromI(document);
+
+             if (configuration.Word2000 && cleaner.isWord2000(document, configuration.tt)) {
+                 /* prune Word2000's <![if ...]> ... <![endif]> */
+                 cleaner.dropSections(lexer, document);
+
+                 /* drop style & class attributes and empty p, span elements */
+                 cleaner.cleanWord2000(lexer, document);
+             }
+
+             /* replaces presentational markup by style rules */
+             if (configuration.MakeClean || configuration.DropFontTags)
+                 cleaner.cleanTree(lexer, document);
+
+             if (!document.checkNodeIntegrity()) {
+                 Report.badTree(errout);
+                 return null;
+             }
+             doctype = document.findDocType();
+             if (document.content != null) {
+                 if (configuration.xHTML)
+                     lexer.setXHTMLDocType(document);
+                 else
+                     lexer.fixDocType(document);
+
+                 if (configuration.TidyMark)
+                     lexer.addGenerator(document);
+             }
+
+             /* ensure presence of initial <?XML version="1.0"?> */
+             if (configuration.XmlOut && configuration.XmlPi)
+                 lexer.fixXMLPI(document);
+
+             if (!configuration.Quiet && document.content != null) {
+                 Report.reportVersion(errout, lexer, inputStreamName, doctype);
+                 Report.reportNumWarnings(errout, lexer);
+             }
+         }
+
+         parseWarnings = lexer.warnings;
+         parseErrors = lexer.errors;
+     } finally {
+         /* close the input, but only if we opened it from the file name; finally also
+            covers the early returns and parse failures that used to leak the stream */
+         if (file != null) {
+             try {
+                 in.close();
+             } catch (IOException e) {
+                 /* nothing useful can be done about a failed close */
+             }
+         }
+     }
+
+     if (lexer.errors > 0)
+         Report.needsAuthorIntervention(errout);
+
+     o.state = StreamIn.FSM_ASCII;
+     o.encoding = configuration.CharEncoding;
+
+     if (!configuration.OnlyErrors && lexer.errors == 0) {
+         if (configuration.BurstSlides) {
+             /* remove doctype to avoid potential clash with
+                markup introduced when bursting into slides */
+             /* discard the document type */
+             doctype = document.findDocType();
+
+             if (doctype != null)
+                 Node.discardElement(doctype);
+
+             /* slides use transitional features */
+             lexer.versions |= Dict.VERS_HTML40_LOOSE;
+
+             /* and patch up doctype to match */
+             if (configuration.xHTML)
+                 lexer.setXHTMLDocType(document);
+             else
+                 lexer.fixDocType(document);
+
+             /* find the body element which may be implicit */
+             Node body = document.findBody(configuration.tt);
+
+             if (body != null) {
+                 pprint = new PPrint(configuration);
+                 Report.reportNumberOfSlides(errout, pprint.countSlides(body));
+                 pprint.createSlides(lexer, document);
+             } else
+                 Report.missingBody(errout);
+         } else if (configuration.writeback && (file != null)) {
+             try {
+                 pprint = new PPrint(configuration);
+                 o.out = new FileOutputStream(file);
+                 try {
+                     if (configuration.XmlTags)
+                         pprint.printXMLTree(o, (short) 0, 0, lexer, document);
+                     else
+                         pprint.printTree(o, (short) 0, 0, lexer, document);
+
+                     pprint.flushLine(o, 0);
+                 } finally {
+                     o.out.close(); /* release the file even if printing fails */
+                 }
+             } catch (IOException e) {
+                 errout.println(file + ": " + e); /* the original ran name and exception together */
+             }
+         } else if (out != null) {
+             pprint = new PPrint(configuration);
+             o.out = out;
+
+             if (configuration.XmlTags)
+                 pprint.printXMLTree(o, (short) 0, 0, lexer, document);
+             else
+                 pprint.printTree(o, (short) 0, 0, lexer, document);
+
+             pprint.flushLine(o, 0);
+         }
+     }
+
+     Report.errorSummary(lexer);
+     return document;
+ }
+
+ /**
+  * Parses InputStream in through parse(IFile, InputStream, OutputStream)
+  * and exposes the resulting tree through its DOM adapter.
+  * If out is non-null, pretty prints to OutputStream out.
+  *
+  * @return the adapter of the root Node cast to a DOM Document, or null
+  *         when the parse produced no root Node
+  */
+ public org.w3c.dom.Document parseDOM(IFile file, InputStream in, OutputStream out) {
+     Node root = parse(file, in, out);
+     return (root == null) ? null : (org.w3c.dom.Document) root.getAdapter();
+ }
+
+ /**
+  * Creates an empty DOM Document: a root node whose only child is an
+  * "html" start tag node.
+  *
+  * @return the DOM adapter of the new root node; the former "return null"
+  *         branch was unreachable because both nodes come straight from
+  *         "new", which never yields null
+  */
+ public static org.w3c.dom.Document createEmptyDocument() {
+     Node document = new Node(Node.RootNode, new byte[0], 0, 0);
+     Node node = new Node(Node.StartTag, new byte[0], 0, 0, "html", new TagTable());
+     Node.insertNodeAtStart(document, node);
+     return (org.w3c.dom.Document) document.getAdapter();
+ }
+
+ /**
+  * Pretty-prints a DOM Document.
+  *
+  * Nothing is written unless doc is a DOMDocumentImpl and out is
+  * non-null. XML or HTML layout is chosen by configuration.XmlTags.
+  *
+  * @param doc document to print
+  * @param out destination stream; this method does not call close on it
+  */
+ public void pprint(org.w3c.dom.Document doc, OutputStream out) {
+     Out sink = new OutImpl();
+
+     if (!(doc instanceof DOMDocumentImpl))
+         return;
+     Node root = ((DOMDocumentImpl) doc).adaptee;
+
+     sink.state = StreamIn.FSM_ASCII;
+     sink.encoding = configuration.CharEncoding;
+     if (out == null)
+         return;
+
+     PPrint printer = new PPrint(configuration);
+     sink.out = out;
+     if (configuration.XmlTags)
+         printer.printXMLTree(sink, (short) 0, 0, null, root);
+     else
+         printer.printTree(sink, (short) 0, 0, null, root);
+     printer.flushLine(sink, 0);
+ }
+
+ /**
+  * Command line interface to parser and pretty printer.
+  * Options (-foo or --foo) stay in force for the files named after them. Each
+  * file is handed to the parser with System.out as the output stream; when no
+  * file is named the markup is read from System.in.
+  * Always ends in System.exit: 2 if any parse counted errors, 1 if only
+  * warnings were counted (or help was requested), otherwise 0.
+  * @param argv command line arguments
+  */
+ public static void main(String[] argv) {
+     int totalerrors = 0;
+     int totalwarnings = 0;
+     String file;
+     String prog = "Tidy";
+     String s;
+     int argc = argv.length + 1; /* always one more than the number of unread arguments */
+     int argIndex = 0;
+     Tidy tidy;
+     Configuration configuration;
+     String arg;
+     String current_errorfile = "stderr";
+
+     tidy = new Tidy();
+     configuration = tidy.getConfiguration();
+
+     /* read command line */
+     while (argc > 0) {
+         if (argc > 1 && argv[argIndex].startsWith("-")) {
+             /* support -foo and --foo */
+             arg = argv[argIndex].substring(1);
+
+             if (arg.length() > 0 && arg.charAt(0) == '-')
+                 arg = arg.substring(1);
+
+             if (arg.equals("xml"))
+                 configuration.XmlTags = true;
+             else if (arg.equals("asxml") || arg.equals("asxhtml"))
+                 configuration.xHTML = true;
+             else if (arg.equals("indent")) {
+                 configuration.IndentContent = true;
+                 configuration.SmartIndent = true;
+             } else if (arg.equals("omit"))
+                 configuration.HideEndTags = true;
+             else if (arg.equals("upper"))
+                 configuration.UpperCaseTags = true;
+             else if (arg.equals("clean"))
+                 configuration.MakeClean = true;
+             else if (arg.equals("raw"))
+                 configuration.CharEncoding = Configuration.RAW;
+             else if (arg.equals("ascii"))
+                 configuration.CharEncoding = Configuration.ASCII;
+             else if (arg.equals("latin1"))
+                 configuration.CharEncoding = Configuration.LATIN1;
+             else if (arg.equals("utf8"))
+                 configuration.CharEncoding = Configuration.UTF8;
+             else if (arg.equals("iso2022"))
+                 configuration.CharEncoding = Configuration.ISO2022;
+             else if (arg.equals("mac"))
+                 configuration.CharEncoding = Configuration.MACROMAN;
+             else if (arg.equals("numeric"))
+                 configuration.NumEntities = true;
+             else if (arg.equals("modify"))
+                 configuration.writeback = true;
+             else if (arg.equals("change")) /* obsolete */
+                 configuration.writeback = true;
+             else if (arg.equals("update")) /* obsolete */
+                 configuration.writeback = true;
+             else if (arg.equals("errors"))
+                 configuration.OnlyErrors = true;
+             else if (arg.equals("quiet"))
+                 configuration.Quiet = true;
+             else if (arg.equals("slides"))
+                 configuration.BurstSlides = true;
+             else if (arg.equals("help") || (argv[argIndex].length() > 1 && (argv[argIndex].charAt(1) == '?' || argv[argIndex].charAt(1) == 'h'))) {
+                 /* the length test keeps a lone "-" from indexing past the end of the string */
+                 Report.helpText(new PrintWriter(System.out, true), prog);
+                 System.exit(1);
+             } else if (arg.equals("config")) {
+                 if (argc >= 3) {
+                     configuration.parseFile(argv[argIndex + 1]);
+                     --argc;
+                     ++argIndex;
+                 }
+             } else if (argv[argIndex].equals("-file") || argv[argIndex].equals("--file") || argv[argIndex].equals("-f")) {
+                 if (argc >= 3) {
+                     configuration.errfile = argv[argIndex + 1];
+                     --argc;
+                     ++argIndex;
+                 }
+             } else if (argv[argIndex].equals("-wrap") || argv[argIndex].equals("--wrap") || argv[argIndex].equals("-w")) {
+                 if (argc >= 3) {
+                     try {
+                         configuration.wraplen = Integer.parseInt(argv[argIndex + 1]);
+                     } catch (NumberFormatException e) {
+                         /* keep the current margin instead of dying with a stack trace */
+                         tidy.getErrout().println(prog + ": invalid wrap margin: " + argv[argIndex + 1]);
+                     }
+                     --argc;
+                     ++argIndex;
+                 }
+             } else if (argv[argIndex].equals("-version") || argv[argIndex].equals("--version") || argv[argIndex].equals("-v")) {
+                 Report.showVersion(tidy.getErrout());
+                 System.exit(0);
+             } else {
+                 s = argv[argIndex];
+
+                 for (int i = 1; i < s.length(); i++) {
+                     if (s.charAt(i) == 'i') {
+                         configuration.IndentContent = true;
+                         configuration.SmartIndent = true;
+                     } else if (s.charAt(i) == 'o')
+                         configuration.HideEndTags = true;
+                     else if (s.charAt(i) == 'u')
+                         configuration.UpperCaseTags = true;
+                     else if (s.charAt(i) == 'c')
+                         configuration.MakeClean = true;
+                     else if (s.charAt(i) == 'n')
+                         configuration.NumEntities = true;
+                     else if (s.charAt(i) == 'm')
+                         configuration.writeback = true;
+                     else if (s.charAt(i) == 'e')
+                         configuration.OnlyErrors = true;
+                     else if (s.charAt(i) == 'q')
+                         configuration.Quiet = true;
+                     else
+                         Report.unknownOption(tidy.getErrout(), s.charAt(i));
+                 }
+             }
+
+             --argc;
+             ++argIndex;
+             continue;
+         }
+
+         /* ensure config is self-consistent */
+         configuration.adjust();
+
+         /* user specified error file */
+         if (configuration.errfile != null) {
+             /* is it same as the currently opened file? */
+             if (!configuration.errfile.equals(current_errorfile)) {
+                 /* no so close previous error file */
+                 if (tidy.getErrout() != tidy.getStderr())
+                     tidy.getErrout().close();
+
+                 /* and try to open the new error file */
+                 try {
+                     tidy.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true));
+                     current_errorfile = configuration.errfile;
+                 } catch (IOException e) {
+                     /* can't be opened so fall back to stderr */
+                     current_errorfile = "stderr";
+                     tidy.setErrout(tidy.getStderr());
+                 }
+             }
+         }
+
+         /* a null file name makes parse() read System.in; the literal "stdin"
+            was opened as a file of that name and never reached standard input */
+         file = (argc > 1) ? argv[argIndex] : null;
+
+         try {
+             tidy.parse(null, null, file, System.out);
+             totalwarnings += tidy.parseWarnings;
+             totalerrors += tidy.parseErrors;
+         } catch (IOException ioe) {
+             /* also covers FileNotFoundException, which was reported the same way */
+             Report.unknownFile(tidy.getErrout(), prog, (file != null) ? file : "stdin");
+         }
+
+         --argc;
+         ++argIndex;
+
+         if (argc <= 1)
+             break;
+     }
+
+     if (totalerrors + totalwarnings > 0)
+         Report.generalInfo(tidy.getErrout());
+
+     if (tidy.getErrout() != tidy.getStderr())
+         tidy.getErrout().close();
+
+     /* return status can be used by scripts */
+
+     if (totalerrors > 0)
+         System.exit(2);
+
+     if (totalwarnings > 0)
+         System.exit(1);
+
+     /* 0 signifies all is ok */
+     System.exit(0);
+ }
+}