2 * @(#)Configuration.java 1.11 2000/08/16
6 package net.sourceforge.phpdt.tidy.w3c;
10 * Read configuration file and manage configuration properties.
12 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13 * See Tidy.java for the copyright notice.
14 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
15 * HTML Tidy Release 4 Aug 2000</a>
17 * @author Dave Raggett <dsr@w3.org>
18 * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19 * @version 1.0, 1999/05/22
20 * @version 1.0.1, 1999/05/29
21 * @version 1.1, 1999/06/18 Java Bean
22 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24 * @version 1.4, 1999/09/04 DOM support
25 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
35 Configuration files associate a property name with a value.
36 The format is that of a Java .properties file.
39 import java.io.FileInputStream;
40 import java.io.IOException;
41 import java.util.Enumeration;
42 import java.util.Properties;
43 import java.util.StringTokenizer;
45 public class Configuration implements java.io.Serializable {
47 /* character encodings */
48 public static final int RAW = 0;
49 public static final int ASCII = 1;
50 public static final int LATIN1 = 2;
51 public static final int UTF8 = 3;
52 public static final int ISO2022 = 4;
53 public static final int MACROMAN = 5;
55 /* mode controlling treatment of doctype */
56 public static final int DOCTYPE_OMIT = 0;
57 public static final int DOCTYPE_AUTO = 1;
58 public static final int DOCTYPE_STRICT= 2;
59 public static final int DOCTYPE_LOOSE = 3;
60 public static final int DOCTYPE_USER = 4;
62 protected int spaces = 2; /* default indentation */
63 protected int wraplen = 68; /* default wrap margin */
64 protected int CharEncoding = ASCII; /* one of the character encoding constants above */
65 protected int tabsize = 4; /* set from the tab-size option */
67 protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */
68 public String altText = null; /* default text for alt attribute */
69 protected String slidestyle = null; /* style sheet for slides */
70 protected String docTypeStr = null; /* user specified doctype */
71 protected String errfile = null; /* file name to write errors to */
72 protected boolean writeback = false; /* if true then output tidied markup */
74 public boolean OnlyErrors = false; /* if true normal output is suppressed */
75 public boolean ShowWarnings = true; /* however errors are always shown */
76 public boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */
77 public boolean IndentContent = false; /* indent content of appropriate tags */
78 public boolean SmartIndent = false; /* does text/block level content affect indentation */
79 public boolean HideEndTags = false; /* suppress optional end tags */
80 public boolean XmlTags = false; /* treat input as XML */
81 public boolean XmlOut = false; /* create output as XML */
82 public boolean xHTML = false; /* output extensible HTML */
83 public boolean XmlPi = false; /* add <?xml?> for XML docs */
84 public boolean RawOut = false; /* avoid mapping values > 127 to entities */
85 public boolean UpperCaseTags = false; /* output tags in upper not lower case */
86 public boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */
87 public boolean MakeClean = false; /* remove presentational clutter */
88 public boolean LogicalEmphasis = false; /* replace i by em and b by strong */
89 public boolean DropFontTags = false; /* discard presentation tags */
90 public boolean DropEmptyParas = true; /* discard empty p elements */
91 public boolean FixComments = true; /* fix comments with adjacent hyphens */
92 public boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */
93 public boolean BurstSlides = false; /* create slides on each h2 element */
94 public boolean NumEntities = false; /* use numeric entities */
95 public boolean QuoteMarks = false; /* output " marks as &quot; */
96 public boolean QuoteNbsp = true; /* output non-breaking space as entity */
97 public boolean QuoteAmpersand = true; /* output naked ampersand as &amp; */
98 public boolean WrapAttVals = false; /* wrap within attribute values */
99 public boolean WrapScriptlets = false; /* wrap within JavaScript string literals */
100 public boolean WrapSection = true; /* wrap within <![ ... ]> section tags */
101 public boolean WrapAsp = true; /* wrap within ASP pseudo elements */
102 public boolean WrapJste = true; /* wrap within JSTE pseudo elements */
103 public boolean WrapPhp = true; /* wrap within PHP pseudo elements */
104 public boolean FixBackslash = true; /* fix URLs by replacing \ with / */
105 public boolean IndentAttributes = false; /* newline+indent before each attribute */
106 public boolean XmlPIs = false; /* if set to yes PIs must end with ?> */
107 public boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */
108 public boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */
109 public boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
110 public boolean KeepFileTimes = true; /* if yes last modified time is preserved */
111 public boolean Word2000 = false; /* draconian cleaning for Word2000 */
112 public boolean TidyMark = true; /* add meta element indicating tidied doc */
113 public boolean Emacs = false; /* if true format error output for GNU Emacs */
114 public boolean LiteralAttribs = false; /* if true attributes may use newlines */
116 // gschadow patch start
117 /** Remove all scripting XML tags (ASP, JSP, PHP,...) */
118 public boolean DropPseudoXMLCrap = false;
119 // gschadow patch end
121 protected TagTable tt; /* TagTable associated with this Configuration */
/*
 * Raw option name/value pairs collected by addProps and parseFile.
 * NOTE(review): the field is transient and the class is Serializable, so the
 * initializer below does not run on deserialization and the field would be
 * null afterwards unless it is restored elsewhere - confirm.
 */
123 private transient Properties _properties = new Properties();
125 public Configuration()
129 public void addProps( Properties p )
131 Enumeration enum = p.propertyNames();
132 while (enum.hasMoreElements())
134 String key = (String) enum.nextElement();
135 String value = p.getProperty(key);
136 _properties.put(key, value);
141 public void parseFile( String filename )
145 _properties.load( new FileInputStream( filename ) );
147 catch (IOException e)
149 System.err.println(filename + e.toString());
155 private void parseProps()
159 value = _properties.getProperty("indent-spaces");
161 spaces = parseInt(value, "indent-spaces");
163 value = _properties.getProperty("wrap");
165 wraplen = parseInt(value, "wrap");
167 value = _properties.getProperty("wrap-attributes");
169 WrapAttVals = parseBool(value, "wrap-attributes");
171 value = _properties.getProperty("wrap-script-literals");
173 WrapScriptlets = parseBool(value, "wrap-script-literals");
175 value = _properties.getProperty("wrap-sections");
177 WrapSection = parseBool(value, "wrap-sections");
179 value = _properties.getProperty("wrap-asp");
181 WrapAsp = parseBool(value, "wrap-asp");
183 value = _properties.getProperty("wrap-jste");
185 WrapJste = parseBool(value, "wrap-jste");
187 value = _properties.getProperty("wrap-php");
189 WrapPhp = parseBool(value, "wrap-php");
191 value = _properties.getProperty("literal-attributes");
193 LiteralAttribs = parseBool(value, "literal-attributes");
195 value = _properties.getProperty("tab-size");
197 tabsize = parseInt(value, "tab-size");
199 value = _properties.getProperty("markup");
201 OnlyErrors = parseInvBool(value, "markup");
203 value = _properties.getProperty("quiet");
205 Quiet = parseBool(value, "quiet");
207 value = _properties.getProperty("tidy-mark");
209 TidyMark = parseBool(value, "tidy-mark");
211 value = _properties.getProperty("indent");
213 IndentContent = parseIndent(value, "indent");
215 value = _properties.getProperty("indent-attributes");
217 IndentAttributes = parseBool(value, "ident-attributes");
219 value = _properties.getProperty("hide-endtags");
221 HideEndTags = parseBool(value, "hide-endtags");
223 value = _properties.getProperty("input-xml");
225 XmlTags = parseBool(value, "input-xml");
227 value = _properties.getProperty("output-xml");
229 XmlOut = parseBool(value, "output-xml");
231 value = _properties.getProperty("output-xhtml");
233 xHTML = parseBool(value, "output-xhtml");
235 value = _properties.getProperty("add-xml-pi");
237 XmlPi = parseBool(value, "add-xml-pi");
239 value = _properties.getProperty("add-xml-decl");
241 XmlPi = parseBool(value, "add-xml-decl");
243 value = _properties.getProperty("assume-xml-procins");
245 XmlPIs = parseBool(value, "assume-xml-procins");
247 value = _properties.getProperty("raw");
249 RawOut = parseBool(value, "raw");
251 value = _properties.getProperty("uppercase-tags");
253 UpperCaseTags = parseBool(value, "uppercase-tags");
255 value = _properties.getProperty("uppercase-attributes");
257 UpperCaseAttrs = parseBool(value, "uppercase-attributes");
259 value = _properties.getProperty("clean");
261 MakeClean = parseBool(value, "clean");
263 value = _properties.getProperty("logical-emphasis");
265 LogicalEmphasis = parseBool(value, "logical-emphasis");
267 value = _properties.getProperty("word-2000");
269 Word2000 = parseBool(value, "word-2000");
271 value = _properties.getProperty("drop-empty-paras");
273 DropEmptyParas = parseBool(value, "drop-empty-paras");
275 value = _properties.getProperty("drop-font-tags");
277 DropFontTags = parseBool(value, "drop-font-tags");
279 //gschadow patch start
280 value = _properties.getProperty("drop-pseudo-xml-crap");
282 DropPseudoXMLCrap = parseBool(value, "drop-pseudo-xml-crap");
285 value = _properties.getProperty("enclose-text");
287 EncloseBodyText = parseBool(value, "enclose-text");
289 value = _properties.getProperty("enclose-block-text");
291 EncloseBlockText = parseBool(value, "enclose-block-text");
293 value = _properties.getProperty("alt-text");
297 value = _properties.getProperty("add-xml-space");
299 XmlSpace = parseBool(value, "add-xml-space");
301 value = _properties.getProperty("fix-bad-comments");
303 FixComments = parseBool(value, "fix-bad-comments");
305 value = _properties.getProperty("split");
307 BurstSlides = parseBool(value, "split");
309 value = _properties.getProperty("break-before-br");
311 BreakBeforeBR = parseBool(value, "break-before-br");
313 value = _properties.getProperty("numeric-entities");
315 NumEntities = parseBool(value, "numeric-entities");
317 value = _properties.getProperty("quote-marks");
319 QuoteMarks = parseBool(value, "quote-marks");
321 value = _properties.getProperty("quote-nbsp");
323 QuoteNbsp = parseBool(value, "quote-nbsp");
325 value = _properties.getProperty("quote-ampersand");
327 QuoteAmpersand = parseBool(value, "quote-ampersand");
329 value = _properties.getProperty("write-back");
331 writeback = parseBool(value, "write-back");
333 value = _properties.getProperty("keep-time");
335 KeepFileTimes = parseBool(value, "keep-time");
337 value = _properties.getProperty("show-warnings");
339 ShowWarnings = parseBool(value, "show-warnings");
341 value = _properties.getProperty("error-file");
343 errfile = parseName(value, "error-file");
345 value = _properties.getProperty("slide-style");
347 slidestyle = parseName(value, "slide-style");
349 value = _properties.getProperty("new-inline-tags");
351 parseInlineTagNames(value, "new-inline-tags");
353 value = _properties.getProperty("new-blocklevel-tags");
355 parseBlockTagNames(value, "new-blocklevel-tags");
357 value = _properties.getProperty("new-empty-tags");
359 parseEmptyTagNames(value, "new-empty-tags");
361 value = _properties.getProperty("new-pre-tags");
363 parsePreTagNames(value, "new-pre-tags");
365 value = _properties.getProperty("char-encoding");
367 CharEncoding = parseCharEncoding(value, "char-encoding");
369 value = _properties.getProperty("doctype");
371 docTypeStr = parseDocType(value, "doctype");
373 value = _properties.getProperty("fix-backslash");
375 FixBackslash = parseBool(value, "fix-backslash");
377 value = _properties.getProperty("gnu-emacs");
379 Emacs = parseBool(value, "gnu-emacs");
382 /* ensure that config is self consistent */
385 if (EncloseBlockText)
386 EncloseBodyText = true;
388 /* avoid the need to set IndentContent when SmartIndent is set */
391 IndentContent = true;
393 /* disable wrapping */
395 wraplen = 0x7FFFFFFF;
397 /* Word 2000 needs o:p to be declared as inline */
400 tt.defineInlineTag("o:p");
403 /* XHTML is written in lower case */
407 UpperCaseTags = false;
408 UpperCaseAttrs = false;
411 /* if XML in, then XML out */
418 /* XML requires end tags */
421 QuoteAmpersand = true;
/*
 * Converts the text of an integer-valued option with Integer.parseInt.
 * Text that is not a valid integer is reported through
 * Report.badArgument(option).
 * NOTE(review): the declaration of i and the return statement are not part
 * of this listing, so the value returned after a failed parse cannot be
 * confirmed from here.
 */
426 private static int parseInt( String s, String option )
430 i = Integer.parseInt( s );
432 catch ( NumberFormatException e ) {
433 Report.badArgument(option);
/*
 * Interprets a yes/no option value. Only the first character of a non-empty
 * value is inspected: t, T, Y, y or 1 match the first test and
 * f, F, N, n or 0 match the second.
 * NOTE(review): the result assignments and the return statement are missing
 * from this listing; presumably the first group yields true, the second
 * false, and Report.badArgument(option) runs for any other first
 * character - confirm.
 */
439 private static boolean parseBool( String s, String option )
442 if ( s != null && s.length() > 0 ) {
443 char c = s.charAt(0);
444 if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1'))
446 else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0'))
449 Report.badArgument(option);
/*
 * Interprets a yes/no option value for a setting that is presumably stored
 * inverted (the name suggests it; the result assignments and the return
 * statement are missing from this listing - confirm).
 * Only the first character of a non-empty value is inspected: t, T, Y or y
 * match the first test and f, F, N or n match the second.
 * NOTE(review): unlike parseBool, the digits 1 and 0 are not recognised
 * here, so "markup: 1" would fall through to Report.badArgument(option) -
 * confirm whether that difference is intended.
 */
454 private static boolean parseInvBool( String s, String option )
457 if ( s != null && s.length() > 0 ) {
458 char c = s.charAt(0);
459 if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y'))
461 else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n'))
464 Report.badArgument(option);
/*
 * Reads a name-valued option. The value is split with StringTokenizer's
 * default delimiters and the method checks that at least one token exists.
 * NOTE(review): the lines that pick the result and the return statement are
 * missing from this listing; presumably the first token is returned and
 * Report.badArgument(option) runs when there is none - confirm.
 * A null value is not guarded here; the StringTokenizer would fail on first use.
 */
469 private static String parseName( String s, String option )
471 StringTokenizer t = new StringTokenizer( s );
473 if ( t.countTokens() >= 1 )
476 Report.badArgument(option);
/*
 * Maps the text of the char-encoding option to an encoding code.
 * The value is compared with Lexer.wstrcasecmp against "ascii", "latin1",
 * "raw", "utf8", "iso2022" and "mac", in that order.
 * NOTE(review): the result assignments and the return statement are missing
 * from this listing; presumably each name selects the matching constant
 * (ASCII, LATIN1, RAW, UTF8, ISO2022, MACROMAN) and any other text is
 * reported through Report.badArgument(option) - confirm.
 */
480 private static int parseCharEncoding( String s, String option )
484 if (Lexer.wstrcasecmp(s, "ascii") == 0)
486 else if (Lexer.wstrcasecmp(s, "latin1") == 0)
488 else if (Lexer.wstrcasecmp(s, "raw") == 0)
490 else if (Lexer.wstrcasecmp(s, "utf8") == 0)
492 else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
494 else if (Lexer.wstrcasecmp(s, "mac") == 0)
497 Report.badArgument(option);
/*
 * Reads the value of the indent option. The result starts out as the
 * current IndentContent setting and the value is compared with
 * Lexer.wstrcasecmp against "yes", "true", "no", "false" and "auto".
 * NOTE(review): the statements inside each branch and the return statement
 * are missing from this listing, so what each keyword sets cannot be
 * confirmed from here; Report.badArgument(option) presumably handles any
 * other text.
 */
502 /* slight hack to avoid changes to pprint.c */
503 private boolean parseIndent( String s, String option )
505 boolean b = IndentContent;
507 if (Lexer.wstrcasecmp(s, "yes") == 0)
512 else if (Lexer.wstrcasecmp(s, "true") == 0)
517 else if (Lexer.wstrcasecmp(s, "no") == 0)
522 else if (Lexer.wstrcasecmp(s, "false") == 0)
527 else if (Lexer.wstrcasecmp(s, "auto") == 0)
533 Report.badArgument(option);
537 public void parseInlineTagNames( String s, String option )
539 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
540 while ( t.hasMoreTokens() ) {
541 tt.defineInlineTag( t.nextToken() );
545 public void parseBlockTagNames( String s, String option )
547 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
548 while ( t.hasMoreTokens() ) {
549 tt.defineBlockTag( t.nextToken() );
553 public void parseEmptyTagNames( String s, String option )
555 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
556 while ( t.hasMoreTokens() ) {
557 tt.defineEmptyTag( t.nextToken() );
561 public void parsePreTagNames( String s, String option )
563 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
564 while ( t.hasMoreTokens() ) {
565 tt.definePreTag( t.nextToken() );
570 doctype: omit | auto | strict | loose | <fpi>
572 where the fpi is a string similar to
574 "-//ACME//DTD HTML 3.14159//EN"
576 protected String parseDocType( String s, String option )
580 /* "-//ACME//DTD HTML 3.14159//EN" or similar */
582 if (s.startsWith("\""))
584 docTypeMode = DOCTYPE_USER;
588 /* read first word */
590 StringTokenizer t = new StringTokenizer( s, " \t\n\r," );
591 if (t.hasMoreTokens())
592 word = t.nextToken();
594 if (Lexer.wstrcasecmp(word, "omit") == 0)
595 docTypeMode = DOCTYPE_OMIT;
596 else if (Lexer.wstrcasecmp(word, "strict") == 0)
597 docTypeMode = DOCTYPE_STRICT;
598 else if (Lexer.wstrcasecmp(word, "loose") == 0 ||
599 Lexer.wstrcasecmp(word, "transitional") == 0)
600 docTypeMode = DOCTYPE_LOOSE;
601 else if (Lexer.wstrcasecmp(word, "auto") == 0)
602 docTypeMode = DOCTYPE_AUTO;
605 docTypeMode = DOCTYPE_AUTO;
606 Report.badArgument(option);