2 * Copyright (c) 2004 Christopher Lenz and others
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * Christopher Lenz - initial API
11 * $Id: XMLParser.java,v 1.2 2006-10-21 23:13:43 pombredanne Exp $
14 package net.sourceforge.phpeclipse.xml.core.internal.parser;
16 import java.io.IOException;
17 import java.io.StringReader;
19 import javax.xml.parsers.ParserConfigurationException;
20 import javax.xml.parsers.SAXParser;
21 import javax.xml.parsers.SAXParserFactory;
23 import net.sourceforge.phpeclipse.xml.core.internal.model.XMLDocument;
24 import net.sourceforge.phpeclipse.xml.core.internal.model.XMLElement;
25 import net.sourceforge.phpeclipse.xml.core.model.IXMLDocument;
26 import net.sourceforge.phpeclipse.xml.core.model.IXMLElement;
27 import net.sourceforge.phpeclipse.xml.core.parser.IProblem;
28 import net.sourceforge.phpeclipse.xml.core.parser.IProblemCollector;
29 import net.sourceforge.phpeclipse.xml.core.parser.IXMLParser;
31 import org.eclipse.jface.text.BadLocationException;
32 import org.eclipse.jface.text.IDocument;
33 import org.eclipse.jface.text.IRegion;
34 import org.eclipse.jface.text.Region;
35 import org.xml.sax.Attributes;
36 import org.xml.sax.InputSource;
37 import org.xml.sax.Locator;
38 import org.xml.sax.SAXException;
39 import org.xml.sax.SAXParseException;
40 import org.xml.sax.helpers.DefaultHandler;
43 * SAX-based default implementation of the {@link IXMLParser} interface.
45 * TODO This implementation doesn't do error recovery, as SAX doesn't allow it.
46 * Maybe we partition the document and parse individual fragments so that errors
47 * can be isolated to their source
// SAX-backed IXMLParser implementation; parse() drives the SAX parser with a
// ModelBuilder as its handler.
49 public class XMLParser implements IXMLParser {
51 * SAX content handler that builds a model of the XML document.
// Non-static inner class: its callbacks read the enclosing parser's source,
// systemId and problemCollector directly.
53 class ModelBuilder extends DefaultHandler {
55 * The document model being built.
// NOTE(review): the declaration of the `document` field described above is not
// visible in this listing; startDocument() assigns it and parse() reads it.
60 * The current top element. That is the element that has been most
61 * recently opened by a start tag.
63 private XMLElement top;
66 * The SAX locator provided by the parser, used to calculate the source
67 * regions covered by elements.
69 private Locator locator;
72 * Limits parsing time.
// NOTE(review): the declaration of the `timeout` field described above is not
// visible in this listing; startDocument() sets it from
// System.currentTimeMillis() and startElement() compares against it.
77 * @see org.xml.sax.ContentHandler#startDocument()
79 public void startDocument() throws SAXException {
80 timeout = System.currentTimeMillis() + 2000;
81 document = new XMLDocument(source, systemId);
85 * @see org.xml.sax.ContentHandler#startElement(String, String, String,
// SAX start-tag callback: creates the model element for the tag, records its
// name, namespace, prefix and start offset, and links it to the element that
// is currently open.
// NOTE(review): several lines of this method are not visible in this listing
// (closing braces and possibly guards or assignments, e.g. whatever makes
// newTop the new `top`), so the control flow shown here is incomplete.
88 public void startElement(String namespaceURI, String localName,
89 String qName, Attributes atts) throws SAXException {
// Abort the parse once the deadline armed in startDocument() has passed.
90 if (System.currentTimeMillis() > timeout) {
91 throw new SAXException("timeout");
94 XMLElement newTop = new XMLElement(source);
95 newTop.setLocalName(localName);
96 newTop.setNamespaceURI(namespaceURI);
// The prefix is the part of the qualified name before the first ':';
// names without a colon get no prefix.
99 int colonIndex = qName.indexOf(':');
100 if (colonIndex >= 0) {
101 newTop.setPrefix(qName.substring(0, colonIndex));
// Translate the locator's current position into a document offset for the
// start tag (the continuation of this call is not visible in this listing).
105 int offset = computeOffset(newTop, locator.getLineNumber(), locator
// Only the start is known here; the zero length is replaced by endElement().
109 newTop.setSourceRegion(offset, 0);
// The element open when this tag was encountered becomes the parent.
112 newTop.setParent(top);
118 * @see org.xml.sax.ContentHandler#endElement(String, String, String)
// SAX end-tag callback: finalizes the source region of the element being
// closed and attaches it to its parent, or to the document when it has none.
// NOTE(review): some lines of this method are not visible in this listing
// (closing braces, the else branch header, and presumably the statement that
// restores the parent as the current `top`) — confirm against the full file.
120 public void endElement(String namespaceURI, String localName,
121 String qName) throws SAXException {
// Length of the element, derived from the locator position at the end tag
// (the continuation of this call is not visible in this listing).
122 int length = computeLength(top, locator.getLineNumber(), locator
// Keep the offset recorded by startElement(), fill in the length.
126 top.setSourceRegion(top.getSourceRegion().getOffset(), length);
// The parent was recorded via setParent() in startElement().
129 XMLElement previousTop = (XMLElement) top.getParent();
130 if (previousTop != null) {
// Children are added to their parent only once they are closed.
131 previousTop.addChild(top);
133 // this is the root element
134 document.setRoot(top);
140 * @see org.xml.sax.ErrorHandler#error(SAXParseException)
142 public void error(SAXParseException e) throws SAXException {
143 if (problemCollector != null) {
144 problemCollector.addProblem(createProblem(e, true));
149 * @see org.xml.sax.ErrorHandler#fatalError(SAXParseException)
151 public void fatalError(SAXParseException e) throws SAXException {
152 if (problemCollector != null) {
153 problemCollector.addProblem(createProblem(e, true));
158 * @see org.xml.sax.ErrorHandler#warning(SAXParseException)
160 public void warning(SAXParseException e) throws SAXException {
161 if (problemCollector != null) {
162 problemCollector.addProblem(createProblem(e, false));
167 * @see org.xml.sax.ContentHandler#setDocumentLocator(Locator)
169 public void setDocumentLocator(Locator locator) {
170 this.locator = locator;
174 * Creates an <tt>IProblem</tt> instance based on the information
175 * accessible from the parse exception. This method estimates the exact
176 * location of the error based on the line and column numbers provided
177 * with the exception.
179 * TODO Limit the location to the current top element
182 * the SAX parse exception
184 * whether the problem is an error or a warning
185 * @return the created problem object
// Builds the problem object for a SAX parse exception. The problem range
// starts at the reported line/column and runs to the last character of that
// line (delimiter excluded).
// NOTE(review): several lines between reading line/column and computing the
// offset, as well as the `try {` header and closing braces, are not visible
// in this listing; any normalization of line/column done there is unknown.
187 private IProblem createProblem(SAXParseException e, boolean error) {
188 int line = e.getLineNumber();
189 int column = e.getColumnNumber();
// Fallback range used when the position cannot be mapped onto the document.
197 int offset = 0, length = 1;
199 offset = getOffset(line, column);
// Remaining characters on the line, counted from the reported column.
200 length = getLastCharColumn(line) - column;
201 } catch (BadLocationException ble) {
// NOTE(review): this only prints the stack trace; if the failure happens after
// `offset` was assigned, the computed offset is kept with the default length.
202 ble.printStackTrace();
// DefaultProblem receives a start and an end offset (offset + length), not a
// length.
205 return new DefaultProblem(e.getLocalizedMessage(), offset, offset
206 + length, line, error);
210 // Instance Variables ------------------------------------------------------
213 * The associated problem collector.
215 IProblemCollector problemCollector;
218 * The document containing the source that should be parsed.
223 * The system ID of the document to parse, if available. This is necessary
224 * to resolve relative external entities. Can be <tt>null</tt>.
228 // IXMLParser Implementation -----------------------------------------------
231 * @see IXMLParser#parse()
233 public IXMLDocument parse() {
234 SAXParserFactory factory = SAXParserFactory.newInstance();
235 factory.setNamespaceAware(true);
236 factory.setValidating(false);
239 SAXParser parser = factory.newSAXParser();
241 InputSource in = new InputSource(new StringReader(source.get()));
242 if (systemId != null) {
243 in.setSystemId(systemId);
246 ModelBuilder builder = new ModelBuilder();
247 parser.parse(in, builder);
248 return builder.document;
249 } catch (ParserConfigurationException e) {
250 // TODO Throw CoreException or at least log the error
251 } catch (SAXParseException e) {
252 // Already handled by the ModelBuilder
253 } catch (SAXException e) {
254 // SAX exceptions that are not parsing errors
255 // TODO Throw CoreException or at least log the error
256 } catch (IOException e) {
257 // TODO Throw CoreException or at least log the error
264 * @see IProblemReporter#setProblemCollector(IProblemCollector)
266 public void setProblemCollector(IProblemCollector problemCollector) {
267 this.problemCollector = problemCollector;
271 * @see IXMLParser#setSource(IDocument)
273 public void setSource(IDocument source) {
274 this.source = source;
278 * @see IXMLParser#setSystemId(String)
280 public void setSystemId(String systemId) {
281 this.systemId = systemId;
284 // Private Methods ---------------------------------------------------------
287 * Computes the exact length of the given element by searching for the
288 * offset of the last character of the end tag.
// Computes the length of an element's source region as (end offset - start
// offset), where the start offset is the one already stored on the element.
// NOTE(review): several lines of this method are not visible in this listing
// (the `try {` header, the declaration of `offset`, the condition selecting
// between the search branch and the getOffset() branch, else-branches,
// closing braces and the value returned after the catch). The comments below
// describe only what the visible lines do.
290 int computeLength(XMLElement element, int line, int column) {
// NOTE(review): this passes `line` unchanged, whereas getOffset() uses
// `line - 1`; confirm the search is meant to start at that line.
294 int lineOffset = source.getLineOffset(line);
295 String endTag = getEndTag(element);
// First choice: an explicit end tag at or after lineOffset.
297 IRegion result = findStringForward(lineOffset, endTag);
298 if (result != null) {
// End offset is the position just past the end tag.
299 offset = result.getOffset() + endTag.length();
// Otherwise look for the end of an empty-element tag.
301 result = findStringForward(lineOffset, "/>"); //$NON-NLS-1$
302 if (result == null) {
// Position just past the two characters of "/>".
305 offset = result.getOffset() + 2;
// Reject a match that was not found or that lies on a different line than
// the one reported (what happens in that case is not visible in this listing).
309 if ((offset < 0) || (getLine(offset) != line)) {
// Direct translation of the reported line/column into an offset.
315 offset = getOffset(line, column);
318 return offset - element.getSourceRegion().getOffset();
319 } catch (BadLocationException e) {
320 // ignore as the parser may be out of sync with the document during
328 * Computes the offset at which the specified element's start tag begins in
// Computes the document offset of the '<' that opens the element's start tag,
// starting from the line/column reported by the SAX locator.
// NOTE(review): several lines of this method are not visible in this listing
// (the `try {` header, the declaration of `offset`, the condition selecting
// between the two strategies, the continuation of the source.get(...) call,
// closing braces, and the return statements). The comments below describe
// only what the visible lines do.
331 int computeOffset(XMLElement element, int line, int column) {
334 String prefix = "<"; //$NON-NLS-1$
// getOffset(line, 0) is one position before the first character of the line,
// which the `+ 1` further down compensates for.
336 offset = getOffset(line, 0);
337 int lastCharColumn = getLastCharColumn(line);
// Text of the reported line, starting at the line's first character.
338 String lineText = source.get(source.getLineOffset(line - 1),
340 String startTag = getStartTag(element);
// Look for "<name" (or "<prefix:name") on the reported line. indexOf returns
// the first occurrence.
342 int lastIndex = lineText.indexOf(startTag);
343 if (lastIndex > -1) {
// Line start + index of the tag within the line.
344 offset += lastIndex + 1;
// Tag name not on this line: search backwards from the end of the line for
// the nearest '<'.
346 offset = getOffset(line, lastCharColumn);
347 IRegion result = findStringBackward(offset - 1, prefix);
// NOTE(review): `result` is dereferenced without a null check; presumably
// findStringBackward() returns null when nothing is found (as
// computeLength() expects of findStringForward()), which would raise a
// NullPointerException that the catch below does not handle.
348 offset = result.getOffset();
// Alternative strategy: start one position before the reported line/column
// and search forward for '<'.
351 offset = getOffset(line, column);
352 IRegion result = findStringForward(offset - 1, prefix);
// NOTE(review): same unchecked dereference as above; computeLength() treats
// a null result from findStringForward() as possible.
353 offset = result.getOffset();
357 } catch (BadLocationException e) {
358 // ignore as the parser may be out of sync with the document during
365 private IRegion findStringBackward(int startOffset, String string)
366 throws BadLocationException {
367 int offset = startOffset;
368 int length = string.length();
371 while (offset >= 0) {
372 match = source.get(offset, length);
373 if (match.equals(string)) {
374 return new Region(offset, length);
382 private IRegion findStringForward(int startOffset, String string)
383 throws BadLocationException {
384 int offset = startOffset;
385 int length = string.length();
388 int sourceLength = source.getLength();
389 while (offset + length <= sourceLength) {
390 match = source.get(offset, length);
391 if (match.equals(string)) {
392 return new Region(offset, length);
401 * Given an XML element, this method reconstructs the corresponding end tag
402 * of the element, including the namespace prefix if there was one.
405 * the XML element for which the end tag should be contructed
406 * @return the end tag as string
408 private String getEndTag(IXMLElement element) {
409 StringBuffer buf = new StringBuffer("</"); //$NON-NLS-1$
410 if (element.getPrefix() != null) {
411 buf.append(element.getPrefix());
414 buf.append(element.getLocalName());
417 return buf.toString();
421 * Reconstructs and returns the start tag corresponding to the given XML
422 * element, excluding any attribute specifications or the closing
423 * <tt>></tt> character.
426 * the XML element for which the start tag should be constructed
427 * @return the start tag as string, excluding everything after the tag name
430 private String getStartTag(IXMLElement element) {
431 StringBuffer buf = new StringBuffer("<"); //$NON-NLS-1$
432 if (element.getPrefix() != null) {
433 buf.append(element.getPrefix());
436 buf.append(element.getLocalName());
438 return buf.toString();
441 int getOffset(int line, int column) throws BadLocationException {
442 return source.getLineOffset(line - 1) + column - 1;
445 private int getLine(int offset) throws BadLocationException {
446 return source.getLineOfOffset(offset) + 1;
449 int getLastCharColumn(int line) throws BadLocationException {
450 String lineDelimiter = source.getLineDelimiter(line - 1);
451 int lineDelimiterLength = (lineDelimiter != null) ? lineDelimiter
454 return source.getLineLength(line - 1) - lineDelimiterLength;