| /* | ||
| * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved. | ||
| * | ||
| * This software is open source. | ||
| * See the bottom of this file for the licence. | ||
| * | ||
| * $Id: SAXReader.java,v 1.55 2004/08/04 18:22:39 maartenc Exp $ | ||
| */ | ||
| package org.dom4j.io; | ||
| import java.io.File; | ||
| import java.io.FileInputStream; | ||
| import java.io.FileNotFoundException; | ||
| import java.io.InputStream; | ||
| import java.io.Reader; | ||
| import java.io.Serializable; | ||
| import java.net.URL; | ||
| import org.dom4j.Document; | ||
| import org.dom4j.DocumentException; | ||
| import org.dom4j.DocumentFactory; | ||
| import org.dom4j.ElementHandler; | ||
| import org.xml.sax.EntityResolver; | ||
| import org.xml.sax.ErrorHandler; | ||
| import org.xml.sax.InputSource; | ||
| import org.xml.sax.SAXException; | ||
| import org.xml.sax.SAXParseException; | ||
| import org.xml.sax.XMLFilter; | ||
| import org.xml.sax.XMLReader; | ||
| import org.xml.sax.helpers.DefaultHandler; | ||
| import org.xml.sax.helpers.XMLReaderFactory; | ||
| /** <p><code>SAXReader</code> creates a DOM4J tree from SAX parsing events.</p> | ||
| * | ||
| * <p>The actual SAX parser that is used by this class is configurable | ||
| * so you can use your favourite SAX parser if you wish. DOM4J comes | ||
| * configured with its own SAX parser so you do not need to worry about | ||
| * configuring the SAX parser.</p> | ||
| * | ||
| * <p>To explicitly configure the SAX parser that is used via Java code you | ||
| * can use a constructor or use the | ||
| * {@link #setXMLReader(XMLReader)} or | ||
| * {@link #setXMLReaderClassName(String)} methods.</p> | ||
| * | ||
| * <p>If the parser is not specified explicitly then the standard SAX | ||
| * policy of using the <code>org.xml.sax.driver</code> system property is | ||
| * used to determine the implementation class of {@link XMLReader}.</p> | ||
| * | ||
| * <p>If the <code>org.xml.sax.driver</code> system property is not defined | ||
| * then JAXP is used via reflection (so that DOM4J is not explicitly dependent | ||
| * on the JAXP classes) to load the JAXP configured SAXParser. | ||
| * If there is any error creating a JAXP SAXParser an informational message is | ||
| * output and then the default (Aelfred) SAX parser is used instead.</p> | ||
| * | ||
| * <p>If you are trying to use JAXP to explicitly set your SAX parser | ||
| * and are experiencing problems, you can turn on verbose error reporting | ||
| * by defining the system property <code>org.dom4j.verbose</code> to be "true" | ||
| * which will output a more detailed description of why JAXP could not find a | ||
| * SAX parser.</p> | ||
| * | ||
| * <p> | ||
| * For more information on JAXP please go to | ||
| * <a href="http://java.sun.com/xml/">Sun's Java &amp; XML site</a></p> | ||
| * | ||
| * @author <a href="mailto:james.strachan@metastuff.com">James Strachan</a> | ||
| * @version $Revision: 1.55 $ | ||
| */ | ||
| public class SAXReader { | ||
| /** <code>DocumentFactory</code> used to create new document objects */ | ||
| private DocumentFactory factory; | ||
| /** <code>XMLReader</code> used to parse the SAX events */ | ||
| private XMLReader xmlReader; | ||
| /** Whether validation should occur */ | ||
| private boolean validating; | ||
| /** DispatchHandler to call when each <code>Element</code> is encountered */ | ||
| private DispatchHandler dispatchHandler; | ||
| /** ErrorHandler class to use */ | ||
| private ErrorHandler errorHandler; | ||
| /** The entity resolver */ | ||
| private EntityResolver entityResolver; | ||
| /** Should element & attribute names and namespace URIs be interned? */ | ||
| 11348x | private boolean stringInternEnabled = true; | |
| /** Should internal DTD declarations be expanded into a List in the DTD */ | ||
| 11348x | private boolean includeInternalDTDDeclarations = false; | |
| /** Should external DTD declarations be expanded into a List in the DTD */ | ||
| 11348x | private boolean includeExternalDTDDeclarations = false; | |
| /** Whether adjacent text nodes should be merged */ | ||
| 11348x | private boolean mergeAdjacentText = false; | |
| /** Holds value of property stripWhitespaceText. */ | ||
| 11348x | private boolean stripWhitespaceText = false; | |
| /** Should we ignore comments */ | ||
| 11348x | private boolean ignoreComments = false; | |
| //private boolean includeExternalGeneralEntities = false; | ||
| //private boolean includeExternalParameterEntities = false; | ||
| /** The SAX filter used to filter SAX events */ | ||
| private XMLFilter xmlFilter; | ||
| 11348x | public SAXReader() { | |
| 11348x | } | |
/** Creates a reader with the given validation mode.
 *
 * @param validating indicates whether or not validation should occur.
 */
public SAXReader(boolean validating) {
    this.validating = validating;
}
/** Creates a reader that remembers the given <code>DocumentFactory</code>.
 *
 * @param factory the factory stored on this reader
 */
public SAXReader(DocumentFactory factory) {
    this.factory = factory;
}
/** Creates a reader with the given <code>DocumentFactory</code> and
 * validation mode.
 *
 * @param factory the factory stored on this reader
 * @param validating indicates whether or not validation should occur.
 */
public SAXReader(DocumentFactory factory, boolean validating) {
    this.validating = validating;
    this.factory = factory;
}
/** Creates a reader that uses the supplied <code>XMLReader</code>.
 *
 * @param xmlReader the SAX parser stored on this reader
 */
public SAXReader(XMLReader xmlReader) {
    this.xmlReader = xmlReader;
}
/** Creates a reader that uses the supplied <code>XMLReader</code> and
 * validation mode.
 *
 * @param xmlReader the SAX parser stored on this reader
 * @param validating indicates whether or not validation should occur.
 */
public SAXReader(XMLReader xmlReader, boolean validating) {
    this.validating = validating;
    this.xmlReader = xmlReader;
}
/** Creates a reader whose SAX parser is instantiated from a class name.
 *
 * @param xmlReaderClassName class name handed to
 *      <code>XMLReaderFactory.createXMLReader(String)</code>; when
 *      <code>null</code> no parser is created here
 * @throws SAXException if the factory could not create the parser
 */
public SAXReader(String xmlReaderClassName) throws SAXException {
    if (xmlReaderClassName == null) {
        return;
    }
    this.xmlReader = XMLReaderFactory.createXMLReader(xmlReaderClassName);
}
/** Creates a reader whose SAX parser is instantiated from a class name,
 * with the given validation mode.
 *
 * @param xmlReaderClassName class name handed to
 *      <code>XMLReaderFactory.createXMLReader(String)</code>; when
 *      <code>null</code> no parser is created here
 * @param validating indicates whether or not validation should occur.
 * @throws SAXException if the factory could not create the parser
 */
public SAXReader(String xmlReaderClassName, boolean validating) throws SAXException {
    // Order is irrelevant: if parser creation throws, construction fails as a whole.
    this.validating = validating;
    if (xmlReaderClassName != null) {
        this.xmlReader = XMLReaderFactory.createXMLReader(xmlReaderClassName);
    }
}
| /** Allows a SAX property to be set on the underlying SAX parser. | ||
| * This can be useful to set parser-specific properties | ||
| * such as the location of schema or DTD resources. | ||
| * Though use this method with caution as it has the possibility | ||
| * of breaking the standard behaviour. | ||
| * An alternative to calling this method is to correctly configure an | ||
| * XMLReader object instance and call the {@link #setXMLReader(XMLReader)} method | ||
| * | ||
| * @param name is the SAX property name | ||
| * @param value is the value of the SAX property | ||
| * @throws SAXException if the XMLReader could not be created or | ||
| * the property could not be changed. | ||
| */ | ||
| public void setProperty(String name, Object value) throws SAXException { | ||
| 0x | getXMLReader().setProperty(name, value); | |
| 0x | } | |
| /** Sets a SAX feature on the underlying SAX parser. | ||
| * This can be useful to set parser-specific features. | ||
| * Though use this method with caution as it has the possibility | ||
| * of breaking the standard behaviour. | ||
| * An alternative to calling this method is to correctly configure an | ||
| * XMLReader object instance and call the {@link #setXMLReader(XMLReader)} method | ||
| * | ||
| * @param name is the SAX feature name | ||
| * @param value is the value of the SAX feature | ||
| * @throws SAXException if the XMLReader could not be created or | ||
| * the feature could not be changed. | ||
| */ | ||
| public void setFeature(String name, boolean value) throws SAXException { | ||
| 0x | getXMLReader().setFeature(name, value); | |
| 0x | } | |
| /** <p>Reads a Document from the given <code>File</code></p> | ||
| * | ||
| * @param file is the <code>File</code> to read from. | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(File file) throws DocumentException { | ||
| try { | ||
| /* | ||
| * We cannot convert the file to an URL because if the filename | ||
| * contains '#' characters, there will be problems with the | ||
| * URL in the InputSource (because a URL like | ||
| * http://myhost.com/index#anchor is treated the same as | ||
| * http://myhost.com/index) | ||
| * Thanks to Christian Oetterli | ||
| */ | ||
| 62x | InputSource source = new InputSource(new FileInputStream(file)); | |
| 62x | String path = file.getAbsolutePath(); | |
| 1/2 62x | if (path != null) { | |
| // Code taken from Ant FileUtils | ||
| 62x | StringBuffer sb = new StringBuffer("file://"); | |
| // add an extra slash for filesystems with drive-specifiers | ||
| 1/2 62x | if (!path.startsWith(File.separator)) { | |
| 0x | sb.append("/"); | |
| } | ||
| 62x | path = path.replace('\\', '/'); | |
| 62x | sb.append(path); | |
| 62x | source.setSystemId(sb.toString()); | |
| } | ||
| 62x | return read(source); | |
| } catch (FileNotFoundException e) { | ||
| 0x | throw new DocumentException(e.getMessage(), e); | |
| } | ||
| } | ||
| /** <p>Reads a Document from the given <code>URL</code> using SAX</p> | ||
| * | ||
| * @param url <code>URL</code> to read from. | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(URL url) throws DocumentException { | ||
| 196x | String systemID = url.toExternalForm(); | |
| 2x | return read(new InputSource(systemID)); | |
| } | ||
| /** <p>Reads a Document from the given URL or filename using SAX.</p> | ||
| * | ||
| * <p> | ||
| * If the systemId contains a <code>':'</code> character then it is | ||
| * assumed to be a URL otherwise its assumed to be a file name. | ||
| * If you want finer grained control over this mechansim then please | ||
| * explicitly pass in either a {@link URL} or a {@link File} instance | ||
| * instead of a {@link String} to denote the source of the document. | ||
| * </p> | ||
| * | ||
| * @param systemId is a URL for a document or a file name. | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(String systemId) throws DocumentException { | ||
| 2x | return read(new InputSource(systemId)); | |
| } | ||
| /** <p>Reads a Document from the given stream using SAX</p> | ||
| * | ||
| * @param in <code>InputStream</code> to read from. | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(InputStream in) throws DocumentException { | ||
| 2x | return read(new InputSource(in)); | |
| } | ||
| /** <p>Reads a Document from the given <code>Reader</code> using SAX</p> | ||
| * | ||
| * @param reader is the reader for the input | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(Reader reader) throws DocumentException { | ||
| 34x | return read(new InputSource(reader)); | |
| } | ||
| /** <p>Reads a Document from the given stream using SAX</p> | ||
| * | ||
| * @param in <code>InputStream</code> to read from. | ||
| * @param systemId is the URI for the input | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(InputStream in, String systemId) throws DocumentException { | ||
| 0x | InputSource source = new InputSource(in); | |
| 0x | source.setSystemId(systemId); | |
| 0x | return read(source); | |
| } | ||
| /** <p>Reads a Document from the given <code>Reader</code> using SAX</p> | ||
| * | ||
| * @param reader is the reader for the input | ||
| * @param systemId is the URI for the input | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(Reader reader, String systemId) throws DocumentException { | ||
| 0x | InputSource source = new InputSource(reader); | |
| 0x | source.setSystemId(systemId); | |
| 0x | return read(source); | |
| } | ||
| /** <p>Reads a Document from the given <code>InputSource</code> using SAX</p> | ||
| * | ||
| * @param in <code>InputSource</code> to read from. | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(InputSource in) throws DocumentException { | ||
| try { | ||
| 11184x | XMLReader xmlReader = getXMLReader(); | |
| 11184x | xmlReader = installXMLFilter(xmlReader); | |
| 11184x | EntityResolver thatEntityResolver = this.entityResolver; | |
| 2/2 11184x | if (thatEntityResolver==null) { | |
| 11174x | thatEntityResolver = createDefaultEntityResolver( in.getSystemId() ); | |
| 11174x | this.entityResolver=thatEntityResolver; | |
| } | ||
| 11184x | xmlReader.setEntityResolver( thatEntityResolver ); | |
| 11184x | SAXContentHandler contentHandler = createContentHandler(xmlReader); | |
| 11184x | contentHandler.setEntityResolver( thatEntityResolver ); | |
| 11184x | contentHandler.setInputSource( in ); | |
| 11184x | contentHandler.setIncludeInternalDTDDeclarations( isIncludeInternalDTDDeclarations() ); | |
| 11184x | contentHandler.setIncludeExternalDTDDeclarations( isIncludeExternalDTDDeclarations() ); | |
| 11184x | contentHandler.setMergeAdjacentText( isMergeAdjacentText() ); | |
| 11184x | contentHandler.setStripWhitespaceText( isStripWhitespaceText() ); | |
| 11184x | contentHandler.setIgnoreComments( isIgnoreComments() ); | |
| 11184x | xmlReader.setContentHandler(contentHandler); | |
| 11184x | configureReader(xmlReader, contentHandler); | |
| 11184x | xmlReader.parse(in); | |
| 11182x | return contentHandler.getDocument(); | |
| } | ||
| catch (Exception e) { | ||
| 1/2 2x | if (e instanceof SAXParseException) { | |
| //e.printStackTrace(); | ||
| 0x | SAXParseException parseException = (SAXParseException) e; | |
| 0x | String systemId = parseException.getSystemId(); | |
| 0/2 0x | if ( systemId == null ) { | |
| 0x | systemId = ""; | |
| } | ||
| 0x | String message = "Error on line " | |
| + parseException.getLineNumber() | ||
| + " of document " + systemId | ||
| + " : " + parseException.getMessage(); | ||
| 0x | throw new DocumentException(message, e); | |
| } | ||
| else { | ||
| 2x | throw new DocumentException(e.getMessage(), e); | |
| } | ||
| } | ||
| } | ||
| // Properties | ||
| //------------------------------------------------------------------------- | ||
| /** @return the validation mode, true if validating will be done | ||
| * otherwise false. | ||
| */ | ||
| public boolean isValidating() { | ||
| 22354x | return validating; | |
| } | ||
| /** Sets the validation mode. | ||
| * | ||
| * @param validating indicates whether or not validation should occur. | ||
| */ | ||
| public void setValidation(boolean validating) { | ||
| 0x | this.validating = validating; | |
| 0x | } | |
| /** @return whether internal DTD declarations should be expanded into the DocumentType | ||
| * object or not. | ||
| */ | ||
| public boolean isIncludeInternalDTDDeclarations() { | ||
| 11184x | return includeInternalDTDDeclarations; | |
| } | ||
| /** Sets whether internal DTD declarations should be expanded into the DocumentType | ||
| * object or not. | ||
| * | ||
| * @param includeInternalDTDDeclarations whether or not DTD declarations should be expanded | ||
| * and included into the DocumentType object. | ||
| */ | ||
| public void setIncludeInternalDTDDeclarations(boolean includeInternalDTDDeclarations) { | ||
| 10x | this.includeInternalDTDDeclarations = includeInternalDTDDeclarations; | |
| 10x | } | |
| /** @return whether external DTD declarations should be expanded into the DocumentType | ||
| * object or not. | ||
| */ | ||
| public boolean isIncludeExternalDTDDeclarations() { | ||
| 11184x | return includeExternalDTDDeclarations; | |
| } | ||
| /** Sets whether DTD external declarations should be expanded into the DocumentType | ||
| * object or not. | ||
| * | ||
| * @param includeExternalDTDDeclarations whether or not DTD declarations should be expanded | ||
| * and included into the DocumentType object. | ||
| */ | ||
| public void setIncludeExternalDTDDeclarations(boolean includeExternalDTDDeclarations) { | ||
| 6x | this.includeExternalDTDDeclarations = includeExternalDTDDeclarations; | |
| 6x | } | |
| /** Sets whether String interning | ||
| * is enabled or disabled for element & attribute names and namespace URIs. | ||
| * This proprety is enabled by default. | ||
| */ | ||
| public boolean isStringInternEnabled() { | ||
| 11184x | return stringInternEnabled; | |
| } | ||
| /** Sets whether String interning | ||
| * is enabled or disabled for element & attribute names and namespace URIs | ||
| */ | ||
| public void setStringInternEnabled(boolean stringInternEnabled) { | ||
| 0x | this.stringInternEnabled = stringInternEnabled; | |
| 0x | } | |
| /** Returns whether adjacent text nodes should be merged together. | ||
| * @return Value of property mergeAdjacentText. | ||
| */ | ||
| public boolean isMergeAdjacentText() { | ||
| 11184x | return mergeAdjacentText; | |
| } | ||
| /** Sets whether or not adjacent text nodes should be merged | ||
| * together when parsing. | ||
| * @param mergeAdjacentText New value of property mergeAdjacentText. | ||
| */ | ||
| public void setMergeAdjacentText(boolean mergeAdjacentText) { | ||
| 14x | this.mergeAdjacentText = mergeAdjacentText; | |
| 14x | } | |
| /** Sets whether whitespace between element start and end tags should be ignored | ||
| * | ||
| * @return Value of property stripWhitespaceText. | ||
| */ | ||
| public boolean isStripWhitespaceText() { | ||
| 11184x | return stripWhitespaceText; | |
| } | ||
| /** Sets whether whitespace between element start and end tags should be ignored. | ||
| * | ||
| * @param stripWhitespaceText New value of property stripWhitespaceText. | ||
| */ | ||
| public void setStripWhitespaceText(boolean stripWhitespaceText) { | ||
| 0x | this.stripWhitespaceText = stripWhitespaceText; | |
| 0x | } | |
| /** | ||
| * Returns whether we should ignore comments or not. | ||
| * @return boolean | ||
| */ | ||
| public boolean isIgnoreComments() { | ||
| 11184x | return ignoreComments; | |
| } | ||
| /** | ||
| * Sets whether we should ignore comments or not. | ||
| * @param ignoreComments whether we should ignore comments or not. | ||
| */ | ||
| public void setIgnoreComments(boolean ignoreComments) { | ||
| 0x | this.ignoreComments = ignoreComments; | |
| 0x | } | |
| /** @return the <code>DocumentFactory |