| /* | ||
| * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved. | ||
| * | ||
| * This software is open source. | ||
| * See the bottom of this file for the licence. | ||
| * | ||
| * $Id: XPP3Reader.java,v 1.1 2004/09/08 19:31:11 maartenc Exp $ | ||
| */ | ||
| package org.dom4j.io; | ||
| import org.dom4j.*; | ||
| import org.xmlpull.v1.XmlPullParser; | ||
| import org.xmlpull.v1.XmlPullParserException; | ||
| import org.xmlpull.v1.XmlPullParserFactory; | ||
| import java.io.*; | ||
| import java.net.URL; | ||
| /** | ||
| * <p><code>XPP3Reader</code> is a Reader of DOM4J documents that | ||
| * uses the fast | ||
| * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 3.x</a>. | ||
| * It is very fast for use in SOAP style environments.</p> | ||
| * | ||
| * @author <a href="mailto:pelle@neubia.com">Pelle Braendgaard</a> | ||
| * @author <a href="mailto:jstrachan@apache.org">James Strachan</a> | ||
| * @version $Revision: 1.1 $ | ||
| */ | ||
| public class XPP3Reader { | ||
| /** | ||
| * <code>DocumentFactory</code> used to create new document objects | ||
| */ | ||
| private DocumentFactory factory; | ||
| /** | ||
| * <code>XmlPullParser</code> used to parse XML | ||
| */ | ||
| private XmlPullParser xppParser; | ||
| /** | ||
| * <code>XmlPullParser</code> used to parse XML | ||
| */ | ||
| private XmlPullParserFactory xppFactory; | ||
| /** | ||
| * DispatchHandler to call when each <code>Element</code> is encountered | ||
| */ | ||
| private DispatchHandler dispatchHandler; | ||
| 0x | public XPP3Reader() { | |
| 0x | } | |
| 0x | public XPP3Reader(DocumentFactory factory) { | |
| 0x | this.factory = factory; | |
| 0x | } | |
| /** | ||
| * <p>Reads a Document from the given <code>File</code></p> | ||
| * | ||
| * @param file is the <code>File</code> to read from. | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| * @throws java.net.MalformedURLException if a URL could not be made for the given File | ||
| */ | ||
| public Document read(File file) throws DocumentException, IOException, XmlPullParserException { | ||
| 0x | String systemID = file.getAbsolutePath(); | |
| 0x | return read(new BufferedReader(new FileReader(file)), systemID); | |
| } | ||
| /** | ||
| * <p>Reads a Document from the given <code>URL</code></p> | ||
| * | ||
| * @param url <code>URL</code> to read from. | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(URL url) throws DocumentException, IOException, XmlPullParserException { | ||
| 0x | String systemID = url.toExternalForm(); | |
| 0x | return read(createReader(url.openStream()), systemID); | |
| } | ||
| /** | ||
| * <p>Reads a Document from the given URL or filename.</p> | ||
| * <p/> | ||
| * <p/> | ||
| * If the systemID contains a <code>':'</code> character then it is | ||
| * assumed to be a URL otherwise its assumed to be a file name. | ||
| * If you want finer grained control over this mechansim then please | ||
| * explicitly pass in either a {@link URL} or a {@link File} instance | ||
| * instead of a {@link String} to denote the source of the document. | ||
| * </p> | ||
| * | ||
| * @param systemID is a URL for a document or a file name. | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| * @throws java.net.MalformedURLException if a URL could not be made for the given File | ||
| */ | ||
| public Document read(String systemID) throws DocumentException, IOException, XmlPullParserException { | ||
| 0/2 0x | if (systemID.indexOf(':') >= 0) { | |
| // lets assume its a URL | ||
| 0x | return read(new URL(systemID)); | |
| } else { | ||
| // lets assume that we are given a file name | ||
| 0x | return read(new File(systemID)); | |
| } | ||
| } | ||
| /** | ||
| * <p>Reads a Document from the given stream</p> | ||
| * | ||
| * @param in <code>InputStream</code> to read from. | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(InputStream in) throws DocumentException, IOException, XmlPullParserException { | ||
| 0x | return read(createReader(in)); | |
| } | ||
| /** | ||
| * <p>Reads a Document from the given <code>Reader</code></p> | ||
| * | ||
| * @param reader is the reader for the input | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(Reader reader) throws DocumentException, IOException, XmlPullParserException { | ||
| 0x | getXPPParser().setInput(reader); | |
| 0x | return parseDocument(); | |
| } | ||
| /** | ||
| * <p>Reads a Document from the given array of characters</p> | ||
| * | ||
| * @param text is the text to parse | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(char[] text) throws DocumentException, IOException, XmlPullParserException { | ||
| 0x | getXPPParser().setInput(new CharArrayReader(text)); | |
| 0x | return parseDocument(); | |
| } | ||
| /** | ||
| * <p>Reads a Document from the given stream</p> | ||
| * | ||
| * @param in <code>InputStream</code> to read from. | ||
| * @param systemID is the URI for the input | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(InputStream in, String systemID) throws DocumentException, IOException, XmlPullParserException { | ||
| 0x | return read(createReader(in), systemID); | |
| } | ||
| /** | ||
| * <p>Reads a Document from the given <code>Reader</code></p> | ||
| * | ||
| * @param reader is the reader for the input | ||
| * @param systemID is the URI for the input | ||
| * @return the newly created Document instance | ||
| * @throws DocumentException if an error occurs during parsing. | ||
| */ | ||
| public Document read(Reader reader, String systemID) throws DocumentException, IOException, XmlPullParserException { | ||
| 0x | Document document = read(reader); | |
| 0x | document.setName(systemID); | |
| 0x | return document; | |
| } | ||
| // Properties | ||
| //------------------------------------------------------------------------- | ||
| public XmlPullParser getXPPParser() throws XmlPullParserException { | ||
| 0/2 0x | if (xppParser == null) { | |
| 0x | xppParser = getXPPFactory().newPullParser(); | |
| } | ||
| 0x | return xppParser; | |
| } | ||
| public XmlPullParserFactory getXPPFactory() throws XmlPullParserException { | ||
| 0/2 0x | if (xppFactory == null) { | |
| 0x | xppFactory = XmlPullParserFactory.newInstance(); | |
| } | ||
| 0x | xppFactory.setNamespaceAware(true); | |
| 0x | return xppFactory; | |
| } | ||
| public void setXPPFactory(XmlPullParserFactory xppFactory) { | ||
| 0x | this.xppFactory = xppFactory; | |
| 0x | } | |
| /** | ||
| * @return the <code>DocumentFactory</code> used to create document objects | ||
| */ | ||
| public DocumentFactory getDocumentFactory() { | ||
| 0/2 0x | if (factory == null) { | |
| 0x | factory = DocumentFactory.getInstance(); | |
| } | ||
| 0x | return factory; | |
| } | ||
| /** | ||
| * <p>This sets the <code>DocumentFactory</code> used to create new documents. | ||
| * This method allows the building of custom DOM4J tree objects to be implemented | ||
| * easily using a custom derivation of {@link DocumentFactory}</p> | ||
| * | ||
| * @param factory <code>DocumentFactory</code> used to create DOM4J objects | ||
| */ | ||
| public void setDocumentFactory(DocumentFactory factory) { | ||
| 0x | this.factory = factory; | |
| 0x | } | |
| /** | ||
| * Adds the <code>ElementHandler</code> to be called when the | ||
| * specified path is encounted. | ||
| * | ||
| * @param path is the path to be handled | ||
| * @param handler is the <code>ElementHandler</code> to be called | ||
| * by the event based processor. | ||
| */ | ||
| public void addHandler(String path, ElementHandler handler) { | ||
| 0x | getDispatchHandler().addHandler(path, handler); | |
| 0x | } | |
| /** | ||
| * Removes the <code>ElementHandler</code> from the event based | ||
| * processor, for the specified path. | ||
| * | ||
| * @param path is the path to remove the <code>ElementHandler</code> for. | ||
| */ | ||
| public void removeHandler(String path) { | ||
| 0x | getDispatchHandler().removeHandler(path); | |
| 0x | } | |
| /** | ||
| * When multiple <code>ElementHandler</code> instances have been | ||
| * registered, this will set a default <code>ElementHandler</code> | ||
| * to be called for any path which does <b>NOT</b> have a handler | ||
| * registered. | ||
| * | ||
| * @param handler is the <code>ElementHandler</code> to be called | ||
| * by the event based processor. | ||
| */ | ||
| public void setDefaultHandler(ElementHandler handler) { | ||
| 0x | getDispatchHandler().setDefaultHandler(handler); | |
| 0x | } | |
| // Implementation methods | ||
| //------------------------------------------------------------------------- | ||
| protected Document parseDocument() throws DocumentException, IOException, XmlPullParserException { | ||
| 0x | DocumentFactory df = getDocumentFactory(); | |
| 0x | Document document = df.createDocument(); | |
| 0x | Element parent = null; | |
| 0x | XmlPullParser pp = getXPPParser(); | |
| 0x | pp.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true); | |
| // pp.setFeature(XmlPullParser.FEATURE_PROCESS_DOCDECL, true); | ||
| // pp.setFeature(XmlPullParser.FEATURE_VALIDATION, true); | ||
| // pp.setFeature("http://xmlpull.org/v1/doc/features.html#xml-roundtrip", true); | ||
| while (true) { | ||
| // int type = pp.next(); | ||
| 0x | int type = pp.nextToken(); | |
| 0x | switch (type) { | |
| case XmlPullParser.PROCESSING_INSTRUCTION: | ||
| { | ||
| 0x | String text = pp.getText(); | |
| 0x | int loc = text.indexOf(" "); | |
| 0/2 0x | if (loc >= 0) { | |
| 0x | document.addProcessingInstruction(text.substring(0, loc), text.substring(loc + 1)); | |
| } else | ||
| 0x | document.addProcessingInstruction(text, ""); | |
| 0x | break; | |
| } | ||
| case XmlPullParser.COMMENT: | ||
| { | ||
| 0/2 0x | if (parent != null) | |
| 0x | parent.addComment(pp.getText()); | |
| else | ||
| 0x | document.addComment(pp.getText()); | |
| 0x | break; | |
| } | ||
| case XmlPullParser.CDSECT: | ||
| { | ||
| 0/2 0x | if (parent != null) { | |
| 0x | parent.addCDATA(pp.getText()); | |
| } else { | ||
| 0x | throw new DocumentException("Cannot have text content outside of the root document"); | |
| } | ||
| break; | ||
| } | ||
| case XmlPullParser.ENTITY_REF: | ||
| { | ||
| 0x | break; | |
| } | ||
| case XmlPullParser.END_DOCUMENT: | ||
| { | ||
| 0x | return document; | |
| } | ||
| case XmlPullParser.START_TAG: | ||
| { | ||
| 0/2 0x | QName qname = (pp.getPrefix() == null) ? df.createQName(pp.getName(), pp.getNamespace()) : df.createQName(pp.getName(), pp.getPrefix(), pp.getNamespace()); | |
| 0x | Element newElement = df.createElement(qname); | |
| 0x | int nsStart = pp.getNamespaceCount(pp.getDepth() - 1); | |
| 0x | int nsEnd = pp.getNamespaceCount(pp.getDepth()); | |
| 0/2 0x | for (int i = nsStart; i < nsEnd; i++) | |
| 0/2 0x | if (pp.getNamespacePrefix(i) != null) | |
| 0x | newElement.addNamespace(pp.getNamespacePrefix(i), pp.getNamespaceUri(i)); | |
| 0/2 0x | for (int i = 0; i < pp.getAttributeCount(); i++) { | |
| 0/2 0x | QName qa = (pp.getAttributePrefix(i) == null) ? df.createQName(pp.getAttributeName(i)) : df.createQName(pp.getAttributeName(i), pp.getAttributePrefix(i), pp.getAttributeNamespace(i)); | |
| 0x | newElement.addAttribute(qa, pp.getAttributeValue(i)); | |
| } | ||
| 0/2 0x | if (parent != null) { | |
| 0x | parent.add(newElement); | |
| } else { | ||
| 0x | document.add(newElement); | |
| } | ||
| 0x | parent = newElement; | |
| 0x | break; | |
| } | ||
| case XmlPullParser.END_TAG: | ||
| { | ||
| 0/2 0x | if (parent != null) { | |
| 0x | parent = parent.getParent(); | |
| } | ||
| break; | ||
| } | ||
| case XmlPullParser.TEXT: | ||
| { | ||
| 0x | String text = pp.getText(); | |
| 0/2 0x | if (parent != null) { | |
| 0x | parent.addText(text); | |
| } else { | ||
| 0x | throw new DocumentException("Cannot have text content outside of the root document"); | |
| } | ||
| break; | ||
| } | ||
| default: | ||
| { | ||
| 0x | ; | |
| } | ||
| } | ||
| } | ||
| } | ||
| protected DispatchHandler getDispatchHandler() { | ||
| 0/2 0x | if (dispatchHandler == null) { | |
| 0x | dispatchHandler = new DispatchHandler(); | |
| } | ||
| 0x | return dispatchHandler; | |
| } | ||
| protected void setDispatchHandler(DispatchHandler dispatchHandler) { | ||
| 0x | this.dispatchHandler = dispatchHandler; | |
| 0x | } | |
| /** | ||
| * Factory method to create a Reader from the given InputStream. | ||
| */ | ||
| protected Reader createReader(InputStream in) throws IOException { | ||
| 0x | return new BufferedReader(new InputStreamReader(in)); | |
| } | ||
| } | ||
| /* | ||
| * Redistribution and use of this software and associated documentation | ||
| * ("Software"), with or without modification, are permitted provided | ||
| * that the following conditions are met: | ||
| * | ||
| * 1. Redistributions of source code must retain copyright | ||
| * statements and notices. Redistributions must also contain a | ||
| * copy of this document. | ||
| * | ||
| * 2. Redistributions in binary form must reproduce the | ||
| * above copyright notice, this list of conditions and the | ||
| * following disclaimer in the documentation and/or other | ||
| * materials provided with the distribution. | ||
| * | ||
| * 3. The name "DOM4J" must not be used to endorse or promote | ||
| * products derived from this Software without prior written | ||
| * permission of MetaStuff, Ltd. For written permission, | ||
| * please contact dom4j-info@metastuff.com. | ||
| * | ||
| * 4. Products derived from this Software may not be called "DOM4J" | ||
| * nor may "DOM4J" appear in their names without prior written | ||
| * permission of MetaStuff, Ltd. DOM4J is a registered | ||
| * trademark of MetaStuff, Ltd. | ||
| * | ||
| * 5. Due credit should be given to the DOM4J Project - | ||
| * http://www.dom4j.org | ||
| * | ||
| * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS | ||
| * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT | ||
| * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | ||
| * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL | ||
| * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, | ||
| * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
| * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
| * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| * | ||
| * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved. | ||
| * | ||
| * $Id: XPP3Reader.java,v 1.1 2004/09/08 19:31:11 maartenc Exp $ | ||
| */ |