| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * |
| * Copyright (c) 2001 The Apache Software Foundation. |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xerces" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation and was |
| * originally based on software copyright (c) 1999, International |
| * Business Machines, Inc., http://www.apache.org. For more |
| * information on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| package org.apache.xerces.parsers; |
| |
| import org.apache.xerces.dom.DocumentImpl; |
| import org.apache.xerces.dom.EntityReferenceImpl; |
| import org.apache.xerces.dom.TextImpl; |
| |
| import org.apache.xerces.xni.QName; |
| import org.apache.xerces.xni.XMLAttributes; |
| import org.apache.xerces.xni.XMLString; |
| import org.apache.xerces.xni.parser.XMLParserConfiguration; |
| |
| import org.w3c.dom.Attr; |
| import org.w3c.dom.CDATASection; |
| import org.w3c.dom.Comment; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.DocumentType; |
| import org.w3c.dom.DOMImplementation; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.EntityReference; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.ProcessingInstruction; |
| import org.w3c.dom.Text; |
| |
| import org.xml.sax.SAXException; |
| |
| /** |
| * This is the base class of all DOM parsers. It implements the XNI |
| * callback methods to create the DOM tree. After a successful parse of |
| * an XML document, the DOM Document object can be queried using the |
| * <code>getDocument</code> method. The actual pipeline is defined in |
| * parser configuration. |
| * |
| * @author Stubs generated by DesignDoc on Mon Sep 11 11:10:57 PDT 2000 |
| * @author Arnaud Le Hors, IBM |
| * @author Andy Clark, IBM |
| * |
| * @version $Id$ |
| */ |
| public abstract class AbstractDOMParser |
| extends AbstractXMLDocumentParser { |
| |
| // |
| // Data |
| // |
| |
| // dom information |
| |
| /** The document. */ |
| protected Document fDocument; |
| |
| /** The default Xerces document implementation, if used. */ |
| protected DocumentImpl fDocumentImpl; |
| |
| /** Current node. */ |
| protected Node fCurrentNode; |
| |
| // state |
| |
| /** True if inside document. */ |
| protected boolean fInDocument; |
| |
| /** True if inside CDATA section. */ |
| protected boolean fInCDATASection; |
| |
| // data |
| |
| /** Attribute QName. */ |
| private QName fAttrQName = new QName(); |
| |
| // |
| // Constructors |
| // |
| |
| /** Default constructor. */ |
| protected AbstractDOMParser(XMLParserConfiguration config) { |
| super(config); |
| } // <init>(XMLParserConfiguration) |
| |
| // |
| // Public methods |
| // |
| |
| /** Returns the DOM document object. */ |
| public Document getDocument() { |
| return fDocument; |
| } // getDocument():Document |
| |
| // |
| // XMLDocumentParser methods |
| // |
| |
| /** |
| * Resets the parser state. |
| * |
| * @throws SAXException Thrown on initialization error. |
| */ |
| public void reset() throws SAXException { |
| super.reset(); |
| |
| // reset dom information |
| fDocument = null; |
| fCurrentNode = null; |
| |
| // reset state information |
| fInDocument = false; |
| fInDTD = false; |
| fInCDATASection = false; |
| |
| } // reset() |
| |
| // |
| // XMLDocumentHandler methods |
| // |
| |
| /** |
| * This method notifies of the start of an entity. The DTD has the |
| * pseudo-name of "[dtd]; parameter entity names start with '%'; and |
| * general entity names are just the entity name. |
| * <p> |
| * <strong>Note:</strong> Since the DTD is an entity, the handler |
| * will be notified of the start of the DTD entity by calling the |
| * startEntity method with the entity name "[dtd]" <em>before</em> calling |
| * the startDTD method. |
| * <p> |
| * <strong>Note:</strong> This method is not called for entity references |
| * appearing as part of attribute values. |
| * |
| * @param name The name of the entity. |
| * @param publicId The public identifier of the entity if the entity |
| * is external, null otherwise. |
| * @param systemId The system identifier of the entity if the entity |
| * is external, null otherwise. |
| * @param encoding The auto-detected IANA encoding name of the entity |
| * stream. This value will be null in those situations |
| * where the entity encoding is not auto-detected (e.g. |
| * internal parameter entities). |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void startEntity(String name, String publicId, String systemId, |
| String encoding) throws SAXException { |
| |
| if (fInDocument && !fInDTD) { |
| EntityReference entityRef = fDocument.createEntityReference(name); |
| fCurrentNode.appendChild(entityRef); |
| fCurrentNode = entityRef; |
| } |
| |
| } // startEntity(String,String,String,String) |
| |
| /** |
| * A comment. |
| * |
| * @param text The text in the comment. |
| * |
| * @throws SAXException Thrown by application to signal an error. |
| */ |
| public void comment(XMLString text) throws SAXException { |
| |
| Comment comment = fDocument.createComment(text.toString()); |
| fCurrentNode.appendChild(comment); |
| |
| } // comment(XMLString) |
| |
| /** |
| * A processing instruction. Processing instructions consist of a |
| * target name and, optionally, text data. The data is only meaningful |
| * to the application. |
| * <p> |
| * Typically, a processing instruction's data will contain a series |
| * of pseudo-attributes. These pseudo-attributes follow the form of |
| * element attributes but are <strong>not</strong> parsed or presented |
| * to the application as anything other than text. The application is |
| * responsible for parsing the data. |
| * |
| * @param target The target. |
| * @param data The data or null if none specified. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void processingInstruction(String target, XMLString data) |
| throws SAXException { |
| |
| ProcessingInstruction pi = fDocument.createProcessingInstruction(target, data.toString()); |
| fCurrentNode.appendChild(pi); |
| |
| } // processingInstruction(String,XMLString) |
| |
| /** |
| * The start of the document. |
| * |
| * @param systemId The system identifier of the entity if the entity |
| * is external, null otherwise. |
| * @param encoding The auto-detected IANA encoding name of the entity |
| * stream. This value will be null in those situations |
| * where the entity encoding is not auto-detected (e.g. |
| * internal entities or a document entity that is |
| * parsed from a java.io.Reader). |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void startDocument(String systemId, String encoding) |
| throws SAXException { |
| |
| fInDocument = true; |
| fDocument = new DocumentImpl(); |
| fDocumentImpl = (DocumentImpl)fDocument; |
| fCurrentNode = fDocument; |
| // set DOM error checking off |
| fDocumentImpl.setErrorChecking(false); |
| |
| } // startDocument(String,String) |
| |
| /** |
| * Notifies of the presence of the DOCTYPE line in the document. |
| * |
| * @param rootElement The name of the root element. |
| * @param publicId The public identifier if an external DTD or null |
| * if the external DTD is specified using SYSTEM. |
| * @param systemId The system identifier if an external DTD, null |
| * otherwise. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void doctypeDecl(String rootElement, String publicId, String systemId) |
| throws SAXException { |
| |
| DocumentImpl docimpl = (DocumentImpl)fDocument; |
| DocumentType doctype = docimpl.createDocumentType(rootElement, publicId, systemId); |
| fCurrentNode.appendChild(doctype); |
| |
| } // doctypeDecl(String,String,String) |
| |
| /** |
| * The start of an element. If the document specifies the start element |
| * by using an empty tag, then the startElement method will immediately |
| * be followed by the endElement method, with no intervening methods. |
| * |
| * @param element The name of the element. |
| * @param attributes The element attributes. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void startElement(QName element, XMLAttributes attributes) |
| throws SAXException { |
| |
| Element elementNode = element.prefix != null |
| ? fDocument.createElementNS(element.uri, element.rawname) |
| : fDocument.createElement(element.rawname); |
| int attrCount = attributes.getLength(); |
| for (int i = 0; i < attrCount; i++) { |
| attributes.getName(i, fAttrQName); |
| Attr attr = fAttrQName.prefix != null |
| ? fDocument.createAttributeNS(fAttrQName.uri, fAttrQName.rawname) |
| : fDocument.createAttribute(fAttrQName.rawname); |
| attr.setNodeValue(attributes.getValue(i)); |
| // REVISIT: Handle entities in attribute value. |
| elementNode.setAttributeNode(attr); |
| |
| // build entity references |
| int entityCount = attributes.getEntityCount(i); |
| if (entityCount > 0) { |
| Text text = (Text)attr.getFirstChild(); |
| buildAttrEntityRefs(text, attributes, i, entityCount, 0, 0); |
| } |
| } |
| fCurrentNode.appendChild(elementNode); |
| fCurrentNode = elementNode; |
| |
| } // startElement(QName,XMLAttributes) |
| |
| /** |
| * Character content. |
| * |
| * @param text The content. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void characters(XMLString text) throws SAXException { |
| |
| if (fInCDATASection) { |
| CDATASection cdataSection = (CDATASection)fCurrentNode; |
| cdataSection.appendData(text.toString()); |
| } |
| else if (!fInDTD) { |
| Node child = fCurrentNode.getLastChild(); |
| if (child != null && child.getNodeType() == Node.TEXT_NODE) { |
| Text textNode = (Text)child; |
| textNode.appendData(text.toString()); |
| } |
| else { |
| Text textNode = fDocument.createTextNode(text.toString()); |
| fCurrentNode.appendChild(textNode); |
| } |
| } |
| |
| } // characters(XMLString) |
| |
| /** |
| * Ignorable whitespace. For this method to be called, the document |
| * source must have some way of determining that the text containing |
| * only whitespace characters should be considered ignorable. For |
| * example, the validator can determine if a length of whitespace |
| * characters in the document are ignorable based on the element |
| * content model. |
| * |
| * @param text The ignorable whitespace. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void ignorableWhitespace(XMLString text) throws SAXException { |
| |
| Node child = fCurrentNode.getLastChild(); |
| if (child != null && child.getNodeType() == Node.TEXT_NODE) { |
| Text textNode = (Text)child; |
| textNode.appendData(text.toString()); |
| } |
| else { |
| Text textNode = fDocument.createTextNode(text.toString()); |
| if (fDocumentImpl != null) { |
| TextImpl textNodeImpl = (TextImpl)textNode; |
| textNodeImpl.setIgnorableWhitespace(true); |
| } |
| fCurrentNode.appendChild(textNode); |
| } |
| |
| } // ignorableWhitespace(XMLString) |
| |
| /** |
| * The end of an element. |
| * |
| * @param element The name of the element. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void endElement(QName element) throws SAXException { |
| |
| fCurrentNode = fCurrentNode.getParentNode(); |
| |
| } // endElement(QName) |
| |
| /** |
| * The end of a namespace prefix mapping. This method will only be |
| * called when namespace processing is enabled. |
| * |
| * @param prefix The namespace prefix. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void endPrefixMapping(String prefix) throws SAXException { |
| } // endPrefixMapping(String) |
| |
| /** |
| * The start of a CDATA section. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void startCDATA() throws SAXException { |
| |
| fInCDATASection = true; |
| CDATASection cdataSection = fDocument.createCDATASection(""); |
| fCurrentNode.appendChild(cdataSection); |
| fCurrentNode = cdataSection; |
| |
| } // startCDATA() |
| |
| /** |
| * The end of a CDATA section. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void endCDATA() throws SAXException { |
| |
| fInCDATASection = false; |
| fCurrentNode = fCurrentNode.getParentNode(); |
| |
| } // endCDATA() |
| |
| /** |
| * The end of the document. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void endDocument() throws SAXException { |
| |
| // set DOM error checking back on |
| if (fDocumentImpl != null) { |
| fDocumentImpl.setErrorChecking(true); |
| } |
| fInDocument = false; |
| fCurrentNode = null; |
| |
| } // endDocument() |
| |
| /** |
| * This method notifies the end of an entity. The DTD has the pseudo-name |
| * of "[dtd]; parameter entity names start with '%'; and general entity |
| * names are just the entity name. |
| * <p> |
| * <strong>Note:</strong> Since the DTD is an entity, the handler |
| * will be notified of the end of the DTD entity by calling the |
| * endEntity method with the entity name "[dtd]" <em>after</em> calling |
| * the endDTD method. |
| * <p> |
| * <strong>Note:</strong> This method is not called for entity references |
| * appearing as part of attribute values. |
| * |
| * @param name The name of the entity. |
| * |
| * @throws SAXException Thrown by handler to signal an error. |
| */ |
| public void endEntity(String name) throws SAXException { |
| |
| if (fInDocument && !fInDTD) { |
| fCurrentNode = fCurrentNode.getParentNode(); |
| } |
| |
| } // endEntity(String) |
| |
| // |
| // Protected methods |
| // |
| |
| /** |
| * Builds entity references in attribute values. This method is |
| * recursive because entity references can contain entity |
| * references. |
| * |
| * @param text The text node that needs to be split. |
| * @param attributes The attribute information. |
| * @param attrIndex The attribute index. |
| * @param entityCount The number of entities. This is passed as |
| * a convenience so that this method doesn't |
| * have to call XMLAttributes#getEntityCount. |
| * The caller already has the entity count so |
| * it's kind of a waste to make each invocation |
| * of this method query it again. |
| * @param entityIndex The entity index that this method invocation |
| * should start building from. |
| * @param textOffset The offset at which the start of this text |
| * should be considered. We need this to adjust |
| * the offset since the characters in the current |
| * text string are indexed from zero. |
| * |
| * @return Returns the number of entities built by this method. |
| */ |
| protected int buildAttrEntityRefs(Text text, XMLAttributes attributes, |
| int attrIndex, |
| int entityCount, int entityIndex, |
| int textOffset) { |
| |
| // iterate over entities |
| String textString = text.getNodeValue(); |
| int textLength = textString.length(); |
| int i = entityIndex; |
| while (i < entityCount) { |
| |
| // get entity information |
| String entityName = attributes.getEntityName(attrIndex, i); |
| int entityOffset = attributes.getEntityOffset(attrIndex, i); |
| int entityLength = attributes.getEntityLength(attrIndex, i); |
| |
| // is this entity not in this text? |
| if (entityOffset > textOffset + textLength) { |
| break; |
| } |
| |
| // split text into 3 parts; first part remains the |
| // text node that was passed into this method |
| Text text1 = text.splitText(entityOffset - textOffset); |
| Text text2 = text1.splitText(entityLength); |
| |
| // create entity reference |
| EntityReference entityRef = fDocument.createEntityReference(entityName); |
| ((EntityReferenceImpl)entityRef).setReadOnly(false, false); |
| |
| // insert entity ref into tree and append middle text |
| Node parent = text.getParentNode(); |
| parent.replaceChild(entityRef, text1); |
| entityRef.appendChild(text1); |
| |
| // see if there are any nested entity refs |
| if (i < entityCount - 1) { |
| int nextEntityOffset = attributes.getEntityOffset(attrIndex, i + 1); |
| if (nextEntityOffset < entityOffset + entityLength) { |
| // NOTE: Notice that we're incrementing the entity |
| // index variable. Since the following call will |
| // "consume" some of the entities. |
| i += buildAttrEntityRefs(text1, attributes, attrIndex, entityCount, i + 1, entityOffset); |
| } |
| } |
| ((EntityReferenceImpl)entityRef).setReadOnly(true, false); |
| |
| // adjust text node |
| textOffset += text.getLength() + entityLength; |
| text = text2; |
| |
| // increment and keep going |
| i++; |
| } |
| |
| // return number of entities we handled |
| return i - entityIndex; |
| |
| } // buildAttrEntityRefs(Text,XMLAttributes,int,int,int,int):int |
| |
| } // class AbstractDOMParser |