| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * |
| * Copyright (c) 1999-2003 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xerces" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation and was |
| * originally based on software copyright (c) 1999, International |
| * Business Machines, Inc., http://www.apache.org. For more |
| * information on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| package dom; |
| |
| import java.lang.reflect.Method; |
| |
| import java.io.OutputStream; |
| import java.io.OutputStreamWriter; |
| import java.io.PrintWriter; |
| import java.io.UnsupportedEncodingException; |
| |
| import org.w3c.dom.Attr; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.DocumentType; |
| import org.w3c.dom.NamedNodeMap; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| |
| import org.xml.sax.SAXException; |
| import org.xml.sax.SAXParseException; |
| |
| /** |
| * A sample DOM writer. This sample program illustrates how to |
| * traverse a DOM tree in order to print a document that is parsed. |
| * |
| * @author Andy Clark, IBM |
| * |
| * @version $Id$ |
| */ |
| public class Writer { |
| |
| // |
| // Constants |
| // |
| |
| // feature ids |
| |
| /** Namespaces feature id (http://xml.org/sax/features/namespaces). */ |
| protected static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces"; |
| |
| /** Validation feature id (http://xml.org/sax/features/validation). */ |
| protected static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation"; |
| |
| /** Schema validation feature id (http://apache.org/xml/features/validation/schema). */ |
| protected static final String SCHEMA_VALIDATION_FEATURE_ID = "http://apache.org/xml/features/validation/schema"; |
| |
| /** Schema full checking feature id (http://apache.org/xml/features/validation/schema-full-checking). */ |
| protected static final String SCHEMA_FULL_CHECKING_FEATURE_ID = "http://apache.org/xml/features/validation/schema-full-checking"; |
| |
| // property ids |
| |
| /** Lexical handler property id (http://xml.org/sax/properties/lexical-handler). */ |
| protected static final String LEXICAL_HANDLER_PROPERTY_ID = "http://xml.org/sax/properties/lexical-handler"; |
| |
| // default settings |
| |
| /** Default parser name. */ |
| protected static final String DEFAULT_PARSER_NAME = "dom.wrappers.Xerces"; |
| |
| /** Default namespaces support (true). */ |
| protected static final boolean DEFAULT_NAMESPACES = true; |
| |
| /** Default validation support (false). */ |
| protected static final boolean DEFAULT_VALIDATION = false; |
| |
| /** Default Schema validation support (false). */ |
| protected static final boolean DEFAULT_SCHEMA_VALIDATION = false; |
| |
| /** Default Schema full checking support (false). */ |
| protected static final boolean DEFAULT_SCHEMA_FULL_CHECKING = false; |
| |
| /** Default canonical output (false). */ |
| protected static final boolean DEFAULT_CANONICAL = false; |
| |
| // |
| // Data |
| // |
| |
| /** Print writer. */ |
| protected PrintWriter fOut; |
| |
| /** Canonical output. */ |
| protected boolean fCanonical; |
| |
| /** Processing XML 1.1 document. */ |
| protected boolean fXML11; |
| |
| // |
| // Constructors |
| // |
| |
| /** Default constructor. */ |
| public Writer() { |
| } // <init>() |
| |
| public Writer(boolean canonical) { |
| fCanonical = canonical; |
| } // <init>(boolean) |
| |
| // |
| // Public methods |
| // |
| |
| /** Sets whether output is canonical. */ |
| public void setCanonical(boolean canonical) { |
| fCanonical = canonical; |
| } // setCanonical(boolean) |
| |
| /** Sets the output stream for printing. */ |
| public void setOutput(OutputStream stream, String encoding) |
| throws UnsupportedEncodingException { |
| |
| if (encoding == null) { |
| encoding = "UTF8"; |
| } |
| |
| java.io.Writer writer = new OutputStreamWriter(stream, encoding); |
| fOut = new PrintWriter(writer); |
| |
| } // setOutput(OutputStream,String) |
| |
| /** Sets the output writer. */ |
| public void setOutput(java.io.Writer writer) { |
| |
| fOut = writer instanceof PrintWriter |
| ? (PrintWriter)writer : new PrintWriter(writer); |
| |
| } // setOutput(java.io.Writer) |
| |
| /** Writes the specified node, recursively. */ |
| public void write(Node node) { |
| |
| // is there anything to do? |
| if (node == null) { |
| return; |
| } |
| |
| short type = node.getNodeType(); |
| switch (type) { |
| case Node.DOCUMENT_NODE: { |
| Document document = (Document)node; |
| fXML11 = "1.1".equals(getVersion(document)); |
| if (!fCanonical) { |
| if (fXML11) { |
| fOut.println("<?xml version=\"1.1\" encoding=\"UTF-8\"?>"); |
| } |
| else { |
| fOut.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); |
| } |
| fOut.flush(); |
| write(document.getDoctype()); |
| } |
| write(document.getDocumentElement()); |
| break; |
| } |
| |
| case Node.DOCUMENT_TYPE_NODE: { |
| DocumentType doctype = (DocumentType)node; |
| fOut.print("<!DOCTYPE "); |
| fOut.print(doctype.getName()); |
| String publicId = doctype.getPublicId(); |
| String systemId = doctype.getSystemId(); |
| if (publicId != null) { |
| fOut.print(" PUBLIC '"); |
| fOut.print(publicId); |
| fOut.print("' '"); |
| fOut.print(systemId); |
| fOut.print('\''); |
| } |
| else if (systemId != null) { |
| fOut.print(" SYSTEM '"); |
| fOut.print(systemId); |
| fOut.print('\''); |
| } |
| String internalSubset = doctype.getInternalSubset(); |
| if (internalSubset != null) { |
| fOut.println(" ["); |
| fOut.print(internalSubset); |
| fOut.print(']'); |
| } |
| fOut.println('>'); |
| break; |
| } |
| |
| case Node.ELEMENT_NODE: { |
| fOut.print('<'); |
| fOut.print(node.getNodeName()); |
| Attr attrs[] = sortAttributes(node.getAttributes()); |
| for (int i = 0; i < attrs.length; i++) { |
| Attr attr = attrs[i]; |
| fOut.print(' '); |
| fOut.print(attr.getNodeName()); |
| fOut.print("=\""); |
| normalizeAndPrint(attr.getNodeValue(), true); |
| fOut.print('"'); |
| } |
| fOut.print('>'); |
| fOut.flush(); |
| |
| Node child = node.getFirstChild(); |
| while (child != null) { |
| write(child); |
| child = child.getNextSibling(); |
| } |
| break; |
| } |
| |
| case Node.ENTITY_REFERENCE_NODE: { |
| if (fCanonical) { |
| Node child = node.getFirstChild(); |
| while (child != null) { |
| write(child); |
| child = child.getNextSibling(); |
| } |
| } |
| else { |
| fOut.print('&'); |
| fOut.print(node.getNodeName()); |
| fOut.print(';'); |
| fOut.flush(); |
| } |
| break; |
| } |
| |
| case Node.CDATA_SECTION_NODE: { |
| if (fCanonical) { |
| normalizeAndPrint(node.getNodeValue(), false); |
| } |
| else { |
| fOut.print("<![CDATA["); |
| fOut.print(node.getNodeValue()); |
| fOut.print("]]>"); |
| } |
| fOut.flush(); |
| break; |
| } |
| |
| case Node.TEXT_NODE: { |
| normalizeAndPrint(node.getNodeValue(), false); |
| fOut.flush(); |
| break; |
| } |
| |
| case Node.PROCESSING_INSTRUCTION_NODE: { |
| fOut.print("<?"); |
| fOut.print(node.getNodeName()); |
| String data = node.getNodeValue(); |
| if (data != null && data.length() > 0) { |
| fOut.print(' '); |
| fOut.print(data); |
| } |
| fOut.print("?>"); |
| fOut.flush(); |
| break; |
| } |
| |
| case Node.COMMENT_NODE: { |
| if (!fCanonical) { |
| fOut.print("<!--"); |
| String comment = node.getNodeValue(); |
| if (comment != null && comment.length() > 0) { |
| fOut.print(comment); |
| } |
| fOut.print("-->"); |
| fOut.flush(); |
| } |
| } |
| } |
| |
| if (type == Node.ELEMENT_NODE) { |
| fOut.print("</"); |
| fOut.print(node.getNodeName()); |
| fOut.print('>'); |
| fOut.flush(); |
| } |
| |
| } // write(Node) |
| |
| /** Returns a sorted list of attributes. */ |
| protected Attr[] sortAttributes(NamedNodeMap attrs) { |
| |
| int len = (attrs != null) ? attrs.getLength() : 0; |
| Attr array[] = new Attr[len]; |
| for (int i = 0; i < len; i++) { |
| array[i] = (Attr)attrs.item(i); |
| } |
| for (int i = 0; i < len - 1; i++) { |
| String name = array[i].getNodeName(); |
| int index = i; |
| for (int j = i + 1; j < len; j++) { |
| String curName = array[j].getNodeName(); |
| if (curName.compareTo(name) < 0) { |
| name = curName; |
| index = j; |
| } |
| } |
| if (index != i) { |
| Attr temp = array[i]; |
| array[i] = array[index]; |
| array[index] = temp; |
| } |
| } |
| |
| return array; |
| |
| } // sortAttributes(NamedNodeMap):Attr[] |
| |
| // |
| // Protected methods |
| // |
| |
| /** Normalizes and prints the given string. */ |
| protected void normalizeAndPrint(String s, boolean isAttValue) { |
| |
| int len = (s != null) ? s.length() : 0; |
| for (int i = 0; i < len; i++) { |
| char c = s.charAt(i); |
| normalizeAndPrint(c, isAttValue); |
| } |
| |
| } // normalizeAndPrint(String,boolean) |
| |
| /** Normalizes and print the given character. */ |
| protected void normalizeAndPrint(char c, boolean isAttValue) { |
| |
| switch (c) { |
| case '<': { |
| fOut.print("<"); |
| break; |
| } |
| case '>': { |
| fOut.print(">"); |
| break; |
| } |
| case '&': { |
| fOut.print("&"); |
| break; |
| } |
| case '"': { |
| // A '"' that appears in character data |
| // does not need to be escaped. |
| if (isAttValue) { |
| fOut.print("""); |
| } |
| else { |
| fOut.print("\""); |
| } |
| break; |
| } |
| case '\r': { |
| // If CR is part of the document's content, it |
| // must not be printed as a literal otherwise |
| // it would be normalized to LF when the document |
| // is reparsed. |
| fOut.print("
"); |
| break; |
| } |
| case '\n': { |
| if (fCanonical) { |
| fOut.print("
"); |
| break; |
| } |
| // else, default print char |
| } |
| default: { |
| // In XML 1.1, control chars in the ranges [#x1-#x1F, #x7F-#x9F] must be escaped. |
| // |
| // Escape space characters that would be normalized to #x20 in attribute values |
| // when the document is reparsed. |
| // |
| // Escape NEL (0x85) and LSEP (0x2028) that appear in content |
| // if the document is XML 1.1, since they would be normalized to LF |
| // when the document is reparsed. |
| if (fXML11 && ((c >= 0x01 && c <= 0x1F && c != 0x09 && c != 0x0A) |
| || (c >= 0x7F && c <= 0x9F) || c == 0x2028) |
| || isAttValue && (c == 0x09 || c == 0x0A)) { |
| fOut.print("&#x"); |
| fOut.print(Integer.toHexString(c).toUpperCase()); |
| fOut.print(";"); |
| } |
| else { |
| fOut.print(c); |
| } |
| } |
| } |
| } // normalizeAndPrint(char,boolean) |
| |
| /** Extracts the XML version from the Document. */ |
| protected String getVersion(Document document) { |
| if (document == null) { |
| return null; |
| } |
| String version = null; |
| Method getXMLVersion = null; |
| try { |
| getXMLVersion = document.getClass().getMethod("getXmlVersion", new Class[]{}); |
| // If Document class implements DOM L3, this method will exist. |
| if (getXMLVersion != null) { |
| version = (String) getXMLVersion.invoke(document, null); |
| } |
| } |
| catch (Exception e) { |
| // Either this locator object doesn't have |
| // this method, or we're on an old JDK. |
| } |
| return version; |
| } // getVersion(Document) |
| |
| // |
| // Main |
| // |
| |
| /** Main program entry point. */ |
| public static void main(String argv[]) { |
| |
| // is there anything to do? |
| if (argv.length == 0) { |
| printUsage(); |
| System.exit(1); |
| } |
| |
| // variables |
| Writer writer = null; |
| ParserWrapper parser = null; |
| boolean namespaces = DEFAULT_NAMESPACES; |
| boolean validation = DEFAULT_VALIDATION; |
| boolean schemaValidation = DEFAULT_SCHEMA_VALIDATION; |
| boolean schemaFullChecking = DEFAULT_SCHEMA_FULL_CHECKING; |
| boolean canonical = DEFAULT_CANONICAL; |
| |
| // process arguments |
| for (int i = 0; i < argv.length; i++) { |
| String arg = argv[i]; |
| if (arg.startsWith("-")) { |
| String option = arg.substring(1); |
| if (option.equals("p")) { |
| // get parser name |
| if (++i == argv.length) { |
| System.err.println("error: Missing argument to -p option."); |
| } |
| String parserName = argv[i]; |
| |
| // create parser |
| try { |
| parser = (ParserWrapper)Class.forName(parserName).newInstance(); |
| } |
| catch (Exception e) { |
| parser = null; |
| System.err.println("error: Unable to instantiate parser ("+parserName+")"); |
| } |
| continue; |
| } |
| if (option.equalsIgnoreCase("n")) { |
| namespaces = option.equals("n"); |
| continue; |
| } |
| if (option.equalsIgnoreCase("v")) { |
| validation = option.equals("v"); |
| continue; |
| } |
| if (option.equalsIgnoreCase("s")) { |
| schemaValidation = option.equals("s"); |
| continue; |
| } |
| if (option.equalsIgnoreCase("f")) { |
| schemaFullChecking = option.equals("f"); |
| continue; |
| } |
| if (option.equalsIgnoreCase("c")) { |
| canonical = option.equals("c"); |
| continue; |
| } |
| if (option.equals("h")) { |
| printUsage(); |
| continue; |
| } |
| } |
| |
| // use default parser? |
| if (parser == null) { |
| |
| // create parser |
| try { |
| parser = (ParserWrapper)Class.forName(DEFAULT_PARSER_NAME).newInstance(); |
| } |
| catch (Exception e) { |
| System.err.println("error: Unable to instantiate parser ("+DEFAULT_PARSER_NAME+")"); |
| continue; |
| } |
| } |
| |
| // set parser features |
| try { |
| parser.setFeature(NAMESPACES_FEATURE_ID, namespaces); |
| } |
| catch (SAXException e) { |
| System.err.println("warning: Parser does not support feature ("+NAMESPACES_FEATURE_ID+")"); |
| } |
| try { |
| parser.setFeature(VALIDATION_FEATURE_ID, validation); |
| } |
| catch (SAXException e) { |
| System.err.println("warning: Parser does not support feature ("+VALIDATION_FEATURE_ID+")"); |
| } |
| try { |
| parser.setFeature(SCHEMA_VALIDATION_FEATURE_ID, schemaValidation); |
| } |
| catch (SAXException e) { |
| System.err.println("warning: Parser does not support feature ("+SCHEMA_VALIDATION_FEATURE_ID+")"); |
| } |
| try { |
| parser.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking); |
| } |
| catch (SAXException e) { |
| System.err.println("warning: Parser does not support feature ("+SCHEMA_FULL_CHECKING_FEATURE_ID+")"); |
| } |
| |
| // setup writer |
| if (writer == null) { |
| writer = new Writer(); |
| try { |
| writer.setOutput(System.out, "UTF8"); |
| } |
| catch (UnsupportedEncodingException e) { |
| System.err.println("error: Unable to set output. Exiting."); |
| System.exit(1); |
| } |
| } |
| |
| // parse file |
| writer.setCanonical(canonical); |
| try { |
| Document document = parser.parse(arg); |
| writer.write(document); |
| } |
| catch (SAXParseException e) { |
| // ignore |
| } |
| catch (Exception e) { |
| System.err.println("error: Parse error occurred - "+e.getMessage()); |
| if (e instanceof SAXException) { |
| Exception nested = ((SAXException)e).getException(); |
| if (nested != null) { |
| e = nested; |
| } |
| } |
| e.printStackTrace(System.err); |
| } |
| } |
| |
| } // main(String[]) |
| |
| // |
| // Private static methods |
| // |
| |
| /** Prints the usage. */ |
| private static void printUsage() { |
| |
| System.err.println("usage: java dom.Writer (options) uri ..."); |
| System.err.println(); |
| |
| System.err.println("options:"); |
| System.err.println(" -p name Select parser by name."); |
| System.err.println(" -n | -N Turn on/off namespace processing."); |
| System.err.println(" -v | -V Turn on/off validation."); |
| System.err.println(" -s | -S Turn on/off Schema validation support."); |
| System.err.println(" NOTE: Not supported by all parsers."); |
| System.err.println(" -f | -F Turn on/off Schema full checking."); |
| System.err.println(" NOTE: Requires use of -s and not supported by all parsers."); |
| System.err.println(" -c | -C Turn on/off Canonical XML output."); |
| System.err.println(" NOTE: This is not W3C canonical output."); |
| System.err.println(" -h This help screen."); |
| System.err.println(); |
| |
| System.err.println("defaults:"); |
| System.err.println(" Parser: "+DEFAULT_PARSER_NAME); |
| System.err.print(" Namespaces: "); |
| System.err.println(DEFAULT_NAMESPACES ? "on" : "off"); |
| System.err.print(" Validation: "); |
| System.err.println(DEFAULT_VALIDATION ? "on" : "off"); |
| System.err.print(" Schema: "); |
| System.err.println(DEFAULT_SCHEMA_VALIDATION ? "on" : "off"); |
| System.err.print(" Schema full checking: "); |
| System.err.println(DEFAULT_SCHEMA_FULL_CHECKING ? "on" : "off"); |
| System.err.print(" Canonical: "); |
| System.err.println(DEFAULT_CANONICAL ? "on" : "off"); |
| |
| } // printUsage() |
| |
| } // class Writer |