blob: bc13652173a5cdb4c3f3278e7fa7298fdaeaa169 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999, 2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package dom;
import util.Arguments;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import org.apache.xerces.dom.TextImpl;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
* A sample DOM counter. This sample program illustrates how to
* traverse a DOM tree in order to information about the document.
*
* @version $id$
*/
public class DOMCount {
//
// Constants
//
/** Default parser name. */
private static final String
DEFAULT_PARSER_NAME = "dom.wrappers.DOMParser";
private static boolean setValidation = false; //defaults
private static boolean setNameSpaces = true;
private static boolean setSchemaSupport = true;
private static boolean setDeferredDOM = true;
//
// Data
//
/** Elements. */
private long elements;
/** Attributes. */
private long attributes;
/** Characters. */
private long characters;
/** Ignorable whitespace. */
private long ignorableWhitespace;
//
// Public static methods
//
/** Counts the resulting document tree. */
public static void count(String parserWrapperName, String uri) {
try {
DOMParserWrapper parser =
(DOMParserWrapper)Class.forName(parserWrapperName).newInstance();
DOMCount counter = new DOMCount();
long before = System.currentTimeMillis();
parser.setFeature( "http://apache.org/xml/features/dom/defer-node-expansion",
setDeferredDOM );
parser.setFeature( "http://xml.org/sax/features/validation",
setValidation );
parser.setFeature( "http://xml.org/sax/features/namespaces",
setNameSpaces );
parser.setFeature( "http://apache.org/xml/features/validation/schema",
setSchemaSupport );
Document document = parser.parse(uri);
counter.traverse(document);
long after = System.currentTimeMillis();
counter.printResults(uri, after - before);
} catch (org.xml.sax.SAXParseException spe) {
} catch (org.xml.sax.SAXNotRecognizedException ex ){
} catch (org.xml.sax.SAXNotSupportedException ex ){
} catch (org.xml.sax.SAXException se) {
if (se.getException() != null)
se.getException().printStackTrace(System.err);
else
se.printStackTrace(System.err);
} catch (Exception e) {
e.printStackTrace(System.err);
}
} // print(String,String,boolean)
//
// Public methods
//
/** Traverses the specified node, recursively. */
public void traverse(Node node) {
// is there anything to do?
if (node == null) {
return;
}
int type = node.getNodeType();
switch (type) {
// print document
case Node.DOCUMENT_NODE: {
elements = 0;
attributes = 0;
characters = 0;
ignorableWhitespace = 0;
traverse(((Document)node).getDocumentElement());
break;
}
// print element with attributes
case Node.ELEMENT_NODE: {
elements++;
NamedNodeMap attrs = node.getAttributes();
if (attrs != null) {
attributes += attrs.getLength();
}
NodeList children = node.getChildNodes();
if (children != null) {
int len = children.getLength();
for (int i = 0; i < len; i++) {
traverse(children.item(i));
}
}
break;
}
// handle entity reference nodes
case Node.ENTITY_REFERENCE_NODE: {
NodeList children = node.getChildNodes();
if (children != null) {
int len = children.getLength();
for (int i = 0; i < len; i++) {
traverse(children.item(i));
}
}
break;
}
// print text
case Node.CDATA_SECTION_NODE: {
characters += node.getNodeValue().length();
break;
}
case Node.TEXT_NODE: {
if (node instanceof TextImpl) {
if (((TextImpl)node).isIgnorableWhitespace())
ignorableWhitespace += node.getNodeValue().length();
else
characters += node.getNodeValue().length();
} else
characters += node.getNodeValue().length();
break;
}
}
} // traverse(Node)
/** Prints the results. */
public void printResults(String uri, long time) {
// filename.xml: 631 ms (4 elems, 0 attrs, 78 spaces, 0 chars)
System.out.print(uri);
System.out.print(": ");
System.out.print(time);
System.out.print(" ms (");
System.out.print(elements);
System.out.print(" elems, ");
System.out.print(attributes);
System.out.print(" attrs, ");
System.out.print(ignorableWhitespace);
System.out.print(" spaces, ");
System.out.print(characters);
System.out.print(" chars)");
System.out.println();
} // printResults(String,long)
//
// Main
//
/** Main program entry point. */
public static void main(String argv[]) {
Arguments argopt = new Arguments();
argopt.setUsage( new String[] {
"usage: java dom.DOMCount (options) uri ...",
"",
"options:",
" -p name Specify DOM parser wrapper by name.",
" -n | -N Turn on/off namespace [default=on]",
" -v | -V Turn on/off validation [default=off]",
" -s | -S Turn on/off Schema support [default=on]",
" -d | -D Turn on/off deferred DOM [default=on]",
" -h This help screen."} );
// is there anything to do?
if (argv.length == 0) {
argopt.printUsage();
System.exit(1);
}
// vars
String parserName = DEFAULT_PARSER_NAME;
argopt.parseArgumentTokens(argv , new char[] { 'p'} );
int c;
String arg = null;
while ( ( arg = argopt.getlistFiles() ) != null ) {
outer:
while ( (c = argopt.getArguments()) != -1 ){
switch (c) {
case 'v':
setValidation = true;
//System.out.println( "v" );
break;
case 'V':
setValidation = false;
//System.out.println( "V" );
break;
case 'N':
setNameSpaces = false;
break;
case 'n':
setNameSpaces = true;
break;
case 'p':
//System.out.println('p');
parserName = argopt.getStringParameter();
//System.out.println( "parserName = " + parserName );
break;
case 'd':
setDeferredDOM = true;
break;
case 'D':
setDeferredDOM = false;
break;
case 's':
//System.out.println("s" );
setSchemaSupport = true;
break;
case 'S':
//System.out.println("S" );
setSchemaSupport = false;
break;
case '?':
case 'h':
case '-':
argopt.printUsage();
System.exit(1);
break;
case -1:
//System.out.println( "-1" );
break outer;
default:
break;
}
}
count(parserName, arg ); //count uri
}
} // main(String[])
} // class DOMCount