// blob: e8d77e4e9f66852310a10a6cc59050b15cb06a14 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package dom;
import java.io.PrintWriter;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * A sample DOM counter. This sample program illustrates how to
 * traverse a DOM tree in order to get information about the document.
 * The output of this program shows the time and count of elements,
 * attributes, ignorable whitespace, and characters appearing in
 * the document. Three times are shown: the parse time, the first
 * traversal of the document, and the second traversal of the tree.
 * <p>
 * This class is useful as a "poor-man's" performance tester to
 * compare the speed and accuracy of various DOM parsers. However,
 * it is important to note that the first parse time of a parser
 * will include both VM class load time and parser initialization
 * that would not be present in subsequent parses with the same
 * file.
 * <p>
 * <strong>Note:</strong> The results produced by this program
 * should never be accepted as true performance measurements.
 *
 * @author Andy Clark, IBM
 *
 * @version $Id$
 */
public class Counter {

    //
    // Constants
    //

    // feature ids

    /** Namespaces feature id (http://xml.org/sax/features/namespaces). */
    protected static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces";

    /** Validation feature id (http://xml.org/sax/features/validation). */
    protected static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation";

    /** Schema validation feature id (http://apache.org/xml/features/validation/schema). */
    protected static final String SCHEMA_VALIDATION_FEATURE_ID = "http://apache.org/xml/features/validation/schema";

    /** Schema full checking feature id (http://apache.org/xml/features/validation/schema-full-checking). */
    protected static final String SCHEMA_FULL_CHECKING_FEATURE_ID = "http://apache.org/xml/features/validation/schema-full-checking";

    // default settings

    /** Default parser name (dom.wrappers.Xerces). */
    protected static final String DEFAULT_PARSER_NAME = "dom.wrappers.Xerces";

    /** Default repetition (1). */
    protected static final int DEFAULT_REPETITION = 1;

    /** Default namespaces support (true). */
    protected static final boolean DEFAULT_NAMESPACES = true;

    /**
     * Default namespace prefixes (false). This class has no command-line
     * option that changes this setting and never passes it to a parser;
     * the constant is kept so that code referring to it still compiles.
     */
    protected static final boolean DEFAULT_NAMESPACE_PREFIXES = false;

    /** Default validation support (false). */
    protected static final boolean DEFAULT_VALIDATION = false;

    /** Default Schema validation support (false). */
    protected static final boolean DEFAULT_SCHEMA_VALIDATION = false;

    /** Default Schema full checking support (false). */
    protected static final boolean DEFAULT_SCHEMA_FULL_CHECKING = false;

    //
    // Data
    //

    /** Number of elements. */
    protected long fElements;

    /** Number of attributes. */
    protected long fAttributes;

    /** Number of characters. */
    protected long fCharacters;

    /** Number of ignorable whitespace characters. */
    protected long fIgnorableWhitespace;

    /** Document information; may be null. */
    protected ParserWrapper.DocumentInfo fDocumentInfo;

    //
    // Public methods
    //

    /**
     * Sets the document information that {@link #count(Node)} consults to
     * tell ignorable whitespace apart from ordinary character data.
     *
     * @param documentInfo the document information; may be null, in which
     *                     case every text node is counted as characters
     */
    public void setDocumentInfo(ParserWrapper.DocumentInfo documentInfo) {
        fDocumentInfo = documentInfo;
    } // setDocumentInfo(ParserWrapper.DocumentInfo)

    /**
     * Traverses the specified node, recursively, updating the element,
     * attribute, character and ignorable whitespace counters.
     * <p>
     * Passing a document node resets all counters before its document
     * element is traversed, so the same instance can count several
     * documents one after another.
     *
     * @param node the node to traverse; null is silently ignored
     */
    public void count(Node node) {

        // is there anything to do?
        if (node == null) {
            return;
        }

        switch (node.getNodeType()) {
            case Node.DOCUMENT_NODE: {
                // a new document starts a fresh count
                fElements = 0;
                fAttributes = 0;
                fCharacters = 0;
                fIgnorableWhitespace = 0;
                count(((Document)node).getDocumentElement());
                break;
            }

            case Node.ELEMENT_NODE: {
                fElements++;
                NamedNodeMap attrs = node.getAttributes();
                if (attrs != null) {
                    fAttributes += attrs.getLength();
                }
                // deliberately falls through: children of an element are
                // visited exactly like children of an entity reference
            }

            case Node.ENTITY_REFERENCE_NODE: {
                for (Node child = node.getFirstChild();
                     child != null;
                     child = child.getNextSibling()) {
                    count(child);
                }
                break;
            }

            case Node.CDATA_SECTION_NODE: {
                fCharacters += ((Text)node).getLength();
                break;
            }

            case Node.TEXT_NODE: {
                Text text = (Text)node;
                int length = text.getLength();
                // Without document information there is no way to classify
                // the text as ignorable, so it is counted as characters
                // instead of being dropped from both totals.
                if (fDocumentInfo != null && fDocumentInfo.isIgnorableWhitespace(text)) {
                    fIgnorableWhitespace += length;
                }
                else {
                    fCharacters += length;
                }
                break;
            }

            default: {
                // all other node types do not contribute to any counter
                break;
            }
        }

    } // count(Node)

    /**
     * Prints the results of the last count on a single line and flushes
     * the writer.
     *
     * @param out        the writer to print to
     * @param uri        the document identifier printed at the start of the line
     * @param parse      total parse time in milliseconds (for all repetitions)
     * @param traverse1  time of the first traversal in milliseconds
     * @param traverse2  time of the second traversal in milliseconds
     * @param repetition number of times the document was parsed; when greater
     *                   than 1 the average parse time is printed as well
     */
    public void printResults(PrintWriter out, String uri,
                             long parse, long traverse1, long traverse2,
                             int repetition) {

        // filename.xml: 631;200;100 ms (4 elems, 0 attrs, 78 spaces, 0 chars)
        out.print(uri);
        out.print(": ");
        out.print(parse);
        // Only show "total/repetition=average" for a real repetition count;
        // this also keeps a repetition of 0 from dividing by zero.
        if (repetition > 1) {
            out.print('/');
            out.print(repetition);
            out.print('=');
            out.print(parse/repetition);
        }
        out.print(';');
        out.print(traverse1);
        out.print(';');
        out.print(traverse2);
        out.print(" ms (");
        out.print(fElements);
        out.print(" elems, ");
        out.print(fAttributes);
        out.print(" attrs, ");
        out.print(fIgnorableWhitespace);
        out.print(" spaces, ");
        out.print(fCharacters);
        out.print(" chars)");
        out.println();
        out.flush();

    } // printResults(PrintWriter,String,long,long,long,int)

    //
    // MAIN
    //

    /**
     * Main program entry point.
     * <p>
     * Arguments are processed from left to right. An argument starting
     * with "-" is an option and changes the settings used for the
     * documents that follow it; every other argument is a document URI
     * that is parsed, traversed twice and reported immediately. Prints
     * the usage and exits with status 1 when no arguments are given.
     *
     * @param argv the command-line arguments
     */
    public static void main(String argv[]) {

        // is there anything to do?
        if (argv.length == 0) {
            printUsage();
            System.exit(1);
        }

        // variables
        Counter counter = new Counter();
        PrintWriter out = new PrintWriter(System.out);
        ParserWrapper parser = null;
        int repetition = DEFAULT_REPETITION;
        boolean namespaces = DEFAULT_NAMESPACES;
        boolean validation = DEFAULT_VALIDATION;
        boolean schemaValidation = DEFAULT_SCHEMA_VALIDATION;
        boolean schemaFullChecking = DEFAULT_SCHEMA_FULL_CHECKING;

        // process arguments
        for (int i = 0; i < argv.length; i++) {
            String arg = argv[i];

            if (arg.startsWith("-")) {
                String option = arg.substring(1);

                if (option.equals("p")) {
                    // get parser name
                    if (++i == argv.length) {
                        System.err.println("error: Missing argument to -p option.");
                        // nothing left to read; do not index past the array
                        continue;
                    }
                    // a failed instantiation leaves parser null so that the
                    // default parser is tried for the next document
                    parser = createParser(argv[i]);
                    continue;
                }
                if (option.equals("x")) {
                    if (++i == argv.length) {
                        System.err.println("error: Missing argument to -x option.");
                        continue;
                    }
                    String number = argv[i];
                    try {
                        int value = Integer.parseInt(number);
                        if (value < 1) {
                            System.err.println("error: Repetition must be at least 1.");
                            continue;
                        }
                        repetition = value;
                    }
                    catch (NumberFormatException e) {
                        System.err.println("error: invalid number ("+number+").");
                    }
                    continue;
                }
                // for the switches below the lower-case letter turns the
                // setting on and the upper-case letter turns it off
                if (option.equalsIgnoreCase("n")) {
                    namespaces = option.equals("n");
                    continue;
                }
                if (option.equalsIgnoreCase("v")) {
                    validation = option.equals("v");
                    continue;
                }
                if (option.equalsIgnoreCase("s")) {
                    schemaValidation = option.equals("s");
                    continue;
                }
                if (option.equalsIgnoreCase("f")) {
                    schemaFullChecking = option.equals("f");
                    continue;
                }
                if (option.equals("h")) {
                    printUsage();
                    continue;
                }

                // An unrecognized option must not be mistaken for a
                // document URI and handed to the parser.
                System.err.println("error: unknown option ("+arg+").");
                continue;
            }

            // use default parser?
            if (parser == null) {
                parser = createParser(DEFAULT_PARSER_NAME);
                if (parser == null) {
                    continue;
                }
            }

            // set parser features
            applyFeature(parser, NAMESPACES_FEATURE_ID, namespaces);
            applyFeature(parser, VALIDATION_FEATURE_ID, validation);
            applyFeature(parser, SCHEMA_VALIDATION_FEATURE_ID, schemaValidation);
            applyFeature(parser, SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking);

            // parse file
            parseAndReport(counter, parser, out, arg, repetition);
        }

    } // main(String[])

    //
    // Private static methods
    //

    /**
     * Instantiates the parser wrapper class with the given name.
     *
     * @param parserName fully qualified name of a ParserWrapper implementation
     * @return the new parser wrapper, or null (after printing an error to
     *         System.err) if the class could not be loaded, instantiated
     *         or cast to ParserWrapper
     */
    private static ParserWrapper createParser(String parserName) {
        try {
            return (ParserWrapper)Class.forName(parserName).newInstance();
        }
        catch (Exception e) {
            System.err.println("error: Unable to instantiate parser ("+parserName+")");
            return null;
        }
    } // createParser(String):ParserWrapper

    /**
     * Sets a single feature on the parser, printing a warning instead of
     * failing when the parser rejects it.
     *
     * @param parser    the parser wrapper to configure
     * @param featureId the feature identifier
     * @param state     the requested feature state
     */
    private static void applyFeature(ParserWrapper parser, String featureId, boolean state) {
        try {
            parser.setFeature(featureId, state);
        }
        catch (SAXException e) {
            System.err.println("warning: Parser does not support feature ("+featureId+")");
        }
    } // applyFeature(ParserWrapper,String,boolean)

    /**
     * Parses one document the requested number of times, traverses the
     * last parsed tree twice and prints the timings and counts.
     *
     * @param counter    the counter that traverses the document
     * @param parser     the configured parser wrapper
     * @param out        the writer the result line is printed to
     * @param uri        the document to parse
     * @param repetition the number of times to parse the document
     */
    private static void parseAndReport(Counter counter, ParserWrapper parser,
                                       PrintWriter out, String uri,
                                       int repetition) {
        try {
            long beforeParse = System.currentTimeMillis();
            Document document = null;
            for (int j = 0; j < repetition; j++) {
                document = parser.parse(uri);
            }
            long parse = System.currentTimeMillis() - beforeParse;

            counter.setDocumentInfo(parser.getDocumentInfo());

            long beforeTraverse1 = System.currentTimeMillis();
            counter.count(document);
            long traverse1 = System.currentTimeMillis() - beforeTraverse1;

            long beforeTraverse2 = System.currentTimeMillis();
            counter.count(document);
            long traverse2 = System.currentTimeMillis() - beforeTraverse2;

            counter.printResults(out, uri, parse, traverse1, traverse2,
                                 repetition);
        }
        catch (SAXParseException e) {
            // ignore
            // NOTE(review): presumably the parser wrapper's error handler has
            // already reported this parse error -- confirm against the wrappers.
        }
        catch (Exception e) {
            System.err.println("error: Parse error occurred - "+e.getMessage());
            // prefer the stack trace of the exception wrapped by a
            // SAXException, if there is one
            Exception se = e;
            if (e instanceof SAXException) {
                se = ((SAXException)e).getException();
            }
            if (se != null) {
                se.printStackTrace(System.err);
            }
            else {
                e.printStackTrace(System.err);
            }
        }
    } // parseAndReport(Counter,ParserWrapper,PrintWriter,String,int)

    /** Prints the usage, listing the supported options and their defaults, to System.err. */
    private static void printUsage() {

        System.err.println("usage: java dom.Counter (options) uri ...");
        System.err.println();

        System.err.println("options:");
        System.err.println(" -p name Select parser by name.");
        System.err.println(" -x number Select number of repetitions.");
        System.err.println(" -n | -N Turn on/off namespace processing.");
        System.err.println(" -v | -V Turn on/off validation.");
        System.err.println(" -s | -S Turn on/off Schema validation support.");
        System.err.println(" NOTE: Not supported by all parsers.");
        System.err.println(" -f | -F Turn on/off Schema full checking.");
        System.err.println(" NOTE: Requires use of -s and not supported by all parsers.");
        System.err.println(" -h This help screen.");
        System.err.println();

        System.err.println("defaults:");
        System.err.println(" Parser: "+DEFAULT_PARSER_NAME);
        System.err.println(" Repetition: "+DEFAULT_REPETITION);
        System.err.print(" Namespaces: ");
        System.err.println(DEFAULT_NAMESPACES ? "on" : "off");
        System.err.print(" Validation: ");
        System.err.println(DEFAULT_VALIDATION ? "on" : "off");
        System.err.print(" Schema: ");
        System.err.println(DEFAULT_SCHEMA_VALIDATION ? "on" : "off");
        System.err.print(" Schema full checking: ");
        System.err.println(DEFAULT_SCHEMA_FULL_CHECKING ? "on" : "off");

    } // printUsage()

} // class Counter