samples/DOMPrint/DOMPrint.cpp - xerces-c - Git at Google

 /*
  * The Apache Software License, Version 1.1
  *
  * Copyright (c) 1999 The Apache Software Foundation.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * 3. The end-user documentation included with the redistribution,
  *    if any, must include the following acknowledgment:
  *       "This product includes software developed by the
  *        Apache Software Foundation (http://www.apache.org/)."
  *    Alternately, this acknowledgment may appear in the software itself,
  *    if and wherever such third-party acknowledgments normally appear.
  *
  * 4. The names "Xerces" and "Apache Software Foundation" must
  *    not be used to endorse or promote products derived from this
  *    software without prior written permission. For written
  *    permission, please contact apache\@apache.org.
  *
  * 5. Products derived from this software may not be called "Apache",
  *    nor may "Apache" appear in their name, without prior written
  *    permission of the Apache Software Foundation.
  *
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * ====================================================================
  *
  * This software consists of voluntary contributions made by many
  * individuals on behalf of the Apache Software Foundation, and was
  * originally based on software copyright (c) 1999, International
  * Business Machines, Inc., http://www.ibm.com .  For more information
  * on the Apache Software Foundation, please see
  * <http://www.apache.org/>.
  */

 /**
  * $Log$
  * Revision 1.1  1999/11/09 01:09:51  twl
  * Initial revision
  *
  * Revision 1.7  1999/11/08 20:43:35  rahul
  * Swat for adding in Product name and CVS comment log variable.
  *
  */


 // ---------------------------------------------------------------------------
 //  This sample program which invokes the DOMParser to build a DOM tree for
 //  the specified input file. It then walks the tree, and prints out the data
 //  as an XML file.
 //
 //  The parameters are:
 //
 //      [-?]            - Show usage and exit
 //      [-v]            - Do validation
 //      [-NoEscape]     - Don't escape characters like '<' or '&'.
 //      filename        - The path to the XML file to parse
 //
 //  These are non-case sensitive
 // ---------------------------------------------------------------------------


 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <util/PlatformUtils.hpp>
 #include <parsers/DOMParser.hpp>
 #include <dom/DOM_Node.hpp>
 #include <dom/DOM_NamedNodeMap.hpp>
 #include "DOMTreeErrorReporter.hpp"
 #include <string.h>
 #include <stdlib.h>


 // ---------------------------------------------------------------------------
 //  Local data
 //
 //  xmlFile
 //      The path to the file to parser. Set via command line.
 //
 //  doValidation
 //      Indicates whether validation should be done. The default is not
 //      to validate, but -v overrides that.
 //
 //  doEscapes
 //      Indicates whether special chars should be escaped in the output.
 //      Defaults to doing escapes, -NoEscape overrides.
 // ---------------------------------------------------------------------------
 static char*	xmlFile         = 0;
 static bool     doValidation    = false;
 static bool     doEscapes       = true;


 // ---------------------------------------------------------------------------
 //  Forward references
 // ---------------------------------------------------------------------------
 void          outputContent(ostream& target, const DOMString &s);
 void          usage();
 ostream& operator<<(ostream& target, const DOMString& toWrite);
 ostream& operator<<(ostream& target, DOM_Node& toWrite);


 // ---------------------------------------------------------------------------
 //
 //  main
 //
 // ---------------------------------------------------------------------------
 int main(int argC, char* argV[])
 {
     // Initialize the XML4C2 system
     try
     {
         XMLPlatformUtils::Initialize();
     }

     catch(const XMLException& toCatch)
     {
         cerr << "Error during XML4C2 Initialization.\n"
              << "  Exception message:"
              << StrX(toCatch.getMessage()) << endl;
         return 1;
     }

     // Check command line and extract arguments.
     if (argC < 2)
     {
         usage();
         return 1;
     }

     // Watch for special case help request
     if (strcmp(argV[1], "-?") == 0)
     {
         usage();
         return 0;
     }

     // See if non validating dom parser configuration is requested.
     int parmInd;
     for (parmInd = 1; parmInd < argC; parmInd++)
     {
         // Break out on first parm not starting with a dash
         if (argV[parmInd][0] != '-')
             break;

         if (!strcmp(argV[parmInd], "-v")
         ||  !strcmp(argV[parmInd], "-V"))
         {
             doValidation = true;
         }
          else if (!strcmp(argV[parmInd], "-NoEscape"))
         {
             doEscapes = false;
         }
          else
         {
             usage();
             return 1;
         }
     }

     //
     //  And now we have to have only one parameter left and it must be
     //  the file name.
     //
     if (parmInd + 1 != argC)
     {
         usage();
         return 1;
     }
     xmlFile = argV[parmInd];

     //
     //  Create our validator, then attach an error handler to the parser.
     //  The parser will call back to methods of the ErrorHandler if it
     //  discovers errors during the course of parsing the XML document.
     //
     DOMParser parser;
     ErrorHandler *errReporter = new DOMTreeErrorReporter();
     parser.setErrorHandler(errReporter);

     //
     //  Parse the XML file, catching any XML exceptions that might propogate
     //  out of it.
     //
     bool errorsOccured = false;
     try
     {
         parser.parse(xmlFile);
     }

     catch (const XMLException& e)
     {
         cerr << "An error occured during parsing\n   Message: "
              << StrX(e.getMessage()) << endl;
         errorsOccured = true;
     }

     // If the parse was successful, output the document data from the DOM tree
     if (!errorsOccured)
     {
         DOM_Node doc = parser.getDocument();
 		cout << doc << endl;
     }

     //
     //  Clean up the element counter object. The parser does not adopt handlers
     //  since they could be many objects or one object installed for multiple
     //  handlers.
     //
     delete errReporter;

     //
 	//  The DOM document and its contents are reference counted, and need
 	//	no explicit deletion.
     //
     return 0;
 }


 // ---------------------------------------------------------------------------
 //
 //  Usage()
 //
 // ---------------------------------------------------------------------------
 void usage()
 {
     cout << "\nUsage: DOMPrint [options] file\n"
             "   This program invokes the XML4C DOM parser and builds the DOM tree.\n"
             "   It then traverses the DOM tree and prints the contents of the tree\n"
             "   Options are case insensitive\n\n"
             "   Options:\n"
             "     -v          Validate the data according to the DTD\n"
             "     -NoEscape   Don't escape special characters in output\n"
             "     -?          Show this help (must be the only parameter)\n"
           <<  endl;
 }


 // ---------------------------------------------------------------------------
 //
 //  ostream << DOM_Node
 //
 //                Stream out a DOM node, and, recursively, all of its children.
 //                This function is the heart of writing a DOM tree out as
 //                XML source.  Give it a document node and it will do the whole thing.
 //
 // ---------------------------------------------------------------------------
 ostream& operator<<(ostream& target, DOM_Node& toWrite)
 {
     // Get the name and value out for convenience
     DOMString   nodeName = toWrite.getNodeName();
     DOMString   nodeValue = toWrite.getNodeValue();

 	switch (toWrite.getNodeType())
     {
 		case DOM_Node::TEXT_NODE:
         {
             outputContent(target, nodeValue);
             break;
         }

         case DOM_Node::PROCESSING_INSTRUCTION_NODE :
         {
             target  << "<?"
                     << nodeName
                     << ' '
                     << nodeValue
                     << "?>";
             break;
         }

         case DOM_Node::DOCUMENT_NODE :
         {
             target << "<?xml version='1.0' encoding='utf-8' ?>\n";
             DOM_Node child = toWrite.getFirstChild();
             while( child != 0)
             {
                 target << child << endl;
                 child = child.getNextSibling();
             }

             break;
         }

         case DOM_Node::ELEMENT_NODE :
         {
             // Output the element start tag.
             target << '<' << nodeName;

             // Output any attributes on this element
             DOM_NamedNodeMap attributes = toWrite.getAttributes();
             int attrCount = attributes.getLength();
             for (int i = 0; i < attrCount; i++)
             {
                 DOM_Node  attribute = attributes.item(i);

                 target  << ' ' << attribute.getNodeName()
                         << " = \"";
                         //  Note that "<" must be escaped in attribute values.
                         outputContent(target, attribute.getNodeValue());
                         target << '"';
             }

             //
             //  Test for the presence of children, which includes both
             //  text content and nested elements.
             //
             DOM_Node child = toWrite.getFirstChild();
             if (child != 0)
             {
                 // There are children. Close start-tag, and output children.
                 target << ">";
                 while( child != 0)
                 {
                     target << child;
                     child = child.getNextSibling();
                 }

                 // Done with children.  Output the end tag.
                 target << "</" << nodeName << ">";
             }
             else
             {
                 //
                 //  There were no children.  Output the short form close of the
                 //  element start tag, making it an empty-element tag.
                 //
                 target << "/>";
             }
             break;
         }

         case DOM_Node::ENTITY_REFERENCE_NODE:
         {
             DOM_Node child;
             for (child = toWrite.getFirstChild(); child != 0; child = child.getNextSibling())
                 target << child;
             break;
         }

         case DOM_Node::CDATA_SECTION_NODE:
         {
             target << "<![CDATA[" << nodeValue << "]]>";
             break;
         }

         case DOM_Node::COMMENT_NODE:
         {
             target << "<!--" << nodeValue << "-->";
             break;
         }

         default:
             cerr << "Unrecognized node type = "
                  << (long)toWrite.getNodeType() << endl;
     }
 	return target;
 }


 // ---------------------------------------------------------------------------
 //
 //  outputContent  - Write document content from a DOMString to a C++ ostream.
 //                   Escape the XML special characters (<, &, etc.) unless this
 //                   is suppressed by the command line option.
 //
 // ---------------------------------------------------------------------------
 void outputContent(ostream& target, const DOMString &toWrite)
 {

     if (doEscapes == false)
     {
         target << toWrite;
     }
      else
     {
         int            length = toWrite.length();
         const XMLCh*   chars  = toWrite.rawBuffer();

         int index;
         for (index = 0; index < length; index++)
         {
             switch (chars[index])
             {
             case chAmpersand :
                 target << "&amp;";
                 break;

             case chOpenAngle :
                 target << "&lt;";
                 break;

             case chCloseAngle:
                 target << "&gt;";
                 break;

             case chDoubleQuote :
                 target << "&quot;";
                 break;

             default:
                 // If it is none of the special characters, print it as such
                 target << StrX(&chars[index], 1);
                 break;
             }
         }
     }

     return;
 }


 // ---------------------------------------------------------------------------
 //
 //  ostream << DOMString      Stream out a DOM string.
 //
 // ---------------------------------------------------------------------------
 ostream& operator<<(ostream& target, const DOMString& s)
 {
     if (s.length() > 0)
         target << StrX(s.rawBuffer(), s.length());
     return target;
 }
	/*
	* The Apache Software License, Version 1.1
	*
	* Copyright (c) 1999 The Apache Software Foundation. All rights
	* reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in
	* the documentation and/or other materials provided with the
	* distribution.
	*
	* 3. The end-user documentation included with the redistribution,
	* if any, must include the following acknowledgment:
	* "This product includes software developed by the
	* Apache Software Foundation (http://www.apache.org/)."
	* Alternately, this acknowledgment may appear in the software itself,
	* if and wherever such third-party acknowledgments normally appear.
	*
	* 4. The names "Xerces" and "Apache Software Foundation" must
	* not be used to endorse or promote products derived from this
	* software without prior written permission. For written
	* permission, please contact apache\@apache.org.
	*
	* 5. Products derived from this software may not be called "Apache",
	* nor may "Apache" appear in their name, without prior written
	* permission of the Apache Software Foundation.
	*
	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
	* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
	* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
	* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	* ====================================================================
	*
	* This software consists of voluntary contributions made by many
	* individuals on behalf of the Apache Software Foundation, and was
	* originally based on software copyright (c) 1999, International
	* Business Machines, Inc., http://www.ibm.com . For more information
	* on the Apache Software Foundation, please see
	* <http://www.apache.org/>.
	*/

	/**
	* $Log$
	* Revision 1.1 1999/11/09 01:09:51 twl
	* Initial revision
	*
	* Revision 1.7 1999/11/08 20:43:35 rahul
	* Swat for adding in Product name and CVS comment log variable.
	*
	*/


	// ---------------------------------------------------------------------------
	// This sample program which invokes the DOMParser to build a DOM tree for
	// the specified input file. It then walks the tree, and prints out the data
	// as an XML file.
	//
	// The parameters are:
	//
	// [-?] - Show usage and exit
	// [-v] - Do validation
	// [-NoEscape] - Don't escape characters like '<' or '&'.
	// filename - The path to the XML file to parse
	//
	// These are non-case sensitive
	// ---------------------------------------------------------------------------


	// ---------------------------------------------------------------------------
	// Includes
	// ---------------------------------------------------------------------------
	#include <util/PlatformUtils.hpp>
	#include <parsers/DOMParser.hpp>
	#include <dom/DOM_Node.hpp>
	#include <dom/DOM_NamedNodeMap.hpp>
	#include "DOMTreeErrorReporter.hpp"
	#include <string.h>
	#include <stdlib.h>



	// ---------------------------------------------------------------------------
	// Local data
	//
	// xmlFile
	// The path to the file to parser. Set via command line.
	//
	// doValidation
	// Indicates whether validation should be done. The default is not
	// to validate, but -v overrides that.
	//
	// doEscapes
	// Indicates whether special chars should be escaped in the output.
	// Defaults to doing escapes, -NoEscape overrides.
	// ---------------------------------------------------------------------------
	static char* xmlFile = 0;
	static bool doValidation = false;
	static bool doEscapes = true;


	// ---------------------------------------------------------------------------
	// Forward references
	// ---------------------------------------------------------------------------
	void outputContent(ostream& target, const DOMString &s);
	void usage();
	ostream& operator<<(ostream& target, const DOMString& toWrite);
	ostream& operator<<(ostream& target, DOM_Node& toWrite);



	// ---------------------------------------------------------------------------
	//
	// main
	//
	// ---------------------------------------------------------------------------
	int main(int argC, char* argV[])
	{
	// Initialize the XML4C2 system
	try
	{
	XMLPlatformUtils::Initialize();
	}

	catch(const XMLException& toCatch)
	{
	cerr << "Error during XML4C2 Initialization.\n"
	<< " Exception message:"
	<< StrX(toCatch.getMessage()) << endl;
	return 1;
	}

	// Check command line and extract arguments.
	if (argC < 2)
	{
	usage();
	return 1;
	}

	// Watch for special case help request
	if (strcmp(argV[1], "-?") == 0)
	{
	usage();
	return 0;
	}

	// See if non validating dom parser configuration is requested.
	int parmInd;
	for (parmInd = 1; parmInd < argC; parmInd++)
	{
	// Break out on first parm not starting with a dash
	if (argV[parmInd][0] != '-')
	break;

	if (!strcmp(argV[parmInd], "-v")
	\|\| !strcmp(argV[parmInd], "-V"))
	{
	doValidation = true;
	}
	else if (!strcmp(argV[parmInd], "-NoEscape"))
	{
	doEscapes = false;
	}
	else
	{
	usage();
	return 1;
	}
	}

	//
	// And now we have to have only one parameter left and it must be
	// the file name.
	//
	if (parmInd + 1 != argC)
	{
	usage();
	return 1;
	}
	xmlFile = argV[parmInd];

	//
	// Create our validator, then attach an error handler to the parser.
	// The parser will call back to methods of the ErrorHandler if it
	// discovers errors during the course of parsing the XML document.
	//
	DOMParser parser;
	ErrorHandler *errReporter = new DOMTreeErrorReporter();
	parser.setErrorHandler(errReporter);

	//
	// Parse the XML file, catching any XML exceptions that might propogate
	// out of it.
	//
	bool errorsOccured = false;
	try
	{
	parser.parse(xmlFile);
	}

	catch (const XMLException& e)
	{
	cerr << "An error occured during parsing\n Message: "
	<< StrX(e.getMessage()) << endl;
	errorsOccured = true;
	}

	// If the parse was successful, output the document data from the DOM tree
	if (!errorsOccured)
	{
	DOM_Node doc = parser.getDocument();
	cout << doc << endl;
	}

	//
	// Clean up the element counter object. The parser does not adopt handlers
	// since they could be many objects or one object installed for multiple
	// handlers.
	//
	delete errReporter;

	//
	// The DOM document and its contents are reference counted, and need
	// no explicit deletion.
	//
	return 0;
	}



	// ---------------------------------------------------------------------------
	//
	// Usage()
	//
	// ---------------------------------------------------------------------------
	void usage()
	{
	cout << "\nUsage: DOMPrint [options] file\n"
	" This program invokes the XML4C DOM parser and builds the DOM tree.\n"
	" It then traverses the DOM tree and prints the contents of the tree\n"
	" Options are case insensitive\n\n"
	" Options:\n"
	" -v Validate the data according to the DTD\n"
	" -NoEscape Don't escape special characters in output\n"
	" -? Show this help (must be the only parameter)\n"
	<< endl;
	}




	// ---------------------------------------------------------------------------
	//
	// ostream << DOM_Node
	//
	// Stream out a DOM node, and, recursively, all of its children.
	// This function is the heart of writing a DOM tree out as
	// XML source. Give it a document node and it will do the whole thing.
	//
	// ---------------------------------------------------------------------------
	ostream& operator<<(ostream& target, DOM_Node& toWrite)
	{
	// Get the name and value out for convenience
	DOMString nodeName = toWrite.getNodeName();
	DOMString nodeValue = toWrite.getNodeValue();

	switch (toWrite.getNodeType())
	{
	case DOM_Node::TEXT_NODE:
	{
	outputContent(target, nodeValue);
	break;
	}

	case DOM_Node::PROCESSING_INSTRUCTION_NODE :
	{
	target << "<?"
	<< nodeName
	<< ' '
	<< nodeValue
	<< "?>";
	break;
	}

	case DOM_Node::DOCUMENT_NODE :
	{
	target << "<?xml version='1.0' encoding='utf-8' ?>\n";
	DOM_Node child = toWrite.getFirstChild();
	while( child != 0)
	{
	target << child << endl;
	child = child.getNextSibling();
	}

	break;
	}

	case DOM_Node::ELEMENT_NODE :
	{
	// Output the element start tag.
	target << '<' << nodeName;

	// Output any attributes on this element
	DOM_NamedNodeMap attributes = toWrite.getAttributes();
	int attrCount = attributes.getLength();
	for (int i = 0; i < attrCount; i++)
	{
	DOM_Node attribute = attributes.item(i);

	target << ' ' << attribute.getNodeName()
	<< " = \"";
	// Note that "<" must be escaped in attribute values.
	outputContent(target, attribute.getNodeValue());
	target << '"';
	}

	//
	// Test for the presence of children, which includes both
	// text content and nested elements.
	//
	DOM_Node child = toWrite.getFirstChild();
	if (child != 0)
	{
	// There are children. Close start-tag, and output children.
	target << ">";
	while( child != 0)
	{
	target << child;
	child = child.getNextSibling();
	}

	// Done with children. Output the end tag.
	target << "</" << nodeName << ">";
	}
	else
	{
	//
	// There were no children. Output the short form close of the
	// element start tag, making it an empty-element tag.
	//
	target << "/>";
	}
	break;
	}

	case DOM_Node::ENTITY_REFERENCE_NODE:
	{
	DOM_Node child;
	for (child = toWrite.getFirstChild(); child != 0; child = child.getNextSibling())
	target << child;
	break;
	}

	case DOM_Node::CDATA_SECTION_NODE:
	{
	target << "<![CDATA[" << nodeValue << "]]>";
	break;
	}

	case DOM_Node::COMMENT_NODE:
	{
	target << "<!--" << nodeValue << "-->";
	break;
	}

	default:
	cerr << "Unrecognized node type = "
	<< (long)toWrite.getNodeType() << endl;
	}
	return target;
	}


	// ---------------------------------------------------------------------------
	//
	// outputContent - Write document content from a DOMString to a C++ ostream.
	// Escape the XML special characters (<, &, etc.) unless this
	// is suppressed by the command line option.
	//
	// ---------------------------------------------------------------------------
	void outputContent(ostream& target, const DOMString &toWrite)
	{

	if (doEscapes == false)
	{
	target << toWrite;
	}
	else
	{
	int length = toWrite.length();
	const XMLCh* chars = toWrite.rawBuffer();

	int index;
	for (index = 0; index < length; index++)
	{
	switch (chars[index])
	{
	case chAmpersand :
	target << "&";
	break;

	case chOpenAngle :
	target << "<";
	break;

	case chCloseAngle:
	target << ">";
	break;

	case chDoubleQuote :
	target << """;
	break;

	default:
	// If it is none of the special characters, print it as such
	target << StrX(&chars[index], 1);
	break;
	}
	}
	}

	return;
	}


	// ---------------------------------------------------------------------------
	//
	// ostream << DOMString Stream out a DOM string.
	//
	// ---------------------------------------------------------------------------
	ostream& operator<<(ostream& target, const DOMString& s)
	{
	if (s.length() > 0)
	target << StrX(s.rawBuffer(), s.length());
	return target;
	}