blob: 94980f1f6149ff492ce0d8db5adead883c194d36 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Id$
*/
// ---------------------------------------------------------------------------
// This sample program invokes the XercesDOMParser to build a DOM tree for
// the specified input file. It then invokes DOMWriter::writeNode() to
// serialize the resultant DOM tree back to XML stream.
//
// Note:
// Application needs to provide its own implementation of
// DOMErrorHandler (in this sample, the DOMPrintErrorHandler),
// if it would like to receive notification from the serializer
// in the case any error occurs during the serialization.
//
// Application needs to provide its own implementation of
// DOMWriterFilter (in this sample, the DOMPrintFilter),
// if it would like to filter out certain parts of the DOM
// representation, but must be aware that this may render the
// resultant XML stream invalid.
//
// Application may choose any combination of characters as the
// end of line sequence to be used in the resultant XML stream,
// but must be aware that this may render the resultant XML
// stream ill-formed.
//
// Application may choose a particular encoding for the output XML
// stream, but must be aware that characters appearing in markup
// that are unrepresentable in the specified encoding may force
// the serializer to terminate serialization prematurely, in which
// case no complete serialization is done.
//
// Application shall query the serializer first, before setting any
// feature/mode (true, false), or be ready to catch an exception if
// this feature/mode is not supported by the serializer.
//
// Application needs to clean up the filter, error handler and
// format target objects created for the serialization.
//
// Limitations:
// 1. The encoding="xxx" clause in the XML header should reflect
// the system local code page, but does not.
// 2. Cases where the XML data contains characters that can not
// be represented in the system local code page are not handled.
//
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/dom/DOMImplementation.hpp>
#include <xercesc/dom/DOMImplementationLS.hpp>
#include <xercesc/dom/DOMWriter.hpp>
#include <xercesc/framework/StdOutFormatTarget.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/util/XMLUni.hpp>
#include "DOMTreeErrorReporter.hpp"
#include "DOMPrintFilter.hpp"
#include "DOMPrintErrorHandler.hpp"
#include <string.h>
#include <stdlib.h>
// ---------------------------------------------------------------------------
// Local data
//
// gXmlFile
// The path to the file to parse. Set via command line.
//
// gDoNamespaces
// Indicates whether namespace processing should be done.
//
// gDoSchema
// Indicates whether schema processing should be done.
//
// gSchemaFullChecking
// Indicates whether full schema constraint checking should be done.
//
// gDoCreate
// Indicates whether entity reference nodes need to be created or not
// Defaults to false
//
// gOutputEncoding
// The encoding we are to output in. If not set on the command line,
// then it defaults to the encoding of the input XML file.
//
// gMyEOLSequence
// The end of line sequence we are to output.
//
// gSplitCdataSections
// Indicates whether split-cdata-sections is to be enabled or not.
//
// gDiscardDefaultContent
// Indicates whether default content is discarded or not.
//
// gUseFilter
// Indicates if user wants to plug in the DOMPrintFilter.
//
// gValScheme
// Indicates what validation scheme to use. It defaults to 'auto', but
// can be set via the -v= command.
//
// ---------------------------------------------------------------------------
// Parser options (set from the command line in main()).
static char*                       gXmlFile               = 0;
static bool                        gDoNamespaces          = false;
static bool                        gDoSchema              = false;
static bool                        gSchemaFullChecking    = false;
// Entity reference nodes are only created when -e is given, so the
// default has to be false; otherwise the -e switch has no effect.
static bool                        gDoCreate              = false;

// options for DOMWriter's features
static const XMLCh*                gOutputEncoding        = 0;
static const XMLCh*                gMyEOLSequence         = 0;
static bool                        gSplitCdataSections    = true;
static bool                        gDiscardDefaultContent = true;
static bool                        gUseFilter             = false;
// gFormatPrettyPrint
//  Indicates whether the format-pretty-print feature is to be enabled
//  on the DOMWriter. Set via the -wfpp= option; off by default.
static bool                        gFormatPrettyPrint     = false;

// Validation scheme handed to the parser; 'auto' unless -v= is given.
static XercesDOMParser::ValSchemes gValScheme             = XercesDOMParser::Val_Auto;
// ---------------------------------------------------------------------------
//
// Usage()
//
// ---------------------------------------------------------------------------
void usage()
{
    // The help screen, one entry per output fragment. The fragments are
    // written back to back, so every line break is spelled out in the text.
    static const char* const helpText[] =
    {
        "\nUsage:\n",
        " DOMPrint [options] <XML file>\n\n",
        "This program invokes the DOM parser, and builds the DOM tree.\n",
        "It then asks the DOMWriter to serialize the DOM tree \n",
        "Options:\n",
        " -e create entity reference nodes. Default is no expansion.\n",
        " -v=xxx Validation scheme [always | never | auto*].\n",
        " -n Enable namespace processing. Default is off.\n",
        " -s Enable schema processing. Default is off.\n",
        " -f Enable full schema constraint checking. Defaults is off.\n",
        " -wenc=XXX Use a particular encoding for output. Default is\n",
        " the same encoding as the input XML file. UTF-8 if\n",
        " input XML file has not XML declaration.\n",
        " -weol=xxx Set the end of line sequence. Default set by DOMWriter\n",
        " -wscs=xxx Enable/Disable split-cdata-sections. Default on \n",
        " -wddc=xxx Enable/Disable discard-default-content. Default on \n",
        " -wflt=xxx Enable/Disable filtering. Default off \n",
        " -wfpp=xxx Enable/Disable format-pretty-print. Default off \n",
        " -? Show this help.\n\n",
        " * = Default if not provided explicitly.\n\n",
        "The parser has intrinsic support for the following encodings:\n",
        " UTF-8, USASCII, ISO8859-1, UTF-16[BL]E, UCS-4[BL]E,\n",
        " WINDOWS-1252, IBM1140, IBM037.\n"
    };
    const unsigned int fragmentCount = sizeof(helpText) / sizeof(helpText[0]);

    for (unsigned int idx = 0; idx < fragmentCount; ++idx)
        cout << helpText[idx];

    // Trailing newline plus flush, emitted once after the whole text.
    cout << endl;
}
// ---------------------------------------------------------------------------
//
// main
//
// ---------------------------------------------------------------------------
int main(int argC, char* argV[])
{
int retval = 0;
// Initialize the XML4C2 system
try
{
XMLPlatformUtils::Initialize();
}
catch(const XMLException &toCatch)
{
cerr << "Error during Xerces-c Initialization.\n"
<< " Exception message:"
<< StrX(toCatch.getMessage()) << endl;
return 1;
}
// Check command line and extract arguments.
if (argC < 2)
{
usage();
XMLPlatformUtils::Terminate();
return 1;
}
// See if non validating dom parser configuration is requested.
int parmInd;
for (parmInd = 1; parmInd < argC; parmInd++)
{
// Break out on first parm not starting with a dash
if (argV[parmInd][0] != '-')
break;
// Watch for special case help request
if (!strcmp(argV[parmInd], "-?"))
{
usage();
XMLPlatformUtils::Terminate();
return 2;
}
else if (!strncmp(argV[parmInd], "-v=", 3)
|| !strncmp(argV[parmInd], "-V=", 3))
{
const char* const parm = &argV[parmInd][3];
if (!strcmp(parm, "never"))
gValScheme = XercesDOMParser::Val_Never;
else if (!strcmp(parm, "auto"))
gValScheme = XercesDOMParser::Val_Auto;
else if (!strcmp(parm, "always"))
gValScheme = XercesDOMParser::Val_Always;
else
{
cerr << "Unknown -v= value: " << parm << endl;
XMLPlatformUtils::Terminate();
return 2;
}
}
else if (!strcmp(argV[parmInd], "-n")
|| !strcmp(argV[parmInd], "-N"))
{
gDoNamespaces = true;
}
else if (!strcmp(argV[parmInd], "-s")
|| !strcmp(argV[parmInd], "-S"))
{
gDoSchema = true;
}
else if (!strcmp(argV[parmInd], "-f")
|| !strcmp(argV[parmInd], "-F"))
{
gSchemaFullChecking = true;
}
else if (!strcmp(argV[parmInd], "-e")
|| !strcmp(argV[parmInd], "-E"))
{
gDoCreate = true;
}
else if (!strncmp(argV[parmInd], "-wenc=", 6))
{
// Get out the encoding name
gOutputEncoding = XMLString::transcode( &(argV[parmInd][6]) );
}
else if (!strncmp(argV[parmInd], "-weol=", 6))
{
// Get out the end of line
gMyEOLSequence = XMLString::transcode( &(argV[parmInd][6]) );
}
else if (!strncmp(argV[parmInd], "-wddc=", 6))
{
const char* const parm = &argV[parmInd][6];
if (!strcmp(parm, "on"))
gDiscardDefaultContent = true;
else if (!strcmp(parm, "off"))
gDiscardDefaultContent = false;
else
{
cerr << "Unknown -wddc= value: " << parm << endl;
XMLPlatformUtils::Terminate();
return 2;
}
}
else if (!strncmp(argV[parmInd], "-wcsc=", 6))
{
const char* const parm = &argV[parmInd][6];
if (!strcmp(parm, "on"))
gSplitCdataSections = true;
else if (!strcmp(parm, "off"))
gSplitCdataSections = false;
else
{
cerr << "Unknown -wcsc= value: " << parm << endl;
XMLPlatformUtils::Terminate();
return 2;
}
}
else if (!strncmp(argV[parmInd], "-wflt=", 6))
{
const char* const parm = &argV[parmInd][6];
if (!strcmp(parm, "on"))
gUseFilter = true;
else if (!strcmp(parm, "off"))
gUseFilter = false;
else
{
cerr << "Unknown -wflt= value: " << parm << endl;
XMLPlatformUtils::Terminate();
return 2;
}
}
else if (!strncmp(argV[parmInd], "-wfpp=", 6))
{
const char* const parm = &argV[parmInd][6];
if (!strcmp(parm, "on"))
gFormatPrettyPrint = true;
else if (!strcmp(parm, "off"))
gFormatPrettyPrint = false;
else
{
cerr << "Unknown -wfpp= value: " << parm << endl;
XMLPlatformUtils::Terminate();
return 2;
}
}
else
{
cerr << "Unknown option '" << argV[parmInd]
<< "', ignoring it.\n" << endl;
}
}
//
// And now we have to have only one parameter left and it must be
// the file name.
//
if (parmInd + 1 != argC)
{
usage();
XMLPlatformUtils::Terminate();
return 1;
}
gXmlFile = argV[parmInd];
//
// Create our parser, then attach an error handler to the parser.
// The parser will call back to methods of the ErrorHandler if it
// discovers errors during the course of parsing the XML document.
//
XercesDOMParser *parser = new XercesDOMParser;
parser->setValidationScheme(gValScheme);
parser->setDoNamespaces(gDoNamespaces);
parser->setDoSchema(gDoSchema);
parser->setValidationSchemaFullChecking(gSchemaFullChecking);
parser->setCreateEntityReferenceNodes(gDoCreate);
DOMTreeErrorReporter *errReporter = new DOMTreeErrorReporter();
parser->setErrorHandler(errReporter);
//
// Parse the XML file, catching any XML exceptions that might propogate
// out of it.
//
bool errorsOccured = false;
try
{
parser->parse(gXmlFile);
}
catch (const XMLException& e)
{
cerr << "An error occurred during parsing\n Message: "
<< StrX(e.getMessage()) << endl;
errorsOccured = true;
}
catch (const DOMException& e)
{
cerr << "A DOM error occurred during parsing\n DOMException code: "
<< e.code << endl;
errorsOccured = true;
}
catch (...)
{
cerr << "An error occurred during parsing\n " << endl;
errorsOccured = true;
}
// If the parse was successful, output the document data from the DOM tree
if (!errorsOccured && !errReporter->getSawErrors())
{
DOMPrintFilter *myFilter = 0;
try
{
// get a serializer, an instance of DOMWriter
XMLCh tempStr[100];
XMLString::transcode("LS", tempStr, 99);
DOMImplementation *impl = DOMImplementationRegistry::getDOMImplementation(tempStr);
DOMWriter *theSerializer = ((DOMImplementationLS*)impl)->createDOMWriter();
// set user specified end of line sequence and output encoding
theSerializer->setNewLine(gMyEOLSequence);
theSerializer->setEncoding(gOutputEncoding);
// plug in user's own filter
if (gUseFilter)
{
// even we say to show attribute, but the DOMWriter
// will not show attribute nodes to the filter as
// the specs explicitly says that DOMWriter shall
// NOT show attributes to DOMWriterFilter.
//
// so DOMNodeFilter::SHOW_ATTRIBUTE has no effect.
// same DOMNodeFilter::SHOW_DOCUMENT_TYPE, no effect.
//
myFilter = new DOMPrintFilter(DOMNodeFilter::SHOW_ELEMENT |
DOMNodeFilter::SHOW_ATTRIBUTE |
DOMNodeFilter::SHOW_DOCUMENT_TYPE
);
theSerializer->setFilter(myFilter);
}
// plug in user's own error handler
DOMErrorHandler *myErrorHandler = new DOMPrintErrorHandler();
theSerializer->setErrorHandler(myErrorHandler);
// set feature if the serializer supports the feature/mode
if (theSerializer->canSetFeature(XMLUni::fgDOMWRTSplitCdataSections, gSplitCdataSections))
theSerializer->setFeature(XMLUni::fgDOMWRTSplitCdataSections, gSplitCdataSections);
if (theSerializer->canSetFeature(XMLUni::fgDOMWRTDiscardDefaultContent, gDiscardDefaultContent))
theSerializer->setFeature(XMLUni::fgDOMWRTDiscardDefaultContent, gDiscardDefaultContent);
if (theSerializer->canSetFeature(XMLUni::fgDOMWRTFormatPrettyPrint, gFormatPrettyPrint))
theSerializer->setFeature(XMLUni::fgDOMWRTFormatPrettyPrint, gFormatPrettyPrint);
//
// Plug in a format target to receive the resultant
// XML stream from the serializer.
//
// StdOutFormatTarget prints the resultant XML stream
// to stdout once it receives any thing from the serializer.
//
XMLFormatTarget *myFormTarget = new StdOutFormatTarget();
// get the DOM representation
DOMNode *doc = parser->getDocument();
//
// do the serialization through DOMWriter::writeNode();
//
theSerializer->writeNode(myFormTarget, *doc);
delete theSerializer;
//
// Filter, formatTarget and error handler
// are NOT owned by the serializer.
//
delete myFormTarget;
delete myErrorHandler;
if (gUseFilter)
delete myFilter;
}
catch (XMLException& e)
{
cerr << "An error occurred during creation of output transcoder. Msg is:"
<< endl
<< StrX(e.getMessage()) << endl;
retval = 4;
}
}
else
retval = 4;
//
// Clean up the error handler. The parser does not adopt handlers
// since they could be many objects or one object installed for multiple
// handlers.
//
delete errReporter;
//
// Delete the parser itself. Must be done prior to calling Terminate, below.
//
delete parser;
// And call the termination method
XMLPlatformUtils::Terminate();
delete (void *)gOutputEncoding; // const problems.
delete (void *)gMyEOLSequence; // const problems.
return retval;
}