// blob: e5365dd971164a5b6f79eb085a699204d0dc8585 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 1999-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Log$
* Revision 1.12 2001/09/12 13:03:43 tng
* [Bug 3155] SAX2 does not offer progressive parse.
*
* Revision 1.11 2001/08/01 19:11:02 tng
* Add full schema constraint checking flag to the samples and the parser.
*
* Revision 1.10 2001/06/27 17:40:16 knoaman
* Fix for bug #2353.
*
* Revision 1.9 2001/06/19 16:44:31 tng
* Add installAdvDocHandler to SAX2XMLReader as the code is there already.
*
* Revision 1.8 2001/06/04 21:03:07 jberry
* Add pure virtual getErrorCount to SAX2XMLReader interface.
*
* Revision 1.7 2001/05/11 13:26:25 tng
* Copyright update.
*
* Revision 1.6 2001/02/15 15:56:31 tng
* Schema: Add setSchemaValidation and getSchemaValidation for DOMParser and SAXParser.
* Add feature "http://apache.org/xml/features/validation/schema" for SAX2XMLReader.
* New data field fSchemaValidation in XMLScanner as the flag.
*
* Revision 1.5 2001/01/12 21:22:00 tng
* Documentation Enhancement: Add list of SAX2 feature strings that are supported.
*
* Revision 1.4 2000/12/22 15:17:04 tng
* SAX2-ext's LexicalHandler support added by David Bertoni.
*
* Revision 1.3 2000/08/09 22:19:29 jpolast
* many conformance & stability changes:
* - ContentHandler::resetDocument() removed
* - attrs param of ContentHandler::startDocument() made const
* - SAXExceptions thrown now have msgs
* - removed duplicate function signatures that had 'const'
* [ eg: getContentHander() ]
* - changed getFeature and getProperty to apply to const objs
* - setProperty now takes a void* instead of const void*
* - SAX2XMLReaderImpl does not inherit from SAXParser anymore
* - Reuse Validator (http://apache.org/xml/features/reuse-validator) implemented
* - Features & Properties now read-only during parse
*
* Revision 1.2 2000/08/07 18:21:27 jpolast
* change SAX_EXPORT module to SAX2_EXPORT
*
* Revision 1.1 2000/08/02 18:02:35 jpolast
* initial checkin of sax2 implementation
* submitted by Simon Fell (simon@fell.com)
* and Joe Polastre (jpolast@apache.org)
*
*
*/
#ifndef SAX2XMLReader_HPP
#define SAX2XMLReader_HPP
#include <util/XercesDefs.hpp>
#include <util/XMLUniDefs.hpp>
#include <framework/XMLValidator.hpp>
#include <framework/XMLPScanToken.hpp>
// Forward declarations: this header only uses the following types through
// pointers or references in member function signatures, so their full
// definitions are not required here.
class ContentHandler ;
class DTDHandler;
class EntityResolver;
class ErrorHandler;
class InputSource;
class LexicalHandler;
class XMLDocumentHandler;
/**
* Abstract interface for a SAX2 XML reader.
*
* Apart from the empty inline constructor and destructor, every public
* member function declared here is pure virtual, so this class cannot be
* instantiated directly; a derived class must supply the implementation.
*
* Copy construction and assignment are declared private, so client code
* cannot copy or assign a reader.
*/
class SAX2_EXPORT SAX2XMLReader
{
public:
/**
* Enumeration of three constants: Val_Never, Val_Always and Val_Auto.
*
* NOTE(review): presumably these name validation schemes (never validate,
* always validate, validate automatically), but no member function of this
* interface takes or returns a ValSchemes value - confirm the intended use
* against the implementation before relying on it.
*/
enum ValSchemes
{
Val_Never
, Val_Always
, Val_Auto
};
/** @name Constructors and Destructor */
// -----------------------------------------------------------------------
// Constructors and Destructor
// -----------------------------------------------------------------------
//@{
/** The default constructor; it has an empty body. */
SAX2XMLReader()
{
}
/** The virtual destructor; it has an empty body. */
virtual ~SAX2XMLReader()
{
}
//@}
//-----------------------------------------------------------------------
// The XMLReader interface
//-----------------------------------------------------------------------
/** @name The XMLReader interfaces */
//@{
/** Get error count from the last parse operation.
*
* This method returns the error count from the last parse
* operation. Note that this count is actually stored in the
* scanner, so this method simply returns what the
* scanner reports.
*
* @return number of errors encountered during the latest
* parse operation.
*/
virtual int getErrorCount() const = 0 ;
/**
* This method returns the installed content handler.
*
* @return A pointer to the installed content handler object.
*/
virtual ContentHandler* getContentHandler() const = 0 ;
/**
* This method returns the installed DTD handler.
*
* @return A pointer to the installed DTD handler object.
*/
virtual DTDHandler* getDTDHandler() const = 0;
/**
* This method returns the installed entity resolver.
*
* @return A pointer to the installed entity resolver object.
*/
virtual EntityResolver* getEntityResolver() const = 0 ;
/**
* This method returns the installed error handler.
*
* @return A pointer to the installed error handler object.
*/
virtual ErrorHandler* getErrorHandler() const = 0 ;
/**
* This method returns the installed lexical handler.
*
* @return A pointer to the installed lexical handler object.
*/
virtual LexicalHandler* getLexicalHandler() const = 0 ;
/**
* Query the current state of any feature in a SAX2 XMLReader.
*
* @param name The unique identifier (URI) of the feature being queried.
* @return The current state of the feature.
* @exception SAXNotRecognizedException If the requested feature is not known.
*/
virtual bool getFeature(const XMLCh* const name) const = 0;
/**
* Query the current value of a property in a SAX2 XMLReader.
*
* @param name The unique identifier (URI) of the property being queried.
* @return The current value of the property.
* @exception SAXNotRecognizedException If the requested property is not known.
*/
virtual void* getProperty(const XMLCh* const name) const = 0 ;
/**
* This method returns the state of the parser's
* exit-on-First-Fatal-Error flag.
*
* @return true, if the parser is currently configured to
* exit on the first fatal error, false otherwise.
*
* @see #setExitOnFirstFatalError
*/
virtual bool getExitOnFirstFatalError() const = 0;
/**
* This method returns the state of the parser's
* validation-constraint-fatal flag.
*
* @return true, if the parser is currently configured to
* set validation constraint errors as fatal, false
* otherwise.
*
* @see #setValidationConstraintFatal
*/
virtual bool getValidationConstraintFatal() const = 0;
/**
* Parse an XML document.
*
* The application can use this method to instruct the SAX parser
* to begin parsing an XML document from any valid input
* source (a character stream, a byte stream, or a URI).
*
* Applications may not invoke this method while a parse is in
* progress (they should create a new Parser instead for each
* additional XML document). Once a parse is complete, an
* application may reuse the same Parser object, possibly with a
* different input source.
*
* @param source The input source for the top-level of the
* XML document.
* @exception SAXException Any SAX exception, possibly
* wrapping another exception.
* @exception XMLException An exception from the parser or client
* handler code.
* @see InputSource#InputSource
* @see #setEntityResolver
* @see #setDTDHandler
* @see #setContentHandler
* @see #setErrorHandler
*/
virtual void parse
(
const InputSource& source
) = 0;
/**
* Parse an XML document from a system identifier (URI) given as an
* XMLCh string.
*
* This method is a shortcut for the common case of reading a
* document from a system identifier. It is the exact equivalent
* of the following:
*
* parse(new URLInputSource(systemId));
*
* If the system identifier is a URL, it must be fully resolved
* by the application before it is passed to the parser.
*
* @param systemId The system identifier (URI), as an XMLCh string.
* @exception SAXException Any SAX exception, possibly
* wrapping another exception.
* @exception XMLException An exception from the parser or client
* handler code.
* @see #parse(InputSource)
*/
virtual void parse
(
const XMLCh* const systemId
) = 0;
/**
* Parse an XML document from a system identifier (URI) given as a
* char string.
*
* This method is a shortcut for the common case of reading a
* document from a system identifier. It is the exact equivalent
* of the following:
*
* parse(new URLInputSource(systemId));
*
* If the system identifier is a URL, it must be fully resolved
* by the application before it is passed to the parser.
*
* @param systemId The system identifier (URI), as a char string.
* @exception SAXException Any SAX exception, possibly
* wrapping another exception.
* @exception XMLException An exception from the parser or client
* handler code.
* @see #parse(InputSource)
*/
virtual void parse
(
const char* const systemId
) = 0;
/**
* Allow an application to register a content event handler.
*
* If the application does not register a content handler, all
* content events reported by the SAX parser will be silently
* ignored (this is the default behaviour implemented by
* HandlerBase).
*
* Applications may register a new or different handler in the
* middle of a parse, and the SAX parser must begin using the new
* handler immediately.
*
* @param handler The content handler.
* @see ContentHandler#ContentHandler
* @see HandlerBase#HandlerBase
*/
virtual void setContentHandler(ContentHandler* const handler) = 0;
/**
* Allow an application to register a DTD event handler.
*
* If the application does not register a DTD handler, all DTD
* events reported by the SAX parser will be silently ignored (this
* is the default behaviour implemented by HandlerBase).
*
* Applications may register a new or different handler in the middle
* of a parse, and the SAX parser must begin using the new handler
* immediately.
*
* @param handler The DTD handler.
* @see DTDHandler#DTDHandler
* @see HandlerBase#HandlerBase
*/
virtual void setDTDHandler(DTDHandler* const handler) = 0;
/**
* Allow an application to register a custom entity resolver.
*
* If the application does not register an entity resolver, the
* SAX parser will resolve system identifiers and open connections
* to entities itself (this is the default behaviour implemented in
* DefaultHandler).
*
* Applications may register a new or different entity resolver
* in the middle of a parse, and the SAX parser must begin using
* the new resolver immediately.
*
* @param resolver The object for resolving entities.
* @see EntityResolver#EntityResolver
* @see DefaultHandler#DefaultHandler
*/
virtual void setEntityResolver(EntityResolver* const resolver) = 0;
/**
* Allow an application to register an error event handler.
*
* If the application does not register an error event handler,
* all error events reported by the SAX parser will be silently
* ignored, except for fatalError, which will throw a SAXException
* (this is the default behaviour implemented by HandlerBase).
*
* Applications may register a new or different handler in the
* middle of a parse, and the SAX parser must begin using the new
* handler immediately.
*
* @param handler The error handler.
* @see ErrorHandler#ErrorHandler
* @see SAXException#SAXException
* @see HandlerBase#HandlerBase
*/
virtual void setErrorHandler(ErrorHandler* const handler) = 0;
/**
* Allow an application to register a lexical event handler.
*
* If the application does not register a lexical handler,
* all lexical events reported by the SAX parser will be silently
* ignored (this is the default behaviour implemented by HandlerBase).
*
* Applications may register a new or different handler in the
* middle of a parse, and the SAX parser must begin using the new
* handler immediately.
*
* @param handler The lexical handler.
* @see LexicalHandler#LexicalHandler
* @see SAXException#SAXException
* @see HandlerBase#HandlerBase
*/
virtual void setLexicalHandler(LexicalHandler* const handler) = 0;
/**
* This method allows users to set the parser's behaviour when it
* encounters the first fatal error. If set to true, the parser
* will exit at the first fatal error. If false, then it will
* report the error and continue processing.
*
* <p>The default value is 'true' and the parser exits on the
* first fatal error.</p>
*
* @param newState The value specifying whether the parser should
* continue or exit when it encounters the first
* fatal error.
*
* @see #getExitOnFirstFatalError
*/
virtual void setExitOnFirstFatalError(const bool newState) = 0;
/**
* This method allows users to set the parser's behaviour when it
* encounters a validation constraint error. If set to true, and the
* parser is set to exit when it encounters the first fatal error,
* the parser will exit at the first encounter. If false, then it will
* report the error and continue processing.
*
* <p>The default value is 'false'.</p>
*
* @param newState The value specifying whether the parser should
* continue or exit when it encounters a validation
* constraint error.
*
* @see #getValidationConstraintFatal
*/
virtual void setValidationConstraintFatal(const bool newState) = 0;
/**
* Set the state of any feature in a SAX2 XMLReader.
* Supported features in SAX2 for xerces-c are:
*
* <br>http://xml.org/sax/features/validation (default: true)
* <br>http://xml.org/sax/features/namespaces (default: true)
* <br>http://xml.org/sax/features/namespace-prefixes (default: true)
* <br>http://apache.org/xml/features/validation/dynamic (default: false)
* <br>http://apache.org/xml/features/validation/reuse-validator (default: false)
* <br>http://apache.org/xml/features/validation/schema (default: true)
* <br>http://apache.org/xml/features/validation/schema-full-checking (default: false)
*
* @param name The unique identifier (URI) of the feature.
* @param value The requested state of the feature (true or false).
* @exception SAXNotRecognizedException If the requested feature is not known.
* @exception SAXNotSupportedException Feature modification is not supported during parse
*
*/
virtual void setFeature(const XMLCh* const name, const bool value) = 0;
/**
* Set the value of any property in a SAX2 XMLReader.
* Supported properties in SAX2 for xerces-c are:
*
* <br>none
*
* @param name The unique identifier (URI) of the property being set.
* @param value The requested value for the property.
* @exception SAXNotRecognizedException If the requested property is not known.
* @exception SAXNotSupportedException Property modification is not supported during parse
*/
virtual void setProperty(const XMLCh* const name, void* value) = 0 ;
//@}
// -----------------------------------------------------------------------
// Progressive scan methods
// -----------------------------------------------------------------------
/** @name Progressive scan methods */
//@{
/** Begin a progressive parse operation
*
* This method is used to start a progressive parse on a XML file.
* To continue parsing, subsequent calls must be to the parseNext
* method.
*
* It scans through the prolog and returns a token to be used on
* subsequent parseNext() calls. If the return value is true, then the
* token is legal and ready for further use. If it returns false, then
* the scan of the prolog failed and the token is not going to work on
* subsequent parseNext() calls.
*
* @param systemId A pointer to a Unicode string representing the path
* to the XML file to be parsed.
* @param toFill A token maintaining state information to maintain
* internal consistency between invocation of 'parseNext'
* calls.
* @param reuseGrammar The flag indicating whether the existing Grammar
* should be reused or not for this parsing run.
* If true, there cannot be any internal subset.
* Defaults to false; because this function is
* virtual, the default used at a call site is the
* one declared on the static type of the object
* expression.
*
* @return 'true', if successful in parsing the prolog. It indicates the
* user can go ahead with parsing the rest of the file. It
* returns 'false' to indicate that the parser could not parse
* the prolog (which means the token will not be valid).
*
* @see #parseNext
* @see #parseFirst(char*,...)
* @see #parseFirst(InputSource&,...)
*/
virtual bool parseFirst
(
const XMLCh* const systemId
, XMLPScanToken& toFill
, const bool reuseGrammar = false
) = 0;
/** Begin a progressive parse operation
*
* This method is used to start a progressive parse on a XML file.
* To continue parsing, subsequent calls must be to the parseNext
* method.
*
* It scans through the prolog and returns a token to be used on
* subsequent parseNext() calls. If the return value is true, then the
* token is legal and ready for further use. If it returns false, then
* the scan of the prolog failed and the token is not going to work on
* subsequent parseNext() calls.
*
* @param systemId A pointer to a regular native string representing
* the path to the XML file to be parsed.
* @param toFill A token maintaining state information to maintain
* internal consistency between invocation of 'parseNext'
* calls.
* @param reuseGrammar The flag indicating whether the existing Grammar
* should be reused or not for this parsing run.
* If true, there cannot be any internal subset.
* Defaults to false; because this function is
* virtual, the default used at a call site is the
* one declared on the static type of the object
* expression.
*
* @return 'true', if successful in parsing the prolog. It indicates the
* user can go ahead with parsing the rest of the file. It
* returns 'false' to indicate that the parser could not parse
* the prolog.
*
* @see #parseNext
* @see #parseFirst(XMLCh*,...)
* @see #parseFirst(InputSource&,...)
*/
virtual bool parseFirst
(
const char* const systemId
, XMLPScanToken& toFill
, const bool reuseGrammar = false
) = 0;
/** Begin a progressive parse operation
*
* This method is used to start a progressive parse on a XML file.
* To continue parsing, subsequent calls must be to the parseNext
* method.
*
* It scans through the prolog and returns a token to be used on
* subsequent parseNext() calls. If the return value is true, then the
* token is legal and ready for further use. If it returns false, then
* the scan of the prolog failed and the token is not going to work on
* subsequent parseNext() calls.
*
* @param source A const reference to the InputSource object which
* points to the XML file to be parsed.
* @param toFill A token maintaining state information to maintain
* internal consistency between invocation of 'parseNext'
* calls.
* @param reuseGrammar The flag indicating whether the existing Grammar
* should be reused or not for this parsing run.
* If true, there cannot be any internal subset.
* Defaults to false; because this function is
* virtual, the default used at a call site is the
* one declared on the static type of the object
* expression.
*
* @return 'true', if successful in parsing the prolog. It indicates the
* user can go ahead with parsing the rest of the file. It
* returns 'false' to indicate that the parser could not parse
* the prolog.
*
* @see #parseNext
* @see #parseFirst(XMLCh*,...)
* @see #parseFirst(char*,...)
*/
virtual bool parseFirst
(
const InputSource& source
, XMLPScanToken& toFill
, const bool reuseGrammar = false
) = 0;
/** Continue a progressive parse operation
*
* This method is used to continue with progressive parsing of
* XML files started by a call to 'parseFirst' method.
*
* It parses the XML file and stops as soon as it comes across
* a XML token (as defined in the XML specification). Relevant
* callback handlers are invoked as required by the SAX
* specification.
*
* @param token A token maintaining state information to maintain
* internal consistency between invocation of 'parseNext'
* calls.
*
* @return 'true', if successful in parsing the next XML token.
* It indicates the user can go ahead with parsing the rest
* of the file. It returns 'false' to indicate that the parser
* could not find next token as per the XML specification
* production rule.
*
* @see #parseFirst(XMLCh*,...)
* @see #parseFirst(char*,...)
* @see #parseFirst(InputSource&,...)
*/
virtual bool parseNext(XMLPScanToken& token) = 0;
/** Reset the parser after a progressive parse
*
* If a progressive parse loop exits before the end of the document
* is reached, the parser has no way of knowing this. So it will leave
* open any files or sockets or memory buffers that were in use at
* the time that the parse loop exited.
*
* The next parse operation will cause these open files and such to
* be closed, but the next parse operation might occur at some unknown
* future point. To avoid this problem, you should reset the parser if
* you exit the loop early.
*
* If you exited because of an error, then this cleanup will be done
* for you. It's only when you exit the file prematurely of your own
* accord, because you've found what you wanted in the file most
* likely.
*
* @param token A token maintaining state information to maintain
* internal consistency between invocation of 'parseNext'
* calls.
*/
virtual void parseReset(XMLPScanToken& token) = 0;
//@}
// -----------------------------------------------------------------------
// Validator: setters and getters
// -----------------------------------------------------------------------
/** @name Validator: setters and getters (Xerces-C specific) */
//@{
/**
* This method is used to set a validator.
*
* <b>SAX2XMLReader assumes responsibility for the validator. It will be
* deleted when the XMLReader is destroyed.</b>
*
* @param valueToAdopt A pointer to the validator that the reader should use.
*
*/
virtual void setValidator(XMLValidator* valueToAdopt) = 0;
/**
* This method is used to get the current validator.
*
* <b>SAX2XMLReader assumes responsibility for the validator. It will be
* deleted when the XMLReader is destroyed.</b>
*
* @return A pointer to the validator. An application should not delete
* the object returned.
*
*/
virtual XMLValidator* getValidator() const = 0;
//@}
// -----------------------------------------------------------------------
// Advanced document handler list maintenance methods
// -----------------------------------------------------------------------
/** @name Advanced document handler list maintenance methods */
//@{
/**
* This method installs the specified 'advanced' document callback
* handler, thereby allowing the user to customize the processing,
* if they choose to do so. Any number of advanced callback handlers
* may be installed.
*
* <p>The methods in the advanced callback interface represent
* Xerces-C extensions. There is no specification for this interface.</p>
*
* @param toInstall A pointer to the user's advanced callback handler.
*
* @see #removeAdvDocHandler
*/
virtual void installAdvDocHandler(XMLDocumentHandler* const toInstall) = 0;
/**
* This method removes the 'advanced' document handler callback from
* the underlying parser scanner. If no handler is installed, advanced
* callbacks are not invoked by the scanner.
* @param toRemove A pointer to the advanced callback handler which
* should be removed.
*
* @return NOTE(review): the meaning of the returned bool is not stated
* in this header; presumably it reports whether the handler
* was found and removed - confirm against the implementation.
*
* @see #installAdvDocHandler
*/
virtual bool removeAdvDocHandler(XMLDocumentHandler* const toRemove) = 0;
//@}
private :
/* The copy constructor is declared private, so client code cannot copy a reader */
SAX2XMLReader(const SAX2XMLReader&);
/* The assignment operator is declared private, so client code cannot assign a reader */
void operator=(const SAX2XMLReader&);
};
#endif