blob: c9a7e0f7abeba1f3a0b5ee3228716baeeb1d1f62 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 1999-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Log$
* Revision 1.34 2001/10/12 20:52:18 tng
* Schema: Find the attributes see if they should be (un)qualified.
*
* Revision 1.33 2001/09/10 15:16:04 tng
* Store the fGrammarType instead of calling getGrammarType all the time for faster performance.
*
* Revision 1.32 2001/09/10 14:06:22 tng
* Schema: AnyAttribute support in Scanner and Validator.
*
* Revision 1.31 2001/08/13 15:06:39 knoaman
* update <any> validation.
*
* Revision 1.30 2001/08/02 16:54:39 tng
* Reset some Scanner flags in scanReset().
*
* Revision 1.29 2001/08/01 19:11:01 tng
* Add full schema constraint checking flag to the samples and the parser.
*
* Revision 1.28 2001/07/24 21:23:39 tng
* Schema: Use DatatypeValidator for ID/IDREF/ENTITY/ENTITIES/NOTATION.
*
* Revision 1.27 2001/07/13 16:56:48 tng
* ScanId fix.
*
* Revision 1.26 2001/07/12 18:50:17 tng
* Some performance modification regarding standalone check and xml decl check.
*
* Revision 1.25 2001/07/10 21:09:31 tng
* Give proper error messsage when scanning external id.
*
* Revision 1.24 2001/07/09 13:42:08 tng
* Partial Markup in Parameter Entity is validity constraint and thus should be just error, not fatal error.
*
* Revision 1.23 2001/07/05 13:12:11 tng
* Standalone checking is validity constraint and thus should be just error, not fatal error:
*
* Revision 1.22 2001/06/22 12:42:33 tng
* [Bug 2257] 1.5 thinks a <?xml-stylesheet ...> tag is a <?xml ...> tag
*
* Revision 1.21 2001/06/04 20:59:29 jberry
* Add method incrementErrorCount for use by validator. Make sure to reset error count in _both_ the scanReset methods.
*
* Revision 1.20 2001/06/03 19:21:40 jberry
* Add support for tracking error count during parse; enables simple parse without requiring error handler.
*
* Revision 1.19 2001/05/28 20:55:02 tng
* Schema: allocate a fDTDValidator, fSchemaValidator explicitly to avoid wrong cast
*
* Revision 1.18 2001/05/11 15:17:28 tng
* Schema: Nillable fixes.
*
* Revision 1.17 2001/05/11 13:26:17 tng
* Copyright update.
*
* Revision 1.16 2001/05/03 20:34:29 tng
* Schema: SchemaValidator update
*
* Revision 1.15 2001/05/03 19:09:09 knoaman
* Support Warning/Error/FatalError messaging.
* Validity constraints errors are treated as errors, with the ability by user to set
* validity constraints as fatal errors.
*
* Revision 1.14 2001/04/19 18:16:59 tng
* Schema: SchemaValidator update, and use QName in Content Model
*
* Revision 1.13 2001/03/30 16:46:56 tng
* Schema: Use setDoSchema instead of setSchemaValidation which makes more sense.
*
* Revision 1.12 2001/03/30 16:35:06 tng
* Schema: Whitespace normalization.
*
* Revision 1.11 2001/03/21 21:56:05 tng
* Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar.
*
* Revision 1.10 2001/02/15 15:56:27 tng
* Schema: Add setSchemaValidation and getSchemaValidation for DOMParser and SAXParser.
* Add feature "http://apache.org/xml/features/validation/schema" for SAX2XMLReader.
* New data field fSchemaValidation in XMLScanner as the flag.
*
* Revision 1.9 2000/04/12 22:58:28 roddey
* Added support for 'auto validate' mode.
*
* Revision 1.8 2000/03/03 01:29:32 roddey
* Added a scanReset()/parseReset() method to the scanner and
* parsers, to allow for reset after early exit from a progressive parse.
* Added calls to new Terminate() call to all of the samples. Improved
* documentation in SAX and DOM parsers.
*
* Revision 1.7 2000/03/02 19:54:30 roddey
* This checkin includes many changes done while waiting for the
* 1.1.0 code to be finished. I can't list them all here, but a list is
* available elsewhere.
*
* Revision 1.6 2000/02/24 20:18:07 abagchi
* Swat for removing Log from API docs
*
* Revision 1.5 2000/02/06 07:47:54 rahulj
* Year 2K copyright swat.
*
* Revision 1.4 2000/01/24 20:40:43 roddey
* Exposed the APIs to get to the byte offset in the source XML buffer. This stuff
* is not tested yet, but I wanted to get the API changes in now so that the API
* can be stablized.
*
* Revision 1.3 2000/01/12 23:52:46 roddey
* These are trivial changes required to get the C++ and Java versions
* of error messages more into sync. Mostly it was where the Java version
* was passing out one or more parameter than the C++ version was. In
* some cases the change just required an extra parameter to get the
* needed info to the place where the error was issued.
*
* Revision 1.2 2000/01/12 00:15:04 roddey
* Changes to deal with multiply nested, relative pathed, entities and to deal
* with the new URL class changes.
*
* Revision 1.1.1.1 1999/11/09 01:08:23 twl
* Initial checkin
*
* Revision 1.4 1999/11/08 20:44:52 rahul
* Swat for adding in Product name and CVS comment log variable.
*
*/
#if !defined(XMLSCANNER_HPP)
#define XMLSCANNER_HPP
#include <util/KVStringPair.hpp>
#include <util/RefVectorOf.hpp>
#include <util/XMLString.hpp>
#include <framework/XMLAttr.hpp>
#include <framework/XMLBufferMgr.hpp>
#include <framework/XMLErrorCodes.hpp>
#include <framework/XMLRefInfo.hpp>
#include <framework/XMLPScanToken.hpp>
#include <internal/ElemStack.hpp>
#include <internal/ReaderMgr.hpp>
#include <validators/DTD/DTDEntityDecl.hpp>
#include <validators/DTD/DTDValidator.hpp>
#include <validators/schema/SchemaValidator.hpp>
class InputSource;
class XMLDocumentHandler;
class DocTypeHandler;
class XMLElementDecl;
class XMLEntityHandler;
class EntityResolver;
class XMLErrorReporter;
class ErrorHandler;
class XMLMsgLoader;
class XMLValidator;
//
//  This is the mondo scanner class, which does the vast majority of the
//  work of parsing. It handles reading in input and spitting out events
//  to installed handlers.
//
//  The scanner does not adopt the document, doc type, entity or error
//  handlers that are installed on it (see the data member notes below);
//  a validator passed in via 'valToAdopt' is owned by the scanner.
//
class XMLPARSER_EXPORT XMLScanner
{
public :
    // -----------------------------------------------------------------------
    //  Public class types
    //
    //  NOTE: These should really be private, but some of the compilers we
    //        have to deal with are too stupid to understand this.
    //
    //  DeclTypes
    //      Used by scanXMLDecl() to know what type of decl it should scan.
    //      Text decls have slightly different rules from XMLDecls.
    //
    //  EntityExpRes
    //      These are the values returned from the entity expansion method,
    //      to indicate how it went.
    //
    //  XMLTokens
    //      These represent the possible types of input we can get while
    //      scanning content.
    //
    //  ValSchemes
    //      This indicates what the scanner should do in terms of validation.
    //      'Auto' means if there is any int/ext subset, then validate. Else,
    //      don't.
    // -----------------------------------------------------------------------
    enum DeclTypes
    {
        Decl_Text
        , Decl_XML
    };

    enum EntityExpRes
    {
        EntityExp_Pushed
        , EntityExp_Returned
        , EntityExp_Failed
    };

    enum XMLTokens
    {
        Token_CData
        , Token_CharData
        , Token_Comment
        , Token_EndTag
        , Token_EOF
        , Token_PI
        , Token_StartTag
        , Token_Unknown
    };

    enum ValSchemes
    {
        Val_Never
        , Val_Always
        , Val_Auto
    };


    // -----------------------------------------------------------------------
    //  Constructors and Destructor
    // -----------------------------------------------------------------------
    XMLScanner
    (
        XMLValidator* const valToAdopt
    );
    XMLScanner
    (
        XMLDocumentHandler* const  docHandler
        , DocTypeHandler* const    docTypeHandler
        , XMLEntityHandler* const  entityHandler
        , XMLErrorReporter* const  errReporter
        , XMLValidator* const      valToAdopt
    );
    ~XMLScanner();


    // -----------------------------------------------------------------------
    //  Error emitter methods
    //
    //  The overloads taking text1..text4 accept up to four optional pieces
    //  of text to go with the error code, as either XMLCh or char strings.
    // -----------------------------------------------------------------------
    void emitError(const XMLErrs::Codes toEmit);
    void emitError
    (
        const XMLErrs::Codes    toEmit
        , const XMLCh* const    text1
        , const XMLCh* const    text2 = 0
        , const XMLCh* const    text3 = 0
        , const XMLCh* const    text4 = 0
    );
    void emitError
    (
        const XMLErrs::Codes    toEmit
        , const char* const     text1
        , const char* const     text2 = 0
        , const char* const     text3 = 0
        , const char* const     text4 = 0
    );


    // -----------------------------------------------------------------------
    //  Getter methods
    // -----------------------------------------------------------------------
    const XMLDocumentHandler* getDocHandler() const;
    XMLDocumentHandler* getDocHandler();
    const DocTypeHandler* getDocTypeHandler() const;
    DocTypeHandler* getDocTypeHandler();
    bool getDoNamespaces() const;
    ValSchemes getValidationScheme() const;
    bool getDoSchema() const;
    bool getValidationSchemaFullChecking() const;
    const XMLEntityHandler* getEntityHandler() const;
    XMLEntityHandler* getEntityHandler();
    const XMLErrorReporter* getErrorReporter() const;
    XMLErrorReporter* getErrorReporter();
    bool getExitOnFirstFatal() const;
    bool getValidationConstraintFatal() const;
    RefHashTableOf<XMLRefInfo>* getIDRefList();
    const RefHashTableOf<XMLRefInfo>* getIDRefList() const;
    bool getInException() const;
    bool getLastExtLocation
    (
                XMLCh* const    sysIdToFill
        , const unsigned int    maxSysIdChars
        ,       XMLCh* const    pubIdToFill
        , const unsigned int    maxPubIdChars
        ,       unsigned int&   lineToFill
        ,       unsigned int&   colToFill
    );
    const Locator* getLocator() const;
    unsigned int getSrcOffset() const;
    bool getStandalone() const;
    const XMLValidator* getValidator() const;
    XMLValidator* getValidator();
    // NOTE: not const-qualified; left that way so the out-of-class
    //       definition keeps matching this declaration.
    int getErrorCount();
    const DTDEntityDecl* getEntityDecl
    (
        const   XMLCh* const    entName
    )   const;

    DTDEntityDecl* getEntityDecl
    (
        const   XMLCh* const    entName
    );
    NameIdPoolEnumerator<DTDEntityDecl> getEntityEnumerator() const;
    NameIdPool<DTDEntityDecl>* getEntityDeclPool();
    const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const;
    const XMLStringPool* getURIStringPool() const;
    XMLStringPool* getURIStringPool();
    bool getHasNoDTD() const;


    // -----------------------------------------------------------------------
    //  Namespace and URI helper methods
    // -----------------------------------------------------------------------

    /**
      * When an attribute name has no prefix, unlike elements, it is not mapped
      * to the global namespace. So, in order to have something to map it to
      * for practical purposes, an id for an empty URL is created and used for
      * such names.
      *
      * @return The URL pool id of the URL for an empty URL "".
      */
    unsigned int getEmptyNamespaceId() const;

    /**
      * When a prefix is found that has not been mapped, an error is issued.
      * However, if the parser has been instructed not to stop on the first
      * fatal error, it needs to be able to continue. To do so, it will map
      * that prefix to this magic unknown namespace id.
      *
      * @return The URL pool id of the URL for the unknown prefix
      *         namespace.
      */
    unsigned int getUnknownNamespaceId() const;

    /**
      * The prefix 'xml' is a magic prefix, defined by the XML spec and
      * requiring no prior definition. This method returns the id for the
      * intrinsically defined URL for this prefix.
      *
      * @return The URL pool id of the URL for the 'xml' prefix.
      */
    unsigned int getXMLNamespaceId() const;

    /**
      * The prefix 'xmlns' is a magic prefix, defined by the namespace spec
      * and requiring no prior definition. This method returns the id for the
      * intrinsically defined URL for this prefix.
      *
      * @return The URL pool id of the URL for the 'xmlns' prefix.
      */
    unsigned int getXMLNSNamespaceId() const;

    /**
      * This method finds the passed URI id in its URI pool and
      * copies the text of that URI into the passed buffer.
      */
    bool getURIText
    (
        const   unsigned int    uriId
        ,       XMLBuffer&      uriBufToFill
    )   const;

    const XMLCh* getURIText(const unsigned int uriId) const;

    /**
      * This method separates the passed QName into prefix
      * and local part, and then returns the URI id by resolving
      * the prefix.
      *
      * mode: Indicates if this QName comes from an Element or Attribute
      */
    unsigned int resolveQName
    (
        const   XMLCh* const        qName
        ,       XMLBuffer&          nameBufToFill
        ,       XMLBuffer&          prefixBufToFill
        , const ElemStack::MapModes mode
    );

    /* tell if the validator comes from user */
    bool isValidatorFromUser();


    // -----------------------------------------------------------------------
    //  Setter methods
    // -----------------------------------------------------------------------
    void setDocHandler(XMLDocumentHandler* const docHandler);
    void setDocTypeHandler(DocTypeHandler* const docTypeHandler);
    void setDoNamespaces(const bool doNamespaces);
    void setEntityHandler(XMLEntityHandler* const entityHandler);
    void setEntityResolver(EntityResolver* const handler);
    void setErrorReporter(XMLErrorReporter* const errHandler);
    void setErrorHandler(ErrorHandler* const handler);
    void setExitOnFirstFatal(const bool newValue);
    void setValidationConstraintFatal(const bool newValue);
    void setValidationScheme(const ValSchemes newScheme);
    void setValidator(XMLValidator* const valToAdopt);
    void setDoSchema(const bool doSchema);
    void setValidationSchemaFullChecking(const bool schemaFullChecking);
    void setHasNoDTD(const bool hasNoDTD);


    // -----------------------------------------------------------------------
    //  Mutator methods
    // -----------------------------------------------------------------------
    void incrementErrorCount(void);         // For use by XMLValidator


    // -----------------------------------------------------------------------
    //  Deprecated methods as of 3.2.0. Use getValidationScheme() and
    //  setValidationScheme() instead.
    // -----------------------------------------------------------------------
    bool getDoValidation() const;
    void setDoValidation(const bool validate, const bool setValScheme = true);


    // -----------------------------------------------------------------------
    //  Document scanning methods
    //
    //  scanDocument() does the entire source document. scanFirst(),
    //  scanNext(), and scanReset() support a progressive parse.
    // -----------------------------------------------------------------------
    void scanDocument
    (
        const   InputSource&    src
        , const bool            reuseGrammar = false
    );
    void scanDocument
    (
        const   XMLCh* const    systemId
        , const bool            reuseGrammar = false
    );
    void scanDocument
    (
        const   char* const     systemId
        , const bool            reuseGrammar = false
    );

    bool scanFirst
    (
        const   InputSource&    src
        ,       XMLPScanToken&  toFill
        , const bool            reuseGrammar = false
    );
    bool scanFirst
    (
        const   XMLCh* const    systemId
        ,       XMLPScanToken&  toFill
        , const bool            reuseGrammar = false
    );
    bool scanFirst
    (
        const   char* const     systemId
        ,       XMLPScanToken&  toFill
        , const bool            reuseGrammar = false
    );

    bool scanNext(XMLPScanToken& toFill);

    void scanReset(XMLPScanToken& toFill);

    bool checkXMLDecl(bool startWithAngle);


private :
    // -----------------------------------------------------------------------
    //  Unimplemented constructors and operators
    //
    //  Declared private and never defined so that the scanner cannot be
    //  default constructed, copied or assigned.
    // -----------------------------------------------------------------------
    XMLScanner();
    XMLScanner(const XMLScanner&);
    void operator=(const XMLScanner&);


    // -----------------------------------------------------------------------
    //  Private helper methods
    // -----------------------------------------------------------------------
    void commonInit();
    void initValidator(XMLValidator* theValidator);
    void resetEntityDeclPool();
    void resetURIStringPool();


    // -----------------------------------------------------------------------
    //  Private helper methods
    //
    //  These are implemented in XMLScanner2.cpp, to keep the main file from
    //  becoming so bloated. We can't have any bloated files.
    // -----------------------------------------------------------------------
    unsigned int buildAttList
    (
        const   RefVectorOf<KVStringPair>&  providedAttrs
        , const unsigned int                attCount
        ,       XMLElementDecl*             elemDecl
        ,       RefVectorOf<XMLAttr>&       toFill
    );
    void checkIDRefs();
    bool isLegalToken(const XMLPScanToken& toCheck);
    bool normalizeAttValue
    (
        const   XMLAttDef* const    attDef
        , const XMLCh* const        value
        ,       XMLBuffer&          toFill
    );
    bool normalizeAttRawValue
    (
        const   XMLCh* const        attrName
        , const XMLCh* const        value
        ,       XMLBuffer&          toFill
    );

    unsigned int resolvePrefix
    (
        const   XMLCh* const        prefix
        , const ElemStack::MapModes mode
    );
    unsigned int resolvePrefix
    (
        const   XMLCh* const        prefix
        ,       XMLBuffer&          uriBufToFill
        , const ElemStack::MapModes mode
    );
    void scanReset(const InputSource& src);
    void sendCharData(XMLBuffer& toSend);
    XMLTokens senseNextToken(unsigned int& orgReader);
    void updateNSMap
    (
        const   XMLCh* const    attrName
        , const XMLCh* const    attrValue
    );
    void scanRawAttrListforNameSpaces(const RefVectorOf<KVStringPair>* theRawAttrList, int attCount);
    void parseSchemaLocation(const XMLCh* const schemaLocationStr);
    void resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri);
    bool switchGrammar(unsigned int newGrammarNameSpaceIndex);
    bool switchGrammar(const XMLCh* const newGrammarNameSpace);
    bool laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
                              const XMLContentModel* const cm,
                              const unsigned int parentElemDepth);
    bool anyAttributeValidation(SchemaAttDef* attWildCard,
                                unsigned int uriId,
                                bool& skipThisOne,
                                bool& laxThisOne);


    // -----------------------------------------------------------------------
    //  Private scanning methods
    // -----------------------------------------------------------------------
    bool basicAttrValueScan
    (
        const   XMLCh* const    attrName
        ,       XMLBuffer&      toFill
    );
    bool getQuotedString(XMLBuffer& toFill);
    unsigned int rawAttrScan
    (
        const   XMLCh* const                elemName
        ,       RefVectorOf<KVStringPair>&  toFill
        ,       bool&                       isEmpty
    );
    bool scanAttValue
    (
        const   XMLAttDef* const    attDef
        ,       XMLBuffer&          toFill
    );
    void scanCDSection();
    void scanCharData(XMLBuffer& toToUse);
    bool scanCharRef(XMLCh& toFill, XMLCh& second);
    void scanComment();
    bool scanContent(const bool extEntity);
    void scanEndTag(bool& gotData);
    EntityExpRes scanEntityRef
    (
        const   bool    inAttVal
        ,       XMLCh&  firstCh
        ,       XMLCh&  secondCh
        ,       bool&   escaped
    );
    bool scanEq();
    void scanMiscellaneous();
    void scanPI();
    void scanProlog();
    bool scanStartTag(bool& gotData);
    bool scanStartTagNS(bool& gotData);
    void scanXMLDecl(const DeclTypes type);
    unsigned int scanUpToWSOr
    (
                XMLBuffer&  toFill
        , const XMLCh       chEndChar
    );


    // -----------------------------------------------------------------------
    //  Private helper methods
    // -----------------------------------------------------------------------
    void resizeElemState();


    // -----------------------------------------------------------------------
    //  Data members
    //
    //  fAttrList
    //      Every time we get a new element start tag, we have to pass to
    //      the document handler the attributes found. To make it more
    //      efficient we keep this ref vector of XMLAttr objects around. We
    //      just reuse it over and over, allowing it to grow to meet the
    //      peak need.
    //
    //  fBufMgr
    //      This is a manager for temporary buffers used during scanning.
    //      For efficiency we must use a set of static buffers, but we have
    //      to ensure that they are not incorrectly reused. So this manager
    //      provides the smarts to hand out buffers as required.
    //
    //  fDocHandler
    //      The client code's document handler. If zero, then no document
    //      handler callouts are done. We don't adopt it.
    //
    //  fDocTypeHandler
    //      The client code's document type handler (used by DTD Validator).
    //
    //  fDoNamespaces
    //      This flag indicates whether the client code wants us to do
    //      namespaces or not. If the installed validator indicates that it
    //      has to do namespaces, then this is ignored.
    //
    //  fElemStack
    //      This is the element stack that is used to track the elements that
    //      are currently being worked on.
    //
    //  fEntityHandler
    //      The client code's entity handler. If zero, then no entity handler
    //      callouts are done. We don't adopt it.
    //
    //  fEntityResolver
    //      The client code's entity resolver. Need to store this info for
    //      Schema parse entity resolving.
    //
    //  fErrorReporter
    //      The client code's error reporter. If zero, then no error reporter
    //      callouts are done. We don't adopt it.
    //
    //  fErrorHandler
    //      The client code's error handler. Need to store this info for
    //      Schema parse error handling.
    //
    //  fExitOnFirstFatal
    //      This indicates whether we bail out on the first fatal XML error
    //      or not. It defaults to true, which is the strict XML way, but it
    //      can be changed.
    //
    //  fValidationConstraintFatal
    //      This indicates whether we treat validation constraint errors as
    //      fatal errors or not. It defaults to false, but it can be changed.
    //
    //  fIDRefList
    //      This is a list of XMLRefInfo objects. This member lets us do all
    //      needed ID-IDREF balancing checks.
    //
    //  fInException
    //      To avoid a circular freakout when we catch an exception and emit
    //      it, which would normally throw again if the 'fail on first error'
    //      flag is on.
    //
    //  fRawAttrList
    //      During the initial scan of the attributes we can only do a raw
    //      scan for key/value pairs. So this vector is used to store them
    //      until they can be processed (and put into fAttrList.)
    //
    //  fReaderMgr
    //      This is the reader manager, from which we get characters. It
    //      manages the reader stack for us, and provides a lot of convenience
    //      methods to do specialized checking for chars, sequences of chars,
    //      skipping chars, etc...
    //
    //  fReuseGrammar
    //      This flag is set on a per-scan basis. So it's provided in the
    //      scanDocument() and scanFirst() methods, and applies for that
    //      one pass. It indicates if the Grammar should be reused or not.
    //      If so, then all the Grammar will be ignored.
    //      There cannot be any internal subset.
    //
    //  fScannerId
    //  fSequenceId
    //      These are used for progressive parsing, to make sure that the
    //      client code does the right thing at the right time.
    //
    //  fStandalone
    //      Indicates whether the document is standalone or not. Defaults to
    //      no, but can be overridden in the XMLDecl.
    //
    //  fHasNoDTD
    //      Indicates the document has no DTD or has only an internal DTD subset
    //      which contains no parameter entity references.
    //
    //  fValidate
    //      Indicates whether any validation should be done. This is defined
    //      by the existence of a Grammar together with fValScheme.
    //
    //  fValidator
    //      The installed validator. We look at them via the abstract
    //      validator interface, and don't know what it actually is.
    //      Either points to user's installed validator, or fDTDValidator
    //      or fSchemaValidator.
    //
    //  fDTDValidator
    //      The DTD validator instance.
    //
    //  fSchemaValidator
    //      The Schema validator instance.
    //
    //  fValidatorFromUser
    //      This flag indicates whether the validator was installed from
    //      user. If false, then the validator was created by the Scanner.
    //
    //  fValScheme
    //      This is the currently set validation scheme. It defaults to
    //      'never', but can be set by the client.
    //
    //  fErrorCount
    //      The number of errors we've encountered.
    //
    //  fDoSchema
    //      This flag indicates whether the client code wants Schema to
    //      be processed or not.
    //
    //  fSchemaFullChecking
    //      This flag indicates whether the client code wants full Schema
    //      constraint checking.
    //
    //  fSeeXsi
    //      This flag indicates a schema has been seen.
    //
    //  fAttNameBuf
    //  fAttValueBuf
    //  fCDataBuf
    //  fNameBuf
    //  fQNameBuf
    //  fPrefixBuf
    //      For the most part, buffers are obtained from the fBufMgr object
    //      on the fly. However, for the start tag scan, we have a set of
    //      fixed buffers for performance reasons. These are used a lot and
    //      there are a number of them, so asking the buffer manager each
    //      time for new buffers is a bit too much overhead.
    //
    //  fURIBuf
    //      Another fixed buffer held by the scanner. NOTE(review): presumably
    //      used for namespace URI text; confirm against the implementation.
    //
    //  fEmptyNamespaceId
    //      This is the id of the empty namespace URI. This is a special one
    //      because of the xmlns="" type of deal. We have to quickly sense
    //      that it's the empty namespace.
    //
    //  fUnknownNamespaceId
    //      This is the id of the namespace URI which is assigned to the
    //      global namespace. It's for debug purposes only, since there is no
    //      real global namespace URI. It's set by the derived class.
    //
    //  fXMLNamespaceId
    //  fXMLNSNamespaceId
    //      These are the ids of the namespace URIs which are assigned to the
    //      'xml' and 'xmlns' special prefixes. The former is officially
    //      defined but the latter is not, so we just provide one for debug
    //      purposes.
    //
    //  fSchemaNamespaceId
    //      This is the id of the schema namespace URI.
    //
    //  fElemState
    //  fElemStateSize
    //      Stores an element next state from DFA content model - used for
    //      wildcard validation
    //
    //  fGrammarResolver
    //      Grammar Pool that stores all the Grammar
    //
    //  fGrammar
    //      Current Grammar used by the Scanner and Validator
    //
    //  fGrammarType
    //      Current Grammar Type. Store this value instead of calling getGrammarType
    //      all the time for faster performance.
    //
    //  fEntityDeclPool
    //      This is a pool of EntityDecl objects, which contains all of the
    //      general entities that are declared in the DTD subsets, plus the
    //      default entities (such as &gt; &lt; ...) defined by the XML Standard.
    //
    //  fURIStringPool
    //      This is a pool for URIs with unique ids assigned. We use a standard
    //      string pool class. This pool is going to be shared by all Grammar.
    //      Use only if namespace is turned on.
    //
    // -----------------------------------------------------------------------
    bool                        fDoNamespaces;
    bool                        fExitOnFirstFatal;
    bool                        fValidationConstraintFatal;
    bool                        fInException;
    bool                        fReuseGrammar;
    bool                        fStandalone;
    bool                        fHasNoDTD;
    bool                        fValidate;
    bool                        fValidatorFromUser;
    bool                        fDoSchema;
    bool                        fSchemaFullChecking;
    bool                        fSeeXsi;
    int                         fErrorCount;
    unsigned int                fEmptyNamespaceId;
    unsigned int                fUnknownNamespaceId;
    unsigned int                fXMLNamespaceId;
    unsigned int                fXMLNSNamespaceId;
    unsigned int                fSchemaNamespaceId;
    unsigned int                fElemStateSize;
    XMLUInt32                   fScannerId;
    XMLUInt32                   fSequenceId;
    unsigned int*               fElemState;
    RefVectorOf<XMLAttr>*       fAttrList;
    XMLBufferMgr                fBufMgr;
    XMLDocumentHandler*         fDocHandler;
    DocTypeHandler*             fDocTypeHandler;
    ElemStack                   fElemStack;
    XMLEntityHandler*           fEntityHandler;
    EntityResolver*             fEntityResolver;
    XMLErrorReporter*           fErrorReporter;
    ErrorHandler*               fErrorHandler;
    RefHashTableOf<XMLRefInfo>* fIDRefList;
    RefVectorOf<KVStringPair>*  fRawAttrList;
    ReaderMgr                   fReaderMgr;
    XMLValidator*               fValidator;
    DTDValidator*               fDTDValidator;
    SchemaValidator*            fSchemaValidator;
    ValSchemes                  fValScheme;
    XMLBuffer                   fAttNameBuf;
    XMLBuffer                   fAttValueBuf;
    XMLBuffer                   fCDataBuf;
    XMLBuffer                   fNameBuf;
    XMLBuffer                   fQNameBuf;
    XMLBuffer                   fPrefixBuf;
    XMLBuffer                   fURIBuf;
    GrammarResolver*            fGrammarResolver;
    Grammar*                    fGrammar;
    Grammar::GrammarType        fGrammarType;
    NameIdPool<DTDEntityDecl>*  fEntityDeclPool;
    XMLStringPool*              fURIStringPool;
};
// ---------------------------------------------------------------------------
// XMLScanner: Getter methods
// ---------------------------------------------------------------------------
// Read-only view of the installed document handler (fDocHandler).
inline const XMLDocumentHandler* XMLScanner::getDocHandler() const
{
    return this->fDocHandler;
}
// Mutable access to the installed document handler (fDocHandler).
inline XMLDocumentHandler* XMLScanner::getDocHandler()
{
    return this->fDocHandler;
}
// Read-only view of the installed document type handler (fDocTypeHandler).
inline const DocTypeHandler* XMLScanner::getDocTypeHandler() const
{
    return this->fDocTypeHandler;
}
// Mutable access to the installed document type handler (fDocTypeHandler).
inline DocTypeHandler* XMLScanner::getDocTypeHandler()
{
    return this->fDocTypeHandler;
}
inline bool XMLScanner::getDoNamespaces() const
{
return fDoNamespaces;
}
// Read-only view of the installed entity handler (fEntityHandler).
inline const XMLEntityHandler* XMLScanner::getEntityHandler() const
{
    return this->fEntityHandler;
}
// Mutable access to the installed entity handler (fEntityHandler).
inline XMLEntityHandler* XMLScanner::getEntityHandler()
{
    return this->fEntityHandler;
}
// Read-only view of the installed error reporter (fErrorReporter).
inline const XMLErrorReporter* XMLScanner::getErrorReporter() const
{
    return this->fErrorReporter;
}
// Mutable access to the installed error reporter (fErrorReporter).
inline XMLErrorReporter* XMLScanner::getErrorReporter()
{
    return this->fErrorReporter;
}
inline bool XMLScanner::getExitOnFirstFatal() const
{
return fExitOnFirstFatal;
}
inline bool XMLScanner::getValidationConstraintFatal() const
{
return fValidationConstraintFatal;
}
// Mutable access to the table of XMLRefInfo objects (fIDRefList).
inline RefHashTableOf<XMLRefInfo>* XMLScanner::getIDRefList()
{
    return this->fIDRefList;
}
inline bool XMLScanner::getInException() const
{
return fInException;
}
// Read-only view of the table of XMLRefInfo objects (fIDRefList).
inline const RefHashTableOf<XMLRefInfo>* XMLScanner::getIDRefList() const
{
    return this->fIDRefList;
}
// Hands out the scanner's own reader manager as a Locator. The result is
// the address of a data member, not a separately allocated object.
inline const Locator* XMLScanner::getLocator() const
{
    return &(this->fReaderMgr);
}
inline unsigned int XMLScanner::getSrcOffset() const
{
return fReaderMgr.getSrcOffset();
}
inline bool XMLScanner::getStandalone() const
{
return fStandalone;
}
// Reports the currently configured validation scheme (fValScheme).
inline XMLScanner::ValSchemes XMLScanner::getValidationScheme() const
{
    return this->fValScheme;
}
// Read-only view of the currently installed validator (fValidator).
inline const XMLValidator* XMLScanner::getValidator() const
{
    return this->fValidator;
}
// Mutable access to the currently installed validator (fValidator).
inline XMLValidator* XMLScanner::getValidator()
{
    return this->fValidator;
}
inline bool XMLScanner::getDoSchema() const
{
return fDoSchema;
}
inline bool XMLScanner::getValidationSchemaFullChecking() const
{
return fSchemaFullChecking;
}
inline int XMLScanner::getErrorCount()
{
return fErrorCount;
}
inline bool XMLScanner::isValidatorFromUser()
{
return fValidatorFromUser;
}
inline unsigned int XMLScanner::getEmptyNamespaceId() const
{
return fEmptyNamespaceId;
}
inline unsigned int XMLScanner::getUnknownNamespaceId() const
{
return fUnknownNamespaceId;
}
inline unsigned int XMLScanner::getXMLNamespaceId() const
{
return fXMLNamespaceId;
}
inline unsigned int XMLScanner::getXMLNSNamespaceId() const
{
return fXMLNSNamespaceId;
}
// Builds and returns, by value, an enumerator constructed over the
// scanner's entity declaration pool.
inline NameIdPoolEnumerator<DTDEntityDecl>
XMLScanner::getEntityEnumerator() const
{
    NameIdPoolEnumerator<DTDEntityDecl> poolEnum(fEntityDeclPool);
    return poolEnum;
}
// Looks up 'entName' in the entity declaration pool and passes back, as a
// pointer to const, whatever the pool's getByKey() returns.
inline const DTDEntityDecl* XMLScanner::getEntityDecl(const XMLCh* const entName) const
{
    const DTDEntityDecl* const foundDecl = fEntityDeclPool->getByKey(entName);
    return foundDecl;
}
// Looks up 'entName' in the entity declaration pool and passes back
// whatever the pool's getByKey() returns.
inline DTDEntityDecl* XMLScanner::getEntityDecl(const XMLCh* const entName)
{
    DTDEntityDecl* const foundDecl = fEntityDeclPool->getByKey(entName);
    return foundDecl;
}
// Mutable access to the entity declaration pool (fEntityDeclPool).
inline NameIdPool<DTDEntityDecl>* XMLScanner::getEntityDeclPool()
{
    return this->fEntityDeclPool;
}
// Read-only view of the entity declaration pool (fEntityDeclPool).
inline const NameIdPool<DTDEntityDecl>* XMLScanner::getEntityDeclPool() const
{
    return this->fEntityDeclPool;
}
// Read-only view of the URI string pool (fURIStringPool).
inline const XMLStringPool* XMLScanner::getURIStringPool() const
{
    return this->fURIStringPool;
}
// Mutable access to the URI string pool (fURIStringPool).
inline XMLStringPool* XMLScanner::getURIStringPool()
{
    return this->fURIStringPool;
}
inline bool XMLScanner::getHasNoDTD() const
{
return fHasNoDTD;
}
// ---------------------------------------------------------------------------
// XMLScanner: Setter methods
// ---------------------------------------------------------------------------
// Installs 'docHandler' as the document handler by storing the pointer.
inline void XMLScanner::setDocHandler(XMLDocumentHandler* const docHandler)
{
    this->fDocHandler = docHandler;
}
// Installs 'docTypeHandler' as the document type handler by storing the
// pointer.
inline void XMLScanner::setDocTypeHandler(DocTypeHandler* const docTypeHandler)
{
    this->fDocTypeHandler = docTypeHandler;
}
// Records the namespace processing flag. The first time namespaces are
// switched on while no URI string pool exists yet, a pool is allocated and
// then reset via resetURIStringPool().
inline void XMLScanner::setDoNamespaces(const bool doNamespaces)
{
    fDoNamespaces = doNamespaces;

    // Nothing more to do when namespaces are off or the pool already exists.
    if (!fDoNamespaces || fURIStringPool)
        return;

    fURIStringPool = new XMLStringPool();
    resetURIStringPool();
}
// Stores the new error reporter and hands the same pointer to both the
// DTD validator and the schema validator.
// NOTE(review): fValidator itself is not touched here, so a user-installed
// validator would not see the change through this call - confirm whether
// that is intended.
inline void XMLScanner::setErrorReporter(XMLErrorReporter* const errHandler)
{
    this->fErrorReporter = errHandler;

    this->fDTDValidator->setErrorReporter(this->fErrorReporter);
    this->fSchemaValidator->setErrorReporter(this->fErrorReporter);
}
// Installs 'handler' as the error handler by storing the pointer.
inline void XMLScanner::setErrorHandler(ErrorHandler* const handler)
{
    this->fErrorHandler = handler;
}
// Stores the new entity handler and passes the same pointer on to the
// reader manager.
inline void XMLScanner::setEntityHandler(XMLEntityHandler* const entityHandler)
{
    this->fEntityHandler = entityHandler;
    this->fReaderMgr.setEntityHandler(this->fEntityHandler);
}
// Installs 'handler' as the entity resolver by storing the pointer.
inline void XMLScanner::setEntityResolver(EntityResolver* const handler)
{
    this->fEntityResolver = handler;
}
inline void XMLScanner::setExitOnFirstFatal(const bool newValue)
{
fExitOnFirstFatal = newValue;
}
inline void XMLScanner::setValidationConstraintFatal(const bool newValue)
{
fValidationConstraintFatal = newValue;
}
// Stores the requested validation scheme and derives the validate flag
// from it.
inline void XMLScanner::setValidationScheme(const ValSchemes newScheme)
{
    fValScheme = newScheme;

    //  Only Val_Always switches validation on right away. For Val_Auto the
    //  flag starts out false and will be turned to true if a grammar is
    //  seen.
    fValidate = (fValScheme == Val_Always);
}
// Adopts 'valToAdopt' as the installed validator, marks it as coming from
// the user, and runs initValidator() on it.
//
// A previously adopted user validator is deleted first. The one exception
// is when the caller passes in the validator that is already installed:
// deleting it in that case would leave fValidator dangling for the
// initValidator() call below and lead to a second delete later on.
inline void XMLScanner::setValidator(XMLValidator* const valToAdopt)
{
    if (fValidatorFromUser && (fValidator != valToAdopt))
        delete fValidator;

    fValidator = valToAdopt;
    fValidatorFromUser = true;
    initValidator(fValidator);
}
inline void XMLScanner::setDoSchema(const bool doSchema)
{
fDoSchema = doSchema;
}
inline void XMLScanner::setValidationSchemaFullChecking(const bool schemaFullChecking)
{
fSchemaFullChecking = schemaFullChecking;
}
inline void XMLScanner::setHasNoDTD(const bool hasNoDTD)
{
fHasNoDTD = hasNoDTD;
}
// ---------------------------------------------------------------------------
// XMLScanner: Mutator methods
// ---------------------------------------------------------------------------
// Bumps the error counter by one.
inline void XMLScanner::incrementErrorCount()
{
    fErrorCount += 1;
}
// ---------------------------------------------------------------------------
// XMLScanner: Deprecated methods
// ---------------------------------------------------------------------------
inline bool XMLScanner::getDoValidation() const
{
return fValidate;
}
// Sets the validate flag directly. When 'setValScheme' is true the
// validation scheme is brought in line as well: Val_Always when validation
// is switched on, Val_Never when it is switched off.
inline void XMLScanner::setDoValidation(const bool validate, const bool setValScheme)
{
    fValidate = validate;

    // Caller asked to leave the scheme alone.
    if (!setValScheme)
        return;

    fValScheme = fValidate ? Val_Always : Val_Never;
}
#endif