// src/internal/XMLScanner.hpp - xerces-c - Git at Google

 /*
  * The Apache Software License, Version 1.1
  *
  * Copyright (c) 1999 The Apache Software Foundation.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * 3. The end-user documentation included with the redistribution,
  *    if any, must include the following acknowledgment:
  *       "This product includes software developed by the
  *        Apache Software Foundation (http://www.apache.org/)."
  *    Alternately, this acknowledgment may appear in the software itself,
  *    if and wherever such third-party acknowledgments normally appear.
  *
  * 4. The names "Xerces" and "Apache Software Foundation" must
  *    not be used to endorse or promote products derived from this
  *    software without prior written permission. For written
  *    permission, please contact apache\@apache.org.
  *
  * 5. Products derived from this software may not be called "Apache",
  *    nor may "Apache" appear in their name, without prior written
  *    permission of the Apache Software Foundation.
  *
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * ====================================================================
  *
  * This software consists of voluntary contributions made by many
  * individuals on behalf of the Apache Software Foundation, and was
  * originally based on software copyright (c) 1999, International
  * Business Machines, Inc., http://www.ibm.com .  For more information
  * on the Apache Software Foundation, please see
  * <http://www.apache.org/>.
  */

 /**
  * $Log$
  * Revision 1.1  1999/11/09 01:08:23  twl
  * Initial revision
  *
  * Revision 1.4  1999/11/08 20:44:52  rahul
  * Swat for adding in Product name and CVS comment log variable.
  *
  */


 #if !defined(XMLSCANNER_HPP)
 #define XMLSCANNER_HPP

 #include <util/KVStringPair.hpp>
 #include <util/RefVectorOf.hpp>
 #include <util/XMLString.hpp>
 #include <framework/XMLAttr.hpp>
 #include <framework/XMLBufferMgr.hpp>
 #include <framework/XMLErrorCodes.hpp>
 #include <framework/XMLRefInfo.hpp>
 #include <framework/XMLPScanToken.hpp>
 #include <internal/ElemStack.hpp>
 #include <internal/ReaderMgr.hpp>

 class InputSource;
 class XMLDocumentHandler;
 class XMLDocumentTypeHandler;
 class XMLElementDecl;
 class XMLEntityHandler;
 class XMLErrorReporter;
 class XMLMsgLoader;
 class XMLValidator;


 //
 //  This is the mondo scanner class, which does the vast majority of the
 //  work of parsing. It handles reading in input and spitting out events
 //  to installed handlers.
 //
 //  The default constructor, copy constructor and assignment operator are
 //  declared private below (under "Unimplemented constructors and
 //  operators"), so client code has to go through one of the two public
 //  constructors and cannot copy a scanner.
 //
 class XMLPARSER_EXPORT XMLScanner
 {
 public :
     // -----------------------------------------------------------------------
     //  Public class types
     //
     //  NOTE: These should really be private, but some of the compilers we
     //  have to deal with are too stupid to understand this.
     //
     //  DeclTypes
     //      Used by scanXMLDecl() to know what type of decl it should scan.
     //      Text decls have slightly different rules from XMLDecls.
     //
     //  EntityExpRes
     //      These are the values returned from the entity expansion method,
     //      to indicate how it went. (scanEntityRef() is the method declared
     //      with this return type.)
     //
     //  XMLTokens
     //      These represent the possible types of input we can get while
     //      scanning content. (senseNextToken() is the method declared with
     //      this return type.)
     // -----------------------------------------------------------------------
     enum DeclTypes
     {
         Decl_Text
         , Decl_XML
     };

     enum EntityExpRes
     {
         EntityExp_Pushed
         , EntityExp_Returned
         , EntityExp_Failed
     };

     enum XMLTokens
     {
         Token_CData
         , Token_CharData
         , Token_Comment
         , Token_EndTag
         , Token_EOF
         , Token_PI
         , Token_StartTag
         , Token_Unknown
     };


     // -----------------------------------------------------------------------
     //  Constructors and Destructor
     //
     //  Two ways to build a scanner: with just a validator, or with the
     //  document handler, entity handler, error reporter and validator all
     //  supplied up front. The handlers can also be installed later via the
     //  setter methods below.
     // -----------------------------------------------------------------------
     XMLScanner
     (
         XMLValidator* const validator
     );
     XMLScanner
     (
         XMLDocumentHandler* const   docHandler
         , XMLEntityHandler* const   entityHandler
         , XMLErrorReporter* const   errReporter
         , XMLValidator* const       validator
     );
     ~XMLScanner();


     // -----------------------------------------------------------------------
     //  Error emitter methods
     //
     //  Three overloads: the error code alone, or the error code plus one to
     //  four text arguments given either as XMLCh strings or as char
     //  strings. In the text overloads only text1 is mandatory; text2
     //  through text4 default to 0.
     // -----------------------------------------------------------------------
     void emitError(const XML4CErrs::Codes toEmit);
     void emitError
     (
         const   XML4CErrs::Codes    toEmit
         , const XMLCh* const        text1
         , const XMLCh* const        text2 = 0
         , const XMLCh* const        text3 = 0
         , const XMLCh* const        text4 = 0
     );
     void emitError
     (
         const   XML4CErrs::Codes    toEmit
         , const char* const         text1
         , const char* const         text2 = 0
         , const char* const         text3 = 0
         , const char* const         text4 = 0
     );


     // -----------------------------------------------------------------------
     //  Getter methods
     //
     //  The handler, reporter, validator and ID ref list getters come in
     //  const / non-const pairs. All of them except getLastExtLocation()
     //  have inline definitions following the class.
     //
     //  NOTE(review): getLastExtLocation() is not defined in this header;
     //  judging by its parameter names it fills caller supplied system id
     //  and public id buffers (bounded by the two max char counts) plus a
     //  line and column number - confirm against the implementation,
     //  including what the bool return means.
     // -----------------------------------------------------------------------
     const XMLDocumentHandler* getDocHandler() const;
     XMLDocumentHandler* getDocHandler();
     bool getDoNamespaces() const;
     bool getDoValidation() const;
     const XMLEntityHandler* getEntityHandler() const;
     XMLEntityHandler* getEntityHandler();
     const XMLErrorReporter* getErrorReporter() const;
     XMLErrorReporter* getErrorReporter();
     bool getExitOnFirstFatal() const;
     RefHashTableOf<XMLRefInfo>& getIDRefList();
     bool getInException() const;
     const RefHashTableOf<XMLRefInfo>& getIDRefList() const;
     bool getLastExtLocation
     (
                 XMLCh* const    sysIdToFill
         , const unsigned int    maxSysIdChars
         ,       XMLCh* const    pubIdToFill
         , const unsigned int    maxPubIdChars
         ,       unsigned int&   lineToFill
         ,       unsigned int&   colToFill
     );
     bool getStandalone() const;
     const XMLValidator* getValidator() const;
     XMLValidator* getValidator();


     // -----------------------------------------------------------------------
     //  Setter methods
     //
     //  setDoValidation() and setValidator() have no inline definition in
     //  this header; the other setters are defined inline after the class.
     //
     //  NOTE(review): the parameter of setEntityHandler() is named
     //  docTypeHandler in this declaration but entityHandler in the inline
     //  definition. Harmless to the compiler, but the name here is
     //  misleading.
     // -----------------------------------------------------------------------
     void setDocHandler(XMLDocumentHandler* const docHandler);
     void setDoNamespaces(const bool doNamespaces);
     void setDoValidation(const bool validate);
     void setEntityHandler(XMLEntityHandler* const docTypeHandler);
     void setErrorReporter(XMLErrorReporter* const errHandler);
     void setExitOnFirstFatal(const bool newValue);
     void setValidator(XMLValidator* const validator);


     // -----------------------------------------------------------------------
     //  Document scanning methods
     //
     //  scanDocument() does the entire source document. scanFirst() and
     //  scanNext() support a progressive parse; the XMLPScanToken filled in
     //  by scanFirst() is what gets handed to scanNext().
     // -----------------------------------------------------------------------
     void scanDocument
     (
         const   InputSource&    src
         , const bool            reuseValidator = false
     );
     bool scanFirst
     (
         const   InputSource&    src
         ,       XMLPScanToken&  toFill
         , const bool            reuseValidator = false
     );
     bool scanNext(XMLPScanToken& toFill);


 private :
     // -----------------------------------------------------------------------
     //  Private class types
     //
     //  IDTypes
     //      Passed to scanId() as its whatKind parameter.
     //
     //  DTDSubsets
     //      Not referenced by any declaration in this header.
     // -----------------------------------------------------------------------
     enum IDTypes
     {
         IDType_Public
         , IDType_External
         , IDType_Either
     };

     enum DTDSubsets
     {
         Subset_Internal
         , Subset_External
     };


     // -----------------------------------------------------------------------
     //  Unimplemented constructors and operators
     // -----------------------------------------------------------------------
     XMLScanner();
     XMLScanner(const XMLScanner&);
     void operator=(const XMLScanner&);


     // -----------------------------------------------------------------------
     //  Private helper methods
     // -----------------------------------------------------------------------
     void commonInit();


     // -----------------------------------------------------------------------
     //  Private helper methods
     //
     //  These are implemented in XMLScanner2.cpp, to keep the main file from
     //  becoming so bloated. We can't have any bloated files.
     // -----------------------------------------------------------------------
     unsigned int buildAttList
     (
         const   RefVectorOf<KVStringPair>&  providedAttrs
         , const unsigned int                attCount
         ,       XMLElementDecl&             elemDecl
         ,       RefVectorOf<XMLAttr>&       toFill
     );
     void checkIDRefs();
     bool isLegalToken(const XMLPScanToken& toCheck);
     bool normalizeAttValue
     (
         const   XMLCh* const        value
         , const XMLAttDef::AttTypes type
         ,       XMLBuffer&          toFill
     );
     unsigned int resolveQName
     (
         const   XMLCh* const        qName
         ,       XMLBuffer&          nameBufToFill
         ,       XMLBuffer&          prefixBufToFill
         , const ElemStack::MapModes mode
     );
     unsigned int resolvePrefix
     (
         const   XMLCh* const        prefix
         , const ElemStack::MapModes mode
     );
     unsigned int resolvePrefix
     (
         const   XMLCh* const        prefix
         ,       XMLBuffer&          uriBufToFill
         , const ElemStack::MapModes mode
     );
     void scanReset(const InputSource& src);
     void sendCharData(XMLBuffer& toSend);
     XMLTokens senseNextToken(unsigned int& orgReader);
     void updateNSMap
     (
         const   XMLCh* const    attrName
         , const XMLCh* const    attrValue
     );
     void validateAttrValue
     (
         const   XMLCh* const            valueText
         , const XMLAttDef::AttTypes     type
         , const XMLAttDef::DefAttTypes  defType
         , const XMLCh* const            defText
         , const XMLCh* const            fullName
         , const XMLCh* const            enumList
     );


     // -----------------------------------------------------------------------
     //  Private scanning methods
     // -----------------------------------------------------------------------
     bool basicAttrValueScan(XMLBuffer& toFill);
     bool getQuotedString(XMLBuffer& toFill);
     unsigned int rawAttrScan
     (
         RefVectorOf<KVStringPair>&  toFill
         , bool&                     isEmpty
     );
     bool scanAttValue(XMLBuffer& toFill, const XMLAttDef::AttTypes type);
     void scanCDSection();
     void scanCharData(XMLBuffer& toToUse);
     bool scanCharRef(XMLCh& toFill, XMLCh& second);
     void scanComment();
     bool scanContent(const bool extEntity);
     void scanDocTypeDecl();
     void scanEndTag(bool& gotData);
     EntityExpRes scanEntityRef
     (
         const   bool    inAttVal
         ,       XMLCh&  firstCh
         ,       XMLCh&  secondCh
         ,       bool&   escaped
     );
     bool scanEq();
     bool scanId
     (
                 XMLBuffer&  pubIdToFill
         ,       XMLBuffer&  sysIdToFill
         , const IDTypes     whatKind
     );
     void scanIgnoredSection();
     bool scanInternalSubset();
     void scanMiscellaneous();
     void scanPI();
     void scanProlog();
     bool scanPublicLiteral(XMLBuffer& toFill);
     bool scanStartTag(bool& gotData);
     bool scanStartTagNS(bool& gotData);
     bool scanSystemLiteral(XMLBuffer& toFill);
     void scanXMLDecl(const DeclTypes type);
     unsigned int scanUpToWSOr
     (
                 XMLBuffer&  toFill
         , const XMLCh       chEndChar
     );


     // -----------------------------------------------------------------------
     //  Data members
     //
     //  fAttrList
     //      Every time we get a new element start tag, we have to pass to
     //      the document handler the attributes found. To make it more
     //      efficient we keep this ref vector of XMLAttr objects around. We
     //      just reuse it over and over, allowing it to grow to meet the
     //      peak need.
     //
     //  fBaseDir
     //      This is the base directory, from which the initial XML file
     //      was loaded. It is set after the file is successfully opened,
     //      so we know it to be valid. If the initial file had no path
     //      component, then this is left null. It is used to handle relative
     //      paths of DTD and external entity system ids.
     //
     //  fBufMgr
     //      This is a manager for temporary buffers used during scanning.
     //      For efficiency we must use a set of static buffers, but we have
     //      to ensure that they are not incorrectly reused. So this manager
     //      provides the smarts to hand out buffers as required.
     //
     //  fDocHandler
     //      The client code's document handler. If zero, then no document
     //      handler callouts are done. We don't adopt it.
     //
     //  fDoNamespaces
     //      This flag indicates whether the client code wants us to do
     //      namespaces or not. If the installed validator indicates that it
     //      has to do namespaces, then this is ignored.
     //
     //  fDoValidation
     //      Indicates whether any validation should be done. A validator
     //      can still be installed and it will build up all the data
     //      structures for the DTD/Schema declarations, but the scanner will
     //      not ask it to validate anything.
     //
     //  fElemStack
     //      This is the element stack that is used to track the elements that
     //      are currently being worked on.
     //
     //  fEntityHandler
     //      The client code's entity handler. If zero, then no entity handler
     //      callouts are done. We don't adopt it. setEntityHandler() also
     //      passes the same pointer on to fReaderMgr.
     //
     //  fErrorReporter
     //      The client code's error handler. If zero, then no error handler
     //      callouts are done. We don't adopt it.
     //
     //  fExitOnFirstFatal
     //      This indicates whether we bail out on the first fatal XML error
     //      or not. It defaults to true, which is the strict XML way, but it
     //      can be changed.
     //
     //  fIDRefList
     //      This is a list of XMLRefInfo objects. This member lets us do all
     //      needed ID-IDREF balancing checks. It is held by pointer and is
     //      dereferenced without a null check by getIDRefList().
     //
     //  fInException
     //      To avoid a circular freakout when we catch an exception and emit
     //      it, which would normally throw again if the 'fail on first error'
     //      flag is on.
     //
     //  fRawAttrList
     //      During the initial scan of the attributes we can only do a raw
     //      scan for key/value pairs. So this vector is used to store them
     //      until they can be processed (and put into fAttrList.)
     //
     //  fReaderMgr
     //      This is the reader manager, from which we get characters. It
     //      manages the reader stack for us, and provides a lot of convenience
     //      methods to do specialized checking for chars, sequences of chars,
     //      skipping chars, etc...
     //
     //  fReuseValidator
     //      This flag is set on a per-scan basis. So it's provided in the
     //      scanDocument() and scanFirst() methods, and applies for that
     //      one pass. It indicates that the validator should not be reused
     //      and that any external structural description should be ignored.
     //      There cannot be any internal subset.
     //      NOTE(review): the member and parameter name (reuseValidator)
     //      suggests a true value means the validator IS reused, which
     //      contradicts the "should not be reused" wording above - confirm
     //      against the implementation and correct whichever is wrong.
     //
     //  fScannerId
     //  fSequenceId
     //      These are used for progressive parsing, to make sure that the
     //      client code does the right thing at the right time.
     //
     //  fStandalone
     //      Indicates whether the document is standalone or not. Defaults to
     //      no, but can be overridden in the XMLDecl.
     //
     //  fValidator
     //      The installed validator. We look at them via the abstract
     //      validator interface, and don't know what it actually is.
     //
     //
     //  fAttNameBuf
     //  fAttValueBuf
     //  fCDataBuf
     //  fNameBuf
     //  fQNameBuf
     //  fPrefixBuf
     //  fURIBuf
     //      For the most part, buffers are obtained from the fBufMgr object
     //      on the fly. However, for the start tag scan, we have a set of
     //      fixed buffers for performance reasons. These are used a lot and
     //      there are a number of them, so asking the buffer manager each
     //      time for new buffers is a bit too much overhead.
     //      NOTE(review): fURIBuf was missing from this list; it is presumably
     //      part of the same fixed set - confirm.
     // -----------------------------------------------------------------------
     RefVectorOf<XMLAttr>*       fAttrList;
     XMLCh*                      fBaseDir;
     XMLBufferMgr                fBufMgr;
     XMLDocumentHandler*         fDocHandler;
     bool                        fDoNamespaces;
     bool                        fDoValidation;
     ElemStack                   fElemStack;
     XMLEntityHandler*           fEntityHandler;
     XMLErrorReporter*           fErrorReporter;
     bool                        fExitOnFirstFatal;
     RefHashTableOf<XMLRefInfo>* fIDRefList;
     bool                        fInException;
     RefVectorOf<KVStringPair>*  fRawAttrList;
     ReaderMgr                   fReaderMgr;
     bool                        fReuseValidator;
     XMLUInt32                   fScannerId;
     XMLUInt32                   fSequenceId;
     bool                        fStandalone;
     XMLValidator*               fValidator;

     XMLBuffer                   fAttNameBuf;
     XMLBuffer                   fAttValueBuf;
     XMLBuffer                   fCDataBuf;
     XMLBuffer                   fNameBuf;
     XMLBuffer                   fQNameBuf;
     XMLBuffer                   fPrefixBuf;
     XMLBuffer                   fURIBuf;
 };


 // ---------------------------------------------------------------------------
 //  XMLScanner: Getter methods
 // ---------------------------------------------------------------------------
 //  Const overload: hands back the stored document handler pointer as a
 //  pointer-to-const. May be zero if no handler is installed.
 inline const XMLDocumentHandler* XMLScanner::getDocHandler() const
 {
     return this->fDocHandler;
 }

 //  Non-const overload: hands back the stored document handler pointer so
 //  the caller can invoke it. May be zero if no handler is installed.
 inline XMLDocumentHandler* XMLScanner::getDocHandler()
 {
     return this->fDocHandler;
 }

 inline bool XMLScanner::getDoNamespaces() const
 {
     return fDoNamespaces;
 }

 inline bool XMLScanner::getDoValidation() const
 {
     return fDoValidation;
 }

 //  Const overload: hands back the stored entity handler pointer as a
 //  pointer-to-const. May be zero if no handler is installed.
 inline const XMLEntityHandler* XMLScanner::getEntityHandler() const
 {
     return this->fEntityHandler;
 }

 //  Non-const overload: hands back the stored entity handler pointer.
 //  May be zero if no handler is installed.
 inline XMLEntityHandler* XMLScanner::getEntityHandler()
 {
     return this->fEntityHandler;
 }

 //  Const overload: hands back the stored error reporter pointer as a
 //  pointer-to-const. May be zero if no reporter is installed.
 inline const XMLErrorReporter* XMLScanner::getErrorReporter() const
 {
     return this->fErrorReporter;
 }

 //  Non-const overload: hands back the stored error reporter pointer.
 //  May be zero if no reporter is installed.
 inline XMLErrorReporter* XMLScanner::getErrorReporter()
 {
     return this->fErrorReporter;
 }

 inline bool XMLScanner::getExitOnFirstFatal() const
 {
     return fExitOnFirstFatal;
 }

 //  Non-const overload: yields a mutable reference to the ID ref table.
 //  fIDRefList is dereferenced as-is; no null check is made here.
 inline RefHashTableOf<XMLRefInfo>& XMLScanner::getIDRefList()
 {
     return *(this->fIDRefList);
 }

 inline bool XMLScanner::getInException() const
 {
     return fInException;
 }

 //  Const overload: yields a read-only reference to the ID ref table.
 //  fIDRefList is dereferenced as-is; no null check is made here.
 inline const RefHashTableOf<XMLRefInfo>& XMLScanner::getIDRefList() const
 {
     return *(this->fIDRefList);
 }

 inline bool XMLScanner::getStandalone() const
 {
     return fStandalone;
 }

 //  Const overload: hands back the installed validator pointer as a
 //  pointer-to-const.
 inline const XMLValidator* XMLScanner::getValidator() const
 {
     return this->fValidator;
 }

 //  Non-const overload: hands back the installed validator pointer.
 inline XMLValidator* XMLScanner::getValidator()
 {
     return this->fValidator;
 }


 // ---------------------------------------------------------------------------
 //  XMLScanner: Setter methods
 // ---------------------------------------------------------------------------
 inline void XMLScanner::setDoNamespaces(const bool doNamespaces)
 {
     fDoNamespaces = doNamespaces;
 }

 //  Installs (or, when passed zero, clears) the document handler pointer.
 //  Only the pointer is stored.
 inline void XMLScanner::setDocHandler(XMLDocumentHandler* const docHandler)
 {
     this->fDocHandler = docHandler;
 }

 //  Installs (or, when passed zero, clears) the error reporter pointer.
 //  Only the pointer is stored.
 inline void XMLScanner::setErrorReporter(XMLErrorReporter* const errHandler)
 {
     this->fErrorReporter = errHandler;
 }

 //  Installs the entity handler pointer. The scanner keeps its own copy of
 //  the pointer and then forwards the same pointer to the reader manager,
 //  so both always refer to the same handler.
 inline void XMLScanner::setEntityHandler(XMLEntityHandler* const entityHandler)
 {
     this->fEntityHandler = entityHandler;
     this->fReaderMgr.setEntityHandler(entityHandler);
 }

 inline void XMLScanner::setExitOnFirstFatal(const bool newValue)
 {
     fExitOnFirstFatal = newValue;
 }

 #endif
	/*
	* The Apache Software License, Version 1.1
	*
	* Copyright (c) 1999 The Apache Software Foundation. All rights
	* reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in
	* the documentation and/or other materials provided with the
	* distribution.
	*
	* 3. The end-user documentation included with the redistribution,
	* if any, must include the following acknowledgment:
	* "This product includes software developed by the
	* Apache Software Foundation (http://www.apache.org/)."
	* Alternately, this acknowledgment may appear in the software itself,
	* if and wherever such third-party acknowledgments normally appear.
	*
	* 4. The names "Xerces" and "Apache Software Foundation" must
	* not be used to endorse or promote products derived from this
	* software without prior written permission. For written
	* permission, please contact apache\@apache.org.
	*
	* 5. Products derived from this software may not be called "Apache",
	* nor may "Apache" appear in their name, without prior written
	* permission of the Apache Software Foundation.
	*
	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
	* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
	* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
	* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	* ====================================================================
	*
	* This software consists of voluntary contributions made by many
	* individuals on behalf of the Apache Software Foundation, and was
	* originally based on software copyright (c) 1999, International
	* Business Machines, Inc., http://www.ibm.com . For more information
	* on the Apache Software Foundation, please see
	* <http://www.apache.org/>.
	*/

	/**
	* $Log$
	* Revision 1.1 1999/11/09 01:08:23 twl
	* Initial revision
	*
	* Revision 1.4 1999/11/08 20:44:52 rahul
	* Swat for adding in Product name and CVS comment log variable.
	*
	*/


	#if !defined(XMLSCANNER_HPP)
	#define XMLSCANNER_HPP

	#include <util/KVStringPair.hpp>
	#include <util/RefVectorOf.hpp>
	#include <util/XMLString.hpp>
	#include <framework/XMLAttr.hpp>
	#include <framework/XMLBufferMgr.hpp>
	#include <framework/XMLErrorCodes.hpp>
	#include <framework/XMLRefInfo.hpp>
	#include <framework/XMLPScanToken.hpp>
	#include <internal/ElemStack.hpp>
	#include <internal/ReaderMgr.hpp>

	class InputSource;
	class XMLDocumentHandler;
	class XMLDocumentTypeHandler;
	class XMLElementDecl;
	class XMLEntityHandler;
	class XMLErrorReporter;
	class XMLMsgLoader;
	class XMLValidator;


	//
	// This is the mondo scanner class, which does the vast majority of the
	// work of parsing. It handles reading in input and spitting out events
	// to installed handlers.
	//
	class XMLPARSER_EXPORT XMLScanner
	{
	public :
	// -----------------------------------------------------------------------
	// Public class types
	//
	// NOTE: These should really be private, but some of the compilers we
	// have to deal with are too stupid to understand this.
	//
	// DeclTypes
	// Used by scanXMLDecl() to know what type of decl it should scan.
	// Text decls have slightly different rules from XMLDecls.
	//
	// EntityExpRes
	// These are the values returned from the entity expansion method,
	// to indicate how it went.
	//
	// XMLTokens
	// These represent the possible types of input we can get while
	// scanning content.
	// -----------------------------------------------------------------------
	enum DeclTypes
	{
	Decl_Text
	, Decl_XML
	};

	enum EntityExpRes
	{
	EntityExp_Pushed
	, EntityExp_Returned
	, EntityExp_Failed
	};

	enum XMLTokens
	{
	Token_CData
	, Token_CharData
	, Token_Comment
	, Token_EndTag
	, Token_EOF
	, Token_PI
	, Token_StartTag
	, Token_Unknown
	};


	// -----------------------------------------------------------------------
	// Constructors and Destructor
	// -----------------------------------------------------------------------
	XMLScanner
	(
	XMLValidator* const validator
	);
	XMLScanner
	(
	XMLDocumentHandler* const docHandler
	, XMLEntityHandler* const entityHandler
	, XMLErrorReporter* const errReporter
	, XMLValidator* const validator
	);
	~XMLScanner();


	// -----------------------------------------------------------------------
	// Error emitter methods
	// -----------------------------------------------------------------------
	void emitError(const XML4CErrs::Codes toEmit);
	void emitError
	(
	const XML4CErrs::Codes toEmit
	, const XMLCh* const text1
	, const XMLCh* const text2 = 0
	, const XMLCh* const text3 = 0
	, const XMLCh* const text4 = 0
	);
	void emitError
	(
	const XML4CErrs::Codes toEmit
	, const char* const text1
	, const char* const text2 = 0
	, const char* const text3 = 0
	, const char* const text4 = 0
	);


	// -----------------------------------------------------------------------
	// Getter methods
	// -----------------------------------------------------------------------
	const XMLDocumentHandler* getDocHandler() const;
	XMLDocumentHandler* getDocHandler();
	bool getDoNamespaces() const;
	bool getDoValidation() const;
	const XMLEntityHandler* getEntityHandler() const;
	XMLEntityHandler* getEntityHandler();
	const XMLErrorReporter* getErrorReporter() const;
	XMLErrorReporter* getErrorReporter();
	bool getExitOnFirstFatal() const;
	RefHashTableOf<XMLRefInfo>& getIDRefList();
	bool getInException() const;
	const RefHashTableOf<XMLRefInfo>& getIDRefList() const;
	bool getLastExtLocation
	(
	XMLCh* const sysIdToFill
	, const unsigned int maxSysIdChars
	, XMLCh* const pubIdToFill
	, const unsigned int maxPubIdChars
	, unsigned int& lineToFill
	, unsigned int& colToFill
	);
	bool getStandalone() const;
	const XMLValidator* getValidator() const;
	XMLValidator* getValidator();


	// -----------------------------------------------------------------------
	// Setter methods
	// -----------------------------------------------------------------------
	void setDocHandler(XMLDocumentHandler* const docHandler);
	void setDoNamespaces(const bool doNamespaces);
	void setDoValidation(const bool validate);
	void setEntityHandler(XMLEntityHandler* const docTypeHandler);
	void setErrorReporter(XMLErrorReporter* const errHandler);
	void setExitOnFirstFatal(const bool newValue);
	void setValidator(XMLValidator* const validator);


	// -----------------------------------------------------------------------
	// Document scanning methods
	//
	// scanDocument() does the entire source document. scanFirst() and
	// scanNext() support a progressive parse.
	// -----------------------------------------------------------------------
	void scanDocument
	(
	const InputSource& src
	, const bool reuseValidator = false
	);
	bool scanFirst
	(
	const InputSource& src
	, XMLPScanToken& toFill
	, const bool reuseValidator = false
	);
	bool scanNext(XMLPScanToken& toFill);


	private :
	// -----------------------------------------------------------------------
	// Private class types
	// -----------------------------------------------------------------------
	enum IDTypes
	{
	IDType_Public
	, IDType_External
	, IDType_Either
	};

	enum DTDSubsets
	{
	Subset_Internal
	, Subset_External
	};


	// -----------------------------------------------------------------------
	// Unimplemented constructors and operators
	// -----------------------------------------------------------------------
	XMLScanner();
	XMLScanner(const XMLScanner&);
	void operator=(const XMLScanner&);


	// -----------------------------------------------------------------------
	// Private helper methods
	// -----------------------------------------------------------------------
	void commonInit();


	// -----------------------------------------------------------------------
	// Private helper methods
	//
	// These are implemented in XMLScanner2.cpp, to keep the main file from
	// becoming so bloated. We can't have any bloated files.
	// -----------------------------------------------------------------------
	unsigned int buildAttList
	(
	const RefVectorOf<KVStringPair>& providedAttrs
	, const unsigned int attCount
	, XMLElementDecl& elemDecl
	, RefVectorOf<XMLAttr>& toFill
	);
	void checkIDRefs();
	bool isLegalToken(const XMLPScanToken& toCheck);
	bool normalizeAttValue
	(
	const XMLCh* const value
	, const XMLAttDef::AttTypes type
	, XMLBuffer& toFill
	);
	unsigned int resolveQName
	(
	const XMLCh* const qName
	, XMLBuffer& nameBufToFill
	, XMLBuffer& prefixBufToFill
	, const ElemStack::MapModes mode
	);
	unsigned int resolvePrefix
	(
	const XMLCh* const prefix
	, const ElemStack::MapModes mode
	);
	unsigned int resolvePrefix
	(
	const XMLCh* const prefix
	, XMLBuffer& uriBufToFill
	, const ElemStack::MapModes mode
	);
	void scanReset(const InputSource& src);
	void sendCharData(XMLBuffer& toSend);
	XMLTokens senseNextToken(unsigned int& orgReader);
	void updateNSMap
	(
	const XMLCh* const attrName
	, const XMLCh* const attrValue
	);
	void validateAttrValue
	(
	const XMLCh* const valueText
	, const XMLAttDef::AttTypes type
	, const XMLAttDef::DefAttTypes defType
	, const XMLCh* const defText
	, const XMLCh* const fullName
	, const XMLCh* const enumList
	);


	// -----------------------------------------------------------------------
	// Private scanning methods
	// -----------------------------------------------------------------------
	bool basicAttrValueScan(XMLBuffer& toFill);
	bool getQuotedString(XMLBuffer& toFill);
	unsigned int rawAttrScan
	(
	RefVectorOf<KVStringPair>& toFill
	, bool& isEmpty
	);
	bool scanAttValue(XMLBuffer& toFill, const XMLAttDef::AttTypes type);
	void scanCDSection();
	void scanCharData(XMLBuffer& toToUse);
	bool scanCharRef(XMLCh& toFill, XMLCh& second);
	void scanComment();
	bool scanContent(const bool extEntity);
	void scanDocTypeDecl();
	void scanEndTag(bool& gotData);
	EntityExpRes scanEntityRef
	(
	const bool inAttVal
	, XMLCh& firstCh
	, XMLCh& secondCh
	, bool& escaped
	);
	bool scanEq();
	bool scanId
	(
	XMLBuffer& pubIdToFill
	, XMLBuffer& sysIdToFill
	, const IDTypes whatKind
	);
	void scanIgnoredSection();
	bool scanInternalSubset();
	void scanMiscellaneous();
	void scanPI();
	void scanProlog();
	bool scanPublicLiteral(XMLBuffer& toFill);
	bool scanStartTag(bool& gotData);
	bool scanStartTagNS(bool& gotData);
	bool scanSystemLiteral(XMLBuffer& toFill);
	void scanXMLDecl(const DeclTypes type);
	unsigned int scanUpToWSOr
	(
	XMLBuffer& toFill
	, const XMLCh chEndChar
	);



	// -----------------------------------------------------------------------
	// Data members
	//
	// fAttrList
	// Every time we get a new element start tag, we have to pass to
	// the document handler the attributes found. To make it more
	// efficient we keep this ref vector of XMLAttr objects around. We
	// just reuse it over and over, allowing it to grow to meet the
	// peak need.
	//
	// fBaseDir
	// This is the base directory, from which the initial XML file
	// was loaded. It is set after the file is successfully opened,
	// so we know it to be valid. If the initial file had no path
	// component, then this is left null. It is used to handle relative
	// paths of DTD and external entity system ids.
	//
	// fBufMgr
	// This is a manager for temporary buffers used during scanning.
	// For efficiency we must use a set of static buffers, but we have
	// to ensure that they are not incorrectly reused. So this manager
	// provides the smarts to hand out buffers as required.
	//
	// fDocHandler
	// The client code's document handler. If zero, then no document
	// handler callouts are done. We don't adopt it.
	//
	// fDoNamespaces
	// This flag indicates whether the client code wants us to do
	// namespaces or not. If the installed validator indicates that it
	// has to do namespaces, then this is ignored.
	//
	// fDoValidation
	// Indicates whether any validation should be done. A validator
	// can still be installed and it will build up all the data
	// structures for the DTD/Schema declarations, but the scanner will
	// not ask it to validate anything.
	//
	// fElemStack
	// This is the element stack that is used to track the elements that
	// are currently being worked on.
	//
	// fEntityHandler
	// The client code's entity handler. If zero, then no entity handler
	// callouts are done. We don't adopt it.
	//
	// fErrorReporter
	// The client code's error handler. If zero, then no error handler
	// callouts are done. We don't adopt it.
	//
	// fExitOnFirstFatal
	// This indicates whether we bail out on the first fatal XML error
	// or not. It defaults to true, which is the strict XML way, but it
	// can be changed.
	//
	// fIDRefList
	// This is a list of XMLRefInfo objects. This member lets us do all
	// needed ID-IDREF balancing checks.
	//
	// fInException
	// This flag avoids a circular freakout when we catch an exception and
	// emit it, which would normally throw again if the 'fail on first
	// error' flag is on.
	//
	// fRawAttrList
	// During the initial scan of the attributes we can only do a raw
	// scan for key/value pairs. So this vector is used to store them
	// until they can be processed (and put into fAttrList.)
	//
	// fReaderMgr
	// This is the reader manager, from which we get characters. It
	// manages the reader stack for us, and provides a lot of convenience
	// methods to do specialized checking for chars, sequences of chars,
	// skipping chars, etc...
	//
	// fReuseValidator
	// This flag is set on a per-scan basis. So it's provided in the
	// scanDocument() and scanFirst() methods, and applies for that
	// one pass. It indicates that the validator should be reused
	// and that any external structural description should be ignored.
	// There cannot be any internal subset.
	//
	// fScannerId
	// fSequenceId
	// These are used for progressive parsing, to make sure that the
	// client code does the right thing at the right time.
	//
	// fStandalone
	// Indicates whether the document is standalone or not. Defaults to
	// no, but can be overridden in the XMLDecl.
	//
	// fValidator
	// The installed validator. We look at it via the abstract
	// validator interface, and don't know what it actually is.
	//
	//
	// fAttNameBuf
	// fAttValueBuf
	// fCDataBuf
	// fNameBuf
	// fQNameBuf
	// fPrefixBuf
	// fURIBuf
	// For the most part, buffers are obtained from the fBufMgr object
	// on the fly. However, for the start tag scan, we have a set of
	// fixed buffers for performance reasons. These are used a lot and
	// there are a number of them, so asking the buffer manager each
	// time for new buffers is a bit too much overhead.
	// -----------------------------------------------------------------------
	RefVectorOf<XMLAttr>* fAttrList;
	XMLCh* fBaseDir;
	XMLBufferMgr fBufMgr;
	XMLDocumentHandler* fDocHandler;
	bool fDoNamespaces;
	bool fDoValidation;
	ElemStack fElemStack;
	XMLEntityHandler* fEntityHandler;
	XMLErrorReporter* fErrorReporter;
	bool fExitOnFirstFatal;
	RefHashTableOf<XMLRefInfo>* fIDRefList;
	bool fInException;
	RefVectorOf<KVStringPair>* fRawAttrList;
	ReaderMgr fReaderMgr;
	bool fReuseValidator;
	XMLUInt32 fScannerId;
	XMLUInt32 fSequenceId;
	bool fStandalone;
	XMLValidator* fValidator;

	XMLBuffer fAttNameBuf;
	XMLBuffer fAttValueBuf;
	XMLBuffer fCDataBuf;
	XMLBuffer fNameBuf;
	XMLBuffer fQNameBuf;
	XMLBuffer fPrefixBuf;
	XMLBuffer fURIBuf;
	};



	// ---------------------------------------------------------------------------
	// XMLScanner: Getter methods
	// ---------------------------------------------------------------------------
	// Read-only access to the client's document handler. The stored pointer
	// is returned as is, so it may be zero when no handler is installed.
	inline const XMLDocumentHandler* XMLScanner::getDocHandler() const
	{
	    return this->fDocHandler;
	}

	// Mutable access to the client's document handler. The stored pointer
	// is returned as is, so it may be zero when no handler is installed.
	inline XMLDocumentHandler* XMLScanner::getDocHandler()
	{
	    return this->fDocHandler;
	}

	inline bool XMLScanner::getDoNamespaces() const
	{
	return fDoNamespaces;
	}

	inline bool XMLScanner::getDoValidation() const
	{
	return fDoValidation;
	}

	// Read-only access to the client's entity handler. The stored pointer
	// is returned as is, so it may be zero when no handler is installed.
	inline const XMLEntityHandler* XMLScanner::getEntityHandler() const
	{
	    return this->fEntityHandler;
	}

	// Mutable access to the client's entity handler. The stored pointer
	// is returned as is, so it may be zero when no handler is installed.
	inline XMLEntityHandler* XMLScanner::getEntityHandler()
	{
	    return this->fEntityHandler;
	}

	// Read-only access to the client's error reporter. The stored pointer
	// is returned as is, so it may be zero when no reporter is installed.
	inline const XMLErrorReporter* XMLScanner::getErrorReporter() const
	{
	    return this->fErrorReporter;
	}

	// Mutable access to the client's error reporter. The stored pointer
	// is returned as is, so it may be zero when no reporter is installed.
	inline XMLErrorReporter* XMLScanner::getErrorReporter()
	{
	    return this->fErrorReporter;
	}

	inline bool XMLScanner::getExitOnFirstFatal() const
	{
	return fExitOnFirstFatal;
	}

	// Mutable access to the ID/IDREF tracking table. The fIDRefList pointer
	// is dereferenced without a null check.
	inline RefHashTableOf<XMLRefInfo>& XMLScanner::getIDRefList()
	{
	    return *(this->fIDRefList);
	}

	inline bool XMLScanner::getInException() const
	{
	return fInException;
	}

	// Read-only access to the ID/IDREF tracking table. The fIDRefList
	// pointer is dereferenced without a null check.
	inline const RefHashTableOf<XMLRefInfo>& XMLScanner::getIDRefList() const
	{
	    return *(this->fIDRefList);
	}

	inline bool XMLScanner::getStandalone() const
	{
	return fStandalone;
	}

	// Read-only access to the installed validator, seen through the
	// abstract XMLValidator interface.
	inline const XMLValidator* XMLScanner::getValidator() const
	{
	    return this->fValidator;
	}

	// Mutable access to the installed validator, seen through the
	// abstract XMLValidator interface.
	inline XMLValidator* XMLScanner::getValidator()
	{
	    return this->fValidator;
	}


	// ---------------------------------------------------------------------------
	// XMLScanner: Setter methods
	// ---------------------------------------------------------------------------
	inline void XMLScanner::setDoNamespaces(const bool doNamespaces)
	{
	fDoNamespaces = doNamespaces;
	}

	// Installs the document handler by storing the given pointer; nothing
	// else is updated here.
	inline void XMLScanner::setDocHandler(XMLDocumentHandler* const docHandler)
	{
	    this->fDocHandler = docHandler;
	}

	// Installs the error reporter by storing the given pointer; nothing
	// else is updated here.
	inline void XMLScanner::setErrorReporter(XMLErrorReporter* const errHandler)
	{
	    this->fErrorReporter = errHandler;
	}

	// Installs the entity handler. The pointer is stored on the scanner and
	// then passed on to the reader manager, so both hold the same handler.
	inline void XMLScanner::setEntityHandler(XMLEntityHandler* const entityHandler)
	{
	    this->fEntityHandler = entityHandler;
	    this->fReaderMgr.setEntityHandler(entityHandler);
	}

	inline void XMLScanner::setExitOnFirstFatal(const bool newValue)
	{
	fExitOnFirstFatal = newValue;
	}

	#endif