// src/xercesc/internal/XMLScanner2.cpp - xerces-c - Git at Google

 /*
  * The Apache Software License, Version 1.1
  *
  * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * 3. The end-user documentation included with the redistribution,
  *    if any, must include the following acknowledgment:
  *       "This product includes software developed by the
  *        Apache Software Foundation (http://www.apache.org/)."
  *    Alternately, this acknowledgment may appear in the software itself,
  *    if and wherever such third-party acknowledgments normally appear.
  *
  * 4. The names "Xerces" and "Apache Software Foundation" must
  *    not be used to endorse or promote products derived from this
  *    software without prior written permission. For written
  *    permission, please contact apache\@apache.org.
  *
  * 5. Products derived from this software may not be called "Apache",
  *    nor may "Apache" appear in their name, without prior written
  *    permission of the Apache Software Foundation.
  *
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * ====================================================================
  *
  * This software consists of voluntary contributions made by many
  * individuals on behalf of the Apache Software Foundation, and was
  * originally based on software copyright (c) 1999, International
  * Business Machines, Inc., http://www.ibm.com .  For more information
  * on the Apache Software Foundation, please see
  * <http://www.apache.org/>.
  */

 /*
  * $Id$
  */


 // ---------------------------------------------------------------------------
 //  This file holds some of the grunt work methods of XMLScanner.cpp to keep
 //  it a little more readable.
 // ---------------------------------------------------------------------------


 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <xercesc/util/BinMemInputStream.hpp>
 #include <xercesc/util/Janitor.hpp>
 #include <xercesc/util/PlatformUtils.hpp>
 #include <xercesc/util/TransService.hpp>
 #include <xercesc/util/UnexpectedEOFException.hpp>
 #include <xercesc/util/XMLUniDefs.hpp>
 #include <xercesc/util/XMLUni.hpp>
 #include <xercesc/util/XMLURL.hpp>
 #include <xercesc/sax/InputSource.hpp>
 #include <xercesc/framework/LocalFileInputSource.hpp>
 #include <xercesc/framework/URLInputSource.hpp>
 #include <xercesc/framework/XMLErrorReporter.hpp>
 #include <xercesc/framework/XMLDocumentHandler.hpp>
 #include <xercesc/framework/XMLEntityHandler.hpp>
 #include <xercesc/framework/XMLPScanToken.hpp>
 #include <xercesc/framework/XMLRefInfo.hpp>
 #include <xercesc/framework/XMLValidator.hpp>
 #include <xercesc/internal/XMLScanner.hpp>
 #include <xercesc/internal/EndOfEntityException.hpp>
 #include <xercesc/internal/XMLInternalErrorHandler.hpp>
 #include <xercesc/parsers/IDOMParser.hpp>
 #include <xercesc/dom/DOM_DOMException.hpp>
 #include <xercesc/sax/EntityResolver.hpp>
 #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
 #include <xercesc/validators/datatype/DatatypeValidator.hpp>
 #include <xercesc/validators/schema/SchemaSymbols.hpp>
 #include <xercesc/validators/schema/SchemaGrammar.hpp>
 #include <xercesc/validators/schema/TraverseSchema.hpp>
 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
 #include <xercesc/validators/schema/identity/XPathMatcherStack.hpp>


 // ---------------------------------------------------------------------------
 //  XMLScanner: Private helper methods
 // ---------------------------------------------------------------------------

 //
 //  This method is called from scanStartTagNS() to build up the list of
 //  XMLAttr objects that will be passed out in the start tag callout. We
 //  get the key/value pairs from the raw scan of explicitly provided attrs,
 //  which have not been normalized. And we get the element declaration from
 //  which we will get any defaulted or fixed attribute defs and add those
 //  in as well.
 //
 //  Parameters:
 //      providedAttrs   The raw name/value pairs scanned from the start tag.
 //      attCount        How many leading entries of providedAttrs to use.
 //      elemDecl        The declaration of the element being started. Its
 //                      att defs have their 'provided' flags reset and then
 //                      updated here, and unknown attributes are faulted
 //                      into it.
 //      toFill          The output attribute list. Existing XMLAttr objects
 //                      in it are reused first; new ones are appended only
 //                      once those run out.
 //
 //  Returns the number of leading entries of toFill that were filled in for
 //  this start tag (explicit attributes plus any defaulted/fixed ones).
 //
 unsigned int
 XMLScanner::buildAttList(const  RefVectorOf<KVStringPair>&  providedAttrs
                         , const unsigned int                attCount
                         ,       XMLElementDecl*             elemDecl
                         ,       RefVectorOf<XMLAttr>&       toFill)
 {
     //
     //  Ask the element to clear the 'provided' flag on all of the att defs
     //  that it owns, and to return us a boolean indicating whether it has
     //  any defs.
     //
     const bool hasDefs = elemDecl->resetDefs();

     //
     //  If there are no explicitly provided attributes and there are no
     //  defined attributes for the element, then we don't have anything to
     //  do. So just return zero in this case.
     //
     if (!hasDefs && !attCount)
         return 0;

     // Keep up with how many attrs we end up with total
     unsigned int retCount = 0;

     //
     //  And get the current size of the output vector. This lets us use
     //  existing elements until we fill it, then start adding new ones.
     //
     const unsigned int curAttListSize = toFill.size();

     //
     //  We need a buffer into which raw scanned attribute values will be
     //  normalized.
     //
     XMLBufBid bbNormal(&fBufMgr);
     XMLBuffer& normBuf = bbNormal.getBuffer();

     //
     //  Loop through our explicitly provided attributes, which are in the raw
     //  scanned form, and build up XMLAttr objects.
     //
     unsigned int index;
     for (index = 0; index < attCount; index++)
     {
         const KVStringPair* curPair = providedAttrs.elementAt(index);

         //
         //  We have to split the name into its prefix and name parts. Then
         //  we map the prefix to its URI.
         //
         const XMLCh* const namePtr = curPair->getKey();
         ArrayJanitor<XMLCh> janName(0);

         // use a stack-based buffer when possible.
         XMLCh tempBuffer[100];

         const int colonInd = XMLString::indexOf(namePtr, chColon);
         const XMLCh* prefPtr = XMLUni::fgZeroLenString;
         const XMLCh* suffPtr = XMLUni::fgZeroLenString;
         if (colonInd != -1)
         {
             //
             //  We have to split the string, so make a copy. The copy is
             //  cut at the colon, so the prefix and the local part both
             //  live in the same buffer.
             //
             if (XMLString::stringLen(namePtr) < sizeof(tempBuffer) / sizeof(tempBuffer[0]))
             {
                 XMLString::copyString(tempBuffer, namePtr);
                 tempBuffer[colonInd] = chNull;
                 prefPtr = tempBuffer;
             }
             else
             {
                 janName.reset(XMLString::replicate(namePtr));
                 janName[colonInd] = chNull;
                 prefPtr = janName.get();
             }

             suffPtr = prefPtr + colonInd + 1;
         }
         else
         {
             // No colon, so we just have a name with no prefix
             suffPtr = namePtr;
         }

         //
         //  Map the prefix to a URI id. We tell him that we are mapping an
         //  attr prefix, so any xmlns attrs at this level will not affect it.
         //
         const unsigned int uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);

         //
         //  If the uri comes back as the xmlns or xml URI or its just a name
         //  and that name is 'xmlns', or the URI is the schema instance URI,
         //  then we handle it specially. So set a boolean flag that lets us
         //  quickly below know which we are dealing with.
         //
         const bool isNSAttr = (uriId == fXMLNSNamespaceId)
                               || (uriId == fXMLNamespaceId)
                               || !XMLString::compareString(suffPtr, XMLUni::fgXMLNSString)
                               || !XMLString::compareString(getURIText(uriId), SchemaSymbols::fgURI_XSI);

         //
         //  If its not a special case namespace attr of some sort, then we
         //  do normal checking and processing. Under a DTD grammar every
         //  attribute takes this path.
         //
         XMLAttDef::AttTypes attType;
         if (!isNSAttr || fGrammarType == Grammar::DTDGrammarType)
         {
             // Some checking for attribute wild card first (for schema)
             bool laxThisOne = false;
             bool skipThisOne = false;

             XMLAttDef* attDefForWildCard = 0;
             XMLAttDef* attDef = 0;
             if (fGrammarType == Grammar::SchemaGrammarType)
             {
                 SchemaElementDecl* const schemaDecl = (SchemaElementDecl*) elemDecl;
                 SchemaAttDef* attWildCard = schemaDecl->getAttWildCard();

                 if (attWildCard)
                 {
                     // if schema, see if we should lax or skip the validation of this attribute
                     if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne))
                     {
                         SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId));
                         if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType)
                         {
                             RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry();
                             if (attRegistry)
                                 attDefForWildCard = attRegistry->get(suffPtr);
                         }
                     }
                 }

                 // retrieve the att def
                 attDef = schemaDecl->getAttDef(suffPtr, uriId);

                 if (!attDef)
                 {
                     // not found, see if the attDef should be qualified or not
                     if (uriId == fEmptyNamespaceId)
                     {
                         attDef = schemaDecl->getAttDef(suffPtr, fURIStringPool->getId(fGrammar->getTargetNamespace()));
                         if (fValidate
                             && attDef
                             && !attDefForWildCard
                             && !skipThisOne
                             && !laxThisOne
                             && attDef->getCreateReason() != XMLAttDef::JustFaultIn)
                         {
                             // the attribute should be qualified
                             fValidator->emitError
                             (
                                 XMLValid::AttributeNotQualified
                                 , attDef->getFullName()
                             );
                         }
                     }
                     else
                     {
                         attDef = schemaDecl->getAttDef(suffPtr, fEmptyNamespaceId);
                         if (fValidate
                             && attDef
                             && !attDefForWildCard
                             && !skipThisOne
                             && !laxThisOne
                             && attDef->getCreateReason() != XMLAttDef::JustFaultIn)
                         {
                             // the attribute should be unqualified
                             fValidator->emitError
                             (
                                 XMLValid::AttributeNotUnQualified
                                 , attDef->getFullName()
                             );
                         }
                     }
                 }
             }

             //
             //  Find this attribute within the parent element. We pass both
             //  the uriID/name and the raw QName buffer, since we don't know
             //  how the derived validator and its elements store attributes.
             //
             bool wasAdded = false;
             if (!attDef)
             {
                 attDef = elemDecl->findAttr
                 (
                     curPair->getKey()
                     , uriId
                     , suffPtr
                     , prefPtr
                     , XMLElementDecl::AddIfNotFound
                     , wasAdded
                 );
             }

             if (wasAdded)
             {
                 // This is to tell the Validator that this attribute was
                 // faulted-in, was not an attribute in the attdef originally
                 attDef->setCreateReason(XMLAttDef::JustFaultIn);
             }

             //
             //  If no wildcard declaration covers it and we are strictly
             //  validating, then an attribute that was just faulted in, or
             //  one faulted in earlier that is seen for the first time in
             //  this start tag, is not valid for this element.
             //
             if (!attDefForWildCard && fValidate && !skipThisOne && !laxThisOne)
             {
                 const bool notDeclared = wasAdded
                     || (attDef->getCreateReason() == XMLAttDef::JustFaultIn
                         && !attDef->getProvided());

                 if (notDeclared)
                 {
                     XMLBufBid bbURI(&fBufMgr);
                     XMLBuffer& bufURI = bbURI.getBuffer();
                     getURIText(uriId, bufURI);

                     // The message names the attribute as {uri}localName
                     XMLBufBid bbMsg(&fBufMgr);
                     XMLBuffer& bufMsg = bbMsg.getBuffer();
                     bufMsg.append(chOpenCurly);
                     bufMsg.append(bufURI.getRawBuffer());
                     bufMsg.append(chCloseCurly);
                     bufMsg.append(suffPtr);
                     fValidator->emitError
                     (
                         XMLValid::AttNotDefinedForElement
                         , bufMsg.getRawBuffer()
                         , elemDecl->getFullName()
                     );
                 }
             }

             //
             //  If its already provided, then there are more than one of
             //  this attribute in this start tag, so emit an error.
             //
             if (attDef->getProvided())
             {
                 emitError
                 (
                     XMLErrs::AttrAlreadyUsedInSTag
                     , attDef->getFullName()
                     , elemDecl->getFullName()
                 );
             }
             else
             {
                 attDef->setProvided(true);
             }

             //
             //  Now normalize the raw value since we have the attribute type. We
             //  don't care about the return status here. If it failed, an error
             //  was issued, which is all we care about.
             //
             //  An att def that was faulted in (now or on an earlier start
             //  tag) carries no real declaration, so when a wildcard
             //  declaration is available that one is used instead.
             //
             const bool faultedIn = wasAdded
                 || (attDef->getCreateReason() == XMLAttDef::JustFaultIn);

             if (attDefForWildCard && faultedIn)
             {
                 normalizeAttValue
                 (
                     attDefForWildCard
                     , curPair->getValue()
                     , normBuf
                 );

                 //
                 //  If we found an attdef for this one, then lets validate it.
                 //
                 if (fValidate && !skipThisOne)
                 {
                     // normalize the attribute according to schema whitespace facet
                     XMLBufBid bbtemp(&fBufMgr);
                     XMLBuffer& tempBuf = bbtemp.getBuffer();

                     DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, normBuf.getRawBuffer(), tempBuf);
                     normBuf.set(tempBuf.getRawBuffer());

                     fValidator->validateAttrValue
                     (
                         attDefForWildCard
                         , normBuf.getRawBuffer()
                     );
                 }

                 // Save the type for later use
                 attType = attDefForWildCard->getType();
             }
             else
             {
                 normalizeAttValue
                 (
                     attDef
                     , curPair->getValue()
                     , normBuf
                 );

                 //
                 //  If we found an attdef for this one, then lets validate it.
                 //
                 if (!faultedIn && fValidate && !skipThisOne)
                 {
                     if (fGrammarType == Grammar::SchemaGrammarType)
                     {
                         // normalize the attribute according to schema whitespace facet
                         XMLBufBid bbtemp(&fBufMgr);
                         XMLBuffer& tempBuf = bbtemp.getBuffer();

                         DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator();
                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, normBuf.getRawBuffer(), tempBuf);
                         normBuf.set(tempBuf.getRawBuffer());
                     }

                     fValidator->validateAttrValue
                     (
                         attDef
                         , normBuf.getRawBuffer()
                     );
                 }

                 // Save the type for later use
                 attType = attDef->getType();
             }
         }
         else
         {
             // Just normalize as CDATA
             attType = XMLAttDef::CData;
             normalizeAttRawValue
             (
                 curPair->getKey()
                 , curPair->getValue()
                 , normBuf
             );
         }

         //
         //  Add this attribute to the attribute list that we use to pass them
         //  to the handler. We reuse its existing elements but expand it as
         //  required.
         //
         XMLAttr* curAttr;
         if (retCount >= curAttListSize)
         {
             curAttr = new XMLAttr
             (
                 uriId
                 , suffPtr
                 , prefPtr
                 , normBuf.getRawBuffer()
                 , attType
                 , true
             );
             toFill.addElement(curAttr);
         }
         else
         {
             curAttr = toFill.elementAt(retCount);
             curAttr->set
             (
                 uriId
                 , suffPtr
                 , prefPtr
                 , normBuf.getRawBuffer()
                 , attType
             );
             curAttr->setSpecified(true);
         }

         // Bump the count of attrs in the list
         retCount++;
     }

     //
     //  Now, if there are any attributes declared by this element, let's
     //  go through them and make sure that any required ones are provided,
     //  and fault in any fixed ones and defaulted ones that are not provided
     //  literally.
     //
     if (hasDefs)
     {
         //
         // Check after all specified attrs are scanned
         // (1) report error for REQUIRED attrs that are missing (V_TAGc)
         // (2) add default attrs if missing (FIXED and NOT_FIXED)
         //
         XMLAttDefList& attDefList = elemDecl->getAttDefList();
         while (attDefList.hasMoreElements())
         {
             // Get the current att def, for convenience and its def type
             const XMLAttDef& curDef = attDefList.nextElement();
             const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();

             if (!curDef.getProvided())
             {
                 // the attribute is not provided
                 if (fValidate)
                 {
                     // If we are validating and its required, then an error
                     if ((defType == XMLAttDef::Required) ||
                         (defType == XMLAttDef::Required_And_Fixed)  )
                     {
                         fValidator->emitError
                         (
                             XMLValid::RequiredAttrNotProvided
                             , curDef.getFullName()
                         );
                     }
                     else if ((defType == XMLAttDef::Default) ||
                              (defType == XMLAttDef::Fixed)  )
                     {
                         if (fStandalone && curDef.isExternal())
                         {
                             //
                             // XML 1.0 Section 2.9
                             // Document is standalone, so attributes must not be defaulted.
                             //
                             fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName());
                         }
                     }
                 }

                 //
                 //  Fault in the value if needed, and bump the att count.
                 //  This happens whether or not we are validating.
                 //
                 if ((defType == XMLAttDef::Default)
                 ||  (defType == XMLAttDef::Fixed))
                 {
                     //
                     //  Use the same output list as the explicit attributes
                     //  above. A freshly allocated attr is handed to the list
                     //  before it is filled in, so that it is not left
                     //  unowned if faulting it in fails.
                     //
                     XMLAttr* curAtt;
                     if (retCount >= curAttListSize)
                     {
                         curAtt = new XMLAttr;
                         toFill.addElement(curAtt);
                     }
                     else
                     {
                         curAtt = toFill.elementAt(retCount);
                     }
                     fValidator->faultInAttr(*curAtt, curDef);

                     if (fGrammarType == Grammar::DTDGrammarType)
                     {
                         //
                         //  Map the new attribute's prefix to a URI id and store
                         //  that in the attribute object.
                         //
                         curAtt->setURIId
                         (
                             resolvePrefix(curAtt->getPrefix(), ElemStack::Mode_Attribute)
                         );
                     }

                     // Indicate it was not explicitly specified and bump count
                     curAtt->setSpecified(false);
                     retCount++;
                 }
             }
             else
             {
                 // attribute is provided
                 // (schema) report error for PROHIBITED attrs that are present (V_TAGc)
                 if (defType == XMLAttDef::Prohibited && fValidate)
                 {
                     fValidator->emitError
                     (
                         XMLValid::ProhibitedAttributePresent
                         , curDef.getFullName()
                     );
                 }
             }
         }
     }
     return retCount;
 }


 //
 //  This method is called after the content scan to insure that all the
 //  ID/IDREF attributes match up (i.e. that all IDREFs refer to IDs.) This is
 //  an XML 1.0 rule, so we can do here in the core.
 //
 void XMLScanner::checkIDRefs()
 {
     //
     //
     //  Iterate the id ref list. If we find any entries here which are used
     //  but not declared, then that's an error.
     //
     RefHashTableOfEnumerator<XMLRefInfo> refEnum(fIDRefList);
     while (refEnum.hasMoreElements())
     {
         // Get a ref to the current element
         const XMLRefInfo& curRef = refEnum.nextElement();

         // If its used but not declared, then its an error
         if (!curRef.getDeclared() && curRef.getUsed() && fValidate)
             fValidator->emitError(XMLValid::IDNotDeclared, curRef.getRefName());
     }
 }


 //
 //  A simple sanity check on a progressive scan token: it is legal for this
 //  scanner only if both its scanner id and its sequence id match the ones
 //  this scanner currently holds.
 //
 bool XMLScanner::isLegalToken(const XMLPScanToken& toCheck)
 {
     // The token must carry this scanner's id
     if (fScannerId != toCheck.fScannerId)
         return false;

     // And it must carry this scanner's current sequence id
     return (fSequenceId == toCheck.fSequenceId);
 }


 //
 //  This method will take a raw attribute value and normalize it according to
 //  the rules of the attribute type. It will put the resulting value into the
 //  passed buffer.
 //
 //  This code assumes that escaped characters in the original value (via char
 //  refs) are prefixed by a 0xFFFF character. This is because some characters
 //  are legal if escaped only. And some escape chars are not subject to
 //  normalization rules.
 //
 //  Parameters:
 //      attDef  Supplies the attribute type, its full name (used in error
 //              messages) and whether it was declared externally.
 //      value   The raw, null terminated attribute value.
 //      toFill  Reset on entry, then receives the normalized value.
 //
 //  For CDATA (and any type value beyond Notation) each unescaped tab, line
 //  feed or carriage return becomes a space. For the other types, runs of
 //  whitespace are collapsed to a single space and leading/trailing
 //  whitespace is dropped; the escape status of a character is not
 //  considered there.
 //
 //  Returns false if an unescaped '<' was seen (an error is emitted for
 //  it), true otherwise.
 //
 bool XMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
                                     , const XMLCh* const        value
                                     ,       XMLBuffer&          toFill)
 {
     // A simple state value for a whitespace processing state machine
     enum States
     {
         InWhitespace
         , InContent
     };

     // Get the type and name
     const XMLAttDef::AttTypes type = attDef->getType();
     const XMLCh* const attrName = attDef->getFullName();

     // Assume its going to go fine, and empty the target buffer in preparation
     bool retVal = true;
     toFill.reset();

     //
     // Get attribute def - to check to see if it's declared externally or not
     //
     bool  isAttExternal = attDef->isExternal();

     //
     //  Loop through the chars of the source value and normalize it according
     //  to the type.
     //
     States curState = InContent;
     bool escaped;
     bool firstNonWS = false;
     XMLCh nextCh;
     const XMLCh* srcPtr = value;
     while (*srcPtr)
     {
         //
         //  Get the next character from the source. We have to watch for
         //  escaped characters (which are indicated by a 0xFFFF value followed
         //  by the char that was escaped.)
         //
         nextCh = *srcPtr;
         escaped = (nextCh == 0xFFFF);
         if (escaped)
         {
             nextCh = *++srcPtr;

             //
             //  An escape marker with nothing behind it leaves us sitting on
             //  the terminating null. Stop here, otherwise the increment at
             //  the bottom of the loop would carry us past the end of the
             //  value.
             //
             if (!nextCh)
                 break;
         }

         //
         //  If its not escaped, then make sure its not a < character, which is
         //  not allowed in attribute values.
         //
         if (!escaped && (nextCh == chOpenAngle))
         {
             emitError(XMLErrs::BracketInAttrValue, attrName);
             retVal = false;
         }

         if (type == XMLAttDef::CData || type > XMLAttDef::Notation)
         {
             if (!escaped)
             {
                 if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))
                 {
                     //
                     // Check Validity Constraint for Standalone document declaration
                     // XML 1.0, Section 2.9
                     //
                     if (fStandalone && fValidate && isAttExternal)
                     {
                         //
                         // Can't have a standalone document declaration of "yes" if  attribute
                         // values are subject to normalisation
                         //
                         fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
                     }
                     nextCh = chSpace;
                 }
             }
         }
         else
         {
             if (curState == InWhitespace)
             {
                 if (!XMLReader::isWhitespace(nextCh))
                 {
                     // A single space stands in for the run we just left,
                     // unless that run was leading whitespace.
                     if (firstNonWS)
                         toFill.append(chSpace);
                     curState = InContent;
                     firstNonWS = true;
                 }
                 else
                 {
                     srcPtr++;
                     continue;
                 }
             }
             else if (curState == InContent)
             {
                 if (XMLReader::isWhitespace(nextCh))
                 {
                     curState = InWhitespace;
                     srcPtr++;
                     //
                     // Check Validity Constraint for Standalone document declaration
                     // XML 1.0, Section 2.9
                     //
                     if (fStandalone && fValidate && isAttExternal)
                     {
                         //
                         //  srcPtr already points at the following char. A
                         //  lone space between two pieces of content is left
                         //  as is; anything else means normalization changes
                         //  the value.
                         //
                         if (!firstNonWS || (nextCh != chSpace) || (!*srcPtr) || XMLReader::isWhitespace(*srcPtr))
                         {
                             //
                             // Can't have a standalone document declaration of "yes" if  attribute
                             // values are subject to normalisation
                             //
                             fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
                         }
                     }
                     continue;
                 }
                 firstNonWS = true;
             }
         }

         // Add this char to the target buffer
         toFill.append(nextCh);

         // And move up to the next character in the source
         srcPtr++;
     }
     return retVal;
 }

 //
 //  This method will just normalize the input value as CDATA without
 //  any standalone checking.
 //
 //  Escaped characters in the raw value are expected to be prefixed by a
 //  0xFFFF marker; the marker is dropped and the character behind it is
 //  copied through untouched. Every unescaped whitespace character is
 //  replaced by a space.
 //
 //  Parameters:
 //      attrName    The attribute's name, used only in error messages.
 //      value       The raw, null terminated attribute value.
 //      toFill      Reset on entry, then receives the normalized value.
 //
 //  Returns false if an unescaped '<' was seen (an error is emitted for
 //  it), true otherwise.
 //
 bool XMLScanner::normalizeAttRawValue( const   XMLCh* const        attrName
                                     , const XMLCh* const        value
                                     ,       XMLBuffer&          toFill)
 {
     // Assume its going to go fine, and empty the target buffer in preparation
     bool retVal = true;
     toFill.reset();

     //
     //  Loop through the chars of the source value and normalize each one
     //  as CDATA.
     //
     bool escaped;
     XMLCh nextCh;
     const XMLCh* srcPtr = value;
     while (*srcPtr)
     {
         //
         //  Get the next character from the source. We have to watch for
         //  escaped characters (which are indicated by a 0xFFFF value followed
         //  by the char that was escaped.)
         //
         nextCh = *srcPtr;
         escaped = (nextCh == 0xFFFF);
         if (escaped)
         {
             nextCh = *++srcPtr;

             //
             //  An escape marker with nothing behind it leaves us sitting on
             //  the terminating null. Stop here, otherwise the increment at
             //  the bottom of the loop would carry us past the end of the
             //  value.
             //
             if (!nextCh)
                 break;
         }

         if (!escaped)
         {
             //
             //  An unescaped < character is not allowed in attribute values.
             //
             if (nextCh == chOpenAngle)
             {
                 emitError(XMLErrs::BracketInAttrValue, attrName);
                 retVal = false;
             }

             //
             //  NOTE: Yes this is a little redundant in that a 0x20 is
             //  replaced with an 0x20. But its faster to do this (I think)
             //  than checking for 9, A, and D separately.
             //
             if (XMLReader::isWhitespace(nextCh))
                 nextCh = chSpace;
         }

         // Add this char to the target buffer
         toFill.append(nextCh);

         // And move up to the next character in the source
         srcPtr++;
     }
     return retVal;
 }

 //
 //  Maps a namespace prefix to the id of the URI it is bound to.
 //
 //  The reserved prefixes are answered directly: 'xmlns' yields
 //  fXMLNSNamespaceId and 'xml' yields fXMLNamespaceId. Anything else is
 //  looked up through the element stack using the passed mapping mode. If
 //  the stack reports the prefix as unknown, an UnknownPrefix error is
 //  emitted and the id handed back by the stack is still returned.
 //
 unsigned int
 XMLScanner::resolvePrefix(  const   XMLCh* const        prefix
                             , const ElemStack::MapModes mode)
 {
     // The reserved prefixes never go through the element stack
     if (XMLString::compareString(prefix, XMLUni::fgXMLNSString) == 0)
         return fXMLNSNamespaceId;

     if (XMLString::compareString(prefix, XMLUni::fgXMLString) == 0)
         return fXMLNamespaceId;

     // Let the element stack search upwards for a binding of this prefix
     bool notMapped;
     const unsigned int mappedId = fElemStack.mapPrefixToURI(prefix, mode, notMapped);

     // The stack still hands back an id in this case, but it is an error
     if (notMapped)
         emitError(XMLErrs::UnknownPrefix, prefix);

     return mappedId;
 }


 //
 //  Maps a namespace prefix to the id of the URI it is bound to, and also
 //  fetches the text of that URI into bufToFill.
 //
 //  The reserved prefixes are answered directly: 'xmlns' yields
 //  fXMLNSNamespaceId and 'xml' yields fXMLNamespaceId. Note that on these
 //  two early returns bufToFill is left untouched. Anything else is looked
 //  up through the element stack; an unknown prefix causes an UnknownPrefix
 //  error, after which the URI text for the returned id is still fetched.
 //
 unsigned int
 XMLScanner::resolvePrefix(  const   XMLCh* const        prefix
                             ,       XMLBuffer&          bufToFill
                             , const ElemStack::MapModes mode)
 {
     // The reserved prefixes never go through the element stack
     if (XMLString::compareString(prefix, XMLUni::fgXMLNSString) == 0)
         return fXMLNSNamespaceId;

     if (XMLString::compareString(prefix, XMLUni::fgXMLString) == 0)
         return fXMLNamespaceId;

     // Let the element stack search upwards for a binding of this prefix
     bool notMapped;
     const unsigned int mappedId = fElemStack.mapPrefixToURI(prefix, mode, notMapped);

     // The stack still hands back an id in this case, but it is an error
     if (notMapped)
         emitError(XMLErrs::UnknownPrefix, prefix);

     // Give the caller the URI text that goes with the id
     getURIText(mappedId, bufToFill);

     return mappedId;
 }


 //
 //  This method will reset the scanner data structures, and related plugged
 //  in stuff, for a new scan session. We get the input source for the primary
 //  XML entity, create the reader for it, and push it on the stack so that
 //  upon successful return from here we are ready to go.
 //
 void XMLScanner::scanReset(const InputSource& src)
 {
     //
     //  This call implicitly tells us that we are going to reuse the scanner
     //  if it was previously used. So tell the validator to reset itself.
     //
     //  But, if the fReuseGrammar flag is set, then don't reset it.
     //
     //  NOTE:   The ReaderMgr is flushed on the way out, because that is
     //          required to insure that files are closed.
     //
     if (!fReuseGrammar) {
         fGrammarResolver->reset();

         resetEntityDeclPool();
         if (fDoNamespaces)
             resetURIStringPool();

         // create a default grammar first
         fGrammar = new DTDGrammar();

         //
         if (fValidatorFromUser) {
             if (fValidator->handlesDTD())
                 fValidator->setGrammar(fGrammar);
         }
         else {
             // set fValidator as fDTDValidator
             fValidator = fDTDValidator;
             fValidator->setGrammar(fGrammar);
         }

         fGrammarType = fGrammar->getGrammarType();
         fGrammarResolver->putGrammar(XMLUni::fgZeroLenString, fGrammar);

         if (fValScheme == Val_Auto) {
             fValidate = false;
         }
     }
     else {
         // reusing grammar, thus the fGrammar must pre-exist already
         // make sure the validator handles this reuse grammar type
         if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
             if (fValidatorFromUser)
                 ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
             else {
                 fValidator = fSchemaValidator;
             }
         }
         else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
             if (fValidatorFromUser)
                 ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
             else {
                 fValidator = fDTDValidator;
             }
         }

         if (!fValidator->getGrammar())
             fValidator->setGrammar(fGrammar);
     }


     //
     //  And for all installed handlers, send reset events. This gives them
     //  a chance to flush any cached data.
     //
     if (fDocHandler)
         fDocHandler->resetDocument();
     if (fEntityHandler)
         fEntityHandler->resetEntities();
     if (fErrorReporter)
         fErrorReporter->resetErrors();

     // Clear out the id reference list
     fIDRefList->removeAll();

     // Reset the Root Element Name
     delete [] fRootElemName;
     fRootElemName = 0;

     // Reset IdentityConstraints
     fMatcherStack->clear();

     //
     //  Reset the element stack, and give it the latest ids for the special
     //  URIs it has to know about.
     //
     fElemStack.reset
     (
         fEmptyNamespaceId
         , fUnknownNamespaceId
         , fXMLNamespaceId
         , fXMLNSNamespaceId
     );

     // Reset some status flags
     fInException = false;
     fStandalone = false;
     fErrorCount = 0;
     fHasNoDTD = true;
     fSeeXsi = false;

     // Reset the validators
     fDTDValidator->reset();
     fSchemaValidator->reset();
     if (fValidatorFromUser)
         fValidator->reset();

     //
     //  Handle the creation of the XML reader object for this input source.
     //  This will provide us with transcoding and basic lexing services.
     //
     XMLReader* newReader = fReaderMgr.createReader
     (
         src
         , true
         , XMLReader::RefFrom_NonLiteral
         , XMLReader::Type_General
         , XMLReader::Source_External
     );
     if (!newReader) {
         if (src.getIssueFatalErrorIfNotFound())
             ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId());
         else
             ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId());
     }

     // Push this read onto the reader manager
     fReaderMgr.pushReader(newReader, 0);
 }


 //
 //  Delivers the character data accumulated in toSend to the document
 //  handler (and, for schema grammars, to the active identity constraint
 //  matchers), then resets the buffer. An empty buffer is left alone.
 //
 //  When not validating, the data always goes out as plain characters. When
 //  validating, the char data options of the element on top of the element
 //  stack decide what happens:
 //
 //      NoCharData    NoCharDataInCM error, nothing is sent
 //      SpacesOk      all whitespace data is sent as ignorable whitespace,
 //                    anything else gives a NoCharDataInCM error
 //      AllCharData   sent as characters; for a schema grammar the data is
 //                    first whitespace normalized by the schema validator
 //
 void XMLScanner::sendCharData(XMLBuffer& toSend)
 {
     // If no data in the buffer, then nothing to do
     if (toSend.isEmpty())
         return;

     if (!fValidate)
     {
         // Let all active identity constraints see the data
         if (fGrammarType == Grammar::SchemaGrammarType)
         {
             const unsigned int matcherCount = fMatcherStack->getMatcherCount();
             for (unsigned int idx = 0; idx < matcherCount; idx++)
                 fMatcherStack->getMatcherAt(idx)->docCharacters(toSend.getRawBuffer(), toSend.getLen());
         }

         // Without validation it is always treated as just char data
         if (fDocHandler)
             fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
     }
      else
     {
         // Get the raw data we need for the callbacks
         const XMLCh* const rawBuf = toSend.getRawBuffer();
         const unsigned int len = toSend.getLen();

         // What the current element allows comes from its declaration
         const ElemStack::StackElem* topElem = fElemStack.topElement();
         const XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts();

         if (charOpts == XMLElementDecl::NoCharData)
         {
             // They definitely cannot handle any type of char data
             fValidator->emitError(XMLValid::NoCharDataInCM);
         }
          else
         {
             const bool onlySpaces = XMLReader::isAllSpaces(rawBuf, len);

             if (onlySpaces && (charOpts == XMLElementDecl::SpacesOk))
             {
                 // Pure whitespace where only spaces are ok is ignorable
                 if (fDocHandler)
                     fDocHandler->ignorableWhitespace(rawBuf, len, false);
             }
              else if (charOpts == XMLElementDecl::AllCharData)
             {
                 if (fGrammarType != Grammar::SchemaGrammarType)
                 {
                     if (fDocHandler)
                         fDocHandler->docCharacters(rawBuf, len, false);
                 }
                  else
                 {
                     // The normalized data can only be as large as the
                     // original, so size the buffer from the input length
                     XMLBuffer normalized(len+1);

                     // Normalize according to the schema whitespace facet
                     DatatypeValidator* elemDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator();
                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(elemDV, rawBuf, normalized);

                     // Let all active identity constraints see the data
                     const unsigned int matcherCount = fMatcherStack->getMatcherCount();
                     for (unsigned int idx = 0; idx < matcherCount; idx++)
                         fMatcherStack->getMatcherAt(idx)->docCharacters(normalized.getRawBuffer(), normalized.getLen());

                     if (fDocHandler)
                         fDocHandler->docCharacters(normalized.getRawBuffer(), normalized.getLen(), false);
                 }
             }
              else if (!onlySpaces)
             {
                 // Real character data where at most whitespace is allowed
                 fValidator->emitError(XMLValid::NoCharDataInCM);
             }
         }
     }

     // Reset buffer
     toSend.reset();
 }


 //
 //  Works out what kind of top level token comes next in the input and
 //  returns the matching enumerated value. As many characters are consumed
 //  as it takes to decide:
 //
 //      end of input    Token_EOF, nothing consumed
 //      not a '<'       Token_CharData, nothing consumed
 //      "</"            Token_EndTag, both chars consumed
 //      "<![CDATA"      Token_CData, whole string consumed
 //      "<!--"          Token_Comment, whole string consumed
 //      "<!" + other    ExpectedCommentOrCDATA error, Token_Unknown
 //      "<?"            Token_PI, both chars consumed
 //      "<" + other     Token_StartTag, only the '<' consumed
 //
 //  Whenever a '<' was seen, orgReader is set to the number of the reader
 //  that is current right after the '<' was consumed. It is not touched
 //  otherwise.
 //
 XMLScanner::XMLTokens XMLScanner::senseNextToken(unsigned int& orgReader)
 {
     // What follows the '<' for a CDATA section, and for a comment
     static const XMLCh cdataIntro[] =
     {
             chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A
         ,   chLatin_T, chLatin_A, chNull
     };
     static const XMLCh commentIntro[] =
     {
         chBang, chDash, chDash, chNull
     };

     //
     //  Peek at the next char with end of entity exceptions turned on, so
     //  that an entity ending right at the > of some markup is caught.
     //
     XMLCh peekedCh;
     {
         ThrowEOEJanitor eoeGuard(&fReaderMgr, true);
         peekedCh = fReaderMgr.peekNextChar();
     }

     // End of input is legal here at the top level
     if (!peekedCh)
         return Token_EOF;

     //
     //  Anything but a '<' means content. That includes a '&' starting a
     //  reference of some sort, since content is the only place where one
     //  can show up here.
     //
     if (peekedCh != chOpenAngle)
         return Token_CharData;

     // Eat the '<' and tell the caller which reader it was seen in
     fReaderMgr.getNextChar();
     orgReader = fReaderMgr.getCurrentReaderNum();

     // The char behind the '<' tells us which form of markup this is
     const XMLCh markupCh = fReaderMgr.peekNextChar();

     if (markupCh == chForwardSlash)
     {
         fReaderMgr.getNextChar();
         return Token_EndTag;
     }

     if (markupCh == chBang)
     {
         if (fReaderMgr.skippedString(cdataIntro))
             return Token_CData;

         if (fReaderMgr.skippedString(commentIntro))
             return Token_Comment;

         emitError(XMLErrs::ExpectedCommentOrCDATA);
         return Token_Unknown;
     }

     if (markupCh == chQuestion)
     {
         // It must be a PI
         fReaderMgr.getNextChar();
         return Token_PI;
     }

     //
     //  Assume it is an element name and report a start tag. If it is not,
     //  the start tag scan will fail when it cannot get a valid name.
     //
     return Token_StartTag;
 }


 //
 //  Called with the name and raw value of an xmlns="xxx" or xmlns:xxx="yyy"
 //  attribute, to add the resulting prefix to URI mapping to the element
 //  stack. The caller guarantees that the attribute has one of these two
 //  forms, so that is not checked again here.
 //
 //  attrName    The attribute name, i.e. "xmlns" or "xmlns:prefix".
 //  attrValue   The raw (not yet normalized) attribute value, i.e. the URI.
 //
 void XMLScanner::updateNSMap(const  XMLCh* const    attrName
                             , const XMLCh* const    attrValue)
 {
     // We need a buffer to normalize the attribute value into
     XMLBufBid bbNormal(&fBufMgr);
     XMLBuffer& normalBuf = bbNormal.getBuffer();

     //
     //  We either have the default prefix (""), or we point into the attr
     //  name parameter. Note that the xmlns is not the prefix we care about
     //  here. To us, the 'prefix' is really the local part of the attrName
     //  parameter.
     //
     //  The offset is kept as a signed int so that the "not found" test is
     //  a plain signed comparison against -1.
     //
     const XMLCh* prefPtr = XMLUni::fgZeroLenString;
     const int colonOfs = XMLString::indexOf(attrName, chColon);
     if (colonOfs != -1)
         prefPtr = &attrName[colonOfs + 1];

     //
     //  Normalize the value into the buffer. The return value is of no
     //  interest here; if something was wrong with the value the error has
     //  been issued already, which is all we care about.
     //
     normalizeAttRawValue(attrName, attrValue, normalBuf);

     //
     //  The normalized value is the URI. Find it in, or add it to, the URI
     //  string pool to get its unique id, and have the element stack map
     //  the prefix to that id.
     //
     fElemStack.addPrefix
     (
         prefPtr
         , fURIStringPool->addOrFind(normalBuf.getRawBuffer())
     );
 }

 void XMLScanner::scanRawAttrListforNameSpaces(const RefVectorOf<KVStringPair>* theRawAttrList, int attCount) {

     //  Schema Xsi Type yyyy (e.g. xsi:type="yyyyy")
     XMLBufBid bbXsi(&fBufMgr);
     XMLBuffer& fXsiType = bbXsi.getBuffer();

     //
     //  Make an initial pass through the list and find any xmlns attributes or
     //  schema attributes.
     //  When we find one, send it off to be used to update the element stack's
     //  namespace mappings.
     //

     int index = 0;

     for (index = 0; index < attCount; index++)
     {
         // each attribute has the prefix:suffix="value"
         const KVStringPair* curPair = fRawAttrList->elementAt(index);
         const XMLCh* valuePtr = curPair->getValue();
         const XMLCh* rawPtr = curPair->getKey();

         QName attName(rawPtr, fEmptyNamespaceId);
         const XMLCh* suffPtr = attName.getLocalPart();

         //  If either the key begins with "xmlns:" or its just plain
         //  "xmlns", then use it to update the map.
         //
         if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
         ||  !XMLString::compareString(rawPtr, XMLUni::fgXMLNSString))
         {
             updateNSMap(rawPtr, valuePtr);

             // if the schema URI is seen in the the valuePtr, set the boolean seeXsi
             if (!XMLString::compareString(valuePtr, SchemaSymbols::fgURI_XSI)) {
                 fSeeXsi = true;
             }
         }
     }

     // walk through the list again to deal with "xsi:...."
     if (fDoSchema && fSeeXsi)
     {
         for (index = 0; index < attCount; index++)
         {
             // each attribute has the prefix:suffix="value"
             const KVStringPair* curPair = fRawAttrList->elementAt(index);
             const XMLCh* valuePtr = curPair->getValue();
             const XMLCh* rawPtr = curPair->getKey();

             QName attName(rawPtr, fEmptyNamespaceId);
             const XMLCh* prefPtr = attName.getPrefix();
             const XMLCh* suffPtr = attName.getLocalPart();

             // if schema URI has been seen, scan for the schema location and uri
             // and resolve the schema grammar; or scan for schema type
             if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {

                 if (!fReuseGrammar) {
                     if (!XMLString::compareString(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCACTION))
                         parseSchemaLocation(valuePtr);
                     else if (!XMLString::compareString(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCACTION))
                         resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString);
                 }

                 if (!XMLString::compareString(suffPtr, SchemaSymbols::fgXSI_TYPE)) {
                         fXsiType.set(valuePtr);
                 }
                 else if (!XMLString::compareString(suffPtr, SchemaSymbols::fgATT_NILL)
                          && fValidator && fValidator->handlesSchema()
                          && !XMLString::compareString(valuePtr, SchemaSymbols::fgATTVAL_TRUE)) {
                             ((SchemaValidator*)fValidator)->setNillable(true);
                 }
             }
         }

         if (fValidator && fValidator->handlesSchema()) {
             if (!fXsiType.isEmpty()) {
                 unsigned int uriId = resolveQName (
                       fXsiType.getRawBuffer()
                     , fNameBuf
                     , fPrefixBuf
                     , ElemStack::Mode_Element
                 );
                 ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fNameBuf.getRawBuffer(), uriId);
             }
         }
     }
 }

 void XMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr)
 {
     RefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr);
     unsigned int size = schemaLocation->size();
     if (size % 2 != 0 ) {
         emitError(XMLErrs::BadSchemaLocation);
     } else {
         for(unsigned int i=0; i<size; i=i+2) {
             resolveSchemaGrammar(schemaLocation->elementAt(i+1), schemaLocation->elementAt(i));
         }
     }

     delete schemaLocation;
 }

 //
 //  Makes sure that a schema grammar for the passed namespace URI is
 //  available, loading it from the passed location if necessary, and
 //  switches the scanner over to schema validation.
 //
 //  If the grammar resolver has no grammar for the URI, or only a DTD
 //  grammar, the schema document at 'loc' is parsed into a DOM with a local,
 //  non-validating, namespace aware parser. A new SchemaGrammar is then
 //  created and a TraverseSchema object is constructed over the document's
 //  root element. If a non-DTD grammar is already known for the URI, only
 //  the validation flag, validator and current grammar are updated.
 //
 //  loc     The location (system id) of the schema document.
 //  uri     The namespace URI the schema is expected to describe.
 //
 //  Throws a RuntimeException if a schema validator is needed but the user
 //  supplied validator does not handle schemas.
 //
 void XMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri) {

     // See what, if anything, is already known for this namespace
     Grammar* grammar = fGrammarResolver->getGrammar(uri);

     if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType) {
         //
         //  Nothing usable yet, so the schema document has to be loaded.
         //  Errors of this inner parse go through internalErrorHandler,
         //  which wraps our own error handler and is queried below for
         //  whether a fatal error was seen.
         //
         IDOMParser parser;
         XMLInternalErrorHandler internalErrorHandler(fErrorHandler);
         parser.setValidationScheme(IDOMParser::Val_Never);
         parser.setDoNamespaces(true);
         parser.setErrorHandler((ErrorHandler*) &internalErrorHandler);
         parser.setEntityResolver(fEntityResolver);

         // Create a buffer for expanding the system id
         XMLBufBid bbSys(&fBufMgr);
         XMLBuffer& expSysId = bbSys.getBuffer();

         //
         //  Allow the entity handler to expand the system id if they choose
         //  to do so. If there is no handler, or it declines, the location
         //  is used as it was given.
         //
         if (fEntityHandler)
         {
             if (!fEntityHandler->expandSystemId(loc, expSysId))
                 expSysId.set(loc);
         }
          else
         {
             expSysId.set(loc);
         }

         // Call the entity resolver interface to get an input source
         InputSource* srcToFill = 0;
         if (fEntityResolver)
         {
             srcToFill = fEntityResolver->resolveEntity
             (
                 XMLUni::fgZeroLenString
                 , expSysId.getRawBuffer()
             );
         }

         //
         //  If they didn't create a source via the entity resolver, then we
         //  have to create one on our own. The system id of the last
         //  external entity is used as the base for the location.
         //
         if (!srcToFill)
         {
             ReaderMgr::LastExtEntityInfo lastInfo;
             fReaderMgr.getLastExtEntityInfo(lastInfo);

             try
             {
                 // Try it as a URL first; a relative result is treated
                 // the same way as a malformed one
                 XMLURL urlTmp(lastInfo.systemId, expSysId.getRawBuffer());
                 if (urlTmp.isRelative())
                 {
                     ThrowXML
                     (
                         MalformedURLException
                         , XMLExcepts::URL_NoProtocolPresent
                     );
                 }
                 srcToFill = new URLInputSource(urlTmp);
             }

             catch(const MalformedURLException&)
             {
                 // Its not a URL, so lets assume its a local file name.
                 srcToFill = new LocalFileInputSource
                 (
                     lastInfo.systemId
                     , expSysId.getRawBuffer()
                 );
             }
         }

         // Put a janitor on the input source
         Janitor<InputSource> janSrc(srcToFill);

         //
         //  Should just issue warning if the schema is not found, so turn
         //  the source's fatal error flag off for the duration of the parse
         //  and remember what it was.
         //
         const bool flag = srcToFill->getIssueFatalErrorIfNotFound();
         srcToFill->setIssueFatalErrorIfNotFound(false);

         parser.parse(*srcToFill) ;

         // Reset the InputSource
         // NOTE(review): this is skipped if parse() throws; the source is
         // deleted by the janitor in that case, so it looks harmless - confirm
         srcToFill->setIssueFatalErrorIfNotFound(flag);

         // Report a fatal error in the schema document through our own
         // error reporting, but only when set to exit on the first fatal
         if (internalErrorHandler.getSawFatal() && fExitOnFirstFatal)
             emitError(XMLErrs::SchemaScanFatalError);

         IDOM_Document* document = parser.getDocument(); //Our Grammar

         if (document != 0) {

             IDOM_Element* root = document->getDocumentElement();// This is what we pass to TraverserSchema
             if (root != 0)
             {
                 //
                 //  The target namespace the schema declares should be the
                 //  URI we were asked for. If it is not, report that when
                 //  validating, and look for an existing grammar under the
                 //  namespace the schema really declares.
                 //
                 const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
                 if (XMLString::compareString(newUri, uri)) {
                     if (fValidate)
                         fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
                     grammar = fGrammarResolver->getGrammar(newUri);
                 }

                 if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType) {

                     //
                     //  Since we have seen a grammar, set our validation flag
                     //  at this point if the validation scheme is auto
                     //
                     if (fValScheme == Val_Auto && !fValidate) {
                         fValidate = true;
                         fElemStack.setValidationFlag(fValidate);
                     }

                     // we have seen a schema, so set up the fValidator as fSchemaValidator
                     if (!fValidator->handlesSchema())
                     {
                         if (fValidatorFromUser) {
                             // the fValidator is from user
                             ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
                         }
                         else {
                             fValidator = fSchemaValidator;
                         }
                     }

                     //
                     //  Create the new grammar and build it from the DOM.
                     //  The traverseSchema object is not used again in this
                     //  function, so the work is presumably all done by its
                     //  constructor.
                     //  NOTE(review): nothing here registers or deletes the
                     //  new grammar; presumably TraverseSchema hands it to
                     //  fGrammarResolver - confirm the ownership.
                     //
                     grammar = new SchemaGrammar();
                     TraverseSchema traverseSchema(root, fURIStringPool, (SchemaGrammar*) grammar, fGrammarResolver, this, fValidator, srcToFill->getSystemId(), fEntityResolver, fErrorHandler);

                     // If we were still on a DTD grammar, the new schema
                     // grammar becomes the current one
                     if (fGrammarType == Grammar::DTDGrammarType) {
                         fGrammar = grammar;
                         fGrammarType = Grammar::SchemaGrammarType;
                         fValidator->setGrammar(fGrammar);
                     }

                     if (!fReuseGrammar && fValidate) {
                         //  validate the Schema scan so far
                         fValidator->preContentValidation(fReuseGrammar);
                     }
                 }
             }
         }
     }
     else {
         //
         //  A non-DTD grammar is already known for this namespace, so
         //  nothing has to be loaded.
         //
         //  Since we have seen a grammar, set our validation flag
         //  at this point if the validation scheme is auto
         //
         if (fValScheme == Val_Auto && !fValidate) {
             fValidate = true;
             fElemStack.setValidationFlag(fValidate);
         }

         // we have seen a schema, so set up the fValidator as fSchemaValidator
         if (!fValidator->handlesSchema())
         {
             if (fValidatorFromUser) {
                 // the fValidator is from user
                 ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
             }
             else {
                 fValidator = fSchemaValidator;
             }
         }

         // If we were still on a DTD grammar, the grammar that was found
         // becomes the current one
         if (fGrammarType == Grammar::DTDGrammarType) {
             fGrammar = grammar;
             fGrammarType = Grammar::SchemaGrammarType;
             fValidator->setGrammar(fGrammar);
         }
     }
 }

 // ---------------------------------------------------------------------------
 //  XMLScanner: Private parsing methods
 // ---------------------------------------------------------------------------


 //
 //  Scans a single or double quoted string from the input into toFill. The
 //  contents are not judged in any way, other than that the quote character
 //  which opened the string is taken to end it.
 //
 //  Returns false if the next character is not a quote, or if a null
 //  character (end of input) shows up before the closing quote. In the
 //  second case the buffer holds what was scanned up to that point.
 //
 //  NOTE: This is for simple stuff like the strings in the XMLDecl which
 //  cannot have any entities inside them. So no end of entity handling is
 //  done here.
 //
 bool XMLScanner::getQuotedString(XMLBuffer& toFill)
 {
     // Start out with an empty target buffer
     toFill.reset();

     // The string has to open with a single or double quote
     XMLCh quoteCh;
     if (!fReaderMgr.skipIfQuote(quoteCh))
         return false;

     // Copy chars over until the opening quote char shows up again
     for (XMLCh curCh = fReaderMgr.getNextChar();
          curCh != quoteCh;
          curCh = fReaderMgr.getNextChar())
     {
         //
         //  A null char means the input ended inside the string. Just
         //  fail; the code that called us will deal with it more
         //  gracefully.
         //
         if (!curCh)
             return false;

         toFill.append(curCh);
     }
     return true;
 }


 //
 //  This method is called to do a raw scan of an attribute value. It does not
 //  do normalization (since we don't know their types yet.) It just scans the
 //  value and does entity expansion.
 //
 //  Ends of entities must be dealt with here. During DTD scan, they can come
 //  from external entities. During content, they can come from any entity.
 //  We just eat the end of entity and continue with our scan until we come
 //  to the closing quote. If an unterminated value causes us to go through
 //  subsequent entities, that will cause errors back in the calling code,
 //  but there's little we can do about it here.
 //
 bool XMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
 {
     // Reset the target buffer
     toFill.reset();

     // Get the next char which must be a single or double quote
     XMLCh quoteCh;
     if (!fReaderMgr.skipIfQuote(quoteCh))
         return false;

     //
     //  We have to get the current reader because we have to ignore closing
     //  quotes until we hit the same reader again.
     //
     const unsigned int curReader = fReaderMgr.getCurrentReaderNum();

     //
     //  Loop until we get the attribute value. Note that we use a double
     //  loop here to avoid the setup/teardown overhead of the exception
     //  handler on every round.
     //
     XMLCh   nextCh;
     XMLCh   secondCh = 0;
     bool    gotLeadingSurrogate = false;
     bool    escaped;
     while (true)
     {
         try
         {
             while(true)
             {
                 // Get another char. Use second char if one is waiting
                 if (secondCh)
                 {
                     nextCh = secondCh;
                     secondCh = 0;
                 }
                  else
                 {
                     nextCh = fReaderMgr.getNextChar();
                 }

                 if (!nextCh)
                     ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);

                 //
                 //  Check for our ending quote. It has to be in the same entity
                 //  as where we started. Quotes in nested entities are ignored.
                 //
                 if (nextCh == quoteCh)
                 {
                     if (curReader == fReaderMgr.getCurrentReaderNum())
                         return true;

                     // Watch for spillover into a previous entity
                     if (curReader > fReaderMgr.getCurrentReaderNum())
                     {
                         emitError(XMLErrs::PartialMarkupInEntity);
                         return false;
                     }
                 }

                 //
                 //  Check for an entity ref . We ignore the empty flag in
                 //  this one.
                 //
                 escaped = false;
                 if (nextCh == chAmpersand)
                 {
                     // If it was not returned directly, then jump back up
                     if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
                     {
                         gotLeadingSurrogate = false;
                         continue;
                     }
                 }

                 // Deal with surrogate pairs
                 if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                 {
                     //
                     //  Its a leading surrogate. If we already got one, then
                     //  issue an error, else set leading flag to make sure that
                     //  we look for a trailing next time.
                     //
                     if (gotLeadingSurrogate)
                     {
                         emitError(XMLErrs::Expected2ndSurrogateChar);
                     }
                      else
                         gotLeadingSurrogate = true;
                 }
                  else
                 {
                     //
                     //  If its a trailing surrogate, make sure that we are
                     //  prepared for that. Else, its just a regular char so make
                     //  sure that we were not expected a trailing surrogate.
                     //
                     if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                     {
                         // Its trailing, so make sure we were expecting it
                         if (!gotLeadingSurrogate)
                             emitError(XMLErrs::Unexpected2ndSurrogateChar);
                     }
                     else
                     {
                         //
                         //  Its just a char, so make sure we were not expecting a
                         //  trailing surrogate.
                         //
                         if (gotLeadingSurrogate) {
                             emitError(XMLErrs::Expected2ndSurrogateChar);
                         }
                         // Its got to at least be a valid XML character
                         else if (!XMLReader::isXMLChar(nextCh))
                         {
                             XMLCh tmpBuf[9];
                             XMLString::binToText
                             (
                                 nextCh
                                 , tmpBuf
                                 , 8
                                 , 16
                             );
                             emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                         }
                     }
                     gotLeadingSurrogate = false;
                 }

                 //
                 //  If it was escaped, then put in a 0xFFFF value. This will
                 //  be used later during validation and normalization of the
                 //  value to know that the following character was via an
                 //  escape char.
                 //
                 if (escaped)
                     toFill.append(0xFFFF);

                 // Else add it to the buffer
                 toFill.append(nextCh);
             }
         }

         catch(const EndOfEntityException&)
         {
             // Just eat it and continue.
             gotLeadingSurrogate = false;
             escaped = false;
         }
     }
     return true;
 }


 //
 //  Scans a quoted attribute value into toFill, expanding references via
 //  scanEntityRef() and normalizing whitespace according to the type of the
 //  passed attribute definition:
 //
 //    - CDATA type: each unescaped tab, line feed or carriage return is
 //      replaced by a single space.
 //    - Any other type: runs of whitespace are compressed to one space and
 //      leading and trailing whitespace is dropped.
 //
 //  Returns false if the next char is not a quote, or if the closing quote
 //  shows up in a reader with a lower number than the one we started in
 //  (PartialMarkupInEntity). Returns true when the matching quote is found
 //  in the starting reader. Throws UnexpectedEOFException if a zero char is
 //  returned by the reader manager before that.
 //
 bool XMLScanner::scanAttValue(  const   XMLAttDef* const    attDef
                                 ,       XMLBuffer&          toFill)
 {
     // States of the non-CDATA whitespace normalization state machine
     enum States
     {
         InWhitespace
         , InContent
     };

     // Get the type and name
     const XMLAttDef::AttTypes type = attDef->getType();
     const XMLCh* const attrName = attDef->getFullName();

     // Reset the target buffer
     toFill.reset();

     // Get the next char which must be a single or double quote
     XMLCh quoteCh;
     if (!fReaderMgr.skipIfQuote(quoteCh))
         return false;

     //
     //  We have to get the current reader because we have to ignore closing
     //  quotes until we hit the same reader again.
     //
     const unsigned int curReader = fReaderMgr.getCurrentReaderNum();

     //
     // Get attribute def - to check to see if it's declared externally or not
     //
     bool  isAttExternal = attDef->isExternal();

     //
     //  Loop until we get the attribute value. Note that we use a double
     //  loop here to avoid the setup/teardown overhead of the exception
     //  handler on every round.
     //
     //  secondCh holds a second character handed back by scanEntityRef();
     //  it is consumed on the next round before reading from the reader.
     //  firstNonWS becomes true once any content character has been seen,
     //  and is what suppresses the leading space in non-CDATA values.
     //
     XMLCh   nextCh;
     XMLCh   secondCh = 0;
     States  curState = InContent;
     bool    firstNonWS = false;
     bool    gotLeadingSurrogate = false;
     bool    escaped;
     while (true)
     {
     try
     {
         while(true)
         {
             // Get another char. Use second char if one is waiting
             if (secondCh)
             {
                 nextCh = secondCh;
                 secondCh = 0;
             }
              else
             {
                 nextCh = fReaderMgr.getNextChar();
             }

             // A zero char is treated as an unexpected end of input
             if (!nextCh)
                 ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);

             //
             //  Check for our ending quote in the same entity. A matching
             //  quote seen in a reader with a higher number falls through
             //  and is handled as ordinary content below.
             //
             if (nextCh == quoteCh)
             {
                 if (curReader == fReaderMgr.getCurrentReaderNum())
                     return true;

                 // Watch for spillover into a previous entity
                 if (curReader > fReaderMgr.getCurrentReaderNum())
                 {
                     emitError(XMLErrs::PartialMarkupInEntity);
                     return false;
                 }
             }

             //
             //  Check for an entity ref now, before we let it affect our
             //  whitespace normalization logic below. We ignore the empty flag
             //  in this one.
             //
             escaped = false;
             if (nextCh == chAmpersand)
             {
                 // Anything other than a directly returned char: go around again
                 if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
                 {
                     gotLeadingSurrogate = false;
                     continue;
                 }
             }

             // Deal with surrogate pairs
             if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
             {
                 //
                 //  It's a leading surrogate. If we already got one, then
                 //  issue an error, else set leading flag to make sure that
                 //  we look for a trailing next time.
                 //
                 if (gotLeadingSurrogate)
                     emitError(XMLErrs::Expected2ndSurrogateChar);
                  else
                     gotLeadingSurrogate = true;
             }
              else
             {
                 //
                 //  If it's a trailing surrogate, make sure that we are
                 //  prepared for that. Else, it's just a regular char so make
                 //  sure that we were not expecting a trailing surrogate.
                 //
                 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                 {
                     // It's trailing, so make sure we were expecting it
                     if (!gotLeadingSurrogate)
                         emitError(XMLErrs::Unexpected2ndSurrogateChar);
                 }
                  else
                 {
                     //
                     //  It's just a char, so make sure we were not expecting a
                     //  trailing surrogate.
                     //
                     if (gotLeadingSurrogate)
                         emitError(XMLErrs::Expected2ndSurrogateChar);

                     //
                     //  It's got to at least be a valid XML character. Note
                     //  that this check runs even if the surrogate error
                     //  above was just emitted. The char is reported as hex
                     //  text.
                     //
                     if (!XMLReader::isXMLChar(nextCh))
                     {
                         XMLCh tmpBuf[9];
                         XMLString::binToText
                         (
                             nextCh
                             , tmpBuf
                             , 8
                             , 16
                         );
                         emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                     }
                 }
                 gotLeadingSurrogate = false;
             }

             //
             //  If it's not escaped, then make sure it's not a < character,
             //  which is not allowed in attribute values. The character is
             //  still processed after the error is reported.
             //
             if (!escaped && (nextCh == chOpenAngle))
                 emitError(XMLErrs::BracketInAttrValue, attrName);

             //
             //  If the attribute is a CDATA type we do simple replacement of
             //  tabs and new lines with spaces, if the character is not escaped
             //  by way of a char ref.
             //
             //  Otherwise, we do the standard non-CDATA normalization of
             //  compressing whitespace to single spaces and getting rid of leading
             //  and trailing whitespace.
             //
             if (type == XMLAttDef::CData)
             {
                 if (!escaped)
                 {
                     if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))
                     {
                         //
                         // Check Validity Constraint for Standalone document declaration
                         // XML 1.0, Section 2.9
                         //
                         if (fStandalone && fValidate && isAttExternal)
                         {
                              //
                              // Can't have a standalone document declaration of "yes" if  attribute
                              // values are subject to normalisation
                              //
                              fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
                         }
                         nextCh = chSpace;
                     }
                 }
             }
              else
             {
                 if (curState == InWhitespace)
                 {
                     //
                     //  Leave the whitespace state on any non-whitespace char,
                     //  or on an escaped char other than a space. A single
                     //  space is emitted for the whole run, but only if some
                     //  content came before it; otherwise it was leading
                     //  whitespace and is dropped.
                     //
                     if ((escaped && nextCh != chSpace) || !XMLReader::isWhitespace(nextCh))
                     {
                         if (firstNonWS)
                             toFill.append(chSpace);
                         curState = InContent;
                         firstNonWS = true;
                     }
                      else
                     {
                         // Still inside the whitespace run; swallow this char
                         continue;
                     }
                 }
                  else if (curState == InContent)
                 {
                     //
                     //  A space (escaped or not) or any unescaped whitespace
                     //  char starts a whitespace run. Nothing is stored yet;
                     //  the single space is added when the run ends on a
                     //  content char, so trailing whitespace is never stored.
                     //
                     if ((nextCh == chSpace) ||
                         (XMLReader::isWhitespace(nextCh) && !escaped))
                     {
                         curState = InWhitespace;
                         //
                         // Check Validity Constraint for Standalone document declaration
                         // XML 1.0, Section 2.9
                         //
                         if (fStandalone && fValidate && isAttExternal)
                         {
                             //
                             //  Report when this whitespace is leading (no
                             //  content seen yet), is not a plain space, or
                             //  the reader is looking at another space.
                             //
                             if (!firstNonWS || (nextCh != chSpace) || (fReaderMgr.lookingAtSpace()))
                             {
                                  //
                                  // Can't have a standalone document declaration of "yes" if  attribute
                                  // values are subject to normalisation
                                  //
                                  fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
                             }
                         }
                         continue;
                     }
                     firstNonWS = true;
                 }
             }

             // Else add it to the buffer
             toFill.append(nextCh);
         }
     }

     catch(const EndOfEntityException&)
     {
         //
         //  Just eat it and continue. Any half seen surrogate pair is
         //  abandoned; the outer loop re-enters the scanning loop.
         //
         gotLeadingSurrogate = false;
         escaped = false;
     }
     }
     // Not reached: both loops above only exit via return or an exception
     return true;
 }


 //
 //  This method scans a CDATA section. It collects the character into one
 //  of the temp buffers and calls the document handler, if any, with the
 //  characters. It assumes that the <![CDATA string has been scanned before
 //  this call.
 //
 void XMLScanner::scanCDSection()
 {
     //
     //  This is the CDATA section opening sequence, minus the '<' character.
     //  We use this to watch for nested CDATA sections, which are illegal.
     //
     static const XMLCh CDataPrefix[] =
     {
             chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A
         ,   chLatin_T, chLatin_A, chOpenSquare, chNull
     };

     static const XMLCh CDataClose[] =
     {
             chCloseSquare, chCloseAngle, chNull
     };


     //
     //  The next character should be the opening square bracket. If not
     //  issue an error, but then try to recover by skipping any whitespace
     //  and checking again.
     //
     if (!fReaderMgr.skippedChar(chOpenSquare))
     {
         emitError(XMLErrs::ExpectedOpenSquareBracket);
         fReaderMgr.skipPastSpaces();

         // If we still don't find it, then give up, else keep going
         if (!fReaderMgr.skippedChar(chOpenSquare))
             return;
     }

     // Get a buffer for this
     XMLBufBid bbCData(&fBufMgr);

     //
     //  We just scan forward until we hit the end of CDATA section sequence.
     //  CDATA is effectively a big escape mechanism so we don't treat markup
     //  characters specially here.
     //
     bool            emittedError = false;
     while (true)
     {
         const XMLCh nextCh = fReaderMgr.getNextChar();

         // Watch for unexpected end of file
         if (!nextCh)
         {
             emitError(XMLErrs::UnterminatedCDATASection);
             ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
         }

         if (fValidate && fStandalone && (XMLReader::isWhitespace(nextCh)))
         {
             //
             // This document is standalone; this ignorable CDATA whitespace is forbidden.
             // XML 1.0, Section 2.9
             //

             // And see if the current element is a 'Children' style content model
             const ElemStack::StackElem* topElem = fElemStack.topElement();

             if (topElem->fThisElement->isExternal()) {

                 // Get the character data opts for the current element
                 XMLElementDecl::CharDataOpts charOpts =  topElem->fThisElement->getCharDataOpts();

                 if (charOpts == XMLElementDecl::SpacesOk) // Element Content
                 {
                     // Error - standalone should have a value of "no" as whitespace detected in an
                     // element type with element content whose element declaration was external
                     //
                     fValidator->emitError(XMLValid::NoWSForStandalone);
                 }
             }
         }

         //
         //  If this is a close square bracket it could be our closing
         //  sequence.
         //
         if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
         {
             if (fGrammarType == Grammar::SchemaGrammarType) {

                 // call all active identity constraints
                 unsigned int count = fMatcherStack->getMatcherCount();

                 for (unsigned int i = 0; i < count; i++) {
                     fMatcherStack->getMatcherAt(i)->docCharacters(bbCData.getRawBuffer(), bbCData.getLen());
                 }
             }

             // If we have a doc handler, call it
             if (fDocHandler)
             {
                 fDocHandler->docCharacters
                     (
                     bbCData.getRawBuffer()
                     , bbCData.getLen()
                     , true
                     );
             }

             // And we are done
             break;
         }

         //
         //  Make sure its a valid character. But if we've emitted an error
         //  already, don't bother with the overhead since we've already told
         //  them about it.
         //
         if (!emittedError)
         {
             if (!XMLReader::isXMLChar(nextCh))
             {
                 XMLCh tmpBuf[9];
                 XMLString::binToText
                 (
                     nextCh
                     , tmpBuf
                     , 8
                     , 16
                 );
                 emitError(XMLErrs::InvalidCharacter, tmpBuf);
                 emittedError = true;
             }
         }

         // Add it to the buffer
         bbCData.append(nextCh);
     }
 }


 //
 //  Scans character data into toUse until the reader manager reports that
 //  the next character is a '<' (or that no character is available), and
 //  then sends what was accumulated via sendCharData().
 //
 //  Along the way it:
 //    - reports the ]]> sequence, which is illegal in character data
 //    - hands references to scanEntityRef(), flushing the accumulated text
 //      via sendCharData() first
 //    - checks surrogate pairing and XML character validity
 //    - on an end of entity, flushes the accumulated text and tells the
 //      document handler, if any, that the entity reference ended
 //    - performs the standalone whitespace validity check (XML 1.0,
 //      Section 2.9) on what is in the buffer at the end
 //
 void XMLScanner::scanCharData(XMLBuffer& toUse)
 {
     //
     //  We have to watch for the stupid ]]> sequence, which is illegal in
     //  character data. So this is a little state machine that handles that.
     //  GotOne and GotTwo count the consecutive unescaped ']' seen so far.
     //
     enum States
     {
         State_Waiting
         , State_GotOne
         , State_GotTwo
     };

     // Reset the buffer before we start
     toUse.reset();

     // Turn on the 'throw at end' flag of the reader manager
     ThrowEOEJanitor jan(&fReaderMgr, true);

     //
     //  In order to be more efficient we have to use kind of a deeply nested
     //  set of blocks here. The outer block puts on a try and catches end of
     //  entity exceptions. The inner loop is the per-character loop. If we
     //  put the try inside the inner loop, it would work but would require
     //  the exception handling code setup/teardown code to be invoked for
     //  each character.
     //
     //  secondCh holds a second character handed back by scanEntityRef();
     //  it is consumed on the next round before reading from the reader.
     //
     XMLCh   nextCh;
     XMLCh   secondCh = 0;
     States  curState = State_Waiting;
     bool    escaped = false;
     bool    gotLeadingSurrogate = false;
     bool    notDone = true;
     while (notDone)
     {
         try
         {
             while (true)
             {
                 if (secondCh)
                 {
                     nextCh = secondCh;
                     secondCh = 0;
                 }
                  else
                 {
                      //  Eat through as many plain content characters as possible without
                      //  needing special handling.  Moving most content characters here,
                      //  in this one call, rather than running the overall loop once
                      //  per content character, is a speed optimization. It is only
                      //  done when we are not part way through a ]]> sequence or a
                      //  surrogate pair.
                      //
                     if (curState == State_Waiting  &&  !gotLeadingSurrogate)
                     {
                          fReaderMgr.movePlainContentChars(toUse);
                     }


                     // Try to get another char from the source
                     //   The code from here on down covers all contingencies.
                     //
                     if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
                     {
                         // If we were waiting for a trailing surrogate, it's an error
                         if (gotLeadingSurrogate)
                             emitError(XMLErrs::Expected2ndSurrogateChar);

                         // No char was delivered: leave both loops
                         notDone = false;
                         break;
                     }
                 }

                 //
                 //  Watch for a reference. Note that the escapement mechanism
                 //  is ignored in this content.
                 //
                 if (nextCh == chAmpersand)
                 {
                     // Flush what we have before the reference is scanned
                     sendCharData(toUse);

                     //
                     //  Turn off the throwing at the end of entity during this.
                     //  This janitor shadows the outer 'jan' and lives only
                     //  until the end of this if block.
                     //
                     ThrowEOEJanitor jan(&fReaderMgr, false);

                     if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
                     {
                         gotLeadingSurrogate = false;
                         continue;
                     }
                 }
                  else
                 {
                     escaped = false;
                 }

                  //
                  //  Keep the state machine up to date. Only unescaped
                  //  characters can take part in a ]]> sequence. Note that a
                  //  ']' seen in State_GotTwo leaves the state unchanged, so
                  //  longer runs of ']' followed by '>' are reported as well.
                  //
                 if (!escaped)
                 {
                     if (nextCh == chCloseSquare)
                     {
                         if (curState == State_Waiting)
                             curState = State_GotOne;
                         else if (curState == State_GotOne)
                             curState = State_GotTwo;
                     }
                      else if (nextCh == chCloseAngle)
                     {
                         if (curState == State_GotTwo)
                             emitError(XMLErrs::BadSequenceInCharData);
                         curState = State_Waiting;
                     }
                      else
                     {
                         curState = State_Waiting;
                     }
                 }
                  else
                 {
                     curState = State_Waiting;
                 }

                 // Deal with surrogate pairs
                 if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                 {
                     //
                     //  It's a leading surrogate. If we already got one, then
                     //  issue an error, else set leading flag to make sure that
                     //  we look for a trailing next time.
                     //
                     if (gotLeadingSurrogate)
                         emitError(XMLErrs::Expected2ndSurrogateChar);
                     else
                         gotLeadingSurrogate = true;
                 }
                  else
                 {
                     //
                     //  If it's a trailing surrogate, make sure that we are
                     //  prepared for that. Else, it's just a regular char so make
                     //  sure that we were not expecting a trailing surrogate.
                     //
                     if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                     {
                         // It's trailing, so make sure we were expecting it
                         if (!gotLeadingSurrogate)
                             emitError(XMLErrs::Unexpected2ndSurrogateChar);
                     }
                      else
                     {
                         //
                         //  It's just a char, so make sure we were not expecting a
                         //  trailing surrogate.
                         //
                         if (gotLeadingSurrogate)
                             emitError(XMLErrs::Expected2ndSurrogateChar);

                         //
                         //  Make sure the returned char is a valid XML char.
                         //  The offending char is reported as hex text.
                         //
                         if (!XMLReader::isXMLChar(nextCh))
                         {
                             XMLCh tmpBuf[9];
                             XMLString::binToText
                             (
                                 nextCh
                                 , tmpBuf
                                 , 8
                                 , 16
                             );
                             emitError(XMLErrs::InvalidCharacter, tmpBuf);
                         }
                     }
                     gotLeadingSurrogate = false;
                 }

                 // Add this char to the buffer
                 toUse.append(nextCh);
             }
         }

         catch(const EndOfEntityException& toCatch)
         {
             //
             //  Some entity ended, so we have to send any accumulated
             //  chars and send an end of entity event. The outer loop then
             //  re-enters the scanning loop, since notDone is still true.
             //
             sendCharData(toUse);
             gotLeadingSurrogate = false;

             if (fDocHandler)
                 fDocHandler->endEntityReference(toCatch.getEntity());
         }
     }
     //
     // Check the validity constraints as per XML 1.0 Section 2.9
     //
     // NOTE(review): only what is in toUse at this point is inspected. If
     // sendCharData() empties the buffer (not visible here), text flushed
     // earlier for a reference or an end of entity is not covered by this
     // check - confirm.
     //

     const XMLCh* rawBuf = toUse.getRawBuffer();
     const unsigned int len = toUse.getLen();

     if (fValidate && fStandalone)
     {
         // See if the text contains whitespace
         // Get the raw data we need for the callback
         const bool isSpaces = XMLReader::containsWhiteSpace(rawBuf, len);
         if (isSpaces)
         {
             // And see if the current element is a 'Children' style content model
             const ElemStack::StackElem* topElem = fElemStack.topElement();

             if (topElem->fThisElement->isExternal()) {

                 // Get the character data opts for the current element
                 XMLElementDecl::CharDataOpts charOpts =  topElem->fThisElement->getCharDataOpts();

                 if (charOpts == XMLElementDecl::SpacesOk)  // => Element Content
                 {
                     // Error - standalone should have a value of "no" as whitespace detected in an
                     // element type with element content whose element declaration was external
                     //
                     fValidator->emitError(XMLValid::NoWSForStandalone);
                 }
             }
         }
     }
     // Send any char data that we accumulated into the buffer
     sendCharData(toUse);
 }


 //
 //  This method scans a numeric character reference and returns the
 //  character that was referred to. It assumes that we've already scanned
 //  the &# characters that prefix the numeric code.
 //
 //  toFill  Receives the character. For a value of 0x10000 or above it
 //          receives the leading surrogate of the pair.
 //  second  Receives the trailing surrogate for a value of 0x10000 or
 //          above, else zero.
 //
 //  Returns true if a legal character was scanned. Returns false, after
 //  emitting an error, if a character that is neither a digit nor the
 //  terminating semicolon is hit (toFill is set to zero and that character
 //  is left unread) or if the value is not a legal XML character (the
 //  output parameters are left untouched in that case).
 //
 //  Throws UnexpectedEOFException if a zero char is seen before the
 //  semicolon.
 //
 bool XMLScanner::scanCharRef(XMLCh& toFill, XMLCh& second)
 {
     // The highest code point that the XML Char production allows
     const unsigned int maxCodePoint = 0x10FFFF;

     bool gotOne = false;
     unsigned int value = 0;

     //
     //  Set the radix. It's supposed to be a lower case x if hex. But, in
     //  order to recover well, we check for an upper and put out an error
     //  for that.
     //
     unsigned int radix = 10;
     if (fReaderMgr.skippedChar(chLatin_x))
     {
         radix = 16;
     }
      else if (fReaderMgr.skippedChar(chLatin_X))
     {
         emitError(XMLErrs::HexRadixMustBeLowerCase);
         radix = 16;
     }

     while (true)
     {
         // Only peek, so that a char that ends the reference badly stays unread
         const XMLCh nextCh = fReaderMgr.peekNextChar();

         // Watch for EOF
         if (!nextCh)
             ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);

         // Break out on the terminating semicolon
         if (nextCh == chSemiColon)
         {
             fReaderMgr.getNextChar();
             break;
         }

         //
         //  Convert this char to a binary value, or bail out if it's not
         //  one.
         //
         unsigned int nextVal;
         if ((nextCh >= chDigit_0) && (nextCh <= chDigit_9))
             nextVal = (unsigned int)(nextCh - chDigit_0);
          else if ((nextCh >= chLatin_A) && (nextCh <= chLatin_F))
             nextVal= (unsigned int)(10 + (nextCh - chLatin_A));
          else if ((nextCh >= chLatin_a) && (nextCh <= chLatin_f))
             nextVal = (unsigned int)(10 + (nextCh - chLatin_a));
          else
         {
             // Return a zero
             toFill = 0;

             //
             //  If we got at least a digit, then do an unterminated ref error.
             //  Else, do an expected a numerical ref thing.
             //
             if (gotOne)
                 emitError(XMLErrs::UnterminatedCharRef);
             else
                 emitError(XMLErrs::ExpectedNumericalCharRef);

             // Return failure
             return false;
         }

         //
         //  Make sure it's valid for the radix. If not, then just eat the
         //  digit and go on after issuing an error. Else, update the
         //  running value with this new digit.
         //
         if (nextVal >= radix)
         {
             XMLCh tmpStr[2];
             tmpStr[0] = nextCh;
             tmpStr[1] = chNull;
             emitError(XMLErrs::BadDigitForRadix, tmpStr);
         }
          else
         {
             value = (value * radix) + nextVal;

             //
             //  Saturate just above the legal range. Without this an overly
             //  long digit string would wrap the unsigned accumulator and
             //  could land back on a value that looks legal. The saturated
             //  value stays small enough that the multiply above can never
             //  overflow, and it is rejected by the range check below. We
             //  keep eating digits so the whole reference is still consumed.
             //
             if (value > maxCodePoint)
                 value = maxCodePoint + 1;
         }

         // Indicate that we got at least one digit (good for the radix or not)
         gotOne = true;

         // And eat the last char
         fReaderMgr.getNextChar();
     }

     //
     //  [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF]        // any Unicode character, excluding the
     //               | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // surrogate blocks, FFFE, and FFFF.
     //
     bool validChar = false;
     if (value < 0x20)
     {
         if (value == 0x09 || value == 0x0A || value == 0x0D)
         {
             validChar = true;
         }
     }
     else if (value <= 0xD7FF || (value >= 0xE000 && (value <= 0xFFFD || (value >= 0x10000 && value <= maxCodePoint))))
     {
         validChar = true;
     }

     if (!validChar)
     {
         // Character reference was not in the valid range
         emitError(XMLErrs::InvalidCharacterRef);
         return false;
     }

     // Return the char (or chars, as a surrogate pair)
     if (value >= 0x10000)
     {
         value -= 0x10000;
         toFill = XMLCh((value >> 10) + 0xD800);
         second = XMLCh((value & 0x3FF) + 0xDC00);
     }
      else
     {
         toFill = XMLCh(value);
         second = 0;
     }

     return true;
 }


 //
 //  We get here after the '<!--' part of the comment. We scan past the
 //  terminating '-->' It will calls the appropriate handler with the comment
 //  text, if one is provided. A comment can be in either the document or
 //  the DTD, so the fInDocument flag is used to know which handler to send
 //  it to.
 //
 void XMLScanner::scanComment()
 {
     enum States
     {
         InText
         , OneDash
         , TwoDashes
     };

     // Get a buffer for this
     XMLBufBid bbComment(&fBufMgr);

     //
     //  Get the comment text into a temp buffer. Be sure to use temp buffer
     //  two here, since its to be used for stuff that is potentially longer
     //  than just a name.
     //
     States curState = InText;
     bool gotLeadingSurrogate = false;
     while (true)
     {
         // Get the next character
         const XMLCh nextCh = fReaderMgr.getNextChar();

         //  Watch for an end of file
         if (!nextCh)
         {
             emitError(XMLErrs::UnterminatedComment);
             ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
         }

         // Check for correct surrogate pairs
         if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
         {
             if (gotLeadingSurrogate)
                 emitError(XMLErrs::Expected2ndSurrogateChar);
             else
                 gotLeadingSurrogate = true;
         }
         else
         {
             if (gotLeadingSurrogate)
             {
                 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
                     emitError(XMLErrs::Expected2ndSurrogateChar);
             }
             // Its got to at least be a valid XML character
             else if (!XMLReader::isXMLChar(nextCh)) {

                 XMLCh tmpBuf[9];
                 XMLString::binToText
                 (
                     nextCh
                     , tmpBuf
                     , 8
                     , 16
                 );
                 emitError(XMLErrs::InvalidCharacter, tmpBuf);
             }

             gotLeadingSurrogate = false;
         }

         if (curState == InText)
         {
             // If its a dash, go to OneDash state. Otherwise take as text
             if (nextCh == chDash)
                 curState = OneDash;
             else
                 bbComment.append(nextCh);
         }
          else if (curState == OneDash)
         {
             //
             //  If its another dash, then we change to the two dashes states.
             //  Otherwise, we have to put in the deficit dash and the new
             //  character and go back to InText.
             //
             if (nextCh == chDash)
             {
                 curState = TwoDashes;
             }
              else
             {
                 bbComment.append(chDash);
                 bbComment.append(nextCh);
                 curState = InText;
             }
         }
          else if (curState == TwoDashes)
         {
             // The next character must be the closing bracket
             if (nextCh != chCloseAngle)
             {
                 emitError(XMLErrs::IllegalSequenceInComment);
                 fReaderMgr.skipPastChar(chCloseAngle);
                 return;
             }
             break;
         }
     }

     // If we have an available handler, call back with the comment.
     if (fDocHandler)
     {
         fDocHandler->docComment
         (
             bbComment.getRawBuffer()
         );
     }
 }


 //
 //  Most equal signs can have white space around them, so this little guy
 //  just makes the calling code cleaner by eating whitespace.
 //
 bool XMLScanner::scanEq()
 {
     // Eat any whitespace in front of the expected '='
     fReaderMgr.skipPastSpaces();

     // No equal sign here, so report failure (leading spaces stay consumed)
     if (!fReaderMgr.skippedChar(chEqual))
         return false;

     // Eat any whitespace after it so the caller lands on the value
     fReaderMgr.skipPastSpaces();
     return true;
 }


 //
 //  This method will scan a general/character entity ref. It will either
 //  expand a char ref and return it directly, or push a reader for a general
 //  entity.
 //
 //  The return value indicates whether the char parameters hold the value
 //  or whether the value was pushed as a reader, or that it failed.
 //
 //  The escaped flag tells the caller whether the returned parameter resulted
 //  from a character reference, which escapes the character in some cases. It
 //  only makes any difference if the return value indicates the value was
 //  returned directly.
 //
 XMLScanner::EntityExpRes
 XMLScanner::scanEntityRef(  const   bool    inAttVal
                             ,       XMLCh&  firstCh
                             ,       XMLCh&  secondCh
                             ,       bool&   escaped)
 {
     //
     //  Result summary:
     //    EntityExp_Returned - the value is in firstCh (char refs are filled
     //                         in by scanCharRef, which also gets secondCh);
     //                         escaped is set to true.
     //    EntityExp_Pushed   - the entity content is to be read through the
     //                         reader manager rather than returned here.
     //    EntityExp_Failed   - the reference could not be expanded.
     //
     //  inAttVal tells us the reference occurs inside an attribute value,
     //  which makes external references illegal and suppresses the
     //  startEntityReference() callbacks.
     //

     // Defaults: no second character and not escaped
     secondCh = 0;
     escaped = false;

     // Remember the current reader so we can ensure its all in one entity
     const unsigned int curReader = fReaderMgr.getCurrentReaderNum();

     //
     //  If the next char is a pound, then its a character reference and we
     //  need to expand it always.
     //
     if (fReaderMgr.skippedChar(chPound))
     {
         //
         //  Its a character reference, so scan it and get back the numeric
         //  value it represents.
         //
         if (!scanCharRef(firstCh, secondCh))
             return EntityExp_Failed;

         escaped = true;

         // The reference must end in the same reader it started in
         if (curReader != fReaderMgr.getCurrentReaderNum())
             emitError(XMLErrs::PartialMarkupInEntity);

         return EntityExp_Returned;
     }

     // Its a normal (general) entity ref, so get the entity name
     XMLBufBid bbName(&fBufMgr);
     if (!fReaderMgr.getName(bbName.getBuffer()))
     {
         emitError(XMLErrs::ExpectedEntityRefName);
         return EntityExp_Failed;
     }

     //
     //  Next char must be a semi-colon. But if its not, just emit
     //  an error and try to continue.
     //
     if (!fReaderMgr.skippedChar(chSemiColon))
         emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());

     // Make sure we ended up on the same entity reader as the & char
     if (curReader != fReaderMgr.getCurrentReaderNum())
         emitError(XMLErrs::PartialMarkupInEntity);

     // Look up the name in the general entity pool
     XMLEntityDecl* decl = fEntityDeclPool->getByKey(bbName.getRawBuffer());

     // If it does not exist, then obviously an error
     if (!decl)
     {
         // XML 1.0 Section 4.1
         // Well-formedness Constraint for entity not found:
         //   In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
         //      or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
         //      or a parameter entity
         //
         // Else it's Validity Constraint
         //
         // So: a well-formedness error when standalone or there is no DTD,
         // otherwise a validity error that is only reported when validating.
         //
         if (fStandalone || fHasNoDTD)
             emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
         else {
             if (fValidate)
                 fValidator->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());
         }

         return EntityExp_Failed;
     }

     //
     // XML 1.0 Section 2.9
     //  If we are a standalone document, then it has to have been declared
     //  in the internal subset. Keep going though. This is only reported
     //  when validating.
     //
     if (fStandalone && !decl->getDeclaredInIntSubset() && fValidate)
         fValidator->emitError(XMLValid::IllegalRefInStandalone, bbName.getRawBuffer());

     if (decl->isExternal())
     {
         // If its unparsed, then its not valid here
         if (decl->isUnparsed())
         {
             emitError(XMLErrs::NoUnparsedEntityRefs, bbName.getRawBuffer());
             return EntityExp_Failed;
         }

         // If we are in an attribute value, then not valid but keep going
         if (inAttVal)
             emitError(XMLErrs::NoExtRefsInAttValue);

         // And now create a reader to read this entity
         InputSource* srcUsed;
         XMLReader* reader = fReaderMgr.createReader
         (
             decl->getSystemId()
             , decl->getPublicId()
             , false
             , XMLReader::RefFrom_NonLiteral
             , XMLReader::Type_General
             , XMLReader::Source_External
             , srcUsed
         );

         // Put a janitor on the source so it gets cleaned up on exit
         Janitor<InputSource> janSrc(srcUsed);

         //
         //  If no reader could be created, throw a RuntimeException that
         //  names the system id of the source that was used.
         //
         if (!reader)
             ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed->getSystemId());

         //
         //  Push the reader. If its a recursive expansion, then emit an error
         //  and return a failure.
         //
         if (!fReaderMgr.pushReader(reader, decl))
         {
             emitError(XMLErrs::RecursiveEntity, decl->getName());
             return EntityExp_Failed;
         }

         //
         //  Do a start entity reference event.
         //
         //  <TBD> For now, we suppress them in att values. Later, when
         //  the stuff is in place to correctly allow DOM to handle them
         //  we'll turn this back on.
         //
         if (fDocHandler && !inAttVal)
             fDocHandler->startEntityReference(*decl);

         // If it starts with the XML string, then parse a text decl
         if (checkXMLDecl(true))
             scanXMLDecl(Decl_Text);
     }
      else
     {
         //
         //  If its one of the special char references, then we can return
         //  it as a character, and its considered escaped. Only the first
         //  character of the entity value is returned.
         //
         if (decl->getIsSpecialChar())
         {
             firstCh = decl->getValue()[0];
             escaped = true;
             return EntityExp_Returned;
         }

         //
         //  Create a reader over a memory stream over the entity value.
         //  Per the original author, the reader is forced to assume UTF-16
         //  so it won't bother trying to predecode the first line looking
         //  for an XML/TextDecl; that handling is inside createIntEntReader
         //  and is not visible here.
         //
         XMLReader* valueReader = fReaderMgr.createIntEntReader
         (
             decl->getName()
             , XMLReader::RefFrom_NonLiteral
             , XMLReader::Type_General
             , decl->getValue()
             , decl->getValueLen()
             , false
         );

         //
         //  Try to push the entity reader onto the reader manager stack,
         //  where it will become the subsequent input. If it fails, that
         //  means the entity is recursive, so issue an error. The reader
         //  will have just been discarded, but we just keep going.
         //
         //  NOTE(review): unlike the external branch above, a failed push
         //  here still falls through to the start entity reference event
         //  and returns EntityExp_Pushed - confirm this is intended.
         //
         if (!fReaderMgr.pushReader(valueReader, decl))
             emitError(XMLErrs::RecursiveEntity, decl->getName());

         //
         //  Do a start entity reference event.
         //
         //  <TBD> For now, we suppress them in att values. Later, when
         //  the stuff is in place to correctly allow DOM to handle them
         //  we'll turn this back on.
         //
         if (fDocHandler && !inAttVal)
             fDocHandler->startEntityReference(*decl);

         //
         //  If it starts with the XML string, then it's an error. Report it
         //  and skip past the next closing angle bracket.
         //
         if (checkXMLDecl(true)) {
             emitError(XMLErrs::TextDeclNotLegalHere);
             fReaderMgr.skipPastChar(chCloseAngle);
         }
     }
     return EntityExp_Pushed;
 }


 unsigned int
 XMLScanner::scanUpToWSOr(XMLBuffer& toFill, const XMLCh chEndChar)
 {
     // Have the reader manager fill the buffer via getUpToCharOrWS()
     fReaderMgr.getUpToCharOrWS(toFill, chEndChar);

     // Report the buffer's length after the fill
     const unsigned int filledLen = toFill.getLen();
     return filledLen;
 }

 bool XMLScanner::switchGrammar(unsigned int newGrammarNameSpaceIndex)
 {
     XMLBufBid bbURI(&fBufMgr);
     XMLBuffer& bufURI = bbURI.getBuffer();
     getURIText(newGrammarNameSpaceIndex, bufURI);
     Grammar* tempGrammar = fGrammarResolver->getGrammar(bufURI.getRawBuffer());
     if (!tempGrammar) {
         // This is a case where namespaces is on with a DTD grammar.
         tempGrammar = fGrammarResolver->getGrammar(XMLUni::fgZeroLenString);
     }
     if (!tempGrammar)
         return false;
     else {
         fGrammar = tempGrammar;
         fGrammarType = fGrammar->getGrammarType();
         if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
             if (fValidatorFromUser)
                 ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
             else {
                 fValidator = fSchemaValidator;
             }
         }
         else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
             if (fValidatorFromUser)
                 ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
             else {
                 fValidator = fDTDValidator;
             }
         }

         fValidator->setGrammar(fGrammar);
         return true;
     }
 }

 bool XMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace)
 {
     Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace);
     if (!tempGrammar) {
         // This is a case where namespaces is on with a DTD grammar.
         tempGrammar = fGrammarResolver->getGrammar(XMLUni::fgZeroLenString);
     }
     if (!tempGrammar)
         return false;
     else {
         fGrammar = tempGrammar;
         fGrammarType = fGrammar->getGrammarType();
         if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
             if (fValidatorFromUser)
                 ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
             else {
                 fValidator = fSchemaValidator;
             }
         }
         else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
             if (fValidatorFromUser)
                 ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
             else {
                 fValidator = fDTDValidator;
             }
         }

         fValidator->setGrammar(fGrammar);
         return true;
     }
 }

 // check if we should skip or lax the validation of the element
 // if skip - no validation
 // if lax - validate only if the element is found
 bool XMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
                                       const XMLContentModel* const cm,
                                       const unsigned int parentElemDepth)
 {
     bool skipThisOne = false;
     bool laxThisOne = false;
     const unsigned int elementURI = element->getURI();
     const unsigned int stateOnEntry = fElemState[parentElemDepth];

     // The parent's content model state is already invalid; nothing to do
     if (stateOnEntry == XMLContentModel::gInvalidTrans)
         return laxThisOne;

     SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool);

     if (cv) {
         const unsigned int leafTotal = cv->getLeafCount();
         bool transitioned = false;

         //
         //  Walk the leaves looking for the first one that both matches the
         //  element and has a valid transition out of the current state.
         //
         for (unsigned int leafIdx = 0; leafIdx < leafTotal; leafIdx++) {

             QName* leafName = cv->getLeafNameAt(leafIdx);
             const unsigned int leafURI = leafName->getURI();
             const ContentSpecNode::NodeTypes leafType = cv->getLeafTypeAt(leafIdx);

             if (leafType == ContentSpecNode::Leaf) {
                 // Named leaf: match on URI + local part, or by substitution group
                 const bool sameQName =
                     (leafURI == elementURI)
                     && !XMLString::compareString(leafName->getLocalPart(), element->getLocalPart());

                 if (!sameQName && !comparator.isEquivalentTo(element, leafName))
                     continue;

                 const unsigned int target = cm->getNextState(stateOnEntry, leafIdx);
                 if (target == XMLContentModel::gInvalidTrans)
                     continue;

                 fElemState[parentElemDepth] = target;
                 transitioned = true;
                 break;
             }

             //
             //  Wildcard leaf: the low four bits give the basic kind (any,
             //  any-other, any-in-namespace) regardless of skip/lax flavor.
             //
             const int wildcardKind = leafType & 0x0f;
             const bool wildcardHit =
                   (wildcardKind == ContentSpecNode::Any)       ? true
                 : (wildcardKind == ContentSpecNode::Any_Other) ? (leafURI != elementURI)
                 : (wildcardKind == ContentSpecNode::Any_NS)    ? (leafURI == elementURI)
                 : false;

             if (!wildcardHit)
                 continue;

             const unsigned int target = cm->getNextState(stateOnEntry, leafIdx);
             if (target == XMLContentModel::gInvalidTrans)
                 continue;

             fElemState[parentElemDepth] = target;

             // The full type says whether this wildcard is skip or lax
             const bool isSkipWildcard =
                 (leafType == ContentSpecNode::Any_Skip)
                 || (leafType == ContentSpecNode::Any_NS_Skip)
                 || (leafType == ContentSpecNode::Any_Other_Skip);
             const bool isLaxWildcard =
                 (leafType == ContentSpecNode::Any_Lax)
                 || (leafType == ContentSpecNode::Any_NS_Lax)
                 || (leafType == ContentSpecNode::Any_Other_Lax);

             if (isSkipWildcard)
                 skipThisOne = true;
             else if (isLaxWildcard)
                 laxThisOne = true;

             transitioned = true;
             break;
         }

         // No leaf accepted the element, so mark the parent state invalid
         if (!transitioned) {
             fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
             return laxThisOne;
         }
     }

     // A skip wildcard turns validation off, here and on the element stack
     if (skipThisOne) {
         fValidate = false;
         fElemStack.setValidationFlag(fValidate);
     }

     return laxThisOne;
 }


 // check if there is an AnyAttribute, and if so, see if we should lax or skip
 // if skip - no validation
 // if lax - validate only if the attribute is found
 //
 //  attWildCard : the attribute wildcard to test against
 //  uriId       : URI id of the attribute being checked
 //  skipThisOne : set to true if the wildcard matched and its default type
 //                is ProcessContents_Skip, else false
 //  laxThisOne  : set to true if the wildcard matched and its default type
 //                is ProcessContents_Lax, else false
 //
 //  Returns true if the wildcard covers the given URI id.
 //
 bool XMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne)
 {
     XMLAttDef::AttTypes wildCardType = attWildCard->getType();
     bool anyEncountered = false;
     skipThisOne = false;
     laxThisOne = false;

     if (wildCardType == XMLAttDef::Any_Any) {
         // Matches every namespace
         anyEncountered = true;
     }
     else if (wildCardType == XMLAttDef::Any_Other) {
         // Matches any namespace other than the wildcard's own
         if (attWildCard->getAttName()->getURI() != uriId)
             anyEncountered = true;
     }
     else if (wildCardType == XMLAttDef::Any_List) {
         // Matches if the URI id appears in the wildcard's namespace list
         ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList();
         const unsigned int listSize = (nameURIList) ? nameURIList->size() : 0;

         // A missing or empty list gives a zero size, so the loop is a no-op
         for (unsigned int i = 0; i < listSize; i++) {
             if (nameURIList->elementAt(i) == uriId) {
                 anyEncountered = true;
                 break;  // first hit decides; no need to scan the rest
             }
         }
     }

     if (anyEncountered) {
         // The wildcard's default type carries its processContents setting
         XMLAttDef::DefAttTypes   defType   = attWildCard->getDefaultType();
         if (defType == XMLAttDef::ProcessContents_Skip) {
             // attribute should just be bypassed,
             skipThisOne = true;
         }
         else if (defType == XMLAttDef::ProcessContents_Lax) {
             laxThisOne = true;
         }
     }

     return anyEncountered;
 }