src/xercesc/validators/common/DFAContentModel.cpp - xerces-c - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
  * $Id$
  */


 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <xercesc/util/RuntimeException.hpp>
 #include <xercesc/framework/XMLBuffer.hpp>
 #include <xercesc/framework/XMLElementDecl.hpp>
 #include <xercesc/framework/XMLValidator.hpp>
 #include <xercesc/validators/common/CMAny.hpp>
 #include <xercesc/validators/common/CMBinaryOp.hpp>
 #include <xercesc/validators/common/CMLeaf.hpp>
 #include <xercesc/validators/common/CMRepeatingLeaf.hpp>
 #include <xercesc/validators/common/CMUnaryOp.hpp>
 #include <xercesc/validators/common/DFAContentModel.hpp>
 #include <xercesc/validators/common/ContentSpecNode.hpp>
 #include <xercesc/validators/common/Grammar.hpp>
 #include <xercesc/validators/schema/SchemaSymbols.hpp>
 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
 #include <xercesc/validators/schema/XercesElementWildcard.hpp>
 #include <xercesc/util/RefHashTableOf.hpp>
 #include <xercesc/util/XMLInteger.hpp>
 #include <math.h>

 namespace XERCES_CPP_NAMESPACE {

 struct CMStateSetHasher
 {
   XMLSize_t getHashVal(const void *const key, XMLSize_t mod)
   {
     const CMStateSet* const pkey = (const CMStateSet*) key;
     return ((pkey->hashCode()) % mod);
   }

   bool equals(const void *const key1, const void *const key2)
   {
     const CMStateSet* const pkey1 = (const CMStateSet*) key1;
     const CMStateSet* const pkey2 = (const CMStateSet*) key2;
     return (*pkey1==*pkey2);
   }
 };

 // ---------------------------------------------------------------------------
 //  DFAContentModel: Constructors and Destructor
 // ---------------------------------------------------------------------------
 DFAContentModel::DFAContentModel( const bool             dtd
                                 , ContentSpecNode* const elemContentSpec
                                 , MemoryManager* const   manager) :

     fElemMap(0)
     , fElemMapType(0)
     , fElemMapSize(0)
     , fEmptyOk(false)
     , fEOCPos(0)
     , fFinalStateFlags(0)
     , fFollowList(0)
     , fHeadNode(0)
     , fLeafCount(0)
     , fLeafList(0)
     , fLeafListType(0)
     , fTransTable(0)
     , fTransTableSize(0)
     , fCountingStates(0)
     , fDTD(dtd)
     , fIsMixed(false)
     , fLeafNameTypeVector(0)
     , fMemoryManager(manager)
 {
     // And build the DFA data structures
     buildDFA(elemContentSpec);
 }

 DFAContentModel::DFAContentModel( const bool             dtd
                                 , ContentSpecNode* const elemContentSpec
                                 , const bool             isMixed
                                 , MemoryManager* const   manager):

     fElemMap(0)
     , fElemMapType(0)
     , fElemMapSize(0)
     , fEmptyOk(false)
     , fEOCPos(0)
     , fFinalStateFlags(0)
     , fFollowList(0)
     , fHeadNode(0)
     , fLeafCount(0)
     , fLeafList(0)
     , fLeafListType(0)
     , fTransTable(0)
     , fTransTableSize(0)
     , fCountingStates(0)
     , fDTD(dtd)
     , fIsMixed(isMixed)
     , fLeafNameTypeVector(0)
     , fMemoryManager(manager)
 {
     // And build the DFA data structures
     buildDFA(elemContentSpec);
 }

 DFAContentModel::~DFAContentModel()
 {
     //
     //  Clean up all the stuff that is not just temporary representation
     //  data that was cleaned up after building the DFA.
     //
     fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags;

     unsigned int index;
     for (index = 0; index < fTransTableSize; index++)
         fMemoryManager->deallocate(fTransTable[index]); //delete [] fTransTable[index];
     fMemoryManager->deallocate(fTransTable); //delete [] fTransTable;

     if(fCountingStates)
     {
         for (unsigned int j = 0; j < fTransTableSize; ++j)
             delete fCountingStates[j];
         fMemoryManager->deallocate(fCountingStates);
     }

     for (index = 0; index < fLeafCount; index++)
         delete fElemMap[index];
     fMemoryManager->deallocate(fElemMap); //delete [] fElemMap;

     fMemoryManager->deallocate(fElemMapType); //delete [] fElemMapType;
     fMemoryManager->deallocate(fLeafListType); //delete [] fLeafListType;

     delete fLeafNameTypeVector;
 }


 // ---------------------------------------------------------------------------
 //  DFAContentModel: Implementation of the ContentModel virtual interface
 // ---------------------------------------------------------------------------
 bool
 DFAContentModel::validateContent( QName** const        children
                                 , XMLSize_t            childCount
                                 , unsigned int
                                 , XMLSize_t*           indexFailingChild
                                 , MemoryManager*    const) const
 {
     //
     //  If there are no children, then either we fail on the 0th element
     //  or we return success. It depends upon whether this content model
     //  accepts empty content, which we determined earlier.
     //
     if (!childCount)
     {
         // success
         if(fEmptyOk)
             return true;
         *indexFailingChild=0;
         return false;
     }

     //
     //  Lets loop through the children in the array and move our way
     //  through the states. Note that we use the fElemMap array to map
     //  an element index to a state index.
     //
     unsigned int curState = 0;
     unsigned int nextState = 0;
     unsigned int loopCount = 0;
     unsigned int childIndex = 0;
     for (; childIndex < childCount; childIndex++)
     {
         // Get the current element index out
         const QName* curElem = children[childIndex];
         const XMLCh* curElemRawName = 0;
         if (fDTD)
             curElemRawName = curElem->getRawName();

         // If this is text in a Schema mixed content model, skip it.
         if ( fIsMixed &&
             ( curElem->getURI() == XMLElementDecl::fgPCDataElemId))
             continue;

         // Look up this child in our element map
         unsigned int elemIndex = 0;
         for (; elemIndex < fElemMapSize; elemIndex++)
         {
             const QName* inElem  = fElemMap[elemIndex];
             if (fDTD) {
                 if (XMLString::equals(inElem->getRawName(), curElemRawName)) {
                     nextState = fTransTable[curState][elemIndex];
                     if (nextState != XMLContentModel::gInvalidTrans)
                         break;
                 }
             }
             else {
                 ContentSpecNode::NodeTypes type = fElemMapType[elemIndex];
                 if (type == ContentSpecNode::Leaf)
                 {
                     if ((inElem->getURI() == curElem->getURI()) &&
                     (XMLString::equals(inElem->getLocalPart(), curElem->getLocalPart()))) {
                         nextState = fTransTable[curState][elemIndex];
                         if (nextState != XMLContentModel::gInvalidTrans)
                             break;
                     }
                 }
                 else if ((type & 0x0f)== ContentSpecNode::Any)
                 {
                     nextState = fTransTable[curState][elemIndex];
                     if (nextState != XMLContentModel::gInvalidTrans)
                         break;
                 }
                 else if ((type & 0x0f) == ContentSpecNode::Any_NS)
                 {
                     if (inElem->getURI() == curElem->getURI())
                     {
                         nextState = fTransTable[curState][elemIndex];
                         if (nextState != XMLContentModel::gInvalidTrans)
                             break;
                     }
                 }
                 else if ((type & 0x0f) == ContentSpecNode::Any_Other)
                 {
                     // Here we assume that empty string has id 1.
                     //
                     unsigned int uriId = curElem->getURI();
                     if (uriId != 1 && uriId != inElem->getURI()) {
                         nextState = fTransTable[curState][elemIndex];
                         if (nextState != XMLContentModel::gInvalidTrans)
                             break;
                     }
                 }
             }
         }//for elemIndex

         // If "nextState" is -1, we found a match, but the transition is invalid
         if (nextState == XMLContentModel::gInvalidTrans)
         {
             *indexFailingChild=childIndex;
             return false;
         }

         // If we didn't find it, then obviously not valid
         if (elemIndex == fElemMapSize)
         {
             *indexFailingChild=childIndex;
             return false;
         }

         unsigned int nextLoop = 0;
         if(!handleRepetitions(curElem, curState, loopCount, nextState, nextLoop, elemIndex, 0))
         {
             *indexFailingChild=childIndex;
             return false;
         }

         curState = nextState;
         loopCount = nextLoop;
         nextState = 0;

     }//for childIndex

     //
     //  We transitioned all the way through the input list. However, that
     //  does not mean that we ended in a final state. So check whether
     //  our ending state is a final state.
     //
     if (!fFinalStateFlags[curState])
     {
         *indexFailingChild=childIndex;
         return false;
     }

     // verify if we exited before the minOccurs was satisfied
     if (fCountingStates != 0) {
         Occurence* o = fCountingStates[curState];
         if (o != 0 && loopCount < (unsigned int)o->minOccurs) {
             // not enough loops on the current state to be considered final.
             *indexFailingChild=childIndex;
             return false;
         }
     }

     //success
     return true;
 }

 bool DFAContentModel::validateContentSpecial(QName** const            children
                                             , XMLSize_t               childCount
                                             , unsigned int
                                             , GrammarResolver*  const pGrammarResolver
                                             , XMLStringPool*    const pStringPool
                                             , XMLSize_t*              indexFailingChild
                                             , MemoryManager*    const) const
 {

     SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool);

     if (childCount == 0)
     {
         if(fEmptyOk)
             return true;
         *indexFailingChild=0;
         return false;
     }

     //
     //  Lets loop through the children in the array and move our way
     //  through the states. Note that we use the fElemMap array to map
     //  an element index to a state index.
     //
     unsigned int curState = 0;
     unsigned int loopCount = 0;
     unsigned int nextState = 0;
     unsigned int childIndex = 0;
     for (; childIndex < childCount; childIndex++)
     {
         // Get the current element index out
         QName* curElem = children[childIndex];

         // If this is text in a Schema mixed content model, skip it.
         if ( fIsMixed &&
             ( curElem->getURI() == XMLElementDecl::fgPCDataElemId))
             continue;

         // Look up this child in our element map
         unsigned int elemIndex = 0;
         for (; elemIndex < fElemMapSize; elemIndex++)
         {
             QName* inElem  = fElemMap[elemIndex];
             ContentSpecNode::NodeTypes type = fElemMapType[elemIndex];
             if (type == ContentSpecNode::Leaf)
             {
                 if (comparator.isEquivalentTo(curElem, inElem) )
                 {
                     nextState = fTransTable[curState][elemIndex];
                     if (nextState != XMLContentModel::gInvalidTrans)
                         break;
                 }

             }
             else if ((type & 0x0f)== ContentSpecNode::Any)
             {
                 nextState = fTransTable[curState][elemIndex];
                 if (nextState != XMLContentModel::gInvalidTrans)
                     break;
             }
             else if ((type & 0x0f) == ContentSpecNode::Any_NS)
             {
                 if (inElem->getURI() == curElem->getURI())
                 {
                     nextState = fTransTable[curState][elemIndex];
                     if (nextState != XMLContentModel::gInvalidTrans)
                         break;
                 }
             }
             else if ((type & 0x0f) == ContentSpecNode::Any_Other)
             {
                 // Here we assume that empty string has id 1.
                 //
                 unsigned int uriId = curElem->getURI();
                 if (uriId != 1 && uriId != inElem->getURI())
                 {
                     nextState = fTransTable[curState][elemIndex];
                     if (nextState != XMLContentModel::gInvalidTrans)
                         break;
                 }
             }
         }//for elemIndex

         // If "nextState" is -1, we found a match, but the transition is invalid
         if (nextState == XMLContentModel::gInvalidTrans)
         {
             *indexFailingChild=childIndex;
             return false;
         }

         // If we didn't find it, then obviously not valid
         if (elemIndex == fElemMapSize)
         {
             *indexFailingChild=childIndex;
             return false;
         }

         unsigned int nextLoop = 0;
         if(!handleRepetitions(curElem, curState, loopCount, nextState, nextLoop, elemIndex, &comparator))
         {
             *indexFailingChild=childIndex;
             return false;
         }

         curState = nextState;
         loopCount = nextLoop;
         nextState = 0;

     }//for childIndex

     //
     //  We transitioned all the way through the input list. However, that
     //  does not mean that we ended in a final state. So check whether
     //  our ending state is a final state.
     //
     if (!fFinalStateFlags[curState])
     {
         *indexFailingChild=childIndex;
         return false;
     }

     // verify if we exited before the minOccurs was satisfied
     if (fCountingStates != 0) {
         Occurence* o = fCountingStates[curState];
         if (o != 0) {
             if (loopCount < (unsigned int)o->minOccurs) {
                 // not enough loops on the current state.
                 *indexFailingChild=childIndex;
                 return false;
             }
         }
     }

     //success
     return true;
 }

 bool DFAContentModel::handleRepetitions(const QName* const curElem,
                                         unsigned int curState,
                                         unsigned int currentLoop,
                                         unsigned int& nextState,
                                         unsigned int& nextLoop,
                                         XMLSize_t elemIndex,
                                         SubstitutionGroupComparator * comparator) const
 {
     nextLoop = 0;
     if (fCountingStates != 0) {
         nextLoop = currentLoop;
         Occurence* o = fCountingStates[curState];
         if (o != 0) {
             if (curState == nextState) {
                 if (++nextLoop > (unsigned int)o->maxOccurs && o->maxOccurs != -1) {
                     // It's likely that we looped too many times on the current state
                     // however it's possible that we actually matched another particle
                     // which allows the same name.
                     //
                     // Consider:
                     //
                     // <xs:sequence>
                     //  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
                     //  <xs:element name="foo" type="xs:string" fixed="bar"/>
                     // </xs:sequence>
                     //
                     // and
                     //
                     // <xs:sequence>
                     //  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
                     //  <xs:any namespace="##any" processContents="skip"/>
                     // </xs:sequence>
                     //
                     // In the DFA there will be two transitions from the current state which
                     // allow "foo". Note that this is not a UPA violation. The ambiguity of which
                     // transition to take is resolved by the current value of the counter. Since
                     // we've already seen enough instances of the first "foo" perhaps there is
                     // another element declaration or wildcard deeper in the element map which
                     // matches.
                     unsigned int tempNextState = 0;

                     while (++elemIndex < fElemMapSize) {
                         QName* inElem  = fElemMap[elemIndex];
                         ContentSpecNode::NodeTypes type = fElemMapType[elemIndex];
                         if (type == ContentSpecNode::Leaf)
                         {
                             if(comparator!=0) {
                                 if (comparator->isEquivalentTo(curElem, inElem) )
                                 {
                                     tempNextState = fTransTable[curState][elemIndex];
                                     if (tempNextState != XMLContentModel::gInvalidTrans)
                                         break;
                                 }
                             }
                             else if (fDTD) {
                                 if (XMLString::equals(inElem->getRawName(), curElem->getRawName())) {
                                     tempNextState = fTransTable[curState][elemIndex];
                                     if (tempNextState != XMLContentModel::gInvalidTrans)
                                         break;
                                 }
                             }
                             else {
                                 if ((inElem->getURI() == curElem->getURI()) &&
                                 (XMLString::equals(inElem->getLocalPart(), curElem->getLocalPart()))) {
                                     tempNextState = fTransTable[curState][elemIndex];
                                     if (tempNextState != XMLContentModel::gInvalidTrans)
                                         break;
                                 }
                             }
                         }
                         else if ((type & 0x0f)== ContentSpecNode::Any)
                         {
                             tempNextState = fTransTable[curState][elemIndex];
                             if (tempNextState != XMLContentModel::gInvalidTrans)
                                 break;
                         }
                         else if ((type & 0x0f) == ContentSpecNode::Any_NS)
                         {
                             if (inElem->getURI() == curElem->getURI())
                             {
                                 tempNextState = fTransTable[curState][elemIndex];
                                 if (tempNextState != XMLContentModel::gInvalidTrans)
                                     break;
                             }
                         }
                         else if ((type & 0x0f) == ContentSpecNode::Any_Other)
                         {
                             // Here we assume that empty string has id 1.
                             //
                             unsigned int uriId = curElem->getURI();
                             if (uriId != 1 && uriId != inElem->getURI())
                             {
                                 tempNextState = fTransTable[curState][elemIndex];
                                 if (tempNextState != XMLContentModel::gInvalidTrans)
                                     break;
                             }
                         }
                     }

                     // if we still can't find a match, report the error
                     if (elemIndex == fElemMapSize)
                         return false;

                     // if we found a match, set the next state and reset the
                     // counter if the next state is a counting state.
                     nextState = tempNextState;
                     Occurence* o = fCountingStates[nextState];
                     if (o != 0) {
                       nextLoop = (elemIndex == XMLSize_t (o->elemIndex)) ? 1 : 0;
                     }
                 }
             }
             else if (nextLoop < (unsigned int)o->minOccurs) {
                 // not enough loops on the current state.
                 return false;
             }
             else {
                 // Exiting a counting state. If we're entering a new
                 // counting state, reset the counter.
                 o = fCountingStates[nextState];
                 if (o != 0) {
                   nextLoop = (elemIndex == XMLSize_t (o->elemIndex)) ? 1 : 0;
                 }
             }
         }
         else {
             o = fCountingStates[nextState];
             if (o != 0) {
                 // Entering a new counting state. Reset the counter.
                 // If we've already seen one instance of the looping
                 // particle set the counter to 1, otherwise set it
                 // to 0.
               nextLoop = (elemIndex == XMLSize_t (o->elemIndex)) ? 1 : 0;
             }
         }
     }
     return true;
 }

 // ---------------------------------------------------------------------------
 //  DFAContentModel: Private helper methods
 // ---------------------------------------------------------------------------
 void DFAContentModel::buildDFA(ContentSpecNode* const curNode)
 {
     unsigned int index;

     //
     //  The first step we need to take is to rewrite the content model using
     //  our CMNode objects, and in the process get rid of any repetition short
     //  cuts, converting them into '*' style repetitions or getting rid of
     //  repetitions altogether.
     //
     //  The conversions done are:
     //
     //  x+ -> (x|x*)
     //  x? -> (x|epsilon)
     //
     //  This is a relatively complex scenario. What is happening is that we
     //  create a top level binary node of which the special EOC value is set
     //  as the right side node. The the left side is set to the rewritten
     //  syntax tree. The source is the original content model info from the
     //  decl pool. The rewrite is done by buildSyntaxTree() which recurses the
     //  decl pool's content of the element and builds a new tree in the
     //  process.
     //
     //  Note that, during this operation, we set each non-epsilon leaf node's
     //  DFA state position and count the number of such leafs, which is left
     //  in the fLeafCount member.
     //
     fLeafCount=countLeafNodes(curNode);
     fEOCPos = fLeafCount++;

     //  We need to build an array of references to the non-epsilon
     //  leaf nodes. We will put them in the array according to their position values
     //
     fLeafList = (CMLeaf**) fMemoryManager->allocate(fLeafCount*sizeof(CMLeaf*)); //new CMLeaf*[fLeafCount];
     fLeafListType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate
     (
         fLeafCount * sizeof(ContentSpecNode::NodeTypes)
     ); //new ContentSpecNode::NodeTypes[fLeafCount];
     //
     //  And, moving onward... We now need to build the follow position sets
     //  for all the nodes. So we allocate an array of pointers to state sets,
     //  one for each leaf node (i.e. each significant DFA position.)
     //
     fFollowList = (CMStateSet**) fMemoryManager->allocate
     (
         fLeafCount * sizeof(CMStateSet*)
     ); //new CMStateSet*[fLeafCount];
     for (index = 0; index < fLeafCount; index++)
         fFollowList[index] = new (fMemoryManager) CMStateSet(fLeafCount, fMemoryManager);

     //  The buildSyntaxTree function will recursively iterate over the ContentSpecNode
     //  and build the CMNode hierarchy; it will also put every leaf node in the fLeafList
     //  array, then calculate the first and last position sets of each node. This is
     //  cached away in each of the nodes.
     //
     //  Along the way we also set the leaf count in each node as the maximum
     //  state count. They must know this in order to create their first/last
     //  position sets.
     //
     unsigned int counter=0;
     CMNode* nodeOrgContent = buildSyntaxTree(curNode, counter);
     //
     //  Check to see whether this content model can handle an empty content,
     //  which is something we need to optimize by looking now before we
     //  throw away the info that would tell us that.
     //
     //  If the left node of the head (the top level of the original content)
     //  is nullable, then its true.
     //
     fEmptyOk = nodeOrgContent->isNullable();

     //
     //  And handle specially the EOC node, which also must be numbered and
     //  counted as a non-epsilon leaf node. It could not be handled in the
     //  above tree build because it was created before all that started. We
     //  save the EOC position since its used during the DFA building loop.
     //
     CMLeaf* nodeEOC = new (fMemoryManager) CMLeaf
     (
         new (fMemoryManager) QName
         (
             XMLUni::fgZeroLenString
             , XMLUni::fgZeroLenString
             , XMLContentModel::gEOCFakeId
             , fMemoryManager
         )
         , fEOCPos
         , true
         , fLeafCount
         , fMemoryManager
     );
     fHeadNode = new (fMemoryManager) CMBinaryOp
     (
         ContentSpecNode::Sequence
         , nodeOrgContent
         , nodeEOC
         , fLeafCount
         , fMemoryManager
     );

     //  Put also the final EOC node in the leaf array
     fLeafList[counter] = new (fMemoryManager) CMLeaf
     (
         nodeEOC->getElement()
         , nodeEOC->getPosition()
         , fLeafCount
         , fMemoryManager
     );
     fLeafListType[counter] = ContentSpecNode::Leaf;

     //
     //  Now handle our top level. We use our left child's last pos set and our
     //  right child's first pos set, so get them now for convenience.
     //
     const CMStateSet& last  = nodeOrgContent->getLastPos();
     const CMStateSet& first = nodeEOC->getFirstPos();

     //
     //  Now, for every position which is in our left child's last set
     //  add all of the states in our right child's first set to the
     //  follow set for that position.
     //
     CMStateSetEnumerator enumLast(&last);
     while(enumLast.hasMoreElements())
     {
         XMLSize_t index=enumLast.nextElement();
         *fFollowList[index] |= first;
     }

     //
     //  And finally the big push... Now we build the DFA using all the states
     //  and the tree we've built up. First we set up the various data
     //  structures we are going to use while we do this.
     //
     //  First of all we need an array of unique element ids in our content
     //  model. For each transition table entry, we need a set of contiguous
     //  indices to represent the transitions for a particular input element.
     //  So we need to a zero based range of indexes that map to element types.
     //  This element map provides that mapping.
     //
     fElemMap = (QName**) fMemoryManager->allocate
     (
         fLeafCount * sizeof(QName*)
     ); //new QName*[fLeafCount];
     fElemMapType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate
     (
         fLeafCount * sizeof(ContentSpecNode::NodeTypes)
     ); //new ContentSpecNode::NodeTypes[fLeafCount];
     fElemMapSize = 0;

     Occurence** elemOccurenceMap=0;
     for (unsigned int outIndex = 0; outIndex < fLeafCount; outIndex++)
     {
         fElemMap[outIndex] = new (fMemoryManager) QName(fMemoryManager);

         if ( (fLeafListType[outIndex] & 0x0f) != ContentSpecNode::Leaf )
             if (!fLeafNameTypeVector)
                 fLeafNameTypeVector = new (fMemoryManager) ContentLeafNameTypeVector(fMemoryManager);

         // Get the current leaf's element index
         CMLeaf* leaf=fLeafList[outIndex];
         const QName* element = leaf->getElement();
         const XMLCh* elementRawName = 0;
         if (fDTD && element)
             elementRawName = element->getRawName();

         // See if the current leaf node's element index is in the list
         unsigned int inIndex = 0;

         for (; inIndex < fElemMapSize; inIndex++)
         {
             const QName* inElem = fElemMap[inIndex];
             if (fDTD) {
                 if (XMLString::equals(inElem->getRawName(), elementRawName)) {
                     break;
                 }
             }
             else {
                 if ((fElemMapType[inIndex] == fLeafListType[outIndex]) &&
                     (inElem->getURI() == element->getURI()) &&
                     (XMLString::equals(inElem->getLocalPart(), element->getLocalPart()))) {
                     break;
                 }
             }
         }

         // If it was not in the list, then add it and bump the map size
         if (inIndex == fElemMapSize)
         {
             fElemMap[fElemMapSize]->setValues(*element);
             if(leaf->isRepeatableLeaf())
             {
                 if (elemOccurenceMap == 0) {
                     elemOccurenceMap = (Occurence**)fMemoryManager->allocate(fLeafCount*sizeof(Occurence*));
                     memset(elemOccurenceMap, 0, fLeafCount*sizeof(Occurence*));
                 }
                 elemOccurenceMap[fElemMapSize] = new (fMemoryManager) Occurence(((CMRepeatingLeaf*)leaf)->getMinOccurs(), ((CMRepeatingLeaf*)leaf)->getMaxOccurs(), fElemMapSize);
             }
             fElemMapType[fElemMapSize] = fLeafListType[outIndex];
             ++fElemMapSize;
         }
     }

     // set up the fLeafNameTypeVector object if there is one.
     if (fLeafNameTypeVector) {
         fLeafNameTypeVector->setValues(fElemMap, fElemMapType, fElemMapSize);
     }

     /***
      * Optimization(Jan, 2001); We sort fLeafList according to
      * elemIndex which is *uniquely* associated to each leaf.
      * We are *assuming* that each element appears in at least one leaf.
      **/
     // don't forget to delete it
 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
     int *fLeafSorter = (int*) fMemoryManager->allocate
     (
         (fLeafCount + fElemMapSize) * sizeof(int)
     ); //new int[fLeafCount + fElemMapSize];
     unsigned int fSortCount = 0;

     for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
     {
         const QName* element = fElemMap[elemIndex];
         const XMLCh* elementRawName = 0;
         if (fDTD && element)
             elementRawName = element->getRawName();

         for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
         {
             const QName* leaf = fLeafList[leafIndex]->getElement();
             if (fDTD) {
                 if (XMLString::equals(leaf->getRawName(), elementRawName)) {
                     fLeafSorter[fSortCount++] = leafIndex;
                 }
             }
             else {
                 if ((fElemMapType[elemIndex] == fLeafListType[leafIndex]) &&
                     (leaf->getURI() == element->getURI()) &&
                     (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) {
                       fLeafSorter[fSortCount++] = leafIndex;
                 }
             }
         }
         fLeafSorter[fSortCount++] = -1;
     }
 #endif

     // instead of using a single array with -1 to separate elements, use a bidimensional map
     unsigned int** fLeafSorter = (unsigned int**)fMemoryManager->allocate(fElemMapSize * sizeof(unsigned int*));
     unsigned int* tmpSorter = (unsigned int*)fMemoryManager->allocate(fLeafCount * sizeof(unsigned int));
     for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
     {
         const QName* element = fElemMap[elemIndex];
         const XMLCh* elementRawName = 0;
         if (fDTD && element)
             elementRawName = element->getRawName();

         unsigned int fSortCount=0;
         for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
         {
             const QName* leaf = fLeafList[leafIndex]->getElement();
             if (fDTD) {
                 if (XMLString::equals(leaf->getRawName(), elementRawName)) {
                     tmpSorter[fSortCount++] = leafIndex;
                 }
             }
             else {
                 if ((fElemMapType[elemIndex] == fLeafListType[leafIndex]) &&
                     (leaf->getURI() == element->getURI()) &&
                     (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) {
                       tmpSorter[fSortCount++] = leafIndex;
                 }
             }
         }

         fLeafSorter[elemIndex]=(unsigned int*)fMemoryManager->allocate((fSortCount+1) * sizeof(unsigned int));
         fLeafSorter[elemIndex][0]=fSortCount;
         for (unsigned int index=0;index<fSortCount;index++)
             fLeafSorter[elemIndex][index+1]=tmpSorter[index];
     }
     fMemoryManager->deallocate(tmpSorter);

     //
     //  Next lets create some arrays, some that that hold transient info
     //  during the DFA build and some that are permament. These are kind of
     //  sticky since we cannot know how big they will get, but we don't want
     //  to use any collection type classes because of performance.
     //
     //  Basically they will probably be about fLeafCount*2 on average, but can
     //  be as large as 2^(fLeafCount*2), worst case. So we start with
     //  fLeafCount*4 as a middle ground. This will be very unlikely to ever
     //  have to expand though, it if does, the overhead will be somewhat ugly.
     //
     unsigned int curArraySize = fLeafCount * 4;
     CMStateSet** statesToDo = (CMStateSet**)
         fMemoryManager->allocate
         (
             curArraySize * sizeof(CMStateSet*)
         ); //new const CMStateSet*[curArraySize];
     fFinalStateFlags = (bool*) fMemoryManager->allocate
     (
         curArraySize * sizeof(bool)
     ); //new bool[curArraySize];
     fTransTable = (unsigned int**) fMemoryManager->allocate
     (
         curArraySize * sizeof(unsigned int*)
     ); //new unsigned int*[curArraySize];

     //
     //  Ok we start with the initial set as the first pos set of the head node
     //  (which is the seq node that holds the content model and the EOC node.)
     //
     CMStateSet* setT = new (fMemoryManager) CMStateSet(fHeadNode->getFirstPos());

     //
     // Note on memory leak: Bugzilla#2707:
     // ===================================
     // The CMBinary, pointed to by fHeadNode, shall be released by
     // deleted by itself.
     //
     // fLeafList[] maintains its **OWN** copy of CMLeaf to avoid double deletion
     // of CMLeaf.
     //

     delete fHeadNode;

     //
     //  Init our two state flags. Basically the unmarked state counter is
     //  always chasing the current state counter. When it catches up, that
     //  means we made a pass through that did not add any new states to the
     //  lists, at which time we are done. We could have used a expanding array
     //  of flags which we used to mark off states as we complete them, but
     //  this is easier though less readable maybe.
     //
     unsigned int unmarkedState = 0;
     unsigned int curState = 0;

     //
     //  Init the first transition table entry, and put the initial state
     //  into the states to do list, then bump the current state.
     //
     fTransTable[curState] = makeDefStateList();
     statesToDo[curState] = setT;
     curState++;

     //
     // the stateTable is an auxiliary means to fast
     // identification of new state created (instead
     // of sequential loop statesToDo to find out),
     // while the role that statesToDo plays remain unchanged.
     //
     RefHashTableOf<XMLInteger, CMStateSetHasher> *stateTable =
         new (fMemoryManager) RefHashTableOf<XMLInteger, CMStateSetHasher>
         (
             curArraySize
             , true
             , fMemoryManager
         );
     //stateTable->put((CMStateSet*)setT, new (fMemoryManager) XMLInteger(0));

     //
     //  Ok, almost done with the algorithm from hell... We now enter the
     //  loop where we go until the states done counter catches up with
     //  the states to do counter.
     //
     CMStateSet* newSet = 0;
     while (unmarkedState < curState)
     {
         //
         //  Get the next unmarked state out of the list of states to do.
         //  And get the associated transition table entry.
         //
         setT = statesToDo[unmarkedState];
         unsigned int* transEntry = fTransTable[unmarkedState];

         // Mark this one final if it contains the EOC state
         fFinalStateFlags[unmarkedState] = setT->getBit(fEOCPos);

         // Bump up the unmarked state count, marking this state done
         unmarkedState++;

 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
         // Optimization(Jan, 2001)
         unsigned int sorterIndex = 0;
         // Optimization(Jan, 2001)
 #endif

         // Loop through each possible input symbol in the element map
         for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
         {
             //
             //  Build up a set of states which is the union of all of the
             //  follow sets of DFA positions that are in the current state. If
             //  we gave away the new set last time through then create a new
             //  one. Otherwise, zero out the existing one.
             //
             if (!newSet)
                 newSet = new (fMemoryManager) CMStateSet
                 (
                     fLeafCount
                     , fMemoryManager
                 );
             else
                 newSet->zeroBits();

 #ifdef OBSOLETED
 // unoptimized code
             for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
             {
                 // If this leaf index (DFA position) is in the current set...
                 if (setT->getBit(leafIndex))
                 {
                     //
                     //  If this leaf is the current input symbol, then we want
                     //  to add its follow list to the set of states to transition
                     //  to from the current state.
                     //
                     const QName* leaf = fLeafList[leafIndex]->getElement();
                     const QName* element = fElemMap[elemIndex];
                     if (fDTD) {
                         if (XMLString::equals(leaf->getRawName(), element->getRawName())) {
                             *newSet |= *fFollowList[leafIndex];
                         }
                     }
                     else {
                         if ((leaf->getURI() == element->getURI()) &&
                             (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) {
                             *newSet |= *fFollowList[leafIndex];
                         }
                     }
                 }
             } // for leafIndex
 #endif

 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
             // Optimization(Jan, 2001)
             int leafIndex = fLeafSorter[sorterIndex++];

             while (leafIndex != -1)
             {
                 // If this leaf index (DFA position) is in the current set...
                 if (setT->getBit(leafIndex))
                 {
                     //
                     //  If this leaf is the current input symbol, then we
                     //  want to add its follow list to the set of states to
                     //  transition to from the current state.
                     //
                     *newSet |= *fFollowList[leafIndex];
                 }
                 leafIndex = fLeafSorter[sorterIndex++];
             } // while (leafIndex != -1)
 #endif

             unsigned int* fLeafIndexes=fLeafSorter[elemIndex];
             unsigned int fNumItems=fLeafIndexes[0];
             if(fNumItems!=0)
             {
                 // The algorithm requires finding the leaf that is present both in the bitfield of the current state, and in the
                 // list of places where the currently tested item can appear. When this occurs, the follow list of this parent item
                 // is added to the bitfield representing the next state.
                 // Both the bitfield and the list of places are sorted, so we can analyze them in two ways; either iterating over the
                 // parent items, testing the bitfield for the existence of the parent (N times a constant Tb), or by iterating over the
                 // bitfield (restricted to the range of the sorted list of places), using a binary search to locate the leaf in the
                 // sorted list of places (M times log(N) testing operations Ts)
                 // Assuming that the time to test a bit is roughly the same of the time needed to compute the average of two integers,
                 // plus a couple of comparisons and additions, we compare N agains M*log(N) to decide which algorithm should be faster given
                 // the two sets
                 if(fNumItems <= setT->getBitCountInRange(fLeafIndexes[1], fLeafIndexes[fNumItems])*log((float)fNumItems))
                 {
                     for(unsigned int i=1; i<=fNumItems; ++i)
                         if(setT->getBit(fLeafIndexes[i]))
                         {
                             //
                             //  If this leaf is the current input symbol, then we
                             //  want to add its follow list to the set of states to
                             //  transition to from the current state.
                             //
                             *newSet |= *fFollowList[ fLeafIndexes[i] ];
                         }
                 }
                 else
                 {
                     // Further optimization: given that the bitfield enumerator returns the numbers in order,
                     // every time we raise the lower marker we know it will true also for the next bits, so
                     // the next binary search will not start from 1 but from this index
                     unsigned int lowIndex = 1;
                     // Start the enumerator from the first index in the sorted list of places,
                     // as nothing before that point will match
                     CMStateSetEnumerator enumBits(setT, fLeafIndexes[1]);
                     while(enumBits.hasMoreElements())
                     {
                         unsigned int bitIndex=enumBits.nextElement();
                         // if this leaf is greater than the last index in the sorted list of places,
                         // nothing can be found from now on, so get out of here
                         if(bitIndex > fLeafIndexes[fNumItems])
                             break;

                         // Check if this leaf index (DFA position) is in the current set
                         // (using binary search: the indexes are sorted)
                         unsigned int first=lowIndex,last=fNumItems,i;
                         while(first<=last)
                         {
                             i=(first+last)/2;
                             if(fLeafIndexes[i]>bitIndex)
                                 last=i-1;
                             else if(fLeafIndexes[i]<bitIndex)
                                 lowIndex=first=i+1;
                             else
                             {
                                 //
                                 //  If this leaf is the current input symbol, then we
                                 //  want to add its follow list to the set of states to
                                 //  transition to from the current state.
                                 //
                                 *newSet |= *fFollowList[bitIndex];
                                 break;
                             }
                         }
                     }
                 }
             }

             //
             //  If this new set is not empty, then see if its in the list
             //  of states to do. If not, then add it.
             //
             if (!newSet->isEmpty())
             {
                 //
                 //  Search the 'states to do' list to see if this new
                 //  state set is already in there.
                 //
                 /***
                 unsigned int stateIndex = 0;
                 for (; stateIndex < curState; stateIndex++)
                 {
                     if (*statesToDo[stateIndex] == *newSet)
                         break;
                 }
                 ***/

                 XMLInteger *stateObj = stateTable->get(newSet);
                 unsigned int stateIndex = (stateObj == 0 ? curState : stateObj->intValue());

                 // If we did not find it, then add it
                 if (stateIndex == curState)
                 {
                     //
                     //  Put this new state into the states to do and init
                     //  a new entry at the same index in the transition
                     //  table.
                     //
                     statesToDo[curState] = newSet;
                     fTransTable[curState] = makeDefStateList();
                     stateTable->put
                     (
                         newSet
                         , new (fMemoryManager) XMLInteger(curState)
                     );

                     // We now have a new state to do so bump the count
                     curState++;

                     //
                     //  Null out the new set to indicate we adopted it. This
                     //  will cause the creation of a new set on the next time
                     //  around the loop.
                     //
                     newSet = 0;
                 }

                 //
                 //  Now set this state in the transition table's entry for this
                 //  element (using its index), with the DFA state we will move
                 //  to from the current state when we see this input element.
                 //
                 transEntry[elemIndex] = stateIndex;

                 // Expand the arrays if we're full
                 if (curState == curArraySize)
                 {
                     //
                     //  Yikes, we overflowed the initial array size, so we've
                     //  got to expand all of these arrays. So adjust up the
                     //  size by 50% and allocate new arrays.
                     //
                     const unsigned int newSize = (unsigned int)(curArraySize * 1.5);
                     CMStateSet** newToDo = (CMStateSet**)
                         fMemoryManager->allocate
                         (
                             newSize * sizeof(CMStateSet*)
                         ); //new const CMStateSet*[newSize];
                     bool* newFinalFlags = (bool*) fMemoryManager->allocate
                     (
                         newSize * sizeof(bool)
                     ); //new bool[newSize];
                     unsigned int** newTransTable = (unsigned int**)
                         fMemoryManager->allocate
                         (
                             newSize * sizeof(unsigned int*)
                         ); //new unsigned int*[newSize];

                     // Copy over all of the existing content
                     for (unsigned int expIndex = 0; expIndex < curArraySize; expIndex++)
                     {
                         newToDo[expIndex] = statesToDo[expIndex];
                         newFinalFlags[expIndex] = fFinalStateFlags[expIndex];
                         newTransTable[expIndex] = fTransTable[expIndex];
                     }

                     // Clean up the old stuff
                     fMemoryManager->deallocate(statesToDo); //delete [] statesToDo;
                     fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags;
                     fMemoryManager->deallocate(fTransTable); //delete [] fTransTable;

                     // Store the new array size and pointers
                     curArraySize = newSize;
                     statesToDo = newToDo;
                     fFinalStateFlags = newFinalFlags;
                     fTransTable = newTransTable;
                 } //if (curState == curArraySize)
             } //if (!newSet->isEmpty())
         } // for elemIndex
     } //while

     // Store the current state count in the trans table size
     fTransTableSize = curState;

     //
     // Fill in the occurence information for each looping state
     // if we're using counters.
     //
     if (elemOccurenceMap != 0) {
         fCountingStates = (Occurence**)fMemoryManager->allocate(fTransTableSize*sizeof(Occurence*));
         memset(fCountingStates, 0, fTransTableSize*sizeof(Occurence*));
         for (unsigned int i = 0; i < fTransTableSize; ++i) {
             unsigned int * transitions = fTransTable[i];
             for (unsigned int j = 0; j < fElemMapSize; ++j) {
                 if (i == transitions[j]) {
                     Occurence* old=elemOccurenceMap[j];
                     if(old!=0)
                         fCountingStates[i] = new (fMemoryManager) Occurence(old->minOccurs, old->maxOccurs, old->elemIndex);
                     break;
                 }
             }
         }
         for (unsigned int j = 0; j < fLeafCount; ++j) {
             if(elemOccurenceMap[j]!=0)
                 delete elemOccurenceMap[j];
         }
         fMemoryManager->deallocate(elemOccurenceMap);
     }

     // If the last temp set was not stored, then clean it up
     if (newSet)
         delete newSet;

     //
     //  Now we can clean up all of the temporary data that was needed during
     //  DFA build.
     //

     for (index = 0; index < fLeafCount; index++)
         delete fFollowList[index];
     fMemoryManager->deallocate(fFollowList); //delete [] fFollowList;

     //
     // removeAll() will delete all data, XMLInteger,
     // while the keys are to be deleted by the
     // deletion of statesToDo.
     //
     delete stateTable;

     for (index = 0; index < curState; index++)
         delete statesToDo[index];
     fMemoryManager->deallocate(statesToDo); //delete [] statesToDo;

     for (index = 0; index < fLeafCount; index++)
         delete fLeafList[index];
     fMemoryManager->deallocate(fLeafList); //delete [] fLeafList;

 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
     fMemoryManager->deallocate(fLeafSorter); //delete [] fLeafSorter;
 #endif
     for (index=0; index < fElemMapSize; index++)
         fMemoryManager->deallocate(fLeafSorter[index]);
     fMemoryManager->deallocate(fLeafSorter);
 }

 unsigned int DFAContentModel::countLeafNodes(ContentSpecNode* const curNode)
 {
     unsigned int count = 0;

     // Get the spec type of the passed node
     const ContentSpecNode::NodeTypes curType = curNode->getType();

     if ((curType & 0x0f) == ContentSpecNode::Any
         || (curType & 0x0f) == ContentSpecNode::Any_Other
         || (curType & 0x0f) == ContentSpecNode::Any_NS
         || curType == ContentSpecNode::Leaf
         || curType == ContentSpecNode::Loop)
     {
         count++;
     }
     else
     {
         //
         //  Its not a leaf, so we have to recurse its left and maybe right
         //  nodes. Save both values before we recurse and trash the node.
         //
         ContentSpecNode* leftNode = curNode->getFirst();
         ContentSpecNode* rightNode = curNode->getSecond();

         // Detect if we have a deep tree that can be analyzed using a loop instead of recursion
         unsigned int nLoopCount=0;
         ContentSpecNode* cursor=curNode;
         while(cursor->getType()==ContentSpecNode::Sequence && cursor->getFirst() && cursor->getFirst()->getSecond()==rightNode)
         {
             nLoopCount++;
             cursor=cursor->getFirst();
         }
         if(nLoopCount!=0)
         {
             count += countLeafNodes(cursor);
             for(unsigned int i=0;i<nLoopCount;i++)
                 count += countLeafNodes(rightNode);
             return count;
         }
         if(leftNode)
             count+=countLeafNodes(leftNode);
         if(rightNode)
             count+=countLeafNodes(rightNode);
     }
     return count;
 }

 CMNode* DFAContentModel::buildSyntaxTree(ContentSpecNode* const curNode
                                          , unsigned int&        curIndex)
 {
     // Initialize a return node pointer
     CMNode* retNode = 0;

     // Get the spec type of the passed node
     const ContentSpecNode::NodeTypes curType = curNode->getType();

     if ((curType & 0x0f) == ContentSpecNode::Any
         || (curType & 0x0f) == ContentSpecNode::Any_Other
         || (curType & 0x0f) == ContentSpecNode::Any_NS)
     {
         retNode = new (fMemoryManager) CMAny
         (
             curType
             , curNode->getElement()->getURI()
             , curIndex
             , fLeafCount
             , fMemoryManager
         );
         fLeafList[curIndex] = new (fMemoryManager) CMLeaf
         (
             new (fMemoryManager) QName
             (
                 XMLUni::fgZeroLenString
                 , XMLUni::fgZeroLenString
                 , curNode->getElement()->getURI()
                 , fMemoryManager
             )
             , curIndex
             , true
             , fLeafCount
             , fMemoryManager
         );
         fLeafListType[curIndex] = curType;
         ++curIndex;
     }
     else if (curType == ContentSpecNode::Leaf)
     {
         //
         //  Create a new leaf node, and pass it the current leaf count, which
         //  is its DFA state position. Bump the leaf count after storing it.
         //  This makes the positions zero based since we store first and then
         //  increment.
         //
         retNode = new (fMemoryManager) CMLeaf
         (
             curNode->getElement()
             , curIndex
             , fLeafCount
             , fMemoryManager
         );
         fLeafList[curIndex] = new (fMemoryManager) CMLeaf
         (
             curNode->getElement()
             , curIndex
             , fLeafCount
             , fMemoryManager
         );
         fLeafListType[curIndex] = ContentSpecNode::Leaf;
         ++curIndex;
     }
     else if (curType == ContentSpecNode::Loop)
     {
         //
         //  Create a new leaf node, and pass it the current leaf count, which
         //  is its DFA state position. Bump the leaf count after storing it.
         //  This makes the positions zero based since we store first and then
         //  increment.
         //
         retNode = new (fMemoryManager) CMRepeatingLeaf
         (
             curNode->getFirst()->getElement()
             , curNode->getMinOccurs()
             , curNode->getMaxOccurs()
             , curIndex
             , fLeafCount
             , fMemoryManager
         );
         fLeafList[curIndex] = new (fMemoryManager) CMRepeatingLeaf
         (
             curNode->getFirst()->getElement()
             , curNode->getMinOccurs()
             , curNode->getMaxOccurs()
             , curIndex
             , fLeafCount
             , fMemoryManager
         );
         fLeafListType[curIndex] = curNode->getFirst()->getType();
         ++curIndex;
     }
      else
     {
         //
         //  Its not a leaf, so we have to recurse its left and maybe right
         //  nodes. Save both values before we recurse and trash the node.
         //
         ContentSpecNode* leftNode = curNode->getFirst();
         ContentSpecNode* rightNode = curNode->getSecond();

         // Detect if we have a deep tree that can be analyzed using a loop instead of recursion
         unsigned int nLoopCount=0;
         ContentSpecNode* cursor=curNode;
         while(cursor->getType()==ContentSpecNode::Sequence && cursor->getFirst() && cursor->getFirst()->getSecond()==rightNode)
         {
             nLoopCount++;
             cursor=cursor->getFirst();
         }
         if(nLoopCount!=0)
         {
             retNode = buildSyntaxTree(cursor, curIndex);
             for(unsigned int i=0;i<nLoopCount;i++)
             {
                 CMNode* newRight = buildSyntaxTree(rightNode, curIndex);
                 //
                 //  Now handle our level. We use our left child's last pos set and our
                 //  right child's first pos set, so get them now for convenience.
                 //
                 const CMStateSet& last  = retNode->getLastPos();
                 const CMStateSet& first = newRight->getFirstPos();

                 //
                 //  Now, for every position which is in our left child's last set
                 //  add all of the states in our right child's first set to the
                 //  follow set for that position.
                 //
                 CMStateSetEnumerator enumLast(&last);
                 while(enumLast.hasMoreElements())
                 {
                     XMLSize_t index=enumLast.nextElement();
                     *fFollowList[index] |= first;
                 }
                 retNode = new (fMemoryManager) CMBinaryOp
                 (
                     ContentSpecNode::Sequence
                     , retNode
                     , newRight
                     , fLeafCount
                     , fMemoryManager
                 );
             }
             return retNode;
         }

         if (((curType & 0x0f) == ContentSpecNode::Choice)
         ||   ((curType & 0x0f) == ContentSpecNode::Sequence))
         {
             //
             //  Recurse on both children, and return a binary op node with the
             //  two created sub nodes as its children. The node type is the
             //  same type as the source.
             //
             CMNode* newLeft = buildSyntaxTree(leftNode, curIndex);
             CMNode* newRight = buildSyntaxTree(rightNode, curIndex);
             if(((curType & 0x0f) == ContentSpecNode::Sequence))
             {
                 //
                 //  Now handle our level. We use our left child's last pos set and our
                 //  right child's first pos set, so get them now for convenience.
                 //
                 const CMStateSet& last  = newLeft->getLastPos();
                 const CMStateSet& first = newRight->getFirstPos();

                 //
                 //  Now, for every position which is in our left child's last set
                 //  add all of the states in our right child's first set to the
                 //  follow set for that position.
                 //
                 CMStateSetEnumerator enumLast(&last);
                 while(enumLast.hasMoreElements())
                 {
                     XMLSize_t index=enumLast.nextElement();
                     *fFollowList[index] |= first;
                 }
             }
             retNode = new (fMemoryManager) CMBinaryOp
             (
                 curType
                 , newLeft
                 , newRight
                 , fLeafCount
                 , fMemoryManager
             );
         }
          else if (curType == ContentSpecNode::ZeroOrMore
                || curType == ContentSpecNode::ZeroOrOne
                || curType == ContentSpecNode::OneOrMore)
         {
             CMNode* newChild = buildSyntaxTree(leftNode, curIndex);
             if (curType == ContentSpecNode::ZeroOrMore
                || curType == ContentSpecNode::OneOrMore)
             {
                 //
                 //  Now handle our level. We use our own first and last position
                 //  sets, so get them up front.
                 //
                 const CMStateSet& first = newChild->getFirstPos();
                 const CMStateSet& last  = newChild->getLastPos();

                 //
                 //  For every position which is in our last position set, add all
                 //  of our first position states to the follow set for that
                 //  position.
                 //
                 CMStateSetEnumerator enumLast(&last);
                 while(enumLast.hasMoreElements())
                 {
                     XMLSize_t index=enumLast.nextElement();
                     *fFollowList[index] |= first;
                 }
             }
             // This one is fine as is, just change to our form
             retNode = new (fMemoryManager) CMUnaryOp
             (
                 curType
                 , newChild
                 , fLeafCount
                 , fMemoryManager
             );
         }
          else
         {
             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_UnknownCMSpecType, fMemoryManager);
         }
     }
     // fault in the first and last pos, then delete it children
     retNode->getFirstPos();
     retNode->getLastPos();
     retNode->orphanChild();
     return retNode;
 }

 //
 //  gInvalidTrans is used to represent bad transitions in the transition table
 //  entry for each state. So each entry is initialized to that value. This
 //  method creates a new entry and initializes it.
 //
 unsigned int* DFAContentModel::makeDefStateList() const
 {
     unsigned int* retArray = (unsigned int*) fMemoryManager->allocate
     (
         fElemMapSize * sizeof(unsigned int)
     ); //new unsigned int[fElemMapSize];
     for (unsigned int index = 0; index < fElemMapSize; index++)
         retArray[index] = XMLContentModel::gInvalidTrans;
     return retArray;
 }

 ContentLeafNameTypeVector* DFAContentModel::getContentLeafNameTypeVector() const
 {
    //later change it to return the data member
     return fLeafNameTypeVector;
 }

 void DFAContentModel::checkUniqueParticleAttribution (SchemaGrammar*    const pGrammar,
                                                       GrammarResolver*  const pGrammarResolver,
                                                       XMLStringPool*    const pStringPool,
                                                       XMLValidator*     const pValidator,
                                                       unsigned int*     const pContentSpecOrgURI,
                                                       const XMLCh*            pComplexTypeName /*= 0*/)
 {

     SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool);

     unsigned int i, j, k;

     // Rename the URI back
     for (i = 0; i < fElemMapSize; i++) {

         unsigned int orgURIIndex = fElemMap[i]->getURI();

         if ((orgURIIndex != XMLContentModel::gEOCFakeId) &&
             (orgURIIndex != XMLContentModel::gEpsilonFakeId) &&
             (orgURIIndex != XMLElementDecl::fgInvalidElemId) &&
             (orgURIIndex != XMLElementDecl::fgPCDataElemId)) {
             fElemMap[i]->setURI(pContentSpecOrgURI[orgURIIndex]);
         }
     }

     // Unique Particle Attribution
     // Store the conflict results between any two elements in fElemMap
     // 0 - not yet tested, 1 - conflict, (-1) - no conflict
     signed char** conflictTable = (signed char**) fMemoryManager->allocate
     (
         fElemMapSize * sizeof(signed char*)
     );

     // initialize the conflict table
     for (j = 0; j < fElemMapSize; j++) {
         conflictTable[j] = (signed char*) fMemoryManager->allocate
         (
             fElemMapSize * sizeof(signed char)
         );
         memset(conflictTable[j], 0, fElemMapSize*sizeof(signed char));
     }

     // for each state, check whether it has overlap transitions
     for (i = 0; i < fTransTableSize; i++) {
         for (j = 0; j < fElemMapSize; j++) {
             for (k = j+1; k < fElemMapSize; k++) {
                 if (fTransTable[i][j] != XMLContentModel::gInvalidTrans &&
                     fTransTable[i][k] != XMLContentModel::gInvalidTrans &&
                     conflictTable[j][k] == 0) {

                     // If this is text in a Schema mixed content model, skip it.
                     if ( fIsMixed &&
                          (( fElemMap[j]->getURI() == XMLElementDecl::fgPCDataElemId) ||
                           ( fElemMap[k]->getURI() == XMLElementDecl::fgPCDataElemId)))
                         continue;

                     if (XercesElementWildcard::conflict(pGrammar,
                                                         fElemMapType[j],
                                                         fElemMap[j],
                                                         fElemMapType[k],
                                                         fElemMap[k],
                                                         &comparator)) {
                         if (fCountingStates != 0) {
                             Occurence* o = fCountingStates[i];
                             // If "i" is a counting state and exactly one of the transitions
                             // loops back to "i" then the two particles do not overlap if
                             // minOccurs == maxOccurs.
                             if (o != 0 &&
                                 ((fTransTable[i][j] == i) ^ (fTransTable[i][k] == i)) &&
                                 o->minOccurs == o->maxOccurs) {
                                 conflictTable[j][k] = -1;
                                 continue;
                             }
                         }
                        conflictTable[j][k] = 1;

                        XMLBuffer buf1(1023, fMemoryManager);
                        if (((fElemMapType[j] & 0x0f) == ContentSpecNode::Any) ||
                            ((fElemMapType[j] & 0x0f) == ContentSpecNode::Any_NS))
                            buf1.set(SchemaSymbols::fgATTVAL_TWOPOUNDANY);
                        else if ((fElemMapType[j] & 0x0f) == ContentSpecNode::Any_Other)
                            buf1.set(SchemaSymbols::fgATTVAL_TWOPOUNDOTHER);
                        else
                            buf1.set(fElemMap[j]->getRawName());

                        XMLBuffer buf2(1023, fMemoryManager);
                        if (((fElemMapType[k] & 0x0f) == ContentSpecNode::Any) ||
                            ((fElemMapType[k] & 0x0f) == ContentSpecNode::Any_NS))
                            buf2.set(SchemaSymbols::fgATTVAL_TWOPOUNDANY);
                        else if ((fElemMapType[k] & 0x0f) == ContentSpecNode::Any_Other)
                            buf2.set(SchemaSymbols::fgATTVAL_TWOPOUNDOTHER);
                        else
                            buf2.set(fElemMap[k]->getRawName());

                        pValidator->emitError(XMLValid::UniqueParticleAttributionFail,
                                              pComplexTypeName,
                                              buf1.getRawBuffer(),
                                              buf2.getRawBuffer());
                     }
                     else
                         conflictTable[j][k] = -1;
                 }
             }
         }
     }

     for (i = 0; i < fElemMapSize; i++)
         fMemoryManager->deallocate(conflictTable[i]);
     fMemoryManager->deallocate(conflictTable);
 }

 }