src/validators/DTD/DFAContentModel.cpp - xerces-c - Git at Google

 /*
  * The Apache Software License, Version 1.1
  *
  * Copyright (c) 1999 The Apache Software Foundation.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * 3. The end-user documentation included with the redistribution,
  *    if any, must include the following acknowledgment:
  *       "This product includes software developed by the
  *        Apache Software Foundation (http://www.apache.org/)."
  *    Alternately, this acknowledgment may appear in the software itself,
  *    if and wherever such third-party acknowledgments normally appear.
  *
  * 4. The names "Xerces" and "Apache Software Foundation" must
  *    not be used to endorse or promote products derived from this
  *    software without prior written permission. For written
  *    permission, please contact apache\@apache.org.
  *
  * 5. Products derived from this software may not be called "Apache",
  *    nor may "Apache" appear in their name, without prior written
  *    permission of the Apache Software Foundation.
  *
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * ====================================================================
  *
  * This software consists of voluntary contributions made by many
  * individuals on behalf of the Apache Software Foundation, and was
  * originally based on software copyright (c) 1999, International
  * Business Machines, Inc., http://www.ibm.com .  For more information
  * on the Apache Software Foundation, please see
  * <http://www.apache.org/>.
  */

 /**
  * $Log$
  * Revision 1.1  1999/11/09 01:03:17  twl
  * Initial revision
  *
  * Revision 1.2  1999/11/08 20:45:38  rahul
  * Swat for adding in Product name and CVS comment log variable.
  *
  */


 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <util/RuntimeException.hpp>
 #include <framework/XMLValidator.hpp>
 #include <validators/DTD/CMBinaryOp.hpp>
 #include <validators/DTD/CMLeaf.hpp>
 #include <validators/DTD/CMUnaryOp.hpp>
 #include <validators/DTD/DFAContentModel.hpp>
 #include <validators/DTD/ContentSpecNode.hpp>
 #include <validators/DTD/DTDElementDecl.hpp>


 // ---------------------------------------------------------------------------
 //  Local static data
 //
 //  gEOCFakeId
 //  gEpsilonFakeId
 //      We have to put in a couple of special CMLeaf nodes to represent
 //      special values, using fake element ids that we know won't conflict
 //      with real element ids.
 // ---------------------------------------------------------------------------
 static const unsigned int   gEOCFakeId      = 0xFFFFFFF1;
 static const unsigned int   gEpsilonFakeId  = 0xFFFFFFF2;


 // ---------------------------------------------------------------------------
 //  DFAContentModel: Constructors and Destructor
 // ---------------------------------------------------------------------------
 DFAContentModel::DFAContentModel(const DTDElementDecl& elemDecl) :

     fElemDecl(elemDecl)
     , fElemMap(0)
     , fElemMapSize(0)
     , fEmptyOk(false)
     , fEOCPos(0)
     , fFinalStateFlags(0)
     , fFollowList(0)
     , fHeadNode(0)
     , fLeafCount(0)
     , fLeafList(0)
     , fSpecNode(0)
     , fTransTable(0)
     , fTransTableSize(0)
 {
     //
     //  Store away our content spec node. This is used all over the place
     //  here so its easier to store a member pointer than to pass it around.
     //
     fSpecNode = elemDecl.getContentSpec();

     // And build the DFA data structures
     buildDFA();
 }

 DFAContentModel::~DFAContentModel()
 {
     //
     //  Clean up all the stuff that is not just temporary representation
     //  data that was cleaned up after building the DFA.
     //
     delete [] fFinalStateFlags;

     unsigned index;
     for (index = 0; index < fTransTableSize; index++)
         delete [] fTransTable[index];
     delete [] fTransTable;

     delete [] fElemMap;
 }


 // ---------------------------------------------------------------------------
 //  DFAContentModel: Implementation of the ContentModel virtual interface
 // ---------------------------------------------------------------------------
 int
 DFAContentModel::validateContent(   const   unsigned int*   childIds
                                     , const unsigned int    childCount) const
 {
     //
     //  If there are no children, then either we fail on the 0th element
     //  or we return success. It depends upon whether this content model
     //  accepts empty content, which we determined earlier.
     //
     if (!childCount)
     {
         if (fEmptyOk)
             return -1;
         return 0;
     }

     //
     //  Lets loop through the children in the array and move our way
     //  through the states. Note that we use the fElemMap array to map
     //  an element index to a state index.
     //
     unsigned int curState = 0;
     unsigned int childIndex = 0;
     for (; childIndex < childCount; childIndex++)
     {
         // Get the current element index out
         const unsigned int curElem = childIds[childIndex];

         // Look up this child in our element map
         unsigned int elemIndex = 0;
         for (; elemIndex < fElemMapSize; elemIndex++)
         {
             if (fElemMap[elemIndex] == curElem)
                 break;
         }

         // If we didn't find it, then obviously not valid
         if (elemIndex == fElemMapSize)
             return childIndex;

         //
         //  Look up the next state for this input symbol when in the
         //  current state.
         //
         curState = fTransTable[curState][elemIndex];

         //  If its not a legal transition then we failed.
         if (curState == -1)
             return childIndex;
     }

     //
     //  We transitioned all the way through the input list. However, that
     //  does not mean that we ended in a final state. So check whether
     //  our ending state is a final state.
     //
     if (!fFinalStateFlags[curState])
         return childIndex;

     return XMLValidator::Success;
 }


 // ---------------------------------------------------------------------------
 //  DFAContentModel: Private helper methods
 // ---------------------------------------------------------------------------
 void DFAContentModel::buildDFA()
 {
     unsigned int index;


     //
     //  The first step we need to take is to rewrite the content model using
     //  our CMNode objects, and in the process get rid of any repetition short
     //  cuts, converting them into '*' style repetitions or getting rid of
     //  repetitions altogether.
     //
     //  The conversions done are:
     //
     //  x+ -> (x|x*)
     //  x? -> (x|epsilon)
     //
     //  This is a relatively complex scenario. What is happening is that we
     //  create a top level binary node of which the special EOC value is set
     //  as the right side node. The the left side is set to the rewritten
     //  syntax tree. The source is the original content model info from the
     //  decl pool. The rewrite is done by buildSyntaxTree() which recurses the
     //  decl pool's content of the element and builds a new tree in the
     //  process.
     //
     //  Note that, during this operation, we set each non-epsilon leaf node's
     //  DFA state position and count the number of such leafs, which is left
     //  in the fLeafCount member.
     //
     CMLeaf* nodeEOC = new CMLeaf(gEOCFakeId);
     CMNode* nodeOrgContent = buildSyntaxTree(fElemDecl.getContentSpec());
     fHeadNode = new CMBinaryOp
     (
         ContentSpecNode::Sequence
         , nodeOrgContent
         , nodeEOC
     );

     //
     //  And handle specially the EOC node, which also must be numbered and
     //  counted as a non-epsilon leaf node. It could not be handled in the
     //  above tree build because it was created before all that started. We
     //  save the EOC position since its used during the DFA building loop.
     //
     fEOCPos = fLeafCount;
     nodeEOC->setPosition(fLeafCount++);

     //
     //  Ok, so now we have to iterate the new tree and do a little more work
     //  now that we know the leaf count. One thing we need to do is to
     //  calculate the first and last position sets of each node. This is
     //  cached away in each of the nodes.
     //
     //  Along the way we also set the leaf count in each node as the maximum
     //  state count. They must know this in order to create their first/last
     //  position sets.
     //
     //  We also need to build an array of references to the non-epsilon
     //  leaf nodes. Since we iterate here the same way as we did during the
     //  initial tree build (which built their position numbers, we will put
     //  them in the array according to their position values.
     //
     fLeafList = new CMLeaf*[fLeafCount];
     postTreeBuildInit(fHeadNode, 0);

     //
     //  And, moving onward... We now need to build the follow position sets
     //  for all the nodes. So we allocate an array of pointers to state sets,
     //  one for each leaf node (i.e. each significant DFA position.)
     //
     fFollowList = new CMStateSet*[fLeafCount];
     for (index = 0; index < fLeafCount; index++)
         fFollowList[index] = new CMStateSet(fLeafCount);
     calcFollowList(fHeadNode);

     //
     //  Check the DFA for ambiguity. If it is ambiguous, then set the
     //  ambiguous flag member and keep going.
     //
     fIsAmbiguous = isAmbiguous();

     //
     //  Check to see whether this content model can handle an empty content,
     //  which is something we need to optimize by looking now before we
     //  throw away the info that would tell us that.
     //
     //  If the left node of the head (the top level of the original content)
     //  is nullable, then its true.
     //
     fEmptyOk = nodeOrgContent->isNullable();

     //
     //  And finally the big push... Now we build the DFA using all the states
     //  and the tree we've built up. First we set up the various data
     //  structures we are going to use while we do this.
     //
     //  First of all we need an array of unique element ids in our content
     //  model. For each transition table entry, we need a set of contiguous
     //  indices to represent the transitions for a particular input element.
     //  So we need to a zero based range of indexes that map to element types.
     //  This element map provides that mapping.
     //
     fElemMap = new unsigned int[fLeafCount];
     fElemMapSize = 0;
     for (unsigned int outIndex = 0; outIndex < fLeafCount; outIndex++)
     {
         // Get the current leaf's element index
         const unsigned int elemId = fLeafList[outIndex]->getId();

         // See if the current leaf node's element index is in the list
         unsigned int inIndex = 0;
         for (; inIndex < fElemMapSize; inIndex++)
         {
             if (fElemMap[inIndex] == elemId)
                 break;
         }

         // If it was not in the list, then add it and bump the map size
         if (inIndex == fElemMapSize)
             fElemMap[fElemMapSize++] = elemId;
     }

     //
     //  Next lets create some arrays, some that that hold transient info
     //  during the DFA build and some that are permament. These are kind of
     //  sticky since we cannot know how big they will get, but we don't want
     //  to use any collection type classes because of performance.
     //
     //  Basically they will probably be about fLeafCount*2 on average, but can
     //  be as large as 2^(fLeafCount*2), worst case. So we start with
     //  fLeafCount*4 as a middle ground. This will be very unlikely to ever
     //  have to expand though, it if does, the overhead will be somewhat ugly.
     //
     unsigned int curArraySize = fLeafCount * 4;
     const CMStateSet** statesToDo = new const CMStateSet*[curArraySize];
     fFinalStateFlags = new bool[curArraySize];
     fTransTable = new unsigned int*[curArraySize];

     //
     //  Ok we start with the initial set as the first pos set of the head node
     //  (which is the seq node that holds the content model and the EOC node.)
     //
     const CMStateSet* setT = new CMStateSet(fHeadNode->getFirstPos());

     //
     //  Init our two state flags. Basically the unmarked state counter is
     //  always chasing the current state counter. When it catches up, that
     //  means we made a pass through that did not add any new states to the
     //  lists, at which time we are done. We could have used a expanding array
     //  of flags which we used to mark off states as we complete them, but
     //  this is easier though less readable maybe.
     //
     unsigned int unmarkedState = 0;
     unsigned int curState = 0;

     //
     //  Init the first transition table entry, and put the initial state
     //  into the states to do list, then bump the current state.
     //
     fTransTable[curState] = makeDefStateList();
     statesToDo[curState] = setT;
     curState++;

     //
     //  Ok, almost done with the algorithm from hell... We now enter the
     //  loop where we go until the states done counter catches up with
     //  the states to do counter.
     //
     CMStateSet* newSet = 0;
     while (unmarkedState < curState)
     {
         //
         //  Get the next unmarked state out of the list of states to do.
         //  And get the associated transition table entry.
         //
         setT = statesToDo[unmarkedState];
         unsigned int* transEntry = fTransTable[unmarkedState];

         // Mark this one final if it contains the EOC state
         fFinalStateFlags[unmarkedState] = setT->getBit(fEOCPos);

         // Bump up the unmarked state count, marking this state done
         unmarkedState++;

         // Loop through each possible input symbol in the element map
         for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
         {
             //
             //  Build up a set of states which is the union of all of the
             //  follow sets of DFA positions that are in the current state. If
             //  we gave away the new set last time through then create a new
             //  one. Otherwise, zero out the existing one.
             //
             if (!newSet)
                 newSet = new CMStateSet(fLeafCount);
             else
                 newSet->zeroBits();

             for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
             {
                 // If this leaf index (DFA position) is in the current set...
                 if (setT->getBit(leafIndex))
                 {
                     //
                     //  If this leaf is the current input symbol, then we want
                     //  to add its follow list to the set of states to transition
                     //  to from the current state.
                     //
                     if (fLeafList[leafIndex]->getId() == fElemMap[elemIndex])
                         *newSet |= *fFollowList[leafIndex];
                 }
             }

             //
             //  If this new set is not empty, then see if its in the list
             //  of states to do. If not, then add it.
             //
             if (!newSet->isEmpty())
             {
                 //
                 //  Search the 'states to do' list to see if this new
                 //  state set is already in there.
                 //
                 unsigned int stateIndex = 0;
                 for (; stateIndex < curState; stateIndex++)
                 {
                     if (*statesToDo[stateIndex] == *newSet)
                         break;
                 }

                 // If we did not find it, then add it
                 if (stateIndex == curState)
                 {
                     //
                     //  Put this new state into the states to do and init
                     //  a new entry at the same index in the transition
                     //  table.
                     //
                     statesToDo[curState] = newSet;
                     fTransTable[curState] = makeDefStateList();

                     // We now have a new state to do so bump the count
                     curState++;

                     //
                     //  Null out the new set to indicate we adopted it. This
                     //  will cause the creation of a new set on the next time
                     //  around the loop.
                     //
                     newSet = 0;
                 }

                 //
                 //  Now set this state in the transition table's entry for this
                 //  element (using its index), with the DFA state we will move
                 //  to from the current state when we see this input element.
                 //
                 transEntry[elemIndex] = stateIndex;

                 // Expand the arrays if we're full
                 if (curState == curArraySize)
                 {
                     //
                     //  Yikes, we overflowed the initial array size, so we've
                     //  got to expand all of these arrays. So adjust up the
                     //  size by 50% and allocate new arrays.
                     //
                     const unsigned int newSize = (unsigned int)(curArraySize * 1.5);
                     const CMStateSet** newToDo = new const CMStateSet*[newSize];
                     bool* newFinalFlags = new bool[newSize];
                     unsigned int** newTransTable = new unsigned int*[newSize];

                     // Copy over all of the existing content
                     for (unsigned int expIndex = 0; expIndex < curArraySize; expIndex++)
                     {
                         newToDo[expIndex] = statesToDo[expIndex];
                         newFinalFlags[expIndex] = fFinalStateFlags[expIndex];
                         newTransTable[expIndex] = fTransTable[expIndex];
                     }

                     // Clean up the old stuff
                     delete [] statesToDo;
                     delete [] fFinalStateFlags;
                     delete [] fTransTable;

                     // Store the new array size and pointers
                     curArraySize = newSize;
                     statesToDo = newToDo;
                     fFinalStateFlags = newFinalFlags;
                     fTransTable = newTransTable;
                 }
             }
         }
     }

     // Store the current state count in the trans table size
     fTransTableSize = curState;

     // If the last temp set was not stored, then clean it up
     if (newSet)
         delete newSet;

     //
     //  Now we can clean up all of the temporary data that was needed during
     //  DFA build.
     //
     delete fHeadNode;
     fHeadNode = 0;

     for (index = 0; index < fLeafCount; index++)
         delete fFollowList[index];

     for (index = 0; index < curState; index++)
         delete (CMStateSet*)statesToDo[index];

     delete [] fLeafList;
     delete [] fFollowList;
     delete [] statesToDo;
 }


 CMNode* DFAContentModel::buildSyntaxTree(const ContentSpecNode* const curNode)
 {
     // Initialize a return node pointer
     CMNode* retNode = 0;

     // Get the spec type of the passed node
     const ContentSpecNode::NodeTypes curType = curNode->getType();

     if (curType == ContentSpecNode::Leaf)
     {
         //
         //  Create a new leaf node, and pass it the current leaf count, which
         //  is its DFA state position. Bump the leaf count after storing it.
         //  This makes the positions zero based since we store first and then
         //  increment.
         //
         retNode = new CMLeaf(curNode->getElemId(), fLeafCount++);
     }
      else
     {
         //
         //  Its not a leaf, so we have to recurse its left and maybe right
         //  nodes. Save both values before we recurse and trash the node.
         //
         const ContentSpecNode* leftNode = curNode->getFirst();
         const ContentSpecNode* rightNode = curNode->getSecond();

         if ((curType == ContentSpecNode::Choice)
         ||   (curType == ContentSpecNode::Sequence))
         {
             //
             //  Recurse on both children, and return a binary op node with the
             //  two created sub nodes as its children. The node type is the
             //  same type as the source.
             //
             CMNode* newLeft = buildSyntaxTree(leftNode);
             CMNode* newRight = buildSyntaxTree(rightNode);
             retNode = new CMBinaryOp(curType, newLeft, newRight);
         }
          else if (curType == ContentSpecNode::ZeroOrMore)
         {
             // This one is fine as is, just change to our form
             retNode = new CMUnaryOp(curType, buildSyntaxTree(leftNode));
         }
          else if (curType == ContentSpecNode::ZeroOrOne)
         {
             // Convert to (x|epsilon)
             CMNode* newLeft = buildSyntaxTree(leftNode);
             retNode = new CMBinaryOp
             (
                 ContentSpecNode::Choice
                 , newLeft
                 , new CMLeaf(gEpsilonFakeId)
             );
         }
          else if (curType == ContentSpecNode::OneOrMore)
         {
             // Convert to (x,x*)
             CMNode* newLeft = buildSyntaxTree(leftNode);
             CMNode* newLeft2 = buildSyntaxTree(leftNode);
             retNode = new CMBinaryOp
             (
                 ContentSpecNode::Sequence
                 , newLeft
                 , new CMUnaryOp(ContentSpecNode::ZeroOrMore, newLeft2)
             );
         }
          else
         {
             ThrowXML(RuntimeException, XML4CExcepts::CM_UnknownCMSpecType);
         }
     }
     return retNode;
 }


 void DFAContentModel::calcFollowList(CMNode* const curNode)
 {
     // Get the spec type of the passed node
     const ContentSpecNode::NodeTypes curType = curNode->getType();

     if (curType == ContentSpecNode::Choice)
     {
         // Just recurse
         calcFollowList(((CMBinaryOp*)curNode)->getLeft());
         calcFollowList(((CMBinaryOp*)curNode)->getRight());
     }
      else if (curType == ContentSpecNode::Sequence)
     {
         // Recurse before we process this node
         calcFollowList(((CMBinaryOp*)curNode)->getLeft());
         calcFollowList(((CMBinaryOp*)curNode)->getRight());

         //
         //  Now handle our level. We use our left child's last pos set and our
         //  right child's first pos set, so get them now for convenience.
         //
         const CMStateSet& last  = ((CMBinaryOp*)curNode)->getLeft()->getLastPos();
         const CMStateSet& first = ((CMBinaryOp*)curNode)->getRight()->getFirstPos();

         //
         //  Now, for every position which is in our left child's last set
         //  add all of the states in our right child's first set to the
         //  follow set for that position.
         //
         for (unsigned int index = 0; index < fLeafCount; index++)
         {
             if (last.getBit(index))
                 *fFollowList[index] |= first;
         }
     }
      else if (curType == ContentSpecNode::ZeroOrMore)
     {
         // Recurse first
         calcFollowList(((CMUnaryOp*)curNode)->getChild());

         //
         //  Now handle our level. We use our own first and last position
         //  sets, so get them up front.
         //
         const CMStateSet& first = curNode->getFirstPos();
         const CMStateSet& last  = curNode->getLastPos();

         //
         //  For every position which is in our last position set, add all
         //  of our first position states to the follow set for that
         //  position.
         //
         for (unsigned int index = 0; index < fLeafCount; index++)
         {
             if (last.getBit(index))
                 *fFollowList[index] |= first;
         }
     }
      else if ((curType == ContentSpecNode::OneOrMore)
           ||  (curType == ContentSpecNode::ZeroOrOne))
     {
         ThrowXML1(RuntimeException, XML4CExcepts::CM_NotValidForSpecType, "CalcFollowList");
     }
 }


 bool DFAContentModel::isAmbiguous() const
 {
     unsigned int index;

     //
     //  Run through the current leaves and remember the min and max element
     //  ids. These will be used to create a bit set that will map to element
     //  ids (adjusted by the min value.)
     //
     //  Do fLeafCount - 1 because we don't want to pick up the 'EOC' node,
     //  which has a very large dummy value in it.
     //
     unsigned int minId = 0xFFFFFFFF;
     unsigned int maxId = 0;
     for (index = 0; index < fLeafCount - 1; index++)
     {
         const unsigned int curId = fLeafList[index]->getId();
         if (curId < minId)
             minId = curId;

         if (curId > maxId)
             maxId = curId;
     }

     //
     //  Ok, now we can create a range value that represents the spread
     //  between the min/max element value. The range can never be larger
     //  than the number of elements in the content model, so it would
     //  never be outrageously large unless the content model was just
     //  totally pathological.
     //
     //  With this number we can create a state set that has a bit per
     //  possible entry in the leaf array.
     //
     const unsigned int idRange = (maxId - minId) + 1;
     CMStateSet idSet(idRange);

     // Check each follow list
     for (index = 0; index < fLeafCount - 1; index++)
     {
         // Get the current follow list set
         const CMStateSet* curSet = fFollowList[index];

         //
         //  For each possible leaf, we now go through and get the element
         //  id, adjust it to zero base it, then check it in the idSet. If
         //  that bit is already on, its ambiguous. Else set that bit and
         //  keep going.
         //
         for (unsigned int inner = 0; inner < fLeafCount - 1; inner++)
         {
             // If this bit is not on in the follow set, skip to next
             if (!curSet->getBit(inner))
                 continue;

             // Get the adjusted id value for the element in the follow list
             const unsigned int adjustedId = fLeafList[inner]->getId() - minId;

             if (idSet.getBit(adjustedId))
                 return true;
             idSet.setBit(adjustedId);
         }

         // Zero out the bitset for the next round
         idSet.zeroBits();
     }
     return false;
 }


 //
 //  -1 is used to represent bad transitions in the transition table entry for
 //  each state. So each entry is initialized to an all -1 array. This method
 //  creates a new entry and initializes it.
 //
 unsigned int* DFAContentModel::makeDefStateList() const
 {
     unsigned int* retArray = new unsigned int[fElemMapSize];
     for (unsigned int index = 0; index < fElemMapSize; index++)
         retArray[index] = -1;
     return retArray;
 }


 int DFAContentModel::postTreeBuildInit(         CMNode* const   nodeCur
                                         , const unsigned int    curIndex)
 {
     // Set the maximum states on this node
     nodeCur->setMaxStates(fLeafCount);

     // Get the spec type of the passed node
     const ContentSpecNode::NodeTypes curType = nodeCur->getType();

     // Get a copy of the index we can modify
     unsigned int newIndex = curIndex;

     // Recurse as required
     if ((curType == ContentSpecNode::Choice)
     ||  (curType == ContentSpecNode::Sequence))
     {
         newIndex = postTreeBuildInit(((CMBinaryOp*)nodeCur)->getLeft(), newIndex);
         newIndex = postTreeBuildInit(((CMBinaryOp*)nodeCur)->getRight(), newIndex);
     }
      else if (curType == ContentSpecNode::ZeroOrMore)
     {
         newIndex = postTreeBuildInit(((CMUnaryOp*)nodeCur)->getChild(), newIndex);
     }
      else if (curType == ContentSpecNode::Leaf)
     {
         //
         //  Put this node in the leaf list at the current index if its
         //  a non-epsilon leaf.
         //
         if (((CMLeaf*)nodeCur)->getId() != gEpsilonFakeId)
             fLeafList[newIndex++] = (CMLeaf*)nodeCur;
     }
      else
     {
         ThrowXML(RuntimeException, XML4CExcepts::CM_UnknownCMSpecType);
     }
     return newIndex;
 }