blob: 7273da9c202ef0afc3fdf4193e06a317a13b91b2 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999,2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xerces.validators.common;
import org.apache.xerces.framework.XMLContentSpec;
import org.apache.xerces.utils.ImplementationMessages;
import org.apache.xerces.utils.QName;
import org.apache.xerces.validators.schema.EquivClassComparator;
//import org.apache.xerces.utils.StringPool;
/**
* DFAContentModel is the derivative of ContentModel that does
* all of the non-trivial element content validation. This class does
* the conversion from the regular expression to the DFA that
* it then uses in its validation algorithm.
* <p>
* <b>Note:</b> Upstream work insures that this class will never see
* a content model with PCDATA in it. Any model with PCDATA is 'mixed'
* and is handled via the MixedContentModel class since mixed models
* are very constrained in form and easily handled via a special case.
* This also makes implementation of this class much easier.
*
* @version $Id$
*/
public class DFAContentModel
implements XMLContentModel {
//
// Constants
//
// special strings
/** Epsilon string. */
//private static final String fEpsilonString = "<<CMNODE_EPSILON>>";
private static final int EPSILON = -2;
/** End-of-content string. */
//private static final String fEOCString = "<<CMNODE_EOC>>";
private static final int EOC = -3;
// debugging
/** Set to true to debug content model validation. */
private static final boolean DEBUG_VALIDATE_CONTENT = false;
//
// Data
//
/* this is the EquivClassComparator object */
private EquivClassComparator comparator = null;
/**
* This is the map of unique input symbol elements to indices into
* each state's per-input symbol transition table entry. This is part
* of the built DFA information that must be kept around to do the
* actual validation.
*/
private QName fElemMap[] = null;
/**
* This is a map of whether the element map contains information
* related to ANY models.
*/
private int fElemMapType[] = null;
/** The element map size. */
private int fElemMapSize = 0;
/** Boolean to allow DTDs to validate even with namespace support. */
private boolean fDTD;
/**
* The string index for the 'end of content' string that we add to
* the string pool. This is used as the special name of an element
* that represents the end of the syntax tree.
*/
private int fEOCIndex = 0;
/**
* The NFA position of the special EOC (end of content) node. This
* is saved away since it's used during the DFA build.
*/
private int fEOCPos = 0;
/**
* The string index for the 'epsilon' string that we add to the
* string pool. This represents epsilon node transitions in the
* syntax tree.
*/
private int fEpsilonIndex = 0;
/**
* This is an array of booleans, one per state (there are
* fTransTableSize states in the DFA) that indicates whether that
* state is a final state.
*/
private boolean fFinalStateFlags[] = null;
/**
* The list of follow positions for each NFA position (i.e. for each
* non-epsilon leaf node.) This is only used during the building of
* the DFA, and is let go afterwards.
*/
private CMStateSet fFollowList[] = null;
/**
* This is the head node of our intermediate representation. It is
* only non-null during the building of the DFA (just so that it
* does not have to be passed all around.) Once the DFA is built,
* this is no longer required so its nulled out.
*/
private CMNode fHeadNode = null;
/**
* The count of leaf nodes. This is an important number that set some
* limits on the sizes of data structures in the DFA process.
*/
private int fLeafCount = 0;
/**
* An array of non-epsilon leaf nodes, which is used during the DFA
* build operation, then dropped.
*/
private CMLeaf fLeafList[] = null;
/** Array mapping ANY types to the leaf list. */
private int fLeafListType[] = null;
private ContentLeafNameTypeVector fLeafNameTypeVector = null;
/**
* The string pool of our parser session. This is set during construction
* and kept around.
*/
//private StringPool fStringPool = null;
/**
* This is the transition table that is the main by product of all
* of the effort here. It is an array of arrays of ints. The first
* dimension is the number of states we end up with in the DFA. The
* second dimensions is the number of unique elements in the content
* model (fElemMapSize). Each entry in the second dimension indicates
* the new state given that input for the first dimension's start
* state.
* <p>
* The fElemMap array handles mapping from element indexes to
* positions in the second dimension of the transition table.
*/
private int fTransTable[][] = null;
/**
* The number of valid entries in the transition table, and in the other
* related tables such as fFinalStateFlags.
*/
private int fTransTableSize = 0;
/**
* Flag that indicates that even though we have a "complicated"
* content model, it is valid to have no content. In other words,
* all parts of the content model are optional. For example:
* <pre>
* &lt;!ELEMENT AllOptional (Optional*,NotRequired?)&gt;
* </pre>
*/
private boolean fEmptyContentIsValid = false;
// temp variables
/** Temporary qualified name. */
private QName fQName = new QName();
//
// Constructors
//
/**
* Constructs a DFA content model.
*
* @param stringPool The string pool.
* @param syntaxTree The syntax tree of the content model.
* @param leafCount The number of leaves.
*
* @exception CMException Thrown if DMA can't be built.
*/
// public DFAContentModel(StringPool stringPool,
public DFAContentModel( CMNode syntaxTree,
int leafCount) throws CMException {
this(syntaxTree, leafCount, false);
}
/**
* Constructs a DFA content model.
*
* @param stringPool The string pool.
* @param syntaxTree The syntax tree of the content model.
* @param leafCount The number of leaves.
*
* @exception CMException Thrown if DMA can't be built.
*/
// public DFAContentModel(StringPool stringPool,
public DFAContentModel( CMNode syntaxTree,
int leafCount, boolean dtd) throws CMException {
// Store away our index and pools in members
//fStringPool = stringPool;
fLeafCount = leafCount;
//
// Create some string pool indexes that represent the names of some
// magical nodes in the syntax tree.
//
/*** Defect 945 ***
if (fEpsilonString == null)
{
fEpsilonString = new String("<<CMNODE_EPSILON>>");
fEpsilonString.intern();
fEOCString = new String("<<CMNODE_EOC>>");
fEOCString.intern();
}
/***/
// fEpsilonIndex = fStringPool.addSymbol(fEpsilonString);
// fEOCIndex = fStringPool.addSymbol(fEOCString);
fEpsilonIndex = EPSILON;
fEOCIndex = EOC;
fDTD = dtd;
//
// Ok, so lets grind through the building of the DFA. This method
// handles the high level logic of the algorithm, but it uses a
// number of helper classes to do its thing.
//
// In order to avoid having hundreds of references to the error and
// string handlers around, this guy and all of his helper classes
// just throw a simple exception and we then pass it along.
//
buildDFA(syntaxTree);
}
//
// XMLContentModel methods
//
/**
* Check that the specified content is valid according to this
* content model. This method can also be called to do 'what if'
* testing of content models just to see if they would be valid.
* <p>
* A value of -1 in the children array indicates a PCDATA node. All other
* indexes will be positive and represent child elements. The count can be
* zero, since some elements have the EMPTY content model and that must be
* confirmed.
*
* @param children The children of this element. Each integer is an index within
* the <code>StringPool</code> of the child element name. An index
* of -1 is used to indicate an occurrence of non-whitespace character
* data.
* @param offset Offset into the array where the children starts.
* @param length The number of entries in the <code>children</code> array.
*
* @return The value -1 if fully valid, else the 0 based index of the child
* that first failed. If the value returned is equal to the number
* of children, then the specified children are valid but additional
* content is required to reach a valid ending state.
*
* @exception CMException Thrown on error.
*/
public int validateContent(QName children[], int offset, int length) throws CMException {
if (DEBUG_VALIDATE_CONTENT)
System.out.println("DFAContentModel#validateContent");
//
// A DFA content model must *always* have at least 1 child
// so a failure is given if no children present.
//
// Defect 782: This is an incorrect statement because a DFA
// content model is also used for constructions such as:
//
// (Optional*,NotRequired?)
//
// where a perfectly valid content would be NO CHILDREN.
// Therefore, if there are no children, we must check to
// see if the CMNODE_EOC marker is a valid start state! -Ac
//
if (length == 0) {
if (DEBUG_VALIDATE_CONTENT) {
System.out.println("!!! no children");
System.out.println("elemMap="+fElemMap);
for (int i = 0; i < fElemMap.length; i++) {
int uriIndex = fElemMap[i].uri;
int localpartIndex = fElemMap[i].localpart;
/*
System.out.println("fElemMap["+i+"]="+uriIndex+","+
localpartIndex+" ("+
fStringPool.toString(uriIndex)+", "+
fStringPool.toString(localpartIndex)+
')');
*/
}
System.out.println("EOCIndex="+fEOCIndex);
}
return fEmptyContentIsValid ? -1 : 0;
} // if child count == 0
//
// Lets loop through the children in the array and move our way
// through the states. Note that we use the fElemMap array to map
// an element index to a state index.
//
int curState = 0;
for (int childIndex = 0; childIndex < length; childIndex++)
{
// Get the current element index out
final QName curElem = children[offset + childIndex];
//System.out.println("children["+(offset+childIndex)+"]: "+curElem);
// Look up this child in our element map
int elemIndex = 0;
for (; elemIndex < fElemMapSize; elemIndex++)
{
int type = fElemMapType[elemIndex] & 0x0f ;
if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
//System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
if (fDTD) {
if (fElemMap[elemIndex].rawname == curElem.rawname) {
break;
}
}
else {
if (fElemMap[elemIndex].uri==curElem.uri
&& fElemMap[elemIndex].localpart == curElem.localpart)
break;
}
}
else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
int uri = fElemMap[elemIndex].uri;
if (uri == -1 || uri == curElem.uri) {
break;
}
}
else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
if (curElem.uri == -1) {
break;
}
}
else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
if (fElemMap[elemIndex].uri != curElem.uri) {
break;
}
}
}
// If we didn't find it, then obviously not valid
if (elemIndex == fElemMapSize) {
if (DEBUG_VALIDATE_CONTENT) {
System.out.println("!!! didn't find it");
System.out.println("curElem : " +curElem );
for (int i=0; i<fElemMapSize; i++) {
System.out.println("fElemMap["+i+"] = " +fElemMap[i] );
System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] );
}
}
return childIndex;
}
//
// Look up the next state for this input symbol when in the
// current state.
//
curState = fTransTable[curState][elemIndex];
// If its not a legal transition, then invalid
if (curState == -1) {
if (DEBUG_VALIDATE_CONTENT)
System.out.println("!!! not a legal transition");
return childIndex;
}
}
//
// We transitioned all the way through the input list. However, that
// does not mean that we ended in a final state. So check whether
// our ending state is a final state.
//
if (DEBUG_VALIDATE_CONTENT)
System.out.println("curState="+curState+", childCount="+length);
if (!fFinalStateFlags[curState])
return length;
// success!
return -1;
}
private boolean isEqual(QName name1, QName name2) {
return name1.localpart == name2.localpart &&
name1.uri == name2.uri;
}
public int validateContentSpecial(QName children[], int offset, int length) throws Exception{
if (DEBUG_VALIDATE_CONTENT)
System.out.println("DFAContentModel#validateContentSpecial");
if (comparator==null) {
return validateContent(children,offset, length);
}
if (length == 0) {
if (DEBUG_VALIDATE_CONTENT) {
System.out.println("!!! no children");
System.out.println("elemMap="+fElemMap);
for (int i = 0; i < fElemMap.length; i++) {
int uriIndex = fElemMap[i].uri;
int localpartIndex = fElemMap[i].localpart;
}
System.out.println("EOCIndex="+fEOCIndex);
}
return fEmptyContentIsValid ? -1 : 0;
} // if child count == 0
//
// Lets loop through the children in the array and move our way
// through the states. Note that we use the fElemMap array to map
// an element index to a state index.
//
int curState = 0;
for (int childIndex = 0; childIndex < length; childIndex++)
{
// Get the current element index out
final QName curElem = children[offset + childIndex];
// Look up this child in our element map
int elemIndex = 0;
for (; elemIndex < fElemMapSize; elemIndex++)
{
int type = fElemMapType[elemIndex] & 0x0f;
if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
if (comparator.isEquivalentTo(curElem,fElemMap[elemIndex] ) )
break;
}
else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
int uri = fElemMap[elemIndex].uri;
if (uri == -1 || uri == curElem.uri) {
break;
}
}
else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
if (curElem.uri == -1) {
break;
}
}
else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
if (fElemMap[elemIndex].uri != curElem.uri) {
break;
}
}
}
// If we didn't find it, then obviously not valid
if (elemIndex == fElemMapSize) {
if (DEBUG_VALIDATE_CONTENT) {
System.out.println("!!! didn't find it");
System.out.println("curElem : " +curElem );
for (int i=0; i<fElemMapSize; i++) {
System.out.println("fElemMap["+i+"] = " +fElemMap[i] );
System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] );
}
}
return childIndex;
}
//
// Look up the next state for this input symbol when in the
// current state.
//
curState = fTransTable[curState][elemIndex];
// If its not a legal transition, then invalid
if (curState == -1) {
if (DEBUG_VALIDATE_CONTENT)
System.out.println("!!! not a legal transition");
return childIndex;
}
}
//
// We transitioned all the way through the input list. However, that
// does not mean that we ended in a final state. So check whether
// our ending state is a final state.
//
if (DEBUG_VALIDATE_CONTENT)
System.out.println("curState="+curState+", childCount="+length);
if (!fFinalStateFlags[curState])
return length;
// success!
return -1;
}
public void setEquivClassComparator(EquivClassComparator comparator) {
this.comparator = comparator;
}
/**
* Returns information about which elements can be placed at a particular point
* in the passed element's content model.
* <p>
* Note that the incoming content model to test must be valid at least up to
* the insertion point. If not, then -1 will be returned and the info object
* will not have been filled in.
* <p>
* If, on return, the info.isValidEOC flag is set, then the 'insert after'
* element is a valid end of content. In other words, nothing needs to be
* inserted after it to make the parent element's content model valid.
*
* @param fullyValid Only return elements that can be inserted and still
* maintain the validity of subsequent elements past the
* insertion point (if any). If the insertion point is at
* the end, and this is true, then only elements that can
* be legal final states will be returned.
* @param info An object that contains the required input data for the method,
* and which will contain the output information if successful.
*
* @return The value -1 if fully valid, else the 0 based index of the child
* that first failed before the insertion point. If the value
* returned is equal to the number of children, then the specified
* children are valid but additional content is required to reach a
* valid ending state.
*
* @see InsertableElementsInfo
*/
public int whatCanGoHere(boolean fullyValid,
InsertableElementsInfo info) throws CMException {
//
// First, lets make sure that the passed in current content is valid
// up to the insert point.
//
int curState = 0;
for (int childIndex = 0; childIndex < info.insertAt; childIndex++)
{
// Get the current element index out
final QName curElem = info.curChildren[childIndex];
// Look up this child in our element map
int elemIndex = 0;
for (; elemIndex < fElemMapSize; elemIndex++)
{
if (fElemMap[elemIndex].uri == curElem.uri &&
fElemMap[elemIndex].localpart == curElem.localpart)
break;
}
// If we didn't find it, then not valid so return failure index
if (elemIndex == fElemMapSize)
return childIndex;
//
// Look up the next state for this input symbol when in the
// current state.
//
curState = fTransTable[curState][elemIndex];
// If its not a legal transition, then invalid
if (curState == -1)
return childIndex;
}
//
// If we got here, then curState is set to the state that would be
// the transition before the insertion point. We let this sit until
// below, where it will be needed.
//
final int insertState = curState;
//
// Set any stuff we can know right off the bat for all cases. We know
// that this content model will never get PCData nodes because that
// is a mixed model. We can also set the valid EOC flag at this point
// since its just based on the state we ended in at the insert point.
//
info.canHoldPCData = false;
info.isValidEOC = fFinalStateFlags[insertState];
//
// Set the results count member and then see if we need to reallocate
// the outgoing arrays.
//
info.resultsCount = fElemMapSize;
if ((info.results == null) || (info.results.length < info.resultsCount))
info.results = new boolean[info.resultsCount];
if ((info.possibleChildren == null)
|| (info.possibleChildren.length < info.resultsCount))
{
info.possibleChildren = new QName[info.resultsCount];
for (int i = 0; i < info.possibleChildren.length; i++) {
info.possibleChildren[i] = new QName();
}
}
//
// Fill in the possible children array, from our array. For each one
// of them, see if there is a valid transition from our insert at
// state on that input. Mark the results index for that child according
// to whether there is a transition or not.
//
for (int index = 0; index < fElemMapSize; index++)
{
info.possibleChildren[index].setValues(fElemMap[index]);
info.results[index] = (fTransTable[insertState][index] != -1);
}
//
// If the fully valid parameter is set, then we have to go through
// the grunt work of plugging in each possible insertable element
// and running the DFA from that point to see if it would create a
// fully valid content model.
//
// <TBD> When/if the validator is changed to be stateful, then change
// this stuff to start the exploratory validation at the insert state,
// not from the start each time.
//
if (fullyValid)
{
for (int index = 0; index < info.resultsCount; index++)
{
// Don't need to consider this one since its not insertable
if (!info.results[index])
continue;
// Stick this element into the insert at spot
info.curChildren[info.insertAt] = info.possibleChildren[index];
// And validate it. If it fails, then this one loses
if (validateContent(info.curChildren, 0, info.childCount) != -1)
info.results[index] = false;
}
}
return -1;
}
public ContentLeafNameTypeVector getContentLeafNameTypeVector() {
return fLeafNameTypeVector;
}
//
// Private methods
//
/**
* Builds the internal DFA transition table from the given syntax tree.
*
* @param syntaxTree The syntax tree.
*
* @exception CMException Thrown if DFA cannot be built.
*/
private void buildDFA(CMNode syntaxTree) throws CMException
{
//
// The first step we need to take is to rewrite the content model
// using our CMNode objects, and in the process get rid of any
// repetition short cuts, converting them into '*' style repetitions
// or getting rid of repetitions altogether.
//
// The conversions done are:
//
// x+ -> (x|x*)
// x? -> (x|epsilon)
//
// This is a relatively complex scenario. What is happening is that
// we create a top level binary node of which the special EOC value
// is set as the right side node. The the left side is set to the
// rewritten syntax tree. The source is the original content model
// info from the decl pool. The rewrite is done by buildSyntaxTree()
// which recurses the decl pool's content of the element and builds
// a new tree in the process.
//
// Note that, during this operation, we set each non-epsilon leaf
// node's DFA state position and count the number of such leafs, which
// is left in the fLeafCount member.
//
// The nodeTmp object is passed in just as a temp node to use during
// the recursion. Otherwise, we'd have to create a new node on every
// level of recursion, which would be piggy in Java (as is everything
// for that matter.)
//
fQName.setValues(-1, fEOCIndex, fEOCIndex);
CMLeaf nodeEOC = new CMLeaf(fQName);
fHeadNode = new CMBinOp
(
XMLContentSpec.CONTENTSPECNODE_SEQ
, syntaxTree
, nodeEOC
);
//
// And handle specially the EOC node, which also must be numbered
// and counted as a non-epsilon leaf node. It could not be handled
// in the above tree build because it was created before all that
// started. We save the EOC position since its used during the DFA
// building loop.
//
fEOCPos = fLeafCount;
nodeEOC.setPosition(fLeafCount++);
//
// Ok, so now we have to iterate the new tree and do a little more
// work now that we know the leaf count. One thing we need to do is
// to calculate the first and last position sets of each node. This
// is cached away in each of the nodes.
//
// Along the way we also set the leaf count in each node as the
// maximum state count. They must know this in order to create their
// first/last pos sets.
//
// We also need to build an array of references to the non-epsilon
// leaf nodes. Since we iterate it in the same way as before, this
// will put them in the array according to their position values.
//
fLeafList = new CMLeaf[fLeafCount];
fLeafListType = new int[fLeafCount];
postTreeBuildInit(fHeadNode, 0);
//
// And, moving onward... We now need to build the follow position
// sets for all the nodes. So we allocate an array of state sets,
// one for each leaf node (i.e. each DFA position.)
//
fFollowList = new CMStateSet[fLeafCount];
for (int index = 0; index < fLeafCount; index++)
fFollowList[index] = new CMStateSet(fLeafCount);
calcFollowList(fHeadNode);
//
// And finally the big push... Now we build the DFA using all the
// states and the tree we've built up. First we set up the various
// data structures we are going to use while we do this.
//
// First of all we need an array of unique element names in our
// content model. For each transition table entry, we need a set of
// contiguous indices to represent the transitions for a particular
// input element. So we need to a zero based range of indexes that
// map to element types. This element map provides that mapping.
//
fElemMap = new QName[fLeafCount];
fElemMapType = new int[fLeafCount];
fElemMapSize = 0;
for (int outIndex = 0; outIndex < fLeafCount; outIndex++)
{
fElemMap[outIndex] = new QName();
if ( (fLeafListType[outIndex] & 0x0f) != 0 ) {
if (fLeafNameTypeVector == null) {
fLeafNameTypeVector = new ContentLeafNameTypeVector();
}
}
// Get the current leaf's element index
final QName element = fLeafList[outIndex].getElement();
// See if the current leaf node's element index is in the list
int inIndex = 0;
for (; inIndex < fElemMapSize; inIndex++)
{
if (fDTD) {
if (fElemMap[inIndex].rawname == element.rawname) {
break;
}
}
else {
if (fElemMap[inIndex].uri == element.uri &&
fElemMap[inIndex].localpart == element.localpart &&
fElemMapType[inIndex] == fLeafListType[outIndex] )
break;
}
}
// If it was not in the list, then add it, if not the EOC node
if (inIndex == fElemMapSize) {
//if (fDTD) {
// fElemMap[fElemMapSize].setValues(-1, element.rawname, element.rawname, -1);
//}
//else {
fElemMap[fElemMapSize].setValues(element);
//}
fElemMapType[fElemMapSize] = fLeafListType[outIndex];
fElemMapSize++;
}
}
// set up the fLeafNameTypeVector object if there is one.
if (fLeafNameTypeVector != null) {
fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
}
//
// Next lets create some arrays, some that that hold transient
// information during the DFA build and some that are permament.
// These are kind of sticky since we cannot know how big they will
// get, but we don't want to use any Java collections because of
// performance.
//
// Basically they will probably be about fLeafCount*2 on average,
// but can be as large as 2^(fLeafCount*2), worst case. So we start
// with fLeafCount*4 as a middle ground. This will be very unlikely
// to ever have to expand, though it if does, the overhead will be
// somewhat ugly.
//
int curArraySize = fLeafCount * 4;
CMStateSet[] statesToDo = new CMStateSet[curArraySize];
fFinalStateFlags = new boolean[curArraySize];
fTransTable = new int[curArraySize][];
//
// Ok we start with the initial set as the first pos set of the
// head node (which is the seq node that holds the content model
// and the EOC node.)
//
CMStateSet setT = fHeadNode.firstPos();
//
// Init our two state flags. Basically the unmarked state counter
// is always chasing the current state counter. When it catches up,
// that means we made a pass through that did not add any new states
// to the lists, at which time we are done. We could have used a
// expanding array of flags which we used to mark off states as we
// complete them, but this is easier though less readable maybe.
//
int unmarkedState = 0;
int curState = 0;
//
// Init the first transition table entry, and put the initial state
// into the states to do list, then bump the current state.
//
fTransTable[curState] = makeDefStateList();
statesToDo[curState] = setT;
curState++;
//
// Ok, almost done with the algorithm... We now enter the
// loop where we go until the states done counter catches up with
// the states to do counter.
//
while (unmarkedState < curState)
{
//
// Get the first unmarked state out of the list of states to do.
// And get the associated transition table entry.
//
setT = statesToDo[unmarkedState];
int[] transEntry = fTransTable[unmarkedState];
// Mark this one final if it contains the EOC state
fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
// Bump up the unmarked state count, marking this state done
unmarkedState++;
// Loop through each possible input symbol in the element map
CMStateSet newSet = null;
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
{
//
// Build up a set of states which is the union of all of
// the follow sets of DFA positions that are in the current
// state. If we gave away the new set last time through then
// create a new one. Otherwise, zero out the existing one.
//
if (newSet == null)
newSet = new CMStateSet(fLeafCount);
else
newSet.zeroBits();
for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++)
{
// If this leaf index (DFA position) is in the current set...
if (setT.getBit(leafIndex))
{
//
// If this leaf is the current input symbol, then we
// want to add its follow list to the set of states to
// transition to from the current state.
//
final QName leaf = fLeafList[leafIndex].getElement();
final QName element = fElemMap[elemIndex];
if (fDTD) {
if (leaf.rawname == element.rawname) {
newSet.union(fFollowList[leafIndex]);
}
}
else {
if (leaf.uri == element.uri &&
leaf.localpart == element.localpart)
newSet.union(fFollowList[leafIndex]);
}
}
}
//
// If this new set is not empty, then see if its in the list
// of states to do. If not, then add it.
//
if (!newSet.isEmpty())
{
//
// Search the 'states to do' list to see if this new
// state set is already in there.
//
int stateIndex = 0;
for (; stateIndex < curState; stateIndex++)
{
if (statesToDo[stateIndex].isSameSet(newSet))
break;
}
// If we did not find it, then add it
if (stateIndex == curState)
{
//
// Put this new state into the states to do and init
// a new entry at the same index in the transition
// table.
//
statesToDo[curState] = newSet;
fTransTable[curState] = makeDefStateList();
// We now have a new state to do so bump the count
curState++;
//
// Null out the new set to indicate we adopted it.
// This will cause the creation of a new set on the
// next time around the loop.
//
newSet = null;
}
//
// Now set this state in the transition table's entry
// for this element (using its index), with the DFA
// state we will move to from the current state when we
// see this input element.
//
transEntry[elemIndex] = stateIndex;
// Expand the arrays if we're full
if (curState == curArraySize)
{
//
// Yikes, we overflowed the initial array size, so
// we've got to expand all of these arrays. So adjust
// up the size by 50% and allocate new arrays.
//
final int newSize = (int)(curArraySize * 1.5);
CMStateSet[] newToDo = new CMStateSet[newSize];
boolean[] newFinalFlags = new boolean[newSize];
int[][] newTransTable = new int[newSize][];
// Copy over all of the existing content
for (int expIndex = 0; expIndex < curArraySize; expIndex++)
{
newToDo[expIndex] = statesToDo[expIndex];
newFinalFlags[expIndex] = fFinalStateFlags[expIndex];
newTransTable[expIndex] = fTransTable[expIndex];
}
// Store the new array size
curArraySize = newSize;
statesToDo = newToDo;
fFinalStateFlags = newFinalFlags;
fTransTable = newTransTable;
}
}
}
}
// Check to see if we can set the fEmptyContentIsValid flag.
fEmptyContentIsValid = ((CMBinOp)fHeadNode).getLeft().isNullable();
//
// And now we can say bye bye to the temp representation since we've
// built the DFA.
//
if (DEBUG_VALIDATE_CONTENT)
dumpTree(fHeadNode, 0);
fHeadNode = null;
fLeafList = null;
fFollowList = null;
}
/**
* Calculates the follow list of the current node.
*
* @param nodeCur The curent node.
*
* @exception CMException Thrown if follow list cannot be calculated.
*/
private void calcFollowList(CMNode nodeCur) throws CMException
{
// Recurse as required
if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
{
// Recurse only
calcFollowList(((CMBinOp)nodeCur).getLeft());
calcFollowList(((CMBinOp)nodeCur).getRight());
}
else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)
{
// Recurse first
calcFollowList(((CMBinOp)nodeCur).getLeft());
calcFollowList(((CMBinOp)nodeCur).getRight());
//
// Now handle our level. We use our left child's last pos
// set and our right child's first pos set, so go ahead and
// get them ahead of time.
//
final CMStateSet last = ((CMBinOp)nodeCur).getLeft().lastPos();
final CMStateSet first = ((CMBinOp)nodeCur).getRight().firstPos();
//
// Now, for every position which is in our left child's last set
// add all of the states in our right child's first set to the
// follow set for that position.
//
for (int index = 0; index < fLeafCount; index++)
{
if (last.getBit(index))
fFollowList[index].union(first);
}
}
else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
{
// Recurse first
calcFollowList(((CMUniOp)nodeCur).getChild());
//
// Now handle our level. We use our own first and last position
// sets, so get them up front.
//
final CMStateSet first = nodeCur.firstPos();
final CMStateSet last = nodeCur.lastPos();
//
// For every position which is in our last position set, add all
// of our first position states to the follow set for that
// position.
//
for (int index = 0; index < fLeafCount; index++)
{
if (last.getBit(index))
fFollowList[index].union(first);
}
}
else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
|| (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE))
{
throw new CMException(ImplementationMessages.VAL_NIICM);
}
}
/**
* Dumps the tree of the current node to standard output.
*
* @param nodeCur The current node.
* @param level The maximum levels to output.
*
* @exception CMException Thrown on error.
*/
private void dumpTree(CMNode nodeCur, int level) throws CMException
{
for (int index = 0; index < level; index++)
System.out.print(" ");
int type = nodeCur.type();
if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
|| (type == XMLContentSpec.CONTENTSPECNODE_SEQ))
{
if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
System.out.print("Choice Node ");
else
System.out.print("Seq Node ");
if (nodeCur.isNullable())
System.out.print("Nullable ");
System.out.print("firstPos=");
System.out.print(nodeCur.firstPos().toString());
System.out.print(" lastPos=");
System.out.println(nodeCur.lastPos().toString());
dumpTree(((CMBinOp)nodeCur).getLeft(), level+1);
dumpTree(((CMBinOp)nodeCur).getRight(), level+1);
}
else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
{
System.out.print("Rep Node ");
if (nodeCur.isNullable())
System.out.print("Nullable ");
System.out.print("firstPos=");
System.out.print(nodeCur.firstPos().toString());
System.out.print(" lastPos=");
System.out.println(nodeCur.lastPos().toString());
dumpTree(((CMUniOp)nodeCur).getChild(), level+1);
}
else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
{
System.out.print
(
"Leaf: (pos="
+ ((CMLeaf)nodeCur).getPosition()
+ "), "
+ ((CMLeaf)nodeCur).getElement()
+ "(elemIndex="
+ ((CMLeaf)nodeCur).getElement()
+ ") "
);
if (nodeCur.isNullable())
System.out.print(" Nullable ");
System.out.print("firstPos=");
System.out.print(nodeCur.firstPos().toString());
System.out.print(" lastPos=");
System.out.println(nodeCur.lastPos().toString());
}
else
{
throw new CMException(ImplementationMessages.VAL_NIICM);
}
}
/**
* -1 is used to represent bad transitions in the transition table
* entry for each state. So each entry is initialized to an all -1
* array. This method creates a new entry and initializes it.
*/
private int[] makeDefStateList()
{
int[] retArray = new int[fElemMapSize];
for (int index = 0; index < fElemMapSize; index++)
retArray[index] = -1;
return retArray;
}
/** Post tree build initialization. */
private int postTreeBuildInit(CMNode nodeCur, int curIndex) throws CMException
{
// Set the maximum states on this node
nodeCur.setMaxStates(fLeafCount);
// Recurse as required
if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY ||
(nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL ||
(nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
// REVISIT: Don't waste these structures.
QName qname = new QName(-1, -1, -1, ((CMAny)nodeCur).getURI());
fLeafList[curIndex] = new CMLeaf(qname, ((CMAny)nodeCur).getPosition());
fLeafListType[curIndex] = nodeCur.type();
curIndex++;
}
else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
|| (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ))
{
curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getLeft(), curIndex);
curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getRight(), curIndex);
}
else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
{
curIndex = postTreeBuildInit(((CMUniOp)nodeCur).getChild(), curIndex);
}
else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
{
//
// Put this node in the leaf list at the current index if its
// a non-epsilon leaf.
//
final QName node = ((CMLeaf)nodeCur).getElement();
if (node.localpart != fEpsilonIndex) {
fLeafList[curIndex] = (CMLeaf)nodeCur;
fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
curIndex++;
}
}
else
{
throw new CMException(ImplementationMessages.VAL_NIICM);
}
return curIndex;
}
} // class DFAContentModel