blob: 9d60915a334cfddf45f101e1700b6321c2e10dbe [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 1999-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Log$
* Revision 1.5 2003/05/16 21:43:20 knoaman
* Memory manager implementation: Modify constructors to pass in the memory manager.
*
* Revision 1.4 2003/05/15 18:48:27 knoaman
* Partial implementation of the configurable memory manager.
*
* Revision 1.3 2003/03/07 18:16:57 tng
* Return a reference instead of void for operator=
*
* Revision 1.2 2002/11/04 14:54:58 tng
* C++ Namespace Support.
*
* Revision 1.1.1.1 2002/02/01 22:22:38 peiyongz
* sane_include
*
* Revision 1.13 2001/11/21 14:30:13 knoaman
* Fix for UPA checking.
*
* Revision 1.12 2001/08/24 12:48:48 tng
* Schema: AllContentModel
*
* Revision 1.11 2001/08/21 16:06:11 tng
* Schema: Unique Particle Attribution Constraint Checking.
*
* Revision 1.10 2001/08/13 15:06:39 knoaman
* update <any> validation.
*
* Revision 1.9 2001/06/13 20:50:55 peiyongz
* fIsMixed: to handle mixed Content Model
*
* Revision 1.8 2001/05/11 13:27:18 tng
* Copyright update.
*
* Revision 1.7 2001/05/03 21:02:30 tng
* Schema: Add SubstitutionGroupComparator and update exception messages. By Pei Yong Zhang.
*
* Revision 1.6 2001/04/19 18:17:30 tng
* Schema: SchemaValidator update, and use QName in Content Model
*
* Revision 1.5 2001/03/21 21:56:27 tng
* Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar.
*
* Revision 1.4 2001/03/21 19:29:55 tng
* Schema: Content Model Updates, by Pei Yong Zhang.
*
* Revision 1.3 2001/02/27 18:32:32 tng
* Schema: Use XMLElementDecl instead of DTDElementDecl in Content Model.
*
* Revision 1.2 2001/02/27 14:48:52 tng
* Schema: Add CMAny and ContentLeafNameTypeVector, by Pei Yong Zhang
*
* Revision 1.1 2001/02/16 14:17:29 tng
* Schema: Move the common Content Model files that are shared by DTD
* and schema from 'DTD' folder to 'common' folder. By Pei Yong Zhang.
*
* Revision 1.4 2000/03/02 19:55:38 roddey
* This checkin includes many changes done while waiting for the
* 1.1.0 code to be finished. I can't list them all here, but a list is
* available elsewhere.
*
* Revision 1.3 2000/02/24 20:16:48 abagchi
* Swat for removing Log from API docs
*
* Revision 1.2 2000/02/09 21:42:37 abagchi
* Copyright swat
*
* Revision 1.1.1.1 1999/11/09 01:03:19 twl
* Initial checkin
*
* Revision 1.2 1999/11/08 20:45:38 rahul
* Swat for adding in Product name and CVS comment log variable.
*
*/
#if !defined(DFACONTENTMODEL_HPP)
#define DFACONTENTMODEL_HPP
#include <xercesc/util/XercesDefs.hpp>
#include <xercesc/util/ArrayIndexOutOfBoundsException.hpp>
#include <xercesc/framework/XMLContentModel.hpp>
#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
XERCES_CPP_NAMESPACE_BEGIN
class ContentSpecNode;
class CMLeaf;
class CMNode;
class CMStateSet;
//
// DFAContentModel is the heavy weight derivative of ContentModel that does
// all of the non-trivial element content validation. This guy does the full
// bore regular expression to DFA conversion to create a DFA that it then
// uses in its validation algorithm.
//
// NOTE: Upstream work ensures that this guy will never see a content model
// with PCDATA in it. Any model with PCDATA is 'mixed' and is handled
// via the MixedContentModel class, since mixed models are very
// constrained in form and easily handled via a special case. This
// also makes our life much easier here.
//
class DFAContentModel : public XMLContentModel
{
public:
    // -----------------------------------------------------------------------
    //  Construction and destruction
    //
    //  Both constructors take the DTD flag and the element's content spec
    //  node. The three-argument form defaults the memory manager to
    //  XMLPlatformUtils::fgMemoryManager; the four-argument form also takes
    //  the mixed-content flag and requires the manager to be given
    //  explicitly.
    // -----------------------------------------------------------------------
    DFAContentModel(const bool              dtd,
                    ContentSpecNode* const  elemContentSpec,
                    MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager);

    DFAContentModel(const bool              dtd,
                    ContentSpecNode* const  elemContentSpec,
                    const bool              isMixed,
                    MemoryManager* const    manager);

    virtual ~DFAContentModel();

    // -----------------------------------------------------------------------
    //  The virtual content model interface, as implemented by this class
    // -----------------------------------------------------------------------

    // Validates the given list of child element names against this model.
    // NOTE(review): the meaning of the int result is defined by the
    // implementation / base class, which is not visible here.
    virtual int validateContent(QName** const       children,
                                const unsigned int  childCount,
                                const unsigned int  emptyNamespaceId) const;

    // Variant of validateContent() that is additionally handed a grammar
    // resolver and a string pool.
    virtual int validateContentSpecial(QName** const            children,
                                       const unsigned int       childCount,
                                       const unsigned int       emptyNamespaceId,
                                       GrammarResolver* const   pGrammarResolver,
                                       XMLStringPool* const     pStringPool) const;

    // Schema Unique Particle Attribution constraint check (non-const).
    virtual void checkUniqueParticleAttribution(SchemaGrammar* const    pGrammar,
                                                GrammarResolver* const  pGrammarResolver,
                                                XMLStringPool* const    pStringPool,
                                                XMLValidator* const     pValidator,
                                                unsigned int* const     pContentSpecOrgURI);

    // Accessor for the content leaf name/type vector.
    virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const;

    // Transition table lookup; defined inline after the class.
    virtual unsigned int getNextState(const unsigned int currentState,
                                      const unsigned int elementIndex) const;

private:
    // -----------------------------------------------------------------------
    //  Declared but unimplemented: default construction, copy construction
    //  and assignment are not available for this class.
    // -----------------------------------------------------------------------
    DFAContentModel();
    DFAContentModel(const DFAContentModel&);
    DFAContentModel& operator=(const DFAContentModel&);

    // -----------------------------------------------------------------------
    //  Private helpers used while constructing the DFA
    // -----------------------------------------------------------------------
    void            buildDFA(ContentSpecNode* const curNode);
    CMNode*         buildSyntaxTree(ContentSpecNode* const curNode);
    void            calcFollowList(CMNode* const curNode);
    unsigned int*   makeDefStateList() const;
    int             postTreeBuildInit(CMNode* const         nodeCur,
                                      const unsigned int    curIndex);

    // -----------------------------------------------------------------------
    //  Private data members (declaration order is significant; keep as is)
    // -----------------------------------------------------------------------

    // Map of the unique input symbol elements to indices into each state's
    // per-input-symbol transition table entry. Part of the built DFA
    // information that has to be retained for the actual validation.
    // fElemMapSize (below) is the number of entries.
    QName**                         fElemMap;

    // Records whether each element map entry carries information related
    // to ANY models.
    ContentSpecNode::NodeTypes*     fElemMapType;

    // Number of entries in fElemMap; also the size of the second dimension
    // of fTransTable.
    unsigned int                    fElemMapSize;

    // Optimization flag. While the transition table is being built we can
    // tell whether the model accepts empty content (possible when
    // everything is optional), so the answer is cached here rather than
    // being worked out, expensively, on every validation call.
    bool                            fEmptyOk;

    // NFA position of the special EOC (end of content) node, remembered
    // because it's used during the DFA build.
    unsigned int                    fEOCPos;

    // One boolean per DFA state (fTransTableSize of them) telling whether
    // that state is a final state.
    bool*                           fFinalStateFlags;

    // Follow positions for each NFA position, i.e. for each non-epsilon
    // leaf node. Needed only while building the DFA and released after.
    CMStateSet**                    fFollowList;

    // Head node of the intermediate representation. Non-null only during
    // the DFA build (so it need not be passed around everywhere); once the
    // DFA exists it's no longer required and is deleted.
    CMNode*                         fHeadNode;

    // Count of leaf nodes. This number bounds the sizes of several data
    // structures in the DFA process.
    unsigned int                    fLeafCount;

    // Array of the non-epsilon leaf nodes, used during the DFA build and
    // then dropped. The entries merely reference nodes reachable from
    // fHeadNode, so only the actual array itself needs cleanup.
    CMLeaf**                        fLeafList;

    // Array mapping ANY types to the leaf list.
    ContentSpecNode::NodeTypes*     fLeafListType;

    // The transition table, the main product of all the work here: an
    // array of arrays of ints. The first dimension is the number of DFA
    // states; the second is the number of unique elements in the content
    // model (fElemMapSize). Each second-dimension entry is the new state
    // reached on that input from the first dimension's start state.
    // fElemMap maps element indexes to second-dimension positions.
    unsigned int**                  fTransTable;

    // Number of valid entries in the transition table and in the related
    // per-state tables such as fFinalStateFlags.
    unsigned int                    fTransTableSize;

    // Lets DTDs validate even when namespace support is enabled.
    bool                            fDTD;

    // Set for a DFA content model with mixed PCDATA.
    bool                            fIsMixed;

    // NOTE(review): not described in the original member notes; presumably
    // backs getContentLeafNameTypeVector() - confirm in the .cpp.
    ContentLeafNameTypeVector*      fLeafNameTypeVector;

    // NOTE(review): not described in the original member notes; presumably
    // the manager handed to the constructor - confirm in the .cpp.
    MemoryManager*                  fMemoryManager;
};
//
//  Looks up the DFA transition out of currentState on the input symbol at
//  elementIndex (a column of the transition table).
//
//  An incoming state of XMLContentModel::gInvalidTrans is passed straight
//  through without touching the table. Otherwise both indices are range
//  checked against fTransTableSize / fElemMapSize, and an
//  ArrayIndexOutOfBoundsException (XMLExcepts::Array_BadIndex) is raised
//  if either one is out of bounds.
//
inline unsigned int
DFAContentModel::getNextState(const unsigned int currentState,
                              const unsigned int elementIndex) const
{
    if (currentState != XMLContentModel::gInvalidTrans)
    {
        const bool stateInRange = (currentState < fTransTableSize);
        const bool elemInRange  = (elementIndex < fElemMapSize);

        if (!(stateInRange && elemInRange))
        {
            ThrowXML(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex);
        }

        return fTransTable[currentState][elementIndex];
    }

    // Nothing can follow an invalid transition.
    return XMLContentModel::gInvalidTrans;
}
XERCES_CPP_NAMESPACE_END
#endif