| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * Copyright (c) 1999 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xerces" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache\@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation, and was |
| * originally based on software copyright (c) 1999, International |
| * Business Machines, Inc., http://www.ibm.com . For more information |
| * on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| /** |
| * $Log$ |
| * Revision 1.1 1999/11/09 01:03:19 twl |
| * Initial revision |
| * |
| * Revision 1.2 1999/11/08 20:45:38 rahul |
| * Swat for adding in Product name and CVS comment log variable. |
| * |
| */ |
| |
| #if !defined(DFACONTENTMODEL_HPP) |
| #define DFACONTENTMODEL_HPP |
| |
| #include <util/XML4CDefs.hpp> |
| #include <framework/XMLContentModel.hpp> |
| |
| class ContentSpecNode; |
| class CMLeaf; |
| class CMNode; |
| class CMStateSet; |
| class DTDElementDecl; |
| |
| // |
| // DFAContentModel is the heavy weight derivative of ContentModel that does |
| // all of the non-trivial element content validation. This guy does the full |
| // bore regular expression to DFA conversion to create a DFA that it then |
| // uses in its validation algorithm. |
| // |
| // NOTE: Upstream work insures that this guy will never see a content model |
| // with PCDATA in it. Any model with PCDATA is 'mixed' and is handled |
| // via the MixedContentModel class, since mixed models are very |
| // constrained in form and easily handled via a special case. This |
| // also makes our life much easier here. |
| // |
| class DFAContentModel : public XMLContentModel |
| { |
| public: |
| // ----------------------------------------------------------------------- |
| // Constructors and Destructor |
| // ----------------------------------------------------------------------- |
| DFAContentModel(const DTDElementDecl& elemDecl); |
| virtual ~DFAContentModel(); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Implementation of the virtual content model interface |
| // ----------------------------------------------------------------------- |
| virtual bool getIsAmbiguous() const; |
| virtual int validateContent |
| ( |
| const unsigned int* childIds |
| , const unsigned int childCount |
| ) const; |
| |
| |
| private : |
| // ----------------------------------------------------------------------- |
| // Unimplemented constructors and operators |
| // ----------------------------------------------------------------------- |
| DFAContentModel(); |
| DFAContentModel(const DFAContentModel&); |
| void operator=(const DFAContentModel&); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Private helper methods |
| // ----------------------------------------------------------------------- |
| void buildDFA(); |
| CMNode* buildSyntaxTree(const ContentSpecNode* const curNode); |
| void calcFollowList(CMNode* const curNode); |
| bool isAmbiguous() const; |
| unsigned int* makeDefStateList() const; |
| int postTreeBuildInit |
| ( |
| CMNode* const nodeCur |
| , const unsigned int curIndex |
| ); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Private data members |
| // |
| // fElemDecl |
| // The element decl object of the element that this is the content |
| // model for. Its just stored here to avoid passing it around all |
| // over the place while building up the DFA. |
| // |
| // fElemMap |
| // fElemMapSize |
| // This is the map of unique input symbol elements to indices into |
| // each state's per-input symbol transition table entry. This is part |
| // of the built DFA information that must be kept around to do the |
| // actual validation. |
| // |
| // fEmptyOk |
| // This is an optimization. While building the transition table we |
| // can see whether this content model would approve of an empty |
| // content (which could happen if everything was optional.) So we |
| // set this flag and short circuit that check, which would otherwise |
| // be ugly and time consuming if we tried to determine it at each |
| // validation call. |
| // |
| // fEOCPos |
| // The NFA position of the special EOC (end of content) node. This |
| // is saved away since its used during the DFA build. |
| // |
| // fFinalStateFlags |
| // This is an array of booleans, one per state (there are |
| // fTransTableSize states in the DFA) that indicates whether that |
| // state is a final state. |
| // |
| // fFollowList |
| // The list of follow positions for each NFA position (i.e. for each |
| // non-epsilon leaf node.) This is only used during the building of |
| // the DFA, and is let go afterwards. |
| // |
| // fHeadNode |
| // This is the head node of our intermediate representation. It is |
| // only non-null during the building of the DFA (just so that it |
| // does not have to be passed all around.) Once the DFA is built, |
| // this is no longer required so its deleted. |
| // |
| // fIsAmbiguous |
| // This flag is set during construction if the content model is |
| // ambiguous. This will be used by the scanner to report an error |
| // and is available for later perusal if required. |
| // |
| // fLeafCount |
| // The count of leaf nodes. This is an important number that set some |
| // limits on the sizes of data structures in the DFA process. |
| // |
| // fLeafList |
| // An array of non-epsilon leaf nodes, which is used during the DFA |
| // build operation, then dropped. These are just references to nodes |
| // pointed to by fHeadNode, so we don't have to clean them up, just |
| // the actually leaf list array itself needs cleanup. |
| // |
| // fSpecNode |
| // The content spec node for the element that this object represents |
| // the content of. This info is needed a good bit so we get it once |
| // and keep it. We don't own it, we just reference it. |
| // |
| // fTransTable |
| // fTransTableSize |
| // This is the transition table that is the main by product of all |
| // of the effort here. It is an array of arrays of ints. The first |
| // dimension is the number of states we end up with in the DFA. The |
| // second dimensions is the number of unique elements in the content |
| // model (fElemMapSize). Each entry in the second dimension indicates |
| // the new state given that input for the first dimension's start |
| // state. |
| // |
| // The fElemMap array handles mapping from element indexes to |
| // positions in the second dimension of the transition table. |
| // |
| // fTransTableSize is the number of valid entries in the transition |
| // table, and in the other related tables such as fFinalStateFlags. |
| // ----------------------------------------------------------------------- |
| const DTDElementDecl& fElemDecl; |
| unsigned int* fElemMap; |
| unsigned int fElemMapSize; |
| bool fEmptyOk; |
| unsigned int fEOCPos; |
| bool* fFinalStateFlags; |
| CMStateSet** fFollowList; |
| CMNode* fHeadNode; |
| bool fIsAmbiguous; |
| unsigned int fLeafCount; |
| CMLeaf** fLeafList; |
| const ContentSpecNode* fSpecNode; |
| unsigned int** fTransTable; |
| unsigned int fTransTableSize; |
| }; |
| |
| |
| inline bool DFAContentModel::getIsAmbiguous() const |
| { |
| return fIsAmbiguous; |
| } |
| |
| #endif |