| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * Copyright (c) 1999 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xerces" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache\@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation, and was |
| * originally based on software copyright (c) 1999, International |
| * Business Machines, Inc., http://www.ibm.com . For more information |
| * on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| /** |
| * $Log$ |
| * Revision 1.1 1999/11/09 01:08:23 twl |
| * Initial revision |
| * |
| * Revision 1.4 1999/11/08 20:44:52 rahul |
| * Swat for adding in Product name and CVS comment log variable. |
| * |
| */ |
| |
| |
| #if !defined(XMLSCANNER_HPP) |
| #define XMLSCANNER_HPP |
| |
| #include <util/KVStringPair.hpp> |
| #include <util/RefVectorOf.hpp> |
| #include <util/XMLString.hpp> |
| #include <framework/XMLAttr.hpp> |
| #include <framework/XMLBufferMgr.hpp> |
| #include <framework/XMLErrorCodes.hpp> |
| #include <framework/XMLRefInfo.hpp> |
| #include <framework/XMLPScanToken.hpp> |
| #include <internal/ElemStack.hpp> |
| #include <internal/ReaderMgr.hpp> |
| |
| class InputSource; |
| class XMLDocumentHandler; |
| class XMLDocumentTypeHandler; |
| class XMLElementDecl; |
| class XMLEntityHandler; |
| class XMLErrorReporter; |
| class XMLMsgLoader; |
| class XMLValidator; |
| |
| |
| // |
| // This is the mondo scanner class, which does the vast majority of the |
| // work of parsing. It handles reading in input and spitting out events |
| // to installed handlers. |
| // |
| class XMLPARSER_EXPORT XMLScanner |
| { |
| public : |
| // ----------------------------------------------------------------------- |
| // Public class types |
| // |
| // NOTE: These should really be private, but some of the compilers we |
| // have to deal with are too stupid to understand this. |
| // |
| // DeclTypes |
| // Used by scanXMLDecl() to know what type of decl it should scan. |
| // Text decls have slightly different rules from XMLDecls. |
| // |
| // EntityExpRes |
| // These are the values returned from the entity expansion method, |
| // to indicate how it went. |
| // |
| // XMLTokens |
| // These represent the possible types of input we can get while |
| // scanning content. |
| // ----------------------------------------------------------------------- |
| enum DeclTypes |
| { |
| Decl_Text |
| , Decl_XML |
| }; |
| |
| enum EntityExpRes |
| { |
| EntityExp_Pushed |
| , EntityExp_Returned |
| , EntityExp_Failed |
| }; |
| |
| enum XMLTokens |
| { |
| Token_CData |
| , Token_CharData |
| , Token_Comment |
| , Token_EndTag |
| , Token_EOF |
| , Token_PI |
| , Token_StartTag |
| , Token_Unknown |
| }; |
| |
| |
| // ----------------------------------------------------------------------- |
| // Constructors and Destructor |
| // ----------------------------------------------------------------------- |
| XMLScanner |
| ( |
| XMLValidator* const validator |
| ); |
| XMLScanner |
| ( |
| XMLDocumentHandler* const docHandler |
| , XMLEntityHandler* const entityHandler |
| , XMLErrorReporter* const errReporter |
| , XMLValidator* const validator |
| ); |
| ~XMLScanner(); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Error emitter methods |
| // ----------------------------------------------------------------------- |
| void emitError(const XML4CErrs::Codes toEmit); |
| void emitError |
| ( |
| const XML4CErrs::Codes toEmit |
| , const XMLCh* const text1 |
| , const XMLCh* const text2 = 0 |
| , const XMLCh* const text3 = 0 |
| , const XMLCh* const text4 = 0 |
| ); |
| void emitError |
| ( |
| const XML4CErrs::Codes toEmit |
| , const char* const text1 |
| , const char* const text2 = 0 |
| , const char* const text3 = 0 |
| , const char* const text4 = 0 |
| ); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Getter methods |
| // ----------------------------------------------------------------------- |
| const XMLDocumentHandler* getDocHandler() const; |
| XMLDocumentHandler* getDocHandler(); |
| bool getDoNamespaces() const; |
| bool getDoValidation() const; |
| const XMLEntityHandler* getEntityHandler() const; |
| XMLEntityHandler* getEntityHandler(); |
| const XMLErrorReporter* getErrorReporter() const; |
| XMLErrorReporter* getErrorReporter(); |
| bool getExitOnFirstFatal() const; |
| RefHashTableOf<XMLRefInfo>& getIDRefList(); |
| bool getInException() const; |
| const RefHashTableOf<XMLRefInfo>& getIDRefList() const; |
| bool getLastExtLocation |
| ( |
| XMLCh* const sysIdToFill |
| , const unsigned int maxSysIdChars |
| , XMLCh* const pubIdToFill |
| , const unsigned int maxPubIdChars |
| , unsigned int& lineToFill |
| , unsigned int& colToFill |
| ); |
| bool getStandalone() const; |
| const XMLValidator* getValidator() const; |
| XMLValidator* getValidator(); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Setter methods |
| // ----------------------------------------------------------------------- |
| void setDocHandler(XMLDocumentHandler* const docHandler); |
| void setDoNamespaces(const bool doNamespaces); |
| void setDoValidation(const bool validate); |
| void setEntityHandler(XMLEntityHandler* const docTypeHandler); |
| void setErrorReporter(XMLErrorReporter* const errHandler); |
| void setExitOnFirstFatal(const bool newValue); |
| void setValidator(XMLValidator* const validator); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Document scanning methods |
| // |
| // scanDocument() does the entire source document. scanFirst() and |
| // scanNext() support a progressive parse. |
| // ----------------------------------------------------------------------- |
| void scanDocument |
| ( |
| const InputSource& src |
| , const bool reuseValidator = false |
| ); |
| bool scanFirst |
| ( |
| const InputSource& src |
| , XMLPScanToken& toFill |
| , const bool reuseValidator = false |
| ); |
| bool scanNext(XMLPScanToken& toFill); |
| |
| |
| private : |
| // ----------------------------------------------------------------------- |
| // Private class types |
| // ----------------------------------------------------------------------- |
| enum IDTypes |
| { |
| IDType_Public |
| , IDType_External |
| , IDType_Either |
| }; |
| |
| enum DTDSubsets |
| { |
| Subset_Internal |
| , Subset_External |
| }; |
| |
| |
| // ----------------------------------------------------------------------- |
| // Unimplemented constructors and operators |
| // ----------------------------------------------------------------------- |
| XMLScanner(); |
| XMLScanner(const XMLScanner&); |
| void operator=(const XMLScanner&); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Private helper methods |
| // ----------------------------------------------------------------------- |
| void commonInit(); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Private helper methods |
| // |
| // These are implemented in XMLScanner2.cpp, to keep the main file from |
| // becoming so bloated. We can't have any bloated files. |
| // ----------------------------------------------------------------------- |
| unsigned int buildAttList |
| ( |
| const RefVectorOf<KVStringPair>& providedAttrs |
| , const unsigned int attCount |
| , XMLElementDecl& elemDecl |
| , RefVectorOf<XMLAttr>& toFill |
| ); |
| void checkIDRefs(); |
| bool isLegalToken(const XMLPScanToken& toCheck); |
| bool normalizeAttValue |
| ( |
| const XMLCh* const value |
| , const XMLAttDef::AttTypes type |
| , XMLBuffer& toFill |
| ); |
| unsigned int resolveQName |
| ( |
| const XMLCh* const qName |
| , XMLBuffer& nameBufToFill |
| , XMLBuffer& prefixBufToFill |
| , const ElemStack::MapModes mode |
| ); |
| unsigned int resolvePrefix |
| ( |
| const XMLCh* const prefix |
| , const ElemStack::MapModes mode |
| ); |
| unsigned int resolvePrefix |
| ( |
| const XMLCh* const prefix |
| , XMLBuffer& uriBufToFill |
| , const ElemStack::MapModes mode |
| ); |
| void scanReset(const InputSource& src); |
| void sendCharData(XMLBuffer& toSend); |
| XMLTokens senseNextToken(unsigned int& orgReader); |
| void updateNSMap |
| ( |
| const XMLCh* const attrName |
| , const XMLCh* const attrValue |
| ); |
| void validateAttrValue |
| ( |
| const XMLCh* const valueText |
| , const XMLAttDef::AttTypes type |
| , const XMLAttDef::DefAttTypes defType |
| , const XMLCh* const defText |
| , const XMLCh* const fullName |
| , const XMLCh* const enumList |
| ); |
| |
| |
| // ----------------------------------------------------------------------- |
| // Private scanning methods |
| // ----------------------------------------------------------------------- |
| bool basicAttrValueScan(XMLBuffer& toFill); |
| bool getQuotedString(XMLBuffer& toFill); |
| unsigned int rawAttrScan |
| ( |
| RefVectorOf<KVStringPair>& toFill |
| , bool& isEmpty |
| ); |
| bool scanAttValue(XMLBuffer& toFill, const XMLAttDef::AttTypes type); |
| void scanCDSection(); |
| void scanCharData(XMLBuffer& toToUse); |
| bool scanCharRef(XMLCh& toFill, XMLCh& second); |
| void scanComment(); |
| bool scanContent(const bool extEntity); |
| void scanDocTypeDecl(); |
| void scanEndTag(bool& gotData); |
| EntityExpRes scanEntityRef |
| ( |
| const bool inAttVal |
| , XMLCh& firstCh |
| , XMLCh& secondCh |
| , bool& escaped |
| ); |
| bool scanEq(); |
| bool scanId |
| ( |
| XMLBuffer& pubIdToFill |
| , XMLBuffer& sysIdToFill |
| , const IDTypes whatKind |
| ); |
| void scanIgnoredSection(); |
| bool scanInternalSubset(); |
| void scanMiscellaneous(); |
| void scanPI(); |
| void scanProlog(); |
| bool scanPublicLiteral(XMLBuffer& toFill); |
| bool scanStartTag(bool& gotData); |
| bool scanStartTagNS(bool& gotData); |
| bool scanSystemLiteral(XMLBuffer& toFill); |
| void scanXMLDecl(const DeclTypes type); |
| unsigned int scanUpToWSOr |
| ( |
| XMLBuffer& toFill |
| , const XMLCh chEndChar |
| ); |
| |
| |
| |
| // ----------------------------------------------------------------------- |
| // Data members |
| // |
| // fAttrList |
| // Every time we get a new element start tag, we have to pass to |
| // the document handler the attributes found. To make it more |
| // efficient we keep this ref vector of XMLAttr objects around. We |
| // just reuse it over and over, allowing it to grow to meet the |
| // peek need. |
| // |
| // fBaseDir |
| // This is the base directory, from which the initial XML file |
| // was loaded. It is set after the file is successfully opened, |
| // so we know it to be valid. If the initial file had no path |
| // component, then this is left null. It is used to handle relative |
| // paths of DTD and external entity system ids. |
| // |
| // fBufMgr |
| // This is a manager for temporary buffers used during scanning. |
| // For efficiency we must use a set of static buffers, but we have |
| // to insure that they are not incorrectly reused. So this manager |
| // provides the smarts to hand out buffers as required. |
| // |
| // fDocHandler |
| // The client code's document handler. If zero, then no document |
| // handler callouts are done. We don't adopt it. |
| // |
| // fDoNamespaces |
| // This flag indicates whether the client code wants us to do |
| // namespaces or not. If the installed validator indicates that it |
| // has to do namespaces, then this is ignored. |
| // |
| // fDoValidation |
| // Indicates whether any validation should be done. A validator |
| // can still be installed and it will build up all the data |
| // structures for the DTD/Schema declarations, but the scanner will |
| // not ask it to validate anything. |
| // |
| // fElemStack |
| // This is the element stack that is used to track the elements that |
| // are currently being worked on. |
| // |
| // fEntityHandler |
| // The client code's entity handler. If zero, then no entity handler |
| // callouts are done. We don't adopt it. |
| // |
| // fErrorReporter |
| // The client code's error handler. If zero, then no error handler |
| // callouts are done. We don't adopt it. |
| // |
| // fExitOnFirstFatal |
| // This indicates whether we bail out on the first fatal XML error |
| // or not. It defaults to true, which is the strict XML way, but it |
| // can be changed. |
| // |
| // fIDRefList |
| // This is a list of XMLRefInfo objects. This member lets us do all |
| // needed ID-IDREF balancing checks. |
| // |
| // fInException |
| // To avoid a circular freakout when we catch an exception and emit |
| // it, which would normally throw again if the 'fail on first error' |
| // flag is one. |
| // |
| // fRawAttrList |
| // During the initial scan of the attributes we can only do a raw |
| // scan for key/value pairs. So this vector is used to store them |
| // until they can be processed (and put into fAttrList.) |
| // |
| // fReaderMgr |
| // This is the reader manager, from which we get characters. It |
| // manages the reader stack for us, and provides a lot of convenience |
| // methods to do specialized checking for chars, sequences of chars, |
| // skipping chars, etc... |
| // |
| // fReuseValidator |
| // This flag is set on a per-scan basis. So its provided in the |
| // scanDocument() and scanFirst() methods, and applies for that |
| // one pass. It indicates that the validator should not be reused |
| // and that any external structural description should be ignored. |
| // There cannot be any internal subset. |
| // |
| // fScannerId |
| // fSequenceId |
| // These are used for progressive parsing, to make sure that the |
| // client code does the right thing at the right time. |
| // |
| // fStandalone |
| // Indicates whether the document is standalone or not. Defaults to |
| // no, but can be overridden in the XMLDecl. |
| // |
| // fValidator |
| // The installed validator. We look at them via the abstract |
| // validator interface, and don't know what it actual is. |
| // |
| // |
| // fAttName |
| // fAttValue |
| // fCDataBuf |
| // fNameBuf |
| // fQNameBuf |
| // fPrefixBuf |
| // For the most part, buffers are obtained from the fBufMgr object |
| // on the fly. However, for the start tag scan, we have a set of |
| // fixed buffers for performance reasons. These are used a lot and |
| // there are a number of them, so asking the buffer manager each |
| // time for new buffers is a bit too much overhead. |
| // ----------------------------------------------------------------------- |
| RefVectorOf<XMLAttr>* fAttrList; |
| XMLCh* fBaseDir; |
| XMLBufferMgr fBufMgr; |
| XMLDocumentHandler* fDocHandler; |
| bool fDoNamespaces; |
| bool fDoValidation; |
| ElemStack fElemStack; |
| XMLEntityHandler* fEntityHandler; |
| XMLErrorReporter* fErrorReporter; |
| bool fExitOnFirstFatal; |
| RefHashTableOf<XMLRefInfo>* fIDRefList; |
| bool fInException; |
| RefVectorOf<KVStringPair>* fRawAttrList; |
| ReaderMgr fReaderMgr; |
| bool fReuseValidator; |
| XMLUInt32 fScannerId; |
| XMLUInt32 fSequenceId; |
| bool fStandalone; |
| XMLValidator* fValidator; |
| |
| XMLBuffer fAttNameBuf; |
| XMLBuffer fAttValueBuf; |
| XMLBuffer fCDataBuf; |
| XMLBuffer fNameBuf; |
| XMLBuffer fQNameBuf; |
| XMLBuffer fPrefixBuf; |
| XMLBuffer fURIBuf; |
| }; |
| |
| |
| |
| // --------------------------------------------------------------------------- |
| // XMLScanner: Getter methods |
| // --------------------------------------------------------------------------- |
| inline const XMLDocumentHandler* XMLScanner::getDocHandler() const |
| { |
| return fDocHandler; |
| } |
| |
| inline XMLDocumentHandler* XMLScanner::getDocHandler() |
| { |
| return fDocHandler; |
| } |
| |
| inline bool XMLScanner::getDoNamespaces() const |
| { |
| return fDoNamespaces; |
| } |
| |
| inline bool XMLScanner::getDoValidation() const |
| { |
| return fDoValidation; |
| } |
| |
| inline const XMLEntityHandler* XMLScanner::getEntityHandler() const |
| { |
| return fEntityHandler; |
| } |
| |
| inline XMLEntityHandler* XMLScanner::getEntityHandler() |
| { |
| return fEntityHandler; |
| } |
| |
| inline const XMLErrorReporter* XMLScanner::getErrorReporter() const |
| { |
| return fErrorReporter; |
| } |
| |
| inline XMLErrorReporter* XMLScanner::getErrorReporter() |
| { |
| return fErrorReporter; |
| } |
| |
| inline bool XMLScanner::getExitOnFirstFatal() const |
| { |
| return fExitOnFirstFatal; |
| } |
| |
| inline RefHashTableOf<XMLRefInfo>& XMLScanner::getIDRefList() |
| { |
| return *fIDRefList; |
| } |
| |
| inline bool XMLScanner::getInException() const |
| { |
| return fInException; |
| } |
| |
| inline const RefHashTableOf<XMLRefInfo>& XMLScanner::getIDRefList() const |
| { |
| return *fIDRefList; |
| } |
| |
| inline bool XMLScanner::getStandalone() const |
| { |
| return fStandalone; |
| } |
| |
| inline const XMLValidator* XMLScanner::getValidator() const |
| { |
| return fValidator; |
| } |
| |
| inline XMLValidator* XMLScanner::getValidator() |
| { |
| return fValidator; |
| } |
| |
| |
| // --------------------------------------------------------------------------- |
| // XMLScanner: Setter methods |
| // --------------------------------------------------------------------------- |
| inline void XMLScanner::setDoNamespaces(const bool doNamespaces) |
| { |
| fDoNamespaces = doNamespaces; |
| } |
| |
| inline void XMLScanner::setDocHandler(XMLDocumentHandler* const docHandler) |
| { |
| fDocHandler = docHandler; |
| } |
| |
| inline void XMLScanner::setErrorReporter(XMLErrorReporter* const errHandler) |
| { |
| fErrorReporter = errHandler; |
| } |
| |
| inline void XMLScanner::setEntityHandler(XMLEntityHandler* const entityHandler) |
| { |
| fEntityHandler = entityHandler; |
| fReaderMgr.setEntityHandler(entityHandler); |
| } |
| |
| inline void XMLScanner::setExitOnFirstFatal(const bool newValue) |
| { |
| fExitOnFirstFatal = newValue; |
| } |
| |
| #endif |