// blob: 18ca8227923c60be717c4662056b701797af4f4a [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
* Copyright (c) 1999-2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache\@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Log$
* Revision 1.13 2001/07/12 18:50:08 tng
* Some performance modification regarding standalone check and xml decl check.
*
* Revision 1.12 2000/09/09 00:18:18 andyh
* Reordered member variables in ThrowEOEJanitor. Patch submitted
* by Kirk Wylie.
*
* Revision 1.11 2000/07/08 00:17:13 andyh
* Cleanup of yesterday's speedup changes. Merged new bit into the
* scanner character properties table.
*
* Revision 1.10 2000/07/07 01:08:44 andyh
* Parser speed up in scan of XML content.
*
* Revision 1.9 2000/03/02 19:54:29 roddey
* This checkin includes many changes done while waiting for the
* 1.1.0 code to be finished. I can't list them all here, but a list is
* available elsewhere.
*
* Revision 1.8 2000/02/24 20:18:07 abagchi
* Swat for removing Log from API docs
*
* Revision 1.7 2000/02/24 02:12:53 aruna1
* ReaderMgr::getReaderDepth() added
*
* Revision 1.6 2000/02/06 07:47:53 rahulj
* Year 2K copyright swat.
*
* Revision 1.5 2000/01/25 01:04:21 roddey
* Fixes a bogus error about ]]> in char data.
*
* Revision 1.4 2000/01/24 20:40:43 roddey
* Exposed the APIs to get to the byte offset in the source XML buffer. This stuff
* is not tested yet, but I wanted to get the API changes in now so that the API
* can be stabilized.
*
* Revision 1.3 2000/01/12 00:15:04 roddey
* Changes to deal with multiply nested, relative pathed, entities and to deal
* with the new URL class changes.
*
* Revision 1.2 1999/12/15 19:48:03 roddey
* Changed to use new split of transcoder interfaces into XML transcoders and
* LCP transcoders, and implementation of intrinsic transcoders as pluggable
* transcoders, and addition of Latin1 intrinsic support.
*
* Revision 1.1.1.1 1999/11/09 01:08:13 twl
* Initial checkin
*
* Revision 1.4 1999/11/08 20:56:54 droddey
* If the main xml entity does not exist, we need to get the error handling for that
* inside the main XMLScanner::scanDocument() try block so that it gets reported
* in the normal way. We have to add a little extra safety code because, when this
* happens, there is no reader on the reader stack to get position info from.
*
* Revision 1.3 1999/11/08 20:44:43 rahul
* Swat for adding in Product name and CVS comment log variable.
*
*/
#if !defined(READERMGR_HPP)
#define READERMGR_HPP
#include <util/RefStackOf.hpp>
#include <util/XMLString.hpp>
#include <sax/Locator.hpp>
#include <framework/XMLBuffer.hpp>
#include <internal/XMLReader.hpp>
class XMLBuffer;
class XMLEntityDecl;
class XMLEntityHandler;
class XMLDocumentHandler;
class XMLScanner;
// ---------------------------------------------------------------------------
// This class is used by the scanner. The scanner must deal with expansion
// of entities, some of which are totally different files (external parsed
// entities.) It does so by pushing readers onto a stack. The top reader is
// the one it wants to read out of, but that one must be popped when it is
// empty. To keep that logic from being all over the place, the scanner
// talks to the reader manager, which handles the stack and popping off
// used up readers.
// ---------------------------------------------------------------------------
class XMLPARSER_EXPORT ReaderMgr : public Locator
{
public :
    // -----------------------------------------------------------------------
    //  Class specific types
    //
    //  LastExtEntityInfo
    //      Plain aggregate that getLastExtEntityInfo() fills in: a system
    //      id, a public id, a line number and a column number. The string
    //      pointers are not documented here as owned by the caller.
    //      NOTE(review): presumably they point into reader owned storage;
    //      confirm against the implementation.
    // -----------------------------------------------------------------------
    struct LastExtEntityInfo
    {
        const   XMLCh*          systemId;
        const   XMLCh*          publicId;
                unsigned int    lineNumber;
                unsigned int    colNumber;
    };


    // -----------------------------------------------------------------------
    //  Constructors and Destructor
    // -----------------------------------------------------------------------
    ReaderMgr();
    ~ReaderMgr();


    // -----------------------------------------------------------------------
    //  Convenience scanning methods
    //
    //  These are all convenience methods that work in terms of the core
    //  character spooling methods.
    // -----------------------------------------------------------------------
    bool atEOF() const;
    bool getName(XMLBuffer& toFill);
    bool getNameToken(XMLBuffer& toFill);
    XMLCh getNextChar();
    bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
    void movePlainContentChars(XMLBuffer &dest);
    void getSpaces(XMLBuffer& toFill);
    void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
    bool isEmpty() const;
    bool lookingAtChar(const XMLCh toCheck);
    bool lookingAtSpace();
    XMLCh peekNextChar();
    bool skipIfQuote(XMLCh& chGotten);
    void skipPastChar(const XMLCh toSkip);
    bool skipPastSpaces();
    void skipToChar(const XMLCh toSkipTo);
    bool skippedChar(const XMLCh toSkip);
    bool skippedSpace();
    bool skippedString(const XMLCh* const toSkip);
    void skipQuotedString(const XMLCh quoteCh);
    XMLCh skipUntilIn(const XMLCh* const listToSkip);
    XMLCh skipUntilInOrWS(const XMLCh* const listToSkip);
    bool peekString(const XMLCh* const toPeek);


    // -----------------------------------------------------------------------
    //  Control methods
    // -----------------------------------------------------------------------
    void cleanStackBackTo(const unsigned int readerNum);
    XMLReader* createReader
    (
        const   InputSource&        src
        , const bool                xmlDecl
        , const XMLReader::RefFrom  refFrom
        , const XMLReader::Types    type
        , const XMLReader::Sources  source
    );
    XMLReader* createReader
    (
        const   XMLCh* const        sysId
        , const XMLCh* const        pubId
        , const bool                xmlDecl
        , const XMLReader::RefFrom  refFrom
        , const XMLReader::Types    type
        , const XMLReader::Sources  source
        ,       InputSource*&       srcToFill
    );
    XMLReader* createIntEntReader
    (
        const   XMLCh* const        sysId
        , const XMLReader::RefFrom  refFrom
        , const XMLReader::Types    type
        , const XMLCh* const        dataBuf
        , const unsigned int        dataLen
        , const bool                copyBuf
    );
    bool isScanningPERefOutOfLiteral() const;
    bool pushReader
    (
                XMLReader* const    reader
        ,       XMLEntityDecl* const entity
    );
    void reset();


    // -----------------------------------------------------------------------
    //  Getter methods
    // -----------------------------------------------------------------------
    const XMLCh* getCurrentEncodingStr() const;
    const XMLEntityDecl* getCurrentEntity() const;
    XMLEntityDecl* getCurrentEntity();
    const XMLReader* getCurrentReader() const;
    XMLReader* getCurrentReader();
    unsigned int getCurrentReaderNum() const;
    unsigned int getReaderDepth() const;
    void getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const;
    unsigned int getSrcOffset() const;
    bool getThrowEOE() const;


    // -----------------------------------------------------------------------
    //  Setter methods
    // -----------------------------------------------------------------------
    void setEntityHandler(XMLEntityHandler* const newHandler);
    void setThrowEOE(const bool newValue);


    // -----------------------------------------------------------------------
    //  Implement the SAX Locator interface
    // -----------------------------------------------------------------------
    virtual const XMLCh* getPublicId() const;
    virtual const XMLCh* getSystemId() const;
    virtual int getLineNumber() const;
    virtual int getColumnNumber() const;


private :
    // -----------------------------------------------------------------------
    //  Unimplemented constructors and operators
    //
    //  The manager owns the readers it stacks up and reaches both stacks
    //  through raw pointers, so a memberwise copy would leave two managers
    //  referring to (and eventually cleaning up) the same objects. Copying
    //  is therefore declared private and intentionally never defined.
    // -----------------------------------------------------------------------
    ReaderMgr(const ReaderMgr&);
    ReaderMgr& operator=(const ReaderMgr&);


    // -----------------------------------------------------------------------
    //  Private helper methods
    // -----------------------------------------------------------------------
    const XMLReader* getLastExtEntity(const XMLEntityDecl*& itsEntity) const;
    bool popReader();


    // -----------------------------------------------------------------------
    //  Private data members
    //
    //  fCurEntity
    //      This is the current top of stack entity. We pull it off the stack
    //      and store it here for efficiency.
    //
    //  fCurReader
    //      This is the current top of stack reader. We pull it off the
    //      stack and store it here for efficiency.
    //
    //  fEntityHandler
    //      This is the installed entity handler. It's installed via the
    //      scanner, which passes it on to us since we need it the most, in
    //      the process of creating external entity readers.
    //
    //  fEntityStack
    //      We need to keep up with which of the pushed readers are pushed
    //      entity values that are being spooled. This is done to avoid the
    //      problem of recursive definitions. This stack consists of refs to
    //      EntityDecl objects for the pushed entities.
    //
    //  fNextReaderNum
    //      This is the reader serial number value. Each new reader that is
    //      created from this reader is given a successive number. This lets
    //      us catch things like partial markup errors and such.
    //
    //  fReaderStack
    //      This is the stack of reader references. We own all the readers
    //      and destroy them when they are used up.
    //
    //  fThrowEOE
    //      This flag controls whether we throw an exception when we hit an
    //      end of entity. The scanner doesn't really need to know about ends
    //      of entities in the int/ext subsets, so it will turn this flag off
    //      until it gets into the content usually.
    // -----------------------------------------------------------------------
    XMLEntityDecl*              fCurEntity;
    XMLReader*                  fCurReader;
    XMLEntityHandler*           fEntityHandler;
    RefStackOf<XMLEntityDecl>*  fEntityStack;
    unsigned int                fNextReaderNum;
    RefStackOf<XMLReader>*      fReaderStack;
    bool                        fThrowEOE;
};
// ---------------------------------------------------------------------------
// ReaderMgr: Inlined methods
//
// NOTE: We cannot put these in alphabetical and type order as we usually
// do because some of the compilers we have to support are too stupid to
// understand out of order inlines!
// ---------------------------------------------------------------------------
//  Asks the current (top of stack) reader for its reader number and hands
//  that back. fCurReader is dereferenced without a null check.
inline unsigned int ReaderMgr::getCurrentReaderNum() const
{
    const unsigned int curNum = fCurReader->getReaderNum();
    return curNum;
}
//  Resets the caller's buffer and then forwards to the current reader's
//  getName() with 'false' as the second argument (getNameToken() is the
//  variant that passes 'true'). The reader's result is returned as is.
inline bool ReaderMgr::getName(XMLBuffer& toFill)
{
    // Always start from a freshly reset buffer
    toFill.reset();

    const bool gotName = fCurReader->getName(toFill, false);
    return gotName;
}
//  Resets the caller's buffer and then forwards to the current reader's
//  getName() with 'true' as the second argument (getName() is the variant
//  that passes 'false'). The reader's result is returned as is.
inline bool ReaderMgr::getNameToken(XMLBuffer& toFill)
{
    // Always start from a freshly reset buffer
    toFill.reset();

    const bool gotToken = fCurReader->getName(toFill, true);
    return gotToken;
}
//  Straight pass through to the current reader's getNextCharIfNot(); both
//  arguments are handed over unchanged and the reader's answer is returned.
inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
{
    const bool gotOne = fCurReader->getNextCharIfNot(chNotToGet, chGotten);
    return gotOne;
}
//  Lets the current reader move its plain content characters into 'dest'.
//  Unlike getName()/getNameToken(), the buffer is not reset here first.
inline void ReaderMgr::movePlainContentChars(XMLBuffer &dest)
{
    XMLReader* const topReader = fCurReader;
    topReader->movePlainContentChars(dest);
}
inline bool ReaderMgr::getThrowEOE() const
{
return fThrowEOE;
}
//  Reports the source offset as given by the current reader. fCurReader is
//  dereferenced without a null check.
inline unsigned int ReaderMgr::getSrcOffset() const
{
    const unsigned int curOffset = fCurReader->getSrcOffset();
    return curOffset;
}
inline bool ReaderMgr::lookingAtChar(const XMLCh chToCheck)
{
return (chToCheck == peekNextChar());
}
inline bool ReaderMgr::lookingAtSpace()
{
return XMLReader::isWhitespace(peekNextChar());
}
inline void ReaderMgr::setThrowEOE(const bool newValue)
{
fThrowEOE = newValue;
}
//  Straight pass through to the current reader's skippedString(); the
//  reader's answer is returned unchanged.
inline bool ReaderMgr::skippedString(const XMLCh* const toSkip)
{
    const bool didSkip = fCurReader->skippedString(toSkip);
    return didSkip;
}
inline void ReaderMgr::skipToChar(const XMLCh toSkipTo)
{
while (true)
{
// Get chars until we find the one to skip
const XMLCh nextCh = getNextChar();
// Break out at end of input or the char to skip
if ((nextCh == toSkipTo) || !nextCh)
break;
}
}
inline void ReaderMgr::skipPastChar(const XMLCh toSkipPast)
{
while (true)
{
// Get chars until we find the one to skip
const XMLCh nextCh = getNextChar();
if ((nextCh == toSkipPast) || !nextCh)
break;
}
}
//  Straight pass through to the current reader's peekString(); the
//  reader's answer is returned unchanged.
inline bool ReaderMgr::peekString(const XMLCh* const toPeek)
{
    const bool matched = fCurReader->peekString(toPeek);
    return matched;
}
//  Remembers the entity handler to use. Only the pointer is stored; nothing
//  in this method takes a copy of the handler or releases a previous one.
inline void ReaderMgr::setEntityHandler(XMLEntityHandler* const newHandler)
{
    this->fEntityHandler = newHandler;
}
//
// This is a simple class to temporarily change the 'throw at end of entity'
// flag of the reader manager. There are some places where we need to
// turn this on and off on a scoped basis.
//
class XMLPARSER_EXPORT ThrowEOEJanitor
{
public :
// -----------------------------------------------------------------------
// Constructors and destructor
// -----------------------------------------------------------------------
ThrowEOEJanitor(ReaderMgr* mgrTarget, const bool newValue) :
fOld(mgrTarget->getThrowEOE())
, fMgr(mgrTarget)
{
mgrTarget->setThrowEOE(newValue);
}
~ThrowEOEJanitor()
{
fMgr->setThrowEOE(fOld);
};
private :
// -----------------------------------------------------------------------
// Private data members
//
// fOld
// The previous value of the flag, which we replaced during ctor,
// and will replace during dtor.
//
// fMgr
// A pointer to the reader manager we are going to set/reset the
// flag on.
// -----------------------------------------------------------------------
bool fOld;
ReaderMgr* fMgr;
};
#endif