blob: d4e86b6047b40aeab3efd734b0d78240e150d7a3 [file] [log] [blame]
/** \file parse_handlers.hpp .
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
\brief Generic SAX-like parse handler class definitions
-------------------------------------------------------------------------- */
#ifndef __UIMA_PARSE_HANDLERS_HPP
#define __UIMA_PARSE_HANDLERS_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include "uima/pragmas.hpp" //must be first to suppress warnings
#include <deque>
#include <functional>
#include <map>
#include <stack>
#include <utility>
#include <vector>
#include "uima/parser_config.hpp"
#include "uima/doc_buffer.hpp"
#include "uima/tcas.hpp"
#include "uima/parser_interface.hpp"
namespace uima {
/**
The class <TT>ParseHandlers</TT> is used as a generic SAX-like
parse hander class.
@see XMLParseHandlers
*/
class ParseHandlers {
public:
/**
* Typedefs for data structure for communication between the beginElement()
* and endElement() function.
* We get attribute information in beginElement() and need to pass this
* information to endElement() because we can only do the mapping
* once we know the end of an annotation
* @{*/
/// a struct to hold information about a single XML attribute
class StXMLAttrInfo {
public:
icu::UnicodeString ustrName;
icu::UnicodeString ustrType;
icu::UnicodeString ustrValue;
// OS STL need this to be a full STL compliant class
bool operator < (const StXMLAttrInfo & crclRHS) const {
return(bool)(ustrName < crclRHS.ustrName);
}
bool operator ==(const StXMLAttrInfo & crclRHS) const {
return(bool)(ustrName == crclRHS.ustrName);
}
};
/// a container to hold the list of all XML attributes of a given XML element
typedef vector< StXMLAttrInfo > TyXMLAttrInfoList;
/*@}*/
public:
// -----------------------------------------------------------------------
// Constructors and Destructor
// -----------------------------------------------------------------------
ParseHandlers();
virtual ~ParseHandlers();
// -----------------------------------------------------------------------
// init method
// -----------------------------------------------------------------------
bool
init(
TCAS & rTCAS,
ParserConfiguration const & rclConfig,
bool bVerbose = false
);
bool deInit();
void setMultiDocCallback(ParserInterface::MultiDocCallbackInterface &);
TyErrorId beginDoc();
TyErrorId endDoc();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
size_t getNumberOfDocumentsParsed() const;
size_t getNumberOfBytesParsed() const;
bool isMultiDocFile() const;
// -----------------------------------------------------------------------
// Handlers for the DocumentHandler interface
// -----------------------------------------------------------------------
void endElement(const UChar* cpuCName, size_t uiLength);
void startElement(const UChar* cpucName, size_t uiLength, const TyXMLAttrInfoList & crvecAttributes);
void characters(const UChar* cpucChars, size_t uiLength);
void processWarning(const char* cpszErrorId, const UChar * cpszErrorContext);
UnicodeStringRef getDocumentText() const;
protected:
AnnotationFS findLastAnnOfType(size_t uiBeginPos, Type type) const;
// -----------------------------------------------------------------------
// we need a stack of those containers for each XML element
// so we define a map from the XML element name to a pair of
// 1: the begin index of the element with those attrs
// 2: the attr of the element at that position
typedef pair< TyDocIndex, TyXMLAttrInfoList > TyIndexAttrsPair;
typedef stack< TyIndexAttrsPair, deque< TyIndexAttrsPair > > TyStack;
typedef map< icu::UnicodeString, TyStack, less< icu::UnicodeString > >
TyPosStack;
// -----------------------------------------------------------------------
// Private data members
// -----------------------------------------------------------------------
ParserConfiguration const * iv_pclConfig;
DocBuffer iv_docBuffer;
TCAS * iv_pTCAS;
bool iv_bVerbose;
bool iv_bIsMultiDocFile;
size_t iv_uiMultiDocNbr;
size_t iv_uiMultiDocOffset;
size_t iv_uiInputSize;
long iv_lLastEndIndex;
TyPosStack iv_clPosStack;
size_t iv_uiInIgnoreTag;
ParserInterface::MultiDocCallbackInterface * iv_pCallbackObject;
};
} // namespace uima
#endif //__UIMA_PARSE_HANDLERS_HPP