// blob: 8384bb2a46b417f5f7302649c9913463d230c07b [file] [log] [blame]
/** \file xmltypesystemreader.cpp .
-----------------------------------------------------------------------------
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
-----------------------------------------------------------------------------
Description:
-----------------------------------------------------------------------------
-------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/* Include dependencies */
/* ----------------------------------------------------------------------- */
// #define DEBUG_VERBOSE
#include "uima/pragmas.hpp"
#include <vector>
#include "xercesc/util/PlatformUtils.hpp"
#include "xercesc/sax/SAXParseException.hpp"
#include "xercesc/parsers/XercesDOMParser.hpp"
#include "xercesc/dom/DOMException.hpp"
#include "xercesc/dom/DOMNamedNodeMap.hpp"
#include "xercesc/sax/ErrorHandler.hpp"
#include "xercesc/dom/DOMDocument.hpp"
#include "xercesc/dom/DOMElement.hpp"
#include "xercesc/dom/DOMNodeList.hpp"
#include "xercesc/framework/LocalFileInputSource.hpp"
#include "xercesc/framework/MemBufInputSource.hpp"
#include "uima/xmltypesystemreader.hpp"
#include "uima/lowlevel_typesystem.hpp"
#include "uima/internal_xmlconstants.hpp"
#include "uima/internal_casimpl.hpp"
#include "uima/msg.h"
#include "uima/xmlerror_handler.hpp"
#include "uima/macros.h"
#include "uima/casdefinition.hpp"
/* ----------------------------------------------------------------------- */
/* Constants */
/* ----------------------------------------------------------------------- */
/* Size, in XMLCh units, of the static scratch buffer that
   convert(char const *) transcodes into (see gs_tempXMLChBuffer). */
#define MAXXMLCHBUFF 256
/* ----------------------------------------------------------------------- */
/* Forward declarations */
/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/* Types / Classes */
/* ----------------------------------------------------------------------- */
// NOTE(review): DEBUG_VERBOSE is defined unconditionally here, after every
// #include above, while the definition near the top of the file is commented
// out. Presumably it is meant to switch on the UIMA_TPRINT tracing used below;
// confirm whether leaving it enabled (and at this position) is intentional.
#define DEBUG_VERBOSE
namespace uima {
// Implementation part of XMLTypeSystemReaderException, generated by the UIMA
// exception macro. uima::Exception is passed as the second argument
// (presumably the base class; confirm against the macro definition).
UIMA_EXC_CLASSIMPLEMENT(XMLTypeSystemReaderException, uima::Exception);
class RethrowErrorHandler : public ErrorHandler {
public:
void error(const SAXParseException& e) {
throw e;
}
void fatalError(const SAXParseException& e) {
throw e;
}
void warning(const SAXParseException& e) {
throw e;
}
void resetErrors() {}
};
/* Scratch buffer shared by all calls of convert(char const *). */
static XMLCh gs_tempXMLChBuffer[ MAXXMLCHBUFF ];

/**
 * Transcode a C string into XMLCh characters.
 *
 * The result is written into the file-static buffer gs_tempXMLChBuffer, so
 * the returned pointer is only valid until the next call of this function
 * overwrites the buffer. At most MAXXMLCHBUFF - 1 characters are requested
 * from the transcoder; success of the transcoding is only checked by an
 * assert.
 *
 * @param cpBuf  string to transcode
 * @return pointer to the shared static buffer holding the transcoded text
 */
XMLCh const * convert(char const * cpBuf) {
  XMLCh * const pTarget = gs_tempXMLChBuffer;
  bool const bTranscoded = XMLString::transcode( cpBuf, pTarget, MAXXMLCHBUFF - 1 );
  assert( bTranscoded );
  return pTarget;
}
/**
 * Copy a zero-terminated XMLCh string into an ICU UnicodeString.
 *
 * The XMLCh buffer is reinterpreted as a UChar buffer, which is only valid
 * when both character types have the same size; this is checked with
 * assertWithMsg.
 *
 * @param cpUCBuf  zero-terminated XMLCh string
 * @return UnicodeString constructed from the characters of cpUCBuf
 */
UnicodeString convert( XMLCh const * cpUCBuf ) {
  assertWithMsg( sizeof(XMLCh) == sizeof(UChar), "Port required!");
  UChar const * const pUChars = reinterpret_cast<UChar const *>( cpUCBuf );
  unsigned int const uiNumChars = XMLString::stringLen( cpUCBuf );
  return UnicodeString( pUChars, uiNumChars );
}
/**
 * Throw an XMLTypeSystemReaderException unless bCondition holds.
 *
 * The exception carries error code UIMA_ERR_XMLTYPESYSTEMREADER, the message
 * id UIMA_MSG_ID_EXC_XMLTYPESYSTEMREADER, a parameterless context message
 * (UIMA_MSG_ID_EXCON_READING_TYPESYSTEM_FROM_XML) and is flagged as
 * unrecoverable.
 *
 * @param bCondition  condition that must be true for reading to continue
 */
void XMLTypeSystemReader::checkValidityCondition(bool bCondition) const {
  if (bCondition) {
    return;  // condition satisfied, nothing to report
  }
  UIMA_EXC_THROW_NEW(XMLTypeSystemReaderException,
                     UIMA_ERR_XMLTYPESYSTEMREADER,
                     UIMA_MSG_ID_EXC_XMLTYPESYSTEMREADER,
                     uima::ErrorMessage(UIMA_MSG_ID_EXCON_READING_TYPESYSTEM_FROM_XML),
                     uima::ErrorInfo::unrecoverable
                    );
}
void XMLTypeSystemReader::checkValidityCondition(bool bCondition, TyMessageId tyMessage, icu::UnicodeString const & crString) const {
if (!bCondition) {
uima::ErrorMessage msg(tyMessage);
msg.addParam( crString );
UIMA_EXC_THROW_NEW(XMLTypeSystemReaderException,
UIMA_ERR_XMLTYPESYSTEMREADER,
UIMA_MSG_ID_EXC_XMLTYPESYSTEMREADER,
msg,
uima::ErrorInfo::unrecoverable
);
}
}
void XMLTypeSystemReader::checkValidityCondition(bool bCondition, TyMessageId tyMessage, icu::UnicodeString const & crString1, icu::UnicodeString const & crString2) const {
if (!bCondition) {
uima::ErrorMessage msg(tyMessage);
msg.addParam( crString1 );
msg.addParam( crString2 );
UIMA_EXC_THROW_NEW(XMLTypeSystemReaderException,
UIMA_ERR_XMLTYPESYSTEMREADER,
UIMA_MSG_ID_EXC_XMLTYPESYSTEMREADER,
msg,
uima::ErrorInfo::unrecoverable
);
}
}
/**
 * Create a reader that fills the given type system.
 *
 * The type system is passed through lowlevel::TypeSystem::promoteTypeSystem()
 * and the result is what the reader works on. No XML error handler is set
 * initially; read() installs a default one while it runs when none was
 * provided through setErrorHandler().
 *
 * @param rTypeSystem  type system to be extended by this reader
 */
XMLTypeSystemReader::XMLTypeSystemReader(TypeSystem & rTypeSystem)
    : iv_rTypeSystem( uima::lowlevel::TypeSystem::promoteTypeSystem( rTypeSystem ) ),
      iv_pXMLErrorHandler( NULL ) {
}
/**
 * Create a reader that fills the type system of a CAS definition.
 *
 * The reader works on casDef.getTypeSystem(). No XML error handler is set
 * initially; read() installs a default one while it runs when none was
 * provided through setErrorHandler().
 *
 * @param casDef  CAS definition whose type system is to be extended
 */
XMLTypeSystemReader::XMLTypeSystemReader(uima::internal::CASDefinition & casDef)
    : iv_rTypeSystem( casDef.getTypeSystem() ),
      iv_pXMLErrorHandler( NULL ) {
}
/**
 * Destructor. The body is empty: in particular an error handler set through
 * setErrorHandler() is not deleted here.
 */
XMLTypeSystemReader::~XMLTypeSystemReader() {
}
/**
 * Create (or verify) all features described below pTopTypeElement.
 *
 * Every element with tag XMLConstants::TAGNAME_FEATURE found by
 * getElementsByTagName() is processed. Its parent element must be a type
 * element naming the introducing type; the range type is taken from the
 * feature's range attribute. Both types must already exist in the type
 * system. If the introducing type has no feature with that base name, the
 * feature is created with iv_ustrCreatorID as creator; otherwise the existing
 * feature must have the same introducing type and range type.
 *
 * Note: convert(char const *) returns a pointer into one shared static
 * buffer, therefore each converted constant is used immediately and never
 * kept across another convert() call.
 *
 * @param pTopTypeElement  DOM element whose descendants are searched
 * @throws XMLTypeSystemReaderException (through checkValidityCondition) when
 *         one of the validity conditions fails
 */
void XMLTypeSystemReader::createFeatures(DOMElement * pTopTypeElement) {
  DOMNodeList * pFeatureNodes = pTopTypeElement->getElementsByTagName( convert(uima::internal::XMLConstants::TAGNAME_FEATURE) );

  for (unsigned int uiIndex = 0; uiIndex < pFeatureNodes->getLength(); ++uiIndex) {
    DOMNode * pFeatureNode = pFeatureNodes->item(uiIndex);
    assert( pFeatureNode->getNodeType() == DOMNode::ELEMENT_NODE );
    assert( XMLString::compareString( pFeatureNode->getNodeName(), convert(uima::internal::XMLConstants::TAGNAME_FEATURE)) == 0 );
    DOMElement * pFeatureElement = static_cast<DOMElement *>( pFeatureNode );

    // attributes of the feature element: range type, feature name, multiple-references flag
    icu::UnicodeString ustrRangeTypeName = convert( pFeatureElement->getAttribute( convert(uima::internal::XMLConstants::ATTRIBUTENAME_RANGE) ) );
    icu::UnicodeString ustrFeatureName = convert( pFeatureElement->getAttribute( convert(uima::internal::XMLConstants::ATTRIBUTENAME_NAME)) );
    icu::UnicodeString ustrMultiRefs = convert( pFeatureElement->getAttribute( convert(uima::internal::XMLConstants::ATTRIBUTENAME_MULTIREFS)) );
    UIMA_TPRINT("Checking for feature : " << ustrFeatureName << " with range type " << ustrRangeTypeName);

    // the enclosing element must be the type that introduces the feature
    DOMNode * pParentNode = pFeatureNode->getParentNode();
    assert( pParentNode->getNodeType() == DOMNode::ELEMENT_NODE );
    checkValidityCondition( XMLString::compareString( pParentNode->getNodeName(), convert(uima::internal::XMLConstants::TAGNAME_TYPE) ) == 0,
                            UIMA_MSG_ID_EXC_WRONG_XML_TYPESYSTEM_FORMAT,
                            ustrFeatureName
                          );
    DOMElement * pParentElement = static_cast<DOMElement *>( pParentNode );
    icu::UnicodeString ustrIntroTypeName = convert( pParentElement->getAttribute(convert(uima::internal::XMLConstants::ATTRIBUTENAME_NAME)) );
    UIMA_TPRINT("Checking for feature : " << ustrFeatureName << " with range type " << ustrRangeTypeName << " at intro type " << ustrIntroTypeName);

    // both the introducing type and the range type must be known already
    lowlevel::TyFSType tyIntroType = iv_rTypeSystem.getTypeByName(ustrIntroTypeName);
    checkValidityCondition( iv_rTypeSystem.isValidType(tyIntroType),
                            UIMA_MSG_ID_EXC_INVALID_INTRO_TYPE,
                            ustrIntroTypeName );
    lowlevel::TyFSType tyRangeType = iv_rTypeSystem.getTypeByName(ustrRangeTypeName);
    checkValidityCondition( iv_rTypeSystem.isValidType(tyRangeType),
                            UIMA_MSG_ID_EXC_INVALID_RANGE_TYPE,
                            ustrRangeTypeName );

    lowlevel::TyFSFeature tyFeature = iv_rTypeSystem.getFeatureByBaseName(tyIntroType, ustrFeatureName );
    if (tyFeature == lowlevel::TypeSystem::INVALID_FEATURE) {
      // unknown so far: create the feature
      UIMA_TPRINT("Creating feature : " << ustrFeatureName << " with range type " << ustrRangeTypeName << " at intro type " << ustrIntroTypeName);
      bool bMultipleRefs = (ustrMultiRefs == icu::UnicodeString("true"));
      tyFeature = iv_rTypeSystem.createFeature( tyIntroType, tyRangeType, bMultipleRefs, ustrFeatureName, iv_ustrCreatorID );
    } else {
      // already present: the description must agree with the existing feature
      checkValidityCondition( tyIntroType == iv_rTypeSystem.getIntroType(tyFeature),
                              UIMA_MSG_ID_EXC_INVALID_INTRO_TYPE,
                              ustrFeatureName,
                              ustrIntroTypeName );
      checkValidityCondition( tyRangeType == iv_rTypeSystem.getRangeType(tyFeature),
                              UIMA_MSG_ID_EXC_INVALID_RANGE_TYPE,
                              ustrFeatureName,
                              ustrRangeTypeName );
    }
  }
}
/**
 * Recursively create (or verify) the type described by pNewTypeElement and
 * all types described by its child type elements.
 *
 * When tyParentType is INVALID_TYPE the element stands for the top type and
 * nothing is created for it. Otherwise the type named by the element's name
 * attribute is looked up: if it does not exist it is created below
 * tyParentType with iv_ustrCreatorID as creator, and if it exists its parent
 * must be tyParentType. Afterwards every child element with tag
 * XMLConstants::TAGNAME_TYPE is processed with the current type as parent.
 *
 * @param tyParentType     parent of the type to create, or INVALID_TYPE for
 *                         the element representing the top type
 * @param pNewTypeElement  type element to process
 * @throws XMLTypeSystemReaderException (through checkValidityCondition) when
 *         an existing type has a different parent
 */
void XMLTypeSystemReader::createType(lowlevel::TyFSType tyParentType, DOMElement * pNewTypeElement) {
  UIMA_TPRINT("entering createType");
  assert( XMLString::compareString( pNewTypeElement->getNodeName(), convert(uima::internal::XMLConstants::TAGNAME_TYPE)) == 0 );

  lowlevel::TyFSType tyCurrentType = lowlevel::TypeSystem::INVALID_TYPE;
  if (tyParentType == lowlevel::TypeSystem::INVALID_TYPE) {
    // no parent given: this element represents the top type
    tyCurrentType = iv_rTypeSystem.getTopType();
  } else {
    assert( iv_rTypeSystem.isValidType( tyParentType ) );
    icu::UnicodeString ustrTypeName = convert( pNewTypeElement->getAttribute( convert(uima::internal::XMLConstants::ATTRIBUTENAME_NAME )) );
    tyCurrentType = iv_rTypeSystem.getTypeByName( ustrTypeName );
    UIMA_TPRINT("Checking for type : " << ustrTypeName);
    if (tyCurrentType != lowlevel::TypeSystem::INVALID_TYPE) {
      // type exists already: it must hang below the same parent
      checkValidityCondition( iv_rTypeSystem.getParentType(tyCurrentType) == tyParentType,
                              UIMA_MSG_ID_EXC_WRONG_PARENT_TYPE,
                              iv_rTypeSystem.getTypeName(tyCurrentType) );
    } else {
      UIMA_TPRINT("Creating type : " << ustrTypeName);
      tyCurrentType = iv_rTypeSystem.createType(tyParentType, ustrTypeName, iv_ustrCreatorID);
    }
  }
  assert( iv_rTypeSystem.isValidType( tyCurrentType ) );

  // descend into all child elements that are type elements themselves
  DOMNodeList * pChildNodes = pNewTypeElement->getChildNodes();
  for (unsigned int uiIndex = 0; uiIndex < pChildNodes->getLength(); ++uiIndex) {
    DOMNode * pChild = pChildNodes->item(uiIndex);
    bool const bIsElement = ( pChild->getNodeType() == DOMNode::ELEMENT_NODE );
    bool const bIsTypeTag = ( XMLString::compareString(pChild->getNodeName(), convert(uima::internal::XMLConstants::TAGNAME_TYPE)) == 0 );
    if (!(bIsTypeTag && bIsElement)) {
      continue;
    }
    createType(tyCurrentType, static_cast<DOMElement *>( pChild ));
  }
  UIMA_TPRINT("exiting createType");
}
/**
 * Read a type system description held in a UnicodeString.
 *
 * The string is converted to UTF-8 and handed to the char const * overload
 * of readMemory(). The temporary UTF-8 string lives until that call returns.
 *
 * @param xmlString  XML text of the type system description
 * @param creatorID  creator id recorded for newly created types and features
 */
void XMLTypeSystemReader::readMemory(icu::UnicodeString const & xmlString, icu::UnicodeString const & creatorID) {
  // (the former locals holding the raw buffer pointer and its byte size were
  // never used and have been removed)
  UnicodeStringRef uref(xmlString);
  readMemory(uref.asUTF8().c_str(), creatorID);
}
void XMLTypeSystemReader::readMemory(char const * cpszXMLString, icu::UnicodeString const & creatorID) {
MemBufInputSource memIS((XMLByte const *) cpszXMLString, strlen(cpszXMLString), "sysID");
read(memIS, creatorID );
}
/**
 * Read a type system description from the file with the given name.
 *
 * The file name is converted to a UnicodeString using the platform's default
 * converter (W/1252 U/utf-8) and forwarded to the UnicodeString overload.
 *
 * @param fileName   name of the XML file
 * @param creatorID  creator id recorded for newly created types and features
 */
void XMLTypeSystemReader::readFile(char const * fileName, icu::UnicodeString const & creatorID) {
  readFile( icu::UnicodeString(fileName), creatorID );
}
/**
 * Read a type system description from the file with the given name.
 *
 * The file name is copied into a zero-terminated UChar buffer, which is
 * reinterpreted as XMLCh string for the LocalFileInputSource handed to
 * read().
 *
 * The buffer is owned by a std::vector so that it is released on every exit
 * path. read() reports invalid input by throwing, and a manually managed
 * new[]/delete[] buffer would be leaked in that case.
 *
 * @param fileName   name of the XML file
 * @param creatorID  creator id recorded for newly created types and features
 */
void XMLTypeSystemReader::readFile(icu::UnicodeString const & fileName, icu::UnicodeString const & creatorID) {
  size_t const uiLen = fileName.length();
  // one extra, zero-initialised element serves as string terminator
  std::vector<UChar> fileNameBuffer(uiLen + 1, 0);
  fileName.extract(0, uiLen, &fileNameBuffer[0]);
  LocalFileInputSource fileIS( (XMLCh const *) &fileNameBuffer[0] );
  read(fileIS, creatorID );
}
/**
 * Set the error handler that read() passes to the XML parser.
 *
 * Only the pointer is stored. Passing NULL makes read() fall back to a
 * default handler that it creates itself.
 *
 * @param pErrorHandler  handler to use for subsequent reads, may be NULL
 */
void XMLTypeSystemReader::setErrorHandler(ErrorHandler * pErrorHandler) {
  this->iv_pXMLErrorHandler = pErrorHandler;
}
/**
 * Parse a type system description from crInputSource and add its types and
 * features to the type system of this reader.
 *
 * The document is parsed with a XercesDOMParser (validation scheme Val_Auto,
 * namespaces and schema processing switched off). The root element must be
 * XMLConstants::TAGNAME_TYPEHIERARCHY and must have children; the first child
 * element must be a type element. It is treated as the top type: createType()
 * walks the type hierarchy below it and createFeatures() afterwards handles
 * the features. Any further child elements of the root are ignored.
 *
 * If no error handler was set through setErrorHandler(), a default
 * XMLErrorHandler is created for the duration of the call. A scope guard
 * deletes it and resets iv_pXMLErrorHandler on every exit path, including
 * exceptions thrown by the parser, the handler or checkValidityCondition().
 * Without the guard the handler would leak and remain installed after a
 * failed read.
 *
 * @param crInputSource  source of the XML document
 * @param creatorID      creator id recorded for newly created types and features
 * @throws XMLTypeSystemReaderException (through checkValidityCondition) when
 *         the document does not have the expected structure
 */
void XMLTypeSystemReader::read(InputSource const & crInputSource, icu::UnicodeString const & creatorID) {
  UIMA_TPRINT("read() entered");
  iv_ustrCreatorID = creatorID;

  // Releases the internally created default error handler when the function
  // is left, no matter whether by return or by exception.
  struct DefaultHandlerGuard {
    XMLTypeSystemReader & rReader;
    bool                  bOwnsHandler;
    explicit DefaultHandlerGuard(XMLTypeSystemReader & rOwner)
        : rReader(rOwner), bOwnsHandler(false) {}
    ~DefaultHandlerGuard() {
      if (bOwnsHandler) {
        delete rReader.iv_pXMLErrorHandler;
        rReader.iv_pXMLErrorHandler = NULL;
      }
    }
  };

  // Declared before the parser so that the parser is destroyed first and
  // never holds a pointer to an already deleted handler.
  DefaultHandlerGuard handlerGuard(*this);
  if (iv_pXMLErrorHandler == NULL) {
    iv_pXMLErrorHandler = new XMLErrorHandler();
    handlerGuard.bOwnsHandler = true;
  }

  XercesDOMParser parser;
  parser.setValidationScheme(XercesDOMParser::Val_Auto);
  parser.setDoNamespaces(false);
  parser.setDoSchema(false);
  parser.setErrorHandler(iv_pXMLErrorHandler);
  parser.parse( crInputSource);

  DOMDocument* doc = parser.getDocument();
  assert(EXISTS(doc));
  // asserts vanish in release builds: report a missing document as invalid
  // input instead of dereferencing a NULL pointer
  checkValidityCondition( doc != NULL );

  // get top node
  DOMElement * rootElem = doc->getDocumentElement();
  assert(EXISTS(rootElem));
  checkValidityCondition( rootElem != NULL );

  /* taph 02.10.2002: do we need to do the validity checking ourselves?
     Adding an (inline) DTD does that better than we could ever do it.
     And it is expensive because of the conversions. */
  icu::UnicodeString ustrTAGNAME_TYPEHIERARCHY(uima::internal::XMLConstants::TAGNAME_TYPEHIERARCHY);
  icu::UnicodeString ustrTAGNAME_TYPE(uima::internal::XMLConstants::TAGNAME_TYPE);

  assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
  icu::UnicodeString ustrRootName( (UChar const *) rootElem->getNodeName());
  UIMA_TPRINT("root element name: "<< ustrRootName );
  checkValidityCondition( ustrRootName == ustrTAGNAME_TYPEHIERARCHY,
                          UIMA_MSG_ID_EXC_WRONG_XML_TYPESYSTEM_FORMAT,
                          ustrRootName );

  DOMNodeList * children = rootElem->getChildNodes();
  assert(EXISTS(children));
  checkValidityCondition( children->getLength() > 0 );

  // Only the first child element is processed: it must be the element of the
  // top type. Non-element children (text, comments, ...) are skipped.
  for (unsigned int i = 0; i < children->getLength(); ++i) {
    DOMNode * kid = children->item(i);
    assert(EXISTS(kid));
    if ( kid->getNodeType() != DOMNode::ELEMENT_NODE ) {
      continue;
    }
    UIMA_TPRINT("in element node block");
    DOMElement * kidElem = (DOMElement*) kid;

    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
    icu::UnicodeString ustrKidName((UChar const *) kidElem->getNodeName());
    checkValidityCondition( ustrKidName == ustrTAGNAME_TYPE,
                            UIMA_MSG_ID_EXC_WRONG_XML_TYPESYSTEM_FORMAT,
                            ustrKidName );

    // types first, so that all intro and range types exist when the
    // features are created
    createType(lowlevel::TypeSystem::INVALID_TYPE, kidElem );
    createFeatures(kidElem);
    break;
  }

  UIMA_TPRINT("Exiting read()");
  // handlerGuard deletes the default error handler here, if one was created
}
} // namespace uima
/* ----------------------------------------------------------------------- */
/* Implementation */
/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */