blob: 6de49a03b5e3d28cbb8efe1aa160b59b2414540d [file] [log] [blame]
/** @name xmideserializer_handler.cpp
-----------------------------------------------------------------------------
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
-----------------------------------------------------------------------------
10/18/2005 Initial creation
-------------------------------------------------------------------------- */
//TODO support multiple indexed FS
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include "uima/pragmas.hpp"
#include <iostream>
#include <sstream>
#include <algorithm>
using namespace std;
#include "xercesc/sax2/Attributes.hpp"
#include "xercesc/sax/SAXParseException.hpp"
#include "xercesc/sax/SAXException.hpp"
#include "uima/msg.h"
#include "uima/exceptions.hpp"
#include "uima/lowlevel_typesystem.hpp"
#include "uima/lowlevel_indexrepository.hpp"
#include "uima/xmideserializer_handler.hpp"
#include "uima/internal_fspromoter.hpp"
#include "uima/internal_typeshortcuts.hpp"
#include "uima/internal_casimpl.hpp"
#include "uima/fsindexrepository.hpp"
#include "uima/arrayfs.hpp"
#include "uima/listfs.hpp"
#include "uima/annotator_context.hpp"
#include "uima/resmgr.hpp"
namespace uima {
// ---------------------------------------------------------------------------
// XmiDeserialiserHandler: Constructors and Destructor
// ---------------------------------------------------------------------------
XmiDeserializerHandler::XmiDeserializerHandler(CAS & cas,
XmiSerializationSharedData * xmiSharedData, bool len) : iv_cas(cas.getBaseCas() ),
iv_locator(NULL), iv_casimpl( uima::internal::CASImpl::promoteCAS(*iv_cas)),
sharedData(xmiSharedData), ownsSharedData(false), outOfTypeSystemElement(NULL) {
lenient = len;
if (this->sharedData==NULL) {
this->sharedData = new XmiSerializationSharedData();
ownsSharedData=true;
} else {
lenient=true;
}
//cout << " XmiDeserializerHandler::constructor " << endl;
currentContentFeat.append(DEFAULT_CONTENT_FEATURE);
sofaTypeCode = uima::internal::gs_tySofaType;
FSIndexRepository * fsidx = &iv_cas->getBaseIndexRepository();
indexRepositories.push_back((lowlevel::IndexRepository*)fsidx);
// There should always be another index for the Initial View
fsidx = &iv_cas->getView(CAS::NAME_DEFAULT_SOFA)->getIndexRepository();
indexRepositories.push_back((lowlevel::IndexRepository*)fsidx);
// get temp heap handle for checking if an FS is an annotation
lowlevel::FSHeap const & crHeap = iv_casimpl.getHeap();
// uima::lowlevel::FSHeap::TyFSHeap const & tyTempHeap = crHeap.iv_clTemporaryHeap;
iv_typesystem = &crHeap.getTypeSystem();
ignoreDepth=0;
nextSofaNum=2;
}
XmiDeserializerHandler::~XmiDeserializerHandler() {
//cout << " XmiDeserializerHandler::destructor " << endl;
if (ownsSharedData) {
delete sharedData;
}
//cout << " XmiDeserializerHandler::destructor done " << endl;
}
// ---------------------------------------------------------------------------
// XmiDeserializerHandler: Implementation of the SAX2 ContentHandler interface
// ---------------------------------------------------------------------------
void XmiDeserializerHandler::setDocumentLocator(const Locator* const locator) {
//cout << " XmiDeserializerHandler::setDocumentLocator() " << endl;
iv_locator = locator;
}
void XmiDeserializerHandler::startDocument() {
//cout << " XmiDeserializerHandler::startDocument() " << endl;
iv_state = DOC_STATE;
}
void XmiDeserializerHandler::startElement(const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname,
const Attributes & attrs) {
//cout << " XmiDeserializerHandler::startElement() qname " << UnicodeString((UChar*)qname, XMLString::stringLen(qname)) << endl;
//cout << "startElement localname " << UnicodeString(localname) << " uri " << UnicodeString(uri) << endl;
//cout << "startElement attrs " << attrs.getLength() << endl;
assert(sizeof(XMLCh) == sizeof(UChar));
icu::UnicodeString qualifiedName(qname);
buffer.remove();
switch (iv_state) {
case DOC_STATE: {
//cout << "startElement DOC_STATE " << attrs.getLength() << endl;
// allow any root element name
// extract xmlns:prefix=uri attributes into a map, which we can use to
// resolve the prefixes even with a non-namespace-aware parser
if (attrs.getLength() != 0) {
for (size_t i = 0; i < attrs.getLength(); i++) {
UnicodeString attrName(attrs.getQName(i));
//cout << "xmlns attrName " << attrName << endl;
if (attrName.indexOf("xmlns:") > -1 ) {
UnicodeString prefix;
attrName.extract(6, attrName.length()-6, prefix);
UnicodeString uri(attrs.getValue(i));
nsPrefixToUriMap[prefix]= uri;
}
}
}
iv_state = FS_STATE;
break;
}
case FS_STATE: {
// ignore elements with XMI prefix (such as XMI annotations)
if (qualifiedName.indexOf("xmi") > 0) {
this->iv_state = IGNORING_XMI_ELEMENTS_STATE;
this->ignoreDepth++;
return;
}
UnicodeString unsuri(uri);
UnicodeString ulocalname(localname);
// parser not namespace-enabled, so try to resolve NS ourselves
// TODO test with non namespace-enabled
/**
int colonIndex = qualifiedName.indexOf(":");
if (colonIndex != -1) {
UnicodeString prefix;
qualifiedName.extract(0, colonIndex,prefix);
map<UnicodeString,UnicodeString>::iterator uriite = nsPrefixToUriMap.find(prefix);
if (uriite != nsPrefixToUriMap.end()) {
nameSpaceURI = uriite->second;
} else {
// unbound namespace. Rather than failing, just assume a reasonable default.
nameSpaceURI.append("http:///");
nameSpaceURI.append(prefix);
nameSpaceURI.append(".ecore");
}
colonIndex++;
qualifiedName.extract(colonIndex, qualifiedName.length()-colonIndex,localName );
} else { // no prefix. Use default URI
nameSpaceURI = DEFAULT_NAMESPACE_URI;
}
**/
//cout << "startElement FS_STATE calling readFS " << typeName << endl;
//readFS(typeName, attrs);
readFS(unsuri, ulocalname, qualifiedName, attrs);
map<UnicodeString, vector<UnicodeString>*>::iterator mite;
for (mite=multiValuedFeatures.begin();
mite != multiValuedFeatures.end(); mite++) {
if (mite->second != NULL) {
delete mite->second;
}
}
multiValuedFeatures.clear();
iv_state = FEAT_STATE;
break;
}
case FEAT_STATE: {
iv_state = FEAT_CONTENT_STATE;
break;
}
case IGNORING_XMI_ELEMENTS_STATE: {
ignoreDepth++;
break;
}
default: {
// If we're not in an element expecting state, raise an error.
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam( qualifiedName );
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
} //switch
}
void XmiDeserializerHandler::characters(
const XMLCh* const cpwsz,
const unsigned int uiLength) {
assert(sizeof(XMLCh) == sizeof(UChar));
if (this->iv_state == FEAT_CONTENT_STATE) {
buffer.append( (UChar const *) cpwsz, 0, uiLength );
}
/**
switch (this->iv_state) {
case FEAT_CONTENT_STATE:
buffer.append( (UChar const *) cpwsz, 0, uiLength );
break;
default:
break;
}**/
}
void XmiDeserializerHandler::endElement(const XMLCh* const nsuri,
const XMLCh* const localname,
const XMLCh* const qname) {
/**
cout << " XmiDeserializerHandler::endElement() qname "
<< UnicodeString( (UChar*) qname, XMLString::stringLen(qname) ) << endl;
cout << " XmiDeserializerHandler::endElement() uri "
<< UnicodeString( (UChar*) nsuri, XMLString::stringLen(nsuri) ) << endl;
**/
assert(sizeof(XMLCh) == sizeof(UChar));
UnicodeString qualifiedName( (UChar const *) qname, XMLString::stringLen(qname));
//cout << "endElement qualifiedname " << qualifiedName << endl;
switch (iv_state) {
case DOC_STATE: {
// Do nothing.
break;
}
case FS_STATE: {
iv_state = DOC_STATE;
break;
}
case FEAT_CONTENT_STATE: {
// We have just processed one of possibly many values for a feature.
// Store this value in the multiValuedFeatures map for later use.
//cout << "endELement FEAT_CONTENT_STATE " << buffer << endl;
map<UnicodeString, vector<UnicodeString>*>::iterator ite =
multiValuedFeatures.find(qualifiedName);
vector<UnicodeString> * valuesList=0;
if (ite == multiValuedFeatures.end()) {
valuesList = new vector<UnicodeString>;
multiValuedFeatures[qualifiedName] = valuesList;
} else {
valuesList = ite->second;
}
if (valuesList==0) {
cout << "endELement()FEAT_CONTENT_STATE valuesList not created" << endl;
}
else valuesList->push_back(buffer);
// go back to the state where we're expecting a feature
iv_state = FEAT_STATE;
break;
}
case FEAT_STATE: {
// end of FS. Process multi-valued features or array elements that were
// encoded as subelements
if (this->outOfTypeSystemElement != NULL) {
if (this->multiValuedFeatures.size() > 0) {
map<UnicodeString,vector<UnicodeString>*>::iterator ite;
for (ite=multiValuedFeatures.begin(); ite != multiValuedFeatures.end();ite++) {
UnicodeString featName = ite->first;
vector<UnicodeString>* featVals = ite->second;
addOutOfTypeSystemFeature(outOfTypeSystemElement, featName, *featVals);
}
}
this->outOfTypeSystemElement = NULL;
}
//process the multivalued feature or array elements that
//were encoded as subelements.
//cout << "endElement FEAT_STATE " << qualifiedName << endl;
else if (currentType.isValid()) {
int typecode = internal::FSPromoter::demoteType(currentType);
if ( iv_cas->getTypeSystem().isArrayType(typecode) &&
typecode != internal::gs_tyByteArrayType) {
// create the array now. elements may have been provided either as
// attributes or child elements, but not both.
// BUT - not byte arrays! They are created immediately, to avoid
// the overhead of parsing into a String array first
vector<string> featVals;
// cout << "endElement FEAT_STATE currentArrayElements " <<
// currentArrayElements.length() << endl;
if (currentArrayElements.length()==0) // were not specified as attributes
{
map<UnicodeString, vector<UnicodeString>*>::iterator ite =
multiValuedFeatures.find(UnicodeString("elements"));
if (ite != multiValuedFeatures.end()) {
vector<UnicodeString>* vals = ite->second;
for (size_t i=0; i<vals->size(); i++) {
featVals.push_back( ((UnicodeStringRef)vals->at(i)).asUTF8());
}
}
} else {
tokenize(currentArrayElements,featVals);
}
createArray(internal::FSPromoter::demoteType(currentType), featVals, currentArrayId);
} else {
map<UnicodeString,vector<UnicodeString>*>::iterator ite;
for (ite=multiValuedFeatures.begin(); ite != multiValuedFeatures.end();ite++) {
UnicodeString featName = ite->first;
vector<UnicodeString>* featVals = ite->second;
vector<string> stringList;
for (size_t i=0; i< featVals->size();i++) {
stringList.push_back( ((UnicodeStringRef)featVals->at(i)).asUTF8());
}
handleFeature(currentAddr, featName, stringList);
}
}
}
iv_state = FS_STATE;
break;
}
case IGNORING_XMI_ELEMENTS_STATE: {
ignoreDepth--;
if (ignoreDepth == 0) {
iv_state = FS_STATE;
}
break;
}
}
}
void XmiDeserializerHandler::endDocument() {
//cout << " XmiDeserializerHandler::endDocument() " << endl;
//fix up deserialized FSs
for (size_t i = 0; i < this->deserializedFsAddrs.size(); i++) {
//cout << "finalize fs " << deserializedFsAddrs.at(i) << endl;
finalizeFS(deserializedFsAddrs.at(i));
}
//fix up lists
for (size_t i = 0; i < fsListNodesFromMultivaluedProperties.size(); i++) {
this->remapFSListHeads(fsListNodesFromMultivaluedProperties.at(i));
}
//cout << " XmiDeserializerHandler::endDocument() tcasInstance " << endl;
//update document annotation info in tcas
for (size_t i = 0; i < tcasInstances.size(); i++) {
CAS * tcas = (CAS *) tcasInstances[i];
if (tcas != 0) {
tcas->pickupDocumentAnnotation();
}
}
//cout << " XmiDeserializerHandler::endDocument() " << endl;
}
void XmiDeserializerHandler::ignorableWhitespace(const XMLCh* const cpwsz,
const unsigned int length) {
//cout << " XmiDeserializerHandler::ignorableWhitespace() " << endl;
}
/**
* Converts an XMI element name to a UIMA-style dotted type name.
*
*/
UnicodeString XmiDeserializerHandler::xmiElementName2uimaTypeName(UnicodeString& nsUri, UnicodeString& localName) {
// check map first to see if we've already computed the namespace mapping
map<UnicodeString,UnicodeString>::iterator ite = xmiNamespaceToUimaNamespaceMap.find(nsUri);
UnicodeString uimaNamespace;
if (ite != xmiNamespaceToUimaNamespaceMap.end()) {
uimaNamespace = ite->second;
} else {
// check for the special "no-namespace" URI, which is used for UIMA types with no namespace
if (nsUri.compare(DEFAULT_NAMESPACE_URI) == 0) {
//uimaNamespace = "";
} else {
// Our convention is that the UIMA namespace is the URI path format e.g:
// http:///uima/cas.ecore.
// remove http:/// and trailing .ecore
// replace remaining slashes with dot.
nsUri.extractBetween(8, nsUri.length()-6, uimaNamespace);
//cout << "uimanamespace " << uimaNamespace << endl;
uimaNamespace.findAndReplace("/", ".");
uimaNamespace.append("."); // include trailing dot for convenience
}
xmiNamespaceToUimaNamespaceMap[nsUri]= uimaNamespace;
}
//cout << "uimaNamespace final " << uimaNamespace << endl;
uimaNamespace.append(localName);
return uimaNamespace;
}
// Create a new FS.
void XmiDeserializerHandler::readFS(UnicodeString & nsUri, UnicodeString & localName,
UnicodeString & qualifiedName, const Attributes & attrs) {
UnicodeString typeName = xmiElementName2uimaTypeName(nsUri, localName);
Type type = iv_cas->getTypeSystem().getType(typeName);
currentType=type;
if (!type.isValid()) {
if (typeName.compare(UnicodeString("uima.cas.NULL"))==0) {
//cout << "readFS ignore " << typeName << endl;
return; //ignore
}
if (typeName.compare(UnicodeString("uima.cas.View"))==0) {
//cout << "readFS " << typeName << endl;
UnicodeString attrName;
int sofaXmiId=0;
UnicodeString members;
for (size_t i = 0; i < attrs.getLength(); i++) {
attrName = attrs.getQName(i);
if (attrName.compare(CAS::FEATURE_BASE_NAME_SOFA) == 0) {
UnicodeString ustr(attrs.getValue(i));
sofaXmiId = atoi( ((UnicodeStringRef)ustr).asUTF8().c_str());
} else if (attrName.compare("members") == 0) {
members = attrs.getValue(i);
}
}
processView(sofaXmiId, members);
return;
}
// type is not in our type system
if (!lenient) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam(typeName);
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
} else {
this->addToOutOfTypeSystemData(
new XmlElementName( ((UnicodeStringRef)nsUri).asUTF8(),
((UnicodeStringRef)localName).asUTF8(),
((UnicodeStringRef)qualifiedName).asUTF8()), attrs );
return;
}
} else if (iv_cas->getTypeSystem().isArrayType(internal::FSPromoter::demoteType(type)) ) {
UnicodeString attrName;
int xmiId=0;
UnicodeString elements;
for (size_t i = 0; i < attrs.getLength(); i++) {
attrName = attrs.getQName(i);
if (attrName.compare(XMI_ID_ATTR_NAME) == 0) {
UnicodeString ustr(attrs.getValue(i));
currentArrayId = atoi( ((UnicodeStringRef)ustr).asUTF8().c_str());
} else if (attrName.compare("elements") == 0) {
currentArrayElements = attrs.getValue(i);
}
}
//cout << "xmiId " << currentArrayId << " type=" << typeName << " elements=" << currentArrayElements << endl;
if (internal::FSPromoter::demoteType(type) == internal::gs_tyByteArrayType) {
int addr = createByteArray(currentArrayElements, currentArrayId);
}
} else {
//cout << "readFS() create FS and read attributes " << typeName << endl;
uima::lowlevel::TyFS addr = uima::internal::FSPromoter::demoteFS(iv_cas->createFS(type));
readFS(addr, attrs, true);
}
}
/**
* Handles the processing of a cas:View element in the XMI. The cas:View element encodes indexed
* FSs.
*
* @param sofa
* xmi:id of the sofa for this view, null indicates base CAS "view"
* @param membersString
* whitespace-separated string of FS addresses. Each FS is to be added to the specified
* sofa's index repository
*/
void XmiDeserializerHandler::processView(int sofaXmiId, UnicodeString & members) {
// TODO: this requires View to come AFTER all of its members
//cout << "processView start " << sofaXmiId << "members=" << membersString << endl;
if (members.length() > 0) {
// a view with no Sofa will be added to the 1st, _InitialView, index
int sofaNum = 1;
if (sofaXmiId != 0) {
// translate sofa's xmi:id into its sofanum
//cout << __LINE__ << " calling getFsAddrForXmiId " << sofaXmiId << endl;
int sofaAddr = getFsAddrForXmiId(sofaXmiId);
sofaNum = iv_cas->getHeap()->getIntValue(sofaAddr, internal::gs_tySofaNumFeature);
}
lowlevel::IndexRepository * indexRep = indexRepositories.at(sofaNum);
vector<string> memberList;
tokenize(members, memberList);
for (size_t i = 0; i < memberList.size(); i++) {
// have to map each ID to its "real" address
int addr=0;
int amember = atoi(memberList.at(i).c_str());
try {
//cout << __LINE__ << " calling getFsAddrForXmiId " << members.at(i) << endl;
addr = getFsAddrForXmiId(amember);
indexRep->addFS(internal::FSPromoter::promoteFS(addr,*iv_cas));
} catch (Exception e) {
if (!lenient) {
throw e;
}
else {
//unknown view member may be an OutOfTypeSystem FS
//cout << "calling sharedData->addOutOfTypeSystemViewMember" << endl;
this->sharedData->addOutOfTypeSystemViewMember(sofaXmiId, amember);
}
}
}
}
}
int XmiDeserializerHandler::createByteArray(UnicodeString& currentArrayElements, int currentArrayId) {
string elemStr = ( (UnicodeStringRef) currentArrayElements).asUTF8();
int arrayLen = elemStr.length() / 2;
ByteArrayFS fs = iv_cas->createByteArrayFS(arrayLen);
size_t j=0;
for (int i = 0; i < arrayLen; i++) {
char hex[5], *stop;
hex[0] = '0';
hex[1] = 'x';
if (j < elemStr.length() ) {
hex[2] = elemStr.at(j++);
if (j < elemStr.length()) {
hex[3] = elemStr.at(j++);
hex[4] = 0;
char val = strtol(hex, &stop, 16);
fs.set(i,val);
}
}
}
int arrayAddr = internal::FSPromoter::demoteFS(fs);
deserializedFsAddrs.push_back(arrayAddr);
if (currentArrayId > 0) {
sharedData->addIdMapping(arrayAddr,currentArrayId);
}
return arrayAddr;
}
void XmiDeserializerHandler::readFS(lowlevel::TyFS addr, const Attributes & attrs, bool toIndex) {
// Hang on address for setting content feature
currentAddr = addr;
int id = -1;
// int sofaRef = -1; // 0 ==> baseCas indexRepository
////vector<int>* sofaRef = new vector<int>;
UnicodeString attrName;
UnicodeString attrValue;
bool nameMapping = false;
UChar ubuff[256];
UErrorCode errorCode = U_ZERO_ERROR;
int thisSofaNum;
lowlevel::TyFS heapValue = iv_casimpl.getHeap().getType(addr);
if (sofaTypeCode == heapValue) {
int extsz = UnicodeString(CAS::FEATURE_BASE_NAME_SOFAID).extract(ubuff, 256, errorCode);
if (extsz > 256) {
cout << "ACK!" << endl;
}
const UChar* sofaID = attrs.getValue(ubuff);
if (0==UnicodeStringRef(sofaID).compare(UnicodeString("_DefaultTextSofaName"))) {
// initial view Sofa always has sofaNum = 1
thisSofaNum = 1;
} else if (0==UnicodeStringRef(sofaID).compare(UnicodeString(CAS::NAME_DEFAULT_SOFA))) {
thisSofaNum = 1;
} else {
thisSofaNum = this->nextSofaNum++;
}
}
Type type = uima::internal::FSPromoter::promoteType(heapValue, iv_cas->getTypeSystem().getLowlevelTypeSystem());
for (size_t i = 0; i < attrs.getLength(); i++) {
assertWithMsg( sizeof(XMLCh) == sizeof(UChar), "Port required!");
attrName = (UChar*)attrs.getQName(i);
attrValue = (UChar*)attrs.getValue(i);
if (attrName.compare(UnicodeString(XMI_ID_ATTR_NAME)) == 0) {
id = atoi(UnicodeStringRef(attrValue).asUTF8().c_str());
//cout << "got " << XMI_ID_ATTR_NAME << " " << id << endl;
} else {
if (sofaTypeCode == heapValue && attrName.compare(CAS::FEATURE_BASE_NAME_SOFAID)==0) {
if (attrValue.compare(UnicodeString("_DefaultTextSofaName"))==0 ) {
// First change old default Sofa name into the new one
attrValue = UnicodeString(CAS::NAME_DEFAULT_SOFA);
}
} else if (sofaTypeCode == heapValue
&& attrName.compare(UnicodeString(CAS::FEATURE_BASE_NAME_SOFANUM))==0) {
stringstream str;
str << thisSofaNum << endl;
attrValue = UnicodeString(str.str().c_str());
}
//cout << "readFS calling handleFeature " << attrName << " attrvalue= "
// << attrValue << endl;
handleFeature(addr, attrName, attrValue, true);
}
}
if (sofaTypeCode == heapValue) {
// If a Sofa, create CAS view to get new indexRepository
SofaFS sofa = (SofaFS) uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
//also add to indexes so we can retrieve the Sofa later
iv_cas->getBaseIndexRepository().addFS(sofa);
CAS * tcas = iv_cas->getView(sofa);
assert ( EXISTS(tcas) );
if (sofa.getSofaRef() == 1) {
iv_cas->registerInitialSofa();
} else {
// add indexRepo for views other than the initial view
lowlevel::IndexRepository * indexRep = iv_cas->getIndexRepositoryForSofa(sofa);
assert ( EXISTS(indexRep) );
indexRepositories.push_back(indexRep);
}
tcasInstances.push_back(tcas);
}
deserializedFsAddrs.push_back(addr);
if (id > 0) {
sharedData->addIdMapping(addr, id);
}
}
void XmiDeserializerHandler::addArrayElement(lowlevel::TyFS addr,
lowlevel::TyFSType arrayType,
int arrayPos,
string & buffer) {
if (arrayPos >= (int) iv_casimpl.getHeap().getArraySize(addr) ) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam("Invalid array FS in the CAS" );
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
FeatureStructure fs = uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
switch (arrayType) {
case internal::gs_tyIntArrayType: {
//cout << "add intarray element at " << arrayPos << " " << buffer << endl;
int val = atoi(buffer.c_str());
IntArrayFS intFS(fs);
intFS.set( (size_t) arrayPos, val);
break;
}
case internal::gs_tyFloatArrayType: {
float val = atof(buffer.c_str());
FloatArrayFS floatFS(fs);
floatFS.set( (size_t) arrayPos, val);
break;
}
case internal::gs_tyStringArrayType: {
//add the striug
StringArrayFS strFS(fs);
UnicodeString strval(buffer.c_str());
strFS.set( (size_t) arrayPos,strval);
break;
}
case internal::gs_tyByteArrayType: {
short intval = atoi(buffer.c_str());
char charval[2];
sprintf(charval,"%c",intval);
ByteArrayFS byteFS(fs);
byteFS.set( (size_t) arrayPos, charval[0]);
break;
}
case internal::gs_tyBooleanArrayType: {
BooleanArrayFS booleanFS(fs);
if (buffer.compare("true") == 0) {
booleanFS.set( (size_t) arrayPos, true);
//cout << "bool buffer " << buffer << " val= " << val << "set " << true << endl;
} else {
booleanFS.set ( (size_t) arrayPos, false);
//cout << arrayPos << " bool buffer " << buffer << " val= " << val << "set " << false << endl;
}
break;
}
case internal::gs_tyShortArrayType: {
short val;
//string strval;
//UnicodeStringRef(buffer).extractUTF8(strval);
stringstream s;
s << buffer.c_str();
s >> val;
ShortArrayFS shortFS(fs);
shortFS.set( (size_t) arrayPos, val);
break;
}
case internal::gs_tyLongArrayType: {
INT64 val;
stringstream s;
s << buffer;
s >> val;
LongArrayFS longFS(fs);
longFS.set( (size_t) arrayPos, val);
break;
}
case internal::gs_tyDoubleArrayType: {
DoubleArrayFS doubleFS(fs);
stringstream s;
s << buffer;
long double doubleval;
s >> doubleval;
doubleFS.set((size_t) arrayPos, doubleval);
break;
}
default: { //array of FSs
lowlevel::TyFS fsid = atoi(buffer.c_str());
FeatureStructure fsitem(fsid, *iv_cas);
ArrayFS fsArrayfs(fs);
fsArrayfs.set((size_t) arrayPos, fsitem);
}
} //swithch
}
// Create a feature value from a string representation.
void XmiDeserializerHandler::handleFeature(lowlevel::TyFS addr, UnicodeString & featName, UnicodeString & featVal, bool lenient) {
lowlevel::TyFSType fstype = iv_casimpl.getHeap().getType(addr);
Type type = uima::internal::FSPromoter::promoteType(fstype, iv_cas->getTypeSystem().getLowlevelTypeSystem());
Feature feat = type.getFeatureByBaseName(featName);
if (!feat.isValid()) {
if (!lenient) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam("Unknown Feature");
msg.addParam(featName);
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
else {
sharedData->addOutOfTypeSystemAttribute(addr, featName, featVal);
}
return;
}
lowlevel::TyFSFeature featCode = internal::FSPromoter::demoteFeature(feat);
handleFeature(type, addr, featCode, featVal, lenient);
}
void XmiDeserializerHandler::handleFeature(Type & type,
lowlevel::TyFS addr,
lowlevel::TyFSFeature featCode,
UnicodeString & featVal,
bool lenient) {
FeatureStructure fs = uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
if (!fs.isValid() ) {
cerr << "handle feature of Invalid FS " << type.getName() << endl;
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam("Invalid FeatureStructure");
msg.addParam(type.getName());
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
///Feature feat = type.getFeatureByBaseName(featName);
Feature feat = internal::FSPromoter::promoteFeature(featCode, *iv_typesystem);
Type rtype;
feat.getRangeType(rtype);
lowlevel::TyFSType rangeType = uima::internal::FSPromoter::demoteType(rtype);
switch (rangeType) {
case internal::gs_tyIntegerType: {
if (featVal.length()>0) {
if (featCode == internal::gs_tySofaRefFeature) {
// special handling for "sofa" feature of annotation. Need to change
// it from a sofa reference into a sofa number
int sofaXmiId = atoi(UnicodeStringRef(featVal).asUTF8().c_str());
//cout << __LINE__ << " calling getFsAddrForXmiId " << sofaXmiId << endl;
int sofaAddr = getFsAddrForXmiId(sofaXmiId);
int sofaNum = iv_cas->getHeap()->getFSValue(sofaAddr, internal::gs_tySofaNumFeature);
iv_cas->getHeap()->setFSValue(addr,featCode,sofaNum);
} else {
fs.setIntValue(feat, atoi(UnicodeStringRef(featVal).asUTF8().c_str()));
}
}
break;
}
case internal::gs_tyFloatType: {
if ( featVal.length() > 0) {
fs.setFloatValue(feat, atof(UnicodeStringRef(featVal).asUTF8().c_str()));
}
break;
}
case internal::gs_tyStringType: {
// if (featVal.length() > 0) {
fs.setStringValue(feat, featVal);
//}
break;
}
case internal::gs_tyByteType: {
if (featVal.length() > 0) {
string val = UnicodeStringRef(featVal).asUTF8();
short intval = atoi(val.c_str());
char charval[2];
sprintf(charval,"%c",intval);
fs.setByteValue(feat, charval[0] );
}
break;
}
case internal::gs_tyBooleanType: {
if (featVal.length() > 0) {
string val = UnicodeStringRef(featVal).asUTF8();
if (val.compare("1")==0 || val.compare("true") == 0)
fs.setBooleanValue(feat, true );
else fs.setBooleanValue(feat, false);
}
break;
}
case internal::gs_tyShortType: {
if (featVal.length() > 0) {
string strval = UnicodeStringRef(featVal).asUTF8();
short shortval;
stringstream s;
s << strval.c_str();
s >> shortval;
fs.setShortValue(feat, shortval);
}
break;
}
case internal::gs_tyLongType: {
if (featVal.length() > 0) {
string strval = UnicodeStringRef(featVal).asUTF8();
INT64 longval;
stringstream s;
s << strval.c_str();
s >> longval;
fs.setLongValue(feat, longval);
}
break;
}
case internal::gs_tyDoubleType: {
if (featVal.length() > 0) {
string strval = UnicodeStringRef(featVal).asUTF8();
long double doubleval;
stringstream s;
s << strval.c_str();
s >> doubleval;
fs.setDoubleValue(feat, doubleval );
}
break;
}
case internal::gs_tyBooleanArrayType:
case internal::gs_tyByteArrayType:
case internal::gs_tyIntArrayType:
case internal::gs_tyFloatArrayType:
case internal::gs_tyStringArrayType:
case internal::gs_tyLongArrayType:
case internal::gs_tyShortArrayType:
case internal::gs_tyDoubleArrayType:
case internal::gs_tyFSArrayType: {
//cout << "handleFeature " << feat.getName() << " " << featVal << endl;
if (feat.isMultipleReferencesAllowed()) {
// do the usual FS deserialization
//cout << " multiplerefsallowed " << endl;
if (featVal.length() > 0) {
int val = atoi(UnicodeStringRef(featVal).asUTF8().c_str());
//cout << " setting fsvalue " << "fsaddr " << addr << " value "<< val << endl;
iv_cas->getHeap()->setFeatureInternal(addr,featCode,val);
}
} else {
// Do the multivalued property deserialization.
// However, byte arrays have a special serialization (as hex digits)
//cout << " not multiplerefsallowed " << endl;
if (rangeType == internal::gs_tyByteArrayType) {
int arrayAddr = createByteArray(featVal, -1);
iv_cas->getHeap()->setFSValue(addr,featCode,arrayAddr);
} else {
//cout << "tokenizing array values " << endl;
vector<string> stringList;
tokenize(featVal, stringList);
handleFeature(addr, featCode, rangeType, stringList);
}
}
break;
}
case internal::gs_tyFloatListType:
case internal::gs_tyIntListType:
case internal::gs_tyStringListType:
case internal::gs_tyFSListType: {
//cout << "GOT A LIST FEATURE " << endl;
if (feat.isMultipleReferencesAllowed()) {
// do the usual FS deserialization
if (featVal.length() > 0) {
int val = atoi(UnicodeStringRef(featVal).asUTF8().c_str());
iv_cas->getHeap()->setFeatureInternal(addr,featCode,val);
}
} else {
// Do the multivalued property deserialization.
////handleFeature(addr,featCode,featVal);
vector<string> stringList;
tokenize(featVal, stringList);
handleFeature(addr, featCode, rangeType, stringList);
}
break;
}
default: {
if (rtype.isStringSubType()) {
if (featVal.length() > 0) {
fs.setStringValue(feat, featVal);
}
} else if (featVal.length() > 0) {
lowlevel::TyFS val = (lowlevel::TyFS) atoi(UnicodeStringRef(featVal).asUTF8().c_str());
iv_casimpl.getHeap().setFeatureInternal(addr, uima::internal::FSPromoter::demoteFeature(feat), val);
}
break;
}
}
}
void XmiDeserializerHandler::handleFeature(lowlevel::TyFS addr,
UnicodeString & featName,
vector<string> & featVal) {
lowlevel::TyFSType fstype = iv_casimpl.getHeap().getType(addr);
Type type = uima::internal::FSPromoter::promoteType(fstype, iv_cas->getTypeSystem().getLowlevelTypeSystem());
Feature feat = type.getFeatureByBaseName(featName);
if (!feat.isValid()) {
if (!lenient) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam("Unknown Feature");
msg.addParam(featName);
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
else {
sharedData->addOutOfTypeSystemChildElements(addr, ( (UnicodeStringRef)featName).asUTF8(), featVal);
}
return;
}
lowlevel::TyFSFeature featCode = internal::FSPromoter::demoteFeature(feat);
Type rtype;
feat.getRangeType(rtype);
handleFeature(addr, featCode,internal::FSPromoter::demoteType(rtype), featVal);
}
void XmiDeserializerHandler::handleFeature(lowlevel::TyFS addr,
lowlevel::TyFSFeature featCode,
lowlevel::TyFSType rangeTypeCode,
vector<string> & featVals) {
//cout << "handleFeature array/list " << featVals.size() << endl;
switch(rangeTypeCode) {
case internal::gs_tyBooleanArrayType:
case internal::gs_tyByteArrayType:
case internal::gs_tyIntArrayType:
case internal::gs_tyFloatArrayType:
case internal::gs_tyLongArrayType:
case internal::gs_tyShortArrayType:
case internal::gs_tyDoubleArrayType:
case internal::gs_tyFSArrayType: {
int arrayFS = createArray(rangeTypeCode, featVals, -1);
iv_cas->getHeap()->setFSValue(addr,featCode,arrayFS);
break;
}
case internal::gs_tyStringArrayType: {
//cout << "handleFeature of type string array" << endl;
int arrayFS = createArray(rangeTypeCode, featVals, -1);
iv_cas->getHeap()->setFSValue(addr,featCode,arrayFS);
break;
}
case internal::gs_tyIntListType: {
int arrayFS = createIntList(featVals);
iv_cas->getHeap()->setFSValue(addr,featCode,arrayFS);
break;
}
case internal::gs_tyFloatListType: {
int arrayFS = createFloatList(featVals);
iv_cas->getHeap()->setFSValue(addr,featCode,arrayFS);
break;
}
case internal::gs_tyStringListType: {
int arrayFS = createStringList(featVals);
iv_cas->getHeap()->setFSValue(addr,featCode,arrayFS);
break;
}
case internal::gs_tyFSListType: {
int arrayFS = createFSList(featVals);
iv_cas->getHeap()->setFSValue(addr,featCode,arrayFS);
break;
}
default: {
//scalar and FS type
if (featVals.size() != 1) {
//TODO log
cerr << "one feature value expected " << endl;
} else {
Type type = internal::FSPromoter::promoteType(rangeTypeCode,
iv_cas->getTypeSystem().getLowlevelTypeSystem());
UnicodeString val(featVals.at(0).c_str());
handleFeature(type,
addr, featCode,val, true);
}
break;
}
}
}
void XmiDeserializerHandler::tokenize(UnicodeString & ustr, vector<string> & stringList ) {
string str = (UnicodeStringRef(ustr)).asUTF8();
string::size_type lastPos = str.find_first_not_of(" ", 0);
string::size_type pos = str.find_first_of(" ",lastPos);
while (string::npos != pos || string::npos != lastPos) {
// Found a token, add it to the vector.
stringList.push_back(str.substr(lastPos, pos - lastPos));
// Skip blanks and find next non blank
lastPos = str.find_first_not_of(" ", pos);
// Find next blank
pos = str.find_first_of(" ",lastPos);
}
}
int XmiDeserializerHandler::createArray( lowlevel::TyFSType typeCode,
vector<string>& stringList,
int xmiId) {
int arrayAddr;
switch (typeCode) {
case internal::gs_tyBooleanArrayType:
case internal::gs_tyByteArrayType:
case internal::gs_tyIntArrayType:
case internal::gs_tyFloatArrayType:
case internal::gs_tyLongArrayType:
case internal::gs_tyShortArrayType:
case internal::gs_tyDoubleArrayType:
case internal::gs_tyStringArrayType:
case internal::gs_tyFSArrayType: {
//cout << "createArray() type " << typeCode << " size " << stringList.size() << endl;
arrayAddr = iv_cas->getHeap()->createArrayFS(typeCode, stringList.size());
//cout << "created array FS now adding element values at address " << arrayAddr << endl;
for (size_t i=0; i < stringList.size();i++) {
addArrayElement(arrayAddr, typeCode,i,stringList.at(i));
}
break;
}
default: {
cerr << "Invalid Array type" << endl;
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam("createArray failed.");
stringstream str;
str << "xmiId=" << xmiId << " typecode= " << typeCode << endl;
msg.addParam(str.str().c_str());
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
break;
}
}
//cout << "createArray " << xmiId << " addr=" << arrayAddr << endl;
deserializedFsAddrs.push_back(arrayAddr);
if (xmiId > 0) {
sharedData->addIdMapping(arrayAddr, xmiId);
}
return arrayAddr;
}
int XmiDeserializerHandler::createIntList(vector<string>& stringList) {
IntListFS listFS = iv_cas->createIntListFS();
for (size_t i = 0; i < stringList.size(); i++ ) {
int value = atoi (stringList.at(i).c_str());
listFS.addLast(value);
}
return internal::FSPromoter::demoteFS(listFS);
}
int XmiDeserializerHandler::createFloatList(vector<string>& stringList) {
FloatListFS listFS = iv_cas->createFloatListFS();
for (size_t i = 0; i < stringList.size(); i++ ) {
float value = atof (stringList.at(i).c_str());
listFS.addLast(value);
}
return internal::FSPromoter::demoteFS(listFS);
}
int XmiDeserializerHandler::createFSList(vector<string>& stringList) {
int first = iv_cas->getHeap()->createFS(internal::gs_tyEListType);
size_t i = stringList.size();
for (;i > 0;i--) {
int value = atoi(stringList.at(i-1).c_str());
int node = iv_cas->getHeap()->createFS(internal::gs_tyNEListType);
fsListNodesFromMultivaluedProperties.push_back(node);
iv_cas->getHeap()->setFeatureInternal(node, internal::gs_tyHeadFeature, value);
iv_cas->getHeap()->setFeatureInternal(node, internal::gs_tyTailFeature, first);
first = node;
}
return first;
}
int XmiDeserializerHandler::createStringList(vector<string>& stringList) {
StringListFS listFS = iv_cas->createStringListFS();
for (size_t i = 0; i < stringList.size(); i++ ) {
UnicodeString value(stringList.at(i).c_str()); //use xmiId to look up addr
listFS.addLast(value);
}
return internal::FSPromoter::demoteFS(listFS);
}
void XmiDeserializerHandler::remapFSListHeads(int addr) {
int type = iv_cas->getHeap()->getType(addr);
if (type != internal::gs_tyIntListType &&
type != internal::gs_tyFloatListType &&
type != internal::gs_tyStringListType &&
type != internal::gs_tyFSListType &&
type != internal::gs_tyNEListType) {
return;
}
int headFeat = internal::gs_tyHeadFeature;
int featVal = iv_cas->getHeap()->getFeatureInternal(addr, headFeat);
if (featVal != 0) {
int fsValAddr = 0;
try {
//cout << __LINE__ << " remap calling getFsAddrForXmiId " << featVal << endl;
fsValAddr = getFsAddrForXmiId(featVal);
} catch (Exception e) {
if (!lenient) {
throw e;
} else {
stringstream str;
str << featVal;
this->sharedData->addOutOfTypeSystemAttribute(addr, CAS::FEATURE_BASE_NAME_HEAD, str.str());
}
}
iv_cas->getHeap()->setFeatureInternal(addr, headFeat, fsValAddr);
}
}
void XmiDeserializerHandler::finalizeFS(int deserializedfsaddr) {
lowlevel::TyFS addr = deserializedfsaddr;
FeatureStructure fs = uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
Type type = fs.getType();
if (iv_cas->getTypeSystem().isArrayType(uima::internal::FSPromoter::demoteType(type)) ) {
finalizeArray(type, addr);
return;
}
//update heap value of features that are references to other FS.
vector<Feature> feats;
type.getAppropriateFeatures(feats);
for (size_t i = 0; i < feats.size(); i++) {
Feature feat = (Feature) feats[i];
Type rangeType;
feat.getRangeType(rangeType);
if (rangeType.isValid()) {
lowlevel::TyFSType rangetypecode = uima::internal::FSPromoter::demoteType(rangeType);
lowlevel::TyFSFeature featcode = uima::internal::FSPromoter::demoteFeature(feat);
//if not primitive
if (iv_cas->getTypeSystem().isFSType(rangetypecode) ||
(iv_cas->getTypeSystem().isArrayType(rangetypecode) &&
feat.isMultipleReferencesAllowed() ) ||
(iv_cas->getTypeSystem().isListType(rangetypecode) &&
feat.isMultipleReferencesAllowed()) ) {
//get the current feature value which is the id
lowlevel::TyFS featVal = iv_casimpl.getHeap().getFeatureInternal(addr, featcode);
if (featVal != 0) {
int fsValAddr = 0;
try {
//cout << __LINE__ << feat.getName() << " calling getFsAddrForXmiId " << featVal << endl;
fsValAddr = getFsAddrForXmiId(featVal);
} catch (Exception e) {
if (!lenient) {
throw e;
}
else {
//this may be a reference to an out-of-typesystem FS
stringstream str;
str << featVal;
this->sharedData->addOutOfTypeSystemAttribute(addr,
((UnicodeStringRef)feat.getName()).asUTF8(), str.str());
}
}
iv_casimpl.getHeap().setFSValue(addr, featcode, fsValAddr);
}
}
}
}
}
void XmiDeserializerHandler::finalizeArray(Type & type, lowlevel::TyFS addr) {
lowlevel::TyFSType typecode = uima::internal::FSPromoter::demoteType(type);
if (!iv_cas->getTypeSystem().isFSArrayType(typecode)) {
return;
}
// *** WARNING *** *** WARNING *** *** WARNING *** *** WARNING ***
// if implementation of ArrayFS on the heap changes, this code will be invalid
int size = (int)iv_cas->getHeap()->getHeap().getHeapValue(addr + 1);
for (int i=0; i<size; i++) {
lowlevel::TyFS arrayVal = iv_cas->getHeap()->getHeap().getHeapValue(addr + 2 + i);
if (arrayVal != 0) {
int arrayValAddr = 0;
try {
//cout << __LINE__ << " calling getFsAddrForXmiId " << arrayVal << endl;
arrayValAddr = getFsAddrForXmiId(arrayVal);
} catch (Exception e) {
if (!lenient) {
throw e;
}
else {
// the array element may be out of typesystem. In that case set it
// to null, but record the id so we can add it back on next serialization.
this->sharedData->addOutOfTypeSystemArrayElement(addr, i, arrayVal);
}
}
iv_cas->getHeap()->getHeap().setHeapValue(addr + 2 + i, arrayValAddr);
}
}
}
/**
* Gets the FS address into which the XMI element with the given ID
* was deserialized. This method supports merging multiple XMI documents
* into a single CAS, by checking the XmiSerializationSharedData
* structure to get the address of elements that were skipped during this
* deserialization but were deserialized during a previous deserialization.
*
* @param xmiId
* @return
*/
int XmiDeserializerHandler::getFsAddrForXmiId(int xmiId) {
int addr = sharedData->getFsAddrForXmiId(xmiId);
//cout << "xmiid=" << xmiId << "fsaddr=" << addr << endl;
if (addr > 0)
return addr;
else {
//cerr << __FILE__<<__LINE__ << " throw exc No such xmiid " << xmiId << endl;
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam("getFsAddrForXmiId");
msg.addParam(xmiId);
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
}
/**
* Adds a feature sturcture to the out-of-typesystem data. Also sets the
* this->outOfTypeSystemElement field, which is referred to later if we have to
* handle features recorded as child elements.
*/
void XmiDeserializerHandler::addToOutOfTypeSystemData(XmlElementName * xmlElementName, const Attributes & attrs) {
this->outOfTypeSystemElement = new OotsElementData();
//this->outOfTypeSystemElement->elementName = xmlElementName->qualifiedName;
this->outOfTypeSystemElement->elementName = xmlElementName;
UnicodeString attrName;
UnicodeString attrValue;
for (size_t i = 0; i < attrs.getLength(); i++) {
attrName = attrs.getQName(i);
attrValue = attrs.getValue(i);
if (attrName.compare(UnicodeString(XMI_ID_ATTR_NAME))==0) {
UnicodeStringRef uref(attrValue);
this->outOfTypeSystemElement->xmiId = atoi(uref.asUTF8().c_str());
}
else {
this->outOfTypeSystemElement->attributes.push_back(
new XmlAttribute(attrName, attrValue));
}
}
this->sharedData->addOutOfTypeSystemElement(this->outOfTypeSystemElement);
}
/**
* Adds a feature to the out-of-typesystem features list.
* @param ootsElem object to which to add the feature
* @param featName name of feature
* @param featVals feature values, as a list of strings
*/
void XmiDeserializerHandler::addOutOfTypeSystemFeature(OotsElementData * ootsElem,
UnicodeString & featName,
vector<UnicodeString> & featVals) {
vector<string> * pVals = new vector<string>;
for (size_t i=0;i<featVals.size();i++) {
pVals->push_back( ((UnicodeStringRef)featVals.at(i)).asUTF8());
}
ootsElem->childElements[ ((UnicodeStringRef)featName).asUTF8()] = pVals;
}
// ---------------------------------------------------------------------------
// XmiDeserializerHandler: Overrides of the SAX ErrorHandler interface
// ---------------------------------------------------------------------------
void XmiDeserializerHandler::error(const SAXParseException& e) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_ERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam((UChar const *)e.getSystemId());
msg.addParam(e.getLineNumber());
msg.addParam(e.getColumnNumber());
msg.addParam((UChar const *) e.getMessage());
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
void XmiDeserializerHandler::fatalError(const SAXParseException& e) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam((UChar const *)e.getSystemId());
msg.addParam(e.getLineNumber());
msg.addParam(e.getColumnNumber());
msg.addParam((UChar const *) e.getMessage());
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
void XmiDeserializerHandler::warning(const SAXParseException& e) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_WARNING);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam((UChar const *)e.getSystemId());
msg.addParam(e.getLineNumber());
msg.addParam(e.getColumnNumber());
msg.addParam((UChar const *) e.getMessage());
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
char const * XmiDeserializerHandler::XMI_ID_ATTR_NAME = "xmi:id";
char const * XmiDeserializerHandler::TRUE_VALUE = "true";
char const * XmiDeserializerHandler::DEFAULT_CONTENT_FEATURE = "value";
char const * XmiDeserializerHandler::DEFAULT_NAMESPACE_URI = "http:///uima/noNamespace.ecore";
} // namespace uima