// blob: 53012d704ab1120207a3bffbe293626602e88061 [file] [log] [blame]
/** @name xcasdeserializer_handler.cpp
-----------------------------------------------------------------------------
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
-----------------------------------------------------------------------------
10/18/2005 Initial creation
-------------------------------------------------------------------------- */
//TODO support multiple indexed FS
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include "uima/pragmas.hpp"
#include <iostream>
#include <sstream>
#include <algorithm>
using namespace std;
#include "xercesc/sax2/Attributes.hpp"
#include "xercesc/sax/SAXParseException.hpp"
#include "xercesc/sax/SAXException.hpp"
#include "uima/msg.h"
#include "uima/exceptions.hpp"
#include "uima/lowlevel_typesystem.hpp"
#include "uima/lowlevel_indexrepository.hpp"
#include "uima/xcasdeserializer_handler.hpp"
#include "uima/internal_fspromoter.hpp"
#include "uima/internal_typeshortcuts.hpp"
#include "uima/internal_casimpl.hpp"
#include "uima/fsindexrepository.hpp"
#include "uima/arrayfs.hpp"
#include "uima/annotator_context.hpp"
#include "uima/resmgr.hpp"
#define DEBUG 0
namespace uima {
// ---------------------------------------------------------------------------
// XCASDeserializerHandler: Constructors and Destructor
// ---------------------------------------------------------------------------
/**
 * Construct a handler that deserializes into the base CAS of <code>cas</code>.
 *
 * Besides storing the CAS, its implementation object and the annotator
 * context, the constructor seeds the bookkeeping used while parsing:
 *  - indexRepositories[0] is the base CAS index repository and
 *    indexRepositories[1] the repository of the view named
 *    CAS::NAME_DEFAULT_SOFA;
 *  - sofaRefMap[0] is set to 1 and indexMap[0] to 0 as the base CAS entries.
 *
 * @param cas  CAS whose base CAS receives the deserialized data
 * @param ctx  annotator context; stored in iv_ctx
 */
XCASDeserializerHandler::XCASDeserializerHandler(CAS & cas, AnnotatorContext * const ctx) : iv_cas(cas.getBaseCas() ),
iv_locator(NULL), iv_ctx(ctx),
iv_casimpl( uima::internal::CASImpl::promoteCAS(*iv_cas)
// ,iv_typesystem(iv_casimpl.getHeap().getTypeSystem())
) {
if (DEBUG) std::cerr << " XCASDeserializerHandler::constructor " << std::endl;
// Content of an element goes to the default content feature unless a
// _content attribute overrides it (see readFS)
currentContentFeat.append(DEFAULT_CONTENT_FEATURE);
sofaTypeCode = uima::internal::gs_tySofaType;
// Slot 0: index repository of the base CAS
FSIndexRepository * fsidx = &iv_cas->getBaseIndexRepository();
indexRepositories.push_back((lowlevel::IndexRepository*)fsidx);
// There should always be another index for the Initial View
fsidx = &iv_cas->getView(CAS::NAME_DEFAULT_SOFA)->getIndexRepository();
indexRepositories.push_back((lowlevel::IndexRepository*)fsidx);
// get temp heap handle for checking if an FS is an annotation
lowlevel::FSHeap const & crHeap = iv_casimpl.getHeap();
// uima::lowlevel::FSHeap::TyFSHeap const & tyTempHeap = crHeap.iv_clTemporaryHeap;
// iv_typesystem is assigned here rather than in the initializer list
iv_typesystem = &crHeap.getTypeSystem();
// add entry for baseCAS ... point non-compliant annotations at first Sofa
sofaRefMap.push_back(1);
// add entry for baseCAS ... _indexed=0 stays in 0
indexMap.push_back(0);
}
/**
 * Release every FSInfo record (and the index list it owns) that was created
 * while parsing, for FSs with an id (fsTree) and without one (idLess), then
 * empty the lookup tables.
 */
XCASDeserializerHandler::~XCASDeserializerHandler() {
  if (DEBUG) std::cout << " XCASDeserializerHandler::destructor " << std::endl;

  // Records registered under an explicit _id
  size_t slot = 0;
  while (slot < fsTree.size()) {
    FSInfo * const record = (FSInfo*) fsTree[slot];
    ++slot;
    if (record == 0) {
      continue;
    }
    delete record->indexRep;
    delete record;
  }

  // Records of FSs that carried no _id
  size_t pos = 0;
  while (pos < idLess.size()) {
    FSInfo * const record = (FSInfo*) idLess[pos];
    ++pos;
    if (record == 0) {
      continue;
    }
    delete record->indexRep;
    delete record;
  }

  // free some storage
  fsTree.clear();
  sofaRefMap.clear();
  indexMap.clear();
}
// ---------------------------------------------------------------------------
// XCASDeserializerHandler: Implementation of the SAX2 ContentHandler interface
// ---------------------------------------------------------------------------
/**
 * SAX2 callback: remember the locator supplied by the parser in iv_locator.
 *
 * @param locator  document locator handed over by the parser
 */
void XCASDeserializerHandler::setDocumentLocator(const Locator* const locator) {
  if (DEBUG) {
    std::cerr << " XCASDeserializerHandler::setDocumentLocator() " << std::endl;
  }
  iv_locator = locator;
}
/**
 * SAX2 callback: a new document begins, so reset the state machine to
 * DOC_STATE.
 */
void XCASDeserializerHandler::startDocument() {
  if (DEBUG) {
    cout << " XCASDeserializerHandler::startDocument() " << endl;
  }
  iv_state = DOC_STATE;
}
/**
 * SAX2 callback for an opening tag. Clears the character buffer and
 * dispatches on the current parser state:
 *  - DOC_STATE: the element must be the CAS root tag; moves to FS_STATE.
 *  - FS_STATE: the legacy document element switches to DOC_TEXT_STATE, any
 *    other element is read as a feature structure.
 *  - ARRAY_ELE_STATE: the element is read as an array entry.
 *
 * @throws ExcIllFormedInputError  if the root tag is wrong or an element
 *                                 shows up in a state that expects none
 */
void XCASDeserializerHandler::startElement(const XMLCh* const uri,
    const XMLCh* const localname,
    const XMLCh* const qname,
    const Attributes & attrs) {
  if (DEBUG) std::cerr << " XCASDeserializerHandler::startElement() " << icu::UnicodeString((UChar*)qname, XMLString::stringLen(qname)) << endl;
  assert(sizeof(XMLCh) == sizeof(UChar));
  icu::UnicodeString qualifiedName( (UChar const *) qname, XMLString::stringLen(qname));
  buffer.remove();

  // Set when the element is not acceptable in the current state
  bool unexpectedElement = false;

  switch (iv_state) {
  case DOC_STATE:
    if (qualifiedName.compare(CASTAGNAME) == 0) {
      iv_state = FS_STATE;
    } else {
      unexpectedElement = true;
    }
    break;
  case FS_STATE:
    currentContentFeat = DEFAULT_CONTENT_FEATURE;
    if (qualifiedName.compare(DEFAULT_DOC_TYPE_NAME) == 0) {
      iv_state = DOC_TEXT_STATE;
    } else {
      readFS(qualifiedName, attrs);
    }
    break;
  case ARRAY_ELE_STATE:
    readArrayElement(qualifiedName, attrs);
    break;
  default:
    // If we're not in an element expecting state, raise an error.
    unexpectedElement = true;
    break;
  }

  if (unexpectedElement) {
    ErrorInfo errInfo;
    errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
    ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
    msg.addParam( qualifiedName );
    errInfo.setMessage(msg);
    errInfo.setSeverity(ErrorInfo::unrecoverable);
    throw ExcIllFormedInputError(errInfo);
  }
}
/**
 * SAX2 callback for character data. The text is appended to the element
 * buffer only in the states that consume content (document text, FS content,
 * feature content, array element content); otherwise it is dropped.
 *
 * @param cpwsz     characters delivered by the parser
 * @param uiLength  number of characters in cpwsz
 */
void XCASDeserializerHandler::characters(
  const XMLCh* const cpwsz,
  const XMLSize_t uiLength) {
  if (DEBUG) std::cerr << "XCASDeserializerHandler::characters: \"" << icu::UnicodeString(cpwsz, uiLength) << "\"" << endl;
  assert(sizeof(XMLCh) == sizeof(UChar));

  const bool collectingText = this->iv_state == DOC_TEXT_STATE
                              || this->iv_state == CONTENT_STATE
                              || this->iv_state == ARRAY_ELE_CONTENT_STATE
                              || this->iv_state == FEAT_CONTENT_STATE;
  if (collectingText) {
    buffer.append( (UChar const *) cpwsz, 0, uiLength );
  }
}
void XCASDeserializerHandler::endElement(const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname) {
if (DEBUG) std::cerr << " XCASDeserializerHandler::endElement() " << icu::UnicodeString( (UChar*) qname, XMLString::stringLen(qname) ) << " -- buffer is >" << buffer << "<" << endl;
icu::UnicodeString qualifiedName( (UChar const *) qname, XMLString::stringLen(qname));
assert(sizeof(XMLCh) == sizeof(UChar));
switch (iv_state) {
case DOC_STATE: {
// Do nothing.
break;
}
case FS_STATE: {
iv_state = DOC_STATE;
break;
}
case FEAT_STATE: {
iv_state = FS_STATE;
break;
}
case CONTENT_STATE: {
// Set the value of the content feature.
//if (!isAllWhitespace(buffer))
//{
handleFeature(currentAddr, currentContentFeat, buffer, true);
//} }
iv_state = FS_STATE;
break;
}
case FEAT_CONTENT_STATE: {
// Create a feature value from an element.
handleFeature(currentAddr, qualifiedName, buffer, false);
iv_state = FEAT_STATE;
break;
}
case ARRAY_ELE_CONTENT_STATE: {
// Create an array value.
addArrayElement(buffer);
iv_state = ARRAY_ELE_STATE;
break;
}
case ARRAY_ELE_STATE: {
iv_state = FS_STATE;
break;
}
case DOC_TEXT_STATE: {
// Assume old style TCAS with one text Sofa
SofaFS newSofa = iv_cas->createInitialSofa(icu::UnicodeString("text"));
CAS* cas = iv_cas->getInitialView();
cas->registerView(newSofa);
// Set the document text without creating a documentAnnotation
cas->setDocTextFromDeserializtion(UnicodeStringRef(buffer.getBuffer(), buffer.length()));
// and assume the new Sofa is at location 1!
int addr = 1;
int id = 1;
sofaRefMap.push_back(id);
// and register the id for this Sofa
FSInfo * fsInfo = new FSInfo(addr, new vector<int>);
// FSInfo * fsInfo = new FSInfo(addr, -1); //??? Should be 0 or -1 ???
fsTree[id] = fsInfo;
iv_state = FS_STATE;
break;
}
}
}
void XCASDeserializerHandler::endDocument() {
//cout << " XCASDeserializerHandler::endDocument() " << endl;
//update features that are FSs
for (size_t i = 0; i < fsTree.size(); i++) {
FSInfo * fsinfo = (FSInfo*) fsTree[i];
if (fsinfo != 0)
finalizeFS(*fsinfo);
}
//update features that are FSs
for (size_t i = 0; i < idLess.size(); i++) {
FSInfo * fsinfo = (FSInfo*) idLess[i];
if (fsinfo != 0)
finalizeFS(*fsinfo);
}
//update document annotation info in tcas
for (size_t i = 0; i < tcasInstances.size(); i++) {
CAS * tcas = (CAS *) tcasInstances[i];
if (tcas != 0) {
tcas->pickupDocumentAnnotation();
}
}
}
void XCASDeserializerHandler::ignorableWhitespace(const XMLCh* const cpwsz,
const unsigned int length) {
cout << " XCASDeserializerHandler::ignorableWhitespace() " << endl;
}
// Create a new FS.
/**
 * Create a new FS for the element named <code>qualifiedName</code>.
 *
 * The element name is looked up as a type name. An unknown type is reported
 * on stdout and the parser moves to CONTENT_STATE without creating anything;
 * an array type is delegated to readArray(); any other type gets a fresh FS
 * whose attributes are read by the address-based readFS() overload.
 *
 * @param qualifiedName  element name, interpreted as the type name
 * @param attrs          attributes of the element
 */
void XCASDeserializerHandler::readFS(icu::UnicodeString & qualifiedName, const Attributes & attrs) {
  icu::UnicodeString typeName(qualifiedName);
  Type fsType = iv_cas->getTypeSystem().getType(typeName);
  uima::lowlevel::TyFSType fsTypeCode = uima::internal::FSPromoter::demoteType(fsType);

  if (!fsType.isValid() ) {
    cout << "INFO: invalid type " << typeName << endl;
    iv_state = CONTENT_STATE;
    return;
  }

  if (iv_cas->getTypeSystem().isArrayType(fsTypeCode)) {
    readArray(fsType, attrs);
    return;
  }

  uima::lowlevel::TyFS newAddr = uima::internal::FSPromoter::demoteFS(iv_cas->createFS(fsType));
  readFS(newAddr, attrs, true);
}
/**
 * Fill the already created FS at <code>addr</code> from the attributes of
 * its XML element and record it for finalization.
 *
 * For Sofa FSs the sofaID / sofaNum / _id attributes are read first to
 * maintain indexMap (sofaNum -> index repository slot) and sofaRefMap
 * (sofaNum -> Sofa FS id); after the attributes are applied the matching view
 * is created and its index repository remembered.
 *
 * Attributes starting with "_" are handled specially (_id, _content,
 * _indexed); every other attribute is passed to handleFeature().
 *
 * Changes compared with the earlier implementation:
 *  - the _indexed list is tokenized from a std::string instead of being
 *    strcpy'd into a 256 byte stack buffer, so an arbitrarily long attribute
 *    cannot overflow the stack (and the non-reentrant strtok is gone);
 *  - the index list is kept by value until the FSInfo is created, so nothing
 *    leaks when a feature handler throws;
 *  - a negative sofaNum is rejected instead of indexing sofaRefMap with it.
 *
 * @param addr     heap address of the FS to populate
 * @param attrs    attributes of the element
 * @param toIndex  not used by the current implementation
 * @throws ExcIllFormedInputError  if a Sofa carries a negative sofaNum
 */
void XCASDeserializerHandler::readFS(lowlevel::TyFS addr, const Attributes & attrs, bool toIndex) {
  // Hang on address for setting content feature
  currentAddr = addr;
  int id = -1;
  // Index repository numbers taken from _indexed; empty means not indexed
  vector<int> sofaRef;
  icu::UnicodeString attrName;
  icu::UnicodeString attrValue;
  bool nameMapping = false;   // never enabled in this function, so no Sofa name mapping
  UChar ubuff[256];
  UErrorCode errorCode = U_ZERO_ERROR;
  lowlevel::TyFS heapValue = iv_casimpl.getHeap().getType(addr);

  // Special handling for Sofas
  if (sofaTypeCode == heapValue) {
    // create some maps to handle v1 format XCAS ...
    // ... where the sofa feature of annotations was an int not a ref
    // determine if this is the one and only initial view Sofa
    bool isInitialView = false;
    int extsz = icu::UnicodeString(CAS::FEATURE_BASE_NAME_SOFAID).extract(ubuff, 256, errorCode);
    if (extsz > 256) {
      cout << "ACK!" << endl;
    }
    const UChar* sofaID = attrs.getValue(ubuff);
    if (0==UnicodeStringRef(sofaID).compare(icu::UnicodeString("_DefaultTextSofaName"))) {
      // NOTE(review): ubuff holds the attribute *name* at this point, so the
      // legacy default Sofa is not recognised as the initial view below.
      // Presumably CAS::NAME_DEFAULT_SOFA was intended - confirm before changing.
      sofaID = ubuff;
    }
    // no Sofa mapping for now
    // if (iv_ctx != NULL) {
    //   // Map incoming SofaIDs
    //   sofaID = iv_ctx->mapToSofaID(sofaID).getSofaId();
    // }
    if (0==UnicodeStringRef(sofaID).compare(icu::UnicodeString(CAS::NAME_DEFAULT_SOFA))) {
      isInitialView = true;
    }

    // get the sofaNum
    extsz = icu::UnicodeString(CAS::FEATURE_BASE_NAME_SOFANUM).extract(ubuff, 256, errorCode);
    if (extsz > 256) {
      cout << "ACK!" << endl;
    }
    const UChar* aString = attrs.getValue(ubuff);
    int thisSofaNum = atoi(UnicodeStringRef(aString).asUTF8().c_str());
    if (thisSofaNum < 0) {
      // A negative number would be used as a vector index further down
      ErrorInfo errInfo;
      errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
      ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
      msg.addParam("Invalid sofaNum in the CAS" );
      errInfo.setMessage(msg);
      errInfo.setSeverity(ErrorInfo::unrecoverable);
      ExcIllFormedInputError exc(errInfo);
      throw exc;
    }

    // get the sofa's FeatureStructure id
    icu::UnicodeString(ID_ATTR_NAME).extract(ubuff,256, errorCode);
    aString = attrs.getValue(ubuff);
    int sofaFsId = atoi(UnicodeStringRef(aString).asUTF8().c_str());

    // for v1 and v2 formats, create the index map
    // ***we assume Sofas are always received in Sofanum order***
    // Two scenarios ... the initial view is the first sofa, or not.
    // If not, the _indexed values need to be remapped to leave room for the initial view,
    // which may or may not be in the received CAS.
    if (indexMap.size() == 1) {
      if (isInitialView) {
        // the first Sofa an initial view
        if (thisSofaNum == 2) {
          // this sofa was mapped to the initial view
          indexMap.push_back(-1); // for this CAS, there should not be a sofanum = 1
          indexMap.push_back(1);  // map 2 to 1
          nextIndex = 2;
        } else {
          indexMap.push_back(1);
          nextIndex = 2;
        }
      } else {
        if (thisSofaNum > 1) {
          // the first Sofa not initial, but sofaNum > 1
          // must be a v2 format, and sofaNum better be 2
          indexMap.push_back(1);
          assert (thisSofaNum == 2);
          indexMap.push_back(2);
          nextIndex = 3;
        } else {
          // must be v1 format
          indexMap.push_back(2);
          nextIndex = 3;
        }
      }
    } else {
      // if the new Sofa is the initial view, always map to 1
      if (isInitialView) {
        // the initial view is not the first
        // if v2 format, space already reserved in mapping
        if (indexMap.size() == thisSofaNum) {
          // v1 format, add mapping for initial view
          indexMap.push_back(1);
        }
      } else {
        indexMap.push_back(nextIndex);
        nextIndex++;
      }
    }

    // Now update the mapping from annotation int to ref values
    if (sofaRefMap.size() == thisSofaNum) {
      // Sofa received in sofaNum order, add new one
      sofaRefMap.push_back(sofaFsId);
    } else if ((int)sofaRefMap.size() > thisSofaNum) {
      // new Sofa has lower sofaNum than last one
      sofaRefMap[thisSofaNum] = sofaFsId;
    } else {
      // new Sofa has skipped ahead more than 1
      sofaRefMap.resize(thisSofaNum + 1);
      sofaRefMap[thisSofaNum] = sofaFsId;
    }
  }

  Type type = uima::internal::FSPromoter::promoteType(heapValue, iv_cas->getTypeSystem().getLowlevelTypeSystem());

  for (size_t i = 0; i < attrs.getLength(); i++) {
    assertWithMsg( sizeof(XMLCh) == sizeof(UChar), "Port required!");
    attrName = (UChar*)attrs.getQName(i);
    attrValue = (UChar*)attrs.getValue(i);
    if (attrName.startsWith("_")) {
      if (attrName.compare(ID_ATTR_NAME) == 0) {
        id = atoi(UnicodeStringRef(attrValue).asUTF8().c_str());
      } else if (attrName.compare(CONTENT_ATTR_NAME) == 0) {
        currentContentFeat = attrValue;
      } else if (attrName.compare(INDEXED_ATTR_NAME)== 0) {
        // Blank-separated list of index repository numbers. Consecutive
        // blanks are skipped and each token goes through atoi, as before.
        const std::string indexes = UnicodeStringRef(attrValue).asUTF8();
        std::string::size_type tokBegin = indexes.find_first_not_of(' ');
        while (tokBegin != std::string::npos) {
          const std::string::size_type tokEnd = indexes.find(' ', tokBegin);
          // substr clamps the count, so tokEnd == npos takes the rest of the string
          sofaRef.push_back(atoi(indexes.substr(tokBegin, tokEnd - tokBegin).c_str()));
          tokBegin = indexes.find_first_not_of(' ', tokEnd);
        }
      } else {
        handleFeature(type, addr, attrName, attrValue, false);
      }
    } else {
      if (nameMapping && attrName.compare(CAS::FEATURE_BASE_NAME_SOFAID) == 0) {
        if (iv_ctx != NULL) {
          attrValue = iv_ctx->mapToSofaID(attrValue).getSofaId();
        }
      }
      handleFeature(type, addr, attrName, attrValue, false);
    }
  }

  if (sofaTypeCode == heapValue) {
    // If a Sofa, create CAS view to get new indexRepository
    SofaFS sofa = (SofaFS) uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
    //also add to indexes so we can retrieve the Sofa later
    iv_cas->getBaseIndexRepository().addFS(sofa);
    CAS * tcas = iv_cas->getView(sofa);
    assert ( EXISTS(tcas) );
    if (sofa.getSofaRef() == 1) {
      iv_cas->registerInitialSofa();
    } else {
      // add indexRepo for views other than the initial view
      lowlevel::IndexRepository * indexRep = iv_cas->getIndexRepositoryForSofa(sofa);
      assert ( EXISTS(indexRep) );
      indexRepositories.push_back(indexRep);
    }
    tcasInstances.push_back(tcas);
  }

  // An empty index list means not indexed; the FSInfo receives a heap copy
  // of the list, which the destructor deletes
  FSInfo * fsInfo = new FSInfo(addr, new vector<int>(sofaRef));
  if (id < 0) {
    idLess.push_back(fsInfo);
  } else {
    fsTree[id] = fsInfo;
  }
  iv_state = CONTENT_STATE;
}
/**
 * Create an array FS of the given type from the attributes of its element
 * and switch to ARRAY_ELE_STATE so the entries can be read.
 *
 * Recognised attributes are _id, size and _indexed; anything else is
 * rejected.
 *
 * Changes compared with the earlier implementation:
 *  - the _indexed list is tokenized from a std::string instead of being
 *    strcpy'd into a 256 byte stack buffer (stack overflow on long values);
 *  - the index list is kept by value until the FSInfo is created, so it is
 *    no longer leaked when an unknown attribute makes the function throw.
 *
 * @param type   array type of the element
 * @param attrs  attributes of the element
 * @throws ExcIllFormedInputError  on an attribute other than the three above
 */
void XCASDeserializerHandler::readArray(Type & type, const Attributes & attrs) {
  // Index repository numbers taken from _indexed; empty means not indexed
  vector<int> indexRep;
  int id = -1;
  int size=0;
  icu::UnicodeString attrName;
  icu::UnicodeString attrValue;

  for (size_t i = 0; i < attrs.getLength(); i++) {
    assertWithMsg( sizeof(XMLCh) == sizeof(UChar), "Port required!");
    attrName = (UChar*)attrs.getQName(i);
    attrValue = (UChar*)attrs.getValue(i);
    if (attrName.compare(ID_ATTR_NAME) == 0) {
      id = atoi(UnicodeStringRef(attrValue).asUTF8().c_str());
    } else if (attrName.compare(ARRAY_SIZE_ATTR) == 0) {
      size = atoi(UnicodeStringRef(attrValue).asUTF8().c_str());
    } else if (attrName.compare(INDEXED_ATTR_NAME)== 0) {
      // Blank-separated list of index repository numbers. Consecutive blanks
      // are skipped and each token goes through atoi, as before.
      const std::string indexes = UnicodeStringRef(attrValue).asUTF8();
      std::string::size_type tokBegin = indexes.find_first_not_of(' ');
      while (tokBegin != std::string::npos) {
        const std::string::size_type tokEnd = indexes.find(' ', tokBegin);
        // substr clamps the count, so tokEnd == npos takes the rest of the string
        indexRep.push_back(atoi(indexes.substr(tokBegin, tokEnd - tokBegin).c_str()));
        tokBegin = indexes.find_first_not_of(' ', tokEnd);
      }
    } else {
      ErrorInfo errInfo;
      errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
      ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
      assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
      msg.addParam( attrName );
      msg.addParam( attrValue );
      errInfo.setMessage(msg);
      errInfo.setSeverity(ErrorInfo::unrecoverable);
      ExcIllFormedInputError exc(errInfo);
      throw exc;
    }
  }

  arrayType = uima::internal::FSPromoter::demoteType(type);
  currentAddr = iv_casimpl.getHeap().createArrayFS(arrayType, size);
  arrayPos=0;

  // An empty index list means not indexed; the FSInfo receives a heap copy
  // of the list, which the destructor deletes
  FSInfo * fsInfo = new FSInfo(currentAddr, new vector<int>(indexRep));
  if (id < 0) {
    idLess.push_back(fsInfo);
  } else {
    fsTree[id] = fsInfo;
  }
  iv_state = ARRAY_ELE_STATE;
}
/**
 * Validate the start tag of an array entry and move to
 * ARRAY_ELE_CONTENT_STATE so its text is collected.
 *
 * @param qualifiedName  element name; must equal ARRAY_ELEMENT_TAG
 * @param attrs          attributes of the element; must be empty
 * @throws ExcIllFormedInputError  if the tag name differs or attributes are present
 */
void XCASDeserializerHandler::readArrayElement(icu::UnicodeString & qualifiedName, const Attributes & attrs) {
  // Either violation is reported with the same error carrying the tag name
  const bool malformed = qualifiedName.compare(ARRAY_ELEMENT_TAG) != 0
                         || attrs.getLength() > 0;
  if (malformed) {
    ErrorInfo errInfo;
    errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
    ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
    msg.addParam( qualifiedName );
    errInfo.setMessage(msg);
    errInfo.setSeverity(ErrorInfo::unrecoverable);
    throw ExcIllFormedInputError(errInfo);
  }
  iv_state = ARRAY_ELE_CONTENT_STATE;
}
/**
 * Store the text of one array entry at position arrayPos of the array
 * currently being read (currentAddr) and advance arrayPos.
 *
 * The text is converted according to arrayType. For arrays of FSs the parsed
 * number is stored as a provisional reference; finalizeArray() later replaces
 * it with the address of the referenced FS.
 *
 * @param buffer  text content of the entry
 * @throws ExcIllFormedInputError  if the array already holds as many entries
 *                                 as its size allows
 */
void XCASDeserializerHandler::addArrayElement(icu::UnicodeString & buffer) {
  if (arrayPos >= iv_casimpl.getHeap().getArraySize(currentAddr) ) {
    ErrorInfo errInfo;
    errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
    ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
    assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
    msg.addParam("Invalid array FS in the CAS" );
    errInfo.setMessage(msg);
    errInfo.setSeverity(ErrorInfo::unrecoverable);
    throw ExcIllFormedInputError(errInfo);
  }

  FeatureStructure arrayFS = uima::internal::FSPromoter::promoteFS(currentAddr, *iv_cas);

  switch (arrayType) {
  case internal::gs_tyIntArrayType: {
    int parsed = atoi(UnicodeStringRef(buffer).asUTF8().c_str());
    IntArrayFS target(arrayFS);
    target.set( (size_t) arrayPos, parsed);
    break;
  }
  case internal::gs_tyFloatArrayType: {
    float parsed = atof(UnicodeStringRef(buffer).asUTF8().c_str());
    FloatArrayFS target(arrayFS);
    target.set( (size_t) arrayPos, parsed);
    break;
  }
  case internal::gs_tyStringArrayType: {
    // add the string to the string heap ...
    int strOffset = iv_cas->getHeap()->addString(buffer);
    // ... and store its reference in the array cell on the fs heap
    lowlevel::TyFS strRef = iv_cas->getHeap()->getStringAsFS(strOffset);
    lowlevel::TyHeapCell * cells = iv_cas->getHeap()->getCArrayFromFS(currentAddr);
    cells[arrayPos] = strRef;
    break;
  }
  case internal::gs_tyByteArrayType: {
    // The byte arrives as a decimal number; %c turns it into one char
    short number = atoi(UnicodeStringRef(buffer).asUTF8().c_str());
    char asChar[2];
    sprintf(asChar,"%c",number);
    ByteArrayFS target(arrayFS);
    target.set( (size_t) arrayPos, asChar[0]);
    break;
  }
  case internal::gs_tyBooleanArrayType: {
    // Only the literal "1" counts as true
    string text = UnicodeStringRef(buffer).asUTF8();
    BooleanArrayFS target(arrayFS);
    if (text.compare("1")==0) {
      target.set( (size_t) arrayPos, true);
    } else {
      target.set ( (size_t) arrayPos, false);
    }
    break;
  }
  case internal::gs_tyShortArrayType: {
    short parsed;
    string text;
    UnicodeStringRef(buffer).extractUTF8(text);
    stringstream conv;
    conv << text.c_str();
    conv >> parsed;
    ShortArrayFS target(arrayFS);
    target.set( (size_t) arrayPos, parsed);
    break;
  }
  case internal::gs_tyLongArrayType: {
    INT64 parsed;
    stringstream conv;
    conv << UnicodeStringRef(buffer).asUTF8();
    conv >> parsed;
    LongArrayFS target(arrayFS);
    target.set( (size_t) arrayPos, parsed);
    break;
  }
  case internal::gs_tyDoubleArrayType: {
    DoubleArrayFS target(arrayFS);
    stringstream conv;
    conv << UnicodeStringRef(buffer).asUTF8();
    long double parsed;
    conv >> parsed;
    target.set((size_t) arrayPos, parsed);
    break;
  }
  default: { //array of FSs
    lowlevel::TyFS refId = atoi(UnicodeStringRef(buffer).asUTF8().c_str());
    FeatureStructure element(refId, *iv_cas);
    ArrayFS target(arrayFS);
    target.set((size_t) arrayPos, element);
  }
  }
  ++arrayPos;
}
// Create a feature value from a string representation.
/**
 * Convenience overload: look up the type of the FS stored at
 * <code>addr</code> and forward to the type-based handleFeature().
 *
 * @param addr      heap address of the FS
 * @param featName  feature base name as found in the XML
 * @param featVal   string form of the value
 * @param lenient   passed through unchanged
 */
void XCASDeserializerHandler::handleFeature(lowlevel::TyFS addr, icu::UnicodeString & featName, icu::UnicodeString & featVal, bool lenient) {
  Type owningType = uima::internal::FSPromoter::promoteType(
                      iv_casimpl.getHeap().getType(addr),
                      iv_cas->getTypeSystem().getLowlevelTypeSystem());
  handleFeature(owningType, addr, featName, featVal, lenient);
}
void XCASDeserializerHandler::handleFeature(Type & type, lowlevel::TyFS addr, icu::UnicodeString & featName, icu::UnicodeString & featVal,
bool lenient) {
char charFeatVal[10];
// handle v1.x format annotations, mapping int to ref values
lowlevel::TyFSType fstype = iv_casimpl.getHeap().getType(addr);
if (0==featName.compare("sofa") &&
iv_typesystem->subsumes(internal::gs_tyAnnotationBaseType, fstype)) {
int ifeatval = atoi(UnicodeStringRef(featVal).asUTF8().c_str());
sprintf(charFeatVal, "%d", sofaRefMap[ifeatval]);
featVal.setTo(icu::UnicodeString(charFeatVal));
}
// handle v1.x sofanum values, remapping so that _InitialView always == 1
if (0==featName.compare(CAS::FEATURE_BASE_NAME_SOFAID)
&& sofaTypeCode == fstype) {
int sofaNum = iv_casimpl.getHeap().getIntValue(addr, internal::gs_tySofaNumFeature);
iv_casimpl.getHeap().setIntValue(addr, internal::gs_tySofaNumFeature, indexMap[sofaNum]);
}
icu::UnicodeString prefix(REF_PREFIX);
if (featName.startsWith(REF_PREFIX)) {
featName.remove(0,prefix.length()); // Delete prefix
}
FeatureStructure fs = uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
Feature feat = type.getFeatureByBaseName(featName);
// System.out.println("DEBUG - Feature map result: " + featName + " = " + feat.getName());
if (!feat.isValid()) { //feature does not exist in typesystem;
//Out of typesystem data not supported.
//we skip this feature
/**ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam(type.getName());
msg.addParam(featName);
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc; **/
} else {
Type rtype;
feat.getRangeType(rtype);
lowlevel::TyFSType rangeType = uima::internal::FSPromoter::demoteType(rtype);
switch (rangeType) {
case internal::gs_tyIntegerType: {
if (featVal.length()>0) {
fs.setIntValue(feat, atoi(UnicodeStringRef(featVal).asUTF8().c_str()));
}
break;
}
case internal::gs_tyFloatType: {
if ( featVal.length() > 0) {
fs.setFloatValue(feat, atof(UnicodeStringRef(featVal).asUTF8().c_str()));
}
break;
}
case internal::gs_tyStringType: {
if (featVal.length() > 0) {
fs.setStringValue(feat, featVal);
}
break;
}
case internal::gs_tyByteType: {
if (featVal.length() > 0) {
string val = UnicodeStringRef(featVal).asUTF8();
short intval = atoi(val.c_str());
char charval[2];
sprintf(charval,"%c",intval);
fs.setByteValue(feat, charval[0] );
}
break;
}
case internal::gs_tyBooleanType: {
if (featVal.length() > 0) {
string val = UnicodeStringRef(featVal).asUTF8();
if (val.compare("1")==0)
fs.setBooleanValue(feat, true );
else fs.setBooleanValue(feat, false);
}
break;
}
case internal::gs_tyShortType: {
if (featVal.length() > 0) {
string strval = UnicodeStringRef(featVal).asUTF8();
short shortval;
stringstream s;
s << strval.c_str();
s >> shortval;
fs.setShortValue(feat, shortval);
}
break;
}
case internal::gs_tyLongType: {
if (featVal.length() > 0) {
string strval = UnicodeStringRef(featVal).asUTF8();
INT64 longval;
stringstream s;
s << strval.c_str();
s >> longval;
fs.setLongValue(feat, longval);
}
break;
}
case internal::gs_tyDoubleType: {
if (featVal.length() > 0) {
string strval = UnicodeStringRef(featVal).asUTF8();
long double doubleval;
stringstream s;
s << strval.c_str();
s >> doubleval;
fs.setDoubleValue(feat, doubleval );
}
break;
}
default: {
if (rtype.isStringSubType()) {
if (featVal.length() > 0)
fs.setStringValue(feat, featVal);
} else if (featVal.length() > 0) {
lowlevel::TyFS val = (lowlevel::TyFS) atoi(UnicodeStringRef(featVal).asUTF8().c_str());
iv_casimpl.getHeap().setFeatureInternal(addr, uima::internal::FSPromoter::demoteFeature(feat), val);
}
break;
}
}
}
}
/**
 * Second pass over one deserialized FS, run from endDocument():
 *  1. add the FS to every index repository listed in its FSInfo (numbers are
 *     translated through indexMap once any Sofa has been read);
 *  2. for arrays, hand over to finalizeArray();
 *  3. otherwise replace the provisional id stored in each non-primitive
 *     feature by the heap address of the FS registered under that id. Ids
 *     without a registered FS are left as they are.
 *
 * Changes compared with the earlier implementation: the always-true
 * <code>size() >= 0</code> test is gone, and index repository numbers are
 * range-checked (including the -1 placeholder that indexMap may contain)
 * instead of being used as unchecked vector indices.
 *
 * @param fsInfo  record of the FS to finalize
 * @throws ExcIllFormedInputError  if an _indexed number does not denote a
 *                                 known index repository
 */
void XCASDeserializerHandler::finalizeFS(FSInfo & fsInfo) {
  lowlevel::TyFS addr = fsInfo.addr;
  FeatureStructure fs = uima::internal::FSPromoter::promoteFS(addr, *iv_cas);
  Type type = fs.getType();

  // Now add FS to all specified index repositories
  for (size_t i = 0; i < fsInfo.indexRep->size(); i++) {
    int repIndex = fsInfo.indexRep->at(i);
    bool valid = repIndex >= 0;
    if (valid && indexMap.size() != 1) {
      // Sofas were read, so the number has to go through the remapping table
      valid = (size_t) repIndex < indexMap.size();
      if (valid) {
        repIndex = indexMap[repIndex];
      }
    }
    valid = valid && repIndex >= 0 && (size_t) repIndex < indexRepositories.size();
    if (!valid) {
      ErrorInfo errInfo;
      errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
      ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
      msg.addParam("Invalid index repository reference in the CAS" );
      errInfo.setMessage(msg);
      errInfo.setSeverity(ErrorInfo::unrecoverable);
      ExcIllFormedInputError exc(errInfo);
      throw exc;
    }
    lowlevel::IndexRepository * pIndexRep = indexRepositories[repIndex];
    assert(EXISTS(pIndexRep));
    pIndexRep->add(addr);
  }

  if (iv_cas->getTypeSystem().isArrayType(uima::internal::FSPromoter::demoteType(type)) ) {
    finalizeArray(type, addr, fsInfo);
    return;
  }

  //update heap value of features that are references to other FS.
  vector<Feature> feats;
  type.getAppropriateFeatures(feats);
  FSInfo * fsValInfo;
  for (size_t i = 0; i < feats.size(); i++) {
    Feature feat = (Feature) feats[i];
    Type rangeType;
    feat.getRangeType(rangeType);
    if (rangeType.isValid()) {
      lowlevel::TyFSType rangetypecode = uima::internal::FSPromoter::demoteType(rangeType);
      lowlevel::TyFSFeature featcode = uima::internal::FSPromoter::demoteFeature(feat);
      //if not primitive
      if (!iv_cas->getTypeSystem().isPrimitive(rangetypecode)) {
        //get the current feature value which is the id
        lowlevel::TyFS featVal = iv_casimpl.getHeap().getFeatureInternal(addr, featcode);
        //get the FSInfo object for that id
        fsValInfo = (FSInfo*) fsTree[featVal];
        //if there is a FSInfo, set the feature value to the address it
        //records; otherwise there is nothing to do
        if (fsValInfo != NULL) {
          iv_casimpl.getHeap().setFSValue(addr, featcode, fsValInfo->addr);
        }
      }
    }
  }
}
/**
 * Resolve the entries of an FS array: each cell holds the id read from the
 * XML and is overwritten with the heap address of the FS registered under
 * that id. Cells whose id has no registered FS stay untouched, and arrays
 * that are not FS arrays are left alone.
 *
 * @param type    type of the array FS
 * @param addr    heap address of the array FS
 * @param fsInfo  record of the array FS (not used here)
 */
void XCASDeserializerHandler::finalizeArray(Type & type, lowlevel::TyFS addr, FSInfo & fsInfo) {
  if (!iv_cas->getTypeSystem().isFSArrayType(uima::internal::FSPromoter::demoteType(type))) {
    return;
  }
  // *** WARNING *** *** WARNING *** *** WARNING *** *** WARNING ***
  // if implementation of ArrayFS on the heap changes, this code will be invalid:
  // the cell after addr is read as the length, the cells after that as entries
  const int length = (int)iv_cas->getHeap()->getHeap().getHeapValue(addr + 1);
  for (int pos = 0; pos < length; ++pos) {
    lowlevel::TyFS refId = iv_cas->getHeap()->getHeap().getHeapValue(addr + 2 + pos);
    FSInfo * referenced = fsTree[refId];
    if (referenced == NULL) {
      continue;
    }
    iv_cas->getHeap()->getHeap().setHeapValue(addr + 2 + pos, referenced->addr);
  }
}
// ---------------------------------------------------------------------------
// XCASDeserializerHandler: Overrides of the SAX ErrorHandler interface
// ---------------------------------------------------------------------------
void XCASDeserializerHandler::error(const SAXParseException& e) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_ERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam((UChar const *)e.getSystemId());
msg.addParam(e.getLineNumber());
msg.addParam(e.getColumnNumber());
msg.addParam((UChar const *) e.getMessage());
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
void XCASDeserializerHandler::fatalError(const SAXParseException& e) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_FATALERROR);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam((UChar const *)e.getSystemId());
msg.addParam(e.getLineNumber());
msg.addParam(e.getColumnNumber());
msg.addParam((UChar const *) e.getMessage());
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
void XCASDeserializerHandler::warning(const SAXParseException& e) {
ErrorInfo errInfo;
errInfo.setErrorId((TyErrorId)UIMA_ERR_RESOURCE_CORRUPTED);
ErrorMessage msg(UIMA_MSG_ID_EXC_XML_SAXPARSE_WARNING);
assertWithMsg(sizeof(XMLCh) == sizeof(UChar), "Port required");
msg.addParam((UChar const *)e.getSystemId());
msg.addParam(e.getLineNumber());
msg.addParam(e.getColumnNumber());
msg.addParam((UChar const *) e.getMessage());
errInfo.setMessage(msg);
errInfo.setSeverity(ErrorInfo::unrecoverable);
ExcIllFormedInputError exc(errInfo);
throw exc;
}
// ---------------------------------------------------------------------------
// XCASDeserializerHandler: element and attribute names of the XCAS format
// ---------------------------------------------------------------------------
// Name the root element must have (checked in startElement)
char const * XCASDeserializerHandler::CASTAGNAME = "CAS";
// Element whose text is taken as the document text of an old style CAS
char const * XCASDeserializerHandler::DEFAULT_DOC_TYPE_NAME = "uima.tcas.Document";
// Not referenced by the code in this file
char const * XCASDeserializerHandler::DEFAULT_DOC_TEXT_FEAT = "text";
// Attribute listing the index repositories an FS is added to
char const * XCASDeserializerHandler::INDEXED_ATTR_NAME = "_indexed";
// Prefix stripped from feature names in handleFeature
char const * XCASDeserializerHandler::REF_PREFIX = "_ref_";
// Attribute carrying the id under which an FS is registered for reference resolution
char const * XCASDeserializerHandler::ID_ATTR_NAME = "_id";
// Attribute naming the feature that receives the element's text content
char const * XCASDeserializerHandler::CONTENT_ATTR_NAME = "_content";
// Attribute holding the number of entries of an array element
char const * XCASDeserializerHandler::ARRAY_SIZE_ATTR = "size";
// Tag of a single array entry
char const * XCASDeserializerHandler::ARRAY_ELEMENT_TAG = "i";
// Only mentioned in commented-out code in this file
char const * XCASDeserializerHandler::TRUE_VALUE = "true";
// Feature that receives element content when no _content attribute is given
char const * XCASDeserializerHandler::DEFAULT_CONTENT_FEATURE = "value";
} // namespace uima