blob: 6f362f91c90c09ebd3a501df2ce5755fcacb6637 [file] [log] [blame]
/** \file test_casserializer.cpp .
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
-------------------------------------------------------------------------- */
// define this first to get your application name added to command line
// used inside "cmdline_driver_args.h"
#define MAIN_TITLE _TEXT("UIMA Test CAS Serializer")
/* ----------------------------------------------------------------------- */
/* Include dependencies */
/* ----------------------------------------------------------------------- */
#include "uima/pragmas.hpp" //must be first include to surpress warnings
#include "uima/api.hpp"
#include "uima/internal_casimpl.hpp"
#include "uima/casdefinition.hpp"
#include "uima/internal_casserializer.hpp"
#include "uima/internal_casdeserializer.hpp"
#include <sys/stat.h>
using namespace uima;
/* ----------------------------------------------------------------------- */
/* Constants */
/* ----------------------------------------------------------------------- */
#ifndef NDEBUG
#define ASSERT_OR_THROWEXCEPTION(x) assert(x)
#else
#define ASSERT_OR_THROWEXCEPTION(x) if (!(x)) { cerr << __FILE__ << ": Error in line " << __LINE__ << endl; exit(1); }
#endif
#define LOG(x) cout << __FILE__ << __LINE__ << ": " << x << endl
/* ----------------------------------------------------------------------- */
/* UTILS */
/* ----------------------------------------------------------------------- */
void displayException(util::ConsoleUI & console, Exception & crclException)
/* ----------------------------------------------------------------------- */
{
console.formatHeader(_TEXT("Exception"));
console.format(_TEXT("Exception error id"), crclException.getErrorInfo().getErrorId());
console.format(_TEXT("Exception name"), crclException.getName());
console.format(_TEXT("Exception what"), crclException.what());
console.format(_TEXT("Exception message"), crclException.getErrorInfo().getMessage().asString().c_str());
console.formatBool(_TEXT("Exception recoverable"), crclException.getErrorInfo().isRecoverable());
const TCHAR * cpszSavePrefix = ErrorInfo::getGlobalErrorInfoIndent();
ErrorInfo::setGlobalErrorInfoIndent(" ");
console.getOutputStream() << crclException.getErrorInfo() << endl;
ErrorInfo::setGlobalErrorInfoIndent(cpszSavePrefix);
}
template<class T>
bool equalVectors(vector<T> const & crV1, vector<T> const & crV2, util::ConsoleUI * pConsole) {
bool bResult = true;
if ( crV1.size() != crV2.size() ) {
pConsole->getOutputStream() << "size 1: " << crV1.size() << ", size 2: " << crV2.size() << endl;
return false;
}
size_t i;
for (i=0; i<crV1.size(); ++i) {
bool bIsEqual = (crV1[i] == crV2[i]);
if (!bIsEqual) {
bResult = false;
}
}
if (!bResult) {
size_t j;
for (j=0; j<crV1.size(); ++j) {
if (crV1[j] != crV2[j]) {
pConsole->getOutputStream() << j << ": " << crV1[j] << " " << crV2[j] << endl;
}
}
}
return bResult;
}
/* ----------------------------------------------------------------------- */
/* Tests */
/* ----------------------------------------------------------------------- */
void testSerializeDefinitions(util::ConsoleUI * pConsole) {
uima::internal::CASDefinition * iv_newCASDefinition;
uima::internal::SerializedCAS iv_serializedCAS;
uima::Timer iv_serializeTimerDefinitions;
/* create engine */
ErrorInfo errInfo;
UnicodeString filename("toktest.xml");
UnicodeString fn = ResourceManager::resolveFilename(filename, filename);
uima::TextAnalysisEngine * pEngine = TextAnalysisEngine::createTextAnalysisEngine
(UnicodeStringRef(fn).asUTF8().c_str(), errInfo);
if (pEngine == NULL ) {
LOG("Error: " << errInfo.asString());
ASSERT_OR_THROWEXCEPTION(false);
}
ASSERT_OR_THROWEXCEPTION(EXISTS(pEngine));
ASSERT_OR_THROWEXCEPTION( errInfo.getErrorId() == UIMA_ERR_NONE );
iv_newCASDefinition = uima::internal::CASDefinition::createCASDefinition(pEngine->getAnnotatorContext());
pConsole->info("Serializing Type System and Index definitions CAS");
uima::internal::CASSerializer serializer(true);
iv_serializedCAS.reset();
// serialize type system and index definition
uima::internal::CASDeserializer deserializer;
iv_serializeTimerDefinitions.reset();
iv_serializeTimerDefinitions.start();
serializer.serializeDefinitions(*iv_newCASDefinition, iv_serializedCAS);
iv_serializeTimerDefinitions.stop();
#ifndef UIMA_SUPPRESS_TIMING
pConsole->format(" Serialization of Definitions", uima::Timer::timeString( iv_serializeTimerDefinitions.getAccumulatedTime() ).c_str() );
#endif
iv_serializeTimerDefinitions.reset();
iv_serializeTimerDefinitions.start();
deserializer.deserializeDefinitions(iv_serializedCAS , *iv_newCASDefinition );
iv_serializeTimerDefinitions.stop();
#ifndef UIMA_SUPPRESS_TIMING
pConsole->format(" Deserialization of Definitions", uima::Timer::timeString( iv_serializeTimerDefinitions.getAccumulatedTime() ).c_str() );
#endif
// serializing again
uima::internal::SerializedCAS serializedCAS2;
serializer.serializeDefinitions(*iv_newCASDefinition, serializedCAS2);
// checking
pConsole->format("Checking", "type symbol table");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getTypeSymbolTable(), serializedCAS2.getTypeSymbolTable(), pConsole ));
pConsole->format(" success", true);
pConsole->format("Checking", "type inheritance table");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getTypeInheritanceTable(), serializedCAS2.getTypeInheritanceTable(), pConsole));
pConsole->format(" success", true);
pConsole->format("Checking", "feature symbol table");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getFeatureSymbolTable(), serializedCAS2.getFeatureSymbolTable(), pConsole));
pConsole->format(" success", true);
pConsole->format("Checking", "feature offset table");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getFeatureOffsetTable(), serializedCAS2.getFeatureOffsetTable(), pConsole));
pConsole->format(" success", true);
pConsole->format("Checking", "feature def table");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getFeatureDefinitionTable(), serializedCAS2.getFeatureDefinitionTable(), pConsole));
pConsole->format(" success", true);
pConsole->format("Checking", "type priorities");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getTypePriorityTable(), serializedCAS2.getTypePriorityTable(), pConsole));
pConsole->format(" success", true);
pConsole->format("Checking", "index IDs");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getIndexIDTable(), serializedCAS2.getIndexIDTable(), pConsole ));
pConsole->format(" success", true);
pConsole->format("Checking", "index kinds");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getIndexKindTable(), serializedCAS2.getIndexKindTable(), pConsole) );
pConsole->format(" success", true);
pConsole->format("Checking", "comparator start");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getComparatorStartTable(), serializedCAS2.getComparatorStartTable(), pConsole) );
pConsole->format(" success", true);
pConsole->format("Checking", "comparator def");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getComparatorDefinitionTable(), serializedCAS2.getComparatorDefinitionTable(), pConsole) );
pConsole->format(" success", true);
delete iv_newCASDefinition;
delete pEngine;
}
void testSerializeData(util::ConsoleUI * pConsole ) {
pConsole->info("Serializing Document Data CAS");
uima::Timer iv_serializeTimerData;
ErrorInfo errInfo;
UnicodeString filename("toktest.xml");
UnicodeString fn = ResourceManager::resolveFilename(filename, filename);
uima::TextAnalysisEngine * pEngine =
TextAnalysisEngine::createTextAnalysisEngine(UnicodeStringRef(fn).asUTF8().c_str(), errInfo);
if (pEngine == NULL ) {
LOG("Error: " << errInfo.asString());
ASSERT_OR_THROWEXCEPTION(false);
}
ASSERT_OR_THROWEXCEPTION(EXISTS(pEngine));
ASSERT_OR_THROWEXCEPTION( errInfo.getErrorId() == UIMA_ERR_NONE );
/* read in a file */
UnicodeString dataFile("tdoc_001_en_850.asc");
UnicodeString datafn = ResourceManager::resolveFilename(dataFile, dataFile);
std::string dataFilename = UnicodeStringRef(datafn).asUTF8();
/* open file for read */
FILE * pFile = fopen( dataFilename.c_str(),"rb");
ASSERT_OR_THROWEXCEPTION(pFile != NULL );
/* allocate buffer for file contents */
struct stat stat_result;
stat(dataFilename.c_str(), &stat_result);
int filesize = stat_result.st_size;
char * pBuffer = new char[filesize+1];
ASSERT_OR_THROWEXCEPTION(pBuffer != NULL );
/* read the file */
size_t numread = fread(pBuffer,1,filesize,pFile);
fclose(pFile);
/* convert to unicode and set tcas document text*/
UnicodeString ustrInputText(pBuffer, (int32_t)numread, "utf-8");
delete[] pBuffer;
/* set TCAS Document text */
CAS * tcas = pEngine->newCAS();
ASSERT_OR_THROWEXCEPTION( EXISTS(tcas) );
tcas->setDocumentText(ustrInputText.getBuffer(), ustrInputText.length(), true);
tcas->getDocumentAnnotation().setLanguage("en");
/* call process */
TyErrorId err = pEngine->process(*tcas);
ASSERT_OR_THROWEXCEPTION( err == UIMA_ERR_NONE );
uima::internal::CASImpl & cas = uima::internal::CASImpl::promoteCAS( *tcas);
uima::internal::CASSerializer serializer(true);
uima::internal::SerializedCAS iv_serializedCAS;
iv_serializedCAS.reset();
iv_serializeTimerData.reset();
iv_serializeTimerData.start();
serializer.serializeData(cas, iv_serializedCAS);
iv_serializeTimerData.stop();
#ifndef UIMA_SUPPRESS_TIMING
pConsole->format(" Serialization per-document data", uima::Timer::timeString( iv_serializeTimerData.getAccumulatedTime() ).c_str() );
#endif
iv_serializeTimerData.reset();
iv_serializeTimerData.start();
size_t blobsz = serializer.getBlobSize(cas);
char* blob = new char[blobsz];
blobsz = serializer.getBlob(cas, blob, blobsz);
iv_serializeTimerData.stop();
#ifndef UIMA_SUPPRESS_TIMING
pConsole->format(" Blob serialization per-document data", uima::Timer::timeString( iv_serializeTimerData.getAccumulatedTime() ).c_str() );
#endif
pConsole->formatHeader("CAS Serialization Results:");
pConsole->format(" FS Heap size", (unsigned long)iv_serializedCAS.getFSHeapArray().size() );
pConsole->format(" String Heap size", (unsigned long)iv_serializedCAS.getStringSymbolTable().size() );
pConsole->format(" Number of Indexed FSs", (unsigned long)iv_serializedCAS.getIndexedFSs().size() );
pConsole->format(" Blob size:", (unsigned long)blobsz);
// iv_serializedCAS.print(cout);
pConsole->info("Deserializing CAS");
//ee uima::internal::TCASImpl * newTCAS = uima::internal::TCASImpl::createTCASImpl( *iv_newCASDefinition, getEngine().getAnnotatorContext() );
uima::internal::CASImpl * newTCAS = &uima::internal::CASImpl::promoteCAS( *(pEngine->newCAS()) );
uima::internal::CASDeserializer deserializer;
/*
iv_serializeTimerFSAndStringHeap.reset();
iv_serializeTimerFSAndStringHeap.start();
deserializer.deserializeFSHeapAndStringTable(iv_serializedCAS, *iv_newTCAS);
iv_serializeTimerFSAndStringHeap.stop();
getConsole().format(" Deserialization of FS/String Heap", uima::Timer::timeString( iv_serializeTimerFSAndStringHeap.getAccumulatedTime() ).c_str() );
*/
iv_serializeTimerData.reset();
iv_serializeTimerData.start();
deserializer.deserializeData(iv_serializedCAS, *newTCAS);
iv_serializeTimerData.stop();
#ifndef UIMA_SUPPRESS_TIMING
getConsole().format(" Deserialization document data", uima::Timer::timeString( iv_serializeTimerData.getAccumulatedTime() ).c_str() );
#endif
// deserialize the blob on top for a quick test
iv_serializeTimerData.reset();
iv_serializeTimerData.start();
deserializer.deserializeBlob(blob, *newTCAS);
iv_serializeTimerData.stop();
#ifndef UIMA_SUPPRESS_TIMING
pConsole->format(" Deserialization blob", uima::Timer::timeString( iv_serializeTimerData.getAccumulatedTime() ).c_str() );
#endif
// deserializer.deserializeDocument(iv_serializedCAS, *iv_newTCAS);
// iv_newTCAS->getFSSystem().getLowlevelFSHeap().print(cout);
// cout << "DocID: " << newTCAS.getDocumentAnnotation().getID() << endl;
// cout << "========SERIALIZE AGAIN======" << endl;
pConsole->info("Serializing CAS again");
uima::internal::SerializedCAS serializedCAS2;
uima::internal::CASSerializer serializer2(true);
/*
serializer2.serializeDocument(*iv_newTCAS, serializedCAS2);
serializer2.serializeFSHeapAndStringHeap(*iv_newTCAS, serializedCAS2);
serializer2.serializeIndexedFSs(*iv_newTCAS, serializedCAS2);
*/
serializer2.serializeData(*newTCAS, serializedCAS2);
// serCas2.print(cout);
pConsole->info("Checking results");
pConsole->format("Checking", "fs heap");
ASSERT_OR_THROWEXCEPTION(equalVectors( iv_serializedCAS.getFSHeapArray(), serializedCAS2.getFSHeapArray(), pConsole ) );
pConsole->format(" success", true);
pConsole->format("Checking", "string symbol table");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getStringSymbolTable(), serializedCAS2.getStringSymbolTable(), pConsole ) );
pConsole->format(" success", true);
pConsole->format("Checking", "document");
ASSERT_OR_THROWEXCEPTION( iv_serializedCAS.getDocument() == serializedCAS2.getDocument() );
pConsole->format(" success", true);
pConsole->format("Checking", "indexed FSs");
ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getIndexedFSs(), serializedCAS2.getIndexedFSs(), pConsole ));
pConsole->format(" success", true);
pConsole->info("Done");
delete tcas;
delete newTCAS;
delete pEngine;
delete blob;
}
/* ----------------------------------------------------------------------- */
/* Main routine */
/* ----------------------------------------------------------------------- */
int main(int argc, char * argv[]) /*
---------------------------------- */
{
/* create console */
util::ConsoleUI * pConsole = new util::ConsoleUI(argc, argv, MAIN_TITLE, "\n");
assert(EXISTS(pConsole));
#if !defined(NDEBUG) && defined(_MSC_VER)
//int iRetVal = _CrtSetBreakAlloc(124613);
#endif
/* create a UIMA resource */
try {
ResourceManager::createInstance(MAIN_TITLE);
testSerializeDefinitions(pConsole);
testSerializeData(pConsole);
ResourceManager::deleteInstance();
} catch (Exception & rclException) {
displayException(*pConsole, rclException);
pConsole->error("Unexpected UIMA exception");
return 1;
} catch (exception & rclException) {
pConsole->error(rclException.what());
return 1;
}
delete pConsole;
return(0);
}
/* <EOF> */