blob: 328a722cded28bd6084cc398b462898264eb2405 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "uima/api.hpp"
#include "uima/dirwalk.hpp"
#include "uima/filename.hpp"
#include "uima/xmlwriter.hpp"
#include <sys/stat.h>
using namespace std;
using namespace uima;
/**
* An example application that reads documents from files, sends them
* though a Text Analysis Engine, and prints all discovered annotations to
* the console.
* <p>
* The application takes two arguments:
* <ol type="1">
* <li>The path to an XML descriptor for the TAE to be executed</li>
* <li>An input directory containing files to be processed</li>
* <li>Optionally, the logging level</li>
* </ol>
*
*
*/
/**
* Main program.
*
* @param args Command-line arguments - see class description
*/
/* Little helper routine to check and report errors.
This routine just does a hard program exit for any failure! */
static void CheckError(TyErrorId utErrorId,
const AnalysisEngine & crEngine);
static void CheckError(ErrorInfo const &);
void processFile (std::string filename, AnalysisEngine * pEngine, CAS * tcas);
void tell() {
cerr << "Usage: ExampleApplication UimaCppDescriptor InputFileOrDirectory <-l LogLevel>" << endl;
}
int main(int argc, char * argv[]) {
try {
int loglevel = -1;
/* check the number of command line args */
if (argc != 3 && argc != 5 ) {
tell();
return 1;
}
if (argc == 5) {
if (!strcmp(argv[3], "-l")) {
loglevel = atoi(argv[4]);
if (loglevel < LogStream::EnMessage) {
cerr << "LogLevel less than minimum value (Message) = " << LogStream::EnMessage << endl;
return 1;
}
if (loglevel > LogStream::EnError) {
cerr << "LogLevel greater than maximum value (Error) = " << LogStream::EnError << endl;
return 1;
}
} else {
cerr << "Inexpected option: " << argv[3] << endl;
tell();
return 1;
}
}
/* Create/link up to a resource manager instance (singleton) */
(void) ResourceManager::createInstance("UIMACPP_EXAMPLE_APPLICATION");
if (loglevel >= 0) {
ResourceManager::getInstance().setLoggingLevel((LogStream::EnEntryType)loglevel);
}
TyErrorId utErrorId; // Variable to store return codes
ErrorInfo errorInfo; // Variable to stored detailed error info
/* Initialize engine with filename of config-file */
AnalysisEngine * pEngine =
Framework::createAnalysisEngine(argv[1], errorInfo);
CheckError(errorInfo);
/* Get a new CAS */
CAS* tcas = pEngine->newCAS();
/* process input xcas */
util::DirectoryWalk dirwalker(argv[2]);
if (dirwalker.isValid()) {
util::Filename infile(argv[2],"FilenamePlaceHolder");
while (dirwalker.isValid()) {
// Process all files or just the ones with matching suffix
if ( dirwalker.isFile() &&
dirwalker.matchesWildcardPattern("*.txt") ) {
infile.setNewName(dirwalker.getNameWithoutPath());
std::string afile(infile.getAsCString());
//process the file
processFile(afile, pEngine, tcas);
//reset the cas
tcas->reset();
}
//get the next xcas file in the directory
dirwalker.setToNext();
}
} else {
/* If has no directory entries then probably a file */
cout << "ExampleApplication: processing file " << argv[2] << endl;
std::string afile(argv[2]);
//process the cas
processFile(afile, pEngine, tcas);
}
/* call collectionProcessComplete */
utErrorId = pEngine->collectionProcessComplete();
/* Free ressorces */
utErrorId = pEngine->destroy();
CheckError(utErrorId, *pEngine);
delete tcas;
delete pEngine;
} catch (Exception e) {
cout << "ExampleApplication " << e << endl;
}
/* If we got this far everything went OK */
cout << "ExampleApplication: processing finished sucessfully! " << endl;
return(0);
}
/* Little helper routine to check and report errors.
This routine just does a hard program exit for any failure!
*/
static void CheckError(TyErrorId utErrorId,
const AnalysisEngine & crEngine) {
if (utErrorId != UIMA_ERR_NONE) {
cerr << endl << " *** ExampleApplication - Error info:" << endl;
cerr << "Error number : "
<< utErrorId << endl;
cerr << "Error string : "
<< AnalysisEngine::getErrorIdAsCString(utErrorId) << endl;
const TCHAR* errStr = crEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr();
if (errStr != NULL)
cerr << " Last logged message : " << errStr << endl;
exit((int)utErrorId);
}
}
/* Similar routine as above just with error info objects instead of err-ids.
This routine just does a hard program exit for any failure!
*/
static void CheckError(ErrorInfo const & errInfo) {
if (errInfo.getErrorId() != UIMA_ERR_NONE) {
cerr << endl << " *** ExampleApplication - Error info:" << endl
<< "Error string : "
<< AnalysisEngine::getErrorIdAsCString(errInfo.getErrorId())
<< errInfo << endl; /* (errInfo starts with a newline) */
exit((int)errInfo.getErrorId());
}
}
void processFile (std::string filename, AnalysisEngine * pEngine, CAS * tcas ) {
cout << "processing file " << filename << endl;
try {
/* read in file contents and set CAS Document text */
FILE * pFile = fopen(filename.c_str(),"rb");
int filesize;
if (pFile == NULL) {
cerr << "ExampleApplication: Error reading file " << filename << endl;
exit(-1);
}
/* allocate buffer for file contents */
struct stat fstat;
stat(filename.c_str(), &fstat);
filesize = fstat.st_size;
char * pBuffer = new char[filesize+1];
if (pBuffer == NULL) {
cerr << "ExampleApplication: Error allocating buffer to hold contents of file " << filename << endl;
exit(-1);
}
/* read the file */
size_t numread = fread(pBuffer,1,filesize,pFile);
fclose(pFile);
/* convert to unicode and set tcas document text*/
UnicodeString ustrInputText(pBuffer, filesize, "utf-8");
tcas->setDocumentText(ustrInputText.getBuffer(), ustrInputText.length(), true);
delete[] pBuffer;
/* process the CAS */
TyErrorId utErrorId = ((AnalysisEngine*)pEngine)->process(*tcas);
CheckError(utErrorId, *pEngine);
/* serialize the cas to stdout */
cout << "ExampleApplication: write out cas after processing file " << filename << endl << endl;
XCASWriter writer(*tcas, true);
writer.write(cout);
cout << endl;
} catch (Exception e) {
cout << "ExampleApplication: " << e << endl;
}
}
/* <EOF> */