blob: bf94a71c8557bfa0959c931eb83199cf851cb28f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "uima/api.hpp"
using namespace uima;
const UChar * translate(UChar *);
class SofaExampleAnnotator : public Annotator {
private:
Type cross, annot;
Feature other;
icu::UnicodeString us_SofaString;
AnnotatorContext * pAnc;
public:
SofaExampleAnnotator(void) {
cout << "SofaExampleAnnotator: Constructor" << endl;
}
~SofaExampleAnnotator(void) {
cout << "SofaExampleAnnotator: Destructor" << endl;
}
/** */
TyErrorId initialize(AnnotatorContext & rclAnnotatorContext) {
cout << "SofaExampleAnnotator: initialize()" << endl;
// Save the annotator context for use in process()
pAnc = &rclAnnotatorContext;
return (TyErrorId)UIMA_ERR_NONE;
}
/** */
TyErrorId typeSystemInit(TypeSystem const & crTypeSystem) {
cout << "SofaExampleAnnotator::typeSystemInit()" << endl;
// get Type and Feature objects for use in process()
annot = crTypeSystem.getType("uima.tcas.Annotation");
cross = crTypeSystem.getType("sofa.test.CrossAnnotation");
other = cross.getFeatureByBaseName("otherAnnotation");
if (!(annot.isValid() && cross.isValid() && other.isValid())) {
cout << "SofaExampleAnnotator::typeSystemInit() - Error getting Type or Feature objects" << endl;
return (TyErrorId)UIMA_ERR_RESMGR_INVALID_RESOURCE;
}
return(TyErrorId)UIMA_ERR_NONE;
}
/** */
TyErrorId destroy() {
cout << "SofaExampleAnnotator: destroy()" << endl;
return (TyErrorId)UIMA_ERR_NONE;
}
/** */
TyErrorId process(CAS & rCAS, ResultSpecification const & crResultSpecification) {
CAS *engTcas, *germTcas;
UChar *myLocalSaveState;
// Look for english document and "translate" to German
cout << "SofaExampleAnnotator: process() begins" << endl;
// get English view
engTcas = rCAS.getView("EnglishDocument");
DocumentFS adocFS = engTcas->getDocumentAnnotation();
UnicodeStringRef aengText = adocFS.getCoveredText();
cout << " English Input: " << aengText << endl;
// Create the output German text Sofa and open CAS view
germTcas = rCAS.createView("GermanDocument");
// Get pointer to the English text document
DocumentFS docFS = engTcas->getDocumentAnnotation();
UnicodeStringRef engText = docFS.getCoveredText();
// make copy of document for the u_strtok_r function (100 character limit!)
UChar uWork[100];
u_strncpy(uWork, engText.getBuffer(), 99);
// Setup for translated text
int germBegin = 0;
int germEnd = 0;
UChar translation[400];
translation[0]=0;
// get two IR handles for adding annotations to the appropriate view
FSIndexRepository & engIndexRep = engTcas->getIndexRepository();
FSIndexRepository & germIndexRep = germTcas->getIndexRepository();
// Parse the English text
UChar uDelim[2];
UnicodeString delimUS(" ");
u_strncpy(uDelim, delimUS.getBuffer(), 1);
uDelim[1] = 0;
UChar * next = u_strtok_r(uWork, uDelim, &myLocalSaveState);
while (next) {
// Create annotation on source text
AnnotationFS engAnnot =
engTcas->createAnnotation(annot, next-uWork, (next-uWork)+u_strlen(next));
engIndexRep.addFS(engAnnot);
// Translate word-by-word
const UChar * gword = translate(next);
// Accumulate the total translated document
if (germBegin > 0) {
// if not the first word, add space before
u_strncat(translation, uDelim, 1);
germBegin += 1;
}
u_strcat(translation, gword);
// Create annotation on output text
germEnd = germBegin + u_strlen(gword);
AnnotationFS germAnnot = germTcas->createAnnotation(cross, germBegin, germEnd);
germIndexRep.addFS(germAnnot);
// add link to English text
germAnnot.setFSValue(other, engAnnot);
germBegin = germEnd;
next = u_strtok_r(NULL, uDelim, &myLocalSaveState);
}
// set documentText with accumulated transation
germTcas->setDocumentText( translation, u_strlen(translation), true );
cout << " German(!) Output: " << germTcas->getDocumentText() << endl;
cout << "SofaExampleAnnotator: process() ends" << endl;
return (TyErrorId)UIMA_ERR_NONE;
}
};
/** */
static UnicodeString const uThis("this");
static UnicodeString const uBeer("beer");
static UnicodeString const uIs("is");
static UnicodeString const uDas("das");
static UnicodeString const uBier("bier");
static UnicodeString const uIst("ist");
static UnicodeString const uGut("gut");
const UChar * translate(UChar * eword) {
if (0 == u_strcasecmp(eword, uThis.getBuffer(), 0))
return uDas.getBuffer();
if (0 == u_strcasecmp(eword, uBeer.getBuffer(), 0))
return uBier.getBuffer();
if (0 == u_strcasecmp(eword, uIs.getBuffer(), 0))
return uIst.getBuffer();
return uGut.getBuffer();
}
// This macro exports an entry point that is used to create the annotator.
MAKE_AE(SofaExampleAnnotator);