blob: 27de5409193cb72a88205aa89ca0a4f05f72b08f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.examples.cpm.sofa;
import java.util.Arrays;
import java.util.Locale;
import java.util.StringTokenizer;

import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.analysis_engine.annotator.Annotator_ImplBase;
import org.apache.uima.analysis_engine.annotator.GenericAnnotator;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.SofaID;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Simple English to German translator.
 *
 * <p>Reads the text of the view mapped from the "EnglishDocument" Sofa name, splits it on
 * whitespace, and creates one annotation per token on the English view. It then creates the Sofa
 * mapped from "GermanDocument", fills it with a word-by-word translation, and adds one
 * {@code sofa.test.CrossAnnotation} per German word whose {@code otherAnnotation} feature refers
 * to the matching English token annotation.
 */
public class TransAnnotator extends Annotator_ImplBase implements GenericAnnotator {

  /**
   * Tokenizes the English view, annotates every token, and writes the translated text together
   * with cross-linked annotations into a newly created German view.
   *
   * @param aCas
   *          the CAS that holds the English Sofa and in which the German Sofa is created
   * @param aResultSpec
   *          not used by this annotator
   * @throws AnnotatorProcessException
   *           declared by the annotator interface; this implementation does not throw it
   *           explicitly
   */
  public void process(CAS aCas, ResultSpecification aResultSpec) throws AnnotatorProcessException {
    // Map the component-level Sofa name to the real one and open the English view
    SofaID realSofaName = getContext().mapToSofaID("EnglishDocument");
    CAS engTcas = aCas.getView(aCas.getSofa(realSofaName));

    // Create the output German text Sofa and open its view
    realSofaName = getContext().mapToSofaID("GermanDocument");
    CAS germTcas = aCas.getView(aCas.createSofa(realSofaName, "text"));

    // Get some necessary Type System constants
    Type annot = engTcas.getAnnotationType();
    Type cross = engTcas.getTypeSystem().getType("sofa.test.CrossAnnotation");
    Feature other = cross.getFeatureByBaseName("otherAnnotation");

    // Get the English text
    // NOTE(review): presumably the English view always carries text; a null document text would
    // fail in the StringTokenizer constructor below - confirm against the pipeline setup.
    String engText = engTcas.getDocumentText();

    // Setup for translated text
    int engEnd = 0;
    StringBuffer translation = new StringBuffer();

    // Parse the English text (default delimiters, i.e. whitespace)
    StringTokenizer st = new StringTokenizer(engText);
    while (st.hasMoreTokens()) {
      String thisTok = st.nextToken();

      // Search from the end of the previous token so repeated words map to their own offsets
      int engBegin = engText.indexOf(thisTok, engEnd);
      engEnd = engBegin + thisTok.length();

      // Create token annotations on English text
      AnnotationFS engAnnot = engTcas.createAnnotation(annot, engBegin, engEnd);
      engTcas.getIndexRepository().addFS(engAnnot);

      // Simple word-by-word translation
      String germWord = translate(thisTok);

      // Accumulate the translated text; words are separated by a single blank, and the German
      // offsets are taken directly from the buffer length so they cannot drift from the text
      if (translation.length() > 0) {
        translation.append(' ');
      }
      int germBegin = translation.length();
      translation.append(germWord);
      int germEnd = translation.length();

      // Create token annotations on German text, linking each one to its English token before
      // it is added to the index
      AnnotationFS germAnnot = germTcas.createAnnotation(cross, germBegin, germEnd);
      germAnnot.setFeatureValue(other, engAnnot);
      germTcas.getIndexRepository().addFS(germAnnot);
    }

    // Finally, set the output translation Sofa data
    germTcas.setDocumentText(translation.toString());
  }

  /** Lower-case characters of the English word "this". */
  static char wThis[] = { 't', 'h', 'i', 's' };

  /** Lower-case characters of the English word "beer". */
  static char wBeer[] = { 'b', 'e', 'e', 'r' };

  /** Lower-case characters of the English word "is". */
  static char wIs[] = { 'i', 's' };

  /**
   * Translates a single English word to German, ignoring case.
   *
   * @param word
   *          the English token to translate
   * @return "das" for "this", "bier" for "beer", "ist" for "is", and "gut" for any other word
   */
  private String translate(String word) {
    // Lower-case with a fixed locale: the default-locale toLowerCase() turns 'I' into a dotless
    // 'i' under Turkish/Azeri locales, so "IS" or "THIS" would no longer match.
    char[] letters = word.toLowerCase(Locale.ENGLISH).toCharArray();
    if (Arrays.equals(wThis, letters)) {
      return "das";
    }
    if (Arrays.equals(wBeer, letters)) {
      return "bier";
    }
    if (Arrays.equals(wIs, letters)) {
      return "ist";
    }
    return "gut";
  }
}