blob: 381889d947c78074811543d6bbbe7591ce5cb9e9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.analysis_engine.impl;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.uima.Constants;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.UIMA_IllegalStateException;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.CasIterator;
import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.asb.impl.ASB_impl;
import org.apache.uima.analysis_engine.asb.impl.FlowControllerContainer;
import org.apache.uima.analysis_engine.metadata.FixedFlow;
import org.apache.uima.analysis_engine.metadata.impl.FixedFlow_impl;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.FSIndexRepository;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.admin.FSIndexComparator;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.impl.URISpecifier_impl;
import org.apache.uima.resource.metadata.AllowedValue;
import org.apache.uima.resource.metadata.Capability;
import org.apache.uima.resource.metadata.ConfigurationParameter;
import org.apache.uima.resource.metadata.FeatureDescription;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.FsIndexKeyDescription;
import org.apache.uima.resource.metadata.NameValuePair;
import org.apache.uima.resource.metadata.TypeDescription;
import org.apache.uima.resource.metadata.TypePriorities;
import org.apache.uima.resource.metadata.TypePriorityList;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.resource.metadata.impl.AllowedValue_impl;
import org.apache.uima.resource.metadata.impl.Capability_impl;
import org.apache.uima.resource.metadata.impl.ConfigurationParameter_impl;
import org.apache.uima.resource.metadata.impl.FeatureDescription_impl;
import org.apache.uima.resource.metadata.impl.FsIndexDescription_impl;
import org.apache.uima.resource.metadata.impl.FsIndexKeyDescription_impl;
import org.apache.uima.resource.metadata.impl.NameValuePair_impl;
import org.apache.uima.resource.metadata.impl.TypePriorities_impl;
import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
import org.apache.uima.test.junit_extension.JUnitExtension;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.impl.ProcessTrace_impl;
/**
* Tests the primitive and aggregate AnalysisEngine implementations
* (PrimitiveAnalysisEngine_impl and AggregateAnalysisEngine_impl).
*
*/
public class AnalysisEngine_implTest extends TestCase {
/**
* Constructor for AnalysisEngine_implTest.
*
* @param arg0
*          the test name, passed on to the TestCase constructor
*/
public AnalysisEngine_implTest(String arg0) {
// arg0 is passed through unchanged to the TestCase(String) constructor.
super(arg0);
}
/**
* @see TestCase#setUp()
*/
protected void setUp() throws Exception {
// No fixture is built here; only the inherited TestCase setup runs.
super.setUp();
}
/**
 * Exercises initialize() on primitive and aggregate analysis engines. Covers, in order: a
 * specifier of the wrong kind, an empty description, a valid primitive, double initialization,
 * a simple aggregate, a series of invalid descriptors, configuration parameter overrides with
 * and without groups, an unsatisfied resource dependency, an aggregate containing a CAS
 * Consumer, an empty aggregate, and an aggregate with duplicate group overrides.
 *
 * @throws Exception
 *           declared by the signature; exceptions caught inside the test are handed to
 *           JUnitExtension.handleException
 */
public void testInitialize() throws Exception {
  try {
    PrimitiveAnalysisEngine_impl ae1 = new PrimitiveAnalysisEngine_impl();
    // try to initialize with the wrong kind of specifier - should return false
    boolean result = ae1.initialize(new URISpecifier_impl(), null);
    Assert.assertFalse(result);

    // try to initialize with an empty TaeDescription - should throw exception
    Exception ex = null;
    try {
      AnalysisEngineDescription taeDesc = new AnalysisEngineDescription_impl();
      taeDesc.setPrimitive(true);
      ae1.initialize(taeDesc, null);
    } catch (ResourceInitializationException e) {
      ex = e;
    }
    Assert.assertNotNull(ex);

    // initialize simple primitive TextAnalysisEngine
    ae1 = new PrimitiveAnalysisEngine_impl();
    AnalysisEngineDescription primitiveDesc = new AnalysisEngineDescription_impl();
    primitiveDesc.setFrameworkImplementation(Constants.JAVA_FRAMEWORK_NAME);
    primitiveDesc.setPrimitive(true);
    primitiveDesc
            .setAnnotatorImplementationName("org.apache.uima.analysis_engine.impl.TestAnnotator");
    result = ae1.initialize(primitiveDesc, null);
    Assert.assertTrue(result);

    // initialize again - should fail
    ex = null;
    try {
      ae1.initialize(primitiveDesc, null);
    } catch (UIMA_IllegalStateException e) {
      ex = e;
    }
    Assert.assertNotNull(ex);

    // initialize simple aggregate TextAnalysisEngine (also pass TextAnalysisEngineProcessData as
    // parameter)
    AnalysisEngineDescription aggDesc = new AnalysisEngineDescription_impl();
    aggDesc.setFrameworkImplementation(Constants.JAVA_FRAMEWORK_NAME);
    aggDesc.setPrimitive(false);
    aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("Test", primitiveDesc);
    FixedFlow_impl flow = new FixedFlow_impl();
    flow.setFixedFlow(new String[] { "Test" });
    aggDesc.getAnalysisEngineMetaData().setFlowConstraints(flow);
    AggregateAnalysisEngine_impl ae2 = new AggregateAnalysisEngine_impl();
    result = ae2.initialize(aggDesc, null);
    Assert.assertTrue(result);

    // try some descriptors that are invalid due to config. param problems
    for (int i = 1; i <= 13; i++) {
      _testInvalidDescriptor(JUnitExtension
              .getFile("TextAnalysisEngineImplTest/InvalidConfigParams" + i + ".xml"));
    }

    // try a descriptor with configuration parameter overrides - should work
    XMLInputSource in = new XMLInputSource(JUnitExtension
            .getFile("TextAnalysisEngineImplTest/AggregateTaeWithConfigParamOverrides.xml"));
    AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(in);
    AggregateAnalysisEngine_impl ae = new AggregateAnalysisEngine_impl();
    ae.initialize(desc, Collections.EMPTY_MAP);
    PrimitiveAnalysisEngine_impl delegate1 = (PrimitiveAnalysisEngine_impl) ae._getASB()
            .getComponentAnalysisEngines().get("Annotator1");
    PrimitiveAnalysisEngine_impl delegate2 = (PrimitiveAnalysisEngine_impl) ae._getASB()
            .getComponentAnalysisEngines().get("Annotator2");
    FlowControllerContainer flowController = ((ASB_impl) ae._getASB())
            .getFlowControllerContainer();
    // StringParam in group "en": only Annotator1 is expected to see the overriding value
    String strVal1 = (String) delegate1.getUimaContext().getConfigParameterValue("en",
            "StringParam");
    Assert.assertEquals("override", strVal1);
    String strVal2 = (String) delegate2.getUimaContext().getConfigParameterValue("en",
            "StringParam");
    Assert.assertEquals("en", strVal2);
    String strVal3 = (String) flowController.getUimaContext().getConfigParameterValue("en",
            "StringParam");
    Assert.assertEquals("en", strVal3);
    // IntegerParam in group "en": checked on each of the three components in turn.
    // intVal2 must be read from delegate2; reading delegate1 a second time would leave
    // Annotator2 unchecked.
    Integer intVal1 = (Integer) delegate1.getUimaContext().getConfigParameterValue("en",
            "IntegerParam");
    Assert.assertEquals(100, intVal1.intValue());
    Integer intVal2 = (Integer) delegate2.getUimaContext().getConfigParameterValue("en",
            "IntegerParam");
    Assert.assertEquals(100, intVal2.intValue());
    Integer intVal3 = (Integer) flowController.getUimaContext().getConfigParameterValue("en",
            "IntegerParam");
    Assert.assertEquals(100, intVal3.intValue());
    String[] strArrVal1 = (String[]) delegate1.getUimaContext().getConfigParameterValue("en",
            "StringArrayParam");
    Assert.assertEquals(Arrays.asList(new String[] { "override" }), Arrays.asList(strArrVal1));
    String[] strArrVal2 = (String[]) delegate2.getUimaContext().getConfigParameterValue("en",
            "StringArrayParam");
    Assert.assertEquals(Arrays.asList(new String[] { "override" }), Arrays.asList(strArrVal2));
    String[] strArrVal3 = (String[]) flowController.getUimaContext().getConfigParameterValue(
            "en", "StringArrayParam");
    Assert.assertEquals(Arrays.asList(new String[] { "override" }), Arrays.asList(strArrVal3));
    // release this aggregate before the variable is reused for the next descriptor
    ae.destroy();

    // another descriptor with configuration parameter overrides (this time no groups)
    in = new XMLInputSource(JUnitExtension
            .getFile("TextAnalysisEngineImplTest/AggregateTaeWithConfigParamOverrides2.xml"));
    desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(in);
    ae = new AggregateAnalysisEngine_impl();
    ae.initialize(desc, Collections.EMPTY_MAP);
    delegate1 = (PrimitiveAnalysisEngine_impl) ae._getASB().getComponentAnalysisEngines().get(
            "Annotator1");
    delegate2 = (PrimitiveAnalysisEngine_impl) ae._getASB().getComponentAnalysisEngines().get(
            "Annotator2");
    flowController = ((ASB_impl) ae._getASB()).getFlowControllerContainer();
    strVal1 = (String) delegate1.getUimaContext().getConfigParameterValue("StringParam");
    Assert.assertEquals("override", strVal1);
    strVal2 = (String) delegate2.getUimaContext().getConfigParameterValue("StringParam");
    Assert.assertEquals("myString", strVal2);
    strVal3 = (String) flowController.getUimaContext().getConfigParameterValue("StringParam");
    Assert.assertEquals("myString", strVal3);
    intVal1 = (Integer) delegate1.getUimaContext().getConfigParameterValue("IntegerParam");
    Assert.assertEquals(100, intVal1.intValue());
    intVal2 = (Integer) delegate2.getUimaContext().getConfigParameterValue("IntegerParam");
    Assert.assertEquals(100, intVal2.intValue());
    intVal3 = (Integer) flowController.getUimaContext().getConfigParameterValue("IntegerParam");
    Assert.assertEquals(100, intVal3.intValue());
    strArrVal1 = (String[]) delegate1.getUimaContext()
            .getConfigParameterValue("StringArrayParam");
    Assert.assertEquals(Arrays.asList(new String[] { "override" }), Arrays.asList(strArrVal1));
    strArrVal2 = (String[]) delegate2.getUimaContext()
            .getConfigParameterValue("StringArrayParam");
    Assert.assertEquals(Arrays.asList(new String[] { "override" }), Arrays.asList(strArrVal2));
    strArrVal3 = (String[]) flowController.getUimaContext().getConfigParameterValue(
            "StringArrayParam");
    Assert.assertEquals(Arrays.asList(new String[] { "override" }), Arrays.asList(strArrVal3));

    // try a descriptor that's invalid due to an unsatisfied resource dependency
    _testInvalidDescriptor(JUnitExtension
            .getFile("TextAnalysisEngineImplTest/UnsatisfiedResourceDependency.xml"));
    ae.destroy();

    // test an aggregate TAE containing a CAS Consumer
    in = new XMLInputSource(JUnitExtension
            .getFile("TextAnalysisEngineImplTest/AggregateTaeWithCasConsumer.xml"));
    desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(in);
    ae = new AggregateAnalysisEngine_impl();
    ae.initialize(desc, Collections.EMPTY_MAP);
    delegate1 = (PrimitiveAnalysisEngine_impl) ae._getASB().getComponentAnalysisEngines().get(
            "Annotator");
    delegate2 = (PrimitiveAnalysisEngine_impl) ae._getASB().getComponentAnalysisEngines().get(
            "CasConsumer");
    assertTrue(delegate1.getAnalysisEngineMetaData().getOperationalProperties().getModifiesCas());
    assertFalse(delegate2.getAnalysisEngineMetaData().getOperationalProperties().getModifiesCas());
    ae.destroy();

    // try an aggregate with no components (tests that empty flow works)
    in = new XMLInputSource(JUnitExtension
            .getFile("TextAnalysisEngineImplTest/EmptyAggregate.xml"));
    desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(in);
    FixedFlow emptyFlow = (FixedFlow) desc.getAnalysisEngineMetaData().getFlowConstraints();
    assertNotNull(emptyFlow.getFixedFlow());
    assertTrue(emptyFlow.getFixedFlow().length == 0);
    ae = new AggregateAnalysisEngine_impl();
    ae.initialize(desc, Collections.EMPTY_MAP);
    ae.destroy();

    // aggregate with duplicate group overrides
    in = new XMLInputSource(JUnitExtension
            .getFile("TextAnalysisEngineImplTest/AggregateWithDuplicateGroupOverrides.xml"));
    desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(in);
    ae = new AggregateAnalysisEngine_impl();
    ae.initialize(desc, Collections.EMPTY_MAP);
    delegate1 = (PrimitiveAnalysisEngine_impl) ae._getASB().getComponentAnalysisEngines().get(
            "Annotator1");
    delegate2 = (PrimitiveAnalysisEngine_impl) ae._getASB().getComponentAnalysisEngines().get(
            "Annotator2");
    String commonParamA = (String) delegate1.getUimaContext().getConfigParameterValue("a",
            "CommonParam");
    Assert.assertEquals("AggregateParam1a", commonParamA);
    String ann1_groupBParamBC = (String) delegate1.getUimaContext().getConfigParameterValue("b",
            "BCParam");
    Assert.assertEquals("AggregateParam2b", ann1_groupBParamBC);
    String ann2_groupBParamBC = (String) delegate2.getUimaContext().getConfigParameterValue("b",
            "BCParam");
    Assert.assertEquals("AggregateParam3b", ann2_groupBParamBC);
    ae.destroy();
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
/**
 * Asserts that the descriptor in the given file is rejected, either while it is parsed or
 * while an analysis engine is produced from it, and that the resulting exception carries a
 * message that does not start with the localization-failure marker.
 *
 * @param aFile
 *          descriptor file expected to be invalid
 * @throws IOException
 *           declared for the creation of the XMLInputSource over aFile
 */
protected void _testInvalidDescriptor(File aFile) throws IOException {
  XMLInputSource in = new XMLInputSource(aFile);
  Exception ex = null;
  try {
    AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(in);
    UIMAFramework.produceAnalysisEngine(desc);
  } catch (InvalidXMLException e) {
    ex = e;
  } catch (ResourceInitializationException e) {
    ex = e;
  }
  // This helper is called in a loop over many descriptors, so every assertion names the file;
  // otherwise a failure does not reveal which descriptor was wrongly accepted.
  Assert.assertNotNull("Expected parsing or initialization to fail for " + aFile, ex);
  Assert.assertNotNull("Exception raised for " + aFile + " has no message", ex.getMessage());
  Assert.assertFalse("Exception message for " + aFile + " was not localized: " + ex.getMessage(),
          ex.getMessage().startsWith("EXCEPTION MESSAGE LOCALIZATION FAILED"));
}
/**
 * Runs the process() variants against a primitive engine, a simple aggregate, an aggregate
 * containing a CAS Consumer (checking that the consumer wrote its output file), and an
 * aggregate that uses ParallelStep.
 *
 * @throws Exception
 *           declared by the signature; exceptions caught inside the test are handed to
 *           JUnitExtension.handleException
 */
public void testProcess() throws Exception {
  try {
    // test simple primitive TextAnalysisEngine (using TestAnnotator class)
    AnalysisEngineDescription primitiveDesc = new AnalysisEngineDescription_impl();
    primitiveDesc.setPrimitive(true);
    primitiveDesc
            .setAnnotatorImplementationName("org.apache.uima.analysis_engine.impl.TestAnnotator");
    primitiveDesc.getMetaData().setName("Test Primitive TAE");
    Capability cap = new Capability_impl();
    cap.addOutputType("NamedEntity", true);
    cap.addOutputType("DocumentStructure", true);
    Capability[] caps = new Capability[] { cap };
    primitiveDesc.getAnalysisEngineMetaData().setCapabilities(caps);
    _testProcess(primitiveDesc);

    // test simple aggregate TextAnalysisEngine (again using TestAnnotator class)
    AnalysisEngineDescription aggDesc = new AnalysisEngineDescription_impl();
    aggDesc.setPrimitive(false);
    aggDesc.getMetaData().setName("Test Aggregate TAE");
    aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("Test", primitiveDesc);
    FixedFlow_impl flow = new FixedFlow_impl();
    flow.setFixedFlow(new String[] { "Test" });
    aggDesc.getAnalysisEngineMetaData().setFlowConstraints(flow);
    aggDesc.getAnalysisEngineMetaData().setCapabilities(caps);
    _testProcess(aggDesc);

    // test aggregate TAE containing a CAS Consumer
    File outFile = JUnitExtension.getFile("CpmOutput.txt");
    if (outFile.exists()) {
      // outFile.delete() can't be relied upon. Instead set file to zero length.
      FileOutputStream fos = new FileOutputStream(outFile, false);
      fos.close();
      assertEquals(0, outFile.length());
    }
    AnalysisEngineDescription aggWithCcDesc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
            new XMLInputSource(JUnitExtension
                    .getFile("TextAnalysisEngineImplTest/AggregateTaeWithCasConsumer.xml")));
    _testProcess(aggWithCcDesc);
    // test that CAS Consumer ran
    assertTrue(outFile.exists());
    assertTrue(outFile.length() > 0);
    outFile.delete();

    // test aggregate that uses ParallelStep
    AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
            new XMLInputSource(JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateForParallelStepTest.xml")));
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(desc);
    try {
      CAS cas = ae.newCAS();
      cas.setDocumentText("new test");
      ae.process(cas);
      assertEquals("new test", TestAnnotator.lastDocument);
      assertEquals("new test", TestAnnotator2.lastDocument);
      cas.reset();
    } finally {
      // release the engine even when an assertion fails, as _testProcess does for its engines
      ae.destroy();
    }
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
/**
* Auxiliary method used by testProcess().
*
* @param aTaeDesc
* description of the analysis engine to test
*/
protected void _testProcess(AnalysisEngineDescription aTaeDesc) throws UIMAException {
  // Produce the engine under test and a CAS to feed it.
  final AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(aTaeDesc);
  final CAS cas = engine.newCAS();

  // Each process() variant is checked through the static fields of TestAnnotator
  // (lastDocument, lastResultSpec), which are compared with what was passed in.

  // Variant 1: process(CAS)
  final String plainText = "new test";
  cas.setDocumentText(plainText);
  engine.process(cas);
  assertEquals(plainText, TestAnnotator.lastDocument);
  cas.reset();

  // Variant 2: process(CAS, ResultSpecification)
  final ResultSpecification requested = new ResultSpecification_impl();
  requested.addResultType("NamedEntity", true);
  final String specText = "testing...";
  cas.setDocumentText(specText);
  engine.process(cas, requested);
  assertEquals(specText, TestAnnotator.lastDocument);
  assertEquals(requested, TestAnnotator.lastResultSpec);
  cas.reset();

  engine.destroy();
}
/**
 * Checks how configuration parameter values reach TestAnnotator (observed through its static
 * stringParamValue field) after initialize() and reconfigure(), for a primitive engine, for an
 * aggregate that declares the same StringParam, and for an aggregate that declares only an
 * unrelated IntParam.
 *
 * @throws Exception
 *           declared by the signature; exceptions caught inside the test are handed to
 *           JUnitExtension.handleException
 */
public void testReconfigure() throws Exception {
  try {
    // --- primitive engine backed by TestAnnotator, with StringParam = "Test1" ---
    AnalysisEngineDescription annotatorDesc = new AnalysisEngineDescription_impl();
    annotatorDesc.setPrimitive(true);
    annotatorDesc.getMetaData().setName("Test Primitive TAE");
    annotatorDesc
            .setAnnotatorImplementationName("org.apache.uima.analysis_engine.impl.TestAnnotator");
    ConfigurationParameter stringDecl = new ConfigurationParameter_impl();
    stringDecl.setName("StringParam");
    stringDecl.setDescription("parameter with String data type");
    stringDecl.setType(ConfigurationParameter.TYPE_STRING);
    annotatorDesc.getMetaData().getConfigurationParameterDeclarations()
            .setConfigurationParameters(new ConfigurationParameter[] { stringDecl });
    annotatorDesc.getMetaData().getConfigurationParameterSettings().setParameterSettings(
            new NameValuePair[] { new NameValuePair_impl("StringParam", "Test1") });

    PrimitiveAnalysisEngine_impl primitiveEngine = new PrimitiveAnalysisEngine_impl();
    primitiveEngine.initialize(annotatorDesc, null);
    // the annotator publishes the value it was configured with in a static field
    assertEquals("Test1", TestAnnotator.stringParamValue);

    // change the value and reconfigure: the annotator must pick up the new value
    primitiveEngine.setConfigParameterValue("StringParam", "Test2");
    primitiveEngine.reconfigure();
    assertEquals("Test2", TestAnnotator.stringParamValue);

    // --- aggregate that declares StringParam itself, set to "Test3" ---
    AnalysisEngineDescription overridingDesc = new AnalysisEngineDescription_impl();
    overridingDesc.setFrameworkImplementation(Constants.JAVA_FRAMEWORK_NAME);
    overridingDesc.setPrimitive(false);
    overridingDesc.getMetaData().setName("Test Aggregate TAE");
    overridingDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("Test", annotatorDesc);
    FixedFlow_impl overridingFlow = new FixedFlow_impl();
    overridingFlow.setFixedFlow(new String[] { "Test" });
    overridingDesc.getAnalysisEngineMetaData().setFlowConstraints(overridingFlow);
    ConfigurationParameter aggregateStringDecl = new ConfigurationParameter_impl();
    aggregateStringDecl.setName("StringParam");
    aggregateStringDecl.setDescription("parameter with String data type");
    aggregateStringDecl.setType(ConfigurationParameter.TYPE_STRING);
    overridingDesc.getMetaData().getConfigurationParameterDeclarations()
            .setConfigurationParameters(new ConfigurationParameter[] { aggregateStringDecl });
    overridingDesc.getMetaData().getConfigurationParameterSettings().setParameterSettings(
            new NameValuePair[] { new NameValuePair_impl("StringParam", "Test3") });

    AggregateAnalysisEngine_impl overridingEngine = new AggregateAnalysisEngine_impl();
    overridingEngine.initialize(overridingDesc, null);
    assertEquals("Test3", TestAnnotator.stringParamValue);

    // new value on the aggregate, then reconfigure
    overridingEngine.setConfigParameterValue("StringParam", "Test4");
    overridingEngine.reconfigure();
    assertEquals("Test4", TestAnnotator.stringParamValue);

    // a second reconfigure with nothing changed must leave the value alone
    overridingEngine.reconfigure();
    assertEquals("Test4", TestAnnotator.stringParamValue);

    // --- aggregate that declares only IntParam, so StringParam is NOT overridden ---
    annotatorDesc.getMetaData().getConfigurationParameterSettings().setParameterSettings(
            new NameValuePair[] { new NameValuePair_impl("StringParam", "Test1") });
    AnalysisEngineDescription passThroughDesc = new AnalysisEngineDescription_impl();
    passThroughDesc.setFrameworkImplementation(Constants.JAVA_FRAMEWORK_NAME);
    passThroughDesc.setPrimitive(false);
    passThroughDesc.getMetaData().setName("Test Aggregate TAE");
    passThroughDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("Test", annotatorDesc);
    FixedFlow_impl passThroughFlow = new FixedFlow_impl();
    passThroughFlow.setFixedFlow(new String[] { "Test" });
    passThroughDesc.getAnalysisEngineMetaData().setFlowConstraints(passThroughFlow);
    ConfigurationParameter intDecl = new ConfigurationParameter_impl();
    intDecl.setName("IntParam");
    intDecl.setDescription("parameter with Integer data type");
    intDecl.setType(ConfigurationParameter.TYPE_INTEGER);
    passThroughDesc.getMetaData().getConfigurationParameterDeclarations()
            .setConfigurationParameters(new ConfigurationParameter[] { intDecl });
    passThroughDesc.getMetaData().getConfigurationParameterSettings().setParameterSettings(
            new NameValuePair[] { new NameValuePair_impl("IntParam", new Integer(42)) });

    AggregateAnalysisEngine_impl passThroughEngine = new AggregateAnalysisEngine_impl();
    passThroughEngine.initialize(passThroughDesc, null);
    // after initialization the annotator holds the delegate's own StringParam value
    assertEquals("Test1", TestAnnotator.stringParamValue);

    // changing the unrelated IntParam and reconfiguring must not touch StringParam
    passThroughEngine.setConfigParameterValue("IntParam", new Integer(0));
    passThroughEngine.reconfigure();
    assertEquals("Test1", TestAnnotator.stringParamValue);
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
/**
 * Builds a primitive engine description that declares a type system (two annotation subtypes
 * and a string subtype with allowed values), a type priority list and three FS indexes, then
 * initializes an engine from it and checks that a CAS created by that engine exposes the
 * declared types, features and indexes.
 *
 * @throws Exception
 *           declared by the signature; exceptions caught inside the test are handed to
 *           JUnitExtension.handleException
 */
public void testCreateAnalysisProcessData() throws Exception {
try {
// create simple primitive TAE with type system and indexes
AnalysisEngineDescription desc = new AnalysisEngineDescription_impl();
desc.setPrimitive(true);
desc.getMetaData().setName("Test Primitive TAE");
desc.setAnnotatorImplementationName("org.apache.uima.analysis_engine.impl.TestAnnotator");
// Type1: annotation subtype with an integer feature
TypeSystemDescription typeSystem = new TypeSystemDescription_impl();
TypeDescription type1 = typeSystem.addType("Type1", "Test Type One",
CAS.TYPE_NAME_ANNOTATION);
FeatureDescription feat1 = new FeatureDescription_impl();
feat1.setName("Feature1");
feat1.setRangeTypeName(CAS.TYPE_NAME_INTEGER);
type1.setFeatures(new FeatureDescription[] { feat1 });
// Type2: annotation subtype whose feature ranges over EnumType (added just below)
TypeDescription type2 = typeSystem.addType("Type2", "Test Type Two",
CAS.TYPE_NAME_ANNOTATION);
FeatureDescription feat2 = new FeatureDescription_impl();
feat2.setName("Feature2");
feat2.setRangeTypeName("EnumType");
type2.setFeatures(new FeatureDescription[] { feat2 });
// EnumType: subtype of uima.cas.String restricted to the allowed values "One" and "Two"
TypeDescription enumType = typeSystem.addType("EnumType", "Test Enumerated Type",
"uima.cas.String");
enumType.setAllowedValues(new AllowedValue[] { new AllowedValue_impl("One", "First Value"),
new AllowedValue_impl("Two", "Second Value") });
desc.getAnalysisEngineMetaData().setTypeSystem(typeSystem);
// a single priority list: Type1 ahead of Type2 (relied on by the Index3 assertions below)
TypePriorities typePriorities = new TypePriorities_impl();
TypePriorityList priorityList = typePriorities.addPriorityList();
priorityList.addType("Type1");
priorityList.addType("Type2");
desc.getAnalysisEngineMetaData().setTypePriorities(typePriorities);
// Index1: over Type1, keyed on Feature1 in standard order; no kind is set explicitly and the
// assertions below expect it to come out as a sorted index
FsIndexDescription index1 = new FsIndexDescription_impl();
index1.setLabel("Index1");
index1.setTypeName("Type1");
FsIndexKeyDescription key1 = new FsIndexKeyDescription_impl();
key1.setFeatureName("Feature1");
key1.setComparator(FSIndexComparator.STANDARD_COMPARE);
index1.setKeys(new FsIndexKeyDescription[] { key1 });
// Index2: set index over Type2, keyed on Feature2 in reverse order
FsIndexDescription index2 = new FsIndexDescription_impl();
index2.setLabel("Index2");
index2.setTypeName("Type2");
index2.setKind(FsIndexDescription.KIND_SET);
FsIndexKeyDescription key2 = new FsIndexKeyDescription_impl();
key2.setFeatureName("Feature2");
key2.setComparator(FSIndexComparator.REVERSE_STANDARD_COMPARE);
index2.setKeys(new FsIndexKeyDescription[] { key2 });
// Index3: sorted index over uima.tcas.Annotation, keyed on begin and then on type priority
FsIndexDescription index3 = new FsIndexDescription_impl();
index3.setLabel("Index3");
index3.setTypeName("uima.tcas.Annotation");
index3.setKind(FsIndexDescription.KIND_SORTED);
FsIndexKeyDescription key3 = new FsIndexKeyDescription_impl();
key3.setFeatureName("begin");
key3.setComparator(FSIndexComparator.STANDARD_COMPARE);
FsIndexKeyDescription key4 = new FsIndexKeyDescription_impl();
key4.setTypePriority(true);
index3.setKeys(new FsIndexKeyDescription[] { key3, key4 });
desc.getAnalysisEngineMetaData().setFsIndexes(
new FsIndexDescription[] { index1, index2, index3 });
// instantiate TextAnalysisEngine
PrimitiveAnalysisEngine_impl ae = new PrimitiveAnalysisEngine_impl();
ae.initialize(desc, null); // this calls createAnalysisProcessData
// check results in CAS
// type system
CAS cas = ae.newCAS();
TypeSystem ts = cas.getTypeSystem();
Type t1 = ts.getType("Type1");
Assert.assertEquals("Type1", t1.getName());
// the feature must be reachable both from its type and by its full name
Feature f1 = t1.getFeatureByBaseName("Feature1");
Feature f1a = ts.getFeatureByFullName("Type1:Feature1");
Assert.assertEquals(f1, f1a);
Assert.assertEquals("Feature1", f1.getShortName());
Assert.assertEquals(t1, f1.getDomain());
Type t2 = ts.getType("Type2");
Assert.assertEquals("Type2", t2.getName());
Feature f2 = t2.getFeatureByBaseName("Feature2");
Feature f2a = ts.getFeatureByFullName("Type2:Feature2");
Assert.assertEquals(f2, f2a);
Assert.assertEquals("Feature2", f2.getShortName());
Assert.assertEquals(t2, f2.getDomain());
Type et = ts.getType("EnumType");
Assert.assertEquals("EnumType", et.getName());
Assert.assertEquals(et, f2.getRange());
// indexes
FSIndexRepository irep = cas.getIndexRepository();
FSIndex ind = irep.getIndex("Index1");
Assert.assertNotNull(ind);
Assert.assertEquals("Type1", ind.getType().getName());
Assert.assertEquals(FSIndex.SORTED_INDEX, ind.getIndexingStrategy());
// Index1 uses standard order on Feature1, so the FS holding 0 compares below the one holding 1
FeatureStructure fs1 = cas.createFS(t1);
fs1.setIntValue(f1, 0);
FeatureStructure fs2 = cas.createFS(t1);
fs2.setIntValue(f1, 1);
Assert.assertTrue(ind.compare(fs1, fs2) < 0);
FSIndex ind2 = irep.getIndex("Index2");
Assert.assertNotNull(ind2);
Assert.assertEquals("Type2", ind2.getType().getName());
Assert.assertEquals(FSIndex.SET_INDEX, ind2.getIndexingStrategy());
// Index2 uses reverse order on Feature2, so "One" is expected to compare above "Two"
FeatureStructure fs3 = cas.createFS(t2);
fs3.setStringValue(f2, "One");
FeatureStructure fs4 = cas.createFS(t2);
fs4.setStringValue(f2, "Two");
Assert.assertTrue(ind2.compare(fs3, fs4) > 0);
FSIndex ind3 = irep.getIndex("Index3");
Assert.assertNotNull(ind3);
Assert.assertEquals("uima.tcas.Annotation", ind3.getType().getName());
Assert.assertEquals(FSIndex.SORTED_INDEX, ind3.getIndexingStrategy());
// all three annotations share begin == 0, so only the type-priority key can separate them:
// Type1 annotations are expected to sort ahead of Type2 annotations
AnnotationFS fs5 = cas.createAnnotation(t1, 0, 0);
AnnotationFS fs6 = cas.createAnnotation(t2, 0, 0);
AnnotationFS fs7 = cas.createAnnotation(t1, 0, 0);
Assert.assertTrue(ind3.compare(fs5, fs6) < 0);
Assert.assertTrue(ind3.compare(fs6, fs7) > 0);
// only way to check if allowed values is correct is to try to set an
// invalid value?
// "Three" is not among EnumType's allowed values, so a CASRuntimeException is expected
CASRuntimeException ex = null;
try {
fs4.setStringValue(f2, "Three");
} catch (CASRuntimeException e) {
ex = e;
}
Assert.assertNotNull(ex);
} catch (Exception e) {
JUnitExtension.handleException(e);
}
}
/**
 * Initializes an aggregate from AggregateTaeForMergeTest.xml and checks the metadata the
 * aggregate reports afterwards: the merged type system, the type priority lists and the FS
 * index descriptions. Finally verifies that a CAS created by the aggregate knows every
 * expected type.
 *
 * @throws Exception
 *           declared by the signature; exceptions caught inside the test are handed to
 *           JUnitExtension.handleException
 */
public void testProcessDelegateAnalysisEngineMetaData() throws Exception {
  try {
    // aggregate whose delegates each declare type system, type priorities, and indexes;
    // initialize() is what triggers processDelegateAnalysisEngineMetaData()
    XMLInputSource mergeDescriptor = new XMLInputSource(JUnitExtension
            .getFile("TextAnalysisEngineImplTest/AggregateTaeForMergeTest.xml"));
    AnalysisEngineDescription mergeDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(mergeDescriptor);
    AggregateAnalysisEngine_impl aggregate = new AggregateAnalysisEngine_impl();
    aggregate.initialize(mergeDesc, Collections.EMPTY_MAP);

    // expected merged types, row by row: name, supertype, number of features
    final String[] typeNames = { "NamedEntity", "Person", "Place", "Org", "DocumentStructure",
        "Paragraph", "Sentence", "test.flowController.Test" };
    final String[] supertypeNames = { "uima.tcas.Annotation", "NamedEntity", "NamedEntity",
        "uima.tcas.Annotation", "uima.tcas.Annotation", "DocumentStructure",
        "DocumentStructure", "uima.tcas.Annotation" };
    final int[] featureCounts = { 1, 1, 3, 0, 0, 0, 0, 1 };

    // merged type system
    TypeSystemDescription mergedTypes = aggregate.getAnalysisEngineMetaData().getTypeSystem();
    Assert.assertEquals(8, mergedTypes.getTypes().length);
    for (int i = 0; i < typeNames.length; i++) {
      TypeDescription merged = mergedTypes.getType(typeNames[i]);
      Assert.assertNotNull(merged);
      Assert.assertEquals(supertypeNames[i], merged.getSupertypeName());
      Assert.assertEquals(featureCounts[i], merged.getFeatures().length);
    }

    // type priorities: three lists, two of length 2 and one of length 3, in any order
    TypePriorities mergedPriorities = aggregate.getAnalysisEngineMetaData().getTypePriorities();
    Assert.assertNotNull(mergedPriorities);
    TypePriorityList[] lists = mergedPriorities.getPriorityLists();
    Assert.assertEquals(3, lists.length);
    String[] first = lists[0].getTypes();
    String[] second = lists[1].getTypes();
    String[] third = lists[2].getTypes();
    Assert.assertTrue((first.length == 2 && second.length == 2 && third.length == 3)
            || (first.length == 2 && second.length == 3 && third.length == 2)
            || (first.length == 3 && second.length == 2 && third.length == 2));

    // indexes: three labels, each of which must appear somewhere (order is not defined)
    FsIndexDescription[] mergedIndexes = aggregate.getAnalysisEngineMetaData().getFsIndexes();
    Assert.assertEquals(3, mergedIndexes.length);
    String labelA = mergedIndexes[0].getLabel();
    String labelB = mergedIndexes[1].getLabel();
    String labelC = mergedIndexes[2].getLabel();
    Assert.assertTrue(labelA.equals("DocStructIndex") || labelB.equals("DocStructIndex")
            || labelC.equals("DocStructIndex"));
    Assert.assertTrue(labelA.equals("PlaceIndex") || labelB.equals("PlaceIndex")
            || labelC.equals("PlaceIndex"));
    Assert.assertTrue(labelA.equals("FlowControllerTestIndex")
            || labelB.equals("FlowControllerTestIndex")
            || labelC.equals("FlowControllerTestIndex"));

    // a CAS created by the aggregate must know every expected type
    CAS cas = aggregate.newCAS();
    TypeSystem casTypes = cas.getTypeSystem();
    for (int i = 0; i < typeNames.length; i++) {
      assertNotNull(casTypes.getType(typeNames[i]));
    }
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
/**
 * Calls collectionProcessComplete on a primitive engine, on an aggregate wrapping that
 * primitive, and on an engine produced from AggregateForCollectionProcessCompleteTest.xml,
 * where the value recorded last by AnnotatorForCollectionProcessCompleteTest must be "One".
 *
 * @throws Exception
 *           declared by the signature; exceptions caught inside the test are handed to
 *           JUnitExtension.handleException
 */
public void testCollectionProcessComplete() throws Exception {
  try {
    // primitive engine backed by TestAnnotator
    AnalysisEngineDescription annotatorDesc = new AnalysisEngineDescription_impl();
    annotatorDesc.setPrimitive(true);
    annotatorDesc
            .setAnnotatorImplementationName("org.apache.uima.analysis_engine.impl.TestAnnotator");
    annotatorDesc.getMetaData().setName("Test Primitive TAE");
    PrimitiveAnalysisEngine_impl primitiveEngine = new PrimitiveAnalysisEngine_impl();
    primitiveEngine.initialize(annotatorDesc, null);
    primitiveEngine.collectionProcessComplete(new ProcessTrace_impl());

    // aggregate with the same annotator as its only delegate; the FixedFlow_impl is left
    // without an explicit fixed flow
    AnalysisEngineDescription wrapperDesc = new AnalysisEngineDescription_impl();
    wrapperDesc.setPrimitive(false);
    wrapperDesc.getMetaData().setName("Test Aggregate TAE");
    wrapperDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("Test", annotatorDesc);
    wrapperDesc.getAnalysisEngineMetaData().setFlowConstraints(new FixedFlow_impl());
    AggregateAnalysisEngine_impl wrapperEngine = new AggregateAnalysisEngine_impl();
    wrapperEngine.initialize(wrapperDesc, null);
    wrapperEngine.collectionProcessComplete(new ProcessTrace_impl());

    // check that the fixedFlow order is used
    File orderingDescriptor = JUnitExtension
            .getFile("TextAnalysisEngineImplTest/AggregateForCollectionProcessCompleteTest.xml");
    XMLInputSource orderingSource = new XMLInputSource(orderingDescriptor);
    AnalysisEngineDescription orderingDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(orderingSource);
    AnalysisEngine orderingEngine = UIMAFramework.produceAnalysisEngine(orderingDesc);
    orderingEngine.collectionProcessComplete();
    assertEquals("One", AnnotatorForCollectionProcessCompleteTest.lastValue);
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
/**
 * Calls batchProcessComplete once on a primitive engine backed by TestAnnotator and once on
 * an aggregate whose fixed flow consists of that primitive under the key "Test". The test
 * makes no assertions of its own; it passes when neither call throws.
 *
 * @throws Exception
 *           if JUnitExtension.handleException rethrows a failure raised inside the test
 */
public void testBatchProcessComplete() throws Exception {
  try {
    // primitive engine description that points at the TestAnnotator implementation
    AnalysisEngineDescription annotatorDesc = new AnalysisEngineDescription_impl();
    annotatorDesc.setPrimitive(true);
    annotatorDesc.setAnnotatorImplementationName(
            "org.apache.uima.analysis_engine.impl.TestAnnotator");
    annotatorDesc.getMetaData().setName("Test Primitive TAE");

    PrimitiveAnalysisEngine_impl primitiveEngine = new PrimitiveAnalysisEngine_impl();
    primitiveEngine.initialize(annotatorDesc, null);
    primitiveEngine.batchProcessComplete(new ProcessTrace_impl());

    // aggregate description wrapping the primitive as its single delegate
    AnalysisEngineDescription aggregateDesc = new AnalysisEngineDescription_impl();
    aggregateDesc.setPrimitive(false);
    aggregateDesc.getMetaData().setName("Test Aggregate TAE");
    aggregateDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("Test", annotatorDesc);

    FixedFlow_impl singleStepFlow = new FixedFlow_impl();
    singleStepFlow.setFixedFlow(new String[] { "Test" });
    aggregateDesc.getAnalysisEngineMetaData().setFlowConstraints(singleStepFlow);

    AggregateAnalysisEngine_impl aggregateEngine = new AggregateAnalysisEngine_impl();
    aggregateEngine.initialize(aggregateDesc, null);
    aggregateEngine.batchProcessComplete(new ProcessTrace_impl());
  } catch (Exception failure) {
    JUnitExtension.handleException(failure);
  }
}
/**
 * Processes one document through the aggregate described by AggregateTaeWithCasConsumer.xml
 * and then checks the static typeSystemInitCalled flags of TestAnnotator and
 * AnnotationWriter.
 *
 * @throws Exception
 *           if JUnitExtension.handleException rethrows a failure raised inside the test
 */
public void testTypeSystemInit() throws Exception {
  try {
    AnalysisEngineDescription description = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/AggregateTaeWithCasConsumer.xml")));

    AggregateAnalysisEngine_impl engine = new AggregateAnalysisEngine_impl();
    engine.initialize(description, null);

    // run a single document through the aggregate
    CAS document = engine.newCAS();
    document.setDocumentText("This is a test");
    engine.process(document);

    // NOTE(review): both flags are static and are not cleared in this method, so a value
    // left behind by an earlier test would also satisfy these checks -- confirm.
    assertTrue(TestAnnotator.typeSystemInitCalled);
    assertTrue(AnnotationWriter.typeSystemInitCalled);
  } catch (Exception failure) {
    JUnitExtension.handleException(failure);
  }
}
/**
 * Exercises processAndOutputNewCASes on a primitive segmenter, on four aggregate
 * descriptors, and finally on the descriptor AggregateForParallelStepCasMultiplierTest.xml.
 * Every output CAS is compared with the expected segment text and released before the next
 * one is requested.
 *
 * @throws Exception
 *           if JUnitExtension.handleException rethrows a failure raised inside the test
 */
public void testProcessAndOutputNewCASes() throws Exception {
  try {
    String[] lineSegments = { "Line one", "Line two", "Line three" };

    // primitive: only the text of each output CAS is checked
    AnalysisEngineDescription segmenterDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/NewlineSegmenter.xml")));
    AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(segmenterDesc);
    CAS inputCas = engine.newCAS();
    inputCas.setDocumentText("Line one\nLine two\nLine three");
    CasIterator segments = engine.processAndOutputNewCASes(inputCas);
    for (int i = 0; i < lineSegments.length; i++) {
      assertTrue(segments.hasNext());
      CAS segment = segments.next();
      assertEquals(lineSegments[i], segment.getDocumentText());
      segment.release();
    }
    assertFalse(segments.hasNext());

    // Aggregate scenarios, executed in this order:
    // plain aggregate, nested aggregate, two segmenters, dropping segments.
    String[] aggregateDescriptors = {
        "TextAnalysisEngineImplTest/AggregateWithSegmenter.xml",
        "TextAnalysisEngineImplTest/AggregateContainingAggregateSegmenter.xml",
        "TextAnalysisEngineImplTest/AggregateWith2Segmenters.xml",
        "TextAnalysisEngineImplTest/AggregateSegmenterForDropTest.xml" };
    String[] aggregateInputs = {
        "Line one\nLine two\nLine three",
        "Line one\nLine two\nLine three",
        "One\tTwo\nThree\tFour",
        // segments whose text is DROP should not be output, so the expected results are
        // the same as for the plain aggregate
        "Line one\nDROP\nLine two\nDROP\nLine three" };
    String[][] aggregateOutputs = {
        lineSegments,
        lineSegments,
        { "One", "Two", "Three", "Four" },
        lineSegments };

    for (int s = 0; s < aggregateDescriptors.length; s++) {
      AnalysisEngineDescription aggregateDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension.getFile(aggregateDescriptors[s])));
      engine = UIMAFramework.produceAnalysisEngine(aggregateDesc);
      inputCas = engine.newCAS();
      inputCas.setDocumentText(aggregateInputs[s]);
      segments = engine.processAndOutputNewCASes(inputCas);

      String[] expected = aggregateOutputs[s];
      for (int i = 0; i < expected.length; i++) {
        assertTrue(segments.hasNext());
        CAS segment = segments.next();
        assertEquals(expected[i], segment.getDocumentText());
        // the annotator must have seen this segment by the time it is returned
        assertEquals(expected[i], TestAnnotator.lastDocument);
        segment.release();
      }
      assertFalse(segments.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals(expected[expected.length - 1], TestAnnotator.lastDocument);
    }

    // with ParallelStep: the input CAS of the last aggregate scenario is reset and reused
    AnalysisEngineDescription parallelDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/AggregateForParallelStepCasMultiplierTest.xml")));
    engine = UIMAFramework.produceAnalysisEngine(parallelDesc);
    inputCas.reset();
    inputCas.setDocumentText("One\tTwo\nThree\tFour");
    segments = engine.processAndOutputNewCASes(inputCas);

    // output order is not checked; each expected text must show up exactly once
    Set remaining = new HashSet(Arrays.asList(new String[] {
        "One", "Two\nThree", "Four", "One\tTwo", "Three\tFour" }));
    while (segments.hasNext()) {
      CAS segment = segments.next();
      assertTrue(remaining.remove(segment.getDocumentText()));
      segment.release();
    }
    assertTrue(remaining.isEmpty());
  } catch (Exception failure) {
    JUnitExtension.handleException(failure);
  }
}
/**
 * Exercises processAndOutputNewCASes on descriptors that are expected to fail part-way
 * through, and checks what FlowControllerForErrorTest recorded in its static
 * abortedDocuments and failedAEs collections. The first three scenarios are each run twice
 * on the same engine and CAS.
 *
 * @throws Exception
 *           if JUnitExtension.handleException rethrows a failure raised inside the test
 */
public void testProcessAndOutputNewCASesWithError() throws Exception {
  try {
    String[] goodLines = { "Line one", "Line two" };

    // aggregate
    AnalysisEngineDescription aggSegDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/AggregateWithSegmenterForErrorTest.xml")));
    AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(aggSegDesc);
    CAS inputCas = engine.newCAS();
    for (int pass = 0; pass < 2; pass++) { // verify we can do this more than once
      FlowControllerForErrorTest.reset();
      inputCas.setDocumentText("Line one\nLine two\nERROR");
      CasIterator segments = engine.processAndOutputNewCASes(inputCas);
      for (int k = 0; k < goodLines.length; k++) {
        assertTrue(segments.hasNext());
        CAS segment = segments.next();
        assertEquals(goodLines[k], segment.getDocumentText());
        segment.release();
      }
      try {
        assertTrue(segments.hasNext());
        segments.next();
        fail(); // the above should throw an exception
      } catch (AnalysisEngineProcessException expected) {
        // expected: the third segment is the one that fails
      }
      // check that FlowController was notified twice, once for the
      // segment's flow and once for the complete document's flow
      assertEquals(2, FlowControllerForErrorTest.abortedDocuments.size());
      assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
      assertTrue(FlowControllerForErrorTest.abortedDocuments
              .contains("Line one\nLine two\nERROR"));
      inputCas.reset();
    }

    // nested aggregate
    AnalysisEngineDescription nestedAggSegDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/NestedAggregateSegmenterForErrorTest.xml")));
    engine = UIMAFramework.produceAnalysisEngine(nestedAggSegDesc);
    inputCas = engine.newCAS();
    for (int pass = 0; pass < 2; pass++) { // verify we can do this more than once
      FlowControllerForErrorTest.reset();
      inputCas.setDocumentText("Line one\nLine two\nERROR");
      CasIterator segments = engine.processAndOutputNewCASes(inputCas);
      for (int k = 0; k < goodLines.length; k++) {
        assertTrue(segments.hasNext());
        CAS segment = segments.next();
        assertEquals(goodLines[k], segment.getDocumentText());
        segment.release();
      }
      try {
        assertTrue(segments.hasNext());
        segments.next();
        fail(); // the above should throw an exception
      } catch (AnalysisEngineProcessException expected) {
        // expected: the third segment is the one that fails
      }
      // check that FlowController was notified three times, once for the
      // segment's flow and twice for the complete document's flow (once
      // in each aggregate)
      assertEquals(3, FlowControllerForErrorTest.abortedDocuments.size());
      assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
      assertTrue(FlowControllerForErrorTest.abortedDocuments
              .contains("Line one\nLine two\nERROR"));
      // removing one occurrence must leave a second one behind
      FlowControllerForErrorTest.abortedDocuments.remove("Line one\nLine two\nERROR");
      assertTrue(FlowControllerForErrorTest.abortedDocuments
              .contains("Line one\nLine two\nERROR"));
      inputCas.reset();
    }

    // 2 segmenters
    AnalysisEngineDescription twoSegDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/AggregateWith2SegmentersForErrorTest.xml")));
    engine = UIMAFramework.produceAnalysisEngine(twoSegDesc);
    inputCas = engine.newCAS();
    String[] goodWords = { "One", "Two", "Three" };
    for (int pass = 0; pass < 2; pass++) { // verify we can do this more than once
      // only the aborted-document list is cleared here; failedAEs is not checked in this loop
      FlowControllerForErrorTest.abortedDocuments.clear();
      inputCas.setDocumentText("One\tTwo\nThree\tERROR");
      CasIterator segments = engine.processAndOutputNewCASes(inputCas);
      for (int k = 0; k < goodWords.length; k++) {
        assertTrue(segments.hasNext());
        CAS segment = segments.next();
        assertEquals(goodWords[k], segment.getDocumentText());
        segment.release();
      }
      try {
        assertTrue(segments.hasNext());
        segments.next();
        fail(); // the above should throw an exception
      } catch (AnalysisEngineProcessException expected) {
        // expected: the fourth segment is the one that fails
      }
      // check that FlowController was notified three times, once for each level of granularity
      assertEquals(3, FlowControllerForErrorTest.abortedDocuments.size());
      assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
      assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Three\tERROR"));
      assertTrue(FlowControllerForErrorTest.abortedDocuments
              .contains("One\tTwo\nThree\tERROR"));
      inputCas.reset();
    }

    // segmenter that requests too many CASes
    AnalysisEngineDescription badSegmenterDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/BadSegmenter.xml")));
    engine = UIMAFramework.produceAnalysisEngine(badSegmenterDesc);
    inputCas = engine.newCAS();
    inputCas.setDocumentText("Line one\nLine two\nLine three");
    CasIterator results = engine.processAndOutputNewCASes(inputCas);
    assertTrue(results.hasNext());
    CAS firstResult = results.next(); // first call OK
    firstResult.release();
    assertTrue(results.hasNext());
    // next call should fail with AnalysisEngineProcessException
    try {
      results.next();
      fail(); // should not get here
    } catch (AnalysisEngineProcessException expected) {
      // should get here
    }

    // bad segmenter in an aggregate
    AnalysisEngineDescription aggWithBadSegmenterDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/AggregateWithBadSegmenterForErrorTest.xml")));
    engine = UIMAFramework.produceAnalysisEngine(aggWithBadSegmenterDesc);
    FlowControllerForErrorTest.reset();
    inputCas = engine.newCAS();
    inputCas.setDocumentText("Line one\nLine two\nLine three");
    results = engine.processAndOutputNewCASes(inputCas);
    assertTrue(results.hasNext());
    firstResult = results.next(); // first call OK
    firstResult.release();
    assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
    assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());
    // next call should fail with AnalysisEngineProcessException
    try {
      if (results.hasNext()) {
        results.next();
      }
      fail(); // should not get here
    } catch (AnalysisEngineProcessException expected) {
      // should get here
    }
    assertEquals(1, FlowControllerForErrorTest.abortedDocuments.size());
    assertTrue(FlowControllerForErrorTest.abortedDocuments
            .contains("Line one\nLine two\nLine three"));
    assertEquals(1, FlowControllerForErrorTest.failedAEs.size());
    assertTrue(FlowControllerForErrorTest.failedAEs.contains("Segmenter"));

    // configure AE to continue after error
    engine = UIMAFramework.produceAnalysisEngine(aggWithBadSegmenterDesc);
    engine.setConfigParameterValue("ContinueOnFailure", Boolean.TRUE);
    engine.reconfigure();
    FlowControllerForErrorTest.reset();
    inputCas.reset();
    inputCas.setDocumentText("Line one\nLine two\nLine three");
    results = engine.processAndOutputNewCASes(inputCas);
    assertTrue(results.hasNext());
    firstResult = results.next(); // first call OK
    firstResult.release();
    assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
    assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());
    // next call should not have aborted, but FC should have been notified of the failure,
    // and no CAS should come back
    assertFalse(results.hasNext());
    assertEquals(0, FlowControllerForErrorTest.abortedDocuments.size());
    assertEquals(1, FlowControllerForErrorTest.failedAEs.size());
    assertTrue(FlowControllerForErrorTest.failedAEs.contains("Segmenter"));
  } catch (Exception failure) {
    JUnitExtension.handleException(failure);
  }
}
/**
 * Feeds three input CASes, each carrying one SourceDocumentInformation feature structure
 * with a distinct uri, into the engine described by NewlineResegmenter.xml. It checks the
 * text of each output CAS and the covered text and uri of the SourceDocumentInformation
 * annotations found in it.
 *
 * @throws Exception
 *           if JUnitExtension.handleException rethrows a failure raised inside the test
 */
public void testResegment() throws Exception {
  try {
    // primitive
    AnalysisEngineDescription resegmenterDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/NewlineResegmenter.xml")));
    AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(resegmenterDesc);

    // first input: "This is", tagged with uri cas1
    CAS firstInput = engine.newCAS();
    Type sdiType = firstInput.getTypeSystem().getType(
            "org.apache.uima.examples.SourceDocumentInformation");
    Feature uriFeat = sdiType.getFeatureByBaseName("uri");
    firstInput.setDocumentText("This is");
    FeatureStructure firstSdi = firstInput.createFS(sdiType);
    firstSdi.setStringValue(uriFeat, "cas1");
    firstInput.getIndexRepository().addFS(firstSdi);

    // second input: ends the first line and starts the second, tagged with uri cas2
    CAS secondInput = engine.newCAS();
    secondInput.setDocumentText(" one.\nThis is");
    FeatureStructure secondSdi = secondInput.createFS(sdiType);
    secondSdi.setStringValue(uriFeat, "cas2");
    secondInput.getIndexRepository().addFS(secondSdi);

    // third input: ends the second line, tagged with uri cas3
    CAS thirdInput = engine.newCAS();
    thirdInput.setDocumentText(" two.\n");
    FeatureStructure thirdSdi = thirdInput.createFS(sdiType);
    thirdSdi.setStringValue(uriFeat, "cas3");
    thirdInput.getIndexRepository().addFS(thirdSdi);

    // input first CAS. Should be no segments yet.
    CasIterator segments = engine.processAndOutputNewCASes(firstInput);
    assertFalse(segments.hasNext());

    // input second CAS. We should get back one segment.
    segments = engine.processAndOutputNewCASes(secondInput);
    assertTrue(segments.hasNext());
    CAS segment = segments.next();
    assertEquals("This is one.", segment.getDocumentText());
    // -- check SourceDocumentInformation FSs: { covered text, uri } in index order
    String[][] firstSegmentSdis = { { "This is", "cas1" }, { " one.", "cas2" } };
    Iterator sdiIter = segment.getAnnotationIndex(sdiType).iterator();
    for (int k = 0; k < firstSegmentSdis.length; k++) {
      assertTrue(sdiIter.hasNext());
      AnnotationFS sdi = (AnnotationFS) sdiIter.next();
      assertEquals(firstSegmentSdis[k][0], sdi.getCoveredText());
      assertEquals(firstSegmentSdis[k][1], sdi.getStringValue(uriFeat));
    }
    assertFalse(sdiIter.hasNext());
    // --
    assertFalse(segments.hasNext());

    // input third CAS. We should get back one more segment.
    segments = engine.processAndOutputNewCASes(thirdInput);
    assertTrue(segments.hasNext());
    segment = segments.next();
    assertEquals("This is two.", segment.getDocumentText());
    // -- check SourceDocumentInformation FSs: { covered text, uri } in index order
    String[][] secondSegmentSdis = { { "This is", "cas2" }, { " two.", "cas3" } };
    sdiIter = segment.getAnnotationIndex(sdiType).iterator();
    for (int k = 0; k < secondSegmentSdis.length; k++) {
      assertTrue(sdiIter.hasNext());
      AnnotationFS sdi = (AnnotationFS) sdiIter.next();
      assertEquals(secondSegmentSdis[k][0], sdi.getCoveredText());
      assertEquals(secondSegmentSdis[k][1], sdi.getStringValue(uriFeat));
    }
    assertFalse(sdiIter.hasNext());
    // --
    assertFalse(segments.hasNext());
  } catch (Exception failure) {
    JUnitExtension.handleException(failure);
  }
}
/**
 * Builds a result specification that requests uima.tt.TokenLikeAnnotation, passes it with the
 * capabilities of the engine from SequencerTest/Annotator1.xml to
 * computeAnalysisComponentResultSpec, and checks which types the computed specification
 * contains.
 *
 * @throws Exception
 *           if JUnitExtension.handleException rethrows a failure raised inside the test
 */
public void testComputeAnalysisComponentResultSpec() throws Exception {
  try {
    AnalysisEngineDescription annotatorDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension.getFile("SequencerTest/Annotator1.xml")));
    PrimitiveAnalysisEngine_impl engine =
            (PrimitiveAnalysisEngine_impl) UIMAFramework.produceAnalysisEngine(annotatorDesc);
    CAS cas = engine.newCAS();

    // requested result types, compiled against the engine's type system
    ResultSpecification requested = new ResultSpecification_impl();
    requested.addResultType("uima.tt.TokenLikeAnnotation", true);
    requested.compile(cas.getTypeSystem());

    ResultSpecification computed = engine.computeAnalysisComponentResultSpec(
            requested, engine.getAnalysisEngineMetaData().getCapabilities());

    // TokenAnnotation is expected to be present; the other two types are not
    assertTrue(computed.containsType("uima.tt.TokenAnnotation"));
    assertFalse(computed.containsType("uima.tt.SentenceAnnotation"));
    assertFalse(computed.containsType("uima.tt.Lemma"));
  } catch (Exception failure) {
    JUnitExtension.handleException(failure);
  }
}
/**
 * Runs documents through the aggregate described by AggregateForErrorTest.xml, which is
 * expected to fail when the document text is "ERROR". It checks the notifications recorded
 * by FlowControllerForErrorTest both with the default configuration and with the
 * ContinueOnFailure parameter set to true.
 *
 * @throws Exception
 *           if JUnitExtension.handleException rethrows a failure raised inside the test
 */
public void testProcessWithError() throws Exception {
  try {
    // This test uses an aggregate AE that fails if the document text is set to "ERROR".
    AnalysisEngineDescription errorTestDesc = UIMAFramework.getXMLParser()
            .parseAnalysisEngineDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("TextAnalysisEngineImplTest/AggregateForErrorTest.xml")));
    AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(errorTestDesc);
    FlowControllerForErrorTest.reset();
    CAS document = engine.newCAS();

    // try document that should succeed
    document.setDocumentText("This is OK");
    engine.process(document);
    // flow controller should not be notified
    assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
    assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());

    // now one that fails
    document.reset();
    document.setDocumentText("ERROR");
    try {
      engine.process(document);
      fail();
    } catch (AnalysisEngineProcessException expected) {
      // expected
    }
    assertEquals(1, FlowControllerForErrorTest.abortedDocuments.size());
    assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
    assertEquals(1, FlowControllerForErrorTest.failedAEs.size());
    assertTrue(FlowControllerForErrorTest.failedAEs.contains("ErrorAnnotator"));

    // AE should still be able to process a new document now
    FlowControllerForErrorTest.reset();
    document.reset();
    document.setDocumentText("This is OK");
    engine.process(document);
    assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
    assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());

    // configure AE to continue after error
    engine.setConfigParameterValue("ContinueOnFailure", Boolean.TRUE);
    engine.reconfigure();
    document.reset();
    document.setDocumentText("ERROR");
    engine.process(document); // should not throw exception now
    // document should not have aborted, but FC should have been notified of the failure
    assertEquals(0, FlowControllerForErrorTest.abortedDocuments.size());
    assertEquals(1, FlowControllerForErrorTest.failedAEs.size());
    assertTrue(FlowControllerForErrorTest.failedAEs.contains("ErrorAnnotator"));
  } catch (Exception failure) {
    JUnitExtension.handleException(failure);
  }
}
}