blob: 23207765a052c14b8f149660ec563efaa1a01bf9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ruta.action;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.ruta.engine.Ruta;
import org.apache.uima.ruta.engine.RutaTestUtils;
import org.apache.uima.ruta.engine.RutaTestUtils.TestFeature;
import org.junit.Test;
/**
 * Tests for the Ruta {@code SPLIT} action.
 *
 * <p>
 * Every test applies a small Ruta script to the same document and then checks the covered text of
 * the annotations that remain in the index after the split. Tests working on the {@code Complex}
 * type additionally verify that each resulting annotation still carries the {@code number}
 * feature pointing at the annotation covering "1".
 *
 * <p>
 * Failures while creating the CAS or applying the script are propagated to JUnit instead of being
 * printed and ignored, and the CAS is released in a {@code finally} block so that it is returned
 * even when an assertion fails.
 */
public class SplitTest {

  /** Document shared by all tests. */
  private static final String DOCUMENT = "Some text. More text , with 1 , and more. even more text.";

  /** Name of the additional annotation type that gets split. */
  private static final String COMPLEX_TYPE = "Complex";

  /** Name of the annotation-valued feature declared on {@link #COMPLEX_TYPE}. */
  private static final String NUMBER_FEATURE = "number";

  /** Supertype of {@link #COMPLEX_TYPE} and range type of {@link #NUMBER_FEATURE}. */
  private static final String ANNOTATION_TYPE = "uima.tcas.Annotation";

  /**
   * SPLIT(COMMA) with no further arguments: the assertions expect three segments that contain
   * neither the leading nor the trailing comma.
   */
  @Test
  public void testDefault() throws Exception {
    String script = "PERIOD #{-> T1} PERIOD;";
    script += " #{-> T1} PERIOD;";
    script += "T1{CONTAINS(NUM)-> CREATE(Complex, \"number\"= NUM)};";
    script += "Complex{-> SPLIT(COMMA)};";

    assertComplexSplit(script, "More text", "with 1", "and more");
  }

  /**
   * SPLIT(COMMA, true, true, false): the assertions expect every segment after the first to start
   * with the comma that precedes it.
   */
  @Test
  public void testAddBegin() throws Exception {
    String script = "PERIOD #{-> T1} PERIOD;";
    script += " #{-> T1} PERIOD;";
    script += "T1{CONTAINS(NUM)-> CREATE(Complex, \"number\"= NUM)};";
    script += "Complex{-> SPLIT(COMMA, true, true, false)};";

    assertComplexSplit(script, "More text", ", with 1", ", and more");
  }

  /**
   * SPLIT(COMMA, true, false, true): the assertions expect every segment before the last to end
   * with the comma that follows it.
   */
  @Test
  public void testAddEnd() throws Exception {
    String script = "PERIOD #{-> T1} PERIOD;";
    script += " #{-> T1} PERIOD;";
    script += "T1{CONTAINS(NUM)-> CREATE(Complex, \"number\"= NUM)};";
    script += "Complex{-> SPLIT(COMMA, true, false, true)};";

    assertComplexSplit(script, "More text ,", "with 1 ,", "and more");
  }

  /**
   * SPLIT(COMMA, true, true, true): the assertions expect the commas to be included on both sides
   * of the segments they delimit.
   */
  @Test
  public void testAddBoth() throws Exception {
    String script = "PERIOD #{-> T1} PERIOD;";
    script += " #{-> T1} PERIOD;";
    script += "T1{CONTAINS(NUM)-> CREATE(Complex, \"number\"= NUM)};";
    script += "Complex{-> SPLIT(COMMA, true, true, true)};";

    assertComplexSplit(script, "More text ,", ", with 1 ,", ", and more");
  }

  /**
   * SPLIT(T2, false), where T2 annotations are created by the script itself (one spanning up to
   * and including the first comma, one starting at the comma after the number).
   */
  @Test
  public void testBoundarySplit() throws Exception {
    String script = "PERIOD #{-> T1} PERIOD;";
    script += "#{-> T1} PERIOD;";
    script += "(# COMMA){-> T2};";
    script += "NUM (COMMA #){-> T2};";
    script += "T1{CONTAINS(NUM)-> CREATE(Complex, \"number\"= NUM)};";
    script += "Complex{-> SPLIT(T2, false)};";

    assertComplexSplit(script, "More text ,", "with 1", ", and more");
  }

  /**
   * SPLIT(PERIOD, true, false, true) on T1 annotations that already end with a period: the
   * assertions expect the three sentences to be left as they are.
   */
  @Test
  public void testBoundary() throws Exception {
    String script = "PERIOD (# PERIOD){-> T1};";
    script += "(# PERIOD){-> T1};";
    script += "T1{-> SPLIT(PERIOD, true, false, true)};";

    CAS cas = RutaTestUtils.getCAS(DOCUMENT);
    try {
      Ruta.apply(cas, script);

      Type type = RutaTestUtils.getTestType(cas, 1);
      AnnotationIndex<AnnotationFS> index = cas.getAnnotationIndex(type);
      assertEquals(3, index.size());

      FSIterator<AnnotationFS> iterator = index.iterator();
      assertEquals("Some text.", iterator.next().getCoveredText());
      assertEquals("More text , with 1 , and more.", iterator.next().getCoveredText());
      assertEquals("even more text.", iterator.next().getCoveredText());
    } finally {
      // Release the CAS even if one of the assertions above fails.
      cas.release();
    }
  }

  /**
   * Creates a CAS for {@link #DOCUMENT} whose type system additionally declares
   * {@link #COMPLEX_TYPE} with the feature {@link #NUMBER_FEATURE}.
   *
   * @return the freshly created CAS; the caller is responsible for releasing it
   * @throws Exception
   *           if the CAS cannot be created
   */
  private static CAS createComplexCas() throws Exception {
    Map<String, String> typeMap = new TreeMap<String, String>();
    typeMap.put(COMPLEX_TYPE, ANNOTATION_TYPE);

    List<TestFeature> features = new ArrayList<RutaTestUtils.TestFeature>();
    features.add(new TestFeature(NUMBER_FEATURE, "", ANNOTATION_TYPE));

    Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();
    featureMap.put(COMPLEX_TYPE, features);

    return RutaTestUtils.getCAS(DOCUMENT, typeMap, featureMap);
  }

  /**
   * Applies {@code script} to a fresh CAS and verifies the {@link #COMPLEX_TYPE} annotations that
   * exist afterwards.
   *
   * <p>
   * The index must contain exactly {@code expectedTexts.length} annotations whose covered texts
   * match {@code expectedTexts} in iteration order. For each of them the {@link #NUMBER_FEATURE}
   * value must be set and must cover the text "1".
   *
   * @param script
   *          the Ruta script to apply
   * @param expectedTexts
   *          the expected covered texts, in index order
   * @throws Exception
   *           if the CAS cannot be created or the script cannot be applied
   */
  private static void assertComplexSplit(String script, String... expectedTexts) throws Exception {
    CAS cas = createComplexCas();
    try {
      Ruta.apply(cas, script);

      Type type = cas.getTypeSystem().getType(COMPLEX_TYPE);
      assertNotNull(type);
      Feature numberFeature = type.getFeatureByBaseName(NUMBER_FEATURE);
      AnnotationIndex<AnnotationFS> index = cas.getAnnotationIndex(type);
      assertEquals(expectedTexts.length, index.size());

      FSIterator<AnnotationFS> iterator = index.iterator();
      for (String expectedText : expectedTexts) {
        AnnotationFS annotation = iterator.next();
        assertEquals(expectedText, annotation.getCoveredText());

        FeatureStructure featureValue = annotation.getFeatureValue(numberFeature);
        assertNotNull(featureValue);
        assertEquals("1", ((AnnotationFS) featureValue).getCoveredText());
      }
    } finally {
      // Release the CAS even if the script or one of the assertions above fails.
      cas.release();
    }
  }
}