blob: 36ebbe0de1db91eb9c5e8581d04db6cdc0f164f0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ruta;
import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.ruta.engine.Ruta;
import org.apache.uima.ruta.engine.RutaEngine;
import org.apache.uima.ruta.engine.RutaTestUtils;
import org.apache.uima.ruta.engine.RutaTestUtils.TestFeature;
import org.junit.Test;
/**
 * Tests dictionary-based matching (WORDLIST with MARKFAST, WORDTABLE with MARKTABLE) on a
 * document that contains each person name in three spellings: separated by a space, written
 * together, and separated by the literal text {@code <x>}.
 *
 * <p>
 * Every test lets setup and script failures propagate so that the real cause is reported by
 * JUnit, and releases the CAS in a {@code finally} block so that it is returned even when an
 * assertion fails.
 */
public class WSDictionaryTest {

  /** Input text shared by all tests: three lines, each listing the same three names. */
  private static final String DOCUMENT = "Peter Kluegl, Marshall Schor, Joern Kottmann\n"
          + "PeterKluegl, MarshallSchor, JoernKottmann\n"
          + "Peter<x>Kluegl, Marshall<x>Schor, Joern<x>Kottmann\n";

  /** Number of lines in {@link #DOCUMENT}; each line is expected to yield one match per name. */
  private static final int DOCUMENT_LINES = 3;

  /** Script fragment declaring the word list used by the MARKFAST tests. */
  private static final String WORDLIST_DECLARATION = "WORDLIST list = 'org/apache/uima/ruta/WSDictionaryTestList.txt';";

  /**
   * Expected {name, system} feature values for the matches found in a single line of
   * {@link #DOCUMENT}, in document order.
   */
  private static final String[][] EXPECTED_TABLE_ROWS = { { "Peter Kluegl", "UIMA Ruta" },
      { "Marshall Schor", "UIMA Core" }, { "Joern Kottmann", "CAS Editor" } };

  /**
   * Applies {@code MARKFAST(T1, list)} with default engine settings and expects all nine name
   * variants of the document to be annotated.
   *
   * @throws Exception
   *           if the CAS cannot be created or the script cannot be applied
   */
  @Test
  public void test() throws Exception {
    String script = WORDLIST_DECLARATION + "MARKFAST(T1, list);";
    CAS cas = RutaTestUtils.getCAS(DOCUMENT);
    try {
      Ruta.apply(cas, script);
      assertAllNameVariantsMarked(cas);
    } finally {
      cas.release();
    }
  }

  /**
   * Applies {@code MARKFAST(T1, list, true, 0, false)} with the engine parameter
   * {@link RutaEngine#PARAM_DICT_REMOVE_WS} set to {@code true} and expects the same nine
   * annotations as {@link #test()}.
   *
   * @throws Exception
   *           if the CAS cannot be created or the script cannot be applied
   */
  @Test
  public void testDictRemoveWS() throws Exception {
    String script = WORDLIST_DECLARATION + "MARKFAST(T1, list, true, 0, false);";
    Map<String, Object> parameters = new HashMap<String, Object>();
    parameters.put(RutaEngine.PARAM_DICT_REMOVE_WS, true);
    CAS cas = RutaTestUtils.getCAS(DOCUMENT);
    try {
      Ruta.apply(cas, script, parameters);
      assertAllNameVariantsMarked(cas);
    } finally {
      cas.release();
    }
  }

  /**
   * Applies a MARKTABLE rule that creates {@code org.apache.uima.Struct} annotations and fills
   * their {@code name} and {@code system} string features from columns 1 and 2 of the word table.
   * Expects nine annotations whose feature values are the table's values, regardless of which
   * spelling variant was matched in the text.
   *
   * @throws Exception
   *           if the CAS cannot be created or the script cannot be applied
   */
  @Test
  public void testTableWithWS() throws Exception {
    String script = "WORDTABLE table = 'org/apache/uima/ruta/table2.csv';";
    script += "MARKTABLE(Struct, 1, table, true, 0, \"-.,\", 10, \"name\" = 1, \"system\" = 2);";

    // Declare the Struct annotation type together with its two string features.
    String typeName = "org.apache.uima.Struct";
    String nameFeatureName = "name";
    String systemFeatureName = "system";
    Map<String, String> complexTypes = new TreeMap<String, String>();
    complexTypes.put(typeName, "uima.tcas.Annotation");
    List<TestFeature> structFeatures = new ArrayList<RutaTestUtils.TestFeature>();
    structFeatures.add(new TestFeature(nameFeatureName, "", "uima.cas.String"));
    structFeatures.add(new TestFeature(systemFeatureName, "", "uima.cas.String"));
    Map<String, List<TestFeature>> features = new TreeMap<String, List<TestFeature>>();
    features.put(typeName, structFeatures);

    CAS cas = RutaTestUtils.getCAS(DOCUMENT, complexTypes, features);
    try {
      Ruta.apply(cas, script);

      Type structType = cas.getTypeSystem().getType(typeName);
      Feature nameFeature = structType.getFeatureByBaseName(nameFeatureName);
      Feature systemFeature = structType.getFeatureByBaseName(systemFeatureName);
      AnnotationIndex<AnnotationFS> index = cas.getAnnotationIndex(structType);
      assertEquals(DOCUMENT_LINES * EXPECTED_TABLE_ROWS.length, index.size());

      // The size check above guarantees that the iterator holds enough elements for the loops.
      FSIterator<AnnotationFS> iterator = index.iterator();
      for (int line = 0; line < DOCUMENT_LINES; line++) {
        for (String[] expected : EXPECTED_TABLE_ROWS) {
          AnnotationFS annotation = iterator.next();
          assertEquals("name feature in document line " + (line + 1), expected[0],
                  annotation.getStringValue(nameFeature));
          assertEquals("system feature in document line " + (line + 1), expected[1],
                  annotation.getStringValue(systemFeature));
        }
      }
    } finally {
      cas.release();
    }
  }

  /**
   * Asserts that the CAS contains exactly nine annotations of the test type with id 1
   * (presumably T1, the type used in the MARKFAST rules) covering every name variant of
   * {@link #DOCUMENT} in document order.
   */
  private static void assertAllNameVariantsMarked(CAS cas) {
    RutaTestUtils.assertAnnotationsEquals(cas, 1, 9, "Peter Kluegl", "Marshall Schor",
            "Joern Kottmann", "PeterKluegl", "MarshallSchor", "JoernKottmann", "Peter<x>Kluegl",
            "Marshall<x>Schor", "Joern<x>Kottmann");
  }
}