// blob: 2669e119708ff72fc139dd2fea1840ecfc8abdc1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ruta.resource;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.ruta.FilterManager;
import org.apache.uima.ruta.RutaStream;
import org.apache.uima.ruta.engine.Ruta;
import org.apache.uima.ruta.engine.RutaTestUtils;
import org.apache.uima.ruta.seed.TextSeeder;
import org.apache.uima.ruta.type.RutaBasic;
import org.apache.uima.ruta.visitor.InferenceCrowd;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests for {@link TreeWordList}: once indirectly through a Ruta script and once by calling
 * {@code find} directly on a manually assembled {@link RutaStream}.
 */
public class TreeWordListTest {

  /** Fully qualified names of the types that are looked up and handed to the filter. */
  private static final String[] FILTERED_TYPE_NAMES = { "org.apache.uima.ruta.type.SPACE",
      "org.apache.uima.ruta.type.BREAK", "org.apache.uima.ruta.type.MARKUP" };

  /**
   * Applies a script that declares a string list and passes it to {@code MARKFAST}, then checks
   * the outcome via {@code RutaTestUtils.assertAnnotationsEquals}.
   *
   * @throws Exception
   *           if the CAS cannot be created or the script cannot be applied
   */
  @Test
  public void testWithAction() throws Exception {
    String text = "ab";
    String script = "STRINGLIST list = {\"ab\", \"a c\", \"a d\"};" + "MARKFAST(T1, list);";

    CAS cas = RutaTestUtils.getCAS(text);
    Ruta.apply(cas, script);

    // NOTE(review): presumably "exactly one annotation of test type 1 covering the whole text" -
    // confirm against RutaTestUtils.
    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, text);
  }

  /**
   * Calls {@code find} three times on the same stream with different combinations of its boolean
   * arguments and expects a single match covering the document text every time.
   *
   * @throws Exception
   *           if the JCas cannot be created
   */
  @Test
  public void testFind() throws Exception {
    String text = "ab";
    List<String> entries = Arrays.asList(text, "a c", "a d");
    TreeWordList wordList = new TreeWordList(entries, false);

    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText(text);
    RutaStream stream = createStream(text, jcas.getCas());

    // Both boolean arguments false.
    assertSingleMatch(wordList.find(stream, false, 0, null, 0, false), text);
    // Only the trailing boolean argument true.
    assertSingleMatch(wordList.find(stream, false, 0, null, 0, true), text);
    // Only the leading boolean argument true.
    assertSingleMatch(wordList.find(stream, true, 0, null, 0, false), text);
  }

  /**
   * Asserts that the given matches consist of exactly one annotation with the expected covered
   * text.
   *
   * @param matches
   *          the annotations returned by a lookup
   * @param expectedText
   *          the text the single annotation must cover
   */
  private void assertSingleMatch(List<AnnotationFS> matches, String expectedText) {
    Assert.assertEquals(1, matches.size());
    Assert.assertEquals(expectedText, matches.get(0).getCoveredText());
  }

  /**
   * Seeds the CAS with the given text and builds a stream over it whose basics have been
   * initialized.
   *
   * @param text
   *          the text passed to the seeder
   * @param cas
   *          the CAS the stream operates on
   * @return the initialized stream
   */
  private RutaStream createStream(String text, CAS cas) {
    Type rutaBasicType = cas.getTypeSystem().getType(RutaBasic.class.getName());
    FilterManager filterManager = new FilterManager(getDefaultFilterTypes(cas), true, cas);

    // The seeder has to run before the stream is created and its basics are initialized.
    new TextSeeder().seed(text, cas);

    RutaStream result = new RutaStream(cas, rutaBasicType, filterManager, false, false, true, null,
            new InferenceCrowd(new ArrayList<>()));
    result.initalizeBasics(new String[] { CAS.TYPE_NAME_ANNOTATION }, false);
    return result;
  }

  /**
   * Resolves the filtered type names against the type system of the CAS.
   *
   * @param cas
   *          the CAS providing the type system
   * @return the resolved types; names unknown to the type system are left out
   */
  private Collection<Type> getDefaultFilterTypes(CAS cas) {
    TypeSystem typeSystem = cas.getTypeSystem();
    Collection<Type> resolved = new ArrayList<>();
    for (String typeName : FILTERED_TYPE_NAMES) {
      Type candidate = typeSystem.getType(typeName);
      if (candidate == null) {
        // Not part of this type system, nothing to filter.
        continue;
      }
      resolved.add(candidate);
    }
    return resolved;
  }
}