Merge pull request #37 from apache/feature/UIMA-6343-Ruta-should-provide-config-parameters-to-ease-type-disambiguation
UIMA-6343: Ruta should provide config parameters to ease type disambiguation
diff --git a/ruta-core-ext/src/main/java/org/apache/uima/ruta/block/fst/Automaton.java b/ruta-core-ext/src/main/java/org/apache/uima/ruta/block/fst/Automaton.java
index 6d99ecb..6596cec 100644
--- a/ruta-core-ext/src/main/java/org/apache/uima/ruta/block/fst/Automaton.java
+++ b/ruta-core-ext/src/main/java/org/apache/uima/ruta/block/fst/Automaton.java
@@ -17,7 +17,6 @@
* under the License.
*/
-
package org.apache.uima.ruta.block.fst;
import java.util.ArrayList;
@@ -168,7 +167,8 @@
* - the RutaBlock (is needed in called functions)
*/
private void doTransition(TransitionState startState, AnnotationFS anno, RuleElement ruleElement,
- LinkedList<RuleMatch> matches, RutaStream stream, InferenceCrowd crowd, RutaBlock parent) {
+ LinkedList<RuleMatch> matches, RutaStream stream, InferenceCrowd crowd,
+ RutaBlock parent) {
for (RuleElement element : startState.getPossibleTransitions()) {
RutaMatcher matcher = ((RutaRuleElement) element).getMatcher();
AbstractState targetState = startState.getTransition(element);
@@ -228,12 +228,12 @@
private void doMatch(AnnotationFS annotation, RuleMatch ruleMatch, RuleElement element,
ComposedRuleElementMatch containerMatch, RutaStream stream, InferenceCrowd crowd) {
RuleElementMatch result = new RuleElementMatch(element, containerMatch);
- List<EvaluatedCondition> evaluatedConditions = new ArrayList<EvaluatedCondition>(element
- .getConditions().size());
+ List<EvaluatedCondition> evaluatedConditions = new ArrayList<EvaluatedCondition>(
+ element.getConditions().size());
// boolean base = matcher.match(annotation, stream, getParent());
boolean base = true;
MatchContext context = new MatchContext(annotation, element, ruleMatch, true);
-
+
List<AnnotationFS> textsMatched = new ArrayList<AnnotationFS>(1);
if (annotation != null) {
textsMatched.add(annotation);
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java b/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java
index 0aab776..acc279d 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java
@@ -33,6 +33,7 @@
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
+import java.util.regex.Pattern;
import org.antlr.runtime.CommonToken;
import org.apache.commons.lang3.StringUtils;
@@ -184,6 +185,8 @@
private RutaVerbalizer verbalizer = new RutaVerbalizer();
+ private Pattern typeIgnorePattern;
+
public RutaEnvironment(RutaBlock owner) {
super();
this.owner = owner;
@@ -475,9 +478,22 @@
return type;
}
- public void addType(String string, Type type) {
- importType(type.getName(), string);
- types.put(type.getName(), type);
+ public void addType(String shortName, Type type) {
+ String name = type.getName();
+
+ if (ignoreType(name)) {
+ return;
+ }
+
+ importType(name, shortName);
+ types.put(name, type);
+ }
+
+ private boolean ignoreType(String name) {
+ if (typeIgnorePattern == null) {
+ return false;
+ }
+ return typeIgnorePattern.matcher(name).matches();
}
public void addType(Type type) {
@@ -1231,4 +1247,8 @@
}
}
}
+
+ public void setTypeIgnorePattern(Pattern pattern) {
+ typeIgnorePattern = pattern;
+ }
}
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
index d2a98fc..97cafd4 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
@@ -32,6 +32,7 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.regex.Pattern;
import org.antlr.runtime.ANTLRInputStream;
import org.antlr.runtime.ANTLRStringStream;
@@ -389,6 +390,16 @@
private Boolean strictImports = false;
/**
+ * An optional pattern (regular expression) which defines types that should be ignored. These
+ * types will not be resolved even if strictImports is set to false. This parameter can be used to
+ * ignore complete namespaces of types that could contain ambiguous short names.
+ */
+ public static final String PARAM_TYPE_IGNORE_PATTERN = "typeIgnorePattern";
+
+ @ConfigurationParameter(name = PARAM_TYPE_IGNORE_PATTERN, mandatory = false)
+ private Pattern typeIgnorePattern;
+
+ /**
* If this parameter is set to true, then whitespaces are removed when dictionaries are loaded.
*/
public static final String PARAM_DICT_REMOVE_WS = "dictRemoveWS";
@@ -792,6 +803,7 @@
initialized.add(name);
}
}
+ mainRootBlock.getEnvironment().setTypeIgnorePattern(typeIgnorePattern);
mainRootBlock.getEnvironment().initializeTypes(cas, strictImports);
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/TypeIgnorePatternTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/TypeIgnorePatternTest.java
new file mode 100644
index 0000000..fcc0e04
--- /dev/null
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/TypeIgnorePatternTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.ruta.engine.Ruta;
+import org.apache.uima.ruta.engine.RutaEngine;
+import org.apache.uima.ruta.engine.RutaTestUtils;
+import org.junit.Test;
+
+public class TypeIgnorePatternTest {
+
+ @Test
+ public void test() throws Exception {
+ Map<String, String> complexTypes = new HashMap<>();
+ complexTypes.put("uima.ruta.Type1", CAS.TYPE_NAME_ANNOTATION);
+ complexTypes.put("bad.package.Type1", CAS.TYPE_NAME_ANNOTATION);
+
+ CAS cas = RutaTestUtils.getCAS("text", complexTypes, null);
+ Map<String, Object> params = new HashMap<String, Object>();
+ params.put(RutaEngine.PARAM_TYPE_IGNORE_PATTERN, ".*bad.*");
+ Ruta.apply(cas, "W{->Type1};Type1{->T1};", params);
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "text");
+ }
+
+}
\ No newline at end of file
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/engine/DummySeeder.java b/ruta-core/src/test/java/org/apache/uima/ruta/engine/DummySeeder.java
index 057a175..8712425 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/engine/DummySeeder.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/engine/DummySeeder.java
@@ -24,7 +24,7 @@
import org.apache.uima.ruta.seed.RutaAnnotationSeeder;
import org.apache.uima.ruta.seed.TextSeeder;
-public class DummySeeder implements RutaAnnotationSeeder{
+public class DummySeeder implements RutaAnnotationSeeder {
@Override
public Type seed(String text, CAS cas) {
@@ -33,5 +33,4 @@
cas.addFsToIndexes(annotation);
return type;
}
-
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/engine/TestRutaInferenceVisitor.java b/ruta-core/src/test/java/org/apache/uima/ruta/engine/TestRutaInferenceVisitor.java
index 5cf4ca5..4988241 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/engine/TestRutaInferenceVisitor.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/engine/TestRutaInferenceVisitor.java
@@ -40,6 +40,7 @@
public void endVisit(RutaElement element, ScriptApply result) {
throw new NotImplementedException("Not supported.");
}
+
@Override
public void finished(RutaStream stream, List<RutaInferenceVisitor> visitors) {
throw new NotImplementedException("Not supported.");
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/rule/RuleInference1Test.java b/ruta-core/src/test/java/org/apache/uima/ruta/rule/RuleInference1Test.java
index 02b9835..4c8d2bf 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/rule/RuleInference1Test.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/rule/RuleInference1Test.java
@@ -24,7 +24,6 @@
import org.junit.Test;
public class RuleInference1Test {
-
@Test
public void test() {
@@ -33,18 +32,23 @@
RutaTestUtils.assertAnnotationsEquals(cas, 1, 4, "Peter", "Jochen", "Flo", "Georg");
RutaTestUtils.assertAnnotationsEquals(cas, 2, 1, "und");
RutaTestUtils.assertAnnotationsEquals(cas, 3, 0);
- RutaTestUtils.assertAnnotationsEquals(cas, 4, 5, "Peter", "Jochen", "Flo und Georg", "und Georg", "Georg");
+ RutaTestUtils.assertAnnotationsEquals(cas, 4, 5, "Peter", "Jochen", "Flo und Georg",
+ "und Georg", "Georg");
RutaTestUtils.assertAnnotationsEquals(cas, 5, 2, ",", ",");
RutaTestUtils.assertAnnotationsEquals(cas, 6, 2, "Peter, Jochen,", "Jochen,");
RutaTestUtils.assertAnnotationsEquals(cas, 7, 5, "Peter", "Jochen", "Flo", "und", "Georg");
- RutaTestUtils.assertAnnotationsEquals(cas, 8, 5, "Peter", "Jochen", "Flo und Georg", "und Georg", "Georg");
+ RutaTestUtils.assertAnnotationsEquals(cas, 8, 5, "Peter", "Jochen", "Flo und Georg",
+ "und Georg", "Georg");
RutaTestUtils.assertAnnotationsEquals(cas, 9, 2, "Peter, Jochen, Flo", "Jochen, Flo");
- RutaTestUtils.assertAnnotationsEquals(cas, 10, 3, "Peter, Jochen, Flo und", "Jochen, Flo und", "Flo und");
- RutaTestUtils.assertAnnotationsEquals(cas, 11, 3, "Peter, Jochen, Flo und Georg.", "Jochen, Flo und Georg.",
- "Flo und Georg.");
+ RutaTestUtils.assertAnnotationsEquals(cas, 10, 3, "Peter, Jochen, Flo und", "Jochen, Flo und",
+ "Flo und");
+ RutaTestUtils.assertAnnotationsEquals(cas, 11, 3, "Peter, Jochen, Flo und Georg.",
+ "Jochen, Flo und Georg.", "Flo und Georg.");
RutaTestUtils.assertAnnotationsEquals(cas, 12, 6, ",", ",", ",", "und", "und", "und");
- RutaTestUtils.assertAnnotationsEquals(cas, 13, 6, "Peter,", "Jochen,", "Jochen,", "Flo und", "Flo und", "Flo und");
- RutaTestUtils.assertAnnotationsEquals(cas, 14, 3, "Peter, Jochen, Flo und", "Jochen, Flo und", "Flo und");
+ RutaTestUtils.assertAnnotationsEquals(cas, 13, 6, "Peter,", "Jochen,", "Jochen,", "Flo und",
+ "Flo und", "Flo und");
+ RutaTestUtils.assertAnnotationsEquals(cas, 14, 3, "Peter, Jochen, Flo und", "Jochen, Flo und",
+ "Flo und");
RutaTestUtils.assertAnnotationsEquals(cas, 15, 1, "Peter, Jochen");
RutaTestUtils.assertAnnotationsEquals(cas, 16, 1, "Georg.");
RutaTestUtils.assertAnnotationsEquals(cas, 17, 1, "Flo und Georg.");
diff --git a/ruta-docbook/src/docbook/tools.ruta.overview.xml b/ruta-docbook/src/docbook/tools.ruta.overview.xml
index ee2a5a0..ce38508 100644
--- a/ruta-docbook/src/docbook/tools.ruta.overview.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.overview.xml
@@ -959,6 +959,14 @@
</row>
<row>
<entry>
+ <link linkend='ugr.tools.ruta.ae.basic.parameter.typeIgnorePattern'>typeIgnorePattern</link>
+ </entry>
+ <entry>Option to ignore types even if they are available in the typesystem/CAS.
+ </entry>
+ <entry>Single String</entry>
+ </row>
+ <row>
+ <entry>
<link linkend='ugr.tools.ruta.ae.basic.parameter.dynamicAnchoring'>dynamicAnchoring</link>
</entry>
<entry>Option to allow rule matches to start at any rule element.</entry>
@@ -1324,6 +1332,14 @@
The default value is set to false.
</para>
</section>
+ <section id="ugr.tools.ruta.ae.basic.parameter.typeIgnorePattern">
+ <title>typeIgnorePattern</title>
+ <para>
+ An optional pattern (regular expression) which defines types that should be ignored. These
+ types will not be resolved even if strictImports is set to false. This parameter can be used to
+ ignore complete namespaces of types that could contain ambiguous short names.
+ </para>
+ </section>
<section id="ugr.tools.ruta.ae.basic.parameter.dynamicAnchoring">
<title>dynamicAnchoring</title>
<para>