Merge branch 'main' into release/UIMA-6449-Ruta-3.2.0-release
* main: (21 commits)
UIMA-6414: Ruta: missing match for optional after sidestep out of composed
UIMA-6414: Ruta: missing match for optional after sidestep out of composed
UIMA-6414: Ruta: missing match for optional after sidestep out of composed
UIMA-6414: Ruta: missing match for optional after sidestep out of composed
UIMA-6414: Ruta: missing match for optional after sidestep out of composed
UIMA-6411: Ruta: avoid creation of RutaBasics for bad annotations
UIMA-6411: Ruta: avoid creation of RutaBasics for bad annotations
UIMA-6409-Ruta-possible-endless-wildcard-lookahead-in-combination-with-subtokens
UIMA-6383: Ruta: TRIE - Wordlist entry not annotated
UIMA-6383: Ruta: TRIE - Wordlist entry not annotated
UIMA-6383: Ruta: TRIE - Wordlist entry not annotated
UIMA-6408: Ruta: No type check of features in TRANSFER
no issue: fix method sig
UIMA-6394: Ruta: label assignment in alternative match causes problems
UIMA-6394: Ruta: label assignment in alternative match causes problems
UIMA-6404: Ruta: @ with quantifier ignores matches
UIMA-6405: Local variable not captured properly in a wildcard matching condition
UIMA-6406: Removing an annotation inside a BLOCK only takes effect outside the block
UIMA-6406: Removing an annotation inside a BLOCK only takes effect outside the block
UIMA-6405: Local variable not captured properly in a wildcard matching condition
...
diff --git a/ruta-core/.gitignore b/ruta-core/.gitignore
new file mode 100644
index 0000000..862d276
--- /dev/null
+++ b/ruta-core/.gitignore
@@ -0,0 +1,2 @@
+input/
+TypeSystem.xml
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java b/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
index f653652..7a5c612 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
@@ -346,12 +346,13 @@
createRutaBasic(0, 0);
} else if (anchors.size() == 1) {
Integer first = anchors.get(0);
- createRutaBasic(first, first);
+ if (first >= 0 && first <= cas.getDocumentText().length())
+ createRutaBasic(first, first);
} else {
for (int i = 0; i < anchors.size() - 1; i++) {
Integer first = anchors.get(i);
Integer second = anchors.get(i + 1);
- if (first < second) { // not really needed
+ if (first < second && first >= 0 && second <= cas.getDocumentText().length()) {
createRutaBasic(first, second);
}
}
@@ -1151,7 +1152,11 @@
if (cas.getTypeSystem().subsumes(type, windowAnnotation.getType())) {
if (!sensitiveToVisibility || isVisible(windowAnnotation)) {
- result.add(windowAnnotation);
+ // the window defined by a BLOCK could actually have already been removed, thus we do not
+ // want to return it
+ if (cas.getAnnotationIndex(windowAnnotation.getType()).contains(windowAnnotation)) {
+ result.add(windowAnnotation);
+ }
}
}
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/action/TransferAction.java b/ruta-core/src/main/java/org/apache/uima/ruta/action/TransferAction.java
index bf38e7b..cb4694b 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/action/TransferAction.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/action/TransferAction.java
@@ -21,6 +21,7 @@
import java.util.List;
+import org.apache.uima.UimaContextHolder;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
@@ -79,10 +80,20 @@
if (newFeature != null) {
if (feature.getRange().isPrimitive()) {
String value = oldFS.getFeatureValueAsString(feature);
- newFS.setFeatureValueFromString(newFeature, value);
+ try {
+ newFS.setFeatureValueFromString(newFeature, value);
+ } catch (Exception e) {
+ UimaContextHolder.getContext().getLogger().debug("Unable to transfer feature {}: {}",
+ shortName, e.getMessage());
+ }
} else {
FeatureStructure value = oldFS.getFeatureValue(feature);
- newFS.setFeatureValue(newFeature, value);
+ try {
+ newFS.setFeatureValue(newFeature, value);
+ } catch (Exception e) {
+ UimaContextHolder.getContext().getLogger().debug("Unable to transfer feature {}: {}",
+ shortName, e.getMessage());
+ }
}
}
}
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaTestUtils.java b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaTestUtils.java
index 5769578..d5230a4 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaTestUtils.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaTestUtils.java
@@ -19,14 +19,18 @@
package org.apache.uima.ruta.engine;
+import static org.apache.uima.fit.factory.TypeSystemDescriptionFactory.createTypeSystemDescription;
+
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.lang.management.ManagementFactory;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
@@ -34,6 +38,7 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
+import java.util.regex.Pattern;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -60,6 +65,19 @@
public class RutaTestUtils {
+ public static final boolean DEBUG_MODE = isDebugging();
+
+ private static boolean isDebugging() {
+
+ Pattern debugPattern = Pattern.compile("-Xdebug|jdwp");
+ for (String arg : ManagementFactory.getRuntimeMXBean().getInputArguments()) {
+ if (debugPattern.matcher(arg).find()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
public static class TestFeature {
public String name;
@@ -151,24 +169,8 @@
AnalysisEngineDescription aed = (AnalysisEngineDescription) specifier;
TypeSystemDescription basicTypeSystem = aed.getAnalysisEngineMetaData().getTypeSystem();
- for (int i = 1; i <= amount; i++) {
- basicTypeSystem.addType(TYPE + i, "Type for Testing", "uima.tcas.Annotation");
- }
-
- if (complexTypes != null) {
- Set<Entry<String, String>> entrySet = complexTypes.entrySet();
- for (Entry<String, String> entry : entrySet) {
- String name = entry.getKey();
- TypeDescription addType = basicTypeSystem.addType(name, "Type for Testing",
- entry.getValue());
- if (features != null) {
- List<TestFeature> list = features.get(name);
- for (TestFeature f : list) {
- addType.addFeature(f.name, f.description, f.range);
- }
- }
- }
- }
+ addTestTypes(basicTypeSystem);
+ addAdditionalTypes(complexTypes, features, basicTypeSystem);
Collection<TypeSystemDescription> tsds = new ArrayList<TypeSystemDescription>();
tsds.add(basicTypeSystem);
@@ -248,23 +250,8 @@
ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
AnalysisEngineDescription aed = (AnalysisEngineDescription) specifier;
TypeSystemDescription basicTypeSystem = aed.getAnalysisEngineMetaData().getTypeSystem();
- for (int i = 1; i <= 50; i++) {
- basicTypeSystem.addType("org.apache.uima.T" + i, "Type for Testing", "uima.tcas.Annotation");
- }
- if (complexTypes != null) {
- Set<Entry<String, String>> entrySet = complexTypes.entrySet();
- for (Entry<String, String> entry : entrySet) {
- String name = entry.getKey();
- TypeDescription addType = basicTypeSystem.addType(name, "Type for Testing",
- entry.getValue());
- if (features != null) {
- List<TestFeature> list = features.get(name);
- for (TestFeature f : list) {
- addType.addFeature(f.name, f.description, f.range);
- }
- }
- }
- }
+ addTestTypes(basicTypeSystem);
+ addAdditionalTypes(complexTypes, features, basicTypeSystem);
Collection<TypeSystemDescription> tsds = new ArrayList<TypeSystemDescription>();
tsds.add(basicTypeSystem);
TypeSystemDescription mergeTypeSystems = CasCreationUtils.mergeTypeSystems(tsds);
@@ -282,6 +269,31 @@
return cas;
}
+ public static void addTestTypes(TypeSystemDescription typeSystemDescription) {
+ for (int i = 1; i <= 50; i++) {
+ typeSystemDescription.addType("org.apache.uima.T" + i, "Type for Testing",
+ "uima.tcas.Annotation");
+ }
+ }
+
+ private static void addAdditionalTypes(Map<String, String> complexTypes,
+ Map<String, List<TestFeature>> features, TypeSystemDescription typeSystemDescription) {
+ if (complexTypes != null) {
+ Set<Entry<String, String>> entrySet = complexTypes.entrySet();
+ for (Entry<String, String> entry : entrySet) {
+ String name = entry.getKey();
+ TypeDescription addType = typeSystemDescription.addType(name, "Type for Testing",
+ entry.getValue());
+ if (features != null) {
+ List<TestFeature> list = features.get(name);
+ for (TestFeature f : list) {
+ addType.addFeature(f.name, f.description, f.range);
+ }
+ }
+ }
+ }
+ }
+
public static void printAnnotations(CAS cas, int typeId) {
Type t = getTestType(cas, typeId);
AnnotationIndex<AnnotationFS> ai = cas.getAnnotationIndex(t);
@@ -366,4 +378,34 @@
}
}
+ public static void storeTypeSystem() {
+ storeTypeSystem(Collections.emptyMap(), Collections.emptyMap());
+ }
+
+ public static void storeTypeSystem(Map<String, String> complexTypes,
+ Map<String, List<TestFeature>> features) {
+
+ File tsFile = new File("TypeSystem.xml");
+
+ try {
+
+ TypeSystemDescription typeSystemDescription = createTypeSystemDescription();
+ addTestTypes(typeSystemDescription);
+ addAdditionalTypes(complexTypes, features, typeSystemDescription);
+ try (OutputStream os = new FileOutputStream(tsFile)) {
+ typeSystemDescription.toXML(os);
+ }
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ public static Map<String, Object> getDebugParams() {
+ Map<String, Object> params = new LinkedHashMap<>();
+ params.put(RutaEngine.PARAM_DEBUG, true);
+ params.put(RutaEngine.PARAM_DEBUG_WITH_MATCHES, true);
+ params.put(RutaEngine.PARAM_CREATED_BY, true);
+ return params;
+ }
+
}
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
index 4b6d9ff..fd25685 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
@@ -63,6 +63,8 @@
/** The cost model we are using. */
private EditDistanceCostMap costMap;
+ private boolean dictRemoveWS = false;
+
/**
* Default constructor.
*
@@ -155,9 +157,25 @@
* When there is a problem reading a path.
*/
public MultiTreeWordList(String[] pathnames, File base) throws IOException {
+ this(pathnames, base, false);
+ }
+
+ /**
+ * Constructs a MultiTreeWordList from the files at the given path names.
+ *
+ * @param pathnames
+ * paths of the files to create the MultiTreeWordList from
+ * @param base
+ * - the relative base
+ * @param dictRemoveWS
+ * remove white spaces
+ * @throws IOException
+ * When there is a problem reading a path.
+ */
+ public MultiTreeWordList(String[] pathnames, File base, boolean dictRemoveWS) throws IOException {
this.root = new MultiTextNode();
this.costMap = new EditDistanceCostMap();
-
+ this.dictRemoveWS = dictRemoveWS;
if (pathnames == null) {
return;
}
@@ -177,8 +195,23 @@
* - When there is a problem reading the files.
*/
public MultiTreeWordList(List<File> files, File base) throws IOException {
+ this(files, base, false);
+ }
+
+ /**
+ * @param files
+ * - the input files
+ * @param base
+ * - the relative base
+ * @param dictRemoveWS
+ * - remove white spaces
+ * @throws IOException
+ * - When there is a problem reading the files.
+ */
+ public MultiTreeWordList(List<File> files, File base, boolean dictRemoveWS) throws IOException {
this.root = new MultiTextNode();
this.costMap = new EditDistanceCostMap();
+ this.dictRemoveWS = dictRemoveWS;
if (files == null) {
return;
@@ -275,6 +308,10 @@
for (Character each : s.toCharArray()) {
+ if (dictRemoveWS && Character.isWhitespace(each)) {
+ continue;
+ }
+
MultiTextNode childNode = pointer.getChildNode(each);
if (childNode == null) {
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/AbstractRuleElement.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/AbstractRuleElement.java
index f85c809..f367d61 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/AbstractRuleElement.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/AbstractRuleElement.java
@@ -22,7 +22,6 @@
import static java.util.Arrays.asList;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.Collections;
import java.util.List;
@@ -114,12 +113,16 @@
} else {
RuleElement nextRuleElement = getContainer().getNextElement(newDirection, this);
if (nextRuleElement != null) {
+ RuleElement sideStepOrigin = null;
+ if (getContainer() instanceof RuleElement) {
+ sideStepOrigin = (RuleElement) getContainer();
+ }
result = nextRuleElement.continueMatch(newDirection, annotation, ruleMatch, ruleApply,
- sideStepContainerMatch, null, null, stream, crowd);
+ sideStepContainerMatch, sideStepOrigin, entryPoint, stream, crowd);
} else if (getContainer() instanceof ComposedRuleElement) {
ComposedRuleElement composed = (ComposedRuleElement) getContainer();
result = composed.fallbackContinue(newDirection, false, annotation, ruleMatch, ruleApply,
- sideStepContainerMatch, null, entryPoint, stream, crowd);
+ sideStepContainerMatch, composed, entryPoint, stream, crowd);
}
}
}
@@ -128,15 +131,14 @@
protected void doneMatching(RuleMatch ruleMatch, RuleApply ruleApply, RutaStream stream,
InferenceCrowd crowd) {
- if (!ruleMatch.isApplied()) {
+ // do not execute actions if they already have been or if this is just a lookahead
+ // (ruleApply==null)
+ if (!ruleMatch.isApplied() && ruleApply != null) {
ruleApply.add(ruleMatch, stream);
- RutaRule rule = ruleMatch.getRule();
- Collection<String> localVariables = rule.getLabels();
if (ruleMatch.matchedCompletely()) {
- rule.getEnvironment().acceptTempVariableValues(localVariables);
+ RutaRule rule = ruleMatch.getRule();
+ rule.getEnvironment().acceptTempVariableValues(rule.getOwnLabels());
rule.getRoot().applyRuleElements(ruleMatch, stream, crowd);
- } else {
- rule.getEnvironment().clearTempVariables(localVariables);
}
ruleMatch.setApplied(true);
}
@@ -188,10 +190,15 @@
for (RutaStatement each : inlinedRules) {
ScriptApply apply = each.apply(windowStream, crowd);
blockResult.add(apply);
+ if (each instanceof RutaRule) {
+ // clean up temp variables produced by failed rules
+ ((RutaRule) each).clearOwnLabels();
+ }
}
result.add(blockResult);
}
}
+
return result;
}
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java
index e25038b..ade9153 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java
@@ -527,7 +527,8 @@
containerMatch, sideStepOrigin, entryPoint, stream, crowd);
}
- if (sideStepOrigin != null && !failed) {
+ if (sideStepOrigin != null && !failed && sideStepOrigin.getContainer() != null) {
+ // only continue sidestep if we did not yet reach the root
return sideStepOrigin.continueSideStep(after, ruleMatch, ruleApply, containerMatch,
entryPoint, stream, crowd);
}
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRule.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRule.java
index d6638d2..7c6f653 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRule.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRule.java
@@ -19,8 +19,9 @@
package org.apache.uima.ruta.rule;
+import java.util.ArrayList;
import java.util.Collection;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -45,10 +46,13 @@
*/
private Map<String, Object> labels;
+ private Collection<String> ownLabels;
+
public RutaRule(List<RuleElement> elements, RutaBlock parent, int id) {
super(parent, id);
this.root = new ComposedRuleElement(elements, null, null, null, null, parent);
- this.labels = new HashMap<>();
+ this.labels = new LinkedHashMap<>();
+ this.ownLabels = new ArrayList<>();
}
@Override
@@ -92,33 +96,39 @@
if (elements != null) {
// update label map
for (RuleElement ruleElement : elements) {
- fillLabelMap(ruleElement);
+ fillLabelMap(ruleElement, true);
}
}
}
- private void fillLabelMap(RuleElement ruleElement) {
+ private void fillLabelMap(RuleElement ruleElement, boolean own) {
if (!StringUtils.isBlank(ruleElement.getLabel())) {
labels.put(ruleElement.getLabel(), null);
+ if (own) {
+ ownLabels.add(ruleElement.getLabel());
+ }
}
- fillLabelMapWithActions(ruleElement.getActions());
+ fillLabelMapWithActions(ruleElement.getActions(), own);
if (ruleElement instanceof ComposedRuleElement) {
ComposedRuleElement cre = (ComposedRuleElement) ruleElement;
List<RuleElement> ruleElements = cre.getRuleElements();
for (RuleElement each : ruleElements) {
- fillLabelMap(each);
+ fillLabelMap(each, own);
}
}
fillLabelMapWithInlinedRules(ruleElement.getInlinedConditionRuleBlocks());
fillLabelMapWithInlinedRules(ruleElement.getInlinedActionRuleBlocks());
}
- private void fillLabelMapWithActions(List<AbstractRutaAction> actions) {
+ private void fillLabelMapWithActions(List<AbstractRutaAction> actions, boolean own) {
if (actions != null) {
for (AbstractRutaAction action : actions) {
if (action != null && !StringUtils.isBlank(action.getLabel())) {
labels.put(action.getLabel(), null);
+ if (own) {
+ ownLabels.add(action.getLabel());
+ }
}
}
}
@@ -131,7 +141,7 @@
if (eachInlined instanceof RutaRule) {
RutaRule inlinedRule = (RutaRule) eachInlined;
inlinedRule.setInlined(true);
- fillLabelMap(inlinedRule.getRoot());
+ fillLabelMap(inlinedRule.getRoot(), false);
}
}
}
@@ -193,4 +203,13 @@
return labels.keySet();
}
+ public Collection<String> getOwnLabels() {
+ return ownLabels;
+ }
+
+ public void clearOwnLabels() {
+ RutaEnvironment environment = getParent().getEnvironment();
+ environment.clearTempVariables(ownLabels);
+ }
+
}
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRuleElement.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRuleElement.java
index f616ab7..d90a999 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRuleElement.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRuleElement.java
@@ -63,6 +63,10 @@
Collection<? extends AnnotationFS> anchors = getAnchors(stream);
boolean useAlternatives = anchors.size() != 1;
for (AnnotationFS eachAnchor : anchors) {
+
+ // clean up temp variables since we start a new matching iteration
+ ruleMatch.getRule().clearOwnLabels();
+
if (earlyExit(eachAnchor, ruleApply, stream)) {
// ... for different matching paradigms that avoid some matches
continue;
@@ -197,6 +201,11 @@
if (nextRuleElement != null) {
result = nextRuleElement.continueMatch(after, eachAnchor, extendedMatch, ruleApply,
extendedContainerMatch, sideStepOrigin, entryPoint, stream, crowd);
+ } else if (sideStepOrigin != null && !failed && containedIn(sideStepOrigin, getContainer())) {
+ // continue directly with the sidestep if it is contained in this container
+ // if not, we might miss matches in the same direction
+ result = sideStepOrigin.continueSideStep(after, extendedMatch, ruleApply,
+ extendedContainerMatch, entryPoint, stream, crowd);
} else if (getContainer() instanceof ComposedRuleElement) {
ComposedRuleElement composed = (ComposedRuleElement) getContainer();
result = composed.fallbackContinue(after, failed, eachAnchor, extendedMatch, ruleApply,
@@ -205,6 +214,26 @@
return result;
}
+ private boolean containedIn(RuleElement sideStepOrigin, RuleElementContainer container) {
+ // TODO: should we support this in interface?
+ if (container == null || sideStepOrigin == null) {
+ return false;
+ }
+ List<RuleElement> ruleElements = container.getRuleElements();
+ if (ruleElements.contains(sideStepOrigin)) {
+ return true;
+ } else {
+ for (RuleElement ruleElement : ruleElements) {
+ if (ruleElement instanceof RuleElementContainer) {
+ if (containedIn(sideStepOrigin, (RuleElementContainer) ruleElement)) {
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
@Override
public List<RuleMatch> continueMatch(boolean after, AnnotationFS annotation, RuleMatch ruleMatch,
RuleApply ruleApply, ComposedRuleElementMatch containerMatch, RuleElement sideStepOrigin,
@@ -335,8 +364,7 @@
InferenceCrowd crowd) {
RuleElementMatch result = new RuleElementMatch(this, containerMatch);
result.setRuleAnchor(ruleAnchor);
- List<EvaluatedCondition> evaluatedConditions = new ArrayList<>(
- conditions.size());
+ List<EvaluatedCondition> evaluatedConditions = new ArrayList<>(conditions.size());
// boolean base = matcher.match(annotation, stream, getParent());
boolean base = true;
MatchContext context = new MatchContext(annotation, this, ruleMatch, after);
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
index 94d07d5..1b688a3 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
@@ -507,8 +507,8 @@
}
if (annotation != null && iterator != null && iterator.isValid()) {
AnnotationFS pointer = iterator.get();
- if ((after && pointer.getEnd() == annotation.getEnd())
- || (!after && pointer.getBegin() == annotation.getBegin())) {
+ if ((after && pointer.getBegin() < annotation.getEnd())
+ || (!after && pointer.getEnd() > annotation.getBegin())) {
moveOn(after, iterator, stream);
}
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/ConjunctiveRuleElementTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/ConjunctiveRuleElementTest.java
index 02c7bff..0dd61f2 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/ConjunctiveRuleElementTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/ConjunctiveRuleElementTest.java
@@ -97,7 +97,7 @@
}
@Test
- public void testWithStartAnchor() {
+ public void testWithStartAnchor() throws Exception {
String document = "Peter did something.";
String script = "";
@@ -120,18 +120,12 @@
String fn3 = "tense";
list.add(new TestFeature(fn3, "", "uima.cas.String"));
- CAS cas = null;
- try {
- cas = RutaTestUtils.getCAS(document, typeMap, featureMap);
- Ruta.apply(cas, script);
- } catch (Exception e) {
- e.printStackTrace();
- }
+ CAS cas = RutaTestUtils.getCAS(document, typeMap, featureMap);
+ Ruta.apply(cas, script);
RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "Peter did");
RutaTestUtils.assertAnnotationsEquals(cas, 2, 1, "Peter did");
RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "did something");
- cas.release();
}
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java
index 4476a55..af6b368 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java
@@ -23,12 +23,15 @@
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.SelectFSs;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.ruta.engine.Ruta;
import org.apache.uima.ruta.engine.RutaEngine;
import org.apache.uima.ruta.engine.RutaTestUtils;
import org.apache.uima.ruta.rule.RuleInference1Test;
+import org.apache.uima.ruta.type.RutaBasic;
+import org.junit.Assert;
import org.junit.Test;
public class EmptyDocumentTest {
@@ -62,4 +65,18 @@
RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "");
}
+ @Test
+ public void testSpaceWithInvalidAnnotation() throws Exception {
+ CAS cas = RutaTestUtils.getCAS(" ");
+ AnnotationFS annotation = cas.createAnnotation(cas.getAnnotationType(), -1, 2);
+ cas.addFsToIndexes(annotation);
+ Ruta.apply(cas, "Document;");
+
+ SelectFSs<RutaBasic> select = cas.select(RutaBasic.class);
+ Assert.assertEquals(1, select.count());
+ RutaBasic rutaBasic = select.findAny().get();
+ Assert.assertEquals(0, rutaBasic.getBegin());
+ Assert.assertEquals(1, rutaBasic.getEnd());
+ }
+
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/action/TransferTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/action/TransferTest.java
index 31f4a89..bac9613 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/action/TransferTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/action/TransferTest.java
@@ -21,6 +21,9 @@
import static org.junit.Assert.assertEquals;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
import java.util.Map;
import java.util.TreeMap;
@@ -30,8 +33,10 @@
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.ruta.engine.Ruta;
import org.apache.uima.ruta.engine.RutaEngine;
import org.apache.uima.ruta.engine.RutaTestUtils;
+import org.apache.uima.ruta.engine.RutaTestUtils.TestFeature;
import org.junit.Test;
public class TransferTest {
@@ -45,8 +50,8 @@
complexTypes.put(type, CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
CAS cas = null;
try {
- cas = RutaTestUtils.process(namespace + "/" + name + RutaEngine.SCRIPT_FILE_EXTENSION, namespace + "/" + name
- + ".txt", 50, false, false, complexTypes, null);
+ cas = RutaTestUtils.process(namespace + "/" + name + RutaEngine.SCRIPT_FILE_EXTENSION,
+ namespace + "/" + name + ".txt", 50, false, false, complexTypes, null);
} catch (Exception e) {
e.printStackTrace();
assert (false);
@@ -63,7 +68,40 @@
Feature featureByBaseName = t.getFeatureByBaseName("language");
String stringValue = afs.getStringValue(featureByBaseName);
assertEquals("x-unspecified", stringValue);
-
+
cas.release();
}
+
+ @Test
+ public void testIncompatibleFeatureRanges() throws Exception {
+
+ Map<String, String> typeMap = new LinkedHashMap<String, String>();
+ typeMap.put("Struct11", "uima.tcas.Annotation");
+ typeMap.put("Struct12", "uima.tcas.Annotation");
+ typeMap.put("Struct21", "uima.tcas.Annotation");
+ typeMap.put("Struct22", "uima.tcas.Annotation");
+ Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();
+ featureMap.put("Struct11", Arrays.asList(new TestFeature("f", "", CAS.TYPE_NAME_ANNOTATION)));
+ featureMap.put("Struct12", Arrays.asList(new TestFeature("f", "", CAS.TYPE_NAME_STRING)));
+ featureMap.put("Struct21",
+ Arrays.asList(new TestFeature("array", "", CAS.TYPE_NAME_STRING_ARRAY)));
+ featureMap.put("Struct22",
+ Arrays.asList(new TestFeature("array", "", CAS.TYPE_NAME_BOOLEAN_ARRAY)));
+
+ CAS cas = RutaTestUtils.getCAS("This is a test.", typeMap, featureMap);
+ String script = "CW{->s:Struct11,s.f=CW};";
+ script += "CW{->s:Struct21,s.array={true, false, true}};";
+ script += "Struct11{-> TRANSFER(Struct12)};";
+ script += "Struct21{-> TRANSFER(Struct22)};";
+
+ Ruta.apply(cas, script, RutaTestUtils.getDebugParams());
+
+ if (RutaTestUtils.DEBUG_MODE) {
+ RutaTestUtils.storeTypeSystem(typeMap, featureMap);
+ RutaTestUtils.storeCas(cas, "testIncompatibleFeatureRanges");
+ }
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 0);
+
+ }
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/action/UnmarkTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/action/UnmarkTest.java
index 64b7152..3eb4289 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/action/UnmarkTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/action/UnmarkTest.java
@@ -20,6 +20,7 @@
package org.apache.uima.ruta.action;
import java.util.ArrayList;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
@@ -43,45 +44,70 @@
cas.release();
}
-
-
+
@Test
public void testAnnotationExpression() throws Exception {
- Map<String, String> typeMap = new TreeMap<String, String>();
+ Map<String, String> typeMap = new LinkedHashMap<String, String>();
typeMap.put("Complex", "uima.tcas.Annotation");
Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();
- List<TestFeature> list = new ArrayList<RutaTestUtils.TestFeature>();
+ List<TestFeature> list = new ArrayList<>();
featureMap.put("Complex", list);
list.add(new TestFeature("inner", "", "uima.tcas.Annotation"));
-
+
CAS cas = RutaTestUtils.getCAS("This is a test.", typeMap, featureMap);
String script = "";
script += "CW{->T1};t:T1 SW SW{-> UNMARK(t)};";
script += "CW{->T2};\n t:T2 # PERIOD{-> Complex, Complex.inner=t};\n Complex{-> UNMARK(Complex.inner)};\n";
Ruta.apply(cas, script);
-
+
RutaTestUtils.assertAnnotationsEquals(cas, 1, 0);
RutaTestUtils.assertAnnotationsEquals(cas, 2, 0);
-
+
}
-
+
@Test
- public void testAnnotationListExpression() throws Exception {
- Map<String, String> typeMap = new TreeMap<String, String>();
+ public void testAnnotationListExpression() throws Exception {
+ Map<String, String> typeMap = new LinkedHashMap<String, String>();
typeMap.put("Complex", "uima.tcas.Annotation");
Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();
- List<TestFeature> list = new ArrayList<RutaTestUtils.TestFeature>();
+ List<TestFeature> list = new ArrayList<>();
featureMap.put("Complex", list);
list.add(new TestFeature("inner", "", "uima.cas.FSArray"));
-
+
CAS cas = RutaTestUtils.getCAS("This is a test.", typeMap, featureMap);
String script = "";
script += "W{->T1}; Document{-> Complex, Complex.inner = T1};";
script += "Complex{-> UNMARK(Complex.inner)};\n";
Ruta.apply(cas, script);
-
+
RutaTestUtils.assertAnnotationsEquals(cas, 1, 0);
RutaTestUtils.assertAnnotationsEquals(cas, 2, 0);
}
-
+
+ @Test
+ public void testUnmarkWithFeatureMatchInBlock() throws Exception {
+
+ Map<String, String> typeMap = new LinkedHashMap<String, String>();
+ typeMap.put("Struct", "uima.tcas.Annotation");
+ Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();
+ List<TestFeature> list = new ArrayList<>();
+ featureMap.put("Struct", list);
+ list.add(new TestFeature("s", "", CAS.TYPE_NAME_STRING));
+
+ CAS cas = RutaTestUtils.getCAS("This is a test.", typeMap, featureMap);
+ String script = "\"a\"{->s:Struct,Struct.s=\"foo\"};";
+ script += "BLOCK(SoftRemove) Struct.s==\"foo\"{} {\r\n"
+ + " t:Struct.s==\"foo\"{-> UNMARK(t)};\r\n" //
+ + " t:Struct.s==\"foo\"{-> T1}; \r\n" + "}";
+
+ Ruta.apply(cas, script, RutaTestUtils.getDebugParams());
+
+ if (RutaTestUtils.DEBUG_MODE) {
+ RutaTestUtils.storeTypeSystem(typeMap, featureMap);
+ RutaTestUtils.storeCas(cas, "testUnmarkWithFeatureMatchInBlock");
+ }
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 0);
+ }
+
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/expression/annotation/AnnotationLabelExpressionTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/expression/annotation/AnnotationLabelExpressionTest.java
index c4738aa..30c9255 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/expression/annotation/AnnotationLabelExpressionTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/expression/annotation/AnnotationLabelExpressionTest.java
@@ -845,4 +845,36 @@
RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "Some text");
}
+ @Test
+ public void testCompareStringFeatures() throws Exception {
+
+ String document = "A b.";
+ Map<String, String> typeMap = new TreeMap<String, String>();
+ typeMap.put("Struct1", "uima.tcas.Annotation");
+ typeMap.put("Struct2", "uima.tcas.Annotation");
+
+ Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();
+ List<TestFeature> list = new ArrayList<RutaTestUtils.TestFeature>();
+ featureMap.put("Struct1", list);
+ featureMap.put("Struct2", list);
+ list.add(new TestFeature("s", "", "uima.cas.String"));
+
+ String script = "";
+ script += "SW{->CREATE(Struct2, \"s\" = \"b\")};\n";
+ script += "SW{->CREATE(Struct2, \"s\" = \"a\")};\n";
+ script += "CW{->CREATE(Struct1, \"s\" = \"a\")};\n";
+ script += "(s1:Struct1 s2:Struct2){s1.s==s2.s-> T1};\n";
+
+ CAS cas = RutaTestUtils.getCAS(document, typeMap, featureMap);
+ Ruta.apply(cas, script, RutaTestUtils.getDebugParams());
+
+ if (RutaTestUtils.DEBUG_MODE) {
+ RutaTestUtils.storeTypeSystem(typeMap, featureMap);
+ RutaTestUtils.storeCas(cas, "testCompareStringFeatures");
+ }
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "A b");
+
+ }
+
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/rule/ManualAnchoringTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/rule/ManualAnchoringTest.java
index 64b7561..e7d4a6c 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/rule/ManualAnchoringTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/rule/ManualAnchoringTest.java
@@ -38,6 +38,31 @@
RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "A, B and C");
- cas.release();
}
+
+ @Test
+ public void testAcrossComposedInSequence() throws Exception {
+ String text = "bla CAP 1-2 bla";
+
+ String script = "FOREACH(cap) CAP{}{";
+ script += "ANY{-PARTOF(SPECIAL)} @cap (NUM SPECIAL NUM){-> T1} ANY{-PARTOF(SPECIAL)};";
+ script += "}";
+
+ CAS cas = RutaTestUtils.getCAS(text);
+ Ruta.apply(cas, script);
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "1-2");
+ }
+
+ @Test
+ public void testLeaveComposedInSequence() throws Exception {
+ String text = "bla w CAP w bla";
+ String script = "(W @CAP W) {->T1} ANY{-PARTOF(NUM)};";
+
+ CAS cas = RutaTestUtils.getCAS(text);
+ Ruta.apply(cas, script);
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "w CAP w");
+ }
+
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/rule/SidestepInComposedTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/rule/SidestepInComposedTest.java
index c990a44..c97b34c 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/rule/SidestepInComposedTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/rule/SidestepInComposedTest.java
@@ -66,4 +66,18 @@
}
+ @Test
+ public void testOptionalBeforeComposed() throws Exception {
+ String document = "test 05/05 test\n";
+ document += "test 06/06 . test\n";
+ document += "test . 07/07 test\n";
+ String script = "_{-PARTOF(PM)} (NUM SPECIAL @NUM){-> T1} _{-PARTOF({PM})};\n";
+
+ CAS cas = RutaTestUtils.getCAS(document);
+ Ruta.apply(cas, script);
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "05/05");
+
+ }
+
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/rule/WildCard2Test.java b/ruta-core/src/test/java/org/apache/uima/ruta/rule/WildCard2Test.java
index 38dabdf..03bd641 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/rule/WildCard2Test.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/rule/WildCard2Test.java
@@ -275,13 +275,12 @@
public void testLabelForFailedLookahead() throws Exception {
String document = "A x B x C x D";
String script = "(w1:CW{REGEXP(\"A\")} # w2:CW{REGEXP(\"C\")})->{w1{->T1};};";
-
- CAS cas = RutaTestUtils.getCAS(document, null, null, false);
- Ruta.apply(cas, script);
-
- RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "A");
- }
+ CAS cas = RutaTestUtils.getCAS(document, null, null, false);
+ Ruta.apply(cas, script);
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "A");
+ }
@Test
public void testLastElementAlsoAnnotatedWithLookahead() throws Exception {
@@ -297,4 +296,33 @@
RutaTestUtils.assertAnnotationsEquals(cas, 4, 2, "c", "c");
}
+ @Test
+ public void testLookaheadWithFeatureMatch() throws Exception {
+ String document = "a 2 b 3 c 4 d";
+ String script = "";
+ script += "\"2\"{->s:Struct,s.s=\"x\"};\n";
+ script += "\"3\"{->s:Struct};\n";
+ script += "\"4\"{->s:Struct,s.s=\"y\"};\n";
+ script += "s1:Struct.s==\"x\" # s2:Struct.s==\"y\"{->s2.s=s1.s, T1};\n";
+ script += "s:Struct.s==\"x\"{->T2};\n";
+
+ Map<String, String> complexType = new HashMap<>();
+ complexType.put("Struct", CAS.TYPE_NAME_ANNOTATION);
+ Map<String, List<TestFeature>> featureMap = new HashMap<>();
+ List<TestFeature> list = new ArrayList<>();
+ list.add(new TestFeature("s", "", CAS.TYPE_NAME_STRING));
+ featureMap.put("Struct", list);
+
+ CAS cas = RutaTestUtils.getCAS(document, complexType, featureMap);
+ Ruta.apply(cas, script);
+
+ if (RutaTestUtils.DEBUG_MODE) {
+ RutaTestUtils.storeTypeSystem(complexType, featureMap);
+ RutaTestUtils.storeCas(cas, "testLookaheadWithFeatureMatch");
+ }
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "4");
+ RutaTestUtils.assertAnnotationsEquals(cas, 2, 2, "2", "4");
+ }
+
}
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/rule/quantifier/MinMaxQuantifierTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/rule/quantifier/MinMaxQuantifierTest.java
index 425ee0e..84a8086 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/rule/quantifier/MinMaxQuantifierTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/rule/quantifier/MinMaxQuantifierTest.java
@@ -37,6 +37,26 @@
Ruta.apply(cas, script);
RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "B, B, B");
- cas.release();
+ }
+
+ @Test
+ public void testMinMaxOnComposedWithAnchor() throws Exception {
+ String document = "1 2 3 4 5 6 7 8 9 10";
+ String script = "";
+ script += "(NUM @NUM NUM NUM)[2,2]{-> T1};\n";
+ script += "(NUM NUM @NUM NUM)[2,2]{-> T2};\n";
+
+ CAS cas = RutaTestUtils.getCAS(document);
+ Ruta.apply(cas, script);
+
+// if (RutaTestUtils.DEBUG_MODE) {
+// RutaTestUtils.storeTypeSystem();
+// RutaTestUtils.storeCas(cas, "testMinMaxOnComposedWithAnchor");
+// }
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 3, "1 2 3 4 5 6 7 8", "2 3 4 5 6 7 8 9",
+ "3 4 5 6 7 8 9 10");
+ RutaTestUtils.assertAnnotationsEquals(cas, 2, 3, "1 2 3 4 5 6 7 8", "2 3 4 5 6 7 8 9",
+ "3 4 5 6 7 8 9 10");
}
}
diff --git a/ruta-docbook/src/docbook/tools.ruta.howtos.xml b/ruta-docbook/src/docbook/tools.ruta.howtos.xml
index 3cecf68..a46a9e4 100644
--- a/ruta-docbook/src/docbook/tools.ruta.howtos.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.howtos.xml
@@ -419,7 +419,11 @@
<!-- Compress resulting tree word list. -->
<!-- default value: true -->
<compress>true</compress>
-
+
+ <!-- Remove white spaces when generating word list. -->
+ <!-- default value: true -->
+ <dictRemoveWS>true</dictRemoveWS>
+
<!-- The source files for the tree word list. -->
<!-- default value: none -->
<inputFiles>
@@ -472,6 +476,10 @@
<!-- Compress resulting tree word list. -->
<!-- default value: true -->
<compress>true</compress>
+
+ <!-- Remove white spaces when generating word list. -->
+ <!-- default value: true -->
+ <dictRemoveWS>true</dictRemoveWS>
<!-- The source files for the multi tree word list. -->
<!-- default value: none -->
diff --git a/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml b/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
index 641b743..806a054 100644
--- a/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
@@ -90,5 +90,8 @@
<quote>generated.mtwl</quote>
will be created.
</para>
+ <para>
+ The preferences page provides the option to remove white spaces when generating the word lists.
+ </para>
</section>
\ No newline at end of file
diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
index 8c3fbcd..1b6c318 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
@@ -54,12 +54,16 @@
public class MultiTWLConverterHandler implements IHandler {
private class ConverterHandlerJob extends Job {
ExecutionEvent event;
+
private boolean compress;
- ConverterHandlerJob(ExecutionEvent event, boolean compress) {
+ private boolean dictRemoveWS;
+
+ ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {
super("Converting...");
this.event = event;
this.compress = compress;
+ this.dictRemoveWS = dictRemoveWS;
setUser(true);
}
@@ -88,7 +92,7 @@
if (!paths.isEmpty()) {
MultiTreeWordList trie;
try {
- trie = new MultiTreeWordList(paths.toArray(new String[0]), null);
+ trie = new MultiTreeWordList(paths.toArray(new String[0]), null, dictRemoveWS);
} catch (IOException e) {
RutaAddonsPlugin.error(e);
return Status.CANCEL_STATUS;
@@ -126,16 +130,20 @@
}
}
+ @Override
public void addHandlerListener(IHandlerListener handlerListener) {
}
+ @Override
public void dispose() {
}
+ @Override
public Object execute(ExecutionEvent event) throws ExecutionException {
IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();
boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);
- new ConverterHandlerJob(event, compress).schedule();
+ boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);
+ new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();
return null;
}
@@ -161,14 +169,17 @@
return paths;
}
+ @Override
public boolean isEnabled() {
return true;
}
+ @Override
public boolean isHandled() {
return true;
}
+ @Override
public void removeHandlerListener(IHandlerListener handlerListener) {
}
diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
index 45a7016..cd2784e 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
@@ -50,12 +50,16 @@
private class ConverterHandlerJob extends Job {
ExecutionEvent event;
+
private boolean compress;
- ConverterHandlerJob(ExecutionEvent event, boolean compress) {
+ private boolean dictRemoveWS;
+
+ ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {
super("Converting...");
this.event = event;
this.compress = compress;
+ this.dictRemoveWS = dictRemoveWS;
setUser(true);
}
@@ -81,7 +85,7 @@
String path = file.getRawLocation().toString();
TreeWordList list;
try {
- list = new TreeWordList(path, false);
+ list = new TreeWordList(path, dictRemoveWS);
} catch (IOException e) {
RutaAddonsPlugin.error(e);
return Status.CANCEL_STATUS;
@@ -109,27 +113,34 @@
}
}
+ @Override
public void addHandlerListener(IHandlerListener handlerListener) {
}
+ @Override
public void dispose() {
}
+ @Override
public Object execute(ExecutionEvent event) throws ExecutionException {
IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();
boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);
- new ConverterHandlerJob(event, compress).schedule();
+ boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);
+ new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();
return null;
}
+ @Override
public boolean isEnabled() {
return true;
}
+ @Override
public boolean isHandled() {
return true;
}
+ @Override
public void removeHandlerListener(IHandlerListener handlerListener) {
}
diff --git a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
index d4774eb..cc3df83 100644
--- a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
+++ b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
@@ -30,8 +30,8 @@
/**
* Preference page to manage preferences for the ide plugin.
*/
-public class RutaBuilderPreferencePage extends FieldEditorPreferencePage implements
- IWorkbenchPreferencePage {
+public class RutaBuilderPreferencePage extends FieldEditorPreferencePage
+ implements IWorkbenchPreferencePage {
private BooleanFieldEditor builderImport;
@@ -60,13 +60,18 @@
RutaCorePreferences.BUILDER_IGNORE_DUPLICATE_SHORTNAMES,
RutaPreferencesMessages.BuilderIgnoreDuplicateShortnames, getFieldEditorParent());
addField(builderShortNames);
-
- compressWordLists = new BooleanFieldEditor(
- RutaCorePreferences.COMPRESS_WORDLISTS,
+
+ compressWordLists = new BooleanFieldEditor(RutaCorePreferences.COMPRESS_WORDLISTS,
RutaPreferencesMessages.CompressWordLists, getFieldEditorParent());
addField(compressWordLists);
+
+ // Own editor for DICT_REMOVE_WS so the compress editor above stays registered.
+ BooleanFieldEditor dictRemoveWS = new BooleanFieldEditor(RutaCorePreferences.DICT_REMOVE_WS,
+ RutaPreferencesMessages.DictRemoveWS, getFieldEditorParent());
+ addField(dictRemoveWS);
}
+ @Override
public void init(IWorkbench workbench) {
}
diff --git a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
index 5503f03..8219a1e 100644
--- a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
+++ b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
@@ -22,8 +22,7 @@
import org.eclipse.osgi.util.NLS;
public class RutaPreferencesMessages extends NLS {
- private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$
-
+ private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$
private RutaPreferencesMessages() {
// Do not instantiate
@@ -78,12 +77,13 @@
public static String ProjectClearOutput;
public static String NoVMInDevMode;
-
+
public static String AddSDI;
-
+
public static String CompressWordLists;
-
+
+ public static String DictRemoveWS;
+
public static String DefaultCasSerializationFormat;
-
}
diff --git a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
index b1b237f..7ae71a5 100644
--- a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
+++ b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
@@ -44,4 +44,5 @@
NoVMInDevMode = Do not start a VM in development mode.
AddSDI = Update Source Document Information when launching a script.
CompressWordLists = Compress generated twl/mtwl word lists.
+DictRemoveWS = Remove white spaces when generating twl/mtwl word lists.
DefaultCasSerializationFormat = Default CAS serialization format:
diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
index e6dec76..4d1e86d 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
@@ -34,5 +34,7 @@
public static final String COMPRESS_WORDLISTS = "CompressWordLists";
+ public static final String DICT_REMOVE_WS = "dictRemoveWS";
+
public static final String DEFAULT_CAS_SERIALIZATION_FORMAT = "DefaultCasSerializationFormat";
}
diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
index 1217bf3..138a528 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
@@ -28,6 +28,7 @@
public RutaPreferenceInitializer() {
}
+ @Override
public void initializeDefaultPreferences() {
IPreferenceStore store = RutaIdeCorePlugin.getDefault().getPreferenceStore();
// TaskTagUtils.initializeDefaultValues(store);
@@ -38,6 +39,7 @@
store.setDefault(RutaCorePreferences.NO_VM_IN_DEV_MODE, false);
store.setDefault(RutaCorePreferences.ADD_SDI, false);
store.setDefault(RutaCorePreferences.COMPRESS_WORDLISTS, false);
+ store.setDefault(RutaCorePreferences.DICT_REMOVE_WS, false);
}
}
diff --git a/ruta-maven-plugin/src/it/wordlists/pom.xml b/ruta-maven-plugin/src/it/wordlists/pom.xml
index c71e3e2..6392fa4 100644
--- a/ruta-maven-plugin/src/it/wordlists/pom.xml
+++ b/ruta-maven-plugin/src/it/wordlists/pom.xml
@@ -99,6 +99,7 @@
</goals>
<configuration>
<compress>false</compress>
+ <dictRemoveWS>true</dictRemoveWS>
<inputFiles>
<directory>${basedir}/src/main/resources</directory>
<includes>
@@ -117,6 +118,7 @@
</goals>
<configuration>
<compress>false</compress>
+ <dictRemoveWS>true</dictRemoveWS>
<inputFiles>
<directory>${basedir}/src/main/resources</directory>
<includes>
diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
index 254824c..93ebc1f 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
@@ -76,6 +76,12 @@
private boolean compress;
/**
+ * Remove white spaces while generating dictionaries.
+ */
+ @Parameter(defaultValue = "true", required = true)
+ private boolean dictRemoveWS;
+
+ /**
* Fail on error.
*/
@Parameter(defaultValue = "true", required = true)
@@ -107,7 +113,7 @@
MultiTreeWordList trie = null;
try {
- trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()));
+ trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()), dictRemoveWS);
} catch (IOException e) {
handleError("Error creating MTWL file.", e);
}
diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
index 961014e..b10ab3c 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
@@ -78,6 +78,12 @@
private boolean compress;
/**
+ * Remove white spaces while generating dictionaries.
+ */
+ @Parameter(defaultValue = "true", required = true)
+ private boolean dictRemoveWS;
+
+ /**
* Fail on error.
*/
@Parameter(defaultValue = "true", required = true)
@@ -111,7 +117,7 @@
File outputFile = each.getValue();
TreeWordList list = null;
try {
- list = new TreeWordList(inputFile.getAbsolutePath(), false);
+ list = new TreeWordList(inputFile.getAbsolutePath(), dictRemoveWS);
} catch (IOException e) {
handleError("Error generating twl.", e);
}