Merge branch 'main' into release/UIMA-6449-Ruta-3.2.0-release

* main: (21 commits)
  UIMA-6414: Ruta: missing match for optional after sidestep out of composed
  UIMA-6414: Ruta: missing match for optional after sidestep out of composed
  UIMA-6414: Ruta: missing match for optional after sidestep out of composed
  UIMA-6414: Ruta: missing match for optional after sidestep out of composed
  UIMA-6414: Ruta: missing match for optional after sidestep out of composed
  UIMA-6411: Ruta: avoid creation of RutaBasics for bad annotations
  UIMA-6411: Ruta: avoid creation of RutaBasics for bad annotations
  UIMA-6409-Ruta-possible-endless-wildcard-lookahead-in-combination-with-subtokens
  UIMA-6383: Ruta: TRIE - Wordlist entry not annotated
  UIMA-6383: Ruta: TRIE - Wordlist entry not annotated
  UIMA-6383: Ruta: TRIE - Wordlist entry not annotated
  UIMA-6408: Ruta: No type check of features in TRANSFER
  no issue: fix method sig
  UIMA-6394: Ruta: label assignment in alternative match causes problems
  UIMA-6394: Ruta: label assignment in alternative match causes problems
  UIMA-6404: Ruta: @ with quantifier ignores matches
  UIMA-6405: Local variable not captured properly in a wildcard matching condition
  UIMA-6406: Removing an annotation inside a BLOCK only takes effect outside the block
  UIMA-6406: Removing an annotation inside a BLOCK only takes effect outside the block
  UIMA-6405: Local variable not captured properly in a wildcard matching condition
  ...
diff --git a/ruta-core/.gitignore b/ruta-core/.gitignore
new file mode 100644
index 0000000..862d276
--- /dev/null
+++ b/ruta-core/.gitignore
@@ -0,0 +1,2 @@
+input/
+TypeSystem.xml
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java b/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
index f653652..7a5c612 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
@@ -346,12 +346,13 @@
       createRutaBasic(0, 0);

     } else if (anchors.size() == 1) {

       Integer first = anchors.get(0);

-      createRutaBasic(first, first);

+      if (first >= 0 && first <= cas.getDocumentText().length())

+        createRutaBasic(first, first);

     } else {

       for (int i = 0; i < anchors.size() - 1; i++) {

         Integer first = anchors.get(i);

         Integer second = anchors.get(i + 1);

-        if (first < second) { // not really needed

+        if (first < second && first >= 0 && second <= cas.getDocumentText().length()) {

           createRutaBasic(first, second);

         }

       }

@@ -1151,7 +1152,11 @@
 

     if (cas.getTypeSystem().subsumes(type, windowAnnotation.getType())) {

       if (!sensitiveToVisibility || isVisible(windowAnnotation)) {

-        result.add(windowAnnotation);

+        // the window defined by a BLOCK could actually have already been removed, thus we do not

+        // want to return it

+        if (cas.getAnnotationIndex(windowAnnotation.getType()).contains(windowAnnotation)) {

+          result.add(windowAnnotation);

+        }

       }

     }

 

diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/action/TransferAction.java b/ruta-core/src/main/java/org/apache/uima/ruta/action/TransferAction.java
index bf38e7b..cb4694b 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/action/TransferAction.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/action/TransferAction.java
@@ -21,6 +21,7 @@
 

 import java.util.List;

 

+import org.apache.uima.UimaContextHolder;

 import org.apache.uima.cas.CAS;

 import org.apache.uima.cas.Feature;

 import org.apache.uima.cas.FeatureStructure;

@@ -79,10 +80,20 @@
       if (newFeature != null) {

         if (feature.getRange().isPrimitive()) {

           String value = oldFS.getFeatureValueAsString(feature);

-          newFS.setFeatureValueFromString(newFeature, value);

+          try {

+            newFS.setFeatureValueFromString(newFeature, value);

+          } catch (Exception e) {

+            UimaContextHolder.getContext().getLogger().debug("Unable to transfer feature {}: {}",

+                    shortName, e.getMessage());

+          }

         } else {

           FeatureStructure value = oldFS.getFeatureValue(feature);

-          newFS.setFeatureValue(newFeature, value);

+          try {

+            newFS.setFeatureValue(newFeature, value);

+          } catch (Exception e) {

+            UimaContextHolder.getContext().getLogger().debug("Unable to transfer feature {}: {}",

+                    shortName, e.getMessage());

+          }

         }

       }

     }

diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaTestUtils.java b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaTestUtils.java
index 5769578..d5230a4 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaTestUtils.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaTestUtils.java
@@ -19,14 +19,18 @@
 

 package org.apache.uima.ruta.engine;

 

+import static org.apache.uima.fit.factory.TypeSystemDescriptionFactory.createTypeSystemDescription;

+

 import java.io.File;

 import java.io.FileOutputStream;

 import java.io.IOException;

 import java.io.OutputStream;

+import java.lang.management.ManagementFactory;

 import java.net.URISyntaxException;

 import java.net.URL;

 import java.util.ArrayList;

 import java.util.Collection;

+import java.util.Collections;

 import java.util.HashMap;

 import java.util.Iterator;

 import java.util.LinkedHashMap;

@@ -34,6 +38,7 @@
 import java.util.Map;

 import java.util.Map.Entry;

 import java.util.Set;

+import java.util.regex.Pattern;

 

 import org.apache.uima.UIMAFramework;

 import org.apache.uima.analysis_engine.AnalysisEngine;

@@ -60,6 +65,19 @@
 

 public class RutaTestUtils {

 

+  public static final boolean DEBUG_MODE = isDebugging();

+

+  private static boolean isDebugging() {

+

+    Pattern debugPattern = Pattern.compile("-Xdebug|jdwp");

+    for (String arg : ManagementFactory.getRuntimeMXBean().getInputArguments()) {

+      if (debugPattern.matcher(arg).find()) {

+        return true;

+      }

+    }

+    return false;

+  }

+

   public static class TestFeature {

     public String name;

 

@@ -151,24 +169,8 @@
     AnalysisEngineDescription aed = (AnalysisEngineDescription) specifier;

 

     TypeSystemDescription basicTypeSystem = aed.getAnalysisEngineMetaData().getTypeSystem();

-    for (int i = 1; i <= amount; i++) {

-      basicTypeSystem.addType(TYPE + i, "Type for Testing", "uima.tcas.Annotation");

-    }

-

-    if (complexTypes != null) {

-      Set<Entry<String, String>> entrySet = complexTypes.entrySet();

-      for (Entry<String, String> entry : entrySet) {

-        String name = entry.getKey();

-        TypeDescription addType = basicTypeSystem.addType(name, "Type for Testing",

-                entry.getValue());

-        if (features != null) {

-          List<TestFeature> list = features.get(name);

-          for (TestFeature f : list) {

-            addType.addFeature(f.name, f.description, f.range);

-          }

-        }

-      }

-    }

+    addTestTypes(basicTypeSystem);

+    addAdditionalTypes(complexTypes, features, basicTypeSystem);

 

     Collection<TypeSystemDescription> tsds = new ArrayList<TypeSystemDescription>();

     tsds.add(basicTypeSystem);

@@ -248,23 +250,8 @@
     ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

     AnalysisEngineDescription aed = (AnalysisEngineDescription) specifier;

     TypeSystemDescription basicTypeSystem = aed.getAnalysisEngineMetaData().getTypeSystem();

-    for (int i = 1; i <= 50; i++) {

-      basicTypeSystem.addType("org.apache.uima.T" + i, "Type for Testing", "uima.tcas.Annotation");

-    }

-    if (complexTypes != null) {

-      Set<Entry<String, String>> entrySet = complexTypes.entrySet();

-      for (Entry<String, String> entry : entrySet) {

-        String name = entry.getKey();

-        TypeDescription addType = basicTypeSystem.addType(name, "Type for Testing",

-                entry.getValue());

-        if (features != null) {

-          List<TestFeature> list = features.get(name);

-          for (TestFeature f : list) {

-            addType.addFeature(f.name, f.description, f.range);

-          }

-        }

-      }

-    }

+    addTestTypes(basicTypeSystem);

+    addAdditionalTypes(complexTypes, features, basicTypeSystem);

     Collection<TypeSystemDescription> tsds = new ArrayList<TypeSystemDescription>();

     tsds.add(basicTypeSystem);

     TypeSystemDescription mergeTypeSystems = CasCreationUtils.mergeTypeSystems(tsds);

@@ -282,6 +269,31 @@
     return cas;

   }

 

+  public static void addTestTypes(TypeSystemDescription typeSystemDescription) {

+    for (int i = 1; i <= 50; i++) {

+      typeSystemDescription.addType("org.apache.uima.T" + i, "Type for Testing",

+              "uima.tcas.Annotation");

+    }

+  }

+

+  private static void addAdditionalTypes(Map<String, String> complexTypes,

+          Map<String, List<TestFeature>> features, TypeSystemDescription typeSystemDescription) {

+    if (complexTypes != null) {

+      Set<Entry<String, String>> entrySet = complexTypes.entrySet();

+      for (Entry<String, String> entry : entrySet) {

+        String name = entry.getKey();

+        TypeDescription addType = typeSystemDescription.addType(name, "Type for Testing",

+                entry.getValue());

+        if (features != null) {

+          List<TestFeature> list = features.get(name);

+          for (TestFeature f : list) {

+            addType.addFeature(f.name, f.description, f.range);

+          }

+        }

+      }

+    }

+  }

+

   public static void printAnnotations(CAS cas, int typeId) {

     Type t = getTestType(cas, typeId);

     AnnotationIndex<AnnotationFS> ai = cas.getAnnotationIndex(t);

@@ -366,4 +378,34 @@
     }

   }

 

+  public static void storeTypeSystem() {

+    storeTypeSystem(Collections.emptyMap(), Collections.emptyMap());

+  }

+

+  public static void storeTypeSystem(Map<String, String> complexTypes,

+          Map<String, List<TestFeature>> features) {

+

+    File tsFile = new File("TypeSystem.xml");

+

+    try {

+

+      TypeSystemDescription typeSystemDescription = createTypeSystemDescription();

+      addTestTypes(typeSystemDescription);

+      addAdditionalTypes(complexTypes, features, typeSystemDescription);

+      try (OutputStream os = new FileOutputStream(tsFile)) {

+        typeSystemDescription.toXML(os);

+      }

+    } catch (Exception e) {

+      throw new IllegalStateException(e);

+    }

+  }

+

+  public static Map<String, Object> getDebugParams() {

+    Map<String, Object> params = new LinkedHashMap<>();

+    params.put(RutaEngine.PARAM_DEBUG, true);

+    params.put(RutaEngine.PARAM_DEBUG_WITH_MATCHES, true);

+    params.put(RutaEngine.PARAM_CREATED_BY, true);

+    return params;

+  }

+

 }

diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
index 4b6d9ff..fd25685 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
@@ -63,6 +63,8 @@
   /** The cost model we are using. */

   private EditDistanceCostMap costMap;

 

+  private boolean dictRemoveWS = false;

+

   /**

    * Default constructor.

    * 

@@ -155,9 +157,25 @@
    *           When there is a problem reading a path.

    */

   public MultiTreeWordList(String[] pathnames, File base) throws IOException {

+    this(pathnames, base, false);

+  }

+

+  /**

+   * Constructs a TreeWordList from a file with path = filename

+   * 

+   * @param pathnames

+   *          path of the file to create a TextWordList from

+   * @param base

+   *          - the relative base

+   * @param dictRemoveWS

+   *          remove white spaces

+   * @throws IOException

+   *           When there is a problem reading a path.

+   */

+  public MultiTreeWordList(String[] pathnames, File base, boolean dictRemoveWS) throws IOException {

     this.root = new MultiTextNode();

     this.costMap = new EditDistanceCostMap();

-

+    this.dictRemoveWS = dictRemoveWS;

     if (pathnames == null) {

       return;

     }

@@ -177,8 +195,23 @@
    *           - When there is a problem reading the files.

    */

   public MultiTreeWordList(List<File> files, File base) throws IOException {

+    this(files, base, false);

+  }

+

+  /**

+   * @param files

+   *          - the input files

+   * @param base

+   *          - the relative base

+   * @param dictRemoveWS

+   *          - remove white spaces

+   * @throws IOException

+   *           - When there is a problem reading the files.

+   */

+  public MultiTreeWordList(List<File> files, File base, boolean dictRemoveWS) throws IOException {

     this.root = new MultiTextNode();

     this.costMap = new EditDistanceCostMap();

+    this.dictRemoveWS = dictRemoveWS;

 

     if (files == null) {

       return;

@@ -275,6 +308,10 @@
 

     for (Character each : s.toCharArray()) {

 

+      if (dictRemoveWS && Character.isWhitespace(each)) {

+        continue;

+      }

+

       MultiTextNode childNode = pointer.getChildNode(each);

 

       if (childNode == null) {

diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/AbstractRuleElement.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/AbstractRuleElement.java
index f85c809..f367d61 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/AbstractRuleElement.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/AbstractRuleElement.java
@@ -22,7 +22,6 @@
 import static java.util.Arrays.asList;

 

 import java.util.ArrayList;

-import java.util.Collection;

 import java.util.Collections;

 import java.util.List;

 

@@ -114,12 +113,16 @@
       } else {

         RuleElement nextRuleElement = getContainer().getNextElement(newDirection, this);

         if (nextRuleElement != null) {

+          RuleElement sideStepOrigin = null;

+          if (getContainer() instanceof RuleElement) {

+            sideStepOrigin = (RuleElement) getContainer();

+          }

           result = nextRuleElement.continueMatch(newDirection, annotation, ruleMatch, ruleApply,

-                  sideStepContainerMatch, null, null, stream, crowd);

+                  sideStepContainerMatch, sideStepOrigin, entryPoint, stream, crowd);

         } else if (getContainer() instanceof ComposedRuleElement) {

           ComposedRuleElement composed = (ComposedRuleElement) getContainer();

           result = composed.fallbackContinue(newDirection, false, annotation, ruleMatch, ruleApply,

-                  sideStepContainerMatch, null, entryPoint, stream, crowd);

+                  sideStepContainerMatch, composed, entryPoint, stream, crowd);

         }

       }

     }

@@ -128,15 +131,14 @@
 

   protected void doneMatching(RuleMatch ruleMatch, RuleApply ruleApply, RutaStream stream,

           InferenceCrowd crowd) {

-    if (!ruleMatch.isApplied()) {

+    // do not execute actions if they already have been or if this is just a lookahead

+    // (ruleApply==null)

+    if (!ruleMatch.isApplied() && ruleApply != null) {

       ruleApply.add(ruleMatch, stream);

-      RutaRule rule = ruleMatch.getRule();

-      Collection<String> localVariables = rule.getLabels();

       if (ruleMatch.matchedCompletely()) {

-        rule.getEnvironment().acceptTempVariableValues(localVariables);

+        RutaRule rule = ruleMatch.getRule();

+        rule.getEnvironment().acceptTempVariableValues(rule.getOwnLabels());

         rule.getRoot().applyRuleElements(ruleMatch, stream, crowd);

-      } else {

-        rule.getEnvironment().clearTempVariables(localVariables);

       }

       ruleMatch.setApplied(true);

     }

@@ -188,10 +190,15 @@
         for (RutaStatement each : inlinedRules) {

           ScriptApply apply = each.apply(windowStream, crowd);

           blockResult.add(apply);

+          if (each instanceof RutaRule) {

+            // clean up temp variables produced by failed rules

+            ((RutaRule) each).clearOwnLabels();

+          }

         }

         result.add(blockResult);

       }

     }

+

     return result;

   }

 

diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java
index e25038b..ade9153 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java
@@ -527,7 +527,8 @@
               containerMatch, sideStepOrigin, entryPoint, stream, crowd);

     }

 

-    if (sideStepOrigin != null && !failed) {

+    if (sideStepOrigin != null && !failed && sideStepOrigin.getContainer() != null) {

+      // only continue sidestep if we did not yet reach the root

       return sideStepOrigin.continueSideStep(after, ruleMatch, ruleApply, containerMatch,

               entryPoint, stream, crowd);

     }

diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRule.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRule.java
index d6638d2..7c6f653 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRule.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRule.java
@@ -19,8 +19,9 @@
 

 package org.apache.uima.ruta.rule;

 

+import java.util.ArrayList;

 import java.util.Collection;

-import java.util.HashMap;

+import java.util.LinkedHashMap;

 import java.util.List;

 import java.util.Map;

 import java.util.Map.Entry;

@@ -45,10 +46,13 @@
    */

   private Map<String, Object> labels;

 

+  private Collection<String> ownLabels;

+

   public RutaRule(List<RuleElement> elements, RutaBlock parent, int id) {

     super(parent, id);

     this.root = new ComposedRuleElement(elements, null, null, null, null, parent);

-    this.labels = new HashMap<>();

+    this.labels = new LinkedHashMap<>();

+    this.ownLabels = new ArrayList<>();

   }

 

   @Override

@@ -92,33 +96,39 @@
     if (elements != null) {

       // update label map

       for (RuleElement ruleElement : elements) {

-        fillLabelMap(ruleElement);

+        fillLabelMap(ruleElement, true);

       }

     }

 

   }

 

-  private void fillLabelMap(RuleElement ruleElement) {

+  private void fillLabelMap(RuleElement ruleElement, boolean own) {

     if (!StringUtils.isBlank(ruleElement.getLabel())) {

       labels.put(ruleElement.getLabel(), null);

+      if (own) {

+        ownLabels.add(ruleElement.getLabel());

+      }

     }

-    fillLabelMapWithActions(ruleElement.getActions());

+    fillLabelMapWithActions(ruleElement.getActions(), own);

     if (ruleElement instanceof ComposedRuleElement) {

       ComposedRuleElement cre = (ComposedRuleElement) ruleElement;

       List<RuleElement> ruleElements = cre.getRuleElements();

       for (RuleElement each : ruleElements) {

-        fillLabelMap(each);

+        fillLabelMap(each, own);

       }

     }

     fillLabelMapWithInlinedRules(ruleElement.getInlinedConditionRuleBlocks());

     fillLabelMapWithInlinedRules(ruleElement.getInlinedActionRuleBlocks());

   }

 

-  private void fillLabelMapWithActions(List<AbstractRutaAction> actions) {

+  private void fillLabelMapWithActions(List<AbstractRutaAction> actions, boolean own) {

     if (actions != null) {

       for (AbstractRutaAction action : actions) {

         if (action != null && !StringUtils.isBlank(action.getLabel())) {

           labels.put(action.getLabel(), null);

+          if (own) {

+            ownLabels.add(action.getLabel());

+          }

         }

       }

     }

@@ -131,7 +141,7 @@
           if (eachInlined instanceof RutaRule) {

             RutaRule inlinedRule = (RutaRule) eachInlined;

             inlinedRule.setInlined(true);

-            fillLabelMap(inlinedRule.getRoot());

+            fillLabelMap(inlinedRule.getRoot(), false);

           }

         }

       }

@@ -193,4 +203,13 @@
     return labels.keySet();

   }

 

+  public Collection<String> getOwnLabels() {

+    return ownLabels;

+  }

+

+  public void clearOwnLabels() {

+    RutaEnvironment environment = getParent().getEnvironment();

+    environment.clearTempVariables(ownLabels);

+  }

+

 }

diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRuleElement.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRuleElement.java
index f616ab7..d90a999 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRuleElement.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaRuleElement.java
@@ -63,6 +63,10 @@
     Collection<? extends AnnotationFS> anchors = getAnchors(stream);

     boolean useAlternatives = anchors.size() != 1;

     for (AnnotationFS eachAnchor : anchors) {

+

+      // clean up temp variables since we start a new matching iteration

+      ruleMatch.getRule().clearOwnLabels();

+

       if (earlyExit(eachAnchor, ruleApply, stream)) {

         // ... for different matching paradigms that avoid some matches

         continue;

@@ -197,6 +201,11 @@
     if (nextRuleElement != null) {

       result = nextRuleElement.continueMatch(after, eachAnchor, extendedMatch, ruleApply,

               extendedContainerMatch, sideStepOrigin, entryPoint, stream, crowd);

+    } else if (sideStepOrigin != null && !failed && containedIn(sideStepOrigin, getContainer())) {

+      // continue directly with the sidestep if it is contained in this container

+      // if not, we might miss matches in the same direction

+      result = sideStepOrigin.continueSideStep(after, extendedMatch, ruleApply,

+              extendedContainerMatch, entryPoint, stream, crowd);

     } else if (getContainer() instanceof ComposedRuleElement) {

       ComposedRuleElement composed = (ComposedRuleElement) getContainer();

       result = composed.fallbackContinue(after, failed, eachAnchor, extendedMatch, ruleApply,

@@ -205,6 +214,26 @@
     return result;

   }

 

+  private boolean containedIn(RuleElement sideStepOrigin, RuleElementContainer container) {

+    // TODO: should we support this in interface?

+    if (container == null || sideStepOrigin == null) {

+      return false;

+    }

+    List<RuleElement> ruleElements = container.getRuleElements();

+    if (ruleElements.contains(sideStepOrigin)) {

+      return true;

+    } else {

+      for (RuleElement ruleElement : ruleElements) {

+        if (ruleElement instanceof RuleElementContainer) {

+          if (containedIn(sideStepOrigin, (RuleElementContainer) ruleElement)) {

+            return true;

+          }

+        }

+      }

+    }

+    return false;

+  }

+

   @Override

   public List<RuleMatch> continueMatch(boolean after, AnnotationFS annotation, RuleMatch ruleMatch,

           RuleApply ruleApply, ComposedRuleElementMatch containerMatch, RuleElement sideStepOrigin,

@@ -335,8 +364,7 @@
           InferenceCrowd crowd) {

     RuleElementMatch result = new RuleElementMatch(this, containerMatch);

     result.setRuleAnchor(ruleAnchor);

-    List<EvaluatedCondition> evaluatedConditions = new ArrayList<>(

-            conditions.size());

+    List<EvaluatedCondition> evaluatedConditions = new ArrayList<>(conditions.size());

     // boolean base = matcher.match(annotation, stream, getParent());

     boolean base = true;

     MatchContext context = new MatchContext(annotation, this, ruleMatch, after);

diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java b/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
index 94d07d5..1b688a3 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
@@ -507,8 +507,8 @@
     }

     if (annotation != null && iterator != null && iterator.isValid()) {

       AnnotationFS pointer = iterator.get();

-      if ((after && pointer.getEnd() == annotation.getEnd())

-              || (!after && pointer.getBegin() == annotation.getBegin())) {

+      if ((after && pointer.getBegin() < annotation.getEnd())

+              || (!after && pointer.getEnd() > annotation.getBegin())) {

         moveOn(after, iterator, stream);

       }

     }

diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/ConjunctiveRuleElementTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/ConjunctiveRuleElementTest.java
index 02c7bff..0dd61f2 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/ConjunctiveRuleElementTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/ConjunctiveRuleElementTest.java
@@ -97,7 +97,7 @@
   }

 

   @Test

-  public void testWithStartAnchor() {

+  public void testWithStartAnchor() throws Exception {

 

     String document = "Peter did something.";

     String script = "";

@@ -120,18 +120,12 @@
     String fn3 = "tense";

     list.add(new TestFeature(fn3, "", "uima.cas.String"));

 

-    CAS cas = null;

-    try {

-      cas = RutaTestUtils.getCAS(document, typeMap, featureMap);

-      Ruta.apply(cas, script);

-    } catch (Exception e) {

-      e.printStackTrace();

-    }

+    CAS cas = RutaTestUtils.getCAS(document, typeMap, featureMap);

+    Ruta.apply(cas, script);

 

     RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "Peter did");

     RutaTestUtils.assertAnnotationsEquals(cas, 2, 1, "Peter did");

     RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "did something");

 

-    cas.release();

   }

 }

diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java
index 4476a55..af6b368 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java
@@ -23,12 +23,15 @@
 

 import org.apache.uima.cas.CAS;

 import org.apache.uima.cas.FSIterator;

+import org.apache.uima.cas.SelectFSs;

 import org.apache.uima.cas.text.AnnotationFS;

 import org.apache.uima.cas.text.AnnotationIndex;

 import org.apache.uima.ruta.engine.Ruta;

 import org.apache.uima.ruta.engine.RutaEngine;

 import org.apache.uima.ruta.engine.RutaTestUtils;

 import org.apache.uima.ruta.rule.RuleInference1Test;

+import org.apache.uima.ruta.type.RutaBasic;

+import org.junit.Assert;

 import org.junit.Test;

 

 public class EmptyDocumentTest {

@@ -62,4 +65,18 @@
     RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "");

   }

 

+  @Test

+  public void testSpaceWithInvalidAnnotation() throws Exception {

+    CAS cas = RutaTestUtils.getCAS(" ");

+    AnnotationFS annotation = cas.createAnnotation(cas.getAnnotationType(), -1, 2);

+    cas.addFsToIndexes(annotation);

+    Ruta.apply(cas, "Document;");

+

+    SelectFSs<RutaBasic> select = cas.select(RutaBasic.class);

+    Assert.assertEquals(1, select.count());

+    RutaBasic rutaBasic = select.findAny().get();

+    Assert.assertEquals(0, rutaBasic.getBegin());

+    Assert.assertEquals(1, rutaBasic.getEnd());

+  }

+

 }

diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/action/TransferTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/action/TransferTest.java
index 31f4a89..bac9613 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/action/TransferTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/action/TransferTest.java
@@ -21,6 +21,9 @@
 

 import static org.junit.Assert.assertEquals;

 

+import java.util.Arrays;

+import java.util.LinkedHashMap;

+import java.util.List;

 import java.util.Map;

 import java.util.TreeMap;

 

@@ -30,8 +33,10 @@
 import org.apache.uima.cas.Type;

 import org.apache.uima.cas.text.AnnotationFS;

 import org.apache.uima.cas.text.AnnotationIndex;

+import org.apache.uima.ruta.engine.Ruta;

 import org.apache.uima.ruta.engine.RutaEngine;

 import org.apache.uima.ruta.engine.RutaTestUtils;

+import org.apache.uima.ruta.engine.RutaTestUtils.TestFeature;

 import org.junit.Test;

 

 public class TransferTest {

@@ -45,8 +50,8 @@
     complexTypes.put(type, CAS.TYPE_NAME_DOCUMENT_ANNOTATION);

     CAS cas = null;

     try {

-      cas = RutaTestUtils.process(namespace + "/" + name + RutaEngine.SCRIPT_FILE_EXTENSION, namespace + "/" + name

-              + ".txt", 50, false, false, complexTypes, null);

+      cas = RutaTestUtils.process(namespace + "/" + name + RutaEngine.SCRIPT_FILE_EXTENSION,

+              namespace + "/" + name + ".txt", 50, false, false, complexTypes, null);

     } catch (Exception e) {

       e.printStackTrace();

       assert (false);

@@ -63,7 +68,40 @@
     Feature featureByBaseName = t.getFeatureByBaseName("language");

     String stringValue = afs.getStringValue(featureByBaseName);

     assertEquals("x-unspecified", stringValue);

-   

+

     cas.release();

   }

+

+  @Test

+  public void testIncompatibleFeatureRanges() throws Exception {

+

+    Map<String, String> typeMap = new LinkedHashMap<String, String>();

+    typeMap.put("Struct11", "uima.tcas.Annotation");

+    typeMap.put("Struct12", "uima.tcas.Annotation");

+    typeMap.put("Struct21", "uima.tcas.Annotation");

+    typeMap.put("Struct22", "uima.tcas.Annotation");

+    Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();

+    featureMap.put("Struct11", Arrays.asList(new TestFeature("f", "", CAS.TYPE_NAME_ANNOTATION)));

+    featureMap.put("Struct12", Arrays.asList(new TestFeature("f", "", CAS.TYPE_NAME_STRING)));

+    featureMap.put("Struct21",

+            Arrays.asList(new TestFeature("array", "", CAS.TYPE_NAME_STRING_ARRAY)));

+    featureMap.put("Struct22",

+            Arrays.asList(new TestFeature("array", "", CAS.TYPE_NAME_BOOLEAN_ARRAY)));

+

+    CAS cas = RutaTestUtils.getCAS("This is a test.", typeMap, featureMap);

+    String script = "CW{->s:Struct11,s.f=CW};";

+    script += "CW{->s:Struct21,s.array={true, false, true}};";

+    script += "Struct11{-> TRANSFER(Struct12)};";

+    script += "Struct21{-> TRANSFER(Struct22)};";

+

+    Ruta.apply(cas, script, RutaTestUtils.getDebugParams());

+

+    if (RutaTestUtils.DEBUG_MODE) {

+      RutaTestUtils.storeTypeSystem(typeMap, featureMap);

+      RutaTestUtils.storeCas(cas, "testIncompatibleFeatureRanges");

+    }

+

+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 0);

+

+  }

 }

diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/action/UnmarkTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/action/UnmarkTest.java
index 64b7152..3eb4289 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/action/UnmarkTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/action/UnmarkTest.java
@@ -20,6 +20,7 @@
 package org.apache.uima.ruta.action;

 

 import java.util.ArrayList;

+import java.util.LinkedHashMap;

 import java.util.List;

 import java.util.Map;

 import java.util.TreeMap;

@@ -43,45 +44,70 @@
 

     cas.release();

   }

-  

-  

+

   @Test

   public void testAnnotationExpression() throws Exception {

-    Map<String, String> typeMap = new TreeMap<String, String>();

+    Map<String, String> typeMap = new LinkedHashMap<String, String>();

     typeMap.put("Complex", "uima.tcas.Annotation");

     Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();

-    List<TestFeature> list = new ArrayList<RutaTestUtils.TestFeature>();

+    List<TestFeature> list = new ArrayList<>();

     featureMap.put("Complex", list);

     list.add(new TestFeature("inner", "", "uima.tcas.Annotation"));

-    

+

     CAS cas = RutaTestUtils.getCAS("This is a test.", typeMap, featureMap);

     String script = "";

     script += "CW{->T1};t:T1 SW SW{-> UNMARK(t)};";

     script += "CW{->T2};\n t:T2 # PERIOD{-> Complex, Complex.inner=t};\n Complex{-> UNMARK(Complex.inner)};\n";

     Ruta.apply(cas, script);

-    

+

     RutaTestUtils.assertAnnotationsEquals(cas, 1, 0);

     RutaTestUtils.assertAnnotationsEquals(cas, 2, 0);

-    

+

   }

-  

+

   @Test

-  public void testAnnotationListExpression()  throws Exception {

-    Map<String, String> typeMap = new TreeMap<String, String>();

+  public void testAnnotationListExpression() throws Exception {

+    Map<String, String> typeMap = new LinkedHashMap<String, String>();

     typeMap.put("Complex", "uima.tcas.Annotation");

     Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();

-    List<TestFeature> list = new ArrayList<RutaTestUtils.TestFeature>();

+    List<TestFeature> list = new ArrayList<>();

     featureMap.put("Complex", list);

     list.add(new TestFeature("inner", "", "uima.cas.FSArray"));

-    

+

     CAS cas = RutaTestUtils.getCAS("This is a test.", typeMap, featureMap);

     String script = "";

     script += "W{->T1}; Document{-> Complex, Complex.inner = T1};";

     script += "Complex{-> UNMARK(Complex.inner)};\n";

     Ruta.apply(cas, script);

-    

+

     RutaTestUtils.assertAnnotationsEquals(cas, 1, 0);

     RutaTestUtils.assertAnnotationsEquals(cas, 2, 0);

   }

-  

+

+  @Test

+  public void testUnmarkWithFeatureMatchInBlock() throws Exception {

+

+    Map<String, String> typeMap = new LinkedHashMap<String, String>();

+    typeMap.put("Struct", "uima.tcas.Annotation");

+    Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();

+    List<TestFeature> list = new ArrayList<>();

+    featureMap.put("Struct", list);

+    list.add(new TestFeature("s", "", CAS.TYPE_NAME_STRING));

+

+    CAS cas = RutaTestUtils.getCAS("This is a test.", typeMap, featureMap);

+    String script = "\"a\"{->s:Struct,Struct.s=\"foo\"};";

+    script += "BLOCK(SoftRemove) Struct.s==\"foo\"{} {\r\n"

+            + "    t:Struct.s==\"foo\"{-> UNMARK(t)};\r\n" //

+            + "    t:Struct.s==\"foo\"{-> T1}; \r\n" + "}";

+

+    Ruta.apply(cas, script, RutaTestUtils.getDebugParams());

+

+    if (RutaTestUtils.DEBUG_MODE) {

+      RutaTestUtils.storeTypeSystem(typeMap, featureMap);

+      RutaTestUtils.storeCas(cas, "testUnmarkWithFeatureMatchInBlock");

+    }

+

+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 0);

+  }

+

 }

diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/expression/annotation/AnnotationLabelExpressionTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/expression/annotation/AnnotationLabelExpressionTest.java
index c4738aa..30c9255 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/expression/annotation/AnnotationLabelExpressionTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/expression/annotation/AnnotationLabelExpressionTest.java
@@ -845,4 +845,36 @@
     RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "Some text");

   }

 

+  @Test

+  public void testCompareStringFeatures() throws Exception {

+

+    String document = "A b.";

+    Map<String, String> typeMap = new TreeMap<String, String>();

+    typeMap.put("Struct1", "uima.tcas.Annotation");

+    typeMap.put("Struct2", "uima.tcas.Annotation");

+

+    Map<String, List<TestFeature>> featureMap = new TreeMap<String, List<TestFeature>>();

+    List<TestFeature> list = new ArrayList<RutaTestUtils.TestFeature>();

+    featureMap.put("Struct1", list);

+    featureMap.put("Struct2", list);

+    list.add(new TestFeature("s", "", "uima.cas.String"));

+

+    String script = "";

+    script += "SW{->CREATE(Struct2, \"s\" = \"b\")};\n";

+    script += "SW{->CREATE(Struct2, \"s\" = \"a\")};\n";

+    script += "CW{->CREATE(Struct1, \"s\" = \"a\")};\n";

+    script += "(s1:Struct1 s2:Struct2){s1.s==s2.s-> T1};\n";

+

+    CAS cas = RutaTestUtils.getCAS(document, typeMap, featureMap);

+    Ruta.apply(cas, script, RutaTestUtils.getDebugParams());

+

+    if (RutaTestUtils.DEBUG_MODE) {

+      RutaTestUtils.storeTypeSystem(typeMap, featureMap);

+      RutaTestUtils.storeCas(cas, "testCompareStringFeatures");

+    }

+

+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "A b");

+

+  }

+

 }

diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/rule/ManualAnchoringTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/rule/ManualAnchoringTest.java
index 64b7561..e7d4a6c 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/rule/ManualAnchoringTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/rule/ManualAnchoringTest.java
@@ -38,6 +38,31 @@
 
     RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "A, B and C");
 
-    cas.release();
   }
+
+  @Test
+  public void testAcrossComposedInSequence() throws Exception {
+    String text = "bla CAP 1-2 bla";
+
+    String script = "FOREACH(cap) CAP{}{";
+    script += "ANY{-PARTOF(SPECIAL)} @cap (NUM SPECIAL NUM){-> T1} ANY{-PARTOF(SPECIAL)};";
+    script += "}";
+
+    CAS cas = RutaTestUtils.getCAS(text);
+    Ruta.apply(cas, script);
+
+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "1-2");
+  }
+
+  @Test
+  public void testLeaveComposedInSequence() throws Exception {
+    String text = "bla w CAP w bla";
+    String script = "(W @CAP W) {->T1} ANY{-PARTOF(NUM)};";
+
+    CAS cas = RutaTestUtils.getCAS(text);
+    Ruta.apply(cas, script);
+
+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "w CAP w");
+  }
+
 }
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/rule/SidestepInComposedTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/rule/SidestepInComposedTest.java
index c990a44..c97b34c 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/rule/SidestepInComposedTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/rule/SidestepInComposedTest.java
@@ -66,4 +66,18 @@
 

   }

 

+  @Test

+  public void testOptionalBeforeComposed() throws Exception {

+    String document = "test 05/05 test\n";

+    document += "test 06/06 . test\n";

+    document += "test . 07/07 test\n";

+    String script = "_{-PARTOF(PM)} (NUM SPECIAL @NUM){-> T1} _{-PARTOF({PM})};\n";

+

+    CAS cas = RutaTestUtils.getCAS(document);

+    Ruta.apply(cas, script);

+

+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "05/05");

+

+  }

+

 }

diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/rule/WildCard2Test.java b/ruta-core/src/test/java/org/apache/uima/ruta/rule/WildCard2Test.java
index 38dabdf..03bd641 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/rule/WildCard2Test.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/rule/WildCard2Test.java
@@ -275,13 +275,12 @@
   public void testLabelForFailedLookahead() throws Exception {

     String document = "A x B x C x D";

     String script = "(w1:CW{REGEXP(\"A\")} # w2:CW{REGEXP(\"C\")})->{w1{->T1};};";

-	

-	CAS cas = RutaTestUtils.getCAS(document, null, null, false);

-    Ruta.apply(cas, script);

-	

-	RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "A");

-  }

 

+    CAS cas = RutaTestUtils.getCAS(document, null, null, false);

+    Ruta.apply(cas, script);

+

+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "A");

+  }

 

   @Test

   public void testLastElementAlsoAnnotatedWithLookahead() throws Exception {

@@ -297,4 +296,33 @@
     RutaTestUtils.assertAnnotationsEquals(cas, 4, 2, "c", "c");

   }

 

+  @Test

+  public void testLookaheadWithFeatureMatch() throws Exception {

+    String document = "a 2 b 3 c 4 d";

+    String script = "";

+    script += "\"2\"{->s:Struct,s.s=\"x\"};\n";

+    script += "\"3\"{->s:Struct};\n";

+    script += "\"4\"{->s:Struct,s.s=\"y\"};\n";

+    script += "s1:Struct.s==\"x\" # s2:Struct.s==\"y\"{->s2.s=s1.s, T1};\n";

+    script += "s:Struct.s==\"x\"{->T2};\n";

+

+    Map<String, String> complexType = new HashMap<>();

+    complexType.put("Struct", CAS.TYPE_NAME_ANNOTATION);

+    Map<String, List<TestFeature>> featureMap = new HashMap<>();

+    List<TestFeature> list = new ArrayList<>();

+    list.add(new TestFeature("s", "", CAS.TYPE_NAME_STRING));

+    featureMap.put("Struct", list);

+

+    CAS cas = RutaTestUtils.getCAS(document, complexType, featureMap);

+    Ruta.apply(cas, script);

+

+    if (RutaTestUtils.DEBUG_MODE) {

+      RutaTestUtils.storeTypeSystem(complexType, featureMap);

+      RutaTestUtils.storeCas(cas, "testLookaheadWithFeatureMatch");

+    }

+

+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "4");

+    RutaTestUtils.assertAnnotationsEquals(cas, 2, 2, "2", "4");

+  }

+

 }

diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/rule/quantifier/MinMaxQuantifierTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/rule/quantifier/MinMaxQuantifierTest.java
index 425ee0e..84a8086 100644
--- a/ruta-core/src/test/java/org/apache/uima/ruta/rule/quantifier/MinMaxQuantifierTest.java
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/rule/quantifier/MinMaxQuantifierTest.java
@@ -37,6 +37,26 @@
     Ruta.apply(cas, script);
 
     RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "B, B, B");
-    cas.release();
+  }
+
+  @Test
+  public void testMinMaxOnComposedWithAnchor() throws Exception {
+    String document = "1 2 3 4 5 6 7 8 9 10";
+    String script = "";
+    script += "(NUM @NUM NUM NUM)[2,2]{-> T1};\n";
+    script += "(NUM NUM @NUM NUM)[2,2]{-> T2};\n";
+
+    CAS cas = RutaTestUtils.getCAS(document);
+    Ruta.apply(cas, script);
+
+//    if (RutaTestUtils.DEBUG_MODE) {
+//      RutaTestUtils.storeTypeSystem();
+//      RutaTestUtils.storeCas(cas, "testMinMaxOnComposedWithAnchor");
+//    }
+
+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 3, "1 2 3 4 5 6 7 8", "2 3 4 5 6 7 8 9",
+            "3 4 5 6 7 8 9 10");
+    RutaTestUtils.assertAnnotationsEquals(cas, 2, 3, "1 2 3 4 5 6 7 8", "2 3 4 5 6 7 8 9",
+            "3 4 5 6 7 8 9 10");
   }
 }
diff --git a/ruta-docbook/src/docbook/tools.ruta.howtos.xml b/ruta-docbook/src/docbook/tools.ruta.howtos.xml
index 3cecf68..a46a9e4 100644
--- a/ruta-docbook/src/docbook/tools.ruta.howtos.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.howtos.xml
@@ -419,7 +419,11 @@
   <!-- Compress resulting tree word list. -->
   <!-- default value: true -->
   <compress>true</compress>
-
+  
+  <!-- Remove white spaces when generating word list. -->
+  <!-- default value: true -->
+  <dictRemoveWS>true</dictRemoveWS>
+  
   <!-- The source files for the tree word list. -->
   <!-- default value: none -->
   <inputFiles>
@@ -472,6 +476,10 @@
   <!-- Compress resulting tree word list. -->
   <!-- default value: true -->
   <compress>true</compress>
+  
+  <!-- Remove white spaces when generating word list. -->
+  <!-- default value: true -->
+  <dictRemoveWS>true</dictRemoveWS>
 
   <!-- The source files for the multi tree word list. -->
   <!-- default value: none -->
diff --git a/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml b/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
index 641b743..806a054 100644
--- a/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
@@ -90,5 +90,8 @@
     <quote>generated.mtwl</quote>

     will be created.

   </para>

+  <para>

+    The preferences page provides the option to remove white spaces when generating the word lists.

+  </para>

 

 </section>
\ No newline at end of file
diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
index 8c3fbcd..1b6c318 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
@@ -54,12 +54,16 @@
 public class MultiTWLConverterHandler implements IHandler {

   private class ConverterHandlerJob extends Job {

     ExecutionEvent event;

+

     private boolean compress;

 

-    ConverterHandlerJob(ExecutionEvent event, boolean compress) {

+    private boolean dictRemoveWS;

+

+    ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {

       super("Converting...");

       this.event = event;

       this.compress = compress;

+      this.dictRemoveWS = dictRemoveWS;

       setUser(true);

     }

 

@@ -88,7 +92,7 @@
         if (!paths.isEmpty()) {

           MultiTreeWordList trie;

           try {

-            trie = new MultiTreeWordList(paths.toArray(new String[0]), null);

+            trie = new MultiTreeWordList(paths.toArray(new String[0]), null, dictRemoveWS);

           } catch (IOException e) {

             RutaAddonsPlugin.error(e);

             return Status.CANCEL_STATUS;

@@ -126,16 +130,20 @@
     }

   }

 

+  @Override

   public void addHandlerListener(IHandlerListener handlerListener) {

   }

 

+  @Override

   public void dispose() {

   }

 

+  @Override

   public Object execute(ExecutionEvent event) throws ExecutionException {

     IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();

     boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);

-    new ConverterHandlerJob(event, compress).schedule();

+    boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);

+    new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();

     return null;

   }

 

@@ -161,14 +169,17 @@
     return paths;

   }

 

+  @Override

   public boolean isEnabled() {

     return true;

   }

 

+  @Override

   public boolean isHandled() {

     return true;

   }

 

+  @Override

   public void removeHandlerListener(IHandlerListener handlerListener) {

 

   }

diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
index 45a7016..cd2784e 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
@@ -50,12 +50,16 @@
 

   private class ConverterHandlerJob extends Job {

     ExecutionEvent event;

+

     private boolean compress;

 

-    ConverterHandlerJob(ExecutionEvent event, boolean compress) {

+    private boolean dictRemoveWS;

+

+    ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {

       super("Converting...");

       this.event = event;

       this.compress = compress;

+      this.dictRemoveWS = dictRemoveWS;

       setUser(true);

     }

 

@@ -81,7 +85,7 @@
         String path = file.getRawLocation().toString();

         TreeWordList list;

         try {

-          list = new TreeWordList(path, false);

+          list = new TreeWordList(path, dictRemoveWS);

         } catch (IOException e) {

           RutaAddonsPlugin.error(e);

           return Status.CANCEL_STATUS;

@@ -109,27 +113,34 @@
     }

   }

 

+  @Override

   public void addHandlerListener(IHandlerListener handlerListener) {

   }

 

+  @Override

   public void dispose() {

   }

 

+  @Override

   public Object execute(ExecutionEvent event) throws ExecutionException {

     IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();

     boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);

-    new ConverterHandlerJob(event, compress).schedule();

+    boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);

+    new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();

     return null;

   }

 

+  @Override

   public boolean isEnabled() {

     return true;

   }

 

+  @Override

   public boolean isHandled() {

     return true;

   }

 

+  @Override

   public void removeHandlerListener(IHandlerListener handlerListener) {

 

   }

diff --git a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
index d4774eb..cc3df83 100644
--- a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
+++ b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
@@ -30,8 +30,8 @@
 /**

  * Preference page to manage preferences for the ide plugin.

  */

-public class RutaBuilderPreferencePage extends FieldEditorPreferencePage implements

-        IWorkbenchPreferencePage {

+public class RutaBuilderPreferencePage extends FieldEditorPreferencePage

+        implements IWorkbenchPreferencePage {

 

   private BooleanFieldEditor builderImport;

 

@@ -60,13 +60,18 @@
             RutaCorePreferences.BUILDER_IGNORE_DUPLICATE_SHORTNAMES,

             RutaPreferencesMessages.BuilderIgnoreDuplicateShortnames, getFieldEditorParent());

     addField(builderShortNames);

-    

-    compressWordLists = new BooleanFieldEditor(

-            RutaCorePreferences.COMPRESS_WORDLISTS,

+

+    compressWordLists = new BooleanFieldEditor(RutaCorePreferences.COMPRESS_WORDLISTS,

             RutaPreferencesMessages.CompressWordLists, getFieldEditorParent());

     addField(compressWordLists);

+

+    // Register each editor via addField(); an unregistered editor is never loaded or stored.

+    BooleanFieldEditor dictRemoveWS = new BooleanFieldEditor(RutaCorePreferences.DICT_REMOVE_WS,

+            RutaPreferencesMessages.DictRemoveWS, getFieldEditorParent());

+    addField(dictRemoveWS);

   }

 

+  @Override

   public void init(IWorkbench workbench) {

   }

 

diff --git a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
index 5503f03..8219a1e 100644
--- a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
+++ b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
@@ -22,8 +22,7 @@
 import org.eclipse.osgi.util.NLS;

 

 public class RutaPreferencesMessages extends NLS {

-  private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$	

-

+  private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$

 

   private RutaPreferencesMessages() {

     // Do not instantiate

@@ -78,12 +77,13 @@
   public static String ProjectClearOutput;

 

   public static String NoVMInDevMode;

-  

+

   public static String AddSDI;

-  

+

   public static String CompressWordLists;

-  

+

+  public static String DictRemoveWS;

+

   public static String DefaultCasSerializationFormat;

 

-  

 }

diff --git a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
index b1b237f..7ae71a5 100644
--- a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
+++ b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
@@ -44,4 +44,5 @@
 NoVMInDevMode = Do not start a VM in development mode.

 AddSDI = Update Source Document Information when launching a script.

 CompressWordLists = Compress generated twl/mtwl word lists.

+DictRemoveWS = Remove white spaces when generating twl/mtwl word lists.

 DefaultCasSerializationFormat = Default CAS serialization format:

diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
index e6dec76..4d1e86d 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
@@ -34,5 +34,7 @@
 

   public static final String COMPRESS_WORDLISTS = "CompressWordLists";

 

+  public static final String DICT_REMOVE_WS = "dictRemoveWS";

+

   public static final String DEFAULT_CAS_SERIALIZATION_FORMAT = "DefaultCasSerializationFormat";

 }

diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
index 1217bf3..138a528 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
@@ -28,6 +28,7 @@
   public RutaPreferenceInitializer() {

   }

 

+  @Override

   public void initializeDefaultPreferences() {

     IPreferenceStore store = RutaIdeCorePlugin.getDefault().getPreferenceStore();

     // TaskTagUtils.initializeDefaultValues(store);

@@ -38,6 +39,7 @@
     store.setDefault(RutaCorePreferences.NO_VM_IN_DEV_MODE, false);

     store.setDefault(RutaCorePreferences.ADD_SDI, false);

     store.setDefault(RutaCorePreferences.COMPRESS_WORDLISTS, false);

+    store.setDefault(RutaCorePreferences.DICT_REMOVE_WS, false);

   }

 

 }

diff --git a/ruta-maven-plugin/src/it/wordlists/pom.xml b/ruta-maven-plugin/src/it/wordlists/pom.xml
index c71e3e2..6392fa4 100644
--- a/ruta-maven-plugin/src/it/wordlists/pom.xml
+++ b/ruta-maven-plugin/src/it/wordlists/pom.xml
@@ -99,6 +99,7 @@
             </goals>
             <configuration>
               <compress>false</compress>
+              <dictRemoveWS>true</dictRemoveWS>
               <inputFiles>
                 <directory>${basedir}/src/main/resources</directory>
                 <includes>
@@ -117,6 +118,7 @@
             </goals>
             <configuration>
               <compress>false</compress>
+              <dictRemoveWS>true</dictRemoveWS>
               <inputFiles>
                 <directory>${basedir}/src/main/resources</directory>
                 <includes>
diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
index 254824c..93ebc1f 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
@@ -76,6 +76,12 @@
   private boolean compress;

 

   /**

+   * Remove white spaces while generating dictionaries.

+   */

+  @Parameter(defaultValue = "true", required = true)

+  private boolean dictRemoveWS;

+

+  /**

    * Fail on error.

    */

   @Parameter(defaultValue = "true", required = true)

@@ -107,7 +113,7 @@
 

     MultiTreeWordList trie = null;

     try {

-      trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()));

+      trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()), dictRemoveWS);

     } catch (IOException e) {

       handleError("Error creating MTWL file.", e);

     }

diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
index 961014e..b10ab3c 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
@@ -78,6 +78,12 @@
   private boolean compress;

 

   /**

+   * Remove white spaces while generating dictionaries.

+   */

+  @Parameter(defaultValue = "true", required = true)

+  private boolean dictRemoveWS;

+

+  /**

    * Fail on error.

    */

   @Parameter(defaultValue = "true", required = true)

@@ -111,7 +117,7 @@
       File outputFile = each.getValue();

       TreeWordList list = null;

       try {

-        list = new TreeWordList(inputFile.getAbsolutePath(), false);

+        list = new TreeWordList(inputFile.getAbsolutePath(), dictRemoveWS);

       } catch (IOException e) {

         handleError("Error generating twl.", e);

       }