Merge pull request #71 from apache/UIMA-6383-Ruta-TRIE-Wordlist-entry-not-annotated

UIMA-6383: Ruta: TRIE - Wordlist entry not annotated
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
index 4b6d9ff..fd25685 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
@@ -63,6 +63,8 @@
   /** The cost model we are using. */

   private EditDistanceCostMap costMap;

 

+  private boolean dictRemoveWS = false;

+

   /**

    * Default constructor.

    * 

@@ -155,9 +157,25 @@
    *           When there is a problem reading a path.

    */

   public MultiTreeWordList(String[] pathnames, File base) throws IOException {

+    this(pathnames, base, false);

+  }

+

+  /**

+   * Constructs a MultiTreeWordList from the files at the given paths.

+   * 

+   * @param pathnames

+   *          paths of the files to create the word list from

+   * @param base

+   *          - the relative base

+   * @param dictRemoveWS

+   *          if true, whitespace characters are skipped when entries are added

+   * @throws IOException

+   *           When there is a problem reading a path.

+   */

+  public MultiTreeWordList(String[] pathnames, File base, boolean dictRemoveWS) throws IOException {

     this.root = new MultiTextNode();

     this.costMap = new EditDistanceCostMap();

-

+    this.dictRemoveWS = dictRemoveWS;

     if (pathnames == null) {

       return;

     }

@@ -177,8 +195,23 @@
    *           - When there is a problem reading the files.

    */

   public MultiTreeWordList(List<File> files, File base) throws IOException {

+    this(files, base, false);

+  }

+

+  /**

+   * @param files

+   *          - the input files

+   * @param base

+   *          - the relative base

+   * @param dictRemoveWS

+   *          - if true, whitespace characters are skipped when entries are added

+   * @throws IOException

+   *           - When there is a problem reading the files.

+   */

+  public MultiTreeWordList(List<File> files, File base, boolean dictRemoveWS) throws IOException {

     this.root = new MultiTextNode();

     this.costMap = new EditDistanceCostMap();

+    this.dictRemoveWS = dictRemoveWS;

 

     if (files == null) {

       return;

@@ -275,6 +308,10 @@
 

     for (Character each : s.toCharArray()) {

 

+      if (dictRemoveWS && Character.isWhitespace(each)) {

+        continue;

+      }

+

       MultiTextNode childNode = pointer.getChildNode(each);

 

       if (childNode == null) {

diff --git a/ruta-docbook/src/docbook/tools.ruta.howtos.xml b/ruta-docbook/src/docbook/tools.ruta.howtos.xml
index 3cecf68..a46a9e4 100644
--- a/ruta-docbook/src/docbook/tools.ruta.howtos.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.howtos.xml
@@ -419,7 +419,11 @@
   <!-- Compress resulting tree word list. -->
   <!-- default value: true -->
   <compress>true</compress>
-
+  
+  <!-- Remove white spaces when generating word list. -->
+  <!-- default value: true -->
+  <dictRemoveWS>true</dictRemoveWS>
+  
   <!-- The source files for the tree word list. -->
   <!-- default value: none -->
   <inputFiles>
@@ -472,6 +476,10 @@
   <!-- Compress resulting tree word list. -->
   <!-- default value: true -->
   <compress>true</compress>
+  
+  <!-- Remove white spaces when generating word list. -->
+  <!-- default value: true -->
+  <dictRemoveWS>true</dictRemoveWS>
 
   <!-- The source files for the multi tree word list. -->
   <!-- default value: none -->
diff --git a/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml b/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
index 641b743..806a054 100644
--- a/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
@@ -90,5 +90,8 @@
     <quote>generated.mtwl</quote>

     will be created.

   </para>

+  <para>

+    The preferences page provides an option to remove whitespace when generating the word lists; it is disabled by default.

+  </para>

 

 </section>
\ No newline at end of file
diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
index 8c3fbcd..1b6c318 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
@@ -54,12 +54,16 @@
 public class MultiTWLConverterHandler implements IHandler {

   private class ConverterHandlerJob extends Job {

     ExecutionEvent event;

+

     private boolean compress;

 

-    ConverterHandlerJob(ExecutionEvent event, boolean compress) {

+    private boolean dictRemoveWS;

+

+    ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {

       super("Converting...");

       this.event = event;

       this.compress = compress;

+      this.dictRemoveWS = dictRemoveWS;

       setUser(true);

     }

 

@@ -88,7 +92,7 @@
         if (!paths.isEmpty()) {

           MultiTreeWordList trie;

           try {

-            trie = new MultiTreeWordList(paths.toArray(new String[0]), null);

+            trie = new MultiTreeWordList(paths.toArray(new String[0]), null, dictRemoveWS);

           } catch (IOException e) {

             RutaAddonsPlugin.error(e);

             return Status.CANCEL_STATUS;

@@ -126,16 +130,20 @@
     }

   }

 

+  @Override

   public void addHandlerListener(IHandlerListener handlerListener) {

   }

 

+  @Override

   public void dispose() {

   }

 

+  @Override

   public Object execute(ExecutionEvent event) throws ExecutionException {

     IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();

     boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);

-    new ConverterHandlerJob(event, compress).schedule();

+    boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);

+    new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();

     return null;

   }

 

@@ -161,14 +169,17 @@
     return paths;

   }

 

+  @Override

   public boolean isEnabled() {

     return true;

   }

 

+  @Override

   public boolean isHandled() {

     return true;

   }

 

+  @Override

   public void removeHandlerListener(IHandlerListener handlerListener) {

 

   }

diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
index 45a7016..cd2784e 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
@@ -50,12 +50,16 @@
 

   private class ConverterHandlerJob extends Job {

     ExecutionEvent event;

+

     private boolean compress;

 

-    ConverterHandlerJob(ExecutionEvent event, boolean compress) {

+    private boolean dictRemoveWS;

+

+    ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {

       super("Converting...");

       this.event = event;

       this.compress = compress;

+      this.dictRemoveWS = dictRemoveWS;

       setUser(true);

     }

 

@@ -81,7 +85,7 @@
         String path = file.getRawLocation().toString();

         TreeWordList list;

         try {

-          list = new TreeWordList(path, false);

+          list = new TreeWordList(path, dictRemoveWS);

         } catch (IOException e) {

           RutaAddonsPlugin.error(e);

           return Status.CANCEL_STATUS;

@@ -109,27 +113,34 @@
     }

   }

 

+  @Override

   public void addHandlerListener(IHandlerListener handlerListener) {

   }

 

+  @Override

   public void dispose() {

   }

 

+  @Override

   public Object execute(ExecutionEvent event) throws ExecutionException {

     IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();

     boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);

-    new ConverterHandlerJob(event, compress).schedule();

+    boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);

+    new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();

     return null;

   }

 

+  @Override

   public boolean isEnabled() {

     return true;

   }

 

+  @Override

   public boolean isHandled() {

     return true;

   }

 

+  @Override

   public void removeHandlerListener(IHandlerListener handlerListener) {

 

   }

diff --git a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
index d4774eb..cc3df83 100644
--- a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
+++ b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
@@ -30,8 +30,8 @@
 /**

  * Preference page to manage preferences for the ide plugin.

  */

-public class RutaBuilderPreferencePage extends FieldEditorPreferencePage implements

-        IWorkbenchPreferencePage {

+public class RutaBuilderPreferencePage extends FieldEditorPreferencePage

+        implements IWorkbenchPreferencePage {

 

   private BooleanFieldEditor builderImport;

 

@@ -60,13 +60,16 @@
             RutaCorePreferences.BUILDER_IGNORE_DUPLICATE_SHORTNAMES,

             RutaPreferencesMessages.BuilderIgnoreDuplicateShortnames, getFieldEditorParent());

     addField(builderShortNames);

-    

-    compressWordLists = new BooleanFieldEditor(

-            RutaCorePreferences.COMPRESS_WORDLISTS,

+

+    compressWordLists = new BooleanFieldEditor(RutaCorePreferences.COMPRESS_WORDLISTS,

             RutaPreferencesMessages.CompressWordLists, getFieldEditorParent());

+    addField(compressWordLists); // register now: the field is reassigned just below

+    compressWordLists = new BooleanFieldEditor(RutaCorePreferences.DICT_REMOVE_WS,

+            RutaPreferencesMessages.DictRemoveWS, getFieldEditorParent());

     addField(compressWordLists);

   }

 

+  @Override

   public void init(IWorkbench workbench) {

   }

 

diff --git a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
index 5503f03..8219a1e 100644
--- a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
+++ b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
@@ -22,8 +22,7 @@
 import org.eclipse.osgi.util.NLS;

 

 public class RutaPreferencesMessages extends NLS {

-  private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$	

-

+  private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$

 

   private RutaPreferencesMessages() {

     // Do not instantiate

@@ -78,12 +77,13 @@
   public static String ProjectClearOutput;

 

   public static String NoVMInDevMode;

-  

+

   public static String AddSDI;

-  

+

   public static String CompressWordLists;

-  

+

+  public static String DictRemoveWS;

+

   public static String DefaultCasSerializationFormat;

 

-  

 }

diff --git a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
index b1b237f..7ae71a5 100644
--- a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
+++ b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
@@ -44,4 +44,5 @@
 NoVMInDevMode = Do not start a VM in development mode.

 AddSDI = Update Source Document Information when launching a script.

 CompressWordLists = Compress generated twl/mtwl word lists.

+DictRemoveWS = Remove white spaces when generating twl/mtwl word lists.

 DefaultCasSerializationFormat = Default CAS serialization format:

diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
index e6dec76..4d1e86d 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
@@ -34,5 +34,7 @@
 

   public static final String COMPRESS_WORDLISTS = "CompressWordLists";

 

+  public static final String DICT_REMOVE_WS = "dictRemoveWS";

+

   public static final String DEFAULT_CAS_SERIALIZATION_FORMAT = "DefaultCasSerializationFormat";

 }

diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
index 1217bf3..138a528 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
@@ -28,6 +28,7 @@
   public RutaPreferenceInitializer() {

   }

 

+  @Override

   public void initializeDefaultPreferences() {

     IPreferenceStore store = RutaIdeCorePlugin.getDefault().getPreferenceStore();

     // TaskTagUtils.initializeDefaultValues(store);

@@ -38,6 +39,7 @@
     store.setDefault(RutaCorePreferences.NO_VM_IN_DEV_MODE, false);

     store.setDefault(RutaCorePreferences.ADD_SDI, false);

     store.setDefault(RutaCorePreferences.COMPRESS_WORDLISTS, false);

+    store.setDefault(RutaCorePreferences.DICT_REMOVE_WS, false);

   }

 

 }

diff --git a/ruta-maven-plugin/src/it/wordlists/pom.xml b/ruta-maven-plugin/src/it/wordlists/pom.xml
index c71e3e2..6392fa4 100644
--- a/ruta-maven-plugin/src/it/wordlists/pom.xml
+++ b/ruta-maven-plugin/src/it/wordlists/pom.xml
@@ -99,6 +99,7 @@
             </goals>
             <configuration>
               <compress>false</compress>
+              <dictRemoveWS>true</dictRemoveWS>
               <inputFiles>
                 <directory>${basedir}/src/main/resources</directory>
                 <includes>
@@ -117,6 +118,7 @@
             </goals>
             <configuration>
               <compress>false</compress>
+              <dictRemoveWS>true</dictRemoveWS>
               <inputFiles>
                 <directory>${basedir}/src/main/resources</directory>
                 <includes>
diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
index 254824c..93ebc1f 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
@@ -76,6 +76,12 @@
   private boolean compress;

 

   /**

+   * Remove white spaces while generating dictionaries.

+   */

+  @Parameter(defaultValue = "true", required = true)

+  private boolean dictRemoveWS;

+

+  /**

    * Fail on error.

    */

   @Parameter(defaultValue = "true", required = true)

@@ -107,7 +113,7 @@
 

     MultiTreeWordList trie = null;

     try {

-      trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()));

+      trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()), dictRemoveWS);

     } catch (IOException e) {

       handleError("Error creating MTWL file.", e);

     }

diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
index 961014e..b10ab3c 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
@@ -78,6 +78,12 @@
   private boolean compress;

 

   /**

+   * Remove white spaces while generating dictionaries.

+   */

+  @Parameter(defaultValue = "true", required = true)

+  private boolean dictRemoveWS;

+

+  /**

    * Fail on error.

    */

   @Parameter(defaultValue = "true", required = true)

@@ -111,7 +117,7 @@
       File outputFile = each.getValue();

       TreeWordList list = null;

       try {

-        list = new TreeWordList(inputFile.getAbsolutePath(), false);

+        list = new TreeWordList(inputFile.getAbsolutePath(), dictRemoveWS);

       } catch (IOException e) {

         handleError("Error generating twl.", e);

       }