Merge pull request #71 from apache/UIMA-6383-Ruta-TRIE-Wordlist-entry-not-annotated
UIMA-6383: Ruta: TRIE - Wordlist entry not annotated
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
index 4b6d9ff..fd25685 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
@@ -63,6 +63,8 @@
/** The cost model we are using. */
private EditDistanceCostMap costMap;
+ private boolean dictRemoveWS = false;
+
/**
* Default constructor.
*
@@ -155,9 +157,25 @@
* When there is a problem reading a path.
*/
public MultiTreeWordList(String[] pathnames, File base) throws IOException {
+ this(pathnames, base, false);
+ }
+
+ /**
+ * Constructs a MultiTreeWordList from the files at the given paths.
+ *
+ * @param pathnames
+ * paths of the files to create the word list from
+ * @param base
+ * - the relative base
+ * @param dictRemoveWS
+ * if true, white space characters are skipped when entries are added to the tree
+ * @throws IOException
+ * When there is a problem reading a path.
+ */
+ public MultiTreeWordList(String[] pathnames, File base, boolean dictRemoveWS) throws IOException {
this.root = new MultiTextNode();
this.costMap = new EditDistanceCostMap();
-
+ this.dictRemoveWS = dictRemoveWS;
if (pathnames == null) {
return;
}
@@ -177,8 +195,23 @@
* - When there is a problem reading the files.
*/
public MultiTreeWordList(List<File> files, File base) throws IOException {
+ this(files, base, false);
+ }
+
+ /**
+ * @param files
+ * - the input files
+ * @param base
+ * - the relative base
+ * @param dictRemoveWS
+ * - if true, white space characters are skipped when entries are added to the tree
+ * @throws IOException
+ * - When there is a problem reading the files.
+ */
+ public MultiTreeWordList(List<File> files, File base, boolean dictRemoveWS) throws IOException {
this.root = new MultiTextNode();
this.costMap = new EditDistanceCostMap();
+ this.dictRemoveWS = dictRemoveWS;
if (files == null) {
return;
@@ -275,6 +308,10 @@
for (Character each : s.toCharArray()) {
+ if (dictRemoveWS && Character.isWhitespace(each)) {
+ continue;
+ }
+
MultiTextNode childNode = pointer.getChildNode(each);
if (childNode == null) {
diff --git a/ruta-docbook/src/docbook/tools.ruta.howtos.xml b/ruta-docbook/src/docbook/tools.ruta.howtos.xml
index 3cecf68..a46a9e4 100644
--- a/ruta-docbook/src/docbook/tools.ruta.howtos.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.howtos.xml
@@ -419,7 +419,11 @@
<!-- Compress resulting tree word list. -->
<!-- default value: true -->
<compress>true</compress>
-
+
+ <!-- Remove white spaces when generating word list. -->
+ <!-- default value: true -->
+ <dictRemoveWS>true</dictRemoveWS>
+
<!-- The source files for the tree word list. -->
<!-- default value: none -->
<inputFiles>
@@ -472,6 +476,10 @@
<!-- Compress resulting tree word list. -->
<!-- default value: true -->
<compress>true</compress>
+
+ <!-- Remove white spaces when generating word list. -->
+ <!-- default value: true -->
+ <dictRemoveWS>true</dictRemoveWS>
<!-- The source files for the multi tree word list. -->
<!-- default value: none -->
diff --git a/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml b/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
index 641b743..806a054 100644
--- a/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.workbench.create_dictionaries.xml
@@ -90,5 +90,8 @@
<quote>generated.mtwl</quote>
will be created.
</para>
+ <para>
+ The preferences page provides the option to remove white spaces when generating the word lists; this option is disabled by default.
+ </para>
</section>
\ No newline at end of file
diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
index 8c3fbcd..1b6c318 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
@@ -54,12 +54,16 @@
public class MultiTWLConverterHandler implements IHandler {
private class ConverterHandlerJob extends Job {
ExecutionEvent event;
+
private boolean compress;
- ConverterHandlerJob(ExecutionEvent event, boolean compress) {
+ private boolean dictRemoveWS;
+
+ ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {
super("Converting...");
this.event = event;
this.compress = compress;
+ this.dictRemoveWS = dictRemoveWS;
setUser(true);
}
@@ -88,7 +92,7 @@
if (!paths.isEmpty()) {
MultiTreeWordList trie;
try {
- trie = new MultiTreeWordList(paths.toArray(new String[0]), null);
+ trie = new MultiTreeWordList(paths.toArray(new String[0]), null, dictRemoveWS);
} catch (IOException e) {
RutaAddonsPlugin.error(e);
return Status.CANCEL_STATUS;
@@ -126,16 +130,20 @@
}
}
+ @Override
public void addHandlerListener(IHandlerListener handlerListener) {
}
+ @Override
public void dispose() {
}
+ @Override
public Object execute(ExecutionEvent event) throws ExecutionException {
IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();
boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);
- new ConverterHandlerJob(event, compress).schedule();
+ boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);
+ new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();
return null;
}
@@ -161,14 +169,17 @@
return paths;
}
+ @Override
public boolean isEnabled() {
return true;
}
+ @Override
public boolean isHandled() {
return true;
}
+ @Override
public void removeHandlerListener(IHandlerListener handlerListener) {
}
diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
index 45a7016..cd2784e 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
@@ -50,12 +50,16 @@
private class ConverterHandlerJob extends Job {
ExecutionEvent event;
+
private boolean compress;
- ConverterHandlerJob(ExecutionEvent event, boolean compress) {
+ private boolean dictRemoveWS;
+
+ ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {
super("Converting...");
this.event = event;
this.compress = compress;
+ this.dictRemoveWS = dictRemoveWS;
setUser(true);
}
@@ -81,7 +85,7 @@
String path = file.getRawLocation().toString();
TreeWordList list;
try {
- list = new TreeWordList(path, false);
+ list = new TreeWordList(path, dictRemoveWS);
} catch (IOException e) {
RutaAddonsPlugin.error(e);
return Status.CANCEL_STATUS;
@@ -109,27 +113,34 @@
}
}
+ @Override
public void addHandlerListener(IHandlerListener handlerListener) {
}
+ @Override
public void dispose() {
}
+ @Override
public Object execute(ExecutionEvent event) throws ExecutionException {
IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();
boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);
- new ConverterHandlerJob(event, compress).schedule();
+ boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);
+ new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();
return null;
}
+ @Override
public boolean isEnabled() {
return true;
}
+ @Override
public boolean isHandled() {
return true;
}
+ @Override
public void removeHandlerListener(IHandlerListener handlerListener) {
}
diff --git a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
index d4774eb..cc3df83 100644
--- a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
+++ b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaBuilderPreferencePage.java
@@ -30,8 +30,8 @@
/**
* Preference page to manage preferences for the ide plugin.
*/
-public class RutaBuilderPreferencePage extends FieldEditorPreferencePage implements
- IWorkbenchPreferencePage {
+public class RutaBuilderPreferencePage extends FieldEditorPreferencePage
+ implements IWorkbenchPreferencePage {
private BooleanFieldEditor builderImport;
@@ -60,13 +60,18 @@
RutaCorePreferences.BUILDER_IGNORE_DUPLICATE_SHORTNAMES,
RutaPreferencesMessages.BuilderIgnoreDuplicateShortnames, getFieldEditorParent());
addField(builderShortNames);
-
- compressWordLists = new BooleanFieldEditor(
- RutaCorePreferences.COMPRESS_WORDLISTS,
+
+ compressWordLists = new BooleanFieldEditor(RutaCorePreferences.COMPRESS_WORDLISTS,
RutaPreferencesMessages.CompressWordLists, getFieldEditorParent());
addField(compressWordLists);
+
+ // Use a dedicated editor for the white space option: reusing compressWordLists here would
+ BooleanFieldEditor dictRemoveWS = new BooleanFieldEditor(RutaCorePreferences.DICT_REMOVE_WS,
+ RutaPreferencesMessages.DictRemoveWS, getFieldEditorParent());
+ addField(dictRemoveWS); // overwrite the compress editor before it is added to the page.
}
+ @Override
public void init(IWorkbench workbench) {
}
diff --git a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
index 5503f03..8219a1e 100644
--- a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
+++ b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
@@ -22,8 +22,7 @@
import org.eclipse.osgi.util.NLS;
public class RutaPreferencesMessages extends NLS {
- private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$
-
+ private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$
private RutaPreferencesMessages() {
// Do not instantiate
@@ -78,12 +77,13 @@
public static String ProjectClearOutput;
public static String NoVMInDevMode;
-
+
public static String AddSDI;
-
+
public static String CompressWordLists;
-
+
+ public static String DictRemoveWS;
+
public static String DefaultCasSerializationFormat;
-
}
diff --git a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
index b1b237f..7ae71a5 100644
--- a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
+++ b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
@@ -44,4 +44,5 @@
NoVMInDevMode = Do not start a VM in development mode.
AddSDI = Update Source Document Information when launching a script.
CompressWordLists = Compress generated twl/mtwl word lists.
+DictRemoveWS = Remove white spaces when generating twl/mtwl word lists.
DefaultCasSerializationFormat = Default CAS serialization format:
diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
index e6dec76..4d1e86d 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
@@ -34,5 +34,7 @@
public static final String COMPRESS_WORDLISTS = "CompressWordLists";
+ public static final String DICT_REMOVE_WS = "dictRemoveWS";
+
public static final String DEFAULT_CAS_SERIALIZATION_FORMAT = "DefaultCasSerializationFormat";
}
diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
index 1217bf3..138a528 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
@@ -28,6 +28,7 @@
public RutaPreferenceInitializer() {
}
+ @Override
public void initializeDefaultPreferences() {
IPreferenceStore store = RutaIdeCorePlugin.getDefault().getPreferenceStore();
// TaskTagUtils.initializeDefaultValues(store);
@@ -38,6 +39,7 @@
store.setDefault(RutaCorePreferences.NO_VM_IN_DEV_MODE, false);
store.setDefault(RutaCorePreferences.ADD_SDI, false);
store.setDefault(RutaCorePreferences.COMPRESS_WORDLISTS, false);
+ store.setDefault(RutaCorePreferences.DICT_REMOVE_WS, false);
}
}
diff --git a/ruta-maven-plugin/src/it/wordlists/pom.xml b/ruta-maven-plugin/src/it/wordlists/pom.xml
index c71e3e2..6392fa4 100644
--- a/ruta-maven-plugin/src/it/wordlists/pom.xml
+++ b/ruta-maven-plugin/src/it/wordlists/pom.xml
@@ -99,6 +99,7 @@
</goals>
<configuration>
<compress>false</compress>
+ <dictRemoveWS>true</dictRemoveWS>
<inputFiles>
<directory>${basedir}/src/main/resources</directory>
<includes>
@@ -117,6 +118,7 @@
</goals>
<configuration>
<compress>false</compress>
+ <dictRemoveWS>true</dictRemoveWS>
<inputFiles>
<directory>${basedir}/src/main/resources</directory>
<includes>
diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
index 254824c..93ebc1f 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
@@ -76,6 +76,12 @@
private boolean compress;
/**
+ * Remove white spaces while generating dictionaries.
+ */
+ @Parameter(defaultValue = "true", required = true)
+ private boolean dictRemoveWS;
+
+ /**
* Fail on error.
*/
@Parameter(defaultValue = "true", required = true)
@@ -107,7 +113,7 @@
MultiTreeWordList trie = null;
try {
- trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()));
+ trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()), dictRemoveWS);
} catch (IOException e) {
handleError("Error creating MTWL file.", e);
}
diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
index 961014e..b10ab3c 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
@@ -78,6 +78,12 @@
private boolean compress;
/**
+ * Remove white spaces while generating dictionaries.
+ */
+ @Parameter(defaultValue = "true", required = true)
+ private boolean dictRemoveWS;
+
+ /**
* Fail on error.
*/
@Parameter(defaultValue = "true", required = true)
@@ -111,7 +117,7 @@
File outputFile = each.getValue();
TreeWordList list = null;
try {
- list = new TreeWordList(inputFile.getAbsolutePath(), false);
+ list = new TreeWordList(inputFile.getAbsolutePath(), dictRemoveWS);
} catch (IOException e) {
handleError("Error generating twl.", e);
}