updates sandbox component 'opennlp-wsd' to be compatible with latest opennlp-tools release (#59)

- adjusts opennlp-tools to 2.1.0
- adjusts parent project (org.apache.apache) to version 18
- adjusts Java language level to 11
- adds missing test resources in gzip-compressed form to check whether the existing tests work; some don't
- changes some interfaces to use List instead of ArrayList in method signatures
- ignored tests that aren't functional even with corresponding test resources, see OpenNLP-1446
diff --git a/opennlp-wsd/pom.xml b/opennlp-wsd/pom.xml
index 47de8ec..9110b75 100644
--- a/opennlp-wsd/pom.xml
+++ b/opennlp-wsd/pom.xml
@@ -25,12 +25,13 @@
 	<parent>
 		<groupId>org.apache</groupId>
 		<artifactId>apache</artifactId>
-		<version>13</version>
+		<!-- TODO OPENNLP-1452 once this is resolved, move to 29 as well. -->
+		<version>18</version>
 		<relativePath />
 	</parent>
 
 	<artifactId>opennlp-wsd</artifactId>
-	<version>1.6.0-SNAPSHOT</version>
+	<version>2.1.1-SNAPSHOT</version>
 	<packaging>jar</packaging>
 	<name>Apache OpenNLP WSD</name>
 
@@ -38,7 +39,7 @@
 		<dependency>
 			<groupId>org.apache.opennlp</groupId>
 			<artifactId>opennlp-tools</artifactId>
-			<version>1.6.0</version>
+			<version>2.1.0</version>
 		</dependency>
 
 		<dependency>
@@ -62,7 +63,7 @@
 		<dependency>
 			<groupId>junit</groupId>
 			<artifactId>junit</artifactId>
-			<version>4.8.1</version>
+			<version>4.13.1</version>
 			<scope>test</scope>
 		</dependency>
 	</dependencies>
@@ -80,8 +81,8 @@
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-compiler-plugin</artifactId>
 				<configuration>
-					<source>1.7</source>
-					<target>1.7</target>
+					<source>11</source>
+					<target>11</target>
 					<compilerArgument>-Xlint</compilerArgument>
 				</configuration>
 			</plugin>
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
index e440130..1efa729 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
@@ -33,14 +33,17 @@
 

 public final class DisambiguatorEvaluatorTool extends CmdLineTool {

 

+  @Override

   public String getName() {

     return "DisambiguatorEvaluator";

   }

 

+  @Override

   public String getShortDescription() {

     return "Disambiguator Evaluation Tool";

   }

 

+  @Override

   public String getHelp() {

     return "Usage: " + CLI.CMD + " " + getName() + " "

         + ArgumentParser.createUsage(DisambiguatorEvaluatorParams.class);

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
index 89d55a5..9de0c7f 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
@@ -20,16 +20,17 @@
 package opennlp.tools.cmdline.disambiguator;

 

 import java.io.File;

-import java.io.FileInputStream;

+import java.io.FileNotFoundException;

 import java.io.IOException;

-import java.io.InputStreamReader;

 import java.nio.charset.Charset;

+import java.nio.charset.StandardCharsets;

 

 import opennlp.tools.cmdline.ArgumentParser;

 import opennlp.tools.cmdline.CLI;

 import opennlp.tools.cmdline.CmdLineTool;

 import opennlp.tools.cmdline.CmdLineUtil;

 import opennlp.tools.cmdline.PerformanceMonitor;

+import opennlp.tools.cmdline.SystemInputStreamFactory;

 import opennlp.tools.cmdline.TerminateToolException;

 import opennlp.tools.disambiguator.Lesk;

 import opennlp.tools.disambiguator.WSDHelper;

@@ -37,7 +38,9 @@
 import opennlp.tools.disambiguator.WSDSampleStream;

 import opennlp.tools.disambiguator.WSDisambiguator;

 import opennlp.tools.disambiguator.MFS;

+import opennlp.tools.util.MarkableFileInputStreamFactory;

 import opennlp.tools.util.ObjectStream;

+import opennlp.tools.util.ParagraphStream;

 import opennlp.tools.util.PlainTextByLineStream;

 

 /*

@@ -47,14 +50,17 @@
 public class DisambiguatorTool extends CmdLineTool {

 

   // TODO CmdLineTool should be an interface not abstract class

+  @Override

   public String getName() {

     return "Disambiguator";

   }

 

+  @Override

   public String getShortDescription() {

     return "Word Sense Disambiguator";

   }

 

+  @Override

   public String getHelp() {

     return "Usage: " + CLI.CMD + " " + getName() + " "

         + ArgumentParser.createUsage(DisambiguatorToolParams.class)

@@ -75,12 +81,10 @@
 

     PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");

 

-    ObjectStream<String> lineStream = new PlainTextByLineStream(

-        new InputStreamReader(System.in));

-

     perfMon.start();

 

-    try {

+    try (ObjectStream<String> lineStream = new PlainTextByLineStream(

+            new SystemInputStreamFactory(), StandardCharsets.UTF_8)) {

       String line;

       while ((line = lineStream.read()) != null) {

 

@@ -115,13 +119,19 @@
 

   static ObjectStream<WSDSample> openSampleData(String sampleDataName,

       File sampleDataFile, Charset encoding) {

+

     CmdLineUtil.checkInputFile(sampleDataName + " Data", sampleDataFile);

+    final MarkableFileInputStreamFactory factory;

+    try {

+      factory = new MarkableFileInputStreamFactory(sampleDataFile);

+    } catch (FileNotFoundException e) {

+      throw new RuntimeException("Error finding specified input file!", e);

+    }

 

-    FileInputStream sampleDataIn = CmdLineUtil.openInFile(sampleDataFile);

-

-    ObjectStream<String> lineStream = new PlainTextByLineStream(

-        sampleDataIn.getChannel(), encoding);

-

-    return new WSDSampleStream(lineStream);

+    try (ObjectStream<String> lineStream = new ParagraphStream(new PlainTextByLineStream(factory, encoding))) {

+      return new WSDSampleStream(lineStream);

+    } catch (IOException e) {

+      throw new RuntimeException("Error loading WSD samples from input data!", e);

+    }

   }

 }

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
index c48d950..7873111 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
@@ -20,6 +20,7 @@
 import java.util.ArrayList;

 import java.util.Arrays;

 import java.util.HashSet;

+import java.util.List;

 

 public class IMSWSDContextGenerator implements WSDContextGenerator {

 

@@ -42,11 +43,10 @@
     return windowTags;

   }

 

-  public String[] extractSurroundingContext(int index, String[] toks,

-    String[] lemmas, int windowSize) {

+  public String[] extractSurroundingContext(int index, String[] toks, String[] lemmas, int windowSize) {

 

     // TODO consider the windowSize

-    ArrayList<String> contextWords = new ArrayList<String>();

+    List<String> contextWords = new ArrayList<>();

 

     for (int i = 0; i < toks.length; i++) {

       if (lemmas != null) {

@@ -67,14 +67,13 @@
     return contextWords.toArray(new String[contextWords.size()]);

   }

 

-  private String[] extractLocalCollocations(int index, String[] sentence,

-    int ngram) {

-    /**

+  private String[] extractLocalCollocations(int index, String[] sentence, int ngram) {

+    /*

      * Here the author used only 11 features of this type. the range was set to

      * 3 (bigrams extracted in a way that they are at max separated by 1 word).

      */

 

-    ArrayList<String> localCollocations = new ArrayList<String>();

+    ArrayList<String> localCollocations = new ArrayList<>();

 

     for (int i = index - ngram; i <= index + ngram; i++) {

 

@@ -108,9 +107,9 @@
    * @param model      The list of unigrams

    * @return The IMS context of the word to disambiguate

    */

-  @Override public String[] getContext(int index, String[] tokens,

-    String[] tags, String[] lemmas, int ngram, int windowSize,

-    ArrayList<String> model) {

+  @Override

+  public String[] getContext(int index, String[] tokens,

+    String[] tags, String[] lemmas, int ngram, int windowSize, List<String> model) {

 

     String[] posOfSurroundingWords = extractPosOfSurroundingWords(index, tokens,

       windowSize);

@@ -155,8 +154,9 @@
    * @param model      The list of unigrams

    * @return The IMS context of the word to disambiguate

    */

-  @Override public String[] getContext(WSDSample sample, int ngram,

-    int windowSize, ArrayList<String> model) {

+  @Override

+  public String[] getContext(WSDSample sample, int ngram,

+    int windowSize, List<String> model) {

     return getContext(sample.getTargetPosition(), sample.getSentence(),

       sample.getTags(), sample.getLemmas(), ngram, windowSize, model);

   }

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
index 719fad8..6aff7fc 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
@@ -22,6 +22,8 @@
 import java.util.ArrayList;

 import java.util.Arrays;

 import java.util.HashSet;

+import java.util.List;

+import java.util.Set;

 

 import net.sf.extjwnl.data.Synset;

 

@@ -73,10 +75,11 @@
    *

    * @return The OSCC context of the word to disambiguate

    */

-  @Override public String[] getContext(int index, String[] toks, String[] tags,

-    String[] lemmas, int ngram, int windowSize, ArrayList<String> model) {

+  @Override

+  public String[] getContext(int index, String[] toks, String[] tags,

+    String[] lemmas, int ngram, int windowSize, List<String> model) {

 

-    HashSet<String> surroundingContextClusters = new HashSet<>();

+    Set<String> surroundingContextClusters = new HashSet<>();

     surroundingContextClusters.addAll(Arrays.asList(

       extractSurroundingContext(index, toks, tags, lemmas,

         windowSize)));

@@ -96,8 +99,8 @@
     return serializedFeatures;

   }

 

-  public String[] getContext(WSDSample sample, int ngram, int windowSize,

-    ArrayList<String> model) {

+  @Override

+  public String[] getContext(WSDSample sample, int ngram, int windowSize, List<String> model) {

     return getContext(sample.getTargetPosition(), sample.getSentence(),

       sample.getTags(), sample.getLemmas(), 0, windowSize, model);

   }

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
index e84b72e..0c685c0 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
@@ -83,7 +83,7 @@
     }
 
     for (int i = 0; i < phypernyms.size(); i++) {
-      PointerTargetNode ptn = (PointerTargetNode) phypernyms.get(i);
+      PointerTargetNode ptn = phypernyms.get(i);
       this.hypernyms.add(ptn.getSynset());
     }
 
@@ -102,7 +102,7 @@
     }
 
     for (int i = 0; i < pmeronyms.size(); i++) {
-      PointerTargetNode ptn = (PointerTargetNode) pmeronyms.get(i);
+      PointerTargetNode ptn = pmeronyms.get(i);
       this.meronyms.add(ptn.getSynset());
     }
   }
@@ -120,7 +120,7 @@
     }
 
     for (int i = 0; i < pholonyms.size(); i++) {
-      PointerTargetNode ptn = (PointerTargetNode) pholonyms.get(i);
+      PointerTargetNode ptn = pholonyms.get(i);
       this.holonyms.add(ptn.getSynset());
     }
 
@@ -139,7 +139,7 @@
     }
 
     for (int i = 0; i < phyponyms.size(); i++) {
-      PointerTargetNode ptn = (PointerTargetNode) phyponyms.get(i);
+      PointerTargetNode ptn = phyponyms.get(i);
       this.hyponyms.add(ptn.getSynset());
     }
   }
@@ -157,7 +157,7 @@
     }
 
     for (int i = 0; i < pentailments.size(); i++) {
-      PointerTargetNode ptn = (PointerTargetNode) pentailments.get(i);
+      PointerTargetNode ptn = pentailments.get(i);
       this.entailments.add(ptn.getSynset());
     }
 
@@ -176,7 +176,7 @@
     }
 
     for (int i = 0; i < pcoordinateTerms.size(); i++) {
-      PointerTargetNode ptn = (PointerTargetNode) pcoordinateTerms.get(i);
+      PointerTargetNode ptn = pcoordinateTerms.get(i);
       this.coordinateTerms.add(ptn.getSynset());
     }
 
@@ -195,7 +195,7 @@
     }
 
     for (int i = 0; i < pcauses.size(); i++) {
-      PointerTargetNode ptn = (PointerTargetNode) pcauses.get(i);
+      PointerTargetNode ptn = pcauses.get(i);
       this.causes.add(ptn.getSynset());
     }
 
@@ -214,7 +214,7 @@
     }
 
     for (int i = 0; i < pattributes.size(); i++) {
-      PointerTargetNode ptn = (PointerTargetNode) pattributes.get(i);
+      PointerTargetNode ptn = pattributes.get(i);
       this.attributes.add(ptn.getSynset());
     }
 
@@ -233,7 +233,7 @@
     }
 
     for (int i = 0; i < ppertainyms.size(); i++) {
-      PointerTargetNode ptn = (PointerTargetNode) ppertainyms.get(i);
+      PointerTargetNode ptn = ppertainyms.get(i);
       this.pertainyms.add(ptn.getSynset());
     }
 
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java
index 31e1dd3..3d717cc 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java
@@ -19,17 +19,16 @@
 

 package opennlp.tools.disambiguator;

 

-import java.util.ArrayList;

+import java.util.List;

 

 /**

  * Interface for {@link WSDisambiguator} context generators.

  */

 public interface WSDContextGenerator {

 

-  public String[] getContext(int index, String[] toks, String[] tags,

-    String[] lemmas, int ngram, int windowSize, ArrayList<String> model);

+  String[] getContext(int index, String[] toks, String[] tags,

+    String[] lemmas, int ngram, int windowSize, List<String> model);

 

-  public String[] getContext(WSDSample sample, int ngram, int windowSize,

-    ArrayList<String> model);

+  String[] getContext(WSDSample sample, int ngram, int windowSize, List<String> model);

 

 }

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
index 36369c6..7881b37 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
@@ -19,8 +19,7 @@
 
 import opennlp.tools.util.eval.EvaluationMonitor;
 
-public interface WSDEvaluationMonitor extends
-    EvaluationMonitor<WSDSample> {
+public interface WSDEvaluationMonitor extends EvaluationMonitor<WSDSample> {
 
 }
 
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
index 03a0af3..2880be0 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
@@ -17,22 +17,26 @@
 

 package opennlp.tools.disambiguator;

 

+import java.io.BufferedInputStream;

 import java.io.BufferedReader;

 import java.io.File;

 import java.io.FileInputStream;

-import java.io.FileNotFoundException;

 import java.io.FileReader;

 import java.io.IOException;

+import java.io.InputStream;

 import java.util.ArrayList;

 import java.util.Arrays;

 import java.util.HashMap;

+import java.util.List;

+import java.util.Map;

+import java.util.zip.GZIPInputStream;

 

 import net.sf.extjwnl.JWNLException;

 import net.sf.extjwnl.data.POS;

 import net.sf.extjwnl.dictionary.Dictionary;

 import net.sf.extjwnl.dictionary.MorphologicalProcessor;

 import opennlp.tools.cmdline.postag.POSModelLoader;

-import opennlp.tools.lemmatizer.SimpleLemmatizer;

+import opennlp.tools.lemmatizer.DictionaryLemmatizer;

 import opennlp.tools.postag.POSTaggerME;

 import opennlp.tools.tokenize.TokenizerME;

 import opennlp.tools.tokenize.TokenizerModel;

@@ -41,7 +45,7 @@
 

   protected static TokenizerME tokenizer;

   protected static POSTaggerME tagger;

-  protected static SimpleLemmatizer lemmatizer;

+  protected static DictionaryLemmatizer lemmatizer;

   protected static Dictionary dictionary;

   protected static MorphologicalProcessor morph;

 

@@ -50,12 +54,12 @@
   protected static String lemmatizerDictionaryPath;

 

   // local caches for faster lookup

-  private static HashMap<String, Object> stemCache;

-  private static HashMap<String, Object> stopCache;

-  private static HashMap<String, Object> relvCache;

+  private static Map<String, Object> stemCache;

+  private static Map<String, Object> stopCache;

+  private static Map<String, Object> relvCache;

 

-  private static HashMap<String, Object> englishWords;

-  private static HashMap<String, Object> nonRelevWordsDef;

+  private static Map<String, Object> englishWords;

+  private static Map<String, Object> nonRelevWordsDef;

 

   // List of all the PoS tags

   public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",

@@ -68,11 +72,11 @@
       "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };

 

   // List of Negation Words

-  public static ArrayList<String> negationWords = new ArrayList<String>(

+  public static List<String> negationWords = new ArrayList<>(

       Arrays.asList("not", "no", "never", "none", "nor", "non"));

 

   // List of Stop Words

-  public static ArrayList<String> stopWords = new ArrayList<String>(

+  public static List<String> stopWords = new ArrayList<>(

       Arrays.asList("a", "able", "about", "above", "according", "accordingly",

           "across", "actually", "after", "afterwards", "again", "against",

           "ain't", "all", "allow", "allows", "almost", "alone", "along",

@@ -153,9 +157,9 @@
           "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "your",

           "you're", "yours", "yourself", "yourselves", "you've", "zero"));

 

-  public static HashMap<String, Object> getRelvCache() {

+  public static Map<String, Object> getRelvCache() {

     if (relvCache == null || relvCache.keySet().isEmpty()) {

-      relvCache = new HashMap<String, Object>();

+      relvCache = new HashMap<>();

       for (String t : relevantPOS) {

         relvCache.put(t, null);

       }

@@ -163,9 +167,9 @@
     return relvCache;

   }

 

-  public static HashMap<String, Object> getStopCache() {

+  public static Map<String, Object> getStopCache() {

     if (stopCache == null || stopCache.keySet().isEmpty()) {

-      stopCache = new HashMap<String, Object>();

+      stopCache = new HashMap<>();

       for (String s : stopWords) {

         stopCache.put(s, null);

       }

@@ -173,17 +177,17 @@
     return stopCache;

   }

 

-  public static HashMap<String, Object> getStemCache() {

+  public static Map<String, Object> getStemCache() {

     if (stemCache == null || stemCache.keySet().isEmpty()) {

-      stemCache = new HashMap<String, Object>();

+      stemCache = new HashMap<>();

       for (Object pos : POS.getAllPOS()) {

-        stemCache.put(((POS) pos).getKey(), new HashMap());

+        stemCache.put(((POS) pos).getKey(), new HashMap<String, Object>());

       }

     }

     return stemCache;

   }

 

-  public static HashMap<String, Object> getEnglishWords() {

+  public static Map<String, Object> getEnglishWords() {

     if (englishWords == null || englishWords.keySet().isEmpty()) {

       englishWords = getEnglishWords(lemmatizerDictionaryPath);

     }

@@ -191,16 +195,16 @@
   }

 

   /**

-   * This initializes the Hashmap of non relevant words definitions, and returns

-   * the definition of the non relevant word based on its pos-tag

+   * This initializes the Hashmap of irrelevant words definitions, and returns

+   * the definition of the irrelevant word based on its pos-tag

    * 

    * @param posTag

-   *          the pos-tag of the non relevant word

+   *          the pos-tag of the irrelevant word

    * @return the definition of the word

    */

   public static String getNonRelevWordsDef(String posTag) {

     if (nonRelevWordsDef == null || nonRelevWordsDef.keySet().isEmpty()) {

-      nonRelevWordsDef = new HashMap<String, Object>();

+      nonRelevWordsDef = new HashMap<>();

 

       nonRelevWordsDef.put("CC", "coordinating conjunction");

       nonRelevWordsDef.put("CD", "cardinal number");

@@ -262,14 +266,26 @@
     return dictionary;

   }

 

-  public static SimpleLemmatizer getLemmatizer() {

+  public static DictionaryLemmatizer getLemmatizer() {

+    if (lemmatizerDictionaryPath == null) {

+      throw new IllegalStateException("Loading a Lemmatizer is not possible without setting the " +

+              "corresponding model file!");

+    }

     if (lemmatizer == null) {

+      final InputStream resource;

       try {

-        lemmatizer = new SimpleLemmatizer(new FileInputStream(

-            lemmatizerDictionaryPath));

+        if (lemmatizerDictionaryPath.endsWith(".dict.gz")) {

+          resource = new GZIPInputStream(new FileInputStream(lemmatizerDictionaryPath));

+        } else {

+          resource = new FileInputStream(lemmatizerDictionaryPath);

+        }

+        try (InputStream in = new BufferedInputStream(resource)) {

+          lemmatizer = new DictionaryLemmatizer(in);

+        }

       } catch (IOException e) {

-        e.printStackTrace();

+        throw new RuntimeException("Error opening or loading a Lemmatizer from specified resource file!", e);

       }

+

     }

 

     return lemmatizer;

@@ -306,7 +322,7 @@
     return getTagger();

   }

 

-  public static SimpleLemmatizer loadLemmatizer(String path) {

+  public static DictionaryLemmatizer loadLemmatizer(String path) {

     lemmatizerDictionaryPath = path;

     return getLemmatizer();

   }

@@ -319,8 +335,7 @@
   }

 

   // Print a text in the console

-  public static void printResults(WSDisambiguator disambiguator,

-      String result) {

+  public static void printResults(WSDisambiguator disambiguator, String result) {

 

     if (result != null) {

 

@@ -328,7 +343,7 @@
       String sensekey;

       if (disambiguator instanceof Lesk) {

 

-        Double score;

+        double score;

 

           parts = result.split(" ");

           sensekey = parts[1];

@@ -429,20 +444,17 @@
    *          this file is the same that is used in the simple Lemmatizer

    *          (i.e.,"en-lemmatizer.dict")

    * 

-   * @return a list of all the English words

+   * @return a Map of all the English words

    */

-  public static HashMap<String, Object> getEnglishWords(String dict) {

+  public static Map<String, Object> getEnglishWords(String dict) {

 

-    HashMap<String, Object> words = new HashMap<String, Object>();

-

-    BufferedReader br = null;

+    Map<String, Object> words = new HashMap<>();

 

     File file = new File(lemmatizerDictionaryPath);

 

     if (file.exists()) {

 

-      try {

-        br = new BufferedReader(new FileReader(file));

+      try (BufferedReader br = new BufferedReader(new FileReader(file))) {

         String line = br.readLine();

         while (line != null) {

           line = br.readLine();

@@ -451,18 +463,8 @@
             words.put(word, null);

           }

         }

-      } catch (FileNotFoundException e) {

-        e.printStackTrace();

       } catch (IOException e) {

         e.printStackTrace();

-      } finally {

-        if (br != null) {

-          try {

-            br.close();

-          } catch (IOException e) {

-            e.printStackTrace();

-          }

-        }

       }

       return words;

     } else {

@@ -480,14 +482,10 @@
    */

   public static POS getPOS(String posTag) {

 

-    ArrayList<String> adjective = new ArrayList<String>(Arrays.asList("JJ",

-        "JJR", "JJS"));

-    ArrayList<String> adverb = new ArrayList<String>(Arrays.asList("RB", "RBR",

-        "RBS"));

-    ArrayList<String> noun = new ArrayList<String>(Arrays.asList("NN", "NNS",

-        "NNP", "NNPS"));

-    ArrayList<String> verb = new ArrayList<String>(Arrays.asList("VB", "VBD",

-        "VBG", "VBN", "VBP", "VBZ"));

+    List<String> adjective = Arrays.asList("JJ", "JJR", "JJS");

+    List<String> adverb = Arrays.asList("RB", "RBR", "RBS");

+    List<String> noun = Arrays.asList("NN", "NNS", "NNP", "NNPS");

+    List<String> verb = Arrays.asList("VB", "VBD", "VBG", "VBN", "VBP", "VBZ");

 

     if (adjective.contains(posTag))

       return POS.ADJECTIVE;

@@ -606,7 +604,7 @@
 

   public static ArrayList<WordPOS> getAllRelevantWords(String[] sentence) {

 

-    ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();

+    ArrayList<WordPOS> relevantWords = new ArrayList<>();

 

     String[] tags = WSDHelper.getTagger().tag(sentence);

 

@@ -622,7 +620,7 @@
   }

 

   /**

-   * Stem a single word with WordNet dictionnary

+   * Stem a single word with WordNet dictionary.

    * 

    * @param wordToStem

    *          word to be stemmed

@@ -631,7 +629,7 @@
   public static ArrayList<String> StemWordWithWordNet(WordPOS wordToStem) {

     if (wordToStem == null)

       return null;

-    ArrayList<String> stems = new ArrayList<String>();

+    ArrayList<String> stems = new ArrayList<>();

     try {

       for (Object pos : POS.getAllPOS()) {

         stems.addAll(WSDHelper.getMorph().lookupAllBaseForms((POS) pos,

@@ -671,8 +669,7 @@
       return null;

     }

 

-    ArrayList<String> stemList = (ArrayList<String>) posMap.get(wordToStem

-        .getWord());

+    ArrayList<String> stemList = (ArrayList<String>) posMap.get(wordToStem.getWord());

     if (stemList != null) { // return it if we already cached it

       return stemList;

 

@@ -685,7 +682,7 @@
         WSDHelper.getStemCache().put(wordToStem.getPOS().getKey(), posMap);

         return stemList;

       } else { // could not be stemmed add it anyway (as it is)

-        stemList = new ArrayList<String>();

+        stemList = new ArrayList<>();

         stemList.add(wordToStem.getWord());

         posMap.put(wordToStem.getWord(), stemList);

         WSDHelper.getStemCache().put(wordToStem.getPOS().getKey(), posMap);

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
index 1041fec..c2ce95d 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
@@ -23,6 +23,7 @@
 import java.net.URL;

 import java.util.ArrayList;

 import java.util.Arrays;

+import java.util.List;

 import java.util.Map;

 import java.util.Properties;

 

@@ -45,12 +46,12 @@
   private static final String NGRAM = "ngram";

   private static final String CONTEXT = "context";

 

-  private ArrayList<String> contextEntries = new ArrayList<String>();

+  private List<String> contextEntries = new ArrayList<>();

   private String wordTag;

   private int windowSize;

   private int ngram;

 

-  public ArrayList<String> getContextEntries() {

+  public List<String> getContextEntries() {

     return contextEntries;

   }

 

@@ -145,7 +146,7 @@
     Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);

     String surroundings = (String) manifest.get(CONTEXT);

 

-    this.contextEntries = new ArrayList(Arrays.asList(surroundings.split(",")));

+    this.contextEntries = Arrays.asList(surroundings.split(","));

     this.wordTag = (String) manifest.get(WORDTAG);

     this.windowSize = Integer.parseInt((String) manifest.get(WINSIZE));

     this.ngram = Integer.parseInt((String) manifest.get(NGRAM));

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
index 0120b05..ed06aae 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
@@ -25,25 +25,19 @@
 import opennlp.tools.util.FilterObjectStream;

 import opennlp.tools.util.InvalidFormatException;

 import opennlp.tools.util.ObjectStream;

-import opennlp.tools.util.PlainTextByLineStream;

 

 public class WSDSampleStream extends FilterObjectStream<String, WSDSample> {

 

-  private static Logger logger = Logger.getLogger(WSDSampleStream.class

-      .getName());

+  private static Logger logger = Logger.getLogger(WSDSampleStream.class.getName());

 

   /**

    * Initializes the current instance.

    *

    * @param sentences

-   *          reader with sentences

+   *          An {@link ObjectStream} with sentences

    * @throws IOException

    *           IOException

    */

-  public WSDSampleStream(Reader sentences) throws IOException {

-    super(new PlainTextByLineStream(sentences));

-  }

-

   public WSDSampleStream(ObjectStream<String> sentences) {

     super(sentences);

   }

@@ -54,9 +48,9 @@
    * If an error occurs an empty {@link WSDSample} object is returned and an

    * warning message is logged. Usually it does not matter if one of many

    * sentences is ignored.

-   *

-   * TODO: An exception in error case should be thrown.

    */

+   // TODO: An exception in error case should be thrown.

+  @Override

   public WSDSample read() throws IOException {

 

     String sentence = samples.read();

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
index 096b788..01d4bb3 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
@@ -21,7 +21,6 @@
 import opennlp.tools.ml.TrainerFactory;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.MaxentModel;
-import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ObjectStreamUtils;
 import opennlp.tools.util.TrainingParameters;
@@ -30,6 +29,7 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 
 public class WSDisambiguatorME extends WSDisambiguator {
 
@@ -64,12 +64,12 @@
     ArrayList<String> surroundingContext = buildSurroundingContext(samples,
       ((WSDDefaultParameters) params).getWindowSize());
 
-    HashMap<String, String> manifestInfoEntries = new HashMap<String, String>();
+    HashMap<String, String> manifestInfoEntries = new HashMap<>();
 
-    MaxentModel meModel = null;
+    MaxentModel meModel;
 
-    ArrayList<Event> events = new ArrayList<Event>();
-    ObjectStream<Event> es = null;
+    List<Event> events = new ArrayList<>();
+    ObjectStream<Event> es;
 
     WSDSample sample = samples.read();
     String wordTag = "";
@@ -86,8 +86,7 @@
     }
 
     es = ObjectStreamUtils.createObjectStream(events);
-    EventTrainer trainer = TrainerFactory
-      .getEventTrainer(mlParams.getSettings(), manifestInfoEntries);
+    EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries);
 
     meModel = trainer.train(es);
 
@@ -132,9 +131,6 @@
         if (file.exists() && !file.isDirectory()) {
           try {
             setModel(new WSDModel(file));
-
-          } catch (InvalidFormatException e) {
-            e.printStackTrace();
           } catch (IOException e) {
             e.printStackTrace();
           }
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
index 8fb2045..d9db0de 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
@@ -19,10 +19,7 @@
 
 package opennlp.tools.disambiguator;
 
-import opennlp.tools.disambiguator.WSDSample;
-import opennlp.tools.disambiguator.SynNode;
-
-public class WordSense implements Comparable {
+public class WordSense implements Comparable<WordSense> {
 
   protected WSDSample sample;
   protected SynNode node;
@@ -71,7 +68,8 @@
     this.id = id;
   }
 
-  public int compareTo(Object o) {
+  @Override
+  public int compareTo(WordSense o) {
     return (this.score - ((WordSense) o).score) < 0 ? 1 : -1;
   }
 
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
index e0decf2..55af1a9 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
@@ -27,6 +27,8 @@
 

 import opennlp.tools.disambiguator.WSDHelper;

 import opennlp.tools.disambiguator.WSDSample;

+import opennlp.tools.lemmatizer.Lemmatizer;

+import opennlp.tools.postag.POSTagger;

 import opennlp.tools.util.ObjectStream;

 import opennlp.tools.util.ObjectStreamUtils;

 

@@ -200,7 +202,7 @@
    */

   private ArrayList<WSDSample> getSemcorOneFileData(String file, String wordTag) {

 

-    ArrayList<WSDSample> setInstances = new ArrayList<WSDSample>();

+    ArrayList<WSDSample> setInstances = new ArrayList<>();

 

     try {

 

@@ -238,21 +240,19 @@
                   + isentences.get(j + 1).toString();

               index = isentences.get(j - 1).getIwords().size() + k;

             }

-            ArrayList<String> senses = new ArrayList<String>();

+            ArrayList<String> senses = new ArrayList<>();

             String sense = iword.getLexsn();

             if (sense != null) {

               senses.add(sense);

             }

 

             if (!senses.isEmpty()) {

-              String[] words = sentence.split("\\s");

-              String[] tags = WSDHelper.getTagger().tag(words);

-              String[] lemmas = new String[words.length];

+              final Lemmatizer lemmatizer = WSDHelper.getLemmatizer();

+              final POSTagger tagger = WSDHelper.getTagger();

 

-              for (int i = 0; i < words.length; i++) {

-                lemmas[i] = WSDHelper.getLemmatizer().lemmatize(words[i],

-                    tags[i]);

-              }

+              final String[] words = sentence.split("\\s");

+              final String[] tags = tagger.tag(words);

+              String[] lemmas = lemmatizer.lemmatize(words, tags);

 

               WSDSample wtd = new WSDSample(words, tags, lemmas, index, senses.toArray(new String[0]));

               setInstances.add(wtd);

@@ -285,7 +285,7 @@
    */

   private ArrayList<WSDSample> getSemcorFolderData(String folder, String wordTag) {

 

-    ArrayList<WSDSample> result = new ArrayList<WSDSample>();

+    ArrayList<WSDSample> result = new ArrayList<>();

 

     String directory = semcorDirectory + folder + tagfiles;

     File tempFolder = new File(directory);

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
index 9dfbb94..d1f9662 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
@@ -19,18 +19,25 @@
 

 package opennlp.tools.disambiguator.datareader;

 

+import java.io.BufferedInputStream;

 import java.io.BufferedReader;

-import java.io.File;

+import java.io.FileInputStream;

 import java.io.FileReader;

 import java.io.IOException;

+import java.io.InputStream;

+import java.io.InputStreamReader;

 import java.util.ArrayList;

 import java.util.HashMap;

 import java.util.Collections;

 import java.util.Arrays;

+import java.util.List;

+import java.util.zip.GZIPInputStream;

 

 import javax.xml.parsers.DocumentBuilder;

 import javax.xml.parsers.DocumentBuilderFactory;

 

+import opennlp.tools.lemmatizer.Lemmatizer;

+import opennlp.tools.postag.POSTagger;

 import org.w3c.dom.Document;

 import org.w3c.dom.Element;

 import org.w3c.dom.Node;

@@ -42,16 +49,17 @@
 import opennlp.tools.util.ObjectStreamUtils;

 

 /**

- * This class handles the extraction of Senseval-3 data from the different files

- * (training data, dictionary instances, etc.)

+ * This class handles the extraction of

+ * <a href="https://web.eecs.umich.edu/~mihalcea/senseval/senseval3/data.html">Senseval-3</a>

+ * data from the different files (training data, dictionary instances, etc.)

  */

 public class SensevalReader {

 

-  protected String sensevalDirectory = "src/test/resources/senseval3/";

+  private String sensevalDirectory = "src/test/resources/senseval3/";

 

-  protected String data = sensevalDirectory + "EnglishLS.train";

-  protected String sensemapFile = sensevalDirectory + "EnglishLS.sensemap";

-  protected String wordList = sensevalDirectory + "EnglishLS.train.key";

+  private String sensemapFile = sensevalDirectory + "EnglishLS.sensemap";

+  private String data = sensevalDirectory + "EnglishLS.train.gz";

+  private String wordList = sensevalDirectory + "EnglishLS.train.key.gz";

 

   public String getSensevalDirectory() {

     return sensevalDirectory;

@@ -73,15 +81,12 @@
    * This extracts the equivalent senses. This serves in the case of the

    * coarse-grained disambiguation

    *

-   * @param sensemapFile

-   *          the file containing the equivalent senses, each set of equivalent

-   *          senses per line

    * @return a {@link HashMap} conaining the new sense ID ({@link Integer}) and

    *         an {@link ArrayList} of the equivalent senses original IDs

    */

   public HashMap<Integer, ArrayList<String>> getEquivalentSense() {

 

-    HashMap<Integer, ArrayList<String>> mappedSenses = new HashMap<Integer, ArrayList<String>>();

+    HashMap<Integer, ArrayList<String>> mappedSenses = new HashMap<>();

 

     try (BufferedReader wordsList = new BufferedReader(new FileReader(

         sensemapFile))) {

@@ -94,7 +99,7 @@
 

         String[] temp = line.split("\\s");

 

-        ArrayList<String> tempSenses = new ArrayList<String>();

+        ArrayList<String> tempSenses = new ArrayList<>();

 

         for (String sense : temp) {

           if (sense.length() > 1) {

@@ -123,20 +128,26 @@
    */

   public ArrayList<String> getSensevalWords() {

 

-    ArrayList<String> wordTags = new ArrayList<String>();

+    ArrayList<String> wordTags = new ArrayList<>();

 

-    try (BufferedReader br = new BufferedReader(new FileReader(wordList))) {

+    final InputStream resource;

+    try {

+      if (wordList.endsWith(".train.key.gz")) {

+        resource = new GZIPInputStream(new FileInputStream(wordList));

+      } else {

+        resource = new FileInputStream(wordList);

+      }

+    } catch (IOException e) {

+      throw new RuntimeException("Error opening or loading Senseval wordlist from specified resource file!", e);

+    }

 

+    try (BufferedReader br = new BufferedReader(new InputStreamReader(resource))) {

       String line;

-

       while ((line = br.readLine()) != null) {

-

         String word = line.split("\\s")[0];

-

         if (!wordTags.contains(word)) {

           wordTags.add(word);

         }

-

       }

 

     } catch (IOException e) {

@@ -159,14 +170,23 @@
    */

   public ArrayList<WSDSample> getSensevalData(String wordTag) {

 

-    ArrayList<WSDSample> setInstances = new ArrayList<WSDSample>();

+    ArrayList<WSDSample> setInstances = new ArrayList<>();

 

+    final InputStream resource;

     try {

+      if (data.endsWith(".train.gz")) {

+        resource = new GZIPInputStream(new FileInputStream(data));

+      } else {

+        resource = new FileInputStream(data);

+      }

+    } catch (IOException e) {

+      throw new RuntimeException("Error opening or loading Senseval data from specified resource file!", e);

+    }

 

-      File xmlFile = new File(data);

+    try (InputStream xmlFileInputStream = new BufferedInputStream(resource)) {

       DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();

       DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();

-      Document doc = dBuilder.parse(xmlFile);

+      Document doc = dBuilder.parse(xmlFileInputStream);

 

       doc.getDocumentElement().normalize();

 

@@ -188,7 +208,7 @@
               Node nInstance = nInstances.item(j);

 

               if (nInstance.getNodeType() == Node.ELEMENT_NODE) {

-                ArrayList<String> senseIDs = new ArrayList<String>();

+                ArrayList<String> senseIDs = new ArrayList<>();

                 String rawWord = "";

                 String[] finalText = null;

                 int index = 0;

@@ -218,29 +238,26 @@
                       String textAfter = nChild.getChildNodes().item(2)

                           .getTextContent();

 

-                      ArrayList<String> textBeforeTokenzed = new ArrayList<String>(

-                          Arrays.asList(textBefore.split("\\s")));

-                      ArrayList<String> textAfterTokenzed = new ArrayList<String>(

-                          Arrays.asList(textAfter.split("\\s")));

+                      List<String> textBeforeTokenized = Arrays.asList(textBefore.split("\\s"));

+                      List<String> textAfterTokenized = Arrays.asList(textAfter.split("\\s"));

 

-                      textBeforeTokenzed.removeAll(Collections.singleton(null));

-                      textBeforeTokenzed.removeAll(Collections.singleton(""));

+                      textBeforeTokenized.removeAll(Collections.singleton(null));

+                      textBeforeTokenized.removeAll(Collections.singleton(""));

+                      textAfterTokenized.removeAll(Collections.singleton(null));

+                      textAfterTokenized.removeAll(Collections.singleton(""));

 

-                      textAfterTokenzed.removeAll(Collections.singleton(null));

-                      textAfterTokenzed.removeAll(Collections.singleton(""));

-

-                      finalText = new String[textBeforeTokenzed.size() + 1

-                          + textAfterTokenzed.size()];

+                      finalText = new String[textBeforeTokenized.size() + 1

+                          + textAfterTokenized.size()];

 

                       int l = 0;

-                      for (String tempWord : textBeforeTokenzed) {

+                      for (String tempWord : textBeforeTokenized) {

                         finalText[l] = tempWord;

                         l++;

                       }

                       index = l;

                       finalText[l] = rawWord.toLowerCase();

                       l++;

-                      for (String tempWord : textAfterTokenzed) {

+                      for (String tempWord : textAfterTokenized) {

                         finalText[l] = tempWord;

                         l++;

                       }

@@ -249,27 +266,20 @@
                   }

 

                 }

+                final Lemmatizer lemmatizer = WSDHelper.getLemmatizer();

+                final POSTagger tagger = WSDHelper.getTagger();

 

-                String[] words = finalText;

-                String[] tags = WSDHelper.getTagger().tag(words);

-                String[] lemmas = new String[words.length];

+                final String[] words = finalText;

+                final String[] tags = tagger.tag(finalText);

+                String[] lemmas = lemmatizer.lemmatize(words, tags);

 

-                for (int k = 0; k < words.length; k++) {

-                  lemmas[k] = WSDHelper.getLemmatizer().lemmatize(words[k],

-                      tags[k]);

-                }

-

-                WSDSample wtd = new WSDSample(words, tags, lemmas, index,

-                    senseIDs.toArray(new String[0]));

+                WSDSample wtd = new WSDSample(words, tags, lemmas, index, senseIDs.toArray(new String[0]));

                 setInstances.add(wtd);

 

               }

             }

-

           }

-

         }

-

       }

 

     } catch (Exception e) {

diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/AbstractEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/AbstractEvaluatorTest.java
new file mode 100644
index 0000000..57840eb
--- /dev/null
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/AbstractEvaluatorTest.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import org.junit.BeforeClass;
+
+public abstract class AbstractEvaluatorTest {
+
+  private static final String MODELS_DIR = "src/test/resources/models/";
+
+  @BeforeClass
+  public static void initEnv() {
+    WSDHelper.loadTokenizer(MODELS_DIR + "en-token.bin");
+    WSDHelper.loadTagger(MODELS_DIR + "en-pos-maxent.bin");
+    WSDHelper.loadLemmatizer(MODELS_DIR + "en-lemmatizer.dict.gz");
+  }
+}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
index 9e4310a..114306b 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
@@ -21,25 +21,23 @@
 
 import java.util.ArrayList;
 
-import opennlp.tools.disambiguator.datareader.SensevalReader;
-
+import org.junit.Ignore;
 import org.junit.Test;
 
-public class LeskEvaluatorTest {
+import opennlp.tools.disambiguator.datareader.SensevalReader;
+
+public class LeskEvaluatorTest extends AbstractEvaluatorTest {
 
   static SensevalReader seReader = new SensevalReader();
 
   @Test
-  public static void main(String[] args) {
+  @Ignore // TODO OPENNLP-1446: Investigate why test fails while parsing 'EnglishLS.train'
+  public void testEvaluation() {
     WSDHelper.print("Evaluation Started");
-    String modelsDir = "src/test/resources/models/";
-    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
-    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
     Lesk lesk = new Lesk();
     LeskParameters leskParams = new LeskParameters();
-    boolean a[] = { true, true, true, true, true, false, false, false, false,
-        false };
+    boolean a[] = { true, true, true, true, true, false, false, false, false, false };
     leskParams.setFeatures(a);
     leskParams.setLeskType(LeskParameters.LESK_TYPE.LESK_EXT_CTXT);
     lesk.setParams(leskParams);
@@ -69,5 +67,4 @@
     }
   }
 
-
 }
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
index 0ef0091..46cb313 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
@@ -21,15 +21,16 @@
 
 import static org.junit.Assert.assertEquals;
 
-import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
-import opennlp.tools.disambiguator.LeskParameters.LESK_TYPE;
-import opennlp.tools.util.Span;
-
 import org.junit.BeforeClass;
 import org.junit.Test;
 
+import opennlp.tools.disambiguator.LeskParameters.LESK_TYPE;
+import opennlp.tools.lemmatizer.Lemmatizer;
+import opennlp.tools.util.Span;
+
 /**
  * This is the test class for {@link Lesk}.
  * 
@@ -58,9 +59,9 @@
   static String[] tags2;
   static String[] tags3;
 
-  static String[] lemmas1;
-  static String[] lemmas2;
-  static String[] lemmas3;
+  static List<List<String>> lemmas1;
+  static List<List<String>> lemmas2;
+  static List<List<String>> lemmas3;
 
   /*
    * Setup the testing variables
@@ -68,9 +69,9 @@
   @BeforeClass
   public static void setUp() {
 
-    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict.gz");
 
     sentence1 = WSDHelper.getTokenizer().tokenize(test1);
     sentence2 = WSDHelper.getTokenizer().tokenize(test2);
@@ -80,33 +81,16 @@
     tags2 = WSDHelper.getTagger().tag(sentence2);
     tags3 = WSDHelper.getTagger().tag(sentence3);
 
-    List<String> tempLemmas1 = new ArrayList<String>();
-    for (int i = 0; i < sentence1.length; i++) {
-      tempLemmas1
-          .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));
-    }
-    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
-
-    List<String> tempLemmas2 = new ArrayList<String>();
-    for (int i = 0; i < sentence2.length; i++) {
-      tempLemmas2
-          .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));
-    }
-    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
-
-    List<String> tempLemmas3 = new ArrayList<String>();
-    for (int i = 0; i < sentence3.length; i++) {
-      tempLemmas3
-          .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));
-    }
-    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+    final Lemmatizer lemmatizer = WSDHelper.getLemmatizer();
+    lemmas1 = lemmatizer.lemmatize(Arrays.asList(sentence1), Arrays.asList(tags1));
+    lemmas2 = lemmatizer.lemmatize(Arrays.asList(sentence2), Arrays.asList(tags2));
+    lemmas3 = lemmatizer.lemmatize(Arrays.asList(sentence3), Arrays.asList(tags3));
 
     lesk = new Lesk();
 
     LeskParameters params = new LeskParameters();
     params.setLeskType(LESK_TYPE.LESK_EXT);
-    boolean a[] = { true, true, true, true, true, true, true, true, true,
-        true };
+    boolean a[] = { true, true, true, true, true, true, true, true, true, true };
     params.setFeatures(a);
     lesk.setParams(params);
   }
@@ -116,7 +100,7 @@
    */
   @Test
   public void testOneWordDisambiguation() {
-    String sense = lesk.disambiguate(sentence1, tags1, lemmas1, 8);
+    String sense = lesk.disambiguate(sentence1, tags1, lemmas1.get(0).toArray(new String[0]), 8);
     assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00:: -1", sense);
   }
 
@@ -128,7 +112,7 @@
   @Test
   public void testWordSpanDisambiguation() {
     Span span = new Span(3, 7);
-    List<String> senses = lesk.disambiguate(sentence2, tags2, lemmas2, span);
+    List<String> senses = lesk.disambiguate(sentence2, tags2, lemmas2.get(0).toArray(new String[0]), span);
 
     assertEquals("Check number of returned words", 5, senses.size());
     assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01:: 3.8",
@@ -144,7 +128,7 @@
    */
   @Test
   public void testAllWordsDisambiguation() {
-    List<String> senses = lesk.disambiguate(sentence3, tags3, lemmas3);
+    List<String> senses = lesk.disambiguate(sentence3, tags3, lemmas3.get(0).toArray(new String[0]));
 
     assertEquals("Check number of returned words", 15, senses.size());
     assertEquals("Check preposition", "WSDHELPER personal pronoun",
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
index 098c096..1039338 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
@@ -21,24 +21,21 @@
 

 import java.util.ArrayList;

 

-import opennlp.tools.disambiguator.datareader.SensevalReader;

-import opennlp.tools.disambiguator.MFS;

-

+import org.junit.Ignore;

 import org.junit.Test;

 

-public class MFSEvaluatorTest {

+import opennlp.tools.disambiguator.datareader.SensevalReader;

+

+public class MFSEvaluatorTest extends AbstractEvaluatorTest {

 

   static SensevalReader seReader = new SensevalReader();

 

   @Test

-  public static void main(String[] args) {

+  @Ignore // TODO OPENNLP-1446: Investigate why test fails while parsing 'EnglishLS.train'

+  public void testEvaluation() {

     WSDHelper.print("Evaluation Started");

-    String modelsDir = "src/test/resources/models/";

-    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");

-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");

-    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");

-    MFS mfs = new MFS();

 

+    MFS mfs = new MFS();

     ArrayList<String> words = seReader.getSensevalWords();

 

     for (String word : words) {

@@ -62,9 +59,7 @@
           WSDHelper.print("null instances");

         }

       }

-

     }

-

   }

 

 }

diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
index c6ca4b0..20c418b 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
@@ -21,12 +21,13 @@
 

 import static org.junit.Assert.assertEquals;

 

-import java.util.ArrayList;

+import java.util.Arrays;

 import java.util.List;

+

 import org.junit.BeforeClass;

 import org.junit.Test;

 

-import opennlp.tools.disambiguator.MFS;

+import opennlp.tools.lemmatizer.Lemmatizer;

 import opennlp.tools.util.Span;

 

 /**

@@ -58,9 +59,9 @@
   static String[] tags2;

   static String[] tags3;

 

-  static String[] lemmas1;

-  static String[] lemmas2;

-  static String[] lemmas3;

+  static List<List<String>> lemmas1;

+  static List<List<String>> lemmas2;

+  static List<List<String>> lemmas3;

 

   /*

    * Setup the testing variables and the training files

@@ -68,9 +69,9 @@
   @BeforeClass

   public static void setUpAndTraining() {

 

-    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");

-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");

     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");

+    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");

+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict.gz");

 

     sentence1 = WSDHelper.getTokenizer().tokenize(test1);

     sentence2 = WSDHelper.getTokenizer().tokenize(test2);

@@ -80,26 +81,10 @@
     tags2 = WSDHelper.getTagger().tag(sentence2);

     tags3 = WSDHelper.getTagger().tag(sentence3);

 

-    List<String> tempLemmas1 = new ArrayList<String>();

-    for (int i = 0; i < sentence1.length; i++) {

-      tempLemmas1

-          .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));

-    }

-    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);

-

-    List<String> tempLemmas2 = new ArrayList<String>();

-    for (int i = 0; i < sentence2.length; i++) {

-      tempLemmas2

-          .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));

-    }

-    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);

-

-    List<String> tempLemmas3 = new ArrayList<String>();

-    for (int i = 0; i < sentence3.length; i++) {

-      tempLemmas3

-          .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));

-    }

-    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);

+    final Lemmatizer lemmatizer = WSDHelper.getLemmatizer();

+    lemmas1 = lemmatizer.lemmatize(Arrays.asList(sentence1), Arrays.asList(tags1));

+    lemmas2 = lemmatizer.lemmatize(Arrays.asList(sentence2), Arrays.asList(tags2));

+    lemmas3 = lemmatizer.lemmatize(Arrays.asList(sentence3), Arrays.asList(tags3));

 

     mfs = new MFS();

 

@@ -110,7 +95,7 @@
    */

   @Test

   public void testOneWordDisambiguation() {

-    String sense = mfs.disambiguate(sentence1, tags1, lemmas1, 8);

+    String sense = mfs.disambiguate(sentence1, tags1, lemmas1.get(0).toArray(new String[0]), 8);

     assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);

   }

 

@@ -122,7 +107,7 @@
   @Test

   public void testWordSpanDisambiguation() {

     Span span = new Span(3, 7);

-    List<String> senses = mfs.disambiguate(sentence2, tags2, lemmas2, span);

+    List<String> senses = mfs.disambiguate(sentence2, tags2, lemmas2.get(0).toArray(new String[0]), span);

 

     assertEquals("Check number of returned words", 5, senses.size());

     assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",

@@ -138,7 +123,7 @@
    */

   @Test

   public void testAllWordsDisambiguation() {

-    List<String> senses = mfs.disambiguate(sentence3, tags3, lemmas3);

+    List<String> senses = mfs.disambiguate(sentence3, tags3, lemmas3.get(0).toArray(new String[0]));

 

     assertEquals("Check number of returned words", 15, senses.size());

     assertEquals("Check preposition", "WSDHELPER personal pronoun",

diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
index 3b43d99..d6f37b3 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
@@ -23,6 +23,9 @@
 import java.io.IOException;
 import java.util.ArrayList;
 
+import org.junit.Ignore;
+import org.junit.Test;
+
 import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
 import opennlp.tools.disambiguator.datareader.SensevalReader;
 import opennlp.tools.util.ObjectStream;
@@ -32,11 +35,10 @@
 import static org.junit.Assert.fail;
 
 // TODO improve the tests improve parameters
-public class WSDEvaluatorTest {
+public class WSDEvaluatorTest extends AbstractEvaluatorTest {
 
   static SensevalReader seReader;
 
-  static String modelsDir = "src/test/resources/models/";
   static String trainingDataDirectory = "src/test/resources/supervised/models/";
 
   static WSDDefaultParameters params = new WSDDefaultParameters("");
@@ -44,14 +46,12 @@
   static WSDModel model;
 
   static ArrayList<String> testWords;
+  
+  @Test
+  @Ignore // TODO OPENNLP-1446: Investigate why test fails while parsing 'EnglishLS.train'
+  public void testTraining() {
 
-  /*
-   * Setup the testing variables
-   */
-  public static void setUpAndTraining() {
-    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
-    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+    WSDHelper.print("Evaluation Started");
 
     seReader = new SensevalReader();
     testWords = seReader.getSensevalWords();
@@ -72,10 +72,10 @@
           ObjectStream<WSDSample> sampleStream = sr.getSemcorDataStream(word);
 
           WSDModel writeModel = null;
-    /*
-     * Tests training the disambiguator We test both writing and reading a model
-     * file trained by semcor
-     */
+          /*
+           * Tests training the disambiguator We test both writing and reading a model
+           * file trained by semcor
+           */
           File outFile;
           try {
             writeModel = WSDisambiguatorME
@@ -97,7 +97,9 @@
     }
   }
 
-  public static void disambiguationEval() {
+  @Test
+  @Ignore // Make this work once we have migrated to JUnit5 in the sandbox components
+  public void testDisambiguationEval() {
 
     WSDHelper.print("Evaluation Started");
 
@@ -125,8 +127,4 @@
     }
   }
 
-  public static void main(String[] args) {
-    setUpAndTraining();
-    disambiguationEval();
-  }
 }
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java
index 8470928..36ff2f3 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java
@@ -23,14 +23,15 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
-import opennlp.tools.util.ObjectStream;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
+import opennlp.tools.lemmatizer.Lemmatizer;
+import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Span;
 import opennlp.tools.util.TrainingParameters;
 
@@ -74,17 +75,17 @@
   static String[] tags2;
   static String[] tags3;
 
-  static String[] lemmas1;
-  static String[] lemmas2;
-  static String[] lemmas3;
+  static List<List<String>> lemmas1;
+  static List<List<String>> lemmas2;
+  static List<List<String>> lemmas3;
 
   /*
    * Setup the testing variables
    */
   @BeforeClass public static void setUpAndTraining() {
     WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict.gz");
 
     sentence1 = WSDHelper.getTokenizer().tokenize(test1);
     sentence2 = WSDHelper.getTokenizer().tokenize(test2);
@@ -94,27 +95,11 @@
     tags2 = WSDHelper.getTagger().tag(sentence2);
     tags3 = WSDHelper.getTagger().tag(sentence3);
 
-    List<String> tempLemmas1 = new ArrayList<String>();
-    for (int i = 0; i < sentence1.length; i++) {
-      tempLemmas1
-        .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));
-    }
-    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
-
-    List<String> tempLemmas2 = new ArrayList<String>();
-    for (int i = 0; i < sentence2.length; i++) {
-      tempLemmas2
-        .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));
-    }
-    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
-
-    List<String> tempLemmas3 = new ArrayList<String>();
-    for (int i = 0; i < sentence3.length; i++) {
-      tempLemmas3
-        .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));
-    }
-    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
-
+    final Lemmatizer lemmatizer = WSDHelper.getLemmatizer();
+    lemmas1 = lemmatizer.lemmatize(Arrays.asList(sentence1), Arrays.asList(tags1));
+    lemmas2 = lemmatizer.lemmatize(Arrays.asList(sentence2), Arrays.asList(tags2));
+    lemmas3 = lemmatizer.lemmatize(Arrays.asList(sentence3), Arrays.asList(tags3));
+    
     params = new WSDDefaultParameters("");
     params.setTrainingDataDirectory(trainingDataDirectory);
     TrainingParameters trainingParams = new TrainingParameters();
@@ -140,7 +125,7 @@
       assertNotNull("Checking the disambiguator", wsdME);
     } catch (IOException e1) {
       e1.printStackTrace();
-      fail("Exception in training");
+      fail("Exception in training: "+ e1.getMessage());
     }
   }
 
@@ -148,7 +133,7 @@
    * Tests disambiguating only one word : The ambiguous word "please"
    */
   @Test public void testOneWordDisambiguation() {
-    String sense = wsdME.disambiguate(sentence1, tags1, lemmas1, 8);
+    String sense = wsdME.disambiguate(sentence1, tags1, lemmas1.get(0).toArray(new String[0]), 8);
     assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
   }
 
@@ -159,7 +144,7 @@
    */
   @Test public void testWordSpanDisambiguation() {
     Span span = new Span(3, 7);
-    List<String> senses = wsdME.disambiguate(sentence2, tags2, lemmas2, span);
+    List<String> senses = wsdME.disambiguate(sentence2, tags2, lemmas2.get(0).toArray(new String[0]), span);
 
     assertEquals("Check number of returned words", 5, senses.size());
     assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
@@ -174,7 +159,7 @@
    * Tests disambiguating all the words
    */
   @Test public void testAllWordsDisambiguation() {
-    List<String> senses = wsdME.disambiguate(sentence3, tags3, lemmas3);
+    List<String> senses = wsdME.disambiguate(sentence3, tags3, lemmas3.get(0).toArray(new String[0]));
 
     assertEquals("Check number of returned words", 15, senses.size());
     assertEquals("Check preposition", "WSDHELPER personal pronoun",
diff --git a/opennlp-wsd/src/test/resources/models/en-lemmatizer.dict.gz b/opennlp-wsd/src/test/resources/models/en-lemmatizer.dict.gz
new file mode 100644
index 0000000..379db49
--- /dev/null
+++ b/opennlp-wsd/src/test/resources/models/en-lemmatizer.dict.gz
Binary files differ
diff --git a/opennlp-wsd/src/test/resources/models/en-pos-maxent.bin b/opennlp-wsd/src/test/resources/models/en-pos-maxent.bin
new file mode 100644
index 0000000..168f259
--- /dev/null
+++ b/opennlp-wsd/src/test/resources/models/en-pos-maxent.bin
Binary files differ
diff --git a/opennlp-wsd/src/test/resources/models/en-token.bin b/opennlp-wsd/src/test/resources/models/en-token.bin
new file mode 100644
index 0000000..eb7d770
--- /dev/null
+++ b/opennlp-wsd/src/test/resources/models/en-token.bin
Binary files differ
diff --git a/opennlp-wsd/src/test/resources/senseval3/EnglishLS.sensemap b/opennlp-wsd/src/test/resources/senseval3/EnglishLS.sensemap
new file mode 100644
index 0000000..2d9c5a7
--- /dev/null
+++ b/opennlp-wsd/src/test/resources/senseval3/EnglishLS.sensemap
@@ -0,0 +1,303 @@
+38202 2 38204
+38201
+38203
+38205
+42601 2 42606
+42602 2 42604
+42603
+42605
+190901
+190902
+190903
+argument%1:09:00::
+argument%1:10:00:: 2 argument%1:10:03::
+argument%1:10:01::
+argument%1:10:02::
+arm%1:06:00::
+arm%1:06:01::
+arm%1:06:02::
+arm%1:06:03::
+arm%1:08:00::
+arm%1:14:00::
+238101 2 238105
+238102 3 238106
+238103 3 238106
+238104
+atmosphere%1:07:00::
+atmosphere%1:15:00:: 2 atmosphere%1:17:00::
+atmosphere%1:23:00::
+atmosphere%1:26:00::
+atmosphere%1:26:01::
+audience%1:10:00:: 2 audience%1:26:00::
+audience%1:14:00:: 2 audience%1:14:01::
+bank%1:04:00::
+bank%1:06:00:: 2 bank%1:14:00::
+bank%1:06:01:: 2 bank%1:21:01::
+bank%1:14:00::
+bank%1:14:01::
+bank%1:17:00::
+bank%1:17:01::
+bank%1:17:02::
+bank%1:21:00::
+369201 2 369203
+369202
+369204
+770001 3 770005
+770002 3 770005
+770003
+770004
+1067501 2 1067502
+1067503
+1067504
+degree%1:07:00:: 2 degree%1:26:01::
+degree%1:07:01::
+degree%1:09:00::
+degree%1:10:00::
+degree%1:23:00:: 2 degree%1:23:03::
+difference%1:07:00:: 2 difference%1:24:00::
+difference%1:10:00::
+difference%1:11:00::
+difference%1:23:00::
+different%3:00:00:: 2 different%3:00:02::
+different%5:00:00:other:00
+different%5:00:00:unusual:00
+different%5:00:01:other:00
+difficulty%1:04:00:: 3 difficulty%1:26:00::
+difficulty%1:07:00:: 
+difficulty%1:09:02:: 3 difficulty%1:26:00::
+disc%1:06:00::
+disc%1:06:01::
+disc%1:06:03::
+disc%1:25:00::
+1297001 2 1297006
+1297002 2 1297007
+1297003
+1297004
+1297005
+1353101 3 1353104
+1353102
+1353103 3 1353104
+1440301
+1440302
+1440303
+1446801 2 1446802
+1446803
+1446804
+1892101 3 1892105
+1892102 2 1892106
+1892103 3 1892105
+1892104
+1892107
+hot%3:00:01::
+hot%3:00:02::
+hot%5:00:00:active:01
+hot%5:00:00:charged:00
+hot%5:00:00:eager:00
+hot%5:00:00:fast:01
+hot%5:00:00:fresh:01
+hot%5:00:00:good:01
+hot%5:00:00:illegal:00
+hot%5:00:00:lucky:00
+hot%5:00:00:near:00
+hot%5:00:00:new:00
+hot%5:00:00:popular:00
+hot%5:00:00:pungent:00
+hot%5:00:00:radioactive:00
+hot%5:00:00:sexy:00
+hot%5:00:00:skilled:00
+hot%5:00:00:unpleasant:00
+hot%5:00:00:violent:00
+hot%5:00:00:wanted:00
+hot%5:00:00:warm:03
+hot%5:00:02:fast:01
+image%1:06:00::
+image%1:06:01::
+image%1:07:00:: 2 image%1:18:00::
+image%1:09:00::
+image%1:09:02::
+image%1:10:00::
+important%3:00:00:: 2 important%3:00:02::
+important%3:00:02::
+important%3:00:04::
+important%5:00:00:immodest:02
+important%5:00:00:influential:00
+interest%1:04:01:: 2 interest%1:09:00::
+interest%1:07:01::
+interest%1:07:02::
+interest%1:14:00::
+interest%1:21:00::
+interest%1:21:03::
+judgment%1:04:00::
+judgment%1:04:02::
+judgment%1:07:00:: 3 judgment%1:09:00::
+judgment%1:09:01:: 3 judgment%1:09:00::
+judgment%1:09:04::
+judgment%1:10:00::
+2439901 2 2439902
+2439903
+2439904
+2439905
+2439906
+2439907
+2439908
+2439909
+2555501
+2555502
+2555503
+2555504
+2555505
+2555506
+2555507
+2644301 3 2644307
+2644302
+2644303
+2644304 2 2644308
+2644305 3 2644307
+2644306
+2822011
+2822012
+2822013
+2893201 2 2893205
+2893202
+2893203
+2893204
+organization%1:04:00:: 2 organization%1:04:02::
+organization%1:04:01:: 2 organization%1:09:00::
+organization%1:07:00::
+organization%1:14:00:: 2 organization%1:14:01::
+paper%1:06:00:: 2 paper%1:10:03::
+paper%1:10:00:: 2 paper%1:27:00::
+paper%1:10:01:: 2 paper%1:10:02::
+paper%1:14:00::
+party%1:11:00::
+party%1:14:00:: 2 party%1:14:02::
+party%1:14:01::
+party%1:18:00::
+performance%1:04:00:: 2 performance%1:04:03::
+performance%1:04:01:: 2 performance%1:10:00::
+performance%1:22:00::
+plan%1:06:00:: 2 plan%1:09:01::
+plan%1:09:00::
+3165210
+3165211
+3165212 3 3165214
+3165213 3 3165218
+3165215 
+3165216
+3165217 3 3165218
+3165219
+3165220 3 3165214
+3165221
+3288301 2 3288306
+3288302
+3288303
+3288304
+3288305
+3313901 3 3313905
+3313902 3 3313905
+3313903
+3313904
+3313906
+3434801 2 3434806
+3434802
+3434803 2 3434807
+3434804
+3434805
+3434808
+3434809
+3477801
+3477802
+3477803
+3597906
+3597907
+3597908
+3597910
+3597911
+shelter%1:06:00::
+shelter%1:06:01::
+shelter%1:21:00::
+shelter%1:26:00::
+simple%3:00:01::
+simple%3:00:02:: 2 simple%5:00:00:easy:01
+simple%5:00:00:naive:00
+simple%5:00:00:plain:01 2 simple%5:00:02:plain:01
+simple%5:00:00:retarded:00
+3893501 4 3893507
+3893502 4 3893507
+3893503
+3893505 4 3893507
+3893508
+3893509
+solid%3:00:01::
+solid%3:00:02::
+solid%5:00:00:cubic:00
+solid%5:00:00:frozen:00
+solid%5:00:00:good:01
+solid%5:00:00:hard:01
+solid%5:00:00:homogeneous:00
+solid%5:00:00:honorable:00
+solid%5:00:00:opaque:00
+solid%5:00:00:plain:02
+solid%5:00:00:sound:01
+solid%5:00:00:unbroken:02
+solid%5:00:00:undiversified:00
+solid%5:00:00:wholesome:00
+sort%1:07:00::
+sort%1:09:00:: 2 sort%1:18:00::
+sort%1:22:00::
+source%1:06:00::
+source%1:09:00::
+source%1:10:00:: 2 source%1:10:01::
+source%1:15:00::
+source%1:18:00::
+source%1:18:01::
+4155301
+4155302 3 4155307
+4155303 3 4155307
+4155304
+4155305
+4155306
+4198501 3 4198506
+4198502
+4198503 3 4198506
+4198504
+4198505
+4198507
+4198508
+4198509
+4380101
+4380102 2 4380108
+4380103
+4380104 2 4380105
+4380106 2 4380109
+4380107
+4530701
+4530702
+4530703
+4530704
+4530705
+4636101 5 4636109
+4636102 5 4636109
+4636103 2 4636110
+4636104 2 4636111
+4636105
+4636106
+4636107 5 4636109
+4636108 5 4636109
+4636112
+4640501 2 4640507
+4640502
+4640503 2 4640504
+4640508
+4640509
+4711401 2 4711403
+4711402
+4711404 2 4711406
+4711405
+4711407
+4753401 3 4753406
+4753402 
+4753404 3 4753406
+4753405
+4753407 2 4753403
+4753408
diff --git a/opennlp-wsd/src/test/resources/senseval3/EnglishLS.train.gz b/opennlp-wsd/src/test/resources/senseval3/EnglishLS.train.gz
new file mode 100644
index 0000000..e1abe66
--- /dev/null
+++ b/opennlp-wsd/src/test/resources/senseval3/EnglishLS.train.gz
Binary files differ
diff --git a/opennlp-wsd/src/test/resources/senseval3/EnglishLS.train.key.gz b/opennlp-wsd/src/test/resources/senseval3/EnglishLS.train.key.gz
new file mode 100644
index 0000000..c1f20d8
--- /dev/null
+++ b/opennlp-wsd/src/test/resources/senseval3/EnglishLS.train.key.gz
Binary files differ