OPENNLP-796 The two readers now return ObjectStream<WSDSample>. Thanks to Mondher Bouazizi for providing a patch.
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
index eb7a2d5..664f7b3 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
@@ -27,6 +27,8 @@
 

 import opennlp.tools.disambiguator.WSDHelper;

 import opennlp.tools.disambiguator.WSDSample;

+import opennlp.tools.util.ObjectStream;

+import opennlp.tools.util.ObjectStreamUtils;

 

 import org.w3c.dom.Document;

 import org.w3c.dom.Element;

@@ -34,7 +36,7 @@
 import org.w3c.dom.NodeList;

 

 /**

- * This reads one semcor file. It requires the

+ * This class reads Semcor data.

  *

  */

 public class SemcorReaderExtended {

@@ -62,10 +64,19 @@
 

   private static final String ELEMENT_PUNCTUATION = "punc";

 

-  private static String path = "src\\test\\resources\\semcor3.0\\";

+  private static String semcorDirectory = "src\\test\\resources\\semcor3.0\\";

   private static String[] folders = { "brown1", "brown2", "brownv" };

   private static String tagfiles = "\\tagfiles\\";

 

+  

+  public static String getSemcorDirectory() {

+    return semcorDirectory;

+  }

+

+  public static void setSemcorDirectory(String semcorDirectory) {

+    SemcorReaderExtended.semcorDirectory = semcorDirectory;

+  }

+

   public SemcorReaderExtended() {

     super();

   }

@@ -73,7 +84,7 @@
   /**

    * This serves to read one Semcor XML file

    */

-  public ArrayList<Sentence> readFile(String file) {

+  private ArrayList<Sentence> readFile(String file) {

 

     ArrayList<Sentence> result = new ArrayList<Sentence>();

 

@@ -176,8 +187,18 @@
     return result;

   }

 

-  public ArrayList<WSDSample> getSemcorOneFileData(String file,

-      String wordTag) {

+  /**

+   * One Semcor file reader: This reads a single semcor file,

+   * and returns all the instances in the format {@link WSDSample} of a

+   * specific word

+   * 

+   * @param file

+   *          the name of the file to read

+   * @param wordTag

+   *          The word, of which we are looking for the instances

+   * @return the list of the {@link WSDSample} instances

+   */

+  private ArrayList<WSDSample> getSemcorOneFileData(String file, String wordTag) {

 

     ArrayList<WSDSample> setInstances = new ArrayList<WSDSample>();

 

@@ -227,11 +248,12 @@
               String[] words = sentence.split("\\s");

               String[] tags = WSDHelper.getTagger().tag(words);

               String[] lemmas = new String[words.length];

-              

+

               for (int i = 0; i < words.length; i++) {

-                lemmas[i] = WSDHelper.getLemmatizer().lemmatize(words[i], tags[i]);

+                lemmas[i] = WSDHelper.getLemmatizer().lemmatize(words[i],

+                    tags[i]);

               }

-              

+

               WSDSample wtd = new WSDSample(words, tags, lemmas, index, senses);

               setInstances.add(wtd);

             }

@@ -251,7 +273,7 @@
 

   /**

    * One Semcor folder reader: This reads all the files in one semcor folder,

-   * and return all the instances in the format {@link WordToDisambiguate} of a

+   * and return all the instances in the format {@link WSDSample} of a

    * specific word

    * 

    * @param folder

@@ -259,13 +281,13 @@
    *          are ["brown1", "brown2", "brownv"]

    * @param wordTag

    *          The word, of which we are looking for the instances

-   * @return the list of the {@link WordToDisambiguate} instances

+   * @return the list of the {@link WSDSample} instances

    */

-  public ArrayList<WSDSample> getSemcorFolderData(String folder, String wordTag) {

+  private ArrayList<WSDSample> getSemcorFolderData(String folder, String wordTag) {

 

     ArrayList<WSDSample> result = new ArrayList<WSDSample>();

 

-    String directory = path + folder + tagfiles;

+    String directory = semcorDirectory + folder + tagfiles;

     File tempFolder = new File(directory);

     File[] listOfFiles;

 

@@ -273,8 +295,8 @@
       listOfFiles = tempFolder.listFiles();

       for (File file : listOfFiles) {

 

-        ArrayList<WSDSample> list = getSemcorOneFileData(directory

-            + file.getName(), wordTag);

+        ArrayList<WSDSample> list = getSemcorOneFileData(

+            directory + file.getName(), wordTag);

         result.addAll(list);

       }

     }

@@ -285,11 +307,11 @@
 

   /**

    * Semcor reader: This reads all the files in semcor, and return all the

-   * instances in the format {@link WordToDisambiguate} of a specific word

+   * instances in the format {@link WSDSample} of a specific word

    * 

    * @param wordTag

    *          The word, of which we are looking for the instances

-   * @return the list of the {@link WordToDisambiguate} instances of the word to

+   * @return the list of the {@link WSDSample} instances of the word to

    *         disambiguate

    */

   public ArrayList<WSDSample> getSemcorData(String wordTag) {

@@ -305,4 +327,16 @@
 

   }

 

+  /**

+   * Semcor reader: This reads all the files in semcor, and return all the

+   * instances in the format {@link WSDSample} of a specific word

+   * 

+   * @param wordTag

+   *          The word, of which we are looking for the instances

+   * @return the stream of {@link WSDSample} of the word to disambiguate

+   */

+  public ObjectStream<WSDSample> getSemcorDataStream(String wordTag) {

+    return ObjectStreamUtils.createObjectStream(getSemcorData(wordTag));

+  }

+

 }

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
index 464bc36..4e060b9 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
@@ -38,6 +38,8 @@
 

 import opennlp.tools.disambiguator.WSDHelper;

 import opennlp.tools.disambiguator.WSDSample;

+import opennlp.tools.util.ObjectStream;

+import opennlp.tools.util.ObjectStreamUtils;

 

 /**

  * This class handles the extraction of Senseval-3 data from the different files

@@ -45,13 +47,24 @@
  */

 public class SensevalReader {

 

-  private String resourcesFolder = "src\\test\\resources\\";

-  protected String sensevalDirectory = resourcesFolder + "senseval3\\";

+  protected String sensevalDirectory = "src\\test\\resources\\senseval3\\";

 

   protected String data = sensevalDirectory + "EnglishLS.train";

   protected String sensemapFile = sensevalDirectory + "EnglishLS.sensemap";

   protected String wordList = sensevalDirectory + "EnglishLS.train.key";

 

+  public String getSensevalDirectory() {

+    return sensevalDirectory;

+  }

+

+  public void setSensevalDirectory(String sensevalDirectory) {

+    this.sensevalDirectory = sensevalDirectory;

+

+    this.data = sensevalDirectory + "EnglishLS.train";

+    this.sensemapFile = sensevalDirectory + "EnglishLS.sensemap";

+    this.wordList = sensevalDirectory + "EnglishLS.train.key";

+  }

+

   public SensevalReader() {

     super();

   }

@@ -136,12 +149,12 @@
 

   /**

    * Main Senseval Reader: This checks if the data corresponding to the words to

-   * disambiguate exist in the folder, and extract the

-   * {@link WordToDisambiguate} instances

+   * disambiguate exist in the folder, and extract the {@link WSDSample}

+   * instances

    * 

    * @param wordTag

    *          The word, of which we are looking for the instances

-   * @return the list of the {@link WordToDisambiguate} instances of the word to

+   * @return the list of the {@link WSDSample} instances of the word to

    *         disambiguate

    */

   public ArrayList<WSDSample> getSensevalData(String wordTag) {

@@ -267,4 +280,16 @@
 

   }

 

+  /**

+   * Main Senseval Reader: This checks if the data corresponding to the words to

+   * disambiguate exist in the folder, and extracts the {@link WSDSample} instances

+   * 

+   * @param wordTag

+   *          The word, of which we are looking for the instances

+   * @return the stream of {@link WSDSample} of the word to disambiguate

+   */

+  public ObjectStream<WSDSample> getSemcorDataStream(String wordTag) {

+    return ObjectStreamUtils.createObjectStream(getSensevalData(wordTag));

+  }

+

 }