OPENNLP-801 1- IMS no longer performs the pre-processing steps (the user will have to apply them beforehand). Thanks to Mondher Bouazizi for providing a patch!
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
index 188d9a9..37bcca5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
@@ -1,403 +1 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-
-import opennlp.tools.disambiguator.lesk.Lesk;
-import net.sf.extjwnl.JWNLException;
-import net.sf.extjwnl.data.POS;
-
-public class Constants {
-
- private static String resourcesFolder = "src\\test\\resources\\";
-
- private static String englishDict = resourcesFolder
- + "models\\en-lemmatizer.dict";
-
- public static String osPathChar = "\\";
-
- // List of all the PoS tags
- public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
- "JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS",
- "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB", "VBD",
- "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" };
-
- // List of the PoS tags of which the senses are to be extracted
- public static String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB",
- "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };
-
- // List of Negation Words
- public static ArrayList<String> negationWords = new ArrayList<String>(
- Arrays.asList("not", "no", "never", "none", "nor", "non"));
-
- // List of Stop Words
- public static ArrayList<String> stopWords = new ArrayList<String>(
- Arrays.asList("a", "able", "about", "above", "according", "accordingly",
- "across", "actually", "after", "afterwards", "again", "against",
- "ain't", "all", "allow", "allows", "almost", "alone", "along",
- "already", "also", "although", "always", "am", "among", "amongst",
- "an", "and", "another", "any", "anybody", "anyhow", "anyone",
- "anything", "anyway", "anyways", "anywhere", "apart", "appear",
- "appreciate", "appropriate", "are", "aren't", "around", "as",
- "aside", "ask", "asking", "associated", "at", "available", "away",
- "awfully", "be", "became", "because", "become", "becomes",
- "becoming", "been", "before", "beforehand", "behind", "being",
- "believe", "below", "beside", "besides", "best", "better", "between",
- "beyond", "both", "brief", "but", "by", "came", "can", "cannot",
- "cant", "can't", "cause", "causes", "certain", "certainly",
- "changes", "clearly", "c'mon", "co", "com", "come", "comes",
- "concerning", "consequently", "consider", "considering", "contain",
- "containing", "contains", "corresponding", "could", "couldn't",
- "course", "c's", "currently", "definitely", "described", "despite",
- "did", "didn't", "different", "do", "does", "doesn't", "doing",
- "done", "don't", "down", "downwards", "during", "each", "edu", "eg",
- "eight", "either", "else", "elsewhere", "enough", "entirely",
- "especially", "et", "etc", "even", "ever", "every", "everybody",
- "everyone", "everything", "everywhere", "ex", "exactly", "example",
- "except", "far", "few", "fifth", "first", "five", "followed",
- "following", "follows", "for", "former", "formerly", "forth", "four",
- "from", "further", "furthermore", "get", "gets", "getting", "given",
- "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings",
- "had", "hadn't", "happens", "hardly", "has", "hasn't", "have",
- "haven't", "having", "he", "hello", "help", "hence", "her", "here",
- "hereafter", "hereby", "herein", "here's", "hereupon", "hers",
- "herself", "he's", "hi", "him", "himself", "his", "hither",
- "hopefully", "how", "howbeit", "however", "i", "i'd", "ie", "if",
- "ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc",
- "indeed", "indicate", "indicated", "indicates", "inner", "insofar",
- "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll",
- "its", "it's", "itself", "i've", "just", "keep", "keeps", "kept",
- "know", "known", "knows", "last", "lately", "later", "latter",
- "latterly", "least", "less", "lest", "let", "let's", "like", "liked",
- "likely", "little", "look", "looking", "looks", "ltd", "mainly",
- "many", "may", "maybe", "me", "mean", "meanwhile", "merely", "might",
- "more", "moreover", "most", "mostly", "much", "must", "my", "myself",
- "name", "namely", "nd", "near", "nearly", "necessary", "need",
- "needs", "neither", "never", "nevertheless", "new", "next", "nine",
- "no", "nobody", "non", "none", "noone", "nor", "normally", "not",
- "nothing", "novel", "now", "nowhere", "obviously", "of", "off",
- "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones",
- "only", "onto", "or", "other", "others", "otherwise", "ought", "our",
- "ours", "ourselves", "out", "outside", "over", "overall", "own",
- "particular", "particularly", "per", "perhaps", "placed", "please",
- "plus", "possible", "presumably", "probably", "provides", "que",
- "quite", "qv", "rather", "rd", "re", "really", "reasonably",
- "regarding", "regardless", "regards", "relatively", "respectively",
- "right", "said", "same", "saw", "say", "saying", "says", "second",
- "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems",
- "seen", "self", "selves", "sensible", "sent", "serious", "seriously",
- "seven", "several", "shall", "she", "should", "shouldn't", "since",
- "six", "so", "some", "somebody", "somehow", "someone", "something",
- "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry",
- "specified", "specify", "specifying", "still", "sub", "such", "sup",
- "sure", "take", "taken", "tell", "tends", "th", "than", "thank",
- "thanks", "thanx", "that", "thats", "that's", "the", "their",
- "theirs", "them", "themselves", "then", "thence", "there",
- "thereafter", "thereby", "therefore", "therein", "theres", "there's",
- "thereupon", "these", "they", "they'd", "they'll", "they're",
- "they've", "think", "third", "this", "thorough", "thoroughly",
- "those", "though", "three", "through", "throughout", "thru", "thus",
- "to", "together", "too", "took", "toward", "towards", "tried",
- "tries", "truly", "try", "trying", "t's", "twice", "two", "un",
- "under", "unfortunately", "unless", "unlikely", "until", "unto",
- "up", "upon", "us", "use", "used", "useful", "uses", "using",
- "usually", "value", "various", "very", "via", "viz", "vs", "want",
- "wants", "was", "wasn't", "way", "we", "we'd", "welcome", "well",
- "we'll", "went", "were", "we're", "weren't", "we've", "what",
- "whatever", "what's", "when", "whence", "whenever", "where",
- "whereafter", "whereas", "whereby", "wherein", "where's",
- "whereupon", "wherever", "whether", "which", "while", "whither",
- "who", "whoever", "whole", "whom", "who's", "whose", "why", "will",
- "willing", "wish", "with", "within", "without", "wonder", "won't",
- "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "your",
- "you're", "yours", "yourself", "yourselves", "you've", "zero"));
-
- // Print a text in the console
- // Print a text in the console
- public static void printResults(WSDisambiguator disambiguator,
- String[] results) {
-
- if (results != null) {
-
- String[] parts;
- String sensekey;
- if (disambiguator instanceof Lesk) {
-
- Double score;
-
- for (int i = 0; i < results.length; i++) {
- parts = results[i].split(" ");
- sensekey = parts[1];
- score = Double.parseDouble(parts[2]);
- try {
- Constants.print("score : "
- + score
- + " for sense "
- + i
- + " : "
- + sensekey
- + " : "
- + Loader.getDictionary().getWordBySenseKey(sensekey)
- .getSynset().getGloss());
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- }
- } else {
- for (int i = 0; i < results.length; i++) {
- parts = results[i].split(" ");
- sensekey = parts[1];
- try {
- Constants.print("sense "
- + i
- + " : "
- + sensekey
- + " : "
- + Loader.getDictionary().getWordBySenseKey(sensekey)
- .getSynset().getGloss());
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- }
- }
- }
-
- }
-
- public static void print(Object in) {
- if (in == null) {
- System.out.println("object is null");
- } else {
- System.out.println(in);
- }
- }
-
- public static void print(Object[] array) {
- if (array == null) {
- System.out.println("object is null");
- } else {
- System.out.println(Arrays.asList(array));
- }
- }
-
- public static void print(Object[][] array) {
- if (array == null) {
- System.out.println("object is null");
- } else {
- System.out.print("[");
- for (int i = 0; i < array.length; i++) {
- print(array[i]);
- if (i != array.length - 1) {
- System.out.print("\n");
- }
- print("]");
- }
- }
- }
-
- /**
- * Extract the list of ALL English words
- *
- * @param dict
- * this file is the same that is used in the simple Lemmatizer
- * (i.e.,"en-lemmatizer.dict")
- *
- * @return a list of all the English words
- */
- public static HashMap<String, Object> getEnglishWords(String dict) {
-
- HashMap<String, Object> words = new HashMap<String, Object>();
-
- BufferedReader br = null;
-
- File file = new File(englishDict);
-
- if (file.exists()) {
-
- try {
- br = new BufferedReader(new FileReader(file));
- String line = br.readLine();
- while (line != null) {
- line = br.readLine();
- if (line != null) {
- String word = line.split("\\t")[0];
- words.put(word, null);
- }
- }
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- if (br != null) {
- try {
- br.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- }
-
- return words;
- }
-
- /**
- * return the PoS (Class POS) out of the PoS-tag
- *
- * @param posTag
- * PoS tag (e.g., "JJS", "NNP", etc.)
- * @return the Part of Speech (type {@link POS})
- */
- public static POS getPOS(String posTag) {
-
- ArrayList<String> adjective = new ArrayList<String>(Arrays.asList("JJ",
- "JJR", "JJS"));
- ArrayList<String> adverb = new ArrayList<String>(Arrays.asList("RB", "RBR",
- "RBS"));
- ArrayList<String> noun = new ArrayList<String>(Arrays.asList("NN", "NNS",
- "NNP", "NNPS"));
- ArrayList<String> verb = new ArrayList<String>(Arrays.asList("VB", "VBD",
- "VBG", "VBN", "VBP", "VBZ"));
-
- if (adjective.contains(posTag))
- return POS.ADJECTIVE;
- else if (adverb.contains(posTag))
- return POS.ADVERB;
- else if (noun.contains(posTag))
- return POS.NOUN;
- else if (verb.contains(posTag))
- return POS.VERB;
- else
- return null;
-
- }
-
- /**
- * Check whether a PoS Tag is relevant of not. A PoS Tag is considered
- * relevant when it corresponds to:
- * <ul>
- * <li>VERB</li>
- * <li>ADJECTIVE</li>
- * <li>ADVERB</li>
- * <li>NOUN</li>
- * </ul>
- *
- * @param posTag
- * the PoS Tag to verify the relevance.
- * @return whether a PoS Tag corresponds to a relevant Part of Speech (type
- * {@link POS}) or not ( true} if it is, false} otherwise)
- */
- public static boolean isRelevant(String posTag) {
- return getPOS(posTag) != null;
- }
-
- /**
- * Check whether a PoS Tag is relevant of not. A PoS Tag is considered
- * relevant when it is:
- * <ul>
- * <li>VERB</li>
- * <li>ADJECTIVE</li>
- * <li>ADVERB</li>
- * <li>NOUN</li>
- * </ul>
- *
- * @param pos
- * The Part of Speech of Type {@link POS}
- * @return whether a Part of Speech is relevant (true) or not (false)
- */
- public static boolean isRelevant(POS pos) {
- return pos.equals(POS.ADJECTIVE) || pos.equals(POS.ADVERB)
- || pos.equals(POS.NOUN) || pos.equals(POS.VERB);
- }
-
- public static String getPOSabbreviation(String posTag) {
-
- if (posTag == null) {
- return null;
- }
- if (posTag.startsWith("JJ")) {
- return "a";
- } else if (posTag.startsWith("RB")) {
- return "r";
- } else if (posTag.startsWith("VB") || posTag.equals("MD")) {
- return "v";
- } else if (posTag.startsWith("NN")) {
- return "n";
- }
-
- return null;
-
- }
-
- /**
- * Check whether a list of arrays contains an array
- *
- * @param array
- * The array To check
- * @param fullList
- * The full list of Arrays
- * @return whether the {@link ArrayList} of arrays contains the array (true)
- * or not (false)
- */
- public static boolean belongsTo(String[] array, ArrayList<String[]> fullList) {
- for (String[] refArray : fullList) {
- if (areStringArraysEqual(array, refArray))
- return true;
- }
- return false;
- }
-
- /**
- * Check whether two arrays of strings are equal
- *
- * @param array1
- * first array
- * @param array2
- * second array
- * @return whether the two arrays are identical (true) or not (false)
- */
- public static boolean areStringArraysEqual(String[] array1, String[] array2) {
-
- if (array1.equals(null) || array2.equals(null))
- return false;
-
- if (array1.length != array2.length) {
- return false;
- }
- for (int i = 0; i < array1.length; i++) {
- if (!array1[i].equals(array2[i])) {
- return false;
- }
- }
-
- return true;
-
- }
-
-}
+// TODO to be removed
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java
index 3cd2780..e1d2722 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java
@@ -1,414 +1 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-
-import opennlp.tools.disambiguator.DictionaryInstance;
-import opennlp.tools.disambiguator.ims.WTDIMS;
-
-/**
- * This class handles the extraction of data from the different files (training
- * data, dictionary instances, etc.)
- */
-
-public class DataExtractor {
-
- private static String englishDict = "src\\test\\resources\\models\\en-lemmatizer.dict";
-
- /**
- * Constructor
- */
- public DataExtractor() {
- super();
- }
-
- private ArrayList<DictionaryInstance> extractDictionary(String xmlLocation) {
-
- ArrayList<DictionaryInstance> dictionary = new ArrayList<DictionaryInstance>();
-
- try {
-
- File xmlFile = new File(xmlLocation);
- DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
- DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
- Document doc = dBuilder.parse(xmlFile);
- doc.getDocumentElement().normalize();
-
- NodeList nLexelts = doc.getElementsByTagName("lexelt");
-
- int index = 0;
-
- for (int i = 0; i < nLexelts.getLength(); i++) {
-
- Node nLexelt = nLexelts.item(i);
-
- Element eLexelt = (Element) nLexelt;
-
- String word = eLexelt.getAttribute("item");
-
- if (nLexelt.getNodeType() == Node.ELEMENT_NODE) {
-
- NodeList nSenses = eLexelt.getChildNodes();
-
- for (int j = 0; j < nSenses.getLength(); j++) {
-
- if (nSenses.item(j).getNodeType() == Node.ELEMENT_NODE) {
-
- Element eSense = (Element) nSenses.item(j);
-
- int ind = index; // rather use this than the ID used by default
- String id = eSense.getAttribute("id");
- String source = eSense.getAttribute("source");
- String[] synset = eSense.getAttribute("synset").split("\\s");
- String gloss = eSense.getAttribute("gloss");
-
- DictionaryInstance wd = new DictionaryInstance(ind, word, id,
- source, synset, gloss);
-
- dictionary.add(wd);
- index++;
- }
- }
-
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- return dictionary;
-
- }
-
- private HashMap<Integer, ArrayList<String>> getEquivalentSense(
- String sensemapFile) {
-
- HashMap<Integer, ArrayList<String>> mappedSenses = new HashMap<Integer, ArrayList<String>>();
-
- try (BufferedReader wordsList = new BufferedReader(new FileReader(
- sensemapFile))) {
-
- int index = 0;
-
- String line;
-
- // Read the file
- while ((line = wordsList.readLine()) != null) {
-
- String[] temp = line.split("\\s");
-
- ArrayList<String> tempSenses = new ArrayList<String>();
-
- for (String sense : temp) {
- if (sense.length() > 1) {
- // System.out.println(sense);
- tempSenses.add(sense);
- }
- }
-
- mappedSenses.put(index, tempSenses);
- // System.out.println(index);
- index++;
-
- }
-
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- return mappedSenses;
-
- }
-
- private HashMap<String, ArrayList<DictionaryInstance>> extractCoarseGrainedDictionary(
- String xmlLocation, String sensemapFile) {
-
- HashMap<String, ArrayList<DictionaryInstance>> optimizedDictionary = new HashMap<String, ArrayList<DictionaryInstance>>();
-
- HashMap<Integer, ArrayList<String>> equivalentSenses = getEquivalentSense(sensemapFile);
-
- ArrayList<DictionaryInstance> dictionary = extractDictionary(xmlLocation);
-
- for (int mapKey : equivalentSenses.keySet()) {
- ArrayList<String> sensesIds = equivalentSenses.get(mapKey);
- ArrayList<DictionaryInstance> optimizedDictionaryInstance = new ArrayList<DictionaryInstance>();
-
- String word = "";
-
- for (String senseId : sensesIds) {
- for (int i = 0; i < dictionary.size(); i++) {
- if (dictionary.get(i).getId().equals(senseId)) {
- optimizedDictionaryInstance.add(dictionary.get(i));
- word = dictionary.get(i).getWord();
- word = word + "_" + mapKey;
- break;
- }
- }
-
- }
-
- optimizedDictionary.put(word, optimizedDictionaryInstance);
- }
-
- return optimizedDictionary;
- }
-
- /**
- * Extract the different senses (those which are equivalent are put together)
- * of a word
- *
- * @param xmlLocation
- * : location of the file containing the dictionary instances
- * @param sensemapFile
- * : location of the file containing the equivalent senses in the
- * case of Coarse-grained disambiguation
- * @param wordTag
- * : the word to disambiguate. It should be written in the format
- * "word.p" (Exp: "write.v", "well.r", "smart.a", "go.v"
- * @return a {@link HashMap} of {@link DictionaryInstance} with their IDs
- */
- public HashMap<String, ArrayList<DictionaryInstance>> extractWordSenses(
- String xmlLocation, String sensemapFile, String wordTag) {
-
- /**
- * word tag has to be in the format "word.t" (e.g., "activate.v", "smart.a",
- * etc.)
- */
-
- HashMap<String, ArrayList<DictionaryInstance>> wordSenses = new HashMap<String, ArrayList<DictionaryInstance>>();
-
- HashMap<String, ArrayList<DictionaryInstance>> optimalDictionary = extractCoarseGrainedDictionary(
- xmlLocation, sensemapFile);
-
- int i = 0;
- for (String key : optimalDictionary.keySet()) {
- if (key.startsWith(wordTag)) {
- String newKey = wordTag + "_" + i;
- wordSenses.put(newKey, optimalDictionary.get(key));
- i++;
- }
- }
-
- return wordSenses;
- }
-
- /**
- * Extract the different senses. This class returns only the ID of the sense
- * and the gloss. the synsets and other information are omitted.
- *
- * @param xmlLocation
- * : location of the file containing the dictionary instances
- * @param sensemapFile
- * : location of the file containing the equivalent senses in the
- * case of Coarse-grained disambiguation
- * @param wordTag
- * the word to disambiguate. It should be written in the format
- * "word.p" (Exp: "write.v", "well.r", "smart.a", "go.v"
- * @return a {@link HashMap} of word senses with their IDs
- */
- public HashMap<String, String> getDictionaryInstance(String xmlLocation,
- String sensemapFile, String wordTag) {
-
- HashMap<String, ArrayList<DictionaryInstance>> dict = extractWordSenses(
- xmlLocation, sensemapFile, wordTag);
-
- HashMap<String, String> senses = new HashMap<String, String>();
-
- for (String key : dict.keySet()) {
- String sense = dict.get(key).get(0).getGloss();
- senses.put(key, sense);
- }
-
- return senses;
-
- }
-
- /**
- * Extract the training instances from the training/test set File
- *
- * @param xmlDataSet
- * : the file from which the data are to be extracted
- * @return {@link ArrayList} of Word To Disambiguate (WTDIMS) instances
- */
- public ArrayList<WTDIMS> extractWSDInstances(String xmlDataSet) {
-
- ArrayList<WTDIMS> setInstances = new ArrayList<WTDIMS>();
-
- try {
-
- File xmlFile = new File(xmlDataSet);
- DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
- DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
- Document doc = dBuilder.parse(xmlFile);
-
- doc.getDocumentElement().normalize();
-
- NodeList lexelts = doc.getElementsByTagName("lexelt");
-
- for (int i = 0; i < lexelts.getLength(); i++) {
-
- Node nLexelt = lexelts.item(i);
-
- if (nLexelt.getNodeType() == Node.ELEMENT_NODE) {
- Element eLexelt = (Element) nLexelt;
-
- NodeList nInstances = nLexelt.getChildNodes();
-
- for (int j = 1; j < nInstances.getLength(); j++) {
-
- Node nInstance = nInstances.item(j);
-
- if (nInstance.getNodeType() == Node.ELEMENT_NODE) {
-
- Element eInstance = (Element) nInstance;
-
- String[] wordPos = eLexelt.getAttribute("item").split("\\.");
- String word = wordPos[0]; // Word
- String tag; // Part of Speech
-
- if (wordPos[1].equals("n")) {
- tag = "noun";
- } else if (wordPos[1].equals("v")) {
- tag = "verb";
- } else if (wordPos[1].equals("a")) {
- tag = "adjective";
- } else {
- tag = "adverb";
- }
-
- String id = eInstance.getAttribute("id");
- String source = eInstance.getAttribute("docsrc");
-
- ArrayList<String> answers = new ArrayList<String>();
- String sentence = "";
- String rawWord = "";
-
- NodeList nChildren = nInstance.getChildNodes();
-
- for (int k = 1; k < nChildren.getLength(); k++) {
- Node nChild = nChildren.item(k);
-
- if (nChild.getNodeName().equals("answer")) {
- // String answer =
- // nChild.getAttributes().item(0).getTextContent();
- String senseid = nChild.getAttributes().item(1)
- .getTextContent();
-
- String temp = senseid;
- // String[] temp = { answer, senseid };
- answers.add(temp);
- }
-
- if (nChild.getNodeName().equals("context")) {
- sentence = ((Element) nChild).getTextContent();
-
- if (nChild.hasChildNodes()) {
- // textbefore =
- // nChild.getChildNodes().item(0).getTextContent();
- rawWord = nChild.getChildNodes().item(1).getTextContent();
- // textAfter =
- // nChild.getChildNodes().item(2).getTextContent();
- }
- }
-
- }
-
- WTDIMS wordToDisambiguate = new WTDIMS(word, answers, sentence,
- rawWord);
- setInstances.add(wordToDisambiguate);
- }
-
- }
-
- }
-
- }
-
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- return setInstances;
-
- }
-
- /**
- * Extract the list of ALL English words
- *
- * @param dict
- * : this file is the same that is used in the simple lemmatizer
- * (i.e.,"en-lemmatizer.dict")
- *
- * @return a list of all the english words
- */
- public HashMap<String, Object> getEnglishWords(String dict) {
-
- HashMap<String, Object> words = new HashMap<String, Object>();
-
- BufferedReader br = null;
-
- File file = new File(englishDict);
-
- if (file.exists()) {
-
- try {
- br = new BufferedReader(new FileReader(file));
- String line = br.readLine();
- while (line != null) {
- line = br.readLine();
- if (line != null) {
- String word = line.split("\\t")[0];
- words.put(word, null);
- }
- }
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- if (br != null) {
- try {
- br.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- }
-
- return words;
- }
-
-}
\ No newline at end of file
+// TODO this is to be removed
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java
index a30c887..ade7aab 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java
@@ -1,107 +1 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-/**
- * An instance of the dictionary. A dictionary instance has:
- * <ul>
- * <li>index: an index for the current instance of the dictionary</li>
- * <li>word: the word to disambiguate</li>
- * <li>id: its id in the source (e.g., in WordNet, Wordsmyth, etc.)</li>
- * <li>source: the source of the instance (e.g., WordNet, Wordsmyth, etc.)</li>
- * <li>synset: the list of synonyms (i.e., the words that share the same current
- * meaning)</li>
- * <li>gloss: the sense of the word</li>
- * </ul>
- */
-public class DictionaryInstance {
-
- protected int index;
-
- protected String word;
-
- protected String id;
- protected String source;
- protected String[] synset;
- protected String gloss;
-
- /**
- * Constructor
- */
- public DictionaryInstance(int index, String word, String id, String source,
- String[] synset, String gloss) {
- super();
- this.index = index;
- this.word = word;
- this.id = id;
- this.source = source;
- this.synset = synset;
- this.gloss = gloss;
- }
-
- public int getIndex() {
- return index;
- }
-
- public void setIndex(int index) {
- this.index = index;
- }
-
- public String getWord() {
- return word;
- }
-
- public void setWord(String word) {
- this.word = word;
- }
-
- public String getId() {
- return id;
- }
-
- public void setId(String id) {
- this.id = id;
- }
-
- public String getSource() {
- return source;
- }
-
- public void setSource(String source) {
- this.source = source;
- }
-
- public String[] getSynset() {
- return synset;
- }
-
- public void setSynset(String[] synset) {
- this.synset = synset;
- }
-
- public String getGloss() {
- return gloss;
- }
-
- public void setGloss(String gloss) {
- this.gloss = gloss;
- }
-
-}
+// TODO to be removed
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java
index d2c64a0..37bcca5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java
@@ -1,73 +1 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-public class DistributionInstance {
-
- protected String word;
- protected String pos;
- protected int trainingSetInstances;
- protected int testSetInstances;
-
- /**
- * Constructor
- */
- public DistributionInstance(String word, String pos,
- int trainingSetInstances, int testSetInstances) {
- super();
- this.word = word;
- this.pos = pos;
- this.trainingSetInstances = trainingSetInstances;
- this.testSetInstances = testSetInstances;
- }
-
- public String getWord() {
- return word;
- }
-
- public void setWord(String word) {
- this.word = word;
- }
-
- public String getPos() {
- return pos;
- }
-
- public void setPos(String pos) {
- this.pos = pos;
- }
-
- public int getTrainingSetInstances() {
- return trainingSetInstances;
- }
-
- public void setTrainingSetInstances(int trainingSetInstances) {
- this.trainingSetInstances = trainingSetInstances;
- }
-
- public int getTestSetInstances() {
- return testSetInstances;
- }
-
- public void setTestSetInstances(int testSetInstances) {
- this.testSetInstances = testSetInstances;
- }
-
-}
+// TODO to be removed
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
index 3dfd00d..9785910 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
@@ -21,12 +21,13 @@
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import opennlp.tools.disambiguator.ims.WTDIMS;
/**
* Class for the extraction of features for the different Supervised
- * Disambiguation apporaches.<br>
+ * Disambiguation approaches.<br>
* Each set of methods refer to one approach
* <ul>
* <li>IMS (It Makes Sense): check {@link https
@@ -52,17 +53,18 @@
// IMS
- private String[] extractPosOfSurroundingWords(String[] sentence,
- int wordIndex, int windowSize) {
+ private String[] extractPosOfSurroundingWords(WTDIMS wordToDisambiguate,
+ int windowSize) {
- String[] taggedSentence = WSDHelper.getTagger().tag(sentence);
+ String[] taggedSentence = wordToDisambiguate.getPosTags();
String[] tags = new String[2 * windowSize + 1];
int j = 0;
- for (int i = wordIndex - windowSize; i < wordIndex + windowSize; i++) {
- if (i < 0 || i >= sentence.length) {
+ for (int i = wordToDisambiguate.getWordIndex() - windowSize; i < wordToDisambiguate
+ .getWordIndex() + windowSize; i++) {
+ if (i < 0 || i >= wordToDisambiguate.getSentence().length) {
tags[j] = "null";
} else {
tags[j] = taggedSentence[i].toLowerCase();
@@ -73,33 +75,30 @@
return tags;
}
- private String[] extractSurroundingWords(String[] sentence, int wordIndex) {
-
- String[] posTags = WSDHelper.getTagger().tag(sentence);
+ private String[] extractSurroundingWords(WTDIMS wordToDisambiguate) {
ArrayList<String> contextWords = new ArrayList<String>();
- for (int i = 0; i < sentence.length; i++) {
+ for (int i = 0; i < wordToDisambiguate.getSentence().length; i++) {
+ if (wordToDisambiguate.getLemmas() != null) {
+ if (!WSDHelper.stopWords.contains(wordToDisambiguate.getSentence()[i]
+ .toLowerCase()) && (wordToDisambiguate.getWordIndex() != i)) {
- if (!WSDHelper.stopWords.contains(sentence[i].toLowerCase())
- && (wordIndex != i)) {
+ String lemma = wordToDisambiguate.getLemmas()[i].toLowerCase()
+ .replaceAll("[^a-z_]", "").trim();
- String word = sentence[i].toLowerCase().replaceAll("[^a-z]", "").trim();
+ if (lemma.length() > 1) {
+ contextWords.add(lemma);
+ }
- // if (!word.equals("") /*&& Constants.isRelevant(posTags[i])*/) {
- if (WSDHelper.getEnglishWords().containsKey(word)) {
- String lemma = WSDHelper.getLemmatizer().lemmatize(word, posTags[i]);
- contextWords.add(lemma);
}
-
}
}
return contextWords.toArray(new String[contextWords.size()]);
}
- private String[] extractLocalCollocations(String[] sentence, int wordIndex,
- int ngram) {
+ private String[] extractLocalCollocations(WTDIMS wordToDisambiguate, int ngram) {
/**
* Here the author used only 11 features of this type. the range was set to
* 3 (bigrams extracted in a way that they are at max separated by 1 word).
@@ -107,17 +106,22 @@
ArrayList<String> localCollocations = new ArrayList<String>();
- for (int i = wordIndex - ngram; i <= wordIndex + ngram; i++) {
+ for (int i = wordToDisambiguate.getWordIndex() - ngram; i <= wordToDisambiguate
+ .getWordIndex() + ngram; i++) {
- if (!(i < 0 || i > sentence.length - 3)) {
- if ((i != wordIndex) && (i + 1 != wordIndex)
- && (i + 1 < wordIndex + ngram)) {
- String lc = (sentence[i] + " " + sentence[i + 1]).toLowerCase();
+ if (!(i < 0 || i > wordToDisambiguate.getSentence().length - 3)) {
+ if ((i != wordToDisambiguate.getWordIndex())
+ && (i + 1 != wordToDisambiguate.getWordIndex())
+ && (i + 1 < wordToDisambiguate.getWordIndex() + ngram)) {
+ String lc = (wordToDisambiguate.getSentence()[i] + " " + wordToDisambiguate
+ .getSentence()[i + 1]).toLowerCase();
localCollocations.add(lc);
}
- if ((i != wordIndex) && (i + 2 != wordIndex)
- && (i + 2 < wordIndex + ngram)) {
- String lc = (sentence[i] + " " + sentence[i + 2]).toLowerCase();
+ if ((i != wordToDisambiguate.getWordIndex())
+ && (i + 2 != wordToDisambiguate.getWordIndex())
+ && (i + 2 < wordToDisambiguate.getWordIndex() + ngram)) {
+ String lc = (wordToDisambiguate.getSentence()[i] + " " + wordToDisambiguate
+ .getSentence()[i + 2]).toLowerCase();
localCollocations.add(lc);
}
}
@@ -141,13 +145,20 @@
*/
public ArrayList<String> extractTrainingSurroundingWords(
ArrayList<WTDIMS> trainingData) {
+
+ HashMap<String, Object> words = new HashMap<String, Object>();
+
+ for (WTDIMS word : trainingData) {
+ for (String sWord : word.getSurroundingWords()) {
+ if (!words.containsKey(sWord.toLowerCase()))
+ words.put(sWord.toLowerCase(), null);
+ }
+ }
ArrayList<String> list = new ArrayList<String>();
- for (WTDIMS word : trainingData) {
- for (String sWord : word.getSurroundingWords()) {
- list.add(sWord);
- }
+ for (String word : words.keySet()) {
+ list.add(word);
}
return list;
@@ -158,7 +169,7 @@
* This method generates the different set of features related to the IMS
* approach and store them in the corresponding attributes of the WTDIMS
*
- * @param word
+ * @param wordToDisambiguate
* the word to disambiguate [object: WTDIMS]
* @param windowSize
* the parameter required to generate the features qualified of
@@ -167,14 +178,15 @@
* the parameter required to generate the features qualified of
* "Local Collocations"
*/
- public void extractIMSFeatures(WTDIMS word, int windowSize, int ngram) {
+ public void extractIMSFeatures(WTDIMS wordToDisambiguate, int windowSize,
+ int ngram) {
- word.setPosOfSurroundingWords(extractPosOfSurroundingWords(
- word.getSentence(), word.getWordIndex(), windowSize));
- word.setSurroundingWords(extractSurroundingWords(word.getSentence(),
- word.getWordIndex()));
- word.setLocalCollocations(extractLocalCollocations(word.getSentence(),
- word.getWordIndex(), ngram));
+ wordToDisambiguate.setPosOfSurroundingWords(extractPosOfSurroundingWords(
+ wordToDisambiguate, windowSize));
+ wordToDisambiguate
+ .setSurroundingWords(extractSurroundingWords(wordToDisambiguate));
+ wordToDisambiguate.setLocalCollocations(extractLocalCollocations(
+ wordToDisambiguate, ngram));
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
index 9f606bc..37bcca5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
@@ -1,261 +1 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.HashMap;
-
-import net.sf.extjwnl.JWNLException;
-import net.sf.extjwnl.data.POS;
-import net.sf.extjwnl.dictionary.Dictionary;
-import net.sf.extjwnl.dictionary.MorphologicalProcessor;
-import opennlp.tools.cmdline.postag.POSModelLoader;
-import opennlp.tools.disambiguator.datareader.SensevalReader;
-import opennlp.tools.lemmatizer.SimpleLemmatizer;
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.POSTaggerME;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.util.InvalidFormatException;
-
-public class Loader {
-
- private static SensevalReader dExtractor = new SensevalReader();
-
- private static String modelsDir = "src\\test\\resources\\models\\";
-
- private static SentenceDetectorME sdetector;
- private static Tokenizer tokenizer;
- private static POSTaggerME tagger;
- private static NameFinderME nameFinder;
- private static SimpleLemmatizer lemmatizer;
-
- private static Dictionary dictionary;
- private static MorphologicalProcessor morph;
-
- // local caches for faster lookup
- private static HashMap<String, Object> stemCache;
- private static HashMap<String, Object> stopCache;
- private static HashMap<String, Object> relvCache;
-
- private static HashMap<String, Object> englishWords;
-
- public Loader() {
- super();
- load();
- }
-
- public static HashMap<String, Object> getRelvCache() {
- if (relvCache == null || relvCache.keySet().isEmpty()) {
- relvCache = new HashMap<String, Object>();
- for (String t : Constants.relevantPOS) {
- relvCache.put(t, null);
- }
- }
- return relvCache;
- }
-
- public static HashMap<String, Object> getStopCache() {
- if (stopCache == null || stopCache.keySet().isEmpty()) {
- stopCache = new HashMap<String, Object>();
- for (String s : Constants.stopWords) {
- stopCache.put(s, null);
- }
- }
- return stopCache;
- }
-
- public static HashMap<String, Object> getStemCache() {
- if (stemCache == null || stemCache.keySet().isEmpty()) {
- stemCache = new HashMap<String, Object>();
- for (Object pos : POS.getAllPOS()) {
- stemCache.put(((POS) pos).getKey(), new HashMap());
- }
- }
- return stemCache;
- }
-
- public static HashMap<String, Object> getEnglishWords() {
- if (englishWords == null || englishWords.keySet().isEmpty()) {
- englishWords = Constants
- .getEnglishWords(modelsDir + "en-lemmatizer.dict");
- }
- return englishWords;
- }
-
- public static MorphologicalProcessor getMorph() {
- if (morph == null) {
- getDictionary();
- morph = dictionary.getMorphologicalProcessor();
- }
- return morph;
- }
-
- public static Dictionary getDictionary() {
- if (dictionary == null) {
- try {
- dictionary = Dictionary.getDefaultResourceInstance();
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- }
- return dictionary;
- }
-
- public static SimpleLemmatizer getLemmatizer() {
- if (lemmatizer == null) {
- try {
- lemmatizer = new SimpleLemmatizer(new FileInputStream(modelsDir
- + "en-lemmatizer.dict"));
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- return lemmatizer;
- }
-
- public static NameFinderME getNameFinder() {
- if (nameFinder == null) {
- TokenNameFinderModel nameFinderModel;
- try {
- nameFinderModel = new TokenNameFinderModel(new FileInputStream(
- modelsDir + "en-ner-person.bin"));
- nameFinder = new NameFinderME(nameFinderModel);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- return nameFinder;
- }
-
- public static POSTaggerME getTagger() {
- if (tagger == null) {
- POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir
- + "en-pos-maxent.bin"));
- tagger = new POSTaggerME(posTaggerModel);
- }
- return tagger;
- }
-
- public static SentenceDetectorME getSDetector() {
- if (sdetector == null) {
- try {
- SentenceModel enSentModel = new SentenceModel(new FileInputStream(
- modelsDir + "en-sent.bin"));
- sdetector = new SentenceDetectorME(enSentModel);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- return sdetector;
- }
-
- public static Tokenizer getTokenizer() {
- if (tokenizer == null) {
- try {
- TokenizerModel tokenizerModel = new TokenizerModel(new FileInputStream(
- modelsDir + "en-token.bin"));
- tokenizer = new TokenizerME(tokenizerModel);
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- }
- return tokenizer;
- }
-
- public static boolean isInitialized() {
- return (dictionary != null && morph != null && stemCache != null
- && stopCache != null && relvCache != null);
- }
-
- public void load() {
- try {
- SentenceModel enSentModel = new SentenceModel(new FileInputStream(
- modelsDir + "en-sent.bin"));
- sdetector = new SentenceDetectorME(enSentModel);
-
- TokenizerModel TokenizerModel = new TokenizerModel(new FileInputStream(
- modelsDir + "en-token.bin"));
- tokenizer = new TokenizerME(TokenizerModel);
-
- POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir
- + "en-pos-maxent.bin"));
- tagger = new POSTaggerME(posTaggerModel);
-
- TokenNameFinderModel nameFinderModel = new TokenNameFinderModel(
- new FileInputStream(modelsDir + "en-ner-person.bin"));
- nameFinder = new NameFinderME(nameFinderModel);
-
- lemmatizer = new SimpleLemmatizer(new FileInputStream(modelsDir
- + "en-lemmatizer.dict"));
-
- dictionary = Dictionary.getDefaultResourceInstance();
- morph = dictionary.getMorphologicalProcessor();
-
- // loading lookup caches
- stemCache = new HashMap();
- for (Object pos : POS.getAllPOS()) {
- stemCache.put(((POS) pos).getKey(), new HashMap());
- }
-
- stopCache = new HashMap<String, Object>();
- for (String s : Constants.stopWords) {
- stopCache.put(s, null);
- }
-
- relvCache = new HashMap<String, Object>();
- for (String t : Constants.relevantPOS) {
- relvCache.put(t, null);
- }
-
- englishWords = new HashMap<String, Object>();
-
- if (isInitialized()) {
- Constants.print("loading was succesfull");
- } else {
- Constants.print("loading was unsuccesfull");
- }
-
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (InvalidFormatException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- }
-
- public static void unload() {
- dictionary.close();
- }
-
-}
+// TODO to be removed
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
index b7026fe..37bcca5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
@@ -1,195 +1 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import net.sf.extjwnl.JWNLException;
-import net.sf.extjwnl.data.POS;
-import opennlp.tools.util.Span;
-
-public class PreProcessor {
-
- public PreProcessor() {
- super();
- }
-
- public static String[] split(String text) {
- return Loader.getSDetector().sentDetect(text);
- }
-
- public static String[] tokenize(String sentence) {
- return Loader.getTokenizer().tokenize(sentence);
- }
-
- public static String[] tag(String[] tokenizedSentence) {
- return Loader.getTagger().tag(tokenizedSentence);
- }
-
- public static String lemmatize(String word, String posTag) {
- return Loader.getLemmatizer().lemmatize(word, posTag);
- }
-
- public static boolean isName(String word) {
- Span nameSpans[] = Loader.getNameFinder().find(new String[] { word });
- return (nameSpans.length != 0);
- }
-
- public static ArrayList<WordPOS> getAllRelevantWords(String[] sentence) {
-
- ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
-
- String[] tags = tag(sentence);
-
- for (int i = 0; i < sentence.length; i++) {
- if (!Loader.getStopCache().containsKey(sentence[i])) {
- if (Loader.getRelvCache().containsKey(tags[i])) {
- relevantWords
- .add(new WordPOS(sentence[i],tags[i]));
- }
-
- }
- }
- return relevantWords;
- }
-
- public static ArrayList<WordPOS> getAllRelevantWords(WordToDisambiguate word) {
- ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
-
- String[] tags = tag(word.getSentence());
-
- for (int i = 0; i < word.getSentence().length; i++) {
- if (!Loader.getStopCache().containsKey(word.getSentence()[i])) {
- if (Loader.getRelvCache().containsKey(tags[i])) {
- WordPOS wordpos = new WordPOS(word.getSentence()[i],tags[i]);
- if(i == word.getWordIndex()){
- wordpos.isTarget = true;
- }
- relevantWords
- .add(wordpos);
- }
-
- }
- }
- return relevantWords;
- }
-
- public static ArrayList<WordPOS> getRelevantWords(WordToDisambiguate word,
- int winBackward, int winForward) {
-
- ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
-
- String[] sentence = word.getSentence();
- String[] tags = tag(sentence);
-
- int index = word.getWordIndex();
-
- for (int i = index - winBackward; i <= index + winForward; i++) {
-
- if (i >= 0 && i < sentence.length && i != index) {
- if (!Loader.getStopCache().containsKey(sentence[i])) {
-
- if (Loader.getRelvCache().containsKey(tags[i])) {
- relevantWords.add(new WordPOS(sentence[i], tags[i]));
- }
-
- }
- }
- }
- return relevantWords;
- }
-
- /**
- * Stem a single word with WordNet dictionnary
- *
- * @param wordToStem
- * word to be stemmed
- * @return stemmed list of words
- */
- public static List StemWordWithWordNet(WordPOS wordToStem) {
- if (wordToStem == null)
- return null;
- ArrayList<String> stems = new ArrayList();
- try {
- for (Object pos : POS.getAllPOS()) {
- stems.addAll(Loader.getMorph().lookupAllBaseForms((POS) pos,
- wordToStem.getWord()));
- }
-
- if (stems.size() > 0)
- return stems;
- else {
- return null;
- }
-
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- return null;
- }
-
- /**
- * Stem a single word tries to look up the word in the stemCache HashMap If
- * the word is not found it is stemmed with WordNet and put into stemCache
- *
- * @param wordToStem
- * word to be stemmed
- * @return stemmed word list, null means the word is incorrect
- */
- public static List Stem(WordPOS wordToStem) {
-
- // check if we already cached the stem map
- HashMap posMap = (HashMap) Loader.getStemCache().get(
- wordToStem.getPOS().getKey());
-
- // don't check words with digits in them
- if (containsNumbers(wordToStem.getWord())) {
- return null;
- }
-
- List stemList = (List) posMap.get(wordToStem.getWord());
- if (stemList != null) { // return it if we already cached it
- return stemList;
-
- } else { // unCached list try to stem it
- stemList = StemWordWithWordNet(wordToStem);
- if (stemList != null) {
- // word was recognized and stemmed with wordnet:
- // add it to cache and return the stemmed list
- posMap.put(wordToStem.getWord(), stemList);
- Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
- return stemList;
- } else { // could not be stemmed add it anyway (as incorrect with null
- // list)
- posMap.put(wordToStem.getWord(), null);
- Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
- return null;
- }
- }
- }
-
- public static boolean containsNumbers(String word) {
- // checks if the word is or contains a number
- return word.matches(".*[0-9].*");
- }
-
-}
+// TODO to be removed
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
index af2f108..1ec51e8 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
@@ -60,8 +60,8 @@
// get the best predicted sense
String predictedSense = disambiguator.disambiguate(reference.getSentence(),
- reference.getTags(), reference.getTargetPosition(),
- reference.getTargetLemma())[0];
+ reference.getTags(),
+ reference.getLemmas(), reference.getTargetPosition())[0];
if (predictedSense == null) {
System.out.println("There was no sense for : " + reference.getTargetWord());
@@ -94,8 +94,8 @@
}
}
- return new WSDSample(reference.getSentence(), reference.getTags(),
- reference.getTargetPosition(), reference.getTargetLemma());
+ return new WSDSample(reference.getSentence(), reference.getTags(), reference.getLemmas(),
+ reference.getTargetPosition());
}
/**
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
index ae8c893..c3e9659 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
@@ -26,7 +26,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
-import java.util.List;
import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.data.POS;
@@ -57,6 +56,7 @@
private static HashMap<String, Object> relvCache;
private static HashMap<String, Object> englishWords;
+ private static HashMap<String, Object> nonRelevWordsDef;
// List of all the PoS tags
public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
@@ -191,6 +191,59 @@
return englishWords;
}
+ /**
+ * This initializes the HashMap of non-relevant words definitions, and returns
+ * the definition of the non relevant word based on its pos-tag
+ *
+ * @param posTag
+ * the pos-tag of the non relevant word
+ * @return the definition of the word
+ */
+ public static String getNonRelevWordsDef(String posTag) {
+ if (nonRelevWordsDef == null || nonRelevWordsDef.keySet().isEmpty()) {
+ nonRelevWordsDef = new HashMap<String, Object>();
+
+ nonRelevWordsDef.put("CC", "coordinating conjunction");
+ nonRelevWordsDef.put("CD", "cardinal number");
+ nonRelevWordsDef.put("DT", "determiner");
+ nonRelevWordsDef.put("EX", "existential there");
+ nonRelevWordsDef.put("FW", "foreign word");
+ nonRelevWordsDef.put("IN", "preposition / subordinating conjunction");
+ nonRelevWordsDef.put("JJ", "adjective");
+ nonRelevWordsDef.put("JJR", "adjective, comparative");
+ nonRelevWordsDef.put("JJS", "adjective, superlative");
+ nonRelevWordsDef.put("LS", "list marker");
+ nonRelevWordsDef.put("MD", "modal");
+ nonRelevWordsDef.put("NN", "noun, singular or mass");
+ nonRelevWordsDef.put("NNS", "noun, plural");
+ nonRelevWordsDef.put("NNP", "proper noun, singular");
+ nonRelevWordsDef.put("NNPS", "proper noun, plural");
+ nonRelevWordsDef.put("PDT", "predeterminer");
+ nonRelevWordsDef.put("POS", "possessive ending");
+ nonRelevWordsDef.put("PRP", "personal pronoun");
+ nonRelevWordsDef.put("PRP$", "possessive pronoun");
+ nonRelevWordsDef.put("RB", "adverb");
+ nonRelevWordsDef.put("RBR", "adverb, comparative");
+ nonRelevWordsDef.put("RBS", "adverb, superlative");
+ nonRelevWordsDef.put("RP", "particle");
+ nonRelevWordsDef.put("SYM", "symbol");
+ nonRelevWordsDef.put("TO", "to");
+ nonRelevWordsDef.put("UH", "interjection");
+ nonRelevWordsDef.put("VB", "verb, base form");
+ nonRelevWordsDef.put("VBD", "verb, past tense");
+ nonRelevWordsDef.put("VBG", "verb, gerund/present participle");
+ nonRelevWordsDef.put("VBN", "verb, past participle");
+ nonRelevWordsDef.put("VBP", "verb, sing. present, non-3d");
+ nonRelevWordsDef.put("VBZ", "verb, 3rd person sing. present");
+ nonRelevWordsDef.put("WDT", "wh-determiner");
+ nonRelevWordsDef.put("WP", "wh-pronoun");
+ nonRelevWordsDef.put("WP$", "possessive wh-pronoun");
+ nonRelevWordsDef.put("WRB", "wh-adverb");
+
+ }
+ return (String) nonRelevWordsDef.get(posTag);
+ }
+
public static MorphologicalProcessor getMorph() {
if (morph == null) {
getDictionary();
@@ -281,35 +334,63 @@
for (int i = 0; i < results.length; i++) {
parts = results[i].split(" ");
sensekey = parts[1];
- score = Double.parseDouble(parts[2]);
- try {
- print("score : "
- + score
- + " for sense "
- + i
- + " : "
- + sensekey
- + " : "
- + getDictionary().getWordBySenseKey(sensekey).getSynset()
- .getGloss());
- } catch (JWNLException e) {
- e.printStackTrace();
+ if (parts.length != 3) {
+ score = -1.0;
+ } else {
+ score = Double.parseDouble(parts[2]);
+ }
+ if (parts[0].equalsIgnoreCase(WSDParameters.SenseSource.WORDNET
+ .name())) {
+
+ try {
+ print("score : "
+ + score
+ + " for sense "
+ + i
+ + " : "
+ + sensekey
+ + " : "
+ + getDictionary().getWordBySenseKey(sensekey).getSynset()
+ .getGloss());
+
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ } else {
+ if (parts[0].equalsIgnoreCase(WSDParameters.SenseSource.WSDHELPER
+ .name())) {
+
+ print("This word is a " + sensekey + " : "
+ + WSDHelper.getNonRelevWordsDef(sensekey));
+
+ }
}
}
} else {
for (int i = 0; i < results.length; i++) {
parts = results[i].split(" ");
sensekey = parts[1];
- try {
- print("sense "
- + i
- + " : "
- + sensekey
- + " : "
- + getDictionary().getWordBySenseKey(sensekey).getSynset()
- .getGloss());
- } catch (JWNLException e) {
- e.printStackTrace();
+
+ if (parts[0].equalsIgnoreCase(WSDParameters.SenseSource.WORDNET
+ .name())) {
+
+ try {
+ print("sense "
+ + i
+ + " : "
+ + sensekey
+ + " : "
+ + getDictionary().getWordBySenseKey(sensekey).getSynset()
+ .getGloss());
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ } else if (parts[0]
+ .equalsIgnoreCase(WSDParameters.SenseSource.WSDHELPER.name())) {
+
+ print("This word is a " + sensekey + " : "
+ + WSDHelper.getNonRelevWordsDef(sensekey));
+
}
}
}
@@ -443,7 +524,7 @@
* @return whether a PoS Tag corresponds to a relevant Part of Speech (type
* {@link POS}) or not ( true} if it is, false} otherwise)
*/
- public static boolean isRelevant(String posTag) {
+ public static boolean isRelevantPOSTag(String posTag) {
return getPOS(posTag) != null;
}
@@ -461,7 +542,7 @@
* The Part of Speech of Type {@link POS}
* @return whether a Part of Speech is relevant (true) or not (false)
*/
- public static boolean isRelevant(POS pos) {
+ public static boolean isRelevantPOS(POS pos) {
return pos.equals(POS.ADJECTIVE) || pos.equals(POS.ADVERB)
|| pos.equals(POS.NOUN) || pos.equals(POS.VERB);
}
@@ -547,51 +628,6 @@
return relevantWords;
}
- public static ArrayList<WordPOS> getAllRelevantWords(WordToDisambiguate word) {
- ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
-
- String[] tags = WSDHelper.getTagger().tag(word.getSentence());
-
- for (int i = 0; i < word.getSentence().length; i++) {
- if (!WSDHelper.getStopCache().containsKey(word.getSentence()[i])) {
- if (WSDHelper.getRelvCache().containsKey(tags[i])) {
- WordPOS wordpos = new WordPOS(word.getSentence()[i], tags[i]);
- if (i == word.getWordIndex()) {
- wordpos.isTarget = true;
- }
- relevantWords.add(wordpos);
- }
-
- }
- }
- return relevantWords;
- }
-
- public static ArrayList<WordPOS> getRelevantWords(WordToDisambiguate word,
- int winBackward, int winForward) {
-
- ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
-
- String[] sentence = word.getSentence();
- String[] tags = WSDHelper.getTagger().tag(sentence);
-
- int index = word.getWordIndex();
-
- for (int i = index - winBackward; i <= index + winForward; i++) {
-
- if (i >= 0 && i < sentence.length && i != index) {
- if (!WSDHelper.getStopCache().containsKey(sentence[i])) {
-
- if (WSDHelper.getRelvCache().containsKey(tags[i])) {
- relevantWords.add(new WordPOS(sentence[i], tags[i]));
- }
-
- }
- }
- }
- return relevantWords;
- }
-
/**
* Stem a single word with WordNet dictionnary
*
@@ -630,7 +666,9 @@
* @return stemmed word list, null means the word is incorrect
*/
public static ArrayList<String> Stem(WordPOS wordToStem) {
-
+ if (wordToStem.getPOS() == null) {
+ WSDHelper.print("the word is " + wordToStem.getWord());
+ }
// check if we already cached the stem map
HashMap posMap = (HashMap) WSDHelper.getStemCache().get(
wordToStem.getPOS().getKey());
@@ -653,9 +691,10 @@
posMap.put(wordToStem.getWord(), stemList);
WSDHelper.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
return stemList;
- } else { // could not be stemmed add it anyway (as incorrect with null
- // list)
- posMap.put(wordToStem.getWord(), null);
+ } else { // could not be stemmed add it anyway (as it is)
+ stemList = new ArrayList<String>();
+ stemList.add(wordToStem.getWord());
+ posMap.put(wordToStem.getWord(), stemList);
WSDHelper.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
return null;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
index 385b17e..ea0d4f5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
@@ -28,10 +28,17 @@
protected boolean isCoarseSense;
public static boolean isStemCompare;
- public static enum Source {
- WORDNET
+ public static enum TrainingSource {
+ SEMCOR, SEMEVAL, OTHER
}
+ public static enum SenseSource {
+ WORDNET, WSDHELPER, OTHER;
+ }
+
+ protected SenseSource senseSource;
+ protected TrainingSource trainingSource;
+
/**
* @return if the disambiguation type is coarse grained or fine grained
*/
@@ -43,6 +50,30 @@
this.isCoarseSense = isCoarseSense;
}
+ public static boolean isStemCompare() {
+ return isStemCompare;
+ }
+
+ public static void setStemCompare(boolean isStemCompare) {
+ WSDParameters.isStemCompare = isStemCompare;
+ }
+
+ public SenseSource getSenseSource() {
+ return senseSource;
+ }
+
+ public void setSenseSource(SenseSource senseSource) {
+ this.senseSource = senseSource;
+ }
+
+ public TrainingSource getTrainingSource() {
+ return trainingSource;
+ }
+
+ public void setTrainingSource(TrainingSource trainingSource) {
+ this.trainingSource = trainingSource;
+ }
+
public WSDParameters() {
this.isCoarseSense = true;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
index 443686c..6ce40b2 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
@@ -32,69 +32,51 @@
private List<String> sentence;
private List<String> tags;
+ private List<String> lemmas;
private int senseID;
private List<String> senseIDs;
private int targetPosition;
- private String targetLemma;
- public WSDSample(String sentence[], String tags[], int targetPosition,
- String targetLemma, int senseID) {
+ public WSDSample(String sentence[], String tags[], String[] lemmas,
+ int targetPosition, int senseID) {
this.sentence = Collections.unmodifiableList(new ArrayList<String>(Arrays
.asList(sentence)));
this.tags = Collections.unmodifiableList(new ArrayList<String>(Arrays
.asList(tags)));
this.targetPosition = targetPosition;
- this.targetLemma = targetLemma;
+ this.lemmas = Collections.unmodifiableList(new ArrayList<String>(Arrays
+ .asList(lemmas)));
+
this.senseID = senseID;
checkArguments();
}
- public WSDSample(String sentence[], String tags[], int targetPosition,
- String targetLemma, String senseIDs[]) {
+ public WSDSample(String sentence[], String tags[], String[] lemmas,
+ int targetPosition) {
this.sentence = Collections.unmodifiableList(new ArrayList<String>(Arrays
.asList(sentence)));
this.tags = Collections.unmodifiableList(new ArrayList<String>(Arrays
.asList(tags)));
this.targetPosition = targetPosition;
- this.targetLemma = targetLemma;
- this.senseIDs = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(senseIDs)));
+ this.lemmas = Collections.unmodifiableList(new ArrayList<String>(Arrays
+ .asList(lemmas)));
;
checkArguments();
}
-
- public WSDSample(List<String> sentence, List<String> tags,
- int targetPosition, String targetLemma, int senseID) {
- this.sentence = Collections
- .unmodifiableList(new ArrayList<String>(sentence));
- this.tags = Collections.unmodifiableList(new ArrayList<String>(tags));
+
+ public WSDSample(String sentence[], String tags[], String[] lemmas,
+ int targetPosition, List<String> senseIDs) {
+ this.sentence = Collections.unmodifiableList(new ArrayList<String>(Arrays
+ .asList(sentence)));
+ this.tags = Collections.unmodifiableList(new ArrayList<String>(Arrays
+ .asList(tags)));
this.targetPosition = targetPosition;
- this.targetLemma = targetLemma;
- this.senseID = senseID;
- checkArguments();
- }
-
- public WSDSample(List<String> sentence, List<String> tags,
- int targetPosition, String targetLemma, List<String> senseIDs) {
- this.sentence = Collections
- .unmodifiableList(new ArrayList<String>(sentence));
- this.tags = Collections.unmodifiableList(new ArrayList<String>(tags));
- this.targetPosition = targetPosition;
- this.targetLemma = targetLemma;
+ this.lemmas = Collections.unmodifiableList(new ArrayList<String>(Arrays
+ .asList(lemmas)));
this.senseIDs = senseIDs;
checkArguments();
}
- public WSDSample(String sentence[], String tags[], int targetPosition,
- String targetLemma) {
- this(sentence, tags, targetPosition, targetLemma, -1);
- }
-
- public WSDSample(List<String> sentence, List<String> tags,
- int targetPosition, String targetLemma) {
- this(sentence, tags, targetPosition, targetLemma, -1);
- }
-
private void checkArguments() {
if (sentence.size() != tags.size() || targetPosition < 0
|| targetPosition >= tags.size())
@@ -113,6 +95,10 @@
return tags.toArray(new String[tags.size()]);
}
+ public String[] getLemmas() {
+ return lemmas.toArray(new String[lemmas.size()]);
+ }
+
public int getTargetPosition() {
return targetPosition;
}
@@ -133,10 +119,6 @@
return tags.get(targetPosition);
}
- public String getTargetLemma() {
- return targetLemma;
- }
-
public void setSentence(List<String> sentence) {
this.sentence = sentence;
}
@@ -145,6 +127,10 @@
this.tags = tags;
}
+ public void setLemmas(List<String> lemmas) {
+ this.lemmas = lemmas;
+ }
+
public void setSenseID(int senseID) {
this.senseID = senseID;
}
@@ -157,10 +143,6 @@
this.targetPosition = targetPosition;
}
- public void setTargetLemma(String targetLemma) {
- this.targetLemma = targetLemma;
- }
-
@Override
public String toString() {
@@ -192,11 +174,11 @@
String tokenTags[] = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);
int position = Integer.parseInt(tokenTags[0]);
- String lemma = tokenTags[1];
- String sentence[] = new String[tokenTags.length - 2];
- String tags[] = new String[tokenTags.length - 2];
+ String sentence[] = new String[tokenTags.length - 1];
+ String tags[] = new String[tokenTags.length - 1];
+ String lemmas[] = new String[tokenTags.length - 1];
- for (int i = 2; i < tokenTags.length; i++) {
+ for (int i = 1; i < tokenTags.length; i++) {
int split = tokenTags[i].lastIndexOf("_");
if (split == -1) {
@@ -205,9 +187,10 @@
sentence[i] = tokenTags[i].substring(0, split);
tags[i] = tokenTags[i].substring(split + 1);
+ lemmas[i] = tokenTags[i].substring(split + 2);
}
- return new WSDSample(sentence, tags, position, lemma);
+ return new WSDSample(sentence, tags, lemmas, position);
}
@Override
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
index 02891ac..c6c6aca 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
@@ -20,7 +20,10 @@
package opennlp.tools.disambiguator;
import java.security.InvalidParameterException;
+import java.util.ArrayList;
+import java.util.List;
+import opennlp.tools.disambiguator.ims.IMSParameters;
import opennlp.tools.util.Span;
/**
@@ -45,46 +48,114 @@
* @see Lesk
* @see IMS
*/
-public interface WSDisambiguator {
+public abstract class WSDisambiguator {
/**
* @return the parameters of the disambiguation algorithm
*/
- public WSDParameters getParams();
+ public abstract WSDParameters getParams();
/**
* @param the
* disambiguation implementation specific parameters.
* @throws InvalidParameterException
*/
- public void setParams(WSDParameters params) throws InvalidParameterException;
+ public abstract void setParams(WSDParameters params) throws InvalidParameterException;
/**
* @param tokenizedContext
* @param tokenTags
+ * @param lemmas
* @param ambiguousTokenIndex
- * @param ambiguousTokenLemma
* @return result as an array of WordNet IDs
*/
- public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
- int ambiguousTokenIndex, String ambiguousTokenLemma);
+ public abstract String[] disambiguate(String[] tokenizedContext, String[] tokenTags, String[] lemmas,
+ int ambiguousTokenIndex);
- /**
+ /** The disambiguation method for all the words in a Span
* @param tokenizedContext
* @param tokenTags
* @param ambiguousTokenIndexSpan
* @param ambiguousTokenLemma
* @return result as an array of WordNet IDs
*/
- public String[][] disambiguate(String[] tokenizedContext, String[] tokenTags,
- Span ambiguousTokenIndexSpan, String ambiguousTokenLemma);
+ public List<String[]> disambiguate(String[] tokenizedContext, String[] tokenTags, String[] lemmas,
+ Span ambiguousTokenIndexSpan){
+ List<String[]> senses = new ArrayList<String[]>();
+
+ int start = Math.max(0, ambiguousTokenIndexSpan.getStart());
+
+ int end = Math.max(start,Math.min(tokenizedContext.length, ambiguousTokenIndexSpan.getEnd()));
+
+
+ for (int i = start; i < end + 1; i++) {
+
+ if (WSDHelper.isRelevantPOSTag(tokenTags[i])) {
+ WSDSample sample = new WSDSample(tokenizedContext, tokenTags, lemmas, i);
+ String[] sense = disambiguate(sample);
+ senses.add(sense);
+ } else {
+
+ if (WSDHelper.getNonRelevWordsDef(tokenTags[i]) != null) {
+ String s = WSDParameters.SenseSource.WSDHELPER.name() + " "
+ + WSDHelper.getNonRelevWordsDef(tokenTags[i]);
+ String[] sense = { s };
+
+ senses.add(sense);
+ } else {
+ senses.add(null);
+ }
+ }
+
+ }
+
+ return senses;
+ }
+
+ /**
+ * The disambiguation method for all the words of the context
+ *
+ * @param tokenizedContext
+ * : the tokenized text whose words are to be disambiguated
+ * @param tokenTags
+ * : the tags corresponding to the context
+ * @param lemmas
+ * : the lemmas of ALL the words in the context
+ * @return a List of arrays, each corresponding to the senses of each word of
+ * the context which are to be disambiguated
+ */
+ public List<String[]> disambiguate(String[] tokenizedContext,
+ String[] tokenTags, String[] lemmas) {
+
+ List<String[]> senses = new ArrayList<String[]>();
+
+ for (int i = 0; i < tokenizedContext.length; i++) {
+
+ if (WSDHelper.isRelevantPOSTag(tokenTags[i])) {
+ WSDSample sample = new WSDSample(tokenizedContext, tokenTags, lemmas, i);
+ String[] sense = disambiguate(sample);
+ senses.add(sense);
+ } else {
+
+ if (WSDHelper.getNonRelevWordsDef(tokenTags[i]) != null) {
+ String s = WSDParameters.SenseSource.WSDHELPER.name() + " " + WSDHelper.getNonRelevWordsDef(tokenTags[i]);
+ String[] sense = { s };
+
+ senses.add(sense);
+ } else {
+ senses.add(null);
+ }
+ }
+
+ }
+
+ return senses;
+ }
/**
* @param WSDSample
* @return result as an array of WordNet IDs
*/
- public String[] disambiguate(WSDSample sample);
+ public abstract String[] disambiguate(WSDSample sample);
- @Deprecated
- public String[] disambiguate(String[] inputText, int inputWordIndex);
}
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
index af81c97..c6b1e91 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
@@ -33,10 +33,9 @@
public class WordPOS {
private String word;
- private List stems;
+ private List<String> stems;
private POS pos;
private String posTag;
- private int wordIndex;
public boolean isTarget = false;
public WordPOS(String word, String tag) throws IllegalArgumentException {
@@ -68,7 +67,7 @@
return posTag;
}
- public List getStems() {
+ public List<String> getStems() {
if (stems == null) {
return WSDHelper.Stem(this);
} else {
@@ -99,8 +98,8 @@
// uses Stemming to check if two words are equivalent
public boolean isStemEquivalent(WordPOS wordToCompare) {
// check if there is intersection in the stems;
- List originalList = this.getStems();
- List listToCompare = wordToCompare.getStems();
+ List<String> originalList = this.getStems();
+ List<String> listToCompare = wordToCompare.getStems();
if (originalList == null || listToCompare == null) {
return false;
@@ -118,16 +117,4 @@
}
- // uses Lemma to check if two words are equivalent
- public boolean isLemmaEquivalent(WordPOS wordToCompare) {
-
- ArrayList<String> lemmas_word = new ArrayList();
- ArrayList<String> lemmas_wordToCompare = new ArrayList();
-
- for (String pos : WSDHelper.allPOS) {
- WSDHelper.getLemmatizer().lemmatize(wordToCompare.getWord(), pos);
- }
- return false;
- }
-
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
index 4ea9276..37bcca5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
@@ -1,191 +1 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import net.sf.extjwnl.data.POS;
-
-public class WordToDisambiguate {
-
- // TODO Check if it is necessary to add an attribute [word] since the word in
- // the sentence is not necessarily in the base form ??
-
- protected String[] sentence;
- protected String[] posTags;
-
- protected int wordIndex;
-
- protected int sense;
-
- protected ArrayList<String> senseIDs;
-
- public WordToDisambiguate(String[] sentence, int wordIndex)
- throws IllegalArgumentException {
- super();
-
- if (wordIndex > sentence.length) {
- throw new IllegalArgumentException("The index is out of bounds !");
- }
-
- this.sentence = sentence;
- this.posTags = WSDHelper.getTagger().tag(sentence);
-
- this.wordIndex = wordIndex;
-
- this.sense = -1;
- }
-
- public WordToDisambiguate(String[] sentence, int wordIndex, int sense)
- throws IllegalArgumentException {
- super();
-
- if (wordIndex > sentence.length) {
- throw new IllegalArgumentException("The index is out of bounds !");
- }
-
- this.sentence = sentence;
- this.posTags = WSDHelper.getTagger().tag(sentence);
-
- this.wordIndex = wordIndex;
-
- this.sense = sense;
- }
-
- public WordToDisambiguate(String[] sentence, int wordIndex,
- ArrayList<String> senseIDs) throws IllegalArgumentException {
- super();
-
- if (wordIndex > sentence.length) {
- throw new IllegalArgumentException("The index is out of bounds !");
- }
-
- this.sentence = sentence;
- this.posTags = WSDHelper.getTagger().tag(sentence);
-
- this.wordIndex = wordIndex;
-
- this.senseIDs = senseIDs;
- }
-
- public WordToDisambiguate(String[] sentence, String[] tokenTags, int wordIndex) {
- this(sentence, wordIndex, -1);
- }
-
- public WordToDisambiguate() {
- String[] emptyString = {};
- int emptyInteger = 0;
-
- this.sentence = emptyString;
- this.wordIndex = emptyInteger;
- this.sense = -1;
-
- }
-
- // Sentence
- public String[] getSentence() {
- return sentence;
- }
-
- public void setSentence(String[] sentence) {
- this.sentence = sentence;
- }
-
- // Sentence Pos-Tags
- public String[] getPosTags() {
- return posTags;
- }
-
- public void setPosTags(String[] posTags) {
- this.posTags = posTags;
- }
-
- // Word to disambiguate
- public int getWordIndex() {
- return wordIndex;
- }
-
- public String getRawWord() {
-
- String wordBaseForm = WSDHelper.getLemmatizer().lemmatize(
- this.sentence[wordIndex], this.posTags[wordIndex]);
-
- String ref = "";
-
- if ((WSDHelper.getPOS(this.posTags[wordIndex]) != null)) {
- if (WSDHelper.getPOS(this.posTags[wordIndex]).equals(POS.VERB)) {
- ref = wordBaseForm + ".v";
- } else if (WSDHelper.getPOS(this.posTags[wordIndex]).equals(POS.NOUN)) {
- ref = wordBaseForm + ".n";
- } else if (WSDHelper.getPOS(this.posTags[wordIndex])
- .equals(POS.ADJECTIVE)) {
- ref = wordBaseForm + ".a";
- } else if (WSDHelper.getPOS(this.posTags[wordIndex]).equals(POS.ADVERB)) {
- ref = wordBaseForm + ".r";
- }
-
- }
-
- return ref;
- }
-
- public String getWord() {
- return this.sentence[this.wordIndex];
- }
-
- public String getPosTag() {
- return this.posTags[this.wordIndex];
- }
-
- public void setWordIndex(int wordIndex) {
- this.wordIndex = wordIndex;
- }
-
- // Word to disambiguate sense
- public int getSense() {
- return sense;
- }
-
- public void setSense(int sense) {
- this.sense = sense;
- }
-
- // Sense as in the source
- // TODO fix the conflict between this ID of the sense and that in the
- // attribute [sense]
- public ArrayList<String> getSenseIDs() {
- return senseIDs;
- }
-
- public void setSenseIDs(ArrayList<String> senseIDs) {
- this.senseIDs = senseIDs;
- }
-
- public String toString() {
- return (wordIndex + "\t" + getWord() + "\n" + sentence);
- }
-
- public void print() {
- WSDHelper.print("Sentence: " + Arrays.asList(sentence) + "\n" + "Index: "
- + wordIndex + "\n" + "Word: " + getWord() + "\n" + "Sense ID: "
- + senseIDs.get(0));
- }
-}
+// TODO to be removed
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
index 1ab3bed..d45fae3 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
@@ -20,6 +20,7 @@
package opennlp.tools.disambiguator.contextclustering;
import java.security.InvalidParameterException;
+import java.util.List;
import opennlp.tools.disambiguator.WSDParameters;
import opennlp.tools.disambiguator.WSDSample;
@@ -32,7 +33,7 @@
*
* This implementation is based on {@link http://nlp.cs.rpi.edu/paper/wsd.pdf}
*/
-public class ContextClusterer implements WSDisambiguator {
+public class ContextClusterer extends WSDisambiguator {
protected ContextClustererParameters params;
@@ -56,14 +57,7 @@
@Override
public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
- int ambiguousTokenIndex, String ambiguousTokenLemma) {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public String[][] disambiguate(String[] tokenizedContext, String[] tokenTags,
- Span ambiguousTokenIndexSpan, String ambiguousTokenLemma) {
+ String[] lemmas, int ambiguousTokenIndex) {
// TODO Auto-generated method stub
return null;
}
@@ -74,10 +68,6 @@
return null;
}
- @Override
- public String[] disambiguate(String[] inputText, int inputWordIndex) {
- // TODO Auto-generated method stub
- return null;
- }
+
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
index efd8603..eb7a2d5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
@@ -25,7 +25,8 @@
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
-import opennlp.tools.disambiguator.WordToDisambiguate;
+import opennlp.tools.disambiguator.WSDHelper;
+import opennlp.tools.disambiguator.WSDSample;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
@@ -175,10 +176,10 @@
return result;
}
- public ArrayList<WordToDisambiguate> getSemcorOneFileData(String file,
+ public ArrayList<WSDSample> getSemcorOneFileData(String file,
String wordTag) {
- ArrayList<WordToDisambiguate> setInstances = new ArrayList<WordToDisambiguate>();
+ ArrayList<WSDSample> setInstances = new ArrayList<WSDSample>();
try {
@@ -223,8 +224,15 @@
}
if (!senses.isEmpty()) {
- WordToDisambiguate wtd = new WordToDisambiguate(
- sentence.split("\\s"), index, senses);
+ String[] words = sentence.split("\\s");
+ String[] tags = WSDHelper.getTagger().tag(words);
+ String[] lemmas = new String[words.length];
+
+ for (int i = 0; i < words.length; i++) {
+ lemmas[i] = WSDHelper.getLemmatizer().lemmatize(words[i], tags[i]);
+ }
+
+ WSDSample wtd = new WSDSample(words, tags, lemmas, index, senses);
setInstances.add(wtd);
}
@@ -253,10 +261,9 @@
* The word, of which we are looking for the instances
* @return the list of the {@link WordToDisambiguate} instances
*/
- public ArrayList<WordToDisambiguate> getSemcorFolderData(String folder,
- String wordTag) {
+ public ArrayList<WSDSample> getSemcorFolderData(String folder, String wordTag) {
- ArrayList<WordToDisambiguate> result = new ArrayList<WordToDisambiguate>();
+ ArrayList<WSDSample> result = new ArrayList<WSDSample>();
String directory = path + folder + tagfiles;
File tempFolder = new File(directory);
@@ -266,7 +273,7 @@
listOfFiles = tempFolder.listFiles();
for (File file : listOfFiles) {
- ArrayList<WordToDisambiguate> list = getSemcorOneFileData(directory
+ ArrayList<WSDSample> list = getSemcorOneFileData(directory
+ file.getName(), wordTag);
result.addAll(list);
}
@@ -285,12 +292,12 @@
* @return the list of the {@link WordToDisambiguate} instances of the word to
* disambiguate
*/
- public ArrayList<WordToDisambiguate> getSemcorData(String wordTag) {
+ public ArrayList<WSDSample> getSemcorData(String wordTag) {
- ArrayList<WordToDisambiguate> result = new ArrayList<WordToDisambiguate>();
+ ArrayList<WSDSample> result = new ArrayList<WSDSample>();
for (String folder : folders) {
- ArrayList<WordToDisambiguate> list = getSemcorFolderData(folder, wordTag);
+ ArrayList<WSDSample> list = getSemcorFolderData(folder, wordTag);
result.addAll(list);
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
index 19325d8..464bc36 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
@@ -36,8 +36,8 @@
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
-import opennlp.tools.disambiguator.WordToDisambiguate;
-import opennlp.tools.disambiguator.ims.WTDIMS;
+import opennlp.tools.disambiguator.WSDHelper;
+import opennlp.tools.disambiguator.WSDSample;
/**
* This class handles the extraction of Senseval-3 data from the different files
@@ -52,19 +52,6 @@
protected String sensemapFile = sensevalDirectory + "EnglishLS.sensemap";
protected String wordList = sensevalDirectory + "EnglishLS.train.key";
- // protected String dict = sensevalDirectory + "EnglishLS.dictionary.xml";
- // protected String map = sensevalDirectory + "EnglishLS.sensemap";
-
- /**
- * The XML file of Senseval presents some issues that need to be fixed first
- */
- private String fixXmlFile() {
-
- // TODO fix this !
-
- return null;
- }
-
public SensevalReader() {
super();
}
@@ -157,9 +144,9 @@
* @return the list of the {@link WordToDisambiguate} instances of the word to
* disambiguate
*/
- public ArrayList<WordToDisambiguate> getSensevalData(String wordTag) {
+ public ArrayList<WSDSample> getSensevalData(String wordTag) {
- ArrayList<WordToDisambiguate> setInstances = new ArrayList<WordToDisambiguate>();
+ ArrayList<WSDSample> setInstances = new ArrayList<WSDSample>();
try {
@@ -188,28 +175,7 @@
Node nInstance = nInstances.item(j);
if (nInstance.getNodeType() == Node.ELEMENT_NODE) {
-
- Element eInstance = (Element) nInstance;
-
- String[] wordPos = eLexelt.getAttribute("item").split("\\.");
- String word = wordPos[0]; // Word
- String tag; // Part of Speech
-
- if (wordPos[1].equals("n")) {
- tag = "noun";
- } else if (wordPos[1].equals("v")) {
- tag = "verb";
- } else if (wordPos[1].equals("a")) {
- tag = "adjective";
- } else {
- tag = "adverb";
- }
-
- String id = eInstance.getAttribute("id");
- String source = eInstance.getAttribute("docsrc");
-
- ArrayList<String> answers = new ArrayList<String>();
- String sentence = "";
+ ArrayList<String> senseIDs = new ArrayList<String>();
String rawWord = "";
String[] finalText = null;
int index = 0;
@@ -227,11 +193,10 @@
String temp = senseid;
// String[] temp = { answer, senseid };
- answers.add(temp);
+ senseIDs.add(temp);
}
if (nChild.getNodeName().equals("context")) {
- sentence = ((Element) nChild).getTextContent();
if (nChild.hasChildNodes()) {
String textBefore = nChild.getChildNodes().item(0)
@@ -272,9 +237,19 @@
}
- WTDIMS wordToDisambiguate = new WTDIMS(finalText, index,
- answers);
- setInstances.add(wordToDisambiguate);
+ String[] words = finalText;
+ String[] tags = WSDHelper.getTagger().tag(words);
+ String[] lemmas = new String[words.length];
+
+ for (int k = 0; k < words.length; k++) {
+ lemmas[k] = WSDHelper.getLemmatizer().lemmatize(words[k],
+ tags[k]);
+ }
+
+ WSDSample wtd = new WSDSample(words, tags, lemmas, index,
+ senseIDs);
+ setInstances.add(wtd);
+
}
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
index a38ba82..9a2e2e2 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
@@ -46,13 +46,12 @@
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.disambiguator.FeaturesExtractor;
+import opennlp.tools.disambiguator.WSDHelper;
import opennlp.tools.disambiguator.WSDParameters;
import opennlp.tools.disambiguator.WSDSample;
import opennlp.tools.disambiguator.WSDisambiguator;
-import opennlp.tools.disambiguator.WordToDisambiguate;
import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
import opennlp.tools.disambiguator.datareader.SensevalReader;
import opennlp.tools.disambiguator.mfs.MFS;
@@ -70,7 +69,7 @@
* check {@link https://www.comp.nus.edu.sg/~nght/pubs/ims.pdf} for details
* about this approach
*/
-public class IMS implements WSDisambiguator {
+public class IMS extends WSDisambiguator {
public IMSParameters parameters;
@@ -244,8 +243,6 @@
e.printStackTrace();
}
- System.out.println("Done");
-
}
private void extractFeature(WTDIMS word) {
@@ -344,15 +341,15 @@
}
/**
- * The disambiguation method for a single word
+ * The disambiguation method for a single word; it requires as input one
+ * object of type {@link WTDIMS}.
*
* @param inputText
* : the text containing the word to disambiguate
* @param inputWordIndex
* : the index of the word to disambiguate
*/
- @Override
- public String[] disambiguate(String[] inputText, int inputWordIndex) {
+ public String[] disambiguate(WTDIMS wordToDisambiguate) {
String trainingDataDirectory = IMSParameters.trainingDataDirectory;
@@ -362,11 +359,10 @@
file.mkdirs();
}
- WTDIMS word = new WTDIMS(inputText, inputWordIndex);
- fExtractor.extractIMSFeatures(word, this.parameters.getWindowSize(),
- this.parameters.getNgram());
+ fExtractor.extractIMSFeatures(wordToDisambiguate,
+ this.parameters.getWindowSize(), this.parameters.getNgram());
- String wordTag = word.getWordTag();
+ String wordTag = wordToDisambiguate.getWordTag();
String wordTrainingbinFile = trainingDataDirectory + wordTag + ".gz";
@@ -378,10 +374,10 @@
if (bf.exists() && !bf.isDirectory()) {
// If the trained model exists
ArrayList<String> surrWords = getAllSurroundingWords(wordTag);
- fExtractor.serializeIMSFeatures(word, surrWords);
+ fExtractor.serializeIMSFeatures(wordToDisambiguate, surrWords);
loadedMaxentModel = load(wordTrainingbinFile);
- String[] context = cg.getContext(word);
+ String[] context = cg.getContext(wordToDisambiguate);
double[] outcomeProbs = loadedMaxentModel.eval(context);
outcome = loadedMaxentModel.getBestOutcome(outcomeProbs);
@@ -389,10 +385,10 @@
} else {
// Depending on the source, go fetch the training data
ArrayList<WTDIMS> trainingInstances = new ArrayList<WTDIMS>();
- switch (this.parameters.getSource().code) {
- case 1: {
+ switch (this.parameters.getTrainingSource()) {
+ case SEMCOR: {
SemcorReaderExtended sReader = new SemcorReaderExtended();
- for (WordToDisambiguate ti : sReader.getSemcorData(wordTag)) {
+ for (WSDSample ti : sReader.getSemcorData(wordTag)) {
WTDIMS imsIT = new WTDIMS(ti);
extractFeature(imsIT);
trainingInstances.add(imsIT);
@@ -400,17 +396,17 @@
break;
}
- case 2: {
+ case SEMEVAL: {
SensevalReader sReader = new SensevalReader();
- for (WordToDisambiguate ti : sReader.getSensevalData(wordTag)) {
- WTDIMS imsIT = (WTDIMS) ti;
+ for (WSDSample ti : sReader.getSensevalData(wordTag)) {
+ WTDIMS imsIT = new WTDIMS(ti);
extractFeature(imsIT);
trainingInstances.add(imsIT);
}
break;
}
- case 3: {
+ case OTHER: {
// TODO check the case when the user selects his own data set (make an
// interface to collect training data)
break;
@@ -423,11 +419,11 @@
ArrayList<String> surrWords = getAllSurroundingWords(wordTag);
- fExtractor.serializeIMSFeatures(word, surrWords);
+ fExtractor.serializeIMSFeatures(wordToDisambiguate, surrWords);
bf = new File(wordTrainingbinFile);
loadedMaxentModel = load(wordTrainingbinFile);
- String[] context = cg.getContext(word);
+ String[] context = cg.getContext(wordToDisambiguate);
double[] outcomeProbs = loadedMaxentModel.eval(context);
outcome = loadedMaxentModel.getBestOutcome(outcomeProbs);
@@ -437,11 +433,8 @@
if (!outcome.equals("")) {
- // System.out.println("The sense is [" + outcome + "] : " /*+
- // Loader.getDictionary().getWordBySenseKey(outcome.split("%")[1]).getSynset().getGloss()*/);
-
- outcome = parameters.source.name() + " " + wordTag.split("\\.")[0] + "%"
- + outcome;
+ outcome = parameters.getSenseSource().name() + " "
+ + wordTag.split("\\.")[0] + "%" + outcome;
String[] s = { outcome };
@@ -449,29 +442,63 @@
} else {
// if no training data exist
- return MFS.getMostFrequentSense(word);
+ MFS mfs = new MFS();
+ return mfs.disambiguate(wordTag);
}
}
@Override
- public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
- int ambiguousTokenIndex, String ambiguousTokenLemma) {
- // TODO Update
- return null;
- }
-
- @Override
- public String[][] disambiguate(String[] tokenizedContext, String[] tokenTags,
- Span ambiguousTokenIndexSpan, String ambiguousTokenLemma) {
- // TODO Update
- return null;
- }
-
- @Override
public String[] disambiguate(WSDSample sample) {
- // TODO Update
- return null;
+ if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
+ WTDIMS wordToDisambiguate = new WTDIMS(sample);
+ return disambiguate(wordToDisambiguate);
+
+ } else {
+ if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
+ String s = IMSParameters.SenseSource.WSDHELPER.name() + " "
+ + sample.getTargetTag();
+ String[] sense = { s };
+ return sense;
+ } else {
+ return null;
+ }
+ }
+
+ }
+
+ /**
+ * The IMS disambiguation method for a single word
+ *
+ * @param tokenizedContext
+ * : the text containing the word to disambiguate
+ * @param tokenTags
+ * : the tags corresponding to the context
+ * @param lemmas
+ * : the lemmas of ALL the words in the context
+ * @param index
+ * : the index of the word to disambiguate
+ * @return an array of the senses of the word to disambiguate
+ */
+ public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
+ String[] lemmas, int index) {
+
+ if (WSDHelper.isRelevantPOSTag(tokenTags[index])) {
+ WTDIMS wordToDisambiguate = new WTDIMS(tokenizedContext, tokenTags,
+ lemmas, index);
+ return disambiguate(wordToDisambiguate);
+
+ } else {
+ if (WSDHelper.getNonRelevWordsDef(tokenTags[index]) != null) {
+ String s = IMSParameters.SenseSource.WSDHELPER.name() + " "
+ + tokenTags[index];
+ String[] sense = { s };
+ return sense;
+ } else {
+ return null;
+ }
+ }
+
}
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
index e69de29..fe4add6 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
@@ -0,0 +1 @@
+// TODO To be removed
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
index 89d2cb7..c2ec960 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
@@ -29,22 +29,9 @@
*/
public class IMSParameters extends WSDParameters {
- public static enum Source {
- SEMCOR(1, "semcor"), SEMEVAL(2, "semeval"), OTHER(3, "other");
-
- public int code;
- public String src;
-
- private Source(int code, String src) {
- this.code = code;
- this.src = src;
- }
- }
-
protected String languageCode;
protected int windowSize;
protected int ngram;
- protected Source source;
public static final String resourcesFolder = "src\\test\\resources\\";
public static final String trainingDataDirectory = resourcesFolder
@@ -63,12 +50,13 @@
* @param source
* the source of the training data
*/
- public IMSParameters(int windowSize, int ngram, Source source) {
- super();
+ public IMSParameters(int windowSize, int ngram,
+ TrainingSource trainingSource, SenseSource senseSource) {
this.languageCode = "En";
this.windowSize = windowSize;
this.ngram = ngram;
- this.source = source;
+ this.trainingSource = trainingSource;
+ this.senseSource = senseSource;
this.isCoarseSense = false;
File folder = new File(trainingDataDirectory);
@@ -77,15 +65,15 @@
}
public IMSParameters() {
- this(3, 2, Source.SEMCOR);
+ this(3, 2, TrainingSource.SEMCOR, SenseSource.WORDNET);
}
- public IMSParameters(Source source) {
- this(3, 2, source);
+ public IMSParameters(TrainingSource source) {
+ this(3, 2, source, SenseSource.WORDNET);
}
public IMSParameters(int windowSize, int ngram) {
- this(windowSize, ngram, Source.SEMCOR);
+ this(windowSize, ngram, TrainingSource.SEMCOR, SenseSource.WORDNET);
}
public String getLanguageCode() {
@@ -112,14 +100,6 @@
this.ngram = ngram;
}
- public Source getSource() {
- return source;
- }
-
- public void setSource(Source source) {
- this.source = source;
- }
-
void init() {
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
index 6a5fcad..8115472 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
@@ -20,39 +20,56 @@
package opennlp.tools.disambiguator.ims;
import java.util.ArrayList;
+import java.util.List;
import net.sf.extjwnl.data.POS;
import opennlp.tools.disambiguator.WSDHelper;
-import opennlp.tools.disambiguator.WordToDisambiguate;
+import opennlp.tools.disambiguator.WSDSample;
-public class WTDIMS extends WordToDisambiguate {
+public class WTDIMS {
+ // Attributes related to the context
+ protected String[] sentence;
+ protected String[] posTags;
+ protected String[] lemmas;
+ protected int wordIndex;
+ protected int sense;
+ protected List<String> senseIDs;
+
+ // Attributes related to IMS features
protected String[] posOfSurroundingWords;
protected String[] surroundingWords;
protected String[] localCollocations;
-
protected String[] features;
- public WTDIMS(String[] sentence, int word, int sense) {
- super(sentence, word, sense);
+ public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
+ int wordIndex) {
+ this.sentence = sentence;
+ this.posTags = posTags;
+ this.wordIndex = wordIndex;
+ this.lemmas = lemmas;
+ }
+
+ public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
+ int wordIndex, List<String> senseIDs) {
+ this.sentence = sentence;
+ this.posTags = posTags;
+ this.wordIndex = wordIndex;
+ this.lemmas = lemmas;
+ this.senseIDs = senseIDs;
}
- public WTDIMS(String[] sentence, int word) {
- super(sentence, word);
- }
-
- public WTDIMS(String xmlWord, ArrayList<String> senseIDs, String xmlSentence,
- String xmlrawWord) {
+ public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
+ String word, List<String> senseIDs) {
super();
- // this.word = xmlWord;
-
- this.sentence = WSDHelper.getTokenizer().tokenize(xmlSentence);
- this.posTags = WSDHelper.getTagger().tag(this.sentence);
+ this.sentence = sentence;
+ this.posTags = posTags;
+ this.lemmas = lemmas;
for (int i = 0; i < sentence.length; i++) {
- if (xmlrawWord.equals(sentence[i])) {
+ if (word.equals(sentence[i])) {
this.wordIndex = i;
break;
}
@@ -62,16 +79,93 @@
}
- public WTDIMS(WordToDisambiguate wtd) {
- super(wtd.getSentence(), wtd.getWordIndex(), wtd.getSense());
- this.senseIDs = wtd.getSenseIDs();
+ public WTDIMS(WSDSample sample) {
+ this.sentence = sample.getSentence();
+ this.posTags = sample.getTags();
+ this.lemmas = sample.getLemmas();
+ this.wordIndex = sample.getTargetPosition();
+ this.senseIDs = sample.getSenseIDs();
+
+ }
+
+ public String[] getSentence() {
+ return sentence;
}
- public WTDIMS(String[] sentence, int wordIndex, ArrayList<String> senseIDs) {
- super(sentence, wordIndex);
+ public void setSentence(String[] sentence) {
+ this.sentence = sentence;
+ }
+
+ public String[] getPosTags() {
+ return posTags;
+ }
+
+ public void setPosTags(String[] posTags) {
+ this.posTags = posTags;
+ }
+
+ public int getWordIndex() {
+ return wordIndex;
+ }
+
+ public void setWordIndex(int wordIndex) {
+ this.wordIndex = wordIndex;
+ }
+
+ public String[] getLemmas() {
+ return lemmas;
+ }
+
+ public void setLemmas(String[] lemmas) {
+ this.lemmas = lemmas;
+ }
+
+ public int getSense() {
+ return sense;
+ }
+
+ public void setSense(int sense) {
+ this.sense = sense;
+ }
+
+ public List<String> getSenseIDs() {
+ return senseIDs;
+ }
+
+ public void setSenseIDs(List<String> senseIDs) {
this.senseIDs = senseIDs;
}
+ public String getWord() {
+ return this.getSentence()[this.getWordIndex()];
+ }
+
+ public String getWordTag() {
+
+ String wordBaseForm = this.getLemmas()[this.getWordIndex()];
+
+ String ref = "";
+
+ if ((WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()]) != null)) {
+ if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()]).equals(
+ POS.VERB)) {
+ ref = wordBaseForm + ".v";
+ } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
+ .equals(POS.NOUN)) {
+ ref = wordBaseForm + ".n";
+ } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
+ .equals(POS.ADJECTIVE)) {
+ ref = wordBaseForm + ".a";
+ } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
+ .equals(POS.ADVERB)) {
+ ref = wordBaseForm + ".r";
+ }
+ }
+
+ return ref;
+ }
+
+
public String[] getPosOfSurroundingWords() {
return posOfSurroundingWords;
}
@@ -104,25 +198,4 @@
this.features = features;
}
- public String getWordTag() {
-
- String wordBaseForm = WSDHelper.getLemmatizer().lemmatize(this.getWord(),
- this.getPosTag());
-
- String ref = "";
-
- if ((WSDHelper.getPOS(this.getPosTag()) != null)) {
- if (WSDHelper.getPOS(this.getPosTag()).equals(POS.VERB)) {
- ref = wordBaseForm + ".v";
- } else if (WSDHelper.getPOS(this.getPosTag()).equals(POS.NOUN)) {
- ref = wordBaseForm + ".n";
- } else if (WSDHelper.getPOS(this.getPosTag()).equals(POS.ADJECTIVE)) {
- ref = wordBaseForm + ".a";
- } else if (WSDHelper.getPOS(this.getPosTag()).equals(POS.ADVERB)) {
- ref = wordBaseForm + ".r";
- }
- }
-
- return ref;
- }
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
index 7ede37f..c96a645 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
@@ -31,7 +31,6 @@
import opennlp.tools.disambiguator.WordPOS;
import opennlp.tools.disambiguator.WordSense;
import opennlp.tools.disambiguator.mfs.MFS;
-import opennlp.tools.util.Span;
import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.data.Synset;
import net.sf.extjwnl.data.Word;
@@ -44,7 +43,7 @@
* the approach are included in this class.
*
*/
-public class Lesk implements WSDisambiguator {
+public class Lesk extends WSDisambiguator {
/**
* The lesk specific parameters
@@ -113,8 +112,12 @@
ArrayList<SynNode> nodes = new ArrayList<SynNode>();
for (int i = 0; i < sample.getSentence().length; i++) {
- contextWords
- .add(new WordPOS(sample.getSentence()[i], sample.getTags()[i]));
+ if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
+ if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
+ contextWords.add(new WordPOS(sample.getSentence()[i], sample
+ .getTags()[i]));
+ }
+ }
}
for (Synset synset : synsets) {
SynNode node = new SynNode(synset, contextWords);
@@ -158,8 +161,12 @@
for (int i = index - getParams().win_b_size; i <= index
+ getParams().win_f_size; i++) {
if (i >= 0 && i < sample.getSentence().length && i != index) {
- contextWords.add(new WordPOS(sample.getSentence()[i],
- sample.getTags()[i]));
+ if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
+ if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
+ contextWords.add(new WordPOS(sample.getSentence()[i], sample
+ .getTags()[i]));
+ }
+ }
}
}
@@ -944,44 +951,18 @@
return count;
}
- /**
- * Disambiguates an ambiguous word in its context
- *
- * @param tokenizedContext
- * @param ambiguousTokenIndex
- * @return array of sense indexes from WordNet ordered by their score. The
- * result format is <b>Source</b> <b>SenseID</b> If the input token is
- * non relevant a null is returned.
- */
- @Override
- public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
- int ambiguousTokenIndex, String ambiguousTokenLemma) {
- return disambiguate(new WSDSample(tokenizedContext, tokenTags,
- ambiguousTokenIndex, ambiguousTokenLemma));
- }
-
- /**
- * Disambiguates an ambiguous word in its context The user can set a span of
- * inputWords from the tokenized input
- *
- * @param inputText
- * @param inputWordSpans
- * @return array of array of sense indexes from WordNet ordered by their
- * score. The result format is <b>Source</b> <b>SenseID</b> If the
- * input token is non relevant a null is returned.
- */
- @Override
- public String[][] disambiguate(String[] tokenizedContext, String[] tokenTags,
- Span ambiguousTokenSpan, String ambiguousTokenLemma) {
- // TODO need to work on spans
- return null;
- }
-
@Override
public String[] disambiguate(WSDSample sample) {
- // if the word is not relevant return null
- if (!WSDHelper.isRelevant(sample.getTargetTag())) {
- return null;
+ // if not relevant POS tag
+ if (!WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
+ if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
+ String s = WSDParameters.SenseSource.WSDHELPER.name() + " "
+ + sample.getTargetTag();
+ String[] sense = { s };
+ return sense;
+ } else {
+ return null;
+ }
}
ArrayList<WordSense> wsenses = null;
@@ -1020,7 +1001,8 @@
for (int i = 0; i < wsenses.size(); i++) {
synsetWords = wsenses.get(i).getNode().synset.getWords();
for (Word synWord : synsetWords) {
- if (synWord.getLemma().equals(sample.getTargetLemma())) {
+ if (synWord.getLemma().equals(
+ sample.getLemmas()[sample.getTargetPosition()])) {
try {
senseKey = synWord.getSenseKey();
} catch (JWNLException e) {
@@ -1041,9 +1023,10 @@
}
@Override
- public String[] disambiguate(String[] inputText, int inputWordIndex) {
- // TODO Deprecate
- return null;
+ public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
+ String[] lemmas, int ambiguousTokenIndex) {
+ return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
+ ambiguousTokenIndex));
}
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
index 2efeba3..cdaeaa1 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
@@ -37,7 +37,7 @@
// DEFAULTS
protected static final LESK_TYPE DFLT_LESK_TYPE = LESK_TYPE.LESK_EXT_EXP_CTXT;
- protected static final Source DFLT_SOURCE = Source.WORDNET;
+ protected static final SenseSource DFLT_SOURCE = SenseSource.WORDNET;
protected static final int DFLT_WIN_SIZE = 10;
protected static final int DFLT_DEPTH = 1;
protected static final double DFLT_DEPTH_WEIGHT = 0.8;
@@ -46,7 +46,7 @@
protected LESK_TYPE leskType;
- protected Source source;
+ protected SenseSource source;
protected int win_f_size;
protected int win_b_size;
protected int depth;
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
index 9bc044d..cb83322 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
@@ -32,15 +32,13 @@
import opennlp.tools.disambiguator.WSDSample;
import opennlp.tools.disambiguator.WSDisambiguator;
import opennlp.tools.disambiguator.WordPOS;
-import opennlp.tools.disambiguator.WordToDisambiguate;
-import opennlp.tools.util.Span;
/**
* Implementation of the <b>Most Frequent Sense</b> baseline approach. This
* approach returns the senses in order of frequency in WordNet. The first sense
* is the most frequent.
*/
-public class MFS implements WSDisambiguator {
+public class MFS extends WSDisambiguator {
public MFSParameters parameters;
@@ -52,47 +50,7 @@
this.parameters = new MFSParameters();
}
- @Deprecated
- public static String[] getMostFrequentSense(
- WordToDisambiguate wordToDisambiguate) {
-
- String word = wordToDisambiguate.getRawWord().toLowerCase();
- POS pos = WSDHelper.getPOS(wordToDisambiguate.getPosTag());
-
- if (pos != null) {
-
- WordPOS wordPOS = new WordPOS(word, pos);
-
- ArrayList<Synset> synsets = wordPOS.getSynsets();
-
- int size = synsets.size();
-
- String[] senses = new String[size];
-
- for (int i = 0; i < size; i++) {
- String senseKey = null;
- for (Word wd : synsets.get(i).getWords()) {
- if (wd.getLemma().equals(
- wordToDisambiguate.getRawWord().split("\\.")[0])) {
- try {
- senseKey = wd.getSenseKey();
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- senses[i] = "WordNet " + senseKey;
- break;
- }
- }
-
- }
- return senses;
- } else {
- System.out.println("The word has no definitions in WordNet !");
- return null;
- }
-
- }
-
+
/*
* @return the most frequent senses from wordnet
*/
@@ -102,19 +60,23 @@
for (Word wd : synsets.get(0).getWords()) {
if (WSDParameters.isStemCompare) {
WordPOS wdPOS = new WordPOS(wd.getLemma(), wd.getPOS());
- WordPOS samplePOS = new WordPOS(sample.getTargetLemma(),
+ WordPOS samplePOS = new WordPOS(
+ sample.getLemmas()[sample.getTargetPosition()],
WSDHelper.getPOS(sample.getTargetTag()));
if (wdPOS.isStemEquivalent(samplePOS)) {
try {
- return WSDParameters.Source.WORDNET.name() + " " + wd.getSenseKey();
+ return WSDParameters.SenseSource.WORDNET.name() + " "
+ + wd.getSenseKey();
} catch (JWNLException e) {
e.printStackTrace();
}
}
} else {
- if (wd.getLemma().equalsIgnoreCase((sample.getTargetLemma()))) {
+ if (wd.getLemma().equalsIgnoreCase(
+ (sample.getLemmas()[sample.getTargetPosition()]))) {
try {
- return WSDParameters.Source.WORDNET.name() + " " + wd.getSenseKey();
+ return WSDParameters.SenseSource.WORDNET.name() + " "
+ + wd.getSenseKey();
} catch (JWNLException e) {
e.printStackTrace();
}
@@ -134,11 +96,12 @@
for (Word wd : synsets.get(i).getWords()) {
if (WSDParameters.isStemCompare) {
WordPOS wdPOS = new WordPOS(wd.getLemma(), wd.getPOS());
- WordPOS samplePOS = new WordPOS(sample.getTargetLemma(),
+ WordPOS samplePOS = new WordPOS(
+ sample.getLemmas()[sample.getTargetPosition()],
WSDHelper.getPOS(sample.getTargetTag()));
if (wdPOS.isStemEquivalent(samplePOS)) {
try {
- senseKeys[i] = WSDParameters.Source.WORDNET.name() + " "
+ senseKeys[i] = WSDParameters.SenseSource.WORDNET.name() + " "
+ wd.getSenseKey();
break;
} catch (JWNLException e) {
@@ -147,9 +110,10 @@
break;
}
} else {
- if (wd.getLemma().equalsIgnoreCase((sample.getTargetLemma()))) {
+ if (wd.getLemma().equalsIgnoreCase(
+ (sample.getLemmas()[sample.getTargetPosition()]))) {
try {
- senseKeys[i] = WSDParameters.Source.WORDNET.name() + " "
+ senseKeys[i] = WSDParameters.SenseSource.WORDNET.name() + " "
+ wd.getSenseKey();
break;
} catch (JWNLException e) {
@@ -185,27 +149,77 @@
@Override
public String[] disambiguate(WSDSample sample) {
- return getMostFrequentSenses(sample);
+
+ if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
+ return getMostFrequentSenses(sample);
+
+ } else {
+ if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
+ String s = WSDParameters.SenseSource.WSDHELPER.name() + " "
+ + sample.getTargetTag();
+ String[] sense = { s };
+ return sense;
+ } else {
+ return null;
+ }
+ }
}
@Override
public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
- int ambiguousTokenIndex, String lemma) {
- return disambiguate(new WSDSample(tokenizedContext, tokenTags,
- ambiguousTokenIndex, lemma));
+ String[] lemmas, int ambiguousTokenIndex) {
+ return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
+ ambiguousTokenIndex));
}
- @Override
- public String[][] disambiguate(String[] tokenizedContext, String[] tokenTags,
- Span ambiguousTokenIndexSpan, String ambiguousTokenLemma) {
- // TODO A iterate over span
- return null;
- }
+ public String[] disambiguate(String wordTag) {
- @Override
- public String[] disambiguate(String[] inputText, int inputWordIndex) {
- // TODO Deprecate
- return null;
- }
+ String word = wordTag.split("\\.")[0];
+ String tag = wordTag.split("\\.")[1];
+ POS pos;
+
+ if (tag.equalsIgnoreCase("a")) {
+ pos = POS.ADJECTIVE;
+ } else if (tag.equalsIgnoreCase("r")) {
+ pos = POS.ADVERB;
+ } else if (tag.equalsIgnoreCase("n")) {
+ pos = POS.NOUN;
+ } else if (tag.equalsIgnoreCase("v")) {
+ pos = POS.VERB;
+ } else
+ pos = null;
+
+ if (pos != null) {
+
+ WordPOS wordPOS = new WordPOS(word, pos);
+
+ ArrayList<Synset> synsets = wordPOS.getSynsets();
+
+ int size = synsets.size();
+
+ String[] senses = new String[size];
+
+ for (int i = 0; i < size; i++) {
+ String senseKey = null;
+ for (Word wd : synsets.get(i).getWords()) {
+ if (wd.getLemma().equals(word)) {
+ try {
+ senseKey = wd.getSenseKey();
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ senses[i] = senseKey;
+ break;
+ }
+ }
+
+ }
+ return senses;
+ } else {
+ System.out.println("The word has no definitions in WordNet !");
+ return null;
+ }
+
+ }
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java
index d0be62e..63a226b 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java
@@ -27,22 +27,22 @@
public MFSParameters() {
this.isCoarseSense = false;
- this.source = Source.WORDNET;
+ this.source = SenseSource.WORDNET;
}
- protected Source source;
+ protected SenseSource source;
- public Source getSource() {
+ public SenseSource getSource() {
return source;
}
- public void setSource(Source source) {
+ public void setSource(SenseSource source) {
this.source = source;
}
@Override
public boolean isValid() {
- return EnumUtils.isValidEnum(Source.class, source.name());
+ return EnumUtils.isValidEnum(SenseSource.class, source.name());
}
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
index 4dc3637..d205bd4 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
@@ -39,11 +39,10 @@
WSDHelper.print("Evaluation Started");
String modelsDir = "src\\test\\resources\\models\\";
- WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
- WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
- WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
-
-
+ WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+ WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+ WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
IMS ims = new IMS();
IMSParameters imsParams = new IMSParameters();
ims.setParams(imsParams);
@@ -56,7 +55,7 @@
// don't take verbs because they are not from WordNet
if (!word.split("\\.")[1].equals("v")) {
- ArrayList<WSDSample> instances = getTestData(word);
+ ArrayList<WSDSample> instances = seReader.getSensevalData(word);
if (instances != null) {
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
@@ -74,59 +73,4 @@
}
}
-
- /**
- * For a specific word, return the Semeval3 corresponding instances in form of
- * {@link WSDIMS}
- *
- * @param wordTag
- * the word of which the instances are to be collected. wordTag has
- * to be in the format "word.POS" (e.g., "activate.v", "smart.a",
- * etc.)
- * @return list of {@link WSDIMS} instances of the wordTag
- */
- @Deprecated
- protected static ArrayList<WTDIMS> getTestDataOld(String wordTag) {
-
- ArrayList<WTDIMS> instances = new ArrayList<WTDIMS>();
- for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) {
- WTDIMS wtdims = new WTDIMS(wtd);
- instances.add(wtdims);
- }
-
- return instances;
- }
-
- protected static ArrayList<WSDSample> getTestData(String wordTag) {
-
- ArrayList<WSDSample> instances = new ArrayList<WSDSample>();
- for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) {
- List<WordPOS> words = WSDHelper.getAllRelevantWords(wtd);
- int targetWordIndex=0;
- for (int i=0; i<words.size();i++){
- if(words.get(i).isTarget){
- targetWordIndex = i;
- }
- }
- String[] tags = new String[words.size()];
- String[] tokens = new String[words.size()];
- for (int i=0;i<words.size();i++){
- tags[i] = words.get(i).getPosTag();
- tokens[i] = words.get(i).getWord();
- }
- String targetLemma = WSDHelper.getLemmatizer().lemmatize(
- tokens[targetWordIndex], tags[targetWordIndex]);
-
- WSDSample sample = new WSDSample(tokens,tags,targetWordIndex,targetLemma);
- sample.setSenseIDs(wtd.getSenseIDs());
- if (sample != null) {
- if (sample.getSenseIDs().get(0) != null
- && !sample.getSenseIDs().get(0).equalsIgnoreCase("U")) {
- instances.add(sample);
- }
- }
- }
- return instances;
- }
-
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
index 03e2e7d..691bce9 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
@@ -19,7 +19,11 @@
package opennlp.tools.disambiguator;
+import java.util.ArrayList;
+import java.util.List;
+
import opennlp.tools.disambiguator.ims.IMS;
+import opennlp.tools.util.Span;
/**
* This is a typical example of how to call the disambiguation function in the
@@ -34,26 +38,83 @@
public class IMSTester {
public static void main(String[] args) {
-
+
String modelsDir = "src\\test\\resources\\models\\";
WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
-
+
IMS ims = new IMS();
- String test1 = "Please write to me soon.";
+
+ /**
+ * This is how to make the context for one-word-disambiguation using IMS
+ */
+ String test1 = "We need to discuss important topic, please write to me soon.";
String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
- WSDHelper.print(ims.disambiguate(sentence1, 1));
+ String[] tags1 = WSDHelper.getTagger().tag(sentence1);
+ List<String> tempLemmas1 = new ArrayList<String>();
+ for (int i = 0; i < sentence1.length; i++) {
+ String lemma = WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]);
+ tempLemmas1.add(lemma);
+ }
+ String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
- String test2 = "it was a strong argument that his hypothesis was true";
+ // output
+ String[] senses1 = ims.disambiguate(sentence1, tags1, lemmas1, 8);
+ System.out.print(lemmas1[8] + " :\t");
+ WSDHelper.print(senses1);
+ WSDHelper.print("*****************************");
+
+
+ /**
+ * This is how to make the context for disambiguation of span of words
+ */
+ String test2 = "The component was highly radioactive to the point that"
+ + " it has been activated the second it touched water";
String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
- WSDHelper.print(ims.disambiguate(sentence2, 3));
+ String[] tags2 = WSDHelper.getTagger().tag(sentence2);
+ List<String> tempLemmas2 = new ArrayList<String>();
+ for (int i = 0; i < sentence2.length; i++) {
+ String lemma = WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]);
+ tempLemmas2.add(lemma);
+ }
+ String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
+ Span span = new Span(3, 7);
- String test3 = "the component was highly radioactive to the point that it has been activated the second it touched water";
+ // output
+ List<String[]> senses2 = ims.disambiguate(sentence2, tags2, lemmas2, span);
+ for (int i = span.getStart(); i < span.getEnd() + 1; i++) {
+ String[] senses = senses2.get(i-span.getStart());
+ System.out.print(lemmas2[i] + " :\t");
+ WSDHelper.print(senses);
+ WSDHelper.print("----------");
+ }
+
+ WSDHelper.print("*****************************");
+
+
+ /**
+ * This is how to make the context for all-words-disambiguation
+ */
+ String test3 = "The summer almost over and I not to the beach even once";
String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
- WSDHelper.print(ims.disambiguate(sentence3, 12));
+ String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+ List<String> tempLemmas3 = new ArrayList<String>();
+ for (int i = 0; i < sentence3.length; i++) {
+ String lemma = WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]);
+ tempLemmas3.add(lemma);
+ }
+ String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+ // output
+ List<String[]> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
+ for (int i = 0; i < sentence3.length; i++) {
+ String[] senses = senses3.get(i);
+ System.out.print(lemmas3[i] + " :\t");
+ WSDHelper.print(senses);
+ WSDHelper.print("----------");
+ }
}
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
index 4c2fba3..fe5199c 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
@@ -20,10 +20,8 @@
package opennlp.tools.disambiguator;
import java.util.ArrayList;
-import java.util.List;
import opennlp.tools.disambiguator.datareader.SensevalReader;
-import opennlp.tools.disambiguator.ims.WTDIMS;
import opennlp.tools.disambiguator.lesk.Lesk;
import opennlp.tools.disambiguator.lesk.LeskParameters;
@@ -56,7 +54,7 @@
// don't take verbs because they are not from WordNet
if (!word.split("\\.")[1].equals("v")) {
- ArrayList<WSDSample> instances = getTestData(word);
+ ArrayList<WSDSample> instances = seReader.getSensevalData(word);
if (instances != null) {
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
@@ -73,37 +71,5 @@
}
}
- protected static ArrayList<WSDSample> getTestData(String wordTag) {
-
- ArrayList<WSDSample> instances = new ArrayList<WSDSample>();
- for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) {
- List<WordPOS> words = WSDHelper.getAllRelevantWords(wtd);
- int targetWordIndex = 0;
- for (int i = 0; i < words.size(); i++) {
- if (words.get(i).isTarget) {
- targetWordIndex = i;
- }
- }
- String[] tags = new String[words.size()];
- String[] tokens = new String[words.size()];
- for (int i = 0; i < words.size(); i++) {
- tags[i] = words.get(i).getPosTag();
- tokens[i] = words.get(i).getWord();
- }
- String targetLemma = WSDHelper.getLemmatizer().lemmatize(
- tokens[targetWordIndex], tags[targetWordIndex]);
-
- WSDSample sample = new WSDSample(tokens, tags, targetWordIndex,
- targetLemma);
- sample.setSenseIDs(wtd.getSenseIDs());
- if (sample != null) {
- if (sample.getSenseIDs().get(0) != null
- && !sample.getSenseIDs().get(0).equalsIgnoreCase("U")) {
- instances.add(sample);
- }
- }
- }
- return instances;
- }
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
index 9f6f477..13c959b 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
@@ -19,7 +19,7 @@
package opennlp.tools.disambiguator;
-
+import java.util.ArrayList;
import java.util.List;
import opennlp.tools.disambiguator.lesk.Lesk;
@@ -32,7 +32,6 @@
@Test
public static void main(String[] args) {
-
Lesk lesk = new Lesk();
LeskParameters params = new LeskParameters();
params.setLeskType(LESK_TYPE.LESK_EXT);
@@ -40,72 +39,60 @@
params.setFeatures(a);
lesk.setParams(params);
String modelsDir = "src\\test\\resources\\models\\";
- WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
- WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
- WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
-
+ WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+ WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+ WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
String test1 = "I went to the bank to deposit money.";
- String[] sentence = WSDHelper.getTokenizer().tokenize(test1);
- List<WordPOS> words = WSDHelper.getAllRelevantWords(sentence);
- int targetWordIndex = 0;
- String[] tags = new String[words.size()];
- String[] tokens = new String[words.size()];
- for (int i=0;i<words.size();i++){
- tags[i] = words.get(i).getPosTag();
- tokens[i] = words.get(i).getWord();
-
- WSDHelper.print("token : "+ tokens[i] + "_" + tags[i]);
+ String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+ int targetWordIndex1 = 5;
+ String[] tags1 = WSDHelper.getTagger().tag(sentence1);
+ List<String> tempLemmas1 = new ArrayList<String>();
+ for (int i = 0; i < sentence1.length; i++) {
+ String lemma = WSDHelper.getLemmatizer()
+ .lemmatize(sentence1[i], tags1[i]);
+ tempLemmas1.add(lemma);
}
- String targetLemma = WSDHelper.getLemmatizer().lemmatize(
- tokens[targetWordIndex], tags[targetWordIndex]);
- // Constants.print("lemma : "+ targetLemma);
- WSDHelper.print(lesk.disambiguate(tokens, tags, targetWordIndex,targetLemma));
- WSDHelper.printResults(lesk,
- lesk.disambiguate(tokens, tags, targetWordIndex, targetLemma));
-
+ String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+ String[] results1 = lesk.disambiguate(sentence1, tags1, lemmas1,
+ targetWordIndex1);
+ WSDHelper.print(results1);
+ WSDHelper.printResults(lesk, results1);
+
WSDHelper.print("----------------------------------------");
-
+
String test2 = "it was a strong argument that his hypothesis was true";
- sentence = WSDHelper.getTokenizer().tokenize(test2);
- words = WSDHelper.getAllRelevantWords(sentence);
- targetWordIndex = 1;
- tags = new String[words.size()];
- tokens = new String[words.size()];
- for (int i=0;i<words.size();i++){
- tags[i] = words.get(i).getPosTag();
- tokens[i] = words.get(i).getWord();
-
- //Constants.print("token : "+ tokens[i] + "_" + tags[i]);
+ String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+ int targetWordIndex2 = 4;
+ String[] tags2 = WSDHelper.getTagger().tag(sentence2);
+ List<String> tempLemmas2 = new ArrayList<String>();
+ for (int i = 0; i < sentence2.length; i++) {
+ String lemma = WSDHelper.getLemmatizer()
+ .lemmatize(sentence2[i], tags2[i]);
+ tempLemmas2.add(lemma);
}
- targetLemma = WSDHelper.getLemmatizer().lemmatize(
- tokens[targetWordIndex], tags[targetWordIndex]);
- //Constants.print("lemma : "+ targetLemma);
-
- WSDHelper.print(lesk.disambiguate(tokens, tags, targetWordIndex,targetLemma));
- WSDHelper.printResults(lesk,
- lesk.disambiguate(tokens, tags, targetWordIndex, targetLemma));
+ String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
+ String[] results2 = lesk.disambiguate(sentence2, tags2, lemmas2,
+ targetWordIndex2);
+ WSDHelper.print(results2);
+ WSDHelper.printResults(lesk, results2);
WSDHelper.print("----------------------------------------");
-
+
String test3 = "the component was highly radioactive to the point that it has been activated the second it touched water";
-
- sentence = WSDHelper.getTokenizer().tokenize(test3);
- words = WSDHelper.getAllRelevantWords(sentence);
- targetWordIndex = 4;
- tags = new String[words.size()];
- tokens = new String[words.size()];
- for (int i=0;i<words.size();i++){
- tags[i] = words.get(i).getPosTag();
- tokens[i] = words.get(i).getWord();
-
- //Constants.print("token : "+ tokens[i] + "_" + tags[i]);
+ String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+ int targetWordIndex3 = 3;
+ String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+ List<String> tempLemmas3 = new ArrayList<String>();
+ for (int i = 0; i < sentence3.length; i++) {
+ String lemma = WSDHelper.getLemmatizer()
+ .lemmatize(sentence3[i], tags3[i]);
+ tempLemmas3.add(lemma);
}
- targetLemma = WSDHelper.getLemmatizer().lemmatize(
- tokens[targetWordIndex], tags[targetWordIndex]);
- //Constants.print("lemma : "+ targetLemma);
-
- WSDHelper.print(lesk.disambiguate(tokens, tags, targetWordIndex,targetLemma));
- WSDHelper.printResults(lesk,
- lesk.disambiguate(tokens, tags, targetWordIndex, targetLemma));
+ String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+ String[] results3 = lesk.disambiguate(sentence3, tags3, lemmas3,
+ targetWordIndex3);
+ WSDHelper.print(results3);
+ WSDHelper.printResults(lesk, results3);
WSDHelper.print("----------------------------------------");
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
index 369791d..b71ca6e 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
@@ -36,9 +36,9 @@
public static void main(String[] args) {
WSDHelper.print("Evaluation Started");
String modelsDir = "src\\test\\resources\\models\\";
- WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
- WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
- WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
+ WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+ WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+ WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
MFS mfs = new MFS();
WSDParameters.isStemCompare = true;
@@ -50,7 +50,7 @@
// don't take verbs because they are not from WordNet
if (!word.split("\\.")[1].equals("v")) {
- ArrayList<WSDSample> instances = getTestData(word);
+ ArrayList<WSDSample> instances = seReader.getSensevalData(word);
if (instances != null) {
WSDHelper.print("------------------" + word + "------------------");
@@ -70,38 +70,4 @@
}
- /**
- * For a specific word, return the Semeval3 corresponding instances in form of
- * {@link WSDSample}
- *
- * @param wordTag
- * the word of which the instances are to be collected. wordTag has
- * to be in the format "word.POS" (e.g., "activate.v", "smart.a",
- * etc.)
- * @return list of {@link WSDSample} instances of the wordTag
- */
- protected static ArrayList<WSDSample> getTestData(String wordTag) {
-
- ArrayList<WSDSample> instances = new ArrayList<WSDSample>();
- for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) {
-
- String targetLemma = WSDHelper.getLemmatizer().lemmatize(wtd.getWord(),
- wtd.getPosTag());
-
- WSDSample sample = new WSDSample(wtd.getSentence(), wtd.getPosTags(),
- wtd.getWordIndex(), targetLemma);
- sample.setSenseIDs(wtd.getSenseIDs());
-
- if (sample != null) {
- if (sample.getSenseIDs().get(0) != null
- && !sample.getSenseIDs().get(0).equalsIgnoreCase("U")) {
- instances.add(sample);
- }
- }
-
- }
-
- return instances;
- }
-
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
index e42c655..f74faad 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
@@ -19,9 +19,11 @@
package opennlp.tools.disambiguator;
+import java.util.ArrayList;
import java.util.List;
import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.util.Span;
/**
* This is a typical example of how to call the disambiguation function in the
@@ -30,78 +32,83 @@
public class MFSTester {
public static void main(String[] args) {
-
String modelsDir = "src\\test\\resources\\models\\";
- WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
- WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
- WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
-
-
+ WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+ WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+ WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
MFS mfs = new MFS();
- String test1 = "I went fishing for some sea bass.";
- String[] sentence = WSDHelper.getTokenizer().tokenize(test1);
- List<WordPOS> words = WSDHelper.getAllRelevantWords(sentence);
- int targetWordIndex = 2;
- String[] tags = new String[words.size()];
- String[] tokens = new String[words.size()];
- for (int i=0;i<words.size();i++){
- tags[i] = words.get(i).getPosTag();
- tokens[i] = words.get(i).getWord();
-
- // Constants.print("token : "+ tokens[i] + "_" + tags[i]);
+    /**
+     * This is how to build the context for single-word disambiguation using MFS
+     */
+ String test1 = "We need to discuss important topic, please write to me soon.";
+ String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+ String[] tags1 = WSDHelper.getTagger().tag(sentence1);
+ List<String> tempLemmas1 = new ArrayList<String>();
+ for (int i = 0; i < sentence1.length; i++) {
+ String lemma = WSDHelper.getLemmatizer()
+ .lemmatize(sentence1[i], tags1[i]);
+ tempLemmas1.add(lemma);
}
- String targetLemma = WSDHelper.getLemmatizer().lemmatize(
- tokens[targetWordIndex], tags[targetWordIndex]);
- // Constants.print("lemma : "+ targetLemma);
-
- WSDHelper.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma));
- WSDHelper.printResults(mfs,
- mfs.disambiguate(tokens, tags, targetWordIndex, targetLemma));
- WSDHelper.print("----------------------------------------");
-
- String test2 = "it was a strong argument that his hypothesis was true";
- sentence = WSDHelper.getTokenizer().tokenize(test2);
- words = WSDHelper.getAllRelevantWords(sentence);
- targetWordIndex = 1;
- tags = new String[words.size()];
- tokens = new String[words.size()];
- for (int i=0;i<words.size();i++){
- tags[i] = words.get(i).getPosTag();
- tokens[i] = words.get(i).getWord();
-
- //Constants.print("token : "+ tokens[i] + "_" + tags[i]);
+ String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+
+ // output
+ String[] senses1 = mfs.disambiguate(sentence1, tags1, lemmas1, 8);
+ System.out.print(lemmas1[8] + " :\t");
+ WSDHelper.print(senses1);
+ WSDHelper.print("*****************************");
+
+    /**
+     * This is how to build the context for disambiguation of a span of words
+     */
+ String test2 = "The component was highly radioactive to the point that"
+ + " it has been activated the second it touched water";
+ String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+ String[] tags2 = WSDHelper.getTagger().tag(sentence2);
+ List<String> tempLemmas2 = new ArrayList<String>();
+ for (int i = 0; i < sentence2.length; i++) {
+ String lemma = WSDHelper.getLemmatizer()
+ .lemmatize(sentence2[i], tags2[i]);
+ tempLemmas2.add(lemma);
}
- targetLemma = WSDHelper.getLemmatizer().lemmatize(
- tokens[targetWordIndex], tags[targetWordIndex]);
- //Constants.print("lemma : "+ targetLemma);
-
- WSDHelper.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma));
- WSDHelper.printResults(mfs,
- mfs.disambiguate(tokens, tags, targetWordIndex, targetLemma));
- WSDHelper.print("----------------------------------------");
-
- String test3 = "the component was highly radioactive to the point that it has been activated the second it touched water";
-
- sentence = WSDHelper.getTokenizer().tokenize(test3);
- words = WSDHelper.getAllRelevantWords(sentence);
- targetWordIndex = 4;
- tags = new String[words.size()];
- tokens = new String[words.size()];
- for (int i=0;i<words.size();i++){
- tags[i] = words.get(i).getPosTag();
- tokens[i] = words.get(i).getWord();
-
- //Constants.print("token : "+ tokens[i] + "_" + tags[i]);
+ String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
+ Span span = new Span(3, 7);
+
+ // output
+ List<String[]> senses2 = mfs.disambiguate(sentence2, tags2, lemmas2, span);
+ for (int i = span.getStart(); i < span.getEnd() + 1; i++) {
+ String[] senses = senses2.get(i - span.getStart());
+ System.out.print(lemmas2[i] + " :\t");
+ WSDHelper.print(senses);
+ WSDHelper.print("----------");
}
- targetLemma = WSDHelper.getLemmatizer().lemmatize(
- tokens[targetWordIndex], tags[targetWordIndex]);
- //Constants.print("lemma : "+ targetLemma);
-
- WSDHelper.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma));
- WSDHelper.printResults(mfs,
- mfs.disambiguate(tokens, tags, targetWordIndex, targetLemma));
- WSDHelper.print("----------------------------------------");
+
+ WSDHelper.print("*****************************");
+
+ /**
+ * This is how to make the context for all-words-disambiguation
+ */
+ String test3 = "The summer is almost over and I have not been to the beach even once";
+ String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+ String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+ List<String> tempLemmas3 = new ArrayList<String>();
+ for (int i = 0; i < sentence3.length; i++) {
+ String lemma = WSDHelper.getLemmatizer()
+ .lemmatize(sentence3[i], tags3[i]);
+ tempLemmas3.add(lemma);
+ }
+ String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+
+ // output
+ List<String[]> senses3 = mfs.disambiguate(sentence3, tags3, lemmas3);
+ for (int i = 0; i < sentence3.length; i++) {
+ String[] senses = senses3.get(i);
+ System.out.print(lemmas3[i] + " :\t");
+ WSDHelper.print(senses);
+ WSDHelper.print("----------");
+ }
+
}
}
\ No newline at end of file
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
index e69de29..866fc4c 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
@@ -0,0 +1,39 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.disambiguator.ims.IMS;
+
+public class Tester {
+
+ public static void main(String[] args) {
+
+ String modelsDir = "src\\test\\resources\\models\\";
+ WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+ WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+ WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
+ IMS ims = new IMS();
+
+ String test3 = "The summer is almost over and I haven't been to the beach even once";
+ String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+ String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+ List<String> tempLemmas3 = new ArrayList<String>();
+ for (int i = 0; i < sentence3.length; i++) {
+ String lemma = WSDHelper.getLemmatizer()
+ .lemmatize(sentence3[i], tags3[i]);
+ tempLemmas3.add(lemma);
+ }
+ String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+
+ // output
+ List<String[]> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
+ for (int i = 0; i < sentence3.length; i++) {
+ System.out.print(sentence3[i] + " : ");
+ WSDHelper.printResults(ims, senses3.get(i));
+ WSDHelper.print("----------");
+ }
+
+ }
+}
\ No newline at end of file