OPENNLP-758 Formatted the code according to OpenNLP code conventions
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
index d29e5df..de8f9a4 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
@@ -5,130 +5,177 @@
import net.sf.extjwnl.data.POS;
-
public class Constants {
-
- public static String osPathChar = "\\";
- // List of all the PoS tags
- public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
- "JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS",
- "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB",
- "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" };
+ public static String osPathChar = "\\";
- // List of the PoS tags of which the senses are to be extracted
- public static String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB", "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };
+ // List of all the PoS tags
+ public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
+ "JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS",
+ "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB", "VBD",
+ "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" };
-
- // List of Negation Words
- public static ArrayList<String> negationWords = new ArrayList<String>(
- Arrays.asList("not", "no", "never", "none", "nor", "non"));
-
- // List of Stop Words
- public static ArrayList<String> stopWords = new ArrayList<String>(Arrays.asList( "a", "able", "about", "above", "according", "accordingly", "across", "actually", "after",
- "afterwards", "again", "against", "ain't", "all", "allow", "allows", "almost", "alone", "along", "already", "also",
- "although", "always", "am", "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything",
- "anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "aren't", "around", "as", "aside", "ask",
- "asking", "associated", "at", "available", "away", "awfully", "be", "became", "because", "become", "becomes", "becoming", "been",
- "before", "beforehand", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both",
- "brief", "but", "by", "came", "can", "cannot", "cant", "can't", "cause", "causes", "certain", "certainly", "changes", "clearly",
- "c'mon", "co", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing",
- "contains", "corresponding", "could", "couldn't", "course", "c's", "currently", "definitely", "described", "despite", "did", "didn't",
- "different", "do", "does", "doesn't", "doing", "done", "don't", "down", "downwards", "during", "each", "edu", "eg", "eight",
- "either", "else", "elsewhere", "enough", "entirely", "especially", "et", "etc", "even", "ever", "every", "everybody", "everyone",
- "everything", "everywhere", "ex", "exactly", "example", "except", "far", "few", "fifth", "first", "five", "followed", "following",
- "follows", "for", "former", "formerly", "forth", "four", "from", "further", "furthermore", "get", "gets", "getting", "given",
- "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "had", "hadn't", "happens", "hardly", "has", "hasn't",
- "have", "haven't", "having", "he", "hello", "help", "hence", "her", "here", "hereafter", "hereby", "herein", "here's", "hereupon",
- "hers", "herself", "he's", "hi", "him", "himself", "his", "hither", "hopefully", "how", "howbeit", "however", "i", "i'd", "ie", "if",
- "ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates", "inner", "insofar",
- "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll", "its", "it's", "itself", "i've", "just", "keep", "keeps", "kept",
- "know", "known", "knows", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "let's", "like",
- "liked", "likely", "little", "look", "looking", "looks", "ltd", "mainly", "many", "may", "maybe", "me", "mean", "meanwhile",
- "merely", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "name", "namely", "nd", "near", "nearly",
- "necessary", "need", "needs", "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody", "non", "none", "noone",
- "nor", "normally", "not", "nothing", "novel", "now", "nowhere", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on",
- "once", "one", "ones", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside",
- "over", "overall", "own", "particular", "particularly", "per", "perhaps", "placed", "please", "plus", "possible", "presumably",
- "probably", "provides", "que", "quite", "qv", "rather", "rd", "re", "really", "reasonably", "regarding", "regardless", "regards",
- "relatively", "respectively", "right", "said", "same", "saw", "say", "saying", "says", "second", "secondly", "see", "seeing",
- "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several",
- "shall", "she", "should", "shouldn't", "since", "six", "so", "some", "somebody", "somehow", "someone", "something", "sometime",
- "sometimes", "somewhat", "somewhere", "soon", "sorry", "specified", "specify", "specifying", "still", "sub", "such", "sup", "sure",
- "take", "taken", "tell", "tends", "th", "than", "thank", "thanks", "thanx", "that", "thats", "that's", "the", "their", "theirs",
- "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "theres", "there's",
- "thereupon", "these", "they", "they'd", "they'll", "they're", "they've", "think", "third", "this", "thorough", "thoroughly",
- "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "took", "toward", "towards",
- "tried", "tries", "truly", "try", "trying", "t's", "twice", "two", "un", "under", "unfortunately", "unless", "unlikely", "until",
- "unto", "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually", "value", "various", "very", "via", "viz", "vs",
- "want", "wants", "was", "wasn't", "way", "we", "we'd", "welcome", "well", "we'll", "went", "were", "we're", "weren't", "we've",
- "what", "whatever", "what's", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "where's",
- "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "who's", "whose", "why", "will",
- "willing", "wish", "with", "within", "without", "wonder", "won't", "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll",
- "your", "you're", "yours", "yourself", "yourselves", "you've", "zero"));
-
- // Print a text in the console
- public static void print(Object in) {
- System.out.println(in);
- }
+ // List of the PoS tags of which the senses are to be extracted
+ public static String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB",
+ "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };
- public static void print(Object[] array) {
- System.out.println(Arrays.asList(array));
- }
+ // List of Negation Words
+ public static ArrayList<String> negationWords = new ArrayList<String>(
+ Arrays.asList("not", "no", "never", "none", "nor", "non"));
- public static void print(Object[][] array) {
- System.out.print("[");
- for (int i = 0; i < array.length; i++) {
- print(array[i]);
- if (i != array.length - 1) {
- System.out.print("\n");
- }
- print("]");
- }
- }
+ // List of Stop Words
+ public static ArrayList<String> stopWords = new ArrayList<String>(
+ Arrays.asList("a", "able", "about", "above", "according", "accordingly",
+ "across", "actually", "after", "afterwards", "again", "against",
+ "ain't", "all", "allow", "allows", "almost", "alone", "along",
+ "already", "also", "although", "always", "am", "among", "amongst",
+ "an", "and", "another", "any", "anybody", "anyhow", "anyone",
+ "anything", "anyway", "anyways", "anywhere", "apart", "appear",
+ "appreciate", "appropriate", "are", "aren't", "around", "as",
+ "aside", "ask", "asking", "associated", "at", "available", "away",
+ "awfully", "be", "became", "because", "become", "becomes",
+ "becoming", "been", "before", "beforehand", "behind", "being",
+ "believe", "below", "beside", "besides", "best", "better", "between",
+ "beyond", "both", "brief", "but", "by", "came", "can", "cannot",
+ "cant", "can't", "cause", "causes", "certain", "certainly",
+ "changes", "clearly", "c'mon", "co", "com", "come", "comes",
+ "concerning", "consequently", "consider", "considering", "contain",
+ "containing", "contains", "corresponding", "could", "couldn't",
+ "course", "c's", "currently", "definitely", "described", "despite",
+ "did", "didn't", "different", "do", "does", "doesn't", "doing",
+ "done", "don't", "down", "downwards", "during", "each", "edu", "eg",
+ "eight", "either", "else", "elsewhere", "enough", "entirely",
+ "especially", "et", "etc", "even", "ever", "every", "everybody",
+ "everyone", "everything", "everywhere", "ex", "exactly", "example",
+ "except", "far", "few", "fifth", "first", "five", "followed",
+ "following", "follows", "for", "former", "formerly", "forth", "four",
+ "from", "further", "furthermore", "get", "gets", "getting", "given",
+ "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings",
+ "had", "hadn't", "happens", "hardly", "has", "hasn't", "have",
+ "haven't", "having", "he", "hello", "help", "hence", "her", "here",
+ "hereafter", "hereby", "herein", "here's", "hereupon", "hers",
+ "herself", "he's", "hi", "him", "himself", "his", "hither",
+ "hopefully", "how", "howbeit", "however", "i", "i'd", "ie", "if",
+ "ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc",
+ "indeed", "indicate", "indicated", "indicates", "inner", "insofar",
+ "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll",
+ "its", "it's", "itself", "i've", "just", "keep", "keeps", "kept",
+ "know", "known", "knows", "last", "lately", "later", "latter",
+ "latterly", "least", "less", "lest", "let", "let's", "like", "liked",
+ "likely", "little", "look", "looking", "looks", "ltd", "mainly",
+ "many", "may", "maybe", "me", "mean", "meanwhile", "merely", "might",
+ "more", "moreover", "most", "mostly", "much", "must", "my", "myself",
+ "name", "namely", "nd", "near", "nearly", "necessary", "need",
+ "needs", "neither", "never", "nevertheless", "new", "next", "nine",
+ "no", "nobody", "non", "none", "noone", "nor", "normally", "not",
+ "nothing", "novel", "now", "nowhere", "obviously", "of", "off",
+ "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones",
+ "only", "onto", "or", "other", "others", "otherwise", "ought", "our",
+ "ours", "ourselves", "out", "outside", "over", "overall", "own",
+ "particular", "particularly", "per", "perhaps", "placed", "please",
+ "plus", "possible", "presumably", "probably", "provides", "que",
+ "quite", "qv", "rather", "rd", "re", "really", "reasonably",
+ "regarding", "regardless", "regards", "relatively", "respectively",
+ "right", "said", "same", "saw", "say", "saying", "says", "second",
+ "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems",
+ "seen", "self", "selves", "sensible", "sent", "serious", "seriously",
+ "seven", "several", "shall", "she", "should", "shouldn't", "since",
+ "six", "so", "some", "somebody", "somehow", "someone", "something",
+ "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry",
+ "specified", "specify", "specifying", "still", "sub", "such", "sup",
+ "sure", "take", "taken", "tell", "tends", "th", "than", "thank",
+ "thanks", "thanx", "that", "thats", "that's", "the", "their",
+ "theirs", "them", "themselves", "then", "thence", "there",
+ "thereafter", "thereby", "therefore", "therein", "theres", "there's",
+ "thereupon", "these", "they", "they'd", "they'll", "they're",
+ "they've", "think", "third", "this", "thorough", "thoroughly",
+ "those", "though", "three", "through", "throughout", "thru", "thus",
+ "to", "together", "too", "took", "toward", "towards", "tried",
+ "tries", "truly", "try", "trying", "t's", "twice", "two", "un",
+ "under", "unfortunately", "unless", "unlikely", "until", "unto",
+ "up", "upon", "us", "use", "used", "useful", "uses", "using",
+ "usually", "value", "various", "very", "via", "viz", "vs", "want",
+ "wants", "was", "wasn't", "way", "we", "we'd", "welcome", "well",
+ "we'll", "went", "were", "we're", "weren't", "we've", "what",
+ "whatever", "what's", "when", "whence", "whenever", "where",
+ "whereafter", "whereas", "whereby", "wherein", "where's",
+ "whereupon", "wherever", "whether", "which", "while", "whither",
+ "who", "whoever", "whole", "whom", "who's", "whose", "why", "will",
+ "willing", "wish", "with", "within", "without", "wonder", "won't",
+ "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "your",
+ "you're", "yours", "yourself", "yourselves", "you've", "zero"));
- // return the PoS (Class POS) out of the PoS-tag
- public static POS getPOS(String posTag) {
+ // Print a text in the console
+ public static void print(Object in) {
+ System.out.println(in);
+ }
- ArrayList<String> adjective = new ArrayList<String>(Arrays.asList("JJ", "JJR", "JJS"));
- ArrayList<String> adverb = new ArrayList<String>(Arrays.asList("RB", "RBR", "RBS"));
- ArrayList<String> noun = new ArrayList<String>(Arrays.asList("NN", "NNS", "NNP", "NNPS"));
- ArrayList<String> verb = new ArrayList<String>(Arrays.asList("VB", "VBD", "VBG", "VBN", "VBP", "VBZ"));
+ public static void print(Object[] array) {
+ System.out.println(Arrays.asList(array));
+ }
- if (adjective.contains(posTag)) return POS.ADJECTIVE;
- else if (adverb.contains(posTag)) return POS.ADVERB;
- else if (noun.contains(posTag)) return POS.NOUN;
- else if (verb.contains(posTag)) return POS.VERB;
- else return null;
+ public static void print(Object[][] array) {
+ System.out.print("[");
+ for (int i = 0; i < array.length; i++) {
+ print(array[i]);
+ if (i != array.length - 1) {
+ System.out.print("\n");
+ }
+ print("]");
+ }
+ }
- }
-
- // Check whether a list of arrays contains an array
- public static boolean belongsTo(String[] array, ArrayList<String[]> fullList) {
- for (String[] refArray : fullList) {
- if (areStringArraysEqual(array, refArray))
- return true;
- }
- return false;
- }
+ // return the PoS (Class POS) out of the PoS-tag
+ public static POS getPOS(String posTag) {
- // Check whether two arrays of strings are equal
- public static boolean areStringArraysEqual(String[] array1, String[] array2) {
+ ArrayList<String> adjective = new ArrayList<String>(Arrays.asList("JJ",
+ "JJR", "JJS"));
+ ArrayList<String> adverb = new ArrayList<String>(Arrays.asList("RB", "RBR",
+ "RBS"));
+ ArrayList<String> noun = new ArrayList<String>(Arrays.asList("NN", "NNS",
+ "NNP", "NNPS"));
+ ArrayList<String> verb = new ArrayList<String>(Arrays.asList("VB", "VBD",
+ "VBG", "VBN", "VBP", "VBZ"));
- if (array1.equals(null) || array2.equals(null))
- return false;
+ if (adjective.contains(posTag))
+ return POS.ADJECTIVE;
+ else if (adverb.contains(posTag))
+ return POS.ADVERB;
+ else if (noun.contains(posTag))
+ return POS.NOUN;
+ else if (verb.contains(posTag))
+ return POS.VERB;
+ else
+ return null;
- if (array1.length != array2.length) {
- return false;
- }
- for (int i = 0; i < array1.length; i++) {
- if (!array1[i].equals(array2[i])) {
- return false;
- }
- }
+ }
- return true;
+ // Check whether a list of arrays contains an array
+ public static boolean belongsTo(String[] array, ArrayList<String[]> fullList) {
+ for (String[] refArray : fullList) {
+ if (areStringArraysEqual(array, refArray))
+ return true;
+ }
+ return false;
+ }
- }
-
+ // Check whether two arrays of strings are equal
+ public static boolean areStringArraysEqual(String[] array1, String[] array2) {
+
+ if (array1.equals(null) || array2.equals(null))
+ return false;
+
+ if (array1.length != array2.length) {
+ return false;
+ }
+ for (int i = 0; i < array1.length; i++) {
+ if (!array1[i].equals(array2[i])) {
+ return false;
+ }
+ }
+
+ return true;
+
+ }
+
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
index 5b2952a..9851004 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
@@ -1,4 +1,5 @@
package opennlp.tools.disambiguator;
+
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
@@ -11,7 +12,6 @@
import net.sf.extjwnl.dictionary.MorphologicalProcessor;
import opennlp.tools.cmdline.postag.POSModelLoader;
import opennlp.tools.lemmatizer.SimpleLemmatizer;
-import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.postag.POSModel;
@@ -25,203 +25,203 @@
public class Loader {
- private static String modelsDir = "src\\test\\resources\\opennlp\\tools\\disambiguator\\";
-
- private static SentenceDetectorME sdetector;
- private static Tokenizer tokenizer;
- private static POSTaggerME tagger;
- private static NameFinderME nameFinder;
- private static SimpleLemmatizer lemmatizer;
+ private static String modelsDir = "src\\test\\resources\\opennlp\\tools\\disambiguator\\";
- private static Dictionary dictionary;
- private static MorphologicalProcessor morph;
- private static boolean IsInitialized = false;
+ private static SentenceDetectorME sdetector;
+ private static Tokenizer tokenizer;
+ private static POSTaggerME tagger;
+ private static NameFinderME nameFinder;
+ private static SimpleLemmatizer lemmatizer;
- // local caches for faster lookup
- private static HashMap<String,Object> stemCache;
- private static HashMap<String,Object> stopCache;
- private static HashMap<String,Object> relvCache;
-
-
+ private static Dictionary dictionary;
+ private static MorphologicalProcessor morph;
+ private static boolean IsInitialized = false;
- // Constructor
- public Loader(){
- super();
- load();
- }
+ // local caches for faster lookup
+ private static HashMap<String, Object> stemCache;
+ private static HashMap<String, Object> stopCache;
+ private static HashMap<String, Object> relvCache;
- public static HashMap<String,Object> getRelvCache(){
- if (relvCache==null || relvCache.keySet().isEmpty()){
- relvCache = new HashMap<String, Object>();
- for (String t : Constants.relevantPOS){
- relvCache.put(t, null);
- }
- }
- return relvCache;
- }
-
- public static HashMap<String,Object> getStopCache(){
- if (stopCache==null || stopCache.keySet().isEmpty()){
- stopCache = new HashMap<String, Object>();
- for (String s : Constants.stopWords){
- stopCache.put(s, null);
- }
- }
- return stopCache;
- }
-
- public static HashMap<String,Object> getStemCache(){
- if (stemCache==null || stemCache.keySet().isEmpty()){
- stemCache = new HashMap<String,Object>();
- for (Object pos : POS.getAllPOS()){
- stemCache.put(((POS)pos).getKey(),new HashMap());
- }
- }
- return stemCache;
- }
-
- public static MorphologicalProcessor getMorph(){
- if (morph==null){
- morph = dictionary.getMorphologicalProcessor();
- }
- return morph;
- }
+ // Constructor
+ public Loader() {
+ super();
+ load();
+ }
- public static Dictionary getDictionary(){
- if (dictionary==null){
- try {
- dictionary = Dictionary.getDefaultResourceInstance();
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- }
- return dictionary;
- }
+ public static HashMap<String, Object> getRelvCache() {
+ if (relvCache == null || relvCache.keySet().isEmpty()) {
+ relvCache = new HashMap<String, Object>();
+ for (String t : Constants.relevantPOS) {
+ relvCache.put(t, null);
+ }
+ }
+ return relvCache;
+ }
- public static SimpleLemmatizer getLemmatizer(){
- if (lemmatizer==null){
- try {
- lemmatizer = new SimpleLemmatizer (new FileInputStream(modelsDir + "en-lemmatizer.dict"));
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- return lemmatizer;
- }
+ public static HashMap<String, Object> getStopCache() {
+ if (stopCache == null || stopCache.keySet().isEmpty()) {
+ stopCache = new HashMap<String, Object>();
+ for (String s : Constants.stopWords) {
+ stopCache.put(s, null);
+ }
+ }
+ return stopCache;
+ }
- public static NameFinderME getNameFinder(){
- if (nameFinder==null){
- TokenNameFinderModel nameFinderModel;
- try {
- nameFinderModel = new TokenNameFinderModel(new FileInputStream(modelsDir + "en-ner-person.bin"));
- nameFinder = new NameFinderME(nameFinderModel);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- return nameFinder;
- }
+ public static HashMap<String, Object> getStemCache() {
+ if (stemCache == null || stemCache.keySet().isEmpty()) {
+ stemCache = new HashMap<String, Object>();
+ for (Object pos : POS.getAllPOS()) {
+ stemCache.put(((POS) pos).getKey(), new HashMap());
+ }
+ }
+ return stemCache;
+ }
- public static POSTaggerME getTagger(){
- if (tagger==null){
- POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir + "en-pos-maxent.bin"));
- tagger = new POSTaggerME(posTaggerModel);
- }
- return tagger;
- }
+ public static MorphologicalProcessor getMorph() {
+ if (morph == null) {
+ morph = dictionary.getMorphologicalProcessor();
+ }
+ return morph;
+ }
- public static SentenceDetectorME getSDetector(){
- if (sdetector==null){
- try {
- SentenceModel enSentModel = new SentenceModel(new FileInputStream(modelsDir + "en-sent.bin"));
- sdetector = new SentenceDetectorME(enSentModel);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- return sdetector;
- }
+ public static Dictionary getDictionary() {
+ if (dictionary == null) {
+ try {
+ dictionary = Dictionary.getDefaultResourceInstance();
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ }
+ return dictionary;
+ }
- public static Tokenizer getTokenizer(){
- if (tokenizer == null){
- try {
- TokenizerModel tokenizerModel = new TokenizerModel(new FileInputStream(modelsDir + "en-token.bin"));
- tokenizer = new TokenizerME(tokenizerModel);
- } catch (IOException e) {
- e.printStackTrace();
- }
+ public static SimpleLemmatizer getLemmatizer() {
+ if (lemmatizer == null) {
+ try {
+ lemmatizer = new SimpleLemmatizer(new FileInputStream(modelsDir
+ + "en-lemmatizer.dict"));
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
- }
- return tokenizer;
- }
+ return lemmatizer;
+ }
- public static boolean isInitialized(){
- return (dictionary !=null
- && morph !=null
- && stemCache !=null
- && stopCache !=null
- && relvCache !=null);
- }
-
- public void load(){
- try {
- SentenceModel enSentModel = new SentenceModel(new FileInputStream(modelsDir + "en-sent.bin"));
- sdetector = new SentenceDetectorME(enSentModel);
+ public static NameFinderME getNameFinder() {
+ if (nameFinder == null) {
+ TokenNameFinderModel nameFinderModel;
+ try {
+ nameFinderModel = new TokenNameFinderModel(new FileInputStream(
+ modelsDir + "en-ner-person.bin"));
+ nameFinder = new NameFinderME(nameFinderModel);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ return nameFinder;
+ }
- TokenizerModel TokenizerModel = new TokenizerModel(new FileInputStream(modelsDir + "en-token.bin"));
- tokenizer = new TokenizerME(TokenizerModel);
+ public static POSTaggerME getTagger() {
+ if (tagger == null) {
+ POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir
+ + "en-pos-maxent.bin"));
+ tagger = new POSTaggerME(posTaggerModel);
+ }
+ return tagger;
+ }
+ public static SentenceDetectorME getSDetector() {
+ if (sdetector == null) {
+ try {
+ SentenceModel enSentModel = new SentenceModel(new FileInputStream(
+ modelsDir + "en-sent.bin"));
+ sdetector = new SentenceDetectorME(enSentModel);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ return sdetector;
+ }
- POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir + "en-pos-maxent.bin"));
- tagger = new POSTaggerME(posTaggerModel);
+ public static Tokenizer getTokenizer() {
+ if (tokenizer == null) {
+ try {
+ TokenizerModel tokenizerModel = new TokenizerModel(new FileInputStream(
+ modelsDir + "en-token.bin"));
+ tokenizer = new TokenizerME(tokenizerModel);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
- TokenNameFinderModel nameFinderModel = new TokenNameFinderModel(new FileInputStream(modelsDir + "en-ner-person.bin"));
- nameFinder = new NameFinderME(nameFinderModel);
+ }
+ return tokenizer;
+ }
- lemmatizer = new SimpleLemmatizer (new FileInputStream(modelsDir + "en-lemmatizer.dict"));
+ public static boolean isInitialized() {
+ return (dictionary != null && morph != null && stemCache != null
+ && stopCache != null && relvCache != null);
+ }
- dictionary = Dictionary.getDefaultResourceInstance();
- morph = dictionary.getMorphologicalProcessor();
+ public void load() {
+ try {
+ SentenceModel enSentModel = new SentenceModel(new FileInputStream(
+ modelsDir + "en-sent.bin"));
+ sdetector = new SentenceDetectorME(enSentModel);
- // loading lookup caches
- stemCache = new HashMap();
- for (Object pos : POS.getAllPOS()){
- stemCache.put(((POS)pos).getKey(),new HashMap());
- }
+ TokenizerModel TokenizerModel = new TokenizerModel(new FileInputStream(
+ modelsDir + "en-token.bin"));
+ tokenizer = new TokenizerME(TokenizerModel);
- stopCache = new HashMap<String, Object>();
- for (String s : Constants.stopWords){
- stopCache.put(s, null);
- }
- relvCache = new HashMap<String, Object>();
- for (String t : Constants.relevantPOS){
- relvCache.put(t, null);
- }
+ POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir
+ + "en-pos-maxent.bin"));
+ tagger = new POSTaggerME(posTaggerModel);
+ TokenNameFinderModel nameFinderModel = new TokenNameFinderModel(
+ new FileInputStream(modelsDir + "en-ner-person.bin"));
+ nameFinder = new NameFinderME(nameFinderModel);
- if (isInitialized()){
- Constants.print("loading was succesfull");
- }else{
- Constants.print("loading was unsuccesfull");
- }
+ lemmatizer = new SimpleLemmatizer(new FileInputStream(modelsDir
+ + "en-lemmatizer.dict"));
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (InvalidFormatException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- }
+ dictionary = Dictionary.getDefaultResourceInstance();
+ morph = dictionary.getMorphologicalProcessor();
- public static void unload ()
- {
- dictionary.close();
- }
+ // loading lookup caches
+ stemCache = new HashMap();
+ for (Object pos : POS.getAllPOS()) {
+ stemCache.put(((POS) pos).getKey(), new HashMap());
+ }
+ stopCache = new HashMap<String, Object>();
+ for (String s : Constants.stopWords) {
+ stopCache.put(s, null);
+ }
+ relvCache = new HashMap<String, Object>();
+ for (String t : Constants.relevantPOS) {
+ relvCache.put(t, null);
+ }
+ if (isInitialized()) {
+        Constants.print("loading was successful");
+ } else {
+        Constants.print("loading was unsuccessful");
+ }
+
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ } catch (InvalidFormatException e) {
+ e.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ }
+
+ public static void unload() {
+ dictionary.close();
+ }
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
index f17ffcd..8f06b8c 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
@@ -9,8 +9,6 @@
import net.sf.extjwnl.data.list.PointerTargetNode;
import net.sf.extjwnl.data.list.PointerTargetNodeList;
-
-
/**
* Convenience class to access some features.
*/
@@ -19,44 +17,42 @@
public Synset parent;
public Synset synset;
-
+
protected ArrayList<WordPOS> senseRelevantWords;
public ArrayList<Synset> hypernyms = new ArrayList<Synset>();
public ArrayList<Synset> hyponyms = new ArrayList<Synset>();
public ArrayList<Synset> meronyms = new ArrayList<Synset>();
public ArrayList<Synset> holonyms = new ArrayList<Synset>();
-
- public ArrayList<WordPOS> synonyms = new ArrayList<WordPOS>();
-
-
- public Node(Synset parent, Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
- this.parent = parent;
- this.synset = synSet;
- this.senseRelevantWords = senseRelevantWords;
- }
-
- public Node(Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
- this.synset = synSet;
- this.senseRelevantWords = senseRelevantWords;
- }
-
-
- public ArrayList<WordPOS> getSenseRelevantWords() {
- return senseRelevantWords;
- }
- public void setSenseRelevantWords(ArrayList<WordPOS> senseRelevantWords) {
- this.senseRelevantWords = senseRelevantWords;
- }
-
+ public ArrayList<WordPOS> synonyms = new ArrayList<WordPOS>();
+
+ public Node(Synset parent, Synset synSet,
+ ArrayList<WordPOS> senseRelevantWords) {
+ this.parent = parent;
+ this.synset = synSet;
+ this.senseRelevantWords = senseRelevantWords;
+ }
+
+ public Node(Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
+ this.synset = synSet;
+ this.senseRelevantWords = senseRelevantWords;
+ }
+
+ public ArrayList<WordPOS> getSenseRelevantWords() {
+ return senseRelevantWords;
+ }
+
+ public void setSenseRelevantWords(ArrayList<WordPOS> senseRelevantWords) {
+ this.senseRelevantWords = senseRelevantWords;
+ }
+
public String getSense() {
return this.synset.getGloss().toString();
}
-
public void setHypernyms() {
- // PointerUtils pointerUtils = PointerUtils.get();
+ // PointerUtils pointerUtils = PointerUtils.get();
PointerTargetNodeList phypernyms = new PointerTargetNodeList();
try {
phypernyms = PointerUtils.getDirectHypernyms(this.synset);
@@ -75,10 +71,10 @@
}
public void setMeronyms() {
- //PointerUtils pointerUtils = PointerUtils.getInstance();
+ // PointerUtils pointerUtils = PointerUtils.getInstance();
PointerTargetNodeList pmeronyms = new PointerTargetNodeList();
try {
- pmeronyms = PointerUtils.getMeronyms(this.synset);
+ pmeronyms = PointerUtils.getMeronyms(this.synset);
} catch (JWNLException e) {
e.printStackTrace();
} catch (NullPointerException e) {
@@ -91,67 +87,66 @@
this.meronyms.add(ptn.getSynset());
}
}
-
+
public void setHolonyms() {
- // PointerUtils pointerUtils = PointerUtils.getInstance();
- PointerTargetNodeList pholonyms = new PointerTargetNodeList();
- try {
- pholonyms = PointerUtils.getHolonyms(this.synset);
- } catch (JWNLException e) {
- e.printStackTrace();
- } catch (NullPointerException e) {
- System.err.println("Error finding the holonyms");
- e.printStackTrace();
- }
+ // PointerUtils pointerUtils = PointerUtils.getInstance();
+ PointerTargetNodeList pholonyms = new PointerTargetNodeList();
+ try {
+ pholonyms = PointerUtils.getHolonyms(this.synset);
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ } catch (NullPointerException e) {
+ System.err.println("Error finding the holonyms");
+ e.printStackTrace();
+ }
- for (int i = 0; i < pholonyms.size(); i++) {
- PointerTargetNode ptn = (PointerTargetNode) pholonyms.get(i);
- this.holonyms.add(ptn.getSynset());
- }
+ for (int i = 0; i < pholonyms.size(); i++) {
+ PointerTargetNode ptn = (PointerTargetNode) pholonyms.get(i);
+ this.holonyms.add(ptn.getSynset());
+ }
- }
-
+ }
+
public void setHyponyms() {
- // PointerUtils pointerUtils = PointerUtils.getInstance();
- PointerTargetNodeList phyponyms = new PointerTargetNodeList();
- try {
- phyponyms = PointerUtils.getDirectHyponyms(this.synset);
- } catch (JWNLException e) {
- e.printStackTrace();
- } catch (NullPointerException e) {
- System.err.println("Error finding the hyponyms");
- e.printStackTrace();
- }
+ // PointerUtils pointerUtils = PointerUtils.getInstance();
+ PointerTargetNodeList phyponyms = new PointerTargetNodeList();
+ try {
+ phyponyms = PointerUtils.getDirectHyponyms(this.synset);
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ } catch (NullPointerException e) {
+ System.err.println("Error finding the hyponyms");
+ e.printStackTrace();
+ }
- for (int i = 0; i < phyponyms.size(); i++) {
- PointerTargetNode ptn = (PointerTargetNode) phyponyms.get(i);
- this.hyponyms.add(ptn.getSynset());
- }
- }
-
- public void setSynonyms()
- {
+ for (int i = 0; i < phyponyms.size(); i++) {
+ PointerTargetNode ptn = (PointerTargetNode) phyponyms.get(i);
+ this.hyponyms.add(ptn.getSynset());
+ }
+ }
+
+ public void setSynonyms() {
for (Word word : synset.getWords())
- synonyms.add(new WordPOS(word.toString(),word.getPOS()));
- }
-
- public ArrayList<Synset> getHypernyms() {
- return hypernyms;
- }
-
- public ArrayList<Synset> getHyponyms() {
- return hyponyms;
- }
-
- public ArrayList<Synset> getMeronyms() {
- return meronyms;
- }
- public ArrayList<Synset> getHolonyms() {
- return holonyms;
+ synonyms.add(new WordPOS(word.toString(), word.getPOS()));
}
- public ArrayList<WordPOS> getSynonyms()
- {
+ public ArrayList<Synset> getHypernyms() {
+ return hypernyms;
+ }
+
+ public ArrayList<Synset> getHyponyms() {
+ return hyponyms;
+ }
+
+ public ArrayList<Synset> getMeronyms() {
+ return meronyms;
+ }
+
+ public ArrayList<Synset> getHolonyms() {
+ return holonyms;
+ }
+
+ public ArrayList<WordPOS> getSynonyms() {
return synonyms;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
index 9eaf895..c34d26b 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
@@ -5,159 +5,156 @@
import java.util.List;
import net.sf.extjwnl.JWNLException;
-import net.sf.extjwnl.data.IndexWord;
import net.sf.extjwnl.data.POS;
import opennlp.tools.util.Span;
-
-
public class PreProcessor {
- public PreProcessor() {
- super();
- }
+ public PreProcessor() {
+ super();
+ }
- public static String[] split(String text) {
- return Loader.getSDetector().sentDetect(text);
- }
+ public static String[] split(String text) {
+ return Loader.getSDetector().sentDetect(text);
+ }
- public static String[] tokenize(String sentence) {
- return Loader.getTokenizer().tokenize(sentence);
- }
+ public static String[] tokenize(String sentence) {
+ return Loader.getTokenizer().tokenize(sentence);
+ }
- public static String[] tag(String[] tokenizedSentence) {
- return Loader.getTagger().tag(tokenizedSentence);
- }
+ public static String[] tag(String[] tokenizedSentence) {
+ return Loader.getTagger().tag(tokenizedSentence);
+ }
- public static String lemmatize(String word, String posTag) {
- return Loader.getLemmatizer().lemmatize(word, posTag);
- }
+ public static String lemmatize(String word, String posTag) {
+ return Loader.getLemmatizer().lemmatize(word, posTag);
+ }
- public static boolean isName(String word) {
- Span nameSpans[] = Loader.getNameFinder().find(new String[] { word });
- return (nameSpans.length != 0);
- }
+ public static boolean isName(String word) {
+ Span nameSpans[] = Loader.getNameFinder().find(new String[] { word });
+ return (nameSpans.length != 0);
+ }
- public static ArrayList<WordPOS> getAllRelevantWords(String[] sentence) {
+ public static ArrayList<WordPOS> getAllRelevantWords(String[] sentence) {
- ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+ ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
- String[] tags = tag(sentence);
+ String[] tags = tag(sentence);
- for (int i = 0; i<sentence.length; i++) {
- if (!Loader.getStopCache().containsKey(sentence[i])) {
- if (Loader.getRelvCache().containsKey(tags[i])) {
- relevantWords.add(new WordPOS(sentence[i],Constants.getPOS(tags[i])));
- }
+ for (int i = 0; i < sentence.length; i++) {
+ if (!Loader.getStopCache().containsKey(sentence[i])) {
+ if (Loader.getRelvCache().containsKey(tags[i])) {
+ relevantWords
+ .add(new WordPOS(sentence[i], Constants.getPOS(tags[i])));
+ }
- }
- }
- return relevantWords;
- }
+ }
+ }
+ return relevantWords;
+ }
+ public static ArrayList<WordPOS> getAllRelevantWords(WordToDisambiguate word) {
+ return getAllRelevantWords(word.getSentence());
+ }
- public static ArrayList<WordPOS> getAllRelevantWords(WordToDisambiguate word) {
- return getAllRelevantWords(word.getSentence());
- }
+ public static ArrayList<WordPOS> getRelevantWords(WordToDisambiguate word,
+ int winBackward, int winForward) {
+ ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
- public static ArrayList<WordPOS> getRelevantWords(WordToDisambiguate word, int winBackward, int winForward) {
+ String[] sentence = word.getSentence();
+ String[] tags = tag(sentence);
- ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+ int index = word.getWordIndex();
- String[] sentence = word.getSentence();
- String[] tags = tag(sentence);
+ for (int i = index - winBackward; i <= index + winForward; i++) {
- int index = word.getWordIndex();
+ if (i >= 0 && i < sentence.length && i != index) {
+ if (!Loader.getStopCache().containsKey(sentence[i])) {
- for (int i = index - winBackward; i<=index + winForward; i++) {
+ if (Loader.getRelvCache().containsKey(tags[i])) {
+ relevantWords.add(new WordPOS(sentence[i], Constants
+ .getPOS(tags[i])));
+ }
- if (i >= 0 && i < sentence.length && i != index) {
- if (!Loader.getStopCache().containsKey(sentence[i])) {
+ }
+ }
+ }
+ return relevantWords;
+ }
- if (Loader.getRelvCache().containsKey(tags[i])) {
- relevantWords.add(new WordPOS(sentence[i],Constants.getPOS(tags[i])));
- }
+ /**
+   * Stem a single word with WordNet dictionary
+ *
+ * @param wordToStem
+ * word to be stemmed
+ * @return stemmed list of words
+ */
+ public static List StemWordWithWordNet(WordPOS wordToStem) {
+ if (!Loader.isInitialized() || wordToStem == null)
+ return null;
+ ArrayList<String> stems = new ArrayList();
+ try {
+ for (Object pos : POS.getAllPOS()) {
+ stems.addAll(Loader.getMorph().lookupAllBaseForms((POS) pos,
+ wordToStem.getWord()));
+ }
- }
- }
- }
- return relevantWords;
- }
+ if (stems.size() > 0)
+ return stems;
+ else {
+ return null;
+ }
-
- /**
- * Stem a single word with WordNet dictionnary
- *
- * @param wordToStem
- * word to be stemmed
- * @return stemmed list of words
- */
- public static List StemWordWithWordNet(WordPOS wordToStem) {
- if (!Loader.isInitialized()
- || wordToStem == null)
- return null;
- ArrayList<String> stems = new ArrayList();
- try {
- for (Object pos : POS.getAllPOS()){
- stems.addAll(Loader.getMorph().lookupAllBaseForms((POS)pos, wordToStem.getWord())) ;
- }
-
- if (stems.size()>0)
- return stems;
- else{
- return null;
- }
-
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- return null;
- }
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ return null;
+ }
- /**
- * Stem a single word tries to look up the word in the stemCache HashMap If
- * the word is not found it is stemmed with WordNet and put into stemCache
- *
- * @param wordToStem
- * word to be stemmed
- * @return stemmed word list, null means the word is incorrect
- */
- public static List Stem(WordPOS wordToStem) {
-
- // check if we already cached the stem map
- HashMap posMap = (HashMap) Loader.getStemCache().get(wordToStem.getPOS().getKey());
-
- // don't check words with digits in them
- if (containsNumbers(wordToStem.getWord())){
- return null;
- }
-
- List stemList = (List) posMap.get(wordToStem.getWord());
- if (stemList != null){ // return it if we already cached it
- return stemList;
-
- } else { // unCached list try to stem it
- stemList = StemWordWithWordNet(wordToStem);
- if (stemList != null) {
- // word was recognized and stemmed with wordnet:
- // add it to cache and return the stemmed list
- posMap.put(wordToStem.getWord(),stemList);
- Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
- return stemList;
- }else{ // could not be stemmed add it anyway (as incorrect with null list)
- posMap.put(wordToStem.getWord(), null);
- Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
- return null;
- }
- }
- }
-
- public static boolean containsNumbers(String word) {
- // checks if the word is or contains a number
- return word.matches(".*[0-9].*");
- }
-
-
-
+ /**
+   * Stems a single word: tries to look up the word in the stemCache HashMap.
+   * If the word is not found, it is stemmed with WordNet and put into stemCache
+ *
+ * @param wordToStem
+ * word to be stemmed
+ * @return stemmed word list, null means the word is incorrect
+ */
+ public static List Stem(WordPOS wordToStem) {
+
+ // check if we already cached the stem map
+ HashMap posMap = (HashMap) Loader.getStemCache().get(
+ wordToStem.getPOS().getKey());
+
+ // don't check words with digits in them
+ if (containsNumbers(wordToStem.getWord())) {
+ return null;
+ }
+
+ List stemList = (List) posMap.get(wordToStem.getWord());
+ if (stemList != null) { // return it if we already cached it
+ return stemList;
+
+ } else { // unCached list try to stem it
+ stemList = StemWordWithWordNet(wordToStem);
+ if (stemList != null) {
+ // word was recognized and stemmed with wordnet:
+ // add it to cache and return the stemmed list
+ posMap.put(wordToStem.getWord(), stemList);
+ Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
+ return stemList;
+      } else { // could not be stemmed; add it anyway (as incorrect with null
+ // list)
+ posMap.put(wordToStem.getWord(), null);
+ Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
+ return null;
+ }
+ }
+ }
+
+ public static boolean containsNumbers(String word) {
+ // checks if the word is or contains a number
+ return word.matches(".*[0-9].*");
+ }
+
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
index d7e626c..01096c9 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
@@ -6,10 +6,8 @@
* The interface for word sense disambiguators.
*/
public interface WSDisambiguator {
-
- public String[] disambiguate(String[] inputText,int inputWordIndex);
-
- public String[] disambiguate(String[] inputText, Span[] inputWordSpans);
-
-}
\ No newline at end of file
+ public String[] disambiguate(String[] inputText, int inputWordIndex);
+
+ public String[] disambiguate(String[] inputText, Span[] inputWordSpans);
+}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
index 4efebad..5eac804 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
@@ -1,7 +1,6 @@
package opennlp.tools.disambiguator;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@@ -9,86 +8,84 @@
import net.sf.extjwnl.data.IndexWord;
import net.sf.extjwnl.data.POS;
import net.sf.extjwnl.data.Synset;
-import net.sf.extjwnl.dictionary.Dictionary;
-
public class WordPOS {
- private String word;
- private List stems;
- private POS pos;
+ private String word;
+ private List stems;
+ private POS pos;
- // Constructor
- public WordPOS(String word, POS pos) throws IllegalArgumentException{
- if (word==null || pos ==null){
- throw new IllegalArgumentException("Args are null");
- }
- this.word = word;
- this.pos = pos;
- }
+ // Constructor
+ public WordPOS(String word, POS pos) throws IllegalArgumentException {
+ if (word == null || pos == null) {
+ throw new IllegalArgumentException("Args are null");
+ }
+ this.word = word;
+ this.pos = pos;
+ }
- public String getWord() {
- return word;
- }
+ public String getWord() {
+ return word;
+ }
- public POS getPOS() {
- return pos;
- }
+ public POS getPOS() {
+ return pos;
+ }
- public List getStems() {
- if (stems==null){
- return PreProcessor.Stem(this);
- }else{
- return stems;
- }
- }
+ public List getStems() {
+ if (stems == null) {
+ return PreProcessor.Stem(this);
+ } else {
+ return stems;
+ }
+ }
+ // Return the synsets (thus the senses) of the current word
+ public ArrayList<Synset> getSynsets() {
- // Return the synsets (thus the senses) of the current word
- public ArrayList<Synset> getSynsets() {
+ IndexWord indexWord;
+ try {
+ indexWord = Loader.getDictionary().lookupIndexWord(pos, word);
+ List<Synset> synsets = indexWord.getSenses();
+ return (new ArrayList<Synset>(synsets));
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ return null;
+ }
- IndexWord indexWord;
- try {
- indexWord = Loader.getDictionary().lookupIndexWord(pos, word);
- List<Synset> synsets = indexWord.getSenses();
- return (new ArrayList<Synset>(synsets));
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- return null;
- }
+ // uses Stemming to check if two words are equivalent
+ public boolean isStemEquivalent(WordPOS wordToCompare) {
+ // check if there is intersection in the stems;
+ List originalList = this.getStems();
+ List listToCompare = wordToCompare.getStems();
- // uses Stemming to check if two words are equivalent
- public boolean isStemEquivalent(WordPOS wordToCompare) {
- // check if there is intersection in the stems;
- List originalList = this.getStems();
- List listToCompare = wordToCompare.getStems();
+ // Constants.print("+++++++++++++++++++++ ::: "+ this.getWord());
+ // Constants.print("+++++++++++++++++++++ ::: "+ wordToCompare.getWord());
+ // Constants.print("the first list is \n"+originalList.toString());
+ // Constants.print("the second list is \n"+listToCompare.toString());
-// Constants.print("+++++++++++++++++++++ ::: "+ this.getWord());
-// Constants.print("+++++++++++++++++++++ ::: "+ wordToCompare.getWord());
-// Constants.print("the first list is \n"+originalList.toString());
-// Constants.print("the second list is \n"+listToCompare.toString());
+ if (originalList == null || listToCompare == null) { // any of the two
+ // requested words do
+ // not exist
+ return false;
+ } else {
+ return !Collections.disjoint(originalList, listToCompare);
+ }
- if(originalList==null || listToCompare==null){ // any of the two requested words do not exist
- return false;
- }else{
- return !Collections.disjoint(originalList, listToCompare);
- }
+ }
- }
+ // uses Lemma to check if two words are equivalent
+ public boolean isLemmaEquivalent(WordPOS wordToCompare) {
+ // TODO use lemmatizer to compare with lemmas
+ ArrayList<String> lemmas_word = new ArrayList();
+ ArrayList<String> lemmas_wordToCompare = new ArrayList();
- // uses Lemma to check if two words are equivalent
- public boolean isLemmaEquivalent(WordPOS wordToCompare) {
- // TODO use lemmatizer to compare with lemmas
-
- ArrayList<String> lemmas_word = new ArrayList();
- ArrayList<String> lemmas_wordToCompare = new ArrayList();
-
- for (String pos : Constants.allPOS){
- Loader.getLemmatizer().lemmatize(wordToCompare.getWord(), pos);
- }
- return false;
- }
+ for (String pos : Constants.allPOS) {
+ Loader.getLemmatizer().lemmatize(wordToCompare.getWord(), pos);
+ }
+ return false;
+ }
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
index a0ac525..30e6e8b 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
@@ -1,75 +1,62 @@
package opennlp.tools.disambiguator;
-import java.util.ArrayList;
-import java.util.concurrent.Semaphore;
-
import opennlp.tools.disambiguator.lesk.WTDLesk;
-public class WordSense implements Comparable{
-
- protected WTDLesk WTDLesk;
- protected Node node;
- protected int id;
- protected double score;
-
-
- public WordSense(WTDLesk WTDLesk, Node node) {
- super();
- this.WTDLesk = WTDLesk;
- this.node = node;
- }
+public class WordSense implements Comparable {
- public WordSense() {
- super();
- }
+ protected WTDLesk WTDLesk;
+ protected Node node;
+ protected int id;
+ protected double score;
-
- public WTDLesk getWTDLesk() {
- return WTDLesk;
- }
+ public WordSense(WTDLesk WTDLesk, Node node) {
+ super();
+ this.WTDLesk = WTDLesk;
+ this.node = node;
+ }
- public void setWTDLesk(WTDLesk WTDLesk) {
- this.WTDLesk = WTDLesk;
- }
+ public WordSense() {
+ super();
+ }
-
- public Node getNode() {
- return node;
- }
+ public WTDLesk getWTDLesk() {
+ return WTDLesk;
+ }
- public void setNode(Node node) {
- this.node = node;
- }
+ public void setWTDLesk(WTDLesk WTDLesk) {
+ this.WTDLesk = WTDLesk;
+ }
-
- public double getScore() {
- return score;
- }
+ public Node getNode() {
+ return node;
+ }
- public void setScore(double score) {
- this.score = score;
- }
+ public void setNode(Node node) {
+ this.node = node;
+ }
- public int getId() {
- return id;
- }
+ public double getScore() {
+ return score;
+ }
- public void setId(int id) {
- this.id = id;
- }
+ public void setScore(double score) {
+ this.score = score;
+ }
+ public int getId() {
+ return id;
+ }
- public int compareTo(Object o) {
- return (this.score-((WordSense)o).score)<0?1:-1;
- }
-
-
- public String getSense() {
- return node.getSense();
- }
-
+ public void setId(int id) {
+ this.id = id;
+ }
+ public int compareTo(Object o) {
+ return (this.score - ((WordSense) o).score) < 0 ? 1 : -1;
+ }
+
+ public String getSense() {
+ return node.getSense();
+ }
}
-
-
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
index b9f8181..e572153 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
@@ -1,95 +1,77 @@
package opennlp.tools.disambiguator;
-
-
public class WordToDisambiguate {
-
- protected String [] sentence;
- protected int wordIndex;
- protected String posTag;
- protected int sense;
-
-
-
- /**
- * Constructor
- */
-
-
- public WordToDisambiguate(String[] sentence, int wordIndex, int sense) throws IllegalArgumentException{
- super();
-
- if (wordIndex>sentence.length){
- throw new IllegalArgumentException("The index is out of bounds !");
- }
- this.sentence = sentence;
- this.wordIndex = wordIndex;
- String[] posTags = PreProcessor.tag(sentence);
- this.posTag = posTags[wordIndex];
- this.sense = sense;
- }
-
- public WordToDisambiguate(String[] sentence, int wordIndex) {
- this(sentence,wordIndex,-1);
- }
-
-
-
- /**
- * Getters and Setters
- */
-
-
+ protected String[] sentence;
+ protected int wordIndex;
+ protected String posTag;
- // sentence
- public String[] getSentence() {
- return sentence;
- }
+ protected int sense;
- public void setSentence(String[] sentence) {
- this.sentence = sentence;
- }
+ /**
+ * Constructor
+ */
-
- // word
- public int getWordIndex() {
- return wordIndex;
- }
+ public WordToDisambiguate(String[] sentence, int wordIndex, int sense)
+ throws IllegalArgumentException {
+ super();
- public void setWordIndex(int wordIndex) {
- this.wordIndex = wordIndex;
- }
-
- public String getWord(){
- return sentence[wordIndex];
- }
-
-
- // posTag
- public String getPosTag() {
- return posTag;
- }
+ if (wordIndex > sentence.length) {
+ throw new IllegalArgumentException("The index is out of bounds !");
+ }
+ this.sentence = sentence;
+ this.wordIndex = wordIndex;
+ String[] posTags = PreProcessor.tag(sentence);
+ this.posTag = posTags[wordIndex];
+ this.sense = sense;
+ }
- public void setPosTag(String posTag) {
- this.posTag = posTag;
- }
-
-
- // sense
- public int getSense() {
- return sense;
- }
+ public WordToDisambiguate(String[] sentence, int wordIndex) {
+ this(sentence, wordIndex, -1);
+ }
- public void setSense(int sense) {
- this.sense = sense;
- }
+ /**
+ * Getters and Setters
+ */
+ // sentence
+ public String[] getSentence() {
+ return sentence;
+ }
+ public void setSentence(String[] sentence) {
+ this.sentence = sentence;
+ }
-
-
+ // word
+ public int getWordIndex() {
+ return wordIndex;
+ }
-
+ public void setWordIndex(int wordIndex) {
+ this.wordIndex = wordIndex;
+ }
+ public String getWord() {
+ return sentence[wordIndex];
+ }
+
+ // posTag
+ public String getPosTag() {
+ return posTag;
+ }
+
+ public void setPosTag(String posTag) {
+ this.posTag = posTag;
+ }
+
+ // sense
+ public int getSense() {
+ return sense;
+ }
+
+ public void setSense(int sense) {
+ this.sense = sense;
+ }
}
+
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
index aa582b5..e171fd0 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
@@ -5,108 +5,94 @@
import opennlp.tools.disambiguator.Constants;
import opennlp.tools.disambiguator.Loader;
-
public class FeaturesExtractor {
-
-
- public FeaturesExtractor() {
- super();
- }
+ public FeaturesExtractor() {
+ super();
+ }
+ /**
+ * @Algorithm: IMS (It Makes Sense)
+ *
+ * The following methods serve to extract the features for the
+ * algorithm IMS.
+ */
- /**
- * @Algorithm: IMS (It Makes Sense)
- *
- * The following methods serve to extract the features for the algorithm IMS.
- */
-
- public String[] extractPosOfSurroundingWords (String[] sentence, int wordIndex, int numberOfWords) {
-
- String[] taggedSentence = Loader.getTagger().tag(sentence);
-
- String[] tags = new String[2*numberOfWords+1];
-
- int j = 0;
-
- for (int i = wordIndex - numberOfWords; i < wordIndex + numberOfWords ; i++) {
- if (i < 0 || i >= sentence.length) {
- tags[j] = "null";
- } else {
- tags[j] = taggedSentence[i];
- }
- j++;
- }
-
- return tags;
- }
-
-
- public String[] extractSurroundingWords(String[] sentence, int wordIndex) {
+ public String[] extractPosOfSurroundingWords(String[] sentence,
+ int wordIndex, int numberOfWords) {
- String[] posTags = Loader.getTagger().tag(sentence);
-
- Constants.print(posTags);
-
- ArrayList<String> contextWords = new ArrayList<String>();
+ String[] taggedSentence = Loader.getTagger().tag(sentence);
- for (int i = 0; i < sentence.length; i++) {
+ String[] tags = new String[2 * numberOfWords + 1];
- if (!Constants.stopWords.contains(sentence[i].toLowerCase())
- && (wordIndex != i)) {
-
- String word = sentence[i].toLowerCase().replaceAll("[^a-z]", "").trim();
-
- if (!word.equals("")) {
- String lemma = Loader.getLemmatizer().lemmatize(sentence[i], posTags[i]);
- contextWords.add(lemma);
- }
-
-
+ int j = 0;
+ for (int i = wordIndex - numberOfWords; i < wordIndex + numberOfWords; i++) {
+ if (i < 0 || i >= sentence.length) {
+ tags[j] = "null";
+ } else {
+ tags[j] = taggedSentence[i];
+ }
+ j++;
+ }
- }
- }
+ return tags;
+ }
- return contextWords.toArray(new String[contextWords.size()]);
- }
-
-
- public ArrayList<String[]> extractLocalCollocations(String[] sentence, int wordIndex, int range) {
- /**
- * Here the author used only 11 features of this type. the range was set to 3 (bigrams extracted in a way that they are at max separated
- * by 1 word).
- */
-
- ArrayList<String[]> localCollocations = new ArrayList<String[]>();
-
- for (int i = wordIndex - range; i <= wordIndex + range ; i++) {
-
- if (!(i < 0 || i > sentence.length - 2)) {
- if ((i != wordIndex) && (i+1 != wordIndex) && (i+1 < wordIndex + range)) {
- String[] lc = {sentence[i], sentence[i+1]};
- localCollocations.add(lc);
- }
- if ((i != wordIndex) && (i+2 != wordIndex) && (i+2 < wordIndex + range)) {
- String[] lc = {sentence[i], sentence[i+2]};
- localCollocations.add(lc);
- }
- }
-
- }
-
- return localCollocations;
- }
+ public String[] extractSurroundingWords(String[] sentence, int wordIndex) {
-
- /**
- * @Algorithm: SST
- *
- * The following methods serve to extract the features for the algorithm SST.
- */
-
-
-
-
-
+ String[] posTags = Loader.getTagger().tag(sentence);
+
+ Constants.print(posTags);
+
+ ArrayList<String> contextWords = new ArrayList<String>();
+
+ for (int i = 0; i < sentence.length; i++) {
+
+ if (!Constants.stopWords.contains(sentence[i].toLowerCase())
+ && (wordIndex != i)) {
+
+ String word = sentence[i].toLowerCase().replaceAll("[^a-z]", "").trim();
+
+ if (!word.equals("")) {
+ String lemma = Loader.getLemmatizer().lemmatize(sentence[i],
+ posTags[i]);
+ contextWords.add(lemma);
+ }
+
+ }
+ }
+
+ return contextWords.toArray(new String[contextWords.size()]);
+ }
+
+ public ArrayList<String[]> extractLocalCollocations(String[] sentence,
+ int wordIndex, int range) {
+ /**
+     * Here the author used only 11 features of this type. The range was set to
+ * 3 (bigrams extracted in a way that they are at max separated by 1 word).
+ */
+
+ ArrayList<String[]> localCollocations = new ArrayList<String[]>();
+
+ for (int i = wordIndex - range; i <= wordIndex + range; i++) {
+
+ if (!(i < 0 || i > sentence.length - 2)) {
+ if ((i != wordIndex) && (i + 1 != wordIndex)
+ && (i + 1 < wordIndex + range)) {
+ String[] lc = { sentence[i], sentence[i + 1] };
+ localCollocations.add(lc);
+ }
+ if ((i != wordIndex) && (i + 2 != wordIndex)
+ && (i + 2 < wordIndex + range)) {
+ String[] lc = { sentence[i], sentence[i + 2] };
+ localCollocations.add(lc);
+ }
+ }
+
+ }
+
+ return localCollocations;
+ }
}
+
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
index 8f12ded..45af86d 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
@@ -5,95 +5,90 @@
import opennlp.tools.disambiguator.WSDisambiguator;
import opennlp.tools.util.Span;
-public class IMS implements WSDisambiguator{
-
- FeaturesExtractor fExtractor = new FeaturesExtractor();
-
- /**
- * PARAMETERS
- */
-
- int numberOfSurroundingWords;
- int ngram;
-
-
-
- /**
- * Constructors
- */
-
- public IMS() {
- super();
- numberOfSurroundingWords = 3;
- ngram = 2;
- }
-
- public IMS(int numberOfSurroundingWords, int ngram) {
- super();
- this.numberOfSurroundingWords = numberOfSurroundingWords;
- this.ngram = ngram;
- }
-
-
-
- /**
- * INTERNAL METHODS
- */
-
- private void extractFeature(ArrayList<WTDIMS> words) {
-
- for (WTDIMS word : words) {
-
- word.setPosOfSurroundingWords(fExtractor.extractPosOfSurroundingWords(word.getSentence(), word.getWordIndex(), numberOfSurroundingWords));
-
- word.setSurroundingWords(fExtractor.extractSurroundingWords(word.getSentence(), word.getWordIndex()));
-
- word.setLocalCollocations(fExtractor.extractLocalCollocations(word.getSentence(), word.getWordIndex(), ngram));
-
- }
+public class IMS implements WSDisambiguator {
- }
-
- private ArrayList<WTDIMS> extractTrainingData(String xmlFile) {
-
- ArrayList<WTDIMS> trainingData = new ArrayList<WTDIMS>();
-
- /**
- * TODO Processing of the xml File here (To check the format of the data)
- */
-
- return trainingData;
- }
-
-
- public void train(String trainingSetFile) { // TODO To revise after finihsing the implementation of the collector
-
- ArrayList<WTDIMS> instances = extractTrainingData(trainingSetFile);
-
- extractFeature(instances);
-
-
-
- }
-
-
- public void load (String binFile) {
- // TODO After finishing training the training data
-
- }
-
+ FeaturesExtractor fExtractor = new FeaturesExtractor();
- @Override
- public String[] disambiguate(String[] inputText, int inputWordIndex) {
- // TODO Auto-generated method stub
- return null;
- }
+ /**
+ * PARAMETERS
+ */
- @Override
- public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
- // TODO Auto-generated method stub
- return null;
- }
-
+ int numberOfSurroundingWords;
+ int ngram;
+
+ /**
+ * Constructors
+ */
+
+ public IMS() {
+ super();
+ numberOfSurroundingWords = 3;
+ ngram = 2;
+ }
+
+ public IMS(int numberOfSurroundingWords, int ngram) {
+ super();
+ this.numberOfSurroundingWords = numberOfSurroundingWords;
+ this.ngram = ngram;
+ }
+
+ /**
+ * INTERNAL METHODS
+ */
+
+ private void extractFeature(ArrayList<WTDIMS> words) {
+
+ for (WTDIMS word : words) {
+
+ word.setPosOfSurroundingWords(fExtractor.extractPosOfSurroundingWords(
+ word.getSentence(), word.getWordIndex(), numberOfSurroundingWords));
+
+ word.setSurroundingWords(fExtractor.extractSurroundingWords(
+ word.getSentence(), word.getWordIndex()));
+
+ word.setLocalCollocations(fExtractor.extractLocalCollocations(
+ word.getSentence(), word.getWordIndex(), ngram));
+
+ }
+
+ }
+
+ private ArrayList<WTDIMS> extractTrainingData(String xmlFile) {
+
+ ArrayList<WTDIMS> trainingData = new ArrayList<WTDIMS>();
+
+ /**
+ * TODO Processing of the xml File here (To check the format of the data)
+ */
+
+ return trainingData;
+ }
+
+  public void train(String trainingSetFile) { // TODO To revise after finishing
+ // the implementation of the
+ // collector
+
+ ArrayList<WTDIMS> instances = extractTrainingData(trainingSetFile);
+
+ extractFeature(instances);
+
+ }
+
+ public void load(String binFile) {
+ // TODO After finishing training the training data
+
+ }
+
+ @Override
+ public String[] disambiguate(String[] inputText, int inputWordIndex) {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
index 6cd87a4..dcb9f06 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
@@ -1,56 +1,49 @@
package opennlp.tools.disambiguator.ims;
+
import java.util.ArrayList;
import opennlp.tools.disambiguator.WordToDisambiguate;
-
public class WTDIMS extends WordToDisambiguate {
-
- protected String[] posOfSurroundingWords;
- protected String[] surroundingWords;
- protected ArrayList<String[]> localCollocations;
-
-
-
- /**
- * Constructor
- */
- public WTDIMS(String[] sentence, int word, int sense) {
- super(sentence, word, sense);
- }
-
-
- /**
- * Getters and Setters
- */
-
-
- public String[] getPosOfSurroundingWords() {
- return posOfSurroundingWords;
- }
+ protected String[] posOfSurroundingWords;
+ protected String[] surroundingWords;
+ protected ArrayList<String[]> localCollocations;
- public void setPosOfSurroundingWords(String[] posOfSurroundingWords) {
- this.posOfSurroundingWords = posOfSurroundingWords;
- }
-
+ /**
+ * Constructor
+ */
+ public WTDIMS(String[] sentence, int word, int sense) {
+ super(sentence, word, sense);
+ }
- public String[] getSurroundingWords() {
- return surroundingWords;
- }
+ /**
+ * Getters and Setters
+ */
- public void setSurroundingWords(String[] surroundingWords) {
- this.surroundingWords = surroundingWords;
- }
+ public String[] getPosOfSurroundingWords() {
+ return posOfSurroundingWords;
+ }
-
- public ArrayList<String[]> getLocalCollocations() {
- return localCollocations;
- }
+ public void setPosOfSurroundingWords(String[] posOfSurroundingWords) {
+ this.posOfSurroundingWords = posOfSurroundingWords;
+ }
- public void setLocalCollocations(ArrayList<String[]> localCollocations) {
- this.localCollocations = localCollocations;
- }
-
-
+ public String[] getSurroundingWords() {
+ return surroundingWords;
+ }
+
+ public void setSurroundingWords(String[] surroundingWords) {
+ this.surroundingWords = surroundingWords;
+ }
+
+ public ArrayList<String[]> getLocalCollocations() {
+ return localCollocations;
+ }
+
+ public void setLocalCollocations(ArrayList<String[]> localCollocations) {
+ this.localCollocations = localCollocations;
+ }
+
}
+
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
index a006196..cdc94d2 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
@@ -3,10 +3,7 @@
import java.security.InvalidParameterException;
import java.util.ArrayList;
-
-
import java.util.Collections;
-import java.util.Map;
import opennlp.tools.disambiguator.Constants;
import opennlp.tools.disambiguator.Loader;
@@ -18,668 +15,637 @@
import opennlp.tools.util.Span;
import net.sf.extjwnl.data.Synset;
-
/**
* Class for the Lesk algorithm and variants.
*/
-public class Lesk implements WSDisambiguator{
+public class Lesk implements WSDisambiguator {
- protected LeskParameters params;
+ protected LeskParameters params;
- public Loader loader;
+ public Loader loader;
- public Lesk(){
- this(null);
- }
+ public Lesk() {
+ this(null);
+ }
- public Lesk(LeskParameters params) throws InvalidParameterException{
- loader = new Loader();
- this.setParams(params);
- }
+ public Lesk(LeskParameters params) throws InvalidParameterException {
+ loader = new Loader();
+ this.setParams(params);
+ }
- public void setParams(LeskParameters params) throws InvalidParameterException{
- if(params==null){
- this.params = new LeskParameters();
- }
- else{
- if (params.isValid()){
- this.params = params;
- }else{
- throw new InvalidParameterException("wrong params");
- }
- }
- }
+ public void setParams(LeskParameters params) throws InvalidParameterException {
+ if (params == null) {
+ this.params = new LeskParameters();
+ } else {
+ if (params.isValid()) {
+ this.params = params;
+ } else {
+ throw new InvalidParameterException("wrong params");
+ }
+ }
+ }
- public ArrayList<WordSense> basic(WTDLesk wtd) {
+ public ArrayList<WordSense> basic(WTDLesk wtd) {
- ArrayList<WordPOS> relvWords = PreProcessor.getAllRelevantWords(wtd);
- WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+ ArrayList<WordPOS> relvWords = PreProcessor.getAllRelevantWords(wtd);
+ WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
- ArrayList<Synset> synsets = word.getSynsets();
- ArrayList<Node> nodes = new ArrayList<Node>();
+ ArrayList<Synset> synsets = word.getSynsets();
+ ArrayList<Node> nodes = new ArrayList<Node>();
- for (Synset synset : synsets) {
- Node node = new Node(synset, relvWords);
- nodes.add(node);
- }
+ for (Synset synset : synsets) {
+ Node node = new Node(synset, relvWords);
+ nodes.add(node);
+ }
- ArrayList<WordSense> scoredSenses = updateSenses(nodes);
+ ArrayList<WordSense> scoredSenses = updateSenses(nodes);
- for (WordSense wordSense : scoredSenses) {
- wordSense.setWTDLesk(wtd);
- int count = 0;
- for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
- ArrayList stems = (ArrayList)PreProcessor.Stem(senseWordPOS);
- for (WordPOS sentenceWordPOS : relvWords) {
- // TODO change to lemma check
- if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
- count = count + 1;
- }
- }
- }
- wordSense.setScore(count);
- }
+ for (WordSense wordSense : scoredSenses) {
+ wordSense.setWTDLesk(wtd);
+ int count = 0;
+ for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
+ ArrayList stems = (ArrayList) PreProcessor.Stem(senseWordPOS);
+ for (WordPOS sentenceWordPOS : relvWords) {
+ // TODO change to lemma check
+ if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
+ count = count + 1;
+ }
+ }
+ }
+ wordSense.setScore(count);
+ }
- return scoredSenses;
- }
+ return scoredSenses;
+ }
- public ArrayList<WordSense> basicContextual(WTDLesk wtd) {
- return this.basicContextual(wtd,LeskParameters.DFLT_WIN_SIZE);
- }
+ public ArrayList<WordSense> basicContextual(WTDLesk wtd) {
+ return this.basicContextual(wtd, LeskParameters.DFLT_WIN_SIZE);
+ }
- public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowSize) {
- return this.basicContextual(wtd, windowSize,windowSize);
- }
+ public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowSize) {
+ return this.basicContextual(wtd, windowSize, windowSize);
+ }
- public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowBackward, int windowForward) {
+ public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowBackward,
+ int windowForward) {
- ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd, windowBackward, windowForward);
- WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+ ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,
+ windowBackward, windowForward);
+ WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
- ArrayList<Synset> synsets = word.getSynsets();
- ArrayList<Node> nodes = new ArrayList<Node>();
+ ArrayList<Synset> synsets = word.getSynsets();
+ ArrayList<Node> nodes = new ArrayList<Node>();
+ for (Synset synset : synsets) {
+ Node node = new Node(synset, relvWords);
+ nodes.add(node);
+ }
- for (Synset synset : synsets) {
- Node node = new Node(synset, relvWords);
- nodes.add(node);
- }
+ ArrayList<WordSense> scoredSenses = updateSenses(nodes);
- ArrayList<WordSense> scoredSenses = updateSenses(nodes);
+ for (WordSense wordSense : scoredSenses) {
+ wordSense.setWTDLesk(wtd);
+ int count = 0;
+ for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
- for (WordSense wordSense : scoredSenses) {
- wordSense.setWTDLesk(wtd);
+ for (WordPOS sentenceWordPOS : relvWords) {
+ // TODO change to lemma check
+ if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
+ count = count + 1;
+ }
+ }
- int count = 0;
- for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
+ }
+ wordSense.setScore(count);
- for (WordPOS sentenceWordPOS : relvWords) {
- // TODO change to lemma check
- if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
- count = count + 1;
- }
- }
+ }
- }
- wordSense.setScore(count);
+ Collections.sort(scoredSenses);
- }
+ return scoredSenses;
+ }
- Collections.sort(scoredSenses);
+ public ArrayList<WordSense> extended(WTDLesk wtd, int depth,
+ double depthScoreWeight, boolean includeSynonyms,
+ boolean includeHypernyms, boolean includeHyponyms,
+ boolean includeMeronyms, boolean includeHolonyms) {
- return scoredSenses;
- }
+ return extendedContextual(wtd, 0, depth, depthScoreWeight, includeSynonyms,
+ includeHypernyms, includeHyponyms, includeMeronyms, includeHolonyms);
- public ArrayList<WordSense> extended(WTDLesk wtd,
- int depth, double depthScoreWeight, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
+ }
- return extendedContextual(wtd, 0, depth,
- depthScoreWeight, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
+ public ArrayList<WordSense> extendedContextual(WTDLesk wtd, int depth,
+ double depthScoreWeight, boolean includeSynonyms,
+ boolean includeHypernyms, boolean includeHyponyms,
+ boolean includeMeronyms, boolean includeHolonyms) {
- }
+ return extendedContextual(wtd, LeskParameters.DFLT_WIN_SIZE, depth,
+ depthScoreWeight, includeSynonyms, includeHypernyms, includeHyponyms,
+ includeMeronyms, includeHolonyms);
- public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
- int depth, double depthScoreWeight,
- boolean includeSynonyms, boolean includeHypernyms,
- boolean includeHyponyms, boolean includeMeronyms,
- boolean includeHolonyms){
+ }
- return extendedContextual(wtd, LeskParameters.DFLT_WIN_SIZE,
- depth, depthScoreWeight, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
+ public ArrayList<WordSense> extendedContextual(WTDLesk wtd, int windowSize,
+ int depth, double depthScoreWeight, boolean includeSynonyms,
+ boolean includeHypernyms, boolean includeHyponyms,
+ boolean includeMeronyms, boolean includeHolonyms) {
- }
+ return extendedContextual(wtd, windowSize, windowSize, depth,
+ depthScoreWeight, includeSynonyms, includeHypernyms, includeHyponyms,
+ includeMeronyms, includeHolonyms);
+ }
- public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
- int windowSize, int depth, double depthScoreWeight,
- boolean includeSynonyms, boolean includeHypernyms,
- boolean includeHyponyms, boolean includeMeronyms,
- boolean includeHolonyms) {
+ public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
+ int windowBackward, int windowForward, int depth,
+ double depthScoreWeight, boolean includeSynonyms,
+ boolean includeHypernyms, boolean includeHyponyms,
+ boolean includeMeronyms, boolean includeHolonyms) {
- return extendedContextual(wtd, windowSize, windowSize,
- depth, depthScoreWeight, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
- }
+ ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,
+ windowBackward, windowForward);
+ WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
- public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
- int windowBackward, int windowForward, int depth,
- double depthScoreWeight, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
+ ArrayList<Synset> synsets = word.getSynsets();
+ ArrayList<Node> nodes = new ArrayList<Node>();
- ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,windowBackward,windowForward);
- WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+ for (Synset synset : synsets) {
+ Node node = new Node(synset, relvWords);
+ nodes.add(node);
+ }
- ArrayList<Synset> synsets = word.getSynsets();
- ArrayList<Node> nodes = new ArrayList<Node>();
+ ArrayList<WordSense> scoredSenses = basicContextual(wtd, windowBackward,
+ windowForward);
- for (Synset synset : synsets) {
- Node node = new Node(synset, relvWords);
- nodes.add(node);
- }
+ for (WordSense wordSense : scoredSenses) {
- ArrayList<WordSense> scoredSenses = basicContextual(wtd,windowBackward, windowForward);
+ if (includeSynonyms) {
+ wordSense.setScore(wordSense.getScore() + depthScoreWeight
+ * assessSynonyms(wordSense.getNode().getSynonyms(), relvWords));
+ }
- for (WordSense wordSense : scoredSenses) {
+ if (includeHypernyms) {
+ fathomHypernyms(wordSense, wordSense.getNode().synset, relvWords,
+ depth, depth, depthScoreWeight);
+ }
- if (includeSynonyms) {
- wordSense.setScore(wordSense.getScore()
- + depthScoreWeight
- * assessSynonyms(wordSense.getNode().getSynonyms(),relvWords));
- }
+ if (includeHyponyms) {
- if (includeHypernyms) {
- fathomHypernyms(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, depthScoreWeight);
- }
+ fathomHyponyms(wordSense, wordSense.getNode().synset, relvWords, depth,
+ depth, depthScoreWeight);
+ }
- if (includeHyponyms) {
+ if (includeMeronyms) {
- fathomHyponyms(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, depthScoreWeight);
- }
+ fathomMeronyms(wordSense, wordSense.getNode().synset, relvWords, depth,
+ depth, depthScoreWeight);
- if (includeMeronyms) {
+ }
- fathomMeronyms(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, depthScoreWeight);
+ if (includeHolonyms) {
- }
+ fathomHolonyms(wordSense, wordSense.getNode().synset, relvWords, depth,
+ depth, depthScoreWeight);
- if (includeHolonyms) {
+ }
- fathomHolonyms(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, depthScoreWeight);
+ }
- }
+ return scoredSenses;
- }
+ }
- return scoredSenses;
+ public ArrayList<WordSense> extendedExponential(WTDLesk wtd, int depth,
+ double intersectionExponent, double depthExponent,
+ boolean includeSynonyms, boolean includeHypernyms,
+ boolean includeHyponyms, boolean includeMeronyms, boolean includeHolonyms) {
- }
+ return extendedExponentialContextual(wtd, 0, depth, intersectionExponent,
+ depthExponent, includeSynonyms, includeHypernyms, includeHyponyms,
+ includeMeronyms, includeHolonyms);
- public ArrayList<WordSense> extendedExponential(WTDLesk wtd,
- int depth,
- double intersectionExponent,double depthExponent, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
+ }
- return extendedExponentialContextual(wtd, 0, depth,
- intersectionExponent, depthExponent, includeSynonyms,
- includeHypernyms, includeHyponyms,
- includeMeronyms, includeHolonyms);
+ public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
+ int depth, double intersectionExponent, double depthExponent,
+ boolean includeSynonyms, boolean includeHypernyms,
+ boolean includeHyponyms, boolean includeMeronyms, boolean includeHolonyms) {
- }
+ return extendedExponentialContextual(wtd, LeskParameters.DFLT_WIN_SIZE,
+ depth, intersectionExponent, depthExponent, includeSynonyms,
+ includeHypernyms, includeHyponyms, includeMeronyms, includeHolonyms);
+ }
- public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
- int depth,
- double intersectionExponent,double depthExponent, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
+ public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
+ int windowSize, int depth, double intersectionExponent,
+ double depthExponent, boolean includeSynonyms, boolean includeHypernyms,
+ boolean includeHyponyms, boolean includeMeronyms, boolean includeHolonyms) {
- return extendedExponentialContextual(wtd, LeskParameters.DFLT_WIN_SIZE,
- depth, intersectionExponent,depthExponent, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
- }
+ return extendedExponentialContextual(wtd, windowSize, windowSize, depth,
+ intersectionExponent, depthExponent, includeSynonyms, includeHypernyms,
+ includeHyponyms, includeMeronyms, includeHolonyms);
+ }
- public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
- int windowSize, int depth,
- double intersectionExponent,double depthExponent, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
+ public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
+ int windowBackward, int windowForward, int depth,
+ double intersectionExponent, double depthExponent,
+ boolean includeSynonyms, boolean includeHypernyms,
+ boolean includeHyponyms, boolean includeMeronyms, boolean includeHolonyms) {
+ ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,
+ windowBackward, windowForward);
+ WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
- return extendedExponentialContextual(wtd, windowSize, windowSize,
- depth, intersectionExponent,depthExponent, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
- }
+ ArrayList<Synset> synsets = word.getSynsets();
+ ArrayList<Node> nodes = new ArrayList<Node>();
- public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
- int windowBackward, int windowForward, int depth,
- double intersectionExponent,double depthExponent, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
- ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,windowBackward,windowForward);
- WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+ for (Synset synset : synsets) {
+ Node node = new Node(synset, relvWords);
+ nodes.add(node);
+ }
- ArrayList<Synset> synsets = word.getSynsets();
- ArrayList<Node> nodes = new ArrayList<Node>();
+    ArrayList<WordSense> scoredSenses = basicContextual(wtd, windowBackward,
+        windowForward);
- for (Synset synset : synsets) {
- Node node = new Node(synset, relvWords);
- nodes.add(node);
- }
+ for (WordSense wordSense : scoredSenses) {
- ArrayList<WordSense> scoredSenses = basicContextual(wtd, windowForward, windowBackward);
+ if (includeSynonyms) {
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(
+ assessSynonyms(wordSense.getNode().getSynonyms(), relvWords),
+ intersectionExponent));
+ }
- for (WordSense wordSense : scoredSenses) {
+ if (includeHypernyms) {
+ fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
+ relvWords, depth, depth, intersectionExponent, depthExponent);
+ }
+ if (includeHyponyms) {
- if (includeSynonyms) {
- wordSense.setScore(wordSense.getScore() + Math.pow(assessSynonyms(wordSense.getNode().getSynonyms(),
- relvWords),intersectionExponent));
- }
+ fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
+ relvWords, depth, depth, intersectionExponent, depthExponent);
+ }
- if (includeHypernyms) {
- fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth,intersectionExponent, depthExponent);
- }
+ if (includeMeronyms) {
- if (includeHyponyms) {
+ fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
+ relvWords, depth, depth, intersectionExponent, depthExponent);
- fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, intersectionExponent,depthExponent);
- }
+ }
- if (includeMeronyms) {
+ if (includeHolonyms) {
- fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, intersectionExponent,depthExponent);
+ fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
+ relvWords, depth, depth, intersectionExponent, depthExponent);
- }
+ }
- if (includeHolonyms) {
+ }
- fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, intersectionExponent,depthExponent);
+ return scoredSenses;
- }
+ }
- }
+ private void fathomHypernyms(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double depthScoreWeight) {
+ if (depth == 0)
+ return;
- return scoredSenses;
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
- }
+ Node childNode = new Node(child, relvGlossWords);
- private void fathomHypernyms(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double depthScoreWeight) {
- if (depth == 0)
- return;
+ childNode.setHypernyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getHypernyms(), relvWords));
+ for (Synset hypernym : childNode.getHypernyms()) {
+ fathomHypernyms(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
+ depthScoreWeight);
+ }
+ }
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+ private void fathomHypernymsExponential(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double intersectionExponent, double depthScoreExponent) {
+ if (depth == 0)
+ return;
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
- Node childNode = new Node(child, relvGlossWords);
+ Node childNode = new Node(child, relvGlossWords);
- childNode.setHypernyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getHypernyms(), relvWords));
- for (Synset hypernym : childNode.getHypernyms()) {
- fathomHypernyms(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
- depthScoreWeight);
- }
- }
+ childNode.setHypernyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(assessFeature(childNode.getHypernyms(), relvWords),
+ intersectionExponent) / Math.pow(depth, depthScoreExponent));
+ for (Synset hypernym : childNode.getHypernyms()) {
- private void fathomHypernymsExponential(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double intersectionExponent, double depthScoreExponent) {
- if (depth == 0)
- return;
+ fathomHypernymsExponential(wordSense, hypernym, relvGlossWords,
+ depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
+ }
+ }
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+ private void fathomHyponyms(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double depthScoreWeight) {
+ if (depth == 0)
+ return;
- Node childNode = new Node(child, relvGlossWords);
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
- childNode.setHypernyms();
- wordSense
- .setScore(wordSense.getScore()
- + Math.pow(
- assessFeature(childNode.getHypernyms(),
- relvWords), intersectionExponent)
- / Math.pow(depth, depthScoreExponent));
- for (Synset hypernym : childNode.getHypernyms()) {
+ Node childNode = new Node(child, relvGlossWords);
- fathomHypernymsExponential(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
- intersectionExponent, depthScoreExponent);
- }
- }
-
- private void fathomHyponyms(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double depthScoreWeight) {
- if (depth == 0)
- return;
-
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
-
- Node childNode = new Node(child, relvGlossWords);
-
- childNode.setHyponyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getHyponyms(), relvWords));
- for (Synset hyponym : childNode.getHyponyms()) {
-
- fathomHyponyms(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
- depthScoreWeight);
- }
- }
+ childNode.setHyponyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getHyponyms(), relvWords));
+ for (Synset hyponym : childNode.getHyponyms()) {
- private void fathomHyponymsExponential(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double intersectionExponent, double depthScoreExponent) {
- if (depth == 0)
- return;
+ fathomHyponyms(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
+ depthScoreWeight);
+ }
+ }
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+ private void fathomHyponymsExponential(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double intersectionExponent, double depthScoreExponent) {
+ if (depth == 0)
+ return;
- Node childNode = new Node(child, relvGlossWords);
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
- childNode.setHyponyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(
- assessFeature(childNode.getHyponyms(), relvWords),
- intersectionExponent)
- / Math.pow(depth, depthScoreExponent));
- for (Synset hyponym : childNode.getHyponyms()) {
+ Node childNode = new Node(child, relvGlossWords);
- fathomHyponymsExponential(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
- intersectionExponent, depthScoreExponent);
- }
- }
+ childNode.setHyponyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(assessFeature(childNode.getHyponyms(), relvWords),
+ intersectionExponent) / Math.pow(depth, depthScoreExponent));
+ for (Synset hyponym : childNode.getHyponyms()) {
- private void fathomMeronyms(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double depthScoreWeight) {
- if (depth == 0)
- return;
+ fathomHyponymsExponential(wordSense, hyponym, relvGlossWords, depth - 1,
+ maxDepth, intersectionExponent, depthScoreExponent);
+ }
+ }
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+ private void fathomMeronyms(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double depthScoreWeight) {
+ if (depth == 0)
+ return;
- Node childNode = new Node(child, relvGlossWords);
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
- childNode.setMeronyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getMeronyms(), relvWords));
- for (Synset meronym : childNode.getMeronyms()) {
+ Node childNode = new Node(child, relvGlossWords);
- fathomMeronyms(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
- depthScoreWeight);
- }
- }
+ childNode.setMeronyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getMeronyms(), relvWords));
+ for (Synset meronym : childNode.getMeronyms()) {
- private void fathomMeronymsExponential(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double intersectionExponent, double depthScoreExponent) {
- if (depth == 0)
- return;
+ fathomMeronyms(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
+ depthScoreWeight);
+ }
+ }
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+ private void fathomMeronymsExponential(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double intersectionExponent, double depthScoreExponent) {
+ if (depth == 0)
+ return;
- Node childNode = new Node(child, relvGlossWords);
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
- childNode.setMeronyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(
- assessFeature(childNode.getMeronyms(), relvWords),
- intersectionExponent)
- / Math.pow(depth, depthScoreExponent));
- for (Synset meronym : childNode.getMeronyms()) {
+ Node childNode = new Node(child, relvGlossWords);
- fathomMeronymsExponential(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
- intersectionExponent, depthScoreExponent);
- }
- }
+ childNode.setMeronyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(assessFeature(childNode.getMeronyms(), relvWords),
+ intersectionExponent) / Math.pow(depth, depthScoreExponent));
+ for (Synset meronym : childNode.getMeronyms()) {
- private void fathomHolonyms(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double depthScoreWeight) {
- if (depth == 0)
- return;
+ fathomMeronymsExponential(wordSense, meronym, relvGlossWords, depth - 1,
+ maxDepth, intersectionExponent, depthScoreExponent);
+ }
+ }
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+ private void fathomHolonyms(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double depthScoreWeight) {
+ if (depth == 0)
+ return;
- Node childNode = new Node(child, relvGlossWords);
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
+ Node childNode = new Node(child, relvGlossWords);
- childNode.setHolonyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getHolonyms(), relvWords));
- for (Synset holonym : childNode.getHolonyms()) {
+ childNode.setHolonyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getHolonyms(), relvWords));
+ for (Synset holonym : childNode.getHolonyms()) {
- fathomHolonyms(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
- depthScoreWeight);
- }
- }
+ fathomHolonyms(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
+ depthScoreWeight);
+ }
+ }
- private void fathomHolonymsExponential(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double intersectionExponent, double depthScoreExponent) {
- if (depth == 0)
- return;
+ private void fathomHolonymsExponential(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double intersectionExponent, double depthScoreExponent) {
+ if (depth == 0)
+ return;
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
- Node childNode = new Node(child, relvGlossWords);
+ Node childNode = new Node(child, relvGlossWords);
- childNode.setHolonyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(
- assessFeature(childNode.getHolonyms(), relvWords),
- intersectionExponent)
- / Math.pow(depth, depthScoreExponent));
- for (Synset holonym : childNode.getHolonyms()) {
+ childNode.setHolonyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(assessFeature(childNode.getHolonyms(), relvWords),
+ intersectionExponent) / Math.pow(depth, depthScoreExponent));
+ for (Synset holonym : childNode.getHolonyms()) {
- fathomHolonymsExponential(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
- intersectionExponent, depthScoreExponent);
- }
- }
+ fathomHolonymsExponential(wordSense, holonym, relvGlossWords, depth - 1,
+ maxDepth, intersectionExponent, depthScoreExponent);
+ }
+ }
- private int assessFeature(ArrayList<Synset> featureSynsets,
- ArrayList<WordPOS> relevantWords) {
- int count = 0;
- for (Synset synset : featureSynsets) {
- Node subNode = new Node(synset, relevantWords);
+ private int assessFeature(ArrayList<Synset> featureSynsets,
+ ArrayList<WordPOS> relevantWords) {
+ int count = 0;
+ for (Synset synset : featureSynsets) {
+ Node subNode = new Node(synset, relevantWords);
- String[] tokenizedSense = Loader.getTokenizer().tokenize(subNode.getSense());
- ArrayList<WordPOS> relvSenseWords = PreProcessor.getAllRelevantWords(tokenizedSense);
+ String[] tokenizedSense = Loader.getTokenizer().tokenize(
+ subNode.getSense());
+ ArrayList<WordPOS> relvSenseWords = PreProcessor
+ .getAllRelevantWords(tokenizedSense);
- for (WordPOS senseWord : relvSenseWords) {
- for (WordPOS sentenceWord : relevantWords) {
- if (sentenceWord.isStemEquivalent(senseWord)) {
- count = count + 1;
- }
- }
- }
- }
- return count;
- }
+ for (WordPOS senseWord : relvSenseWords) {
+ for (WordPOS sentenceWord : relevantWords) {
+ if (sentenceWord.isStemEquivalent(senseWord)) {
+ count = count + 1;
+ }
+ }
+ }
+ }
+ return count;
+ }
- private int assessSynonyms(ArrayList<WordPOS> synonyms,
- ArrayList<WordPOS> relevantWords) {
- int count = 0;
+ private int assessSynonyms(ArrayList<WordPOS> synonyms,
+ ArrayList<WordPOS> relevantWords) {
+ int count = 0;
- for (WordPOS synonym : synonyms) {
- for (WordPOS sentenceWord : relevantWords) {
- // TODO try to switch to lemmatizer
- if (sentenceWord.isStemEquivalent(synonym)) {
- count = count + 1;
- }
- }
+ for (WordPOS synonym : synonyms) {
+ for (WordPOS sentenceWord : relevantWords) {
+ // TODO try to switch to lemmatizer
+ if (sentenceWord.isStemEquivalent(synonym)) {
+ count = count + 1;
+ }
+ }
- }
+ }
- return count;
- }
+ return count;
+ }
- public ArrayList<WordSense> updateSenses(ArrayList<Node> nodes) {
+ public ArrayList<WordSense> updateSenses(ArrayList<Node> nodes) {
- ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
+ ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
- for (int i=0; i< nodes.size(); i++ ) {
- ArrayList<WordPOS> sensesComponents = PreProcessor.getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getSense()));
- WordSense wordSense = new WordSense();
- nodes.get(i).setSenseRelevantWords(sensesComponents);
- wordSense.setNode(nodes.get(i));
- wordSense.setId(i);
- scoredSenses.add(wordSense);
- }
- return scoredSenses;
+ for (int i = 0; i < nodes.size(); i++) {
+ ArrayList<WordPOS> sensesComponents = PreProcessor
+ .getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getSense()));
+ WordSense wordSense = new WordSense();
+ nodes.get(i).setSenseRelevantWords(sensesComponents);
+ wordSense.setNode(nodes.get(i));
+ wordSense.setId(i);
+ scoredSenses.add(wordSense);
+ }
+ return scoredSenses;
- }
+ }
- // disambiguates a WTDLesk and returns an array of sense indexes from WordNet ordered by their score
- @Override
- public String[] disambiguate(String[] inputText, int inputWordIndex) {
- WTDLesk wtd = new WTDLesk(inputText,inputWordIndex);
- ArrayList<WordSense> wsenses = null;
+ // disambiguates a WTDLesk and returns an array of sense indexes from WordNet
+ // ordered by their score
+ @Override
+ public String[] disambiguate(String[] inputText, int inputWordIndex) {
+ WTDLesk wtd = new WTDLesk(inputText, inputWordIndex);
+ ArrayList<WordSense> wsenses = null;
- switch(this.params.leskType){
- case LESK_BASIC:
- wsenses = basic(wtd);
- break;
- case LESK_BASIC_CTXT :
- wsenses = basicContextual(wtd);
- break;
- case LESK_BASIC_CTXT_WIN :
- wsenses = basicContextual(wtd, this.params.win_b_size);
- break;
- case LESK_BASIC_CTXT_WIN_BF :
- wsenses = basicContextual(wtd, this.params.win_b_size, this.params.win_f_size);
- break;
- case LESK_EXT :
- wsenses = extended(wtd,
- this.params.depth,
- this.params.depth_weight,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_CTXT :
- wsenses = extendedContextual(wtd,
- this.params.depth,
- this.params.depth_weight,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_CTXT_WIN :
- wsenses = extendedContextual(wtd,
- this.params.win_b_size,
- this.params.depth,
- this.params.depth_weight,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_CTXT_WIN_BF :
- wsenses = extendedContextual(wtd,
- this.params.win_b_size,
- this.params.win_f_size,
- this.params.depth,
- this.params.depth_weight,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_EXP :
- wsenses = extendedExponential(wtd,
- this.params.depth,
- this.params.iexp,
- this.params.dexp,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_EXP_CTXT :
- wsenses = extendedExponentialContextual(wtd,
- this.params.depth,
- this.params.iexp,
- this.params.dexp,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_EXP_CTXT_WIN :
- wsenses = extendedExponentialContextual(wtd,
- this.params.win_b_size,
- this.params.depth,
- this.params.iexp,
- this.params.dexp,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_EXP_CTXT_WIN_BF :
- wsenses = extendedExponentialContextual(wtd,
- this.params.win_b_size,
- this.params.win_f_size,
- this.params.depth,
- this.params.iexp,
- this.params.dexp,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- }
+ switch (this.params.leskType) {
+ case LESK_BASIC:
+ wsenses = basic(wtd);
+ break;
+ case LESK_BASIC_CTXT:
+ wsenses = basicContextual(wtd);
+ break;
+ case LESK_BASIC_CTXT_WIN:
+ wsenses = basicContextual(wtd, this.params.win_b_size);
+ break;
+ case LESK_BASIC_CTXT_WIN_BF:
+ wsenses = basicContextual(wtd, this.params.win_b_size,
+ this.params.win_f_size);
+ break;
+ case LESK_EXT:
+ wsenses = extended(wtd, this.params.depth, this.params.depth_weight,
+ this.params.fathom_synonyms, this.params.fathom_hypernyms,
+ this.params.fathom_hyponyms, this.params.fathom_meronyms,
+ this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_CTXT:
+ wsenses = extendedContextual(wtd, this.params.depth,
+ this.params.depth_weight, this.params.fathom_synonyms,
+ this.params.fathom_hypernyms, this.params.fathom_hyponyms,
+ this.params.fathom_meronyms, this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_CTXT_WIN:
+ wsenses = extendedContextual(wtd, this.params.win_b_size,
+ this.params.depth, this.params.depth_weight,
+ this.params.fathom_synonyms, this.params.fathom_hypernyms,
+ this.params.fathom_hyponyms, this.params.fathom_meronyms,
+ this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_CTXT_WIN_BF:
+ wsenses = extendedContextual(wtd, this.params.win_b_size,
+ this.params.win_f_size, this.params.depth, this.params.depth_weight,
+ this.params.fathom_synonyms, this.params.fathom_hypernyms,
+ this.params.fathom_hyponyms, this.params.fathom_meronyms,
+ this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_EXP:
+ wsenses = extendedExponential(wtd, this.params.depth, this.params.iexp,
+ this.params.dexp, this.params.fathom_synonyms,
+ this.params.fathom_hypernyms, this.params.fathom_hyponyms,
+ this.params.fathom_meronyms, this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_EXP_CTXT:
+ wsenses = extendedExponentialContextual(wtd, this.params.depth,
+ this.params.iexp, this.params.dexp, this.params.fathom_synonyms,
+ this.params.fathom_hypernyms, this.params.fathom_hyponyms,
+ this.params.fathom_meronyms, this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_EXP_CTXT_WIN:
+ wsenses = extendedExponentialContextual(wtd, this.params.win_b_size,
+ this.params.depth, this.params.iexp, this.params.dexp,
+ this.params.fathom_synonyms, this.params.fathom_hypernyms,
+ this.params.fathom_hyponyms, this.params.fathom_meronyms,
+ this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_EXP_CTXT_WIN_BF:
+ wsenses = extendedExponentialContextual(wtd, this.params.win_b_size,
+ this.params.win_f_size, this.params.depth, this.params.iexp,
+ this.params.dexp, this.params.fathom_synonyms,
+ this.params.fathom_hypernyms, this.params.fathom_hyponyms,
+ this.params.fathom_meronyms, this.params.fathom_holonyms);
+ break;
+ }
- wsenses = extendedExponentialContextual(wtd, LeskParameters.DFLT_WIN_SIZE,LeskParameters.DFLT_DEPTH,LeskParameters.DFLT_IEXP,LeskParameters.DFLT_DEXP, true,true,true,true,true );
- Collections.sort(wsenses);
+ wsenses = extendedExponentialContextual(wtd, LeskParameters.DFLT_WIN_SIZE,
+ LeskParameters.DFLT_DEPTH, LeskParameters.DFLT_IEXP,
+ LeskParameters.DFLT_DEXP, true, true, true, true, true);
+ Collections.sort(wsenses);
- String[] senses = new String[wsenses.size()];
- for (int i = 0; i < wsenses.size() ; i++) {
- senses[i] = wsenses.get(i).getSense();
- }
- return senses;
- }
+ String[] senses = new String[wsenses.size()];
+ for (int i = 0; i < wsenses.size(); i++) {
+ senses[i] = wsenses.get(i).getSense();
+ }
+ return senses;
+ }
- @Override
- public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
- // TODO need to work on spans
- return null;
- }
+ @Override
+ public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
+ // TODO need to work on spans
+ return null;
+ }
-}
\ No newline at end of file
+}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
index 80b3ccd..8618795 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
@@ -2,106 +2,84 @@
public class LeskParameters {
- // VARIATIONS
- public static enum LESK_TYPE {
- LESK_BASIC,
- LESK_BASIC_CTXT,
- LESK_BASIC_CTXT_WIN,
- LESK_BASIC_CTXT_WIN_BF,
- LESK_EXT,
- LESK_EXT_CTXT,
- LESK_EXT_CTXT_WIN,
- LESK_EXT_CTXT_WIN_BF,
- LESK_EXT_EXP,
- LESK_EXT_EXP_CTXT,
- LESK_EXT_EXP_CTXT_WIN,
- LESK_EXT_EXP_CTXT_WIN_BF,
- }
+ // VARIATIONS
+ public static enum LESK_TYPE {
+ LESK_BASIC, LESK_BASIC_CTXT, LESK_BASIC_CTXT_WIN, LESK_BASIC_CTXT_WIN_BF, LESK_EXT, LESK_EXT_CTXT, LESK_EXT_CTXT_WIN, LESK_EXT_CTXT_WIN_BF, LESK_EXT_EXP, LESK_EXT_EXP_CTXT, LESK_EXT_EXP_CTXT_WIN, LESK_EXT_EXP_CTXT_WIN_BF,
+ }
- // DEFAULTS
- protected static final LESK_TYPE DFLT_LESK_TYPE = LESK_TYPE.LESK_EXT_EXP_CTXT_WIN;
- protected static final int DFLT_WIN_SIZE = 4;
- protected static final int DFLT_DEPTH = 3;
- protected static final double DFLT_IEXP = 0.3;
- protected static final double DFLT_DEXP = 0.3;
-
-
- public LESK_TYPE leskType;
- public int win_f_size;
- public int win_b_size;
- public int depth;
+ // DEFAULTS
+ protected static final LESK_TYPE DFLT_LESK_TYPE = LESK_TYPE.LESK_EXT_EXP_CTXT_WIN;
+ protected static final int DFLT_WIN_SIZE = 4;
+ protected static final int DFLT_DEPTH = 3;
+ protected static final double DFLT_IEXP = 0.3;
+ protected static final double DFLT_DEXP = 0.3;
- public boolean fathom_synonyms;
- public boolean fathom_hypernyms;
- public boolean fathom_hyponyms;
- public boolean fathom_meronyms;
- public boolean fathom_holonyms;
+ public LESK_TYPE leskType;
+ public int win_f_size;
+ public int win_b_size;
+ public int depth;
- public double depth_weight;
- public double iexp;
- public double dexp;
-
-
- public LeskParameters(){
- this.setDefaults();
- }
+ public boolean fathom_synonyms;
+ public boolean fathom_hypernyms;
+ public boolean fathom_hyponyms;
+ public boolean fathom_meronyms;
+ public boolean fathom_holonyms;
- public void setDefaults(){
- this.leskType = LeskParameters.DFLT_LESK_TYPE;
- this.win_f_size = LeskParameters.DFLT_WIN_SIZE;
- this.win_b_size = LeskParameters.DFLT_WIN_SIZE;
- this.depth = LeskParameters.DFLT_DEPTH;
- this.iexp = LeskParameters.DFLT_IEXP;
- this.dexp = LeskParameters.DFLT_DEXP;
- this.fathom_holonyms = true;
- this.fathom_hypernyms = true;
- this.fathom_hyponyms = true;
- this.fathom_meronyms = true;
- this.fathom_synonyms = true;
- }
+ public double depth_weight;
+ public double iexp;
+ public double dexp;
- // Parameter Validation
- // TODO make isSet for semantic feature booleans
- public boolean isValid(){
+ public LeskParameters() {
+ this.setDefaults();
+ }
- switch(this.leskType){
- case LESK_BASIC:
- case LESK_BASIC_CTXT :
- return true;
- case LESK_BASIC_CTXT_WIN :
- return (this.win_b_size==this.win_f_size)
- && this.win_b_size>=0 ;
- case LESK_BASIC_CTXT_WIN_BF :
- return (this.win_b_size>=0)
- && (this.win_f_size>=0) ;
- case LESK_EXT :
- case LESK_EXT_CTXT :
- return (this.depth>=0)
- && (this.depth_weight >= 0);
+ public void setDefaults() {
+ this.leskType = LeskParameters.DFLT_LESK_TYPE;
+ this.win_f_size = LeskParameters.DFLT_WIN_SIZE;
+ this.win_b_size = LeskParameters.DFLT_WIN_SIZE;
+ this.depth = LeskParameters.DFLT_DEPTH;
+ this.iexp = LeskParameters.DFLT_IEXP;
+ this.dexp = LeskParameters.DFLT_DEXP;
+ this.fathom_holonyms = true;
+ this.fathom_hypernyms = true;
+ this.fathom_hyponyms = true;
+ this.fathom_meronyms = true;
+ this.fathom_synonyms = true;
+ }
- case LESK_EXT_CTXT_WIN :
- case LESK_EXT_CTXT_WIN_BF :
- return (this.depth>=0)
- && (this.depth_weight >= 0)
- && (this.win_b_size>=0)
- && (this.win_f_size>=0);
+ // Parameter Validation
+ // TODO make isSet for semantic feature booleans
+ public boolean isValid() {
- case LESK_EXT_EXP :
- case LESK_EXT_EXP_CTXT :
- return (this.depth>=0)
- && (this.dexp >= 0)
- && (this.iexp>=0) ;
+ switch (this.leskType) {
+ case LESK_BASIC:
+ case LESK_BASIC_CTXT:
+ return true;
+ case LESK_BASIC_CTXT_WIN:
+ return (this.win_b_size == this.win_f_size) && this.win_b_size >= 0;
+ case LESK_BASIC_CTXT_WIN_BF:
+ return (this.win_b_size >= 0) && (this.win_f_size >= 0);
+ case LESK_EXT:
+ case LESK_EXT_CTXT:
+ return (this.depth >= 0) && (this.depth_weight >= 0);
- case LESK_EXT_EXP_CTXT_WIN :
- case LESK_EXT_EXP_CTXT_WIN_BF :
- return (this.depth>=0)
- && (this.dexp >= 0)
- && (this.iexp>=0)
- && (this.win_b_size>=0)
- && (this.win_f_size>=0);
- default :
- return false;
- }
- }
+ case LESK_EXT_CTXT_WIN:
+ case LESK_EXT_CTXT_WIN_BF:
+ return (this.depth >= 0) && (this.depth_weight >= 0)
+ && (this.win_b_size >= 0) && (this.win_f_size >= 0);
+
+ case LESK_EXT_EXP:
+ case LESK_EXT_EXP_CTXT:
+ return (this.depth >= 0) && (this.dexp >= 0) && (this.iexp >= 0);
+
+ case LESK_EXT_EXP_CTXT_WIN:
+ case LESK_EXT_EXP_CTXT_WIN_BF:
+ return (this.depth >= 0) && (this.dexp >= 0) && (this.iexp >= 0)
+ && (this.win_b_size >= 0) && (this.win_f_size >= 0);
+ default:
+ return false;
+ }
+ }
}
+
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
index d6ee78c..c871928 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
@@ -2,14 +2,9 @@
import opennlp.tools.disambiguator.WordToDisambiguate;
+public class WTDLesk extends WordToDisambiguate {
-public class WTDLesk extends WordToDisambiguate{
-
- public WTDLesk(String[] sentence, int wordIndex) {
- super(sentence,wordIndex,-1);
- }
-
-
-
-
-}
\ No newline at end of file
+ public WTDLesk(String[] sentence, int wordIndex) {
+ super(sentence, wordIndex, -1);
+ }
+}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
index 102ae6b..e38f749 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
@@ -1,19 +1,12 @@
package opennlp.tools.disambiguator;
+
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
-import java.util.ArrayList;
-
-import org.junit.Test;
import opennlp.tools.cmdline.postag.POSModelLoader;
-import opennlp.tools.disambiguator.Constants;
-import opennlp.tools.disambiguator.Loader;
-import opennlp.tools.disambiguator.WordSense;
-import opennlp.tools.disambiguator.ims.FeaturesExtractor;
import opennlp.tools.disambiguator.lesk.Lesk;
import opennlp.tools.disambiguator.lesk.LeskParameters;
-import opennlp.tools.disambiguator.lesk.WTDLesk;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTagger;
import opennlp.tools.postag.POSTaggerME;
@@ -21,63 +14,63 @@
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
+import org.junit.Test;
public class Tester {
- @Test
- public static void main(String[] args) {
+ @Test
+ public static void main(String[] args) {
+ String sentence = "I went fishing for some sea bass.";
+ TokenizerModel TokenizerModel;
- String sentence = "I went fishing for some sea bass.";
- TokenizerModel TokenizerModel;
-
- try {
- TokenizerModel = new TokenizerModel(new FileInputStream("src\\test\\resources\\opennlp\\tools\\disambiguator\\en-token.bin"));
- Tokenizer tokenizer = new TokenizerME(TokenizerModel);
+ try {
+ TokenizerModel = new TokenizerModel(new FileInputStream(
+ "src\\test\\resources\\opennlp\\tools\\disambiguator\\en-token.bin"));
+ Tokenizer tokenizer = new TokenizerME(TokenizerModel);
- String[] words = tokenizer.tokenize(sentence);
-
- POSModel posTaggerModel = new POSModelLoader().load(new File("src\\test\\resources\\opennlp\\tools\\disambiguator\\en-pos-maxent.bin"));
- POSTagger tagger = new POSTaggerME(posTaggerModel);
-
+ String[] words = tokenizer.tokenize(sentence);
- Constants.print("\ntokens :");
- Constants.print(words);
- Constants.print(tagger.tag(words));
-
- Constants.print("\ntesting default lesk :");
- Lesk lesk = new Lesk();
- Constants.print(lesk.disambiguate(words, 6));
-
- Constants.print("\ntesting with null params :");
- lesk.setParams(null);
- Constants.print(lesk.disambiguate(words, 6));
-
- Constants.print("\ntesting with default params");
- lesk.setParams(new LeskParameters());
- Constants.print(lesk.disambiguate(words, 6));
-
- Constants.print("\ntesting with custom params :");
- LeskParameters leskParams = new LeskParameters();
- leskParams.leskType = LeskParameters.LESK_TYPE.LESK_BASIC_CTXT_WIN_BF;
- leskParams.win_b_size = 4;
- leskParams.depth = 3;
- lesk.setParams(leskParams);
- Constants.print(lesk.disambiguate(words, 6));
-
- /*
- Constants.print("\ntesting with wrong params should throw exception :");
- LeskParameters leskWrongParams = new LeskParameters();
- leskWrongParams.depth = -1;
- lesk.setParams(leskWrongParams);
- Constants.print(lesk.disambiguate(words, 6));
- */
+ POSModel posTaggerModel = new POSModelLoader()
+ .load(new File(
+ "src\\test\\resources\\opennlp\\tools\\disambiguator\\en-pos-maxent.bin"));
+ POSTagger tagger = new POSTaggerME(posTaggerModel);
- } catch (IOException e) {
- e.printStackTrace();
- }
+ Constants.print("\ntokens :");
+ Constants.print(words);
+ Constants.print(tagger.tag(words));
+ Constants.print("\ntesting default lesk :");
+ Lesk lesk = new Lesk();
+ Constants.print(lesk.disambiguate(words, 6));
- }
+ Constants.print("\ntesting with null params :");
+ lesk.setParams(null);
+ Constants.print(lesk.disambiguate(words, 6));
+
+ Constants.print("\ntesting with default params");
+ lesk.setParams(new LeskParameters());
+ Constants.print(lesk.disambiguate(words, 6));
+
+ Constants.print("\ntesting with custom params :");
+ LeskParameters leskParams = new LeskParameters();
+ leskParams.leskType = LeskParameters.LESK_TYPE.LESK_BASIC_CTXT_WIN_BF;
+ leskParams.win_b_size = 4;
+ leskParams.depth = 3;
+ lesk.setParams(leskParams);
+ Constants.print(lesk.disambiguate(words, 6));
+
+ /*
+ * Constants.print("\ntesting with wrong params should throw exception :");
+ * LeskParameters leskWrongParams = new LeskParameters();
+ * leskWrongParams.depth = -1; lesk.setParams(leskWrongParams);
+ * Constants.print(lesk.disambiguate(words, 6));
+ */
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ }
}