OPENNLP-758 Applied clean up patch. Thanks to Anthony Beylerian for providing a patch.
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
index 9909c70..e5a0c38 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
@@ -22,6 +22,8 @@
import java.util.ArrayList;
import java.util.Arrays;
+import opennlp.tools.disambiguator.lesk.Lesk;
+import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.data.POS;
public class Constants {
@@ -125,22 +127,62 @@
"you're", "yours", "yourself", "yourselves", "you've", "zero"));
// Print a text in the console
+ public static void printResults(WSDisambiguator disambiguator,
+ String[] results) {
+
+ if (results != null) {
+
+ if (disambiguator instanceof Lesk) {
+ POS pos;
+ long offset;
+ double score;
+ String[] parts;
+
+ for (String result : results) {
+ parts = result.split("@");
+ pos = POS.getPOSForKey(parts[0]);
+ offset = Long.parseLong(parts[1]);
+ score = Double.parseDouble(parts[2]);
+ try {
+ Constants.print("score : " + score + " for : "
+ + Loader.getDictionary().getSynsetAt(pos, offset).getGloss());
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ }
+
+ }
+
public static void print(Object in) {
- System.out.println(in);
+ if (in == null) {
+ System.out.println("object is null");
+ } else {
+ System.out.println(in);
+ }
}
public static void print(Object[] array) {
- System.out.println(Arrays.asList(array));
+ if (array == null) {
+ System.out.println("object is null");
+ } else {
+ System.out.println(Arrays.asList(array));
+ }
}
public static void print(Object[][] array) {
- System.out.print("[");
- for (int i = 0; i < array.length; i++) {
- print(array[i]);
- if (i != array.length - 1) {
- System.out.print("\n");
+ if (array == null) {
+ System.out.println("object is null");
+ } else {
+ System.out.print("[");
+ for (int i = 0; i < array.length; i++) {
+ print(array[i]);
+ if (i != array.length - 1) {
+ System.out.print("\n");
+ }
       }
       print("]");
}
}
@@ -169,6 +211,15 @@
}
+ public static boolean isRelevant(String posTag) {
+ return getPOS(posTag) != null;
+ }
+
+ public static boolean isRelevant(POS pos) {
+ return pos.equals(POS.ADJECTIVE) || pos.equals(POS.ADVERB)
+ || pos.equals(POS.NOUN) || pos.equals(POS.VERB);
+ }
+
// Check whether a list of arrays contains an array
public static boolean belongsTo(String[] array, ArrayList<String[]> fullList) {
for (String[] refArray : fullList) {
@@ -196,5 +247,4 @@
return true;
}
-
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java
index 0161504..1ad6bad 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java
@@ -41,10 +41,6 @@
public class DataExtractor {
- /**
- * Constructor
- */
-
public DataExtractor() {
super();
}
@@ -52,7 +48,6 @@
/**
* Extract the dictionary from the dictionary XML file and map the senses
*/
-
private ArrayList<DictionaryInstance> extractDictionary(String xmlLocation) {
ArrayList<DictionaryInstance> dictionary = new ArrayList<DictionaryInstance>();
@@ -283,9 +278,9 @@
* Extract the training instances from the training/test set File
*/
- public HashMap<Integer, WTDIMS> extractWSDInstances(String xmlDataSet) {
+ public ArrayList<WTDIMS> extractWSDInstances(String xmlDataSet) {
- HashMap<Integer, WTDIMS> setInstances = new HashMap<Integer, WTDIMS>();
+ ArrayList<WTDIMS> setInstances = new ArrayList<WTDIMS>();
try {
@@ -298,8 +293,6 @@
NodeList lexelts = doc.getElementsByTagName("lexelt");
- int index = 0;
-
for (int i = 0; i < lexelts.getLength(); i++) {
Node nLexelt = lexelts.item(i);
@@ -371,16 +364,12 @@
WTDIMS wordToDisambiguate = new WTDIMS(word, answers, sentence,
rawWord);
- setInstances.put(index, wordToDisambiguate);
- index++;
+ setInstances.add(wordToDisambiguate);
// System.out.print(index + "\t");
// System.out.println(wordToDisambiguate.toString());
}
-
}
-
}
-
}
} catch (Exception e) {
@@ -390,5 +379,4 @@
return setInstances;
}
-
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java
index e53ffa8..ae694f7 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java
@@ -20,8 +20,7 @@
package opennlp.tools.disambiguator;
public class DictionaryInstance {
-
-
+
protected int index;
protected String word;
@@ -31,7 +30,6 @@
protected String[] synset;
protected String gloss;
-
public DictionaryInstance(int index, String word, String id, String source,
String[] synset, String gloss) {
super();
@@ -43,6 +41,10 @@
this.gloss = gloss;
}
+ /**
+ * Getters and Setters
+ */
+
public int getIndex() {
return index;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
index abcff6c..40c1cbc 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
@@ -20,8 +20,6 @@
package opennlp.tools.disambiguator;
-
-
import java.util.ArrayList;
import opennlp.tools.disambiguator.ims.WTDIMS;
@@ -44,8 +42,6 @@
* Collocations: it requires one parameter: "the n-gram"
*
*/
-
- // private methods
private String[] extractPosOfSurroundingWords(String[] sentence,
int wordIndex, int windowSize) {
@@ -94,7 +90,7 @@
private String[] extractLocalCollocations(String[] sentence, int wordIndex,
int ngram) {
- /*
+ /**
* Here the author used only 11 features of this type. the range was set to
* 3 (bigrams extracted in a way that they are at max separated by 1 word).
*/
@@ -124,6 +120,7 @@
return res;
}
+ // public method
/**
* This method generates the different set of features related to the IMS
* approach and store them in the corresponding attributes of the WTDIMS
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
index 8481d0c..cb1eccc 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
@@ -31,7 +31,6 @@
/**
* Convenience class to access some features.
*/
-
public class Node {
public Synset parent;
@@ -69,6 +68,10 @@
public String getSense() {
return this.synset.getGloss().toString();
}
+
+ public long getSenseID() {
+ return this.synset.getOffset();
+ }
public void setHypernyms() {
// PointerUtils pointerUtils = PointerUtils.get();
@@ -168,5 +171,4 @@
public ArrayList<WordPOS> getSynonyms() {
return synonyms;
}
-
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
new file mode 100644
index 0000000..a08df71
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+/**
+ * Disambiguation Parameters
+ *
+ */
+public abstract class WSDParameters {
+
+ protected boolean isCoarseSense;
+
+ /**
+   * @return true if the disambiguation is coarse-grained, false if fine-grained
+ */
+ public boolean isCoarseSense() {
+ return isCoarseSense;
+ }
+
+ public void setCoarseSense(boolean isCoarseSense) {
+ this.isCoarseSense = isCoarseSense;
+ }
+
+ public WSDParameters(){
+ this.isCoarseSense = true;
+ }
+
+ /**
+   * @return true if the parameters are valid, false otherwise
+ */
+ public abstract boolean isValid();
+
+}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
index 5b67511..8fc8e72 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
@@ -19,14 +19,49 @@
package opennlp.tools.disambiguator;
+import java.security.InvalidParameterException;
import opennlp.tools.util.Span;
+
/**
- * The interface for word sense disambiguators.
+ * A word sense disambiguator that determines which sense of a word is meant in a particular context.
+ * It is a classification task, where the classes are the different senses of the ambiguous word.
+ * Disambiguation can be achieved in either supervised or un-supervised approaches.
+ * For the moment this component relies on WordNet to retrieve sense definitions.
+ * It returns an array of WordNet sense IDs ordered by their disambiguation score.
+ * The sense with highest score is the most likely sense of the word.
+ *
+ * Please see {@link Lesk} for an un-supervised approach.
+ * Please see {@link IMS} for a supervised approach.
+ *
+ * @see Lesk
+ * @see IMS
*/
public interface WSDisambiguator {
- public String[] disambiguate(String[] inputText, int inputWordIndex);
+
+ /**
+ * @return the parameters of the disambiguation algorithm
+ */
+ public WSDParameters getParams();
+
+ /**
+   * @param params the disambiguation implementation specific parameters.
+ * @throws InvalidParameterException
+ */
+ public void setParams(WSDParameters params) throws InvalidParameterException;
+
+ /**
+ * @param tokenizedContext
+ * @param ambiguousTokenIndex
+ * @return result as an array of WordNet IDs
+ */
+ public String[] disambiguate(String[] tokenizedContext, int ambiguousTokenIndex);
- public String[] disambiguate(String[] inputText, Span[] inputWordSpans);
+ /**
+ * @param tokenizedContext
+ * @param ambiguousTokenIndexSpans
+   * @return result as a two-dimensional array of WordNet IDs, one row per ambiguous token span
+ */
+ public String[][] disambiguate(String[] tokenizedContext, Span[] ambiguousTokenIndexSpans);
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
index cf7e31d..9bc49ea 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
@@ -28,13 +28,13 @@
import net.sf.extjwnl.data.POS;
import net.sf.extjwnl.data.Synset;
+// TODO extend Word instead
public class WordPOS {
private String word;
private List stems;
private POS pos;
- // Constructor
public WordPOS(String word, POS pos) throws IllegalArgumentException {
if (word == null || pos == null) {
throw new IllegalArgumentException("Args are null");
@@ -106,5 +106,4 @@
}
return false;
}
-
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
index 3eaa1ed..5f27b38 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
@@ -25,160 +25,141 @@
-
public class WordToDisambiguate {
-
- // TODO Check if it is necessary to add an attribute [word] since the word in the sentence is not necessarily in the base form ??
-
- protected String [] sentence;
- protected String [] posTags;
-
- protected int wordIndex;
- protected int sense;
-
- protected ArrayList<String> senseID;
-
-
-
- /**
- * Constructor
- */
-
-
- public WordToDisambiguate(String[] sentence, int wordIndex, int sense) throws IllegalArgumentException{
- super();
-
- if (wordIndex>sentence.length){
- throw new IllegalArgumentException("The index is out of bounds !");
- }
-
- this.sentence = sentence;
- this.posTags = PreProcessor.tag(sentence);
-
- this.wordIndex = wordIndex;
-
- this.sense = sense;
- }
-
- public WordToDisambiguate(String[] sentence, int wordIndex) {
- this(sentence,wordIndex,-1);
- }
-
- public WordToDisambiguate() {
- String[] emptyString = {};
- int emptyInteger = 0;
-
- this.sentence = emptyString;
- this.wordIndex = emptyInteger;
- this.sense = -1;
-
- }
+ // TODO Check if it is necessary to add an attribute [word] since the word in
+ // the sentence is not necessarily in the base form ??
-
- /**
- * Getters and Setters
- */
-
- // Sentence
- public String[] getSentence() {
- return sentence;
- }
+ protected String[] sentence;
+ protected String[] posTags;
- public void setSentence(String[] sentence) {
- this.sentence = sentence;
- }
+ protected int wordIndex;
-
- // Sentence Pos-Tags
- public String[] getPosTags() {
- return posTags;
- }
+ protected int sense;
- public void setPosTags(String[] posTags) {
- this.posTags = posTags;
- }
+ protected ArrayList<String> senseIDs;
-
- // Word to disambiguate
- public int getWordIndex() {
- return wordIndex;
- }
+ /**
+ * Constructor
+ */
- public String getRawWord() {
-
- /**
- * For example, from the word "running" it returns "run.v"
- */
-
- String wordBaseForm = Loader.getLemmatizer().lemmatize(this.sentence[wordIndex], this.posTags[wordIndex]);
-
- String ref = "";
-
- if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.VERB)) {
- ref = wordBaseForm + ".v";
- } else if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.NOUN)) {
- ref = wordBaseForm + ".n";
- } else if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.ADJECTIVE)) {
- ref = wordBaseForm + ".a";
- } else if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.ADVERB)) {
- ref = wordBaseForm + ".r";
- } else {
-
- }
-
- return ref;
-
- }
-
- public String getWord() {
- return this.sentence[this.wordIndex];
- }
-
- public String getPosTag() {
- return this.posTags[this.wordIndex];
- }
-
- public void setWordIndex(int wordIndex) {
- this.wordIndex = wordIndex;
- }
-
+ public WordToDisambiguate(String[] sentence, int wordIndex, int sense)
+ throws IllegalArgumentException {
+ super();
-
-
- // Word to disambiguate sense
- public int getSense() {
- return sense;
- }
+ if (wordIndex > sentence.length) {
+ throw new IllegalArgumentException("The index is out of bounds !");
+ }
- public void setSense(int sense) {
- this.sense = sense;
- }
+ this.sentence = sentence;
+ this.posTags = PreProcessor.tag(sentence);
-
-
- // Sense as in the source
- // TODO fix the conflict between this ID of the sense and that in the attribute [sense]
- public ArrayList<String> getSenseID() {
- return senseID;
- }
+ this.wordIndex = wordIndex;
- public void setSenseID(ArrayList<String> senseID) {
- this.senseID = senseID;
- }
-
-
+ this.sense = sense;
+ }
+ public WordToDisambiguate(String[] sentence, int wordIndex) {
+ this(sentence, wordIndex, -1);
+ }
- /**
- * toString
- */
-
- public String toString() {
- return (wordIndex + "\t" + getWord() + "\n" + sentence);
- }
-
+ public WordToDisambiguate() {
+ String[] emptyString = {};
+ int emptyInteger = 0;
-
+ this.sentence = emptyString;
+ this.wordIndex = emptyInteger;
+ this.sense = -1;
+ }
+
+ /**
+ * Getters and Setters
+ */
+
+ // Sentence
+ public String[] getSentence() {
+ return sentence;
+ }
+
+ public void setSentence(String[] sentence) {
+ this.sentence = sentence;
+ }
+
+ // Sentence Pos-Tags
+ public String[] getPosTags() {
+ return posTags;
+ }
+
+ public void setPosTags(String[] posTags) {
+ this.posTags = posTags;
+ }
+
+ // Word to disambiguate
+ public int getWordIndex() {
+ return wordIndex;
+ }
+
+ public String getRawWord() {
+
+ /**
+ * For example, from the word "running" it returns "run.v"
+ */
+
+ String wordBaseForm = Loader.getLemmatizer().lemmatize(
+ this.sentence[wordIndex], this.posTags[wordIndex]);
+
+ String ref = "";
+
+ if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.VERB)) {
+ ref = wordBaseForm + ".v";
+ } else if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.NOUN)) {
+ ref = wordBaseForm + ".n";
+ } else if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.ADJECTIVE)) {
+ ref = wordBaseForm + ".a";
+ } else if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.ADVERB)) {
+ ref = wordBaseForm + ".r";
+ } else {
+
+ }
+
+ return ref;
+
+ }
+
+ public String getWord() {
+ return this.sentence[this.wordIndex];
+ }
+
+ public String getPosTag() {
+ return this.posTags[this.wordIndex];
+ }
+
+ public void setWordIndex(int wordIndex) {
+ this.wordIndex = wordIndex;
+ }
+
+ // Word to disambiguate sense
+ public int getSense() {
+ return sense;
+ }
+
+ public void setSense(int sense) {
+ this.sense = sense;
+ }
+
+ // Sense as in the source
+ // TODO fix the conflict between this ID of the sense and that in the
+ // attribute [sense]
+ public ArrayList<String> getSenseIDs() {
+ return senseIDs;
+ }
+
+ public void setSenseIDs(ArrayList<String> senseIDs) {
+ this.senseIDs = senseIDs;
+ }
+
+ public String toString() {
+ return (wordIndex + "\t" + getWord() + "\n" + sentence);
+ }
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
index b1710d7..0c8c4e6 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
@@ -34,8 +34,8 @@
/**
* Default context generator for IMS.
*/
- public DefaultIMSContextGenerator() {
+ public DefaultIMSContextGenerator() {
}
/**
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
index 961c6c3..a453ecb 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
@@ -35,6 +35,7 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
+import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.zip.GZIPInputStream;
@@ -51,6 +52,7 @@
import opennlp.tools.disambiguator.DataExtractor;
import opennlp.tools.disambiguator.FeaturesExtractor;
import opennlp.tools.disambiguator.PreProcessor;
+import opennlp.tools.disambiguator.WSDParameters;
import opennlp.tools.disambiguator.WordPOS;
import opennlp.tools.disambiguator.WSDisambiguator;
@@ -64,17 +66,11 @@
private FeaturesExtractor fExtractor = new FeaturesExtractor();
private DataExtractor dExtractor = new DataExtractor();
- /**
- * PARAMETERS
- */
private int windowSize;
+ private int word;
private int ngram;
- /**
- * Constructors
- */
-
public IMS() {
super();
windowSize = 3;
@@ -95,11 +91,7 @@
this.cg = factory.createContextGenerator();
}
- /**
- * INTERNAL METHODS
- */
-
- protected HashMap<Integer, WTDIMS> extractTrainingData(
+ protected ArrayList<WTDIMS> extractTrainingData(
String wordTrainingxmlFile,
HashMap<String, ArrayList<DictionaryInstance>> senses) {
@@ -108,18 +100,18 @@
* etc.)
*/
- HashMap<Integer, WTDIMS> trainingData = dExtractor
+ ArrayList<WTDIMS> trainingData = dExtractor
.extractWSDInstances(wordTrainingxmlFile);
// HashMap<Integer, WTDIMS> trainingData =
// dExtractor.extractWSDInstances(wordTrainingxmlFile);
- for (Integer key : trainingData.keySet()) {
- for (String senseId : trainingData.get(key).getSenseID()) {
+ for (WTDIMS data : trainingData) {
+ for (String senseId : data.getSenseIDs()) {
for (String dictKey : senses.keySet()) {
for (DictionaryInstance instance : senses.get(dictKey)) {
if (senseId.equals(instance.getId())) {
- trainingData.get(key).setSense(
+ data.setSense(
Integer.parseInt(dictKey.split("_")[1]));
break;
}
@@ -131,11 +123,11 @@
return trainingData;
}
- protected void extractFeature(HashMap<Integer, WTDIMS> words) {
+ protected void extractFeature(ArrayList<WTDIMS> words) {
- for (Integer key : words.keySet()) {
+ for (WTDIMS word : words) {
- fExtractor.extractIMSFeatures(words.get(key), windowSize, ngram);
+ fExtractor.extractIMSFeatures(word, windowSize, ngram);
}
@@ -217,18 +209,18 @@
HashMap<String, ArrayList<DictionaryInstance>> senses = dExtractor
.extractWordSenses(dict, map, wordTag);
- HashMap<Integer, WTDIMS> instances = extractTrainingData(
+ ArrayList<WTDIMS> instances = extractTrainingData(
wordTrainingxmlFile, senses);
extractFeature(instances);
ArrayList<Event> events = new ArrayList<Event>();
- for (int key : instances.keySet()) {
+ for (WTDIMS instance : instances) {
- int sense = instances.get(key).getSense();
+ int sense = instance.getSense();
- String[] context = cg.getContext(instances.get(key));
+ String[] context = cg.getContext(instance);
Event ev = new Event(sense + "", context);
@@ -338,9 +330,16 @@
}
@Override
- public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
- // TODO Auto-generated method stub
+ public String[][] disambiguate(String[] inputText, Span[] inputWordSpans) {
return null;
}
+ @Override
+ public WSDParameters getParams() {
+ return null;
+ }
+
+ @Override
+ public void setParams(WSDParameters params) throws InvalidParameterException {
+ }
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java
index 7b88474..cb670e4 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java
@@ -25,5 +25,4 @@
public interface IMSContextGenerator {
public String[] getContext(WTDIMS word);
-
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
index 5c078f6..81eac3f 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
@@ -99,4 +99,3 @@
public void validateArtifactMap() throws InvalidFormatException {
}
}
-
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
index ec9fd10..e40c75a 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
@@ -19,8 +19,6 @@
package opennlp.tools.disambiguator.ims;
-
-
import java.util.ArrayList;
import opennlp.tools.disambiguator.PreProcessor;
@@ -34,7 +32,6 @@
public WTDIMS(String[] sentence, int word, int sense) {
super(sentence, word, sense);
-
}
public WTDIMS(String[] sentence, int word) {
@@ -57,8 +54,7 @@
}
}
- this.senseID = xmlAnswers;
-
+ this.senseIDs = xmlAnswers;
}
public String[] getPosOfSurroundingWords() {
@@ -85,4 +81,3 @@
this.localCollocations = localCollocations;
}
}
-
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
index ecb1ec0..0b02df2 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
@@ -21,13 +21,13 @@
import java.security.InvalidParameterException;
import java.util.ArrayList;
-
import java.util.Collections;
import opennlp.tools.disambiguator.Constants;
import opennlp.tools.disambiguator.Loader;
import opennlp.tools.disambiguator.Node;
import opennlp.tools.disambiguator.PreProcessor;
+import opennlp.tools.disambiguator.WSDParameters;
import opennlp.tools.disambiguator.WSDisambiguator;
import opennlp.tools.disambiguator.WordPOS;
import opennlp.tools.disambiguator.WordSense;
@@ -35,36 +35,68 @@
import net.sf.extjwnl.data.Synset;
/**
- * Class for the Lesk algorithm and variants.
+ * Implementation of the <b>Overlap Of Senses</b> approach originally proposed by Lesk.
+ * The main idea is to check for word overlaps in the sense definitions of the surrounding context.
+ * An overlap is when two words have similar stems.
+ * The more overlaps a word has the higher its score.
+ * Different variations of the approach are included in this class.
+ *
*/
-
public class Lesk implements WSDisambiguator {
+ /**
+ * The lesk specific parameters
+ */
protected LeskParameters params;
-
- public Loader loader;
-
+
public Lesk() {
this(null);
}
+ /**
+ * Initializes the loader object and sets the input parameters
+   * @param params the input parameters
+ * @throws InvalidParameterException
+ */
public Lesk(LeskParameters params) throws InvalidParameterException {
- loader = new Loader();
+ Loader loader = new Loader();
this.setParams(params);
}
+
- public void setParams(LeskParameters params) throws InvalidParameterException {
+ /**
+   * If the parameters are null, the default ones are set; otherwise they are only set if they are valid.
+   * Invalid parameters cause an exception to be thrown.
+   *
+   * @param params the input parameters
+ * @throws InvalidParameterException
+ */
+ @Override
+ public void setParams(WSDParameters params) throws InvalidParameterException {
if (params == null) {
this.params = new LeskParameters();
} else {
if (params.isValid()) {
- this.params = params;
+ this.params = (LeskParameters) params;
} else {
throw new InvalidParameterException("wrong params");
}
}
}
+ /**
+ * @return the parameter settings
+ */
+ public LeskParameters getParams() {
+ return params;
+ }
+
+ /**
+ * The basic Lesk method where the entire context is considered for overlaps
+ *
+   * @param wtd the word to disambiguate
+ * @return The array of WordSenses with their scores
+ */
public ArrayList<WordSense> basic(WTDLesk wtd) {
ArrayList<WordPOS> relvWords = PreProcessor.getAllRelevantWords(wtd);
@@ -98,14 +130,31 @@
return scoredSenses;
}
+ /**
+   * The basic Lesk method but applied to a default context window
+   * @param wtd the word to disambiguate
+ * @return The array of WordSenses with their scores
+ */
public ArrayList<WordSense> basicContextual(WTDLesk wtd) {
return this.basicContextual(wtd, LeskParameters.DFLT_WIN_SIZE);
}
+ /**
+   * The basic Lesk method but applied to a custom context window
+   * @param wtd the word to disambiguate
+ * @param windowSize
+ * @return The array of WordSenses with their scores
+ */
public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowSize) {
return this.basicContextual(wtd, windowSize, windowSize);
}
+ /**
+   * The basic Lesk method but applied to a context window set by custom backward and forward window lengths
+ * @param wtd the word to disambiguate
+ * @param windowBackward
+ * @return the array of WordSenses with their scores
+ */
public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowBackward,
int windowForward) {
@@ -146,6 +195,19 @@
return scoredSenses;
}
+ /**
+ * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps across the entire context
+ * The scoring function uses linear weights.
+ * @param wtd the word to disambiguate
+ * @param depth how deep to go into each feature tree
+ * @param depthScoreWeight the weighing per depth level
+ * @param includeSynonyms
+ * @param includeHypernyms
+ * @param includeHyponyms
+ * @param includeMeronyms
+ * @param includeHolonyms
+ * @return the array of WordSenses with their scores
+ */
public ArrayList<WordSense> extended(WTDLesk wtd, int depth,
double depthScoreWeight, boolean includeSynonyms,
boolean includeHypernyms, boolean includeHyponyms,
@@ -156,6 +218,19 @@
}
+ /**
+ * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a default context window
+ * The scoring function uses linear weights.
+ * @param wtd the word to disambiguate
+ * @param depth how deep to go into each feature tree
+ * @param depthScoreWeight the weighing per depth level
+ * @param includeSynonyms
+ * @param includeHypernyms
+ * @param includeHyponyms
+ * @param includeMeronyms
+ * @param includeHolonyms
+ * @return the array of WordSenses with their scores
+ */
public ArrayList<WordSense> extendedContextual(WTDLesk wtd, int depth,
double depthScoreWeight, boolean includeSynonyms,
boolean includeHypernyms, boolean includeHyponyms,
@@ -167,6 +242,20 @@
}
+ /**
+ * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a custom context window
+ * The scoring function uses linear weights.
+ * @param wtd the word to disambiguate
+ * @param windowSize the custom context window size
+ * @param depth how deep to go into each feature tree
+ * @param depthScoreWeight the weighing per depth level
+ * @param includeSynonyms
+ * @param includeHypernyms
+ * @param includeHyponyms
+ * @param includeMeronyms
+ * @param includeHolonyms
+ * @return the array of WordSenses with their scores
+ */
public ArrayList<WordSense> extendedContextual(WTDLesk wtd, int windowSize,
int depth, double depthScoreWeight, boolean includeSynonyms,
boolean includeHypernyms, boolean includeHyponyms,
@@ -177,6 +266,22 @@
includeMeronyms, includeHolonyms);
}
+
+ /**
+ * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a custom context window
+ * The scoring function uses linear weights.
+ * @param wtd the word to disambiguate
+ * @param windowBackward the custom context backward window size
+ * @param windowForward the custom context forward window size
+ * @param depth how deep to go into each feature tree
+ * @param depthScoreWeight the weighing per depth level
+ * @param includeSynonyms
+ * @param includeHypernyms
+ * @param includeHyponyms
+ * @param includeMeronyms
+ * @param includeHolonyms
+ * @return the array of WordSenses with their scores
+ */
public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
int windowBackward, int windowForward, int depth,
double depthScoreWeight, boolean includeSynonyms,
@@ -236,6 +341,21 @@
}
+
+ /**
+ * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in all the context.
+ * The scoring function uses exponential weights.
+ * @param wtd the word to disambiguate
+ * @param depth how deep to go into each feature tree
+ * @param intersectionExponent
+ * @param depthExponent
+ * @param includeSynonyms
+ * @param includeHypernyms
+ * @param includeHyponyms
+ * @param includeMeronyms
+ * @param includeHolonyms
+ * @return the array of WordSenses with their scores
+ */
public ArrayList<WordSense> extendedExponential(WTDLesk wtd, int depth,
double intersectionExponent, double depthExponent,
boolean includeSynonyms, boolean includeHypernyms,
@@ -246,7 +366,21 @@
includeMeronyms, includeHolonyms);
}
-
+
+ /**
+ * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a default window in the context.
+ * The scoring function uses exponential weights.
+ * @param wtd the word to disambiguate
+ * @param depth how deep to go into each feature tree
+ * @param intersectionExponent
+ * @param depthExponent
+ * @param includeSynonyms
+ * @param includeHypernyms
+ * @param includeHyponyms
+ * @param includeMeronyms
+ * @param includeHolonyms
+ * @return the array of WordSenses with their scores
+ */
public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
int depth, double intersectionExponent, double depthExponent,
boolean includeSynonyms, boolean includeHypernyms,
@@ -256,7 +390,22 @@
depth, intersectionExponent, depthExponent, includeSynonyms,
includeHypernyms, includeHyponyms, includeMeronyms, includeHolonyms);
}
-
+
+ /**
+ * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a custom window in the context.
+ * The scoring function uses exponential weights.
+ * @param wtd the word to disambiguate
+ * @param windowSize the custom context window size
+ * @param depth how deep to go into each feature tree
+ * @param intersectionExponent
+ * @param depthExponent
+ * @param includeSynonyms
+ * @param includeHypernyms
+ * @param includeHyponyms
+ * @param includeMeronyms
+ * @param includeHolonyms
+ * @return the array of WordSenses with their scores
+ */
public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
int windowSize, int depth, double intersectionExponent,
double depthExponent, boolean includeSynonyms, boolean includeHypernyms,
@@ -267,6 +416,22 @@
includeHyponyms, includeMeronyms, includeHolonyms);
}
+ /**
+ * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in custom backward and forward windows in the context.
+ * The scoring function uses exponential weights.
+ * @param wtd the word to disambiguate
+ * @param windowBackward the custom context backward window size
+ * @param windowForward the custom context forward window size
+ * @param depth how deep to go into each feature tree
+ * @param intersectionExponent the exponent applied to the overlap (intersection) count
+ * @param depthExponent the exponent applied to the depth weighting
+ * @param includeSynonyms whether synonym overlaps contribute to the score
+ * @param includeHypernyms whether hypernym-tree overlaps contribute to the score
+ * @param includeHyponyms whether hyponym-tree overlaps contribute to the score
+ * @param includeMeronyms whether meronym-tree overlaps contribute to the score
+ * @param includeHolonyms whether holonym-tree overlaps contribute to the score
+ * @return the array of WordSenses with their scores
+ */
public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
int windowBackward, int windowForward, int depth,
double intersectionExponent, double depthExponent,
@@ -327,6 +492,15 @@
}
+ /**
+ * Recursively score the hypernym tree linearly
+ * @param wordSense
+ * @param child
+ * @param relvWords
+ * @param depth
+ * @param maxDepth
+ * @param depthScoreWeight
+ */
private void fathomHypernyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
@@ -350,6 +524,16 @@
}
}
+ /**
+ * Recursively score the hypernym tree exponentially
+ * @param wordSense
+ * @param child
+ * @param relvWords
+ * @param depth
+ * @param maxDepth
+ * @param intersectionExponent
+ * @param depthScoreExponent
+ */
private void fathomHypernymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
@@ -374,6 +558,15 @@
}
}
+ /**
+ * Recursively score the hyponym tree linearly
+ * @param wordSense
+ * @param child
+ * @param relvWords
+ * @param depth
+ * @param maxDepth
+ * @param depthScoreWeight
+ */
private void fathomHyponyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
@@ -398,6 +591,16 @@
}
}
+ /**
+ * Recursively score the hyponym tree exponentially
+ * @param wordSense
+ * @param child
+ * @param relvWords
+ * @param depth
+ * @param maxDepth
+ * @param intersectionExponent
+ * @param depthScoreExponent
+ */
private void fathomHyponymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
@@ -422,6 +625,15 @@
}
}
+ /**
+ * Recursively score the meronym tree linearly
+ * @param wordSense
+ * @param child
+ * @param relvWords
+ * @param depth
+ * @param maxDepth
+ * @param depthScoreWeight
+ */
private void fathomMeronyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
@@ -446,6 +658,16 @@
}
}
+ /**
+ * Recursively score the meronym tree exponentially
+ * @param wordSense
+ * @param child
+ * @param relvWords
+ * @param depth
+ * @param maxDepth
+ * @param intersectionExponent
+ * @param depthScoreExponent
+ */
private void fathomMeronymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
@@ -470,6 +692,15 @@
}
}
+ /**
+ * Recursively score the holonym tree linearly
+ * @param wordSense
+ * @param child
+ * @param relvWords
+ * @param depth
+ * @param maxDepth
+ * @param depthScoreWeight
+ */
private void fathomHolonyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
@@ -494,6 +725,16 @@
}
}
+ /**
+ * Recursively score the holonym tree exponentially
+ * @param wordSense
+ * @param child
+ * @param relvWords
+ * @param depth
+ * @param maxDepth
+ * @param intersectionExponent
+ * @param depthScoreExponent
+ */
private void fathomHolonymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
@@ -518,6 +759,12 @@
}
}
+ /**
+ * Checks if the feature should be counted in the score
+ * @param featureSynsets
+ * @param relevantWords
+ * @return count of features to consider
+ */
private int assessFeature(ArrayList<Synset> featureSynsets,
ArrayList<WordPOS> relevantWords) {
int count = 0;
@@ -540,25 +787,32 @@
return count;
}
+ /**
+ * Checks if the synonyms should be counted in the score
+ * @param synonyms
+ * @param relevantWords
+ * @return count of synonyms to consider
+ */
private int assessSynonyms(ArrayList<WordPOS> synonyms,
ArrayList<WordPOS> relevantWords) {
int count = 0;
for (WordPOS synonym : synonyms) {
for (WordPOS sentenceWord : relevantWords) {
- // TODO try to switch to lemmatizer
if (sentenceWord.isStemEquivalent(synonym)) {
count = count + 1;
}
}
-
}
-
return count;
}
+ /**
+ * Gets the senses of the nodes
+ * @param nodes
+ * @return senses from the nodes
+ */
public ArrayList<WordSense> updateSenses(ArrayList<Node> nodes) {
-
ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
for (int i = 0; i < nodes.size(); i++) {
@@ -573,12 +827,25 @@
return scoredSenses;
}
-
- // disambiguates a WTDLesk and returns an array of sense indexes from WordNet
- // ordered by their score
+
+ /**
+ * Disambiguates an ambiguous word in its context
+ *
+ * @param tokenizedContext the tokenized text containing the ambiguous word
+ * @param ambiguousTokenIndex the index of the word to disambiguate in the context
+ * @return array of sense indexes from WordNet ordered by their score.
+ * The result format is <b>POS</b>@<b>SenseID</b>@<b>Sense Score</b>.
+ * If the input token is not relevant, null is returned.
+ */
@Override
- public String[] disambiguate(String[] inputText, int inputWordIndex) {
- WTDLesk wtd = new WTDLesk(inputText, inputWordIndex);
+ public String[] disambiguate(String[] tokenizedContext, int ambiguousTokenIndex) {
+
+ WTDLesk wtd = new WTDLesk(tokenizedContext, ambiguousTokenIndex);
+ // if the word is not relevant return null
+ if (!Constants.isRelevant(wtd.getPosTag())){
+ return null ;
+ }
+
ArrayList<WordSense> wsenses = null;
switch (this.params.leskType) {
@@ -654,15 +921,32 @@
LeskParameters.DFLT_DEXP, true, true, true, true, true);
Collections.sort(wsenses);
+ // TODO modify to longs but for now we have strings in the data for coarsening
String[] senses = new String[wsenses.size()];
for (int i = 0; i < wsenses.size(); i++) {
- senses[i] = wsenses.get(i).getSense();
+ senses[i] = Constants.getPOS(wsenses.get(i).getWTDLesk().getPosTag())
+ .getKey()
+ + "@"
+ + Long.toString(wsenses.get(i).getNode().getSenseID())
+ + "@"
+ + wsenses.get(i).getScore();
}
return senses;
}
- @Override
- public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
+
+ /**
+ * Disambiguates ambiguous words in their context.
+ * The user can specify spans of ambiguous tokens within the tokenized context.
+ *
+ * @param tokenizedContext
+ * @param ambiguousTokenSpans
+ * @return array of arrays of sense indexes from WordNet ordered by their score.
+ * The result format is <b>POS</b>@<b>SenseID</b>@<b>Sense Score</b>.
+ * If the input token is not relevant, null is returned.
+ */
+ @Override
+ public String[][] disambiguate(String[] tokenizedContext, Span[] ambiguousTokenSpans) {
// TODO need to work on spans
return null;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
index 3d1834a..d0aa8f5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
@@ -19,11 +19,23 @@
package opennlp.tools.disambiguator.lesk;
-public class LeskParameters {
+import opennlp.tools.disambiguator.WSDParameters;
- // VARIATIONS
+/**
+ * Lesk-specific parameter set.
+ *
+ */
+public class LeskParameters extends WSDParameters {
+
+
+ /**
+ * Enum of all types of implemented variations of Lesk
+ *
+ */
public static enum LESK_TYPE {
- LESK_BASIC, LESK_BASIC_CTXT, LESK_BASIC_CTXT_WIN, LESK_BASIC_CTXT_WIN_BF, LESK_EXT, LESK_EXT_CTXT, LESK_EXT_CTXT_WIN, LESK_EXT_CTXT_WIN_BF, LESK_EXT_EXP, LESK_EXT_EXP_CTXT, LESK_EXT_EXP_CTXT_WIN, LESK_EXT_EXP_CTXT_WIN_BF,
+ LESK_BASIC, LESK_BASIC_CTXT, LESK_BASIC_CTXT_WIN, LESK_BASIC_CTXT_WIN_BF,
+ LESK_EXT, LESK_EXT_CTXT, LESK_EXT_CTXT_WIN, LESK_EXT_CTXT_WIN_BF, LESK_EXT_EXP,
+ LESK_EXT_EXP_CTXT, LESK_EXT_EXP_CTXT_WIN, LESK_EXT_EXP_CTXT_WIN_BF,
}
// DEFAULTS
@@ -33,25 +45,124 @@
protected static final double DFLT_IEXP = 0.3;
protected static final double DFLT_DEXP = 0.3;
- public LESK_TYPE leskType;
- public int win_f_size;
- public int win_b_size;
- public int depth;
+ protected LESK_TYPE leskType;
+ protected int win_f_size;
+ protected int win_b_size;
+ protected int depth;
- public boolean fathom_synonyms;
- public boolean fathom_hypernyms;
- public boolean fathom_hyponyms;
- public boolean fathom_meronyms;
- public boolean fathom_holonyms;
+ protected boolean fathom_synonyms;
+ protected boolean fathom_hypernyms;
+ protected boolean fathom_hyponyms;
+ protected boolean fathom_meronyms;
+ protected boolean fathom_holonyms;
- public double depth_weight;
- public double iexp;
- public double dexp;
+ protected double depth_weight;
+ protected double iexp;
+ protected double dexp;
+
+ public LESK_TYPE getLeskType() {
+ return leskType;
+ }
+
+ public void setLeskType(LESK_TYPE leskType) {
+ this.leskType = leskType;
+ }
+
+ public int getWin_f_size() {
+ return win_f_size;
+ }
+
+ public void setWin_f_size(int win_f_size) {
+ this.win_f_size = win_f_size;
+ }
+
+ public int getWin_b_size() {
+ return win_b_size;
+ }
+
+ public void setWin_b_size(int win_b_size) {
+ this.win_b_size = win_b_size;
+ }
+
+ public int getDepth() {
+ return depth;
+ }
+
+ public void setDepth(int depth) {
+ this.depth = depth;
+ }
+
+ public boolean isFathom_synonyms() {
+ return fathom_synonyms;
+ }
+
+ public void setFathom_synonyms(boolean fathom_synonyms) {
+ this.fathom_synonyms = fathom_synonyms;
+ }
+
+ public boolean isFathom_hypernyms() {
+ return fathom_hypernyms;
+ }
+
+ public void setFathom_hypernyms(boolean fathom_hypernyms) {
+ this.fathom_hypernyms = fathom_hypernyms;
+ }
+
+ public boolean isFathom_hyponyms() {
+ return fathom_hyponyms;
+ }
+
+ public void setFathom_hyponyms(boolean fathom_hyponyms) {
+ this.fathom_hyponyms = fathom_hyponyms;
+ }
+
+ public boolean isFathom_meronyms() {
+ return fathom_meronyms;
+ }
+
+ public void setFathom_meronyms(boolean fathom_meronyms) {
+ this.fathom_meronyms = fathom_meronyms;
+ }
+
+ public boolean isFathom_holonyms() {
+ return fathom_holonyms;
+ }
+
+ public void setFathom_holonyms(boolean fathom_holonyms) {
+ this.fathom_holonyms = fathom_holonyms;
+ }
+
+ public double getDepth_weight() {
+ return depth_weight;
+ }
+
+ public void setDepth_weight(double depth_weight) {
+ this.depth_weight = depth_weight;
+ }
+
+ public double getIexp() {
+ return iexp;
+ }
+
+ public void setIexp(double iexp) {
+ this.iexp = iexp;
+ }
+
+ public double getDexp() {
+ return dexp;
+ }
+
+ public void setDexp(double dexp) {
+ this.dexp = dexp;
+ }
public LeskParameters() {
this.setDefaults();
}
+ /**
+ * Sets default parameters
+ */
public void setDefaults() {
this.leskType = LeskParameters.DFLT_LESK_TYPE;
this.win_f_size = LeskParameters.DFLT_WIN_SIZE;
@@ -66,8 +177,10 @@
this.fathom_synonyms = true;
}
- // Parameter Validation
- // TODO make isSet for semantic feature booleans
+
+ /* (non-Javadoc)
+ * @see opennlp.tools.disambiguator.WSDParameters#isValid()
+ */
public boolean isValid() {
switch (this.leskType) {
@@ -81,16 +194,13 @@
case LESK_EXT:
case LESK_EXT_CTXT:
return (this.depth >= 0) && (this.depth_weight >= 0);
-
case LESK_EXT_CTXT_WIN:
case LESK_EXT_CTXT_WIN_BF:
return (this.depth >= 0) && (this.depth_weight >= 0)
&& (this.win_b_size >= 0) && (this.win_f_size >= 0);
-
case LESK_EXT_EXP:
case LESK_EXT_EXP_CTXT:
return (this.depth >= 0) && (this.dexp >= 0) && (this.iexp >= 0);
-
case LESK_EXT_EXP_CTXT_WIN:
case LESK_EXT_EXP_CTXT_WIN_BF:
return (this.depth >= 0) && (this.dexp >= 0) && (this.iexp >= 0)
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
index c78ba80..adae7ab 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
@@ -45,40 +45,47 @@
try {
TokenizerModel = new TokenizerModel(new FileInputStream(
- "src\\test\\resources\\opennlp\\tools\\disambiguator\\en-token.bin"));
+ "src\\test\\resources\\models\\en-token.bin"));
Tokenizer tokenizer = new TokenizerME(TokenizerModel);
String[] words = tokenizer.tokenize(sentence);
-
- POSModel posTaggerModel = new POSModelLoader()
- .load(new File(
- "src\\test\\resources\\opennlp\\tools\\disambiguator\\en-pos-maxent.bin"));
- POSTagger tagger = new POSTaggerME(posTaggerModel);
-
- Constants.print("\ntokens :");
+//
+// POSModel posTaggerModel = new POSModelLoader()
+// .load(new File(
+// "src\\test\\resources\\models\\en-pos-maxent.bin"));
+//// POSTagger tagger = new POSTaggerME(posTaggerModel);
+//
+// Constants.print("\ntokens :");
Constants.print(words);
- Constants.print(tagger.tag(words));
+
+ int wordIndex= 6;
+// Constants.print(tagger.tag(words));
Constants.print("\ntesting default lesk :");
Lesk lesk = new Lesk();
- Constants.print(lesk.disambiguate(words, 6));
+ Constants.print(lesk.disambiguate(words, wordIndex));
+ Constants.printResults(lesk,lesk.disambiguate(words, wordIndex));
+
Constants.print("\ntesting with null params :");
lesk.setParams(null);
- Constants.print(lesk.disambiguate(words, 6));
+ Constants.print(lesk.disambiguate(words, wordIndex));
+ Constants.printResults(lesk,lesk.disambiguate(words, wordIndex));
Constants.print("\ntesting with default params");
lesk.setParams(new LeskParameters());
- Constants.print(lesk.disambiguate(words, 6));
+ Constants.print(lesk.disambiguate(words, wordIndex));
+ Constants.printResults(lesk,lesk.disambiguate(words, wordIndex));
Constants.print("\ntesting with custom params :");
LeskParameters leskParams = new LeskParameters();
- leskParams.leskType = LeskParameters.LESK_TYPE.LESK_BASIC_CTXT_WIN_BF;
- leskParams.win_b_size = 4;
- leskParams.depth = 3;
+ leskParams.setLeskType(LeskParameters.LESK_TYPE.LESK_BASIC_CTXT_WIN_BF);
+ leskParams.setWin_b_size(4);
+ leskParams.setDepth(3);
lesk.setParams(leskParams);
- Constants.print(lesk.disambiguate(words, 6));
-
+ Constants.print(lesk.disambiguate(words, wordIndex));
+ Constants.printResults(lesk,lesk.disambiguate(words, wordIndex));
+
/*
* Constants.print("\ntesting with wrong params should throw exception :");
* LeskParameters leskWrongParams = new LeskParameters();