OPENNLP-758 fixes for parameters
Thanks to Anthony Beylerian for providing a patch!
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
index 1d165af..aadb6f3 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
@@ -171,4 +171,26 @@
public long getSynsetID() {
return this.synset.getOffset();
}
+
+ /**
+ * Wraps each node in a new {@link WordSense} whose id is the node's index in the list.
+ * Also sets each node's sense-relevant words from its tokenized gloss (mutates the nodes).
+ * @param nodes nodes to wrap, read in list order
+ * @return one sense per node, in the same order as {@code nodes}
+ */
+ public static ArrayList<WordSense> updateSenses(ArrayList<SynNode> nodes) {
+ ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
+
+ for (int i = 0; i < nodes.size(); i++) {
+ ArrayList<WordPOS> sensesComponents = PreProcessor
+ .getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getGloss()));
+ WordSense wordSense = new WordSense();
+ nodes.get(i).setSenseRelevantWords(sensesComponents);
+ wordSense.setNode(nodes.get(i));
+ wordSense.setId(i);
+ scoredSenses.add(wordSense);
+ }
+ return scoredSenses;
+
+ }
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
index dce333b..2b7f2ab 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
@@ -31,9 +31,9 @@
import opennlp.tools.disambiguator.WSDisambiguator;
import opennlp.tools.disambiguator.WordPOS;
import opennlp.tools.disambiguator.WordSense;
+import opennlp.tools.disambiguator.mfs.MFS;
import opennlp.tools.util.Span;
import net.sf.extjwnl.JWNLException;
-import net.sf.extjwnl.data.POS;
import net.sf.extjwnl.data.Synset;
import net.sf.extjwnl.data.Word;
@@ -54,6 +54,7 @@
public Lesk() {
this(null);
+
}
/**
@@ -96,36 +97,6 @@
return params;
}
- /*
- * @return the most frequent senses from wordnet
- */
- protected String getMostFrequentSenseKey(WTDLesk wtd) {
-
- String word = wtd.getRawWord().toLowerCase();
- POS pos = Constants.getPOS(wtd.getPosTag());
- String senseKey = null;
-
- if (pos != null) {
-
- WordPOS wordPOS = new WordPOS(word, pos);
-
- ArrayList<Synset> synsets = wordPOS.getSynsets();
-
- for (Word wd : synsets.get(0).getWords()) {
- if (wd.getLemma().equals(wtd.getRawWord().split("\\.")[0])) {
- try {
- senseKey = wd.getSenseKey();
- break;
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- break;
- }
- }
- }
- return senseKey;
- }
-
/**
* The basic Lesk method where the entire context is considered for overlaps
*
@@ -146,7 +117,7 @@
nodes.add(node);
}
- ArrayList<WordSense> scoredSenses = updateSenses(nodes);
+ ArrayList<WordSense> scoredSenses = SynNode.updateSenses(nodes);
for (WordSense wordSense : scoredSenses) {
wordSense.setWTDLesk(wtd);
@@ -212,7 +183,7 @@
nodes.add(node);
}
- ArrayList<WordSense> scoredSenses = updateSenses(nodes);
+ ArrayList<WordSense> scoredSenses = SynNode.updateSenses(nodes);
for (WordSense wordSense : scoredSenses) {
wordSense.setWTDLesk(wtd);
@@ -892,27 +863,7 @@
return count;
}
- /**
- * Gets the senses of the nodes
- *
- * @param nodes
- * @return senses from the nodes
- */
- public ArrayList<WordSense> updateSenses(ArrayList<SynNode> nodes) {
- ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
-
- for (int i = 0; i < nodes.size(); i++) {
- ArrayList<WordPOS> sensesComponents = PreProcessor
- .getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getGloss()));
- WordSense wordSense = new WordSense();
- nodes.get(i).setSenseRelevantWords(sensesComponents);
- wordSense.setNode(nodes.get(i));
- wordSense.setId(i);
- scoredSenses.add(wordSense);
- }
- return scoredSenses;
-
- }
+
/**
* Disambiguates an ambiguous word in its context
@@ -1028,13 +979,13 @@
break;
}
}
- senses[i] = "WordNet" + " " + senseKey + " "
+ senses[i] = params.source.name() + " " + senseKey + " "
+ wsenses.get(i).getScore();
}
} else { // get the MFS if no overlaps
senses = new String[1];
- senses[0] = "WordNet" + " " + this.getMostFrequentSenseKey(wtd) + " -1";
+ senses[0] = params.source.name() + " " + MFS.getMostFrequentSense(wtd) + " -1";
}
return senses;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
index ce833f6..e8e0b24 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
@@ -27,25 +27,25 @@
*/
public class LeskParameters extends WSDParameters {
-
/**
* Enum of all types of implemented variations of Lesk
*
*/
public static enum LESK_TYPE {
- LESK_BASIC, LESK_BASIC_CTXT, LESK_BASIC_CTXT_WIN, LESK_BASIC_CTXT_WIN_BF,
- LESK_EXT, LESK_EXT_CTXT, LESK_EXT_CTXT_WIN, LESK_EXT_CTXT_WIN_BF, LESK_EXT_EXP,
- LESK_EXT_EXP_CTXT, LESK_EXT_EXP_CTXT_WIN, LESK_EXT_EXP_CTXT_WIN_BF,
+ LESK_BASIC, LESK_BASIC_CTXT, LESK_BASIC_CTXT_WIN, LESK_BASIC_CTXT_WIN_BF, LESK_EXT, LESK_EXT_CTXT, LESK_EXT_CTXT_WIN, LESK_EXT_CTXT_WIN_BF, LESK_EXT_EXP, LESK_EXT_EXP_CTXT, LESK_EXT_EXP_CTXT_WIN, LESK_EXT_EXP_CTXT_WIN_BF,
}
-
+
// DEFAULTS
protected static final LESK_TYPE DFLT_LESK_TYPE = LESK_TYPE.LESK_EXT_EXP_CTXT_WIN;
+ protected static final Source DFLT_SOURCE = Source.WORDNET;
protected static final int DFLT_WIN_SIZE = 5;
protected static final int DFLT_DEPTH = 2;
protected static final double DFLT_IEXP = 0.4;
protected static final double DFLT_DEXP = 0.4;
protected LESK_TYPE leskType;
+
+ protected Source source;
protected int win_f_size;
protected int win_b_size;
protected int depth;
@@ -165,6 +165,7 @@
*/
public void setDefaults() {
this.leskType = LeskParameters.DFLT_LESK_TYPE;
+ this.source = LeskParameters.DFLT_SOURCE;
this.win_f_size = LeskParameters.DFLT_WIN_SIZE;
this.win_b_size = LeskParameters.DFLT_WIN_SIZE;
this.depth = LeskParameters.DFLT_DEPTH;
@@ -177,8 +178,9 @@
this.fathom_synonyms = true;
}
-
- /* (non-Javadoc)
+ /*
+ * (non-Javadoc)
+ *
* @see opennlp.tools.disambiguator.WSDParameters#isValid()
*/
public boolean isValid() {