OPENNLP-802
Fix the docs.
Clean up the parameters.
Add static access to methods that are reused in other implementations.
Thanks to Anthony Beylerian for providing a patch.
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
index a08df71..3f6cc75 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
@@ -26,6 +26,10 @@
public abstract class WSDParameters {
protected boolean isCoarseSense;
+
+ public static enum Source {
+ WORDNET
+ }
/**
* @return if the disambiguation type is coarse grained or fine grained
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
index e20bd6d..414d237 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
@@ -31,35 +31,61 @@
import opennlp.tools.disambiguator.WSDisambiguator;
import opennlp.tools.disambiguator.WordPOS;
import opennlp.tools.disambiguator.WordToDisambiguate;
+import opennlp.tools.disambiguator.lesk.LeskParameters;
+import opennlp.tools.disambiguator.lesk.WTDLesk;
import opennlp.tools.util.Span;
/**
* Implementation of the <b>Most Frequent Sense</b> baseline approach. This
- * approach returns the first sense retreived in WordNet which is supposed to be
- * the most frequent sense:
- * <ul>
- * <li>PoS-tags of the surrounding words</li>
- * <li>Local collocations</li>
- * <li>Surrounding words</li>
- * </ul>
- * check {@link https://www.comp.nus.edu.sg/~nght/pubs/ims.pdf} for details
- * about this approach
+ * approach returns the senses in order of frequency in WordNet. The first sense
+ * is the most frequent.
*/
public class MFS implements WSDisambiguator {
- public MFS(WSDParameters parameters) {
- super();
+ public MFSParameters parameters;
+
+ public MFS(MFSParameters parameters) {
this.parameters = parameters;
}
public MFS() {
- super();
this.parameters = new MFSParameters();
}
- public WSDParameters parameters;
+ /**
+ * @return the WordNet sense key of the most frequent sense, or null if none is found
+ */
+ public static String getMostFrequentSense(
+ WordToDisambiguate wordToDisambiguate) {
- private String[] getMostFrequentSense(WordToDisambiguate wordToDisambiguate) {
+ String word = wordToDisambiguate.getRawWord().toLowerCase();
+ POS pos = Constants.getPOS(wordToDisambiguate.getPosTag());
+ String senseKey = null;
+
+ if (pos != null) {
+
+ WordPOS wordPOS = new WordPOS(word, pos);
+
+ ArrayList<Synset> synsets = wordPOS.getSynsets();
+
+ for (Word wd : synsets.get(0).getWords()) {
+ if (wd.getLemma().equals(
+ wordToDisambiguate.getRawWord().split("\\.")[0])) {
+ try {
+ senseKey = wd.getSenseKey();
+ break;
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ break;
+ }
+ }
+ }
+ return senseKey;
+ }
+
+ public static String[] getMostFrequentSenses(
+ WordToDisambiguate wordToDisambiguate) {
String word = wordToDisambiguate.getRawWord().toLowerCase();
POS pos = Constants.getPOS(wordToDisambiguate.getPosTag());
@@ -84,7 +110,7 @@
} catch (JWNLException e) {
e.printStackTrace();
}
- senses[i] = "WordNet " + senseKey;
+ senses[i] = "wordnet " + senseKey;
break;
}
}
@@ -166,7 +192,15 @@
@Override
public void setParams(WSDParameters params) throws InvalidParameterException {
- this.parameters = params;
+ if (params == null) {
+ this.parameters = new MFSParameters();
+ } else {
+ if (params.isValid()) {
+ this.parameters = (MFSParameters) params;
+ } else {
+ throw new InvalidParameterException("wrong parameters");
+ }
+ }
}
@@ -177,7 +211,7 @@
WordToDisambiguate wtd = new WordToDisambiguate(tokenizedContext,
ambiguousTokenIndex);
// System.out.println(wtd.getPosTags()[ambiguousTokenIndex]);
- return getMostFrequentSense(wtd);
+ return getMostFrequentSenses(wtd);
}
@Override
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java
index 52bd4af..d0be62e 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java
@@ -19,26 +19,16 @@
package opennlp.tools.disambiguator.mfs;
+import org.apache.commons.lang3.EnumUtils;
+
import opennlp.tools.disambiguator.WSDParameters;
public class MFSParameters extends WSDParameters {
- public MFSParameters(){
+ public MFSParameters() {
this.isCoarseSense = false;
this.source = Source.WORDNET;
}
-
- public static enum Source {
- WORDNET(1, "wordnet");
-
- public int code;
- public String src;
-
- private Source(int code, String src) {
- this.code = code;
- this.src = src;
- }
- }
protected Source source;
@@ -52,10 +42,7 @@
@Override
public boolean isValid() {
- if (this.source.code == 1) {
- return true;
- }
- return false;
+ return EnumUtils.isValidEnum(Source.class, source.name());
}
}