moved MFS and Lesk into main package
moved IMS and OSCC into main package as contextGenerators
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
index 774dc42..89d55a5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
@@ -31,13 +31,12 @@
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.disambiguator.Lesk;
import opennlp.tools.disambiguator.WSDHelper;
import opennlp.tools.disambiguator.WSDSample;
import opennlp.tools.disambiguator.WSDSampleStream;
import opennlp.tools.disambiguator.WSDisambiguator;
-import opennlp.tools.disambiguator.ims.IMSME;
-import opennlp.tools.disambiguator.lesk.Lesk;
-import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.disambiguator.MFS;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
new file mode 100644
index 0000000..3ee10d1
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
@@ -0,0 +1,5 @@
+package opennlp.tools.disambiguator;
+
+public class IMSWSDContextGenerator {
+
+}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java
new file mode 100644
index 0000000..e34001e
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java
@@ -0,0 +1,5 @@
+package opennlp.tools.disambiguator;
+
+public class IMSWSDSequenceValidator {
+
+}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
similarity index 84%
rename from opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
rename to opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
index c96a645..6138bde 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
@@ -16,21 +16,14 @@
* specific language governing permissions and limitations
* under the License.
*/
-package opennlp.tools.disambiguator.lesk;
+package opennlp.tools.disambiguator;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import opennlp.tools.disambiguator.WSDHelper;
-import opennlp.tools.disambiguator.SynNode;
-import opennlp.tools.disambiguator.WSDParameters;
-import opennlp.tools.disambiguator.WSDSample;
-import opennlp.tools.disambiguator.WSDisambiguator;
-import opennlp.tools.disambiguator.WordPOS;
-import opennlp.tools.disambiguator.WordSense;
-import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.disambiguator.MFS;
import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.data.Synset;
import net.sf.extjwnl.data.Word;
@@ -114,8 +107,8 @@
for (int i = 0; i < sample.getSentence().length; i++) {
if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
- contextWords.add(new WordPOS(sample.getSentence()[i], sample
- .getTags()[i]));
+ contextWords
+ .add(new WordPOS(sample.getSentence()[i], sample.getTags()[i]));
}
}
}
@@ -163,8 +156,8 @@
if (i >= 0 && i < sample.getSentence().length && i != index) {
if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
- contextWords.add(new WordPOS(sample.getSentence()[i], sample
- .getTags()[i]));
+ contextWords
+ .add(new WordPOS(sample.getSentence()[i], sample.getTags()[i]));
}
}
}
@@ -326,11 +319,9 @@
for (WordSense wordSense : scoredSenses) {
if (params.features[0]) {
- wordSense.setScore(wordSense.getScore()
- + Math
- .pow(
- assessSynonyms(wordSense.getNode().getSynonyms(),
- contextWords), params.iexp));
+ wordSense.setScore(wordSense.getScore() + Math.pow(
+ assessSynonyms(wordSense.getNode().getSynonyms(), contextWords),
+ params.iexp));
}
if (params.features[1]) {
@@ -403,17 +394,17 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHypernyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getHypernyms(), relvWords));
+ wordSense.setScore(
+ wordSense.getScore() + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getHypernyms(), relvWords));
for (Synset hypernym : childNode.getHypernyms()) {
fathomHypernyms(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
@@ -437,8 +428,8 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
@@ -449,8 +440,8 @@
+ Math.pow(assessFeature(childNode.getHypernyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset hypernym : childNode.getHypernyms()) {
- fathomHypernymsExponential(wordSense, hypernym, relvGlossWords,
- depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
+ fathomHypernymsExponential(wordSense, hypernym, relvGlossWords, depth - 1,
+ maxDepth, intersectionExponent, depthScoreExponent);
}
}
@@ -470,17 +461,17 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHyponyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getHyponyms(), relvWords));
+ wordSense.setScore(
+ wordSense.getScore() + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getHyponyms(), relvWords));
for (Synset hyponym : childNode.getHyponyms()) {
fathomHyponyms(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
@@ -505,8 +496,8 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
@@ -539,17 +530,17 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setMeronyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getMeronyms(), relvWords));
+ wordSense.setScore(
+ wordSense.getScore() + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getMeronyms(), relvWords));
for (Synset meronym : childNode.getMeronyms()) {
fathomMeronyms(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
@@ -574,8 +565,8 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
@@ -608,17 +599,17 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHolonyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getHolonyms(), relvWords));
+ wordSense.setScore(
+ wordSense.getScore() + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getHolonyms(), relvWords));
for (Synset holonym : childNode.getHolonyms()) {
fathomHolonyms(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
@@ -643,8 +634,8 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
@@ -667,17 +658,17 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setEntailements();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getEntailments(), relvWords));
+ wordSense.setScore(
+ wordSense.getScore() + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getEntailments(), relvWords));
for (Synset entailment : childNode.getEntailments()) {
fathomEntailments(wordSense, entailment, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
@@ -691,8 +682,8 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
@@ -715,17 +706,17 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCoordinateTerms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getCoordinateTerms(), relvWords));
+ wordSense.setScore(
+ wordSense.getScore() + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getCoordinateTerms(), relvWords));
for (Synset coordinate : childNode.getCoordinateTerms()) {
fathomCoordinateTerms(wordSense, coordinate, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
@@ -739,8 +730,8 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
@@ -763,17 +754,17 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCauses();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getCauses(), relvWords));
+ wordSense.setScore(
+ wordSense.getScore() + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getCauses(), relvWords));
for (Synset cause : childNode.getCauses()) {
fathomEntailments(wordSense, cause, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
@@ -787,8 +778,8 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
@@ -811,17 +802,17 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setAttributes();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getAttributes(), relvWords));
+ wordSense.setScore(
+ wordSense.getScore() + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getAttributes(), relvWords));
for (Synset attribute : childNode.getAttributes()) {
fathomAttributes(wordSense, attribute, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
@@ -835,8 +826,8 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
@@ -859,17 +850,17 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setPertainyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getPertainyms(), relvWords));
+ wordSense.setScore(
+ wordSense.getScore() + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getPertainyms(), relvWords));
for (Synset pertainym : childNode.getPertainyms()) {
fathomPertainyms(wordSense, pertainym, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
@@ -883,8 +874,8 @@
if (depth == 0)
return;
- String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
- child.getGloss().toString());
+ String[] tokenizedGloss = WSDHelper.getTokenizer()
+ .tokenize(child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
@@ -914,8 +905,8 @@
for (Synset synset : featureSynsets) {
SynNode subNode = new SynNode(synset, relevantWords);
- String[] tokenizedSense = WSDHelper.getTokenizer().tokenize(
- subNode.getGloss());
+ String[] tokenizedSense = WSDHelper.getTokenizer()
+ .tokenize(subNode.getGloss());
ArrayList<WordPOS> relvSenseWords = WSDHelper
.getAllRelevantWords(tokenizedSense);
@@ -952,14 +943,12 @@
}
@Override
- public String[] disambiguate(WSDSample sample) {
+ public String disambiguate(WSDSample sample) {
// if not relevant POS tag
if (!WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
- String s = WSDParameters.SenseSource.WSDHELPER.name() + " "
+ return WSDParameters.SenseSource.WSDHELPER.name() + " "
+ sample.getTargetTag();
- String[] sense = { s };
- return sense;
} else {
return null;
}
@@ -993,37 +982,32 @@
Collections.sort(wsenses);
- String[] senses;
+ String sense;
if (wsenses.get(0).getScore() > 0) { // if at least one overlap
List<Word> synsetWords;
- senses = new String[wsenses.size()];
String senseKey = "?";
- for (int i = 0; i < wsenses.size(); i++) {
- synsetWords = wsenses.get(i).getNode().synset.getWords();
- for (Word synWord : synsetWords) {
- if (synWord.getLemma().equals(
- sample.getLemmas()[sample.getTargetPosition()])) {
- try {
- senseKey = synWord.getSenseKey();
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- break;
+ synsetWords = wsenses.get(0).getNode().synset.getWords();
+ for (Word synWord : synsetWords) {
+ if (synWord.getLemma()
+ .equals(sample.getLemmas()[sample.getTargetPosition()])) {
+ try {
+ senseKey = synWord.getSenseKey();
+ } catch (JWNLException e) {
+ e.printStackTrace();
}
+ break;
}
- senses[i] = params.source.name() + " " + senseKey + " "
- + wsenses.get(i).getScore();
-
}
+ sense = params.source.name() + " " + senseKey + " "
+ + wsenses.get(0).getScore();
} else { // get the MFS if no overlaps
- senses = new String[1];
- senses[0] = MFS.getMostFrequentSense(sample) + " -1";
+ sense = MFS.getMostFrequentSense(sample) + " -1";
}
- return senses;
+ return sense;
}
@Override
- public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
+ public String disambiguate(String[] tokenizedContext, String[] tokenTags,
String[] lemmas, int ambiguousTokenIndex) {
return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
ambiguousTokenIndex));
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
similarity index 97%
rename from opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
rename to opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
index cdaeaa1..47ac1d3 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
@@ -17,9 +17,7 @@
* under the License.
*/
-package opennlp.tools.disambiguator.lesk;
-
-import opennlp.tools.disambiguator.WSDParameters;
+package opennlp.tools.disambiguator;
/**
* Lesk specific parameter set
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java
new file mode 100644
index 0000000..c05b376
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.POS;
+import net.sf.extjwnl.data.Synset;
+import net.sf.extjwnl.data.Word;
+import opennlp.tools.disambiguator.WSDHelper;
+import opennlp.tools.disambiguator.WSDParameters;
+import opennlp.tools.disambiguator.WSDSample;
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.disambiguator.WordPOS;
+
+/**
+ * Implementation of the <b>Most Frequent Sense</b> baseline approach. This
+ * approach returns the senses in order of frequency in WordNet. The first sense
+ * is the most frequent.
+ */
+public class MFS extends WSDisambiguator {
+
+ public MFS() {
+ super();
+ }
+
+ /**
+ * @return the most frequent sense (WordNet sense key) for the sample's target word
+ */
+ public static String getMostFrequentSense(WSDSample sample) {
+
+ List<Synset> synsets = sample.getSynsets();
+ for (Word wd : synsets.get(0).getWords()) { // NOTE: assumes synsets is non-empty; get(0) throws otherwise
+ if (wd.getLemma()
+ .equalsIgnoreCase((sample.getLemmas()[sample.getTargetPosition()]))) {
+ try {
+ return WSDParameters.SenseSource.WORDNET.name() + " "
+ + wd.getSenseKey();
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ return "nonesense"; // FIXME: typo for "nonsense" — kept verbatim in case callers match this literal
+
+ }
+
+ public static String[] getMostFrequentSenses(WSDSample sample) {
+
+ List<Synset> synsets = sample.getSynsets();
+ String[] senseKeys = new String[synsets.size()];
+
+ for (int i = 0; i < synsets.size(); i++) {
+ for (Word wd : synsets.get(i).getWords()) {
+ if (wd.getLemma().equalsIgnoreCase(
+ (sample.getLemmas()[sample.getTargetPosition()]))) {
+ try {
+ senseKeys[i] = WSDParameters.SenseSource.WORDNET.name() + " "
+ + wd.getSenseKey();
+ break;
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ break;
+
+ }
+ }
+ }
+ return senseKeys;
+
+ }
+
+ @Override
+ public String disambiguate(WSDSample sample) {
+
+ if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
+ return disambiguate(sample.getTargetWordTag());
+
+ } else {
+ if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
+ return WSDParameters.SenseSource.WSDHELPER.name() + " "
+ + sample.getTargetTag();
+ } else {
+ return null;
+ }
+ }
+ }
+
+
+ public String disambiguate(String wordTag) {
+
+ String word = wordTag.split("\\.")[0];
+ String tag = wordTag.split("\\.")[1];
+
+ POS pos;
+
+ if (tag.equalsIgnoreCase("a")) {
+ pos = POS.ADJECTIVE;
+ } else if (tag.equalsIgnoreCase("r")) {
+ pos = POS.ADVERB;
+ } else if (tag.equalsIgnoreCase("n")) {
+ pos = POS.NOUN;
+ } else if (tag.equalsIgnoreCase("v")) {
+ pos = POS.VERB;
+ } else
+ pos = null;
+
+ if (pos != null) {
+
+ WordPOS wordPOS = new WordPOS(word, pos);
+
+ ArrayList<Synset> synsets = wordPOS.getSynsets();
+
+ String sense = WSDParameters.SenseSource.WORDNET.name();
+
+ for (Word wd : synsets.get(0).getWords()) {
+ if (wd.getLemma().equals(word)) {
+ try {
+ sense = sense + " " + wd.getSenseKey();
+ break;
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ return sense;
+ } else {
+ WSDHelper.print(word + " " + pos); // pos is always null on this branch
+ WSDHelper.print("The word has no definitions in WordNet !");
+ return null;
+ }
+
+ }
+}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
new file mode 100644
index 0000000..978460c
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
@@ -0,0 +1,5 @@
+package opennlp.tools.disambiguator;
+
+public class OSCCWSDContextGenerator {
+
+}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java
new file mode 100644
index 0000000..d8ce462
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+
+/**
+ * Interface for {@link WSDisambiguator} context generators.
+ */
+public interface WSDContextGenerator {
+
+ String[] getIMSContext(int index, String[] toks, String[] tags, String[] lemmas,
+ int ngram, int windowSize, ArrayList<String> model);
+
+ String[] getIMSContext(WSDSample sample, int ngram, int windowSize,
+ ArrayList<String> model);
+
+ String[] getOSCCContext(int index, String[] toks, String[] tags, String[] lemmas,
+ int windowSize, ArrayList<String> model);
+
+ String[] getOSCCContext(WSDSample sample, int windowSize,
+ ArrayList<String> model);
+}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
index cf42bc0..cdc76cd 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
@@ -55,43 +55,25 @@
// @Override
protected WSDSample processSample(WSDSample reference) {
- String[] referenceSenses = reference.getSenseIDs().toArray(
- new String[reference.getSenseIDs().size()]);
+ String[] referenceSenses = reference.getSenseIDs();
// get the best predicted sense
String predictedSense = disambiguator.disambiguate(reference.getSentence(),
reference.getTags(), reference.getLemmas(),
- reference.getTargetPosition())[0];
+ reference.getTargetPosition());
if (predictedSense == null) {
- System.out.println("There was no sense for : "
- + reference.getTargetWord());
+ System.out
+ .println("There was no sense for : " + reference.getTargetWord());
return null;
}
// get the senseKey from the result
String senseKey = predictedSense.split(" ")[1];
- // if we have multiple senses mapped to one sense
- if (disambiguator.getParams().isCoarseSense()) {
- // if we find the sense in one of the coarse senses
- int found = -1;
- for (int i = 0; i < referenceSenses.length; i++) {
- if (referenceSenses[i].equals(senseKey)) {
- accuracy.add(1);
- found = i;
- break;
- }
- }
- if (found < 0) {
- accuracy.add(0);
- }
- } // else we have fine grained senses (only one mapped sense)
- else {
- if (referenceSenses[0].equals(senseKey)) {
- accuracy.add(1);
- } else {
- accuracy.add(0);
- }
+ if (referenceSenses[0].equals(senseKey)) {
+ accuracy.add(1);
+ } else {
+ accuracy.add(0);
}
return new WSDSample(reference.getSentence(), reference.getTags(),
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
index c3e9659..03a0af3 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
@@ -32,7 +32,6 @@
import net.sf.extjwnl.dictionary.Dictionary;
import net.sf.extjwnl.dictionary.MorphologicalProcessor;
import opennlp.tools.cmdline.postag.POSModelLoader;
-import opennlp.tools.disambiguator.lesk.Lesk;
import opennlp.tools.lemmatizer.SimpleLemmatizer;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.TokenizerME;
@@ -321,9 +320,9 @@
// Print a text in the console
public static void printResults(WSDisambiguator disambiguator,
- String[] results) {
+ String result) {
- if (results != null) {
+ if (result != null) {
String[] parts;
String sensekey;
@@ -331,8 +330,7 @@
Double score;
- for (int i = 0; i < results.length; i++) {
- parts = results[i].split(" ");
+ parts = result.split(" ");
sensekey = parts[1];
if (parts.length != 3) {
score = -1.0;
@@ -346,7 +344,6 @@
print("score : "
+ score
+ " for sense "
- + i
+ " : "
+ sensekey
+ " : "
@@ -365,10 +362,8 @@
}
}
- }
} else {
- for (int i = 0; i < results.length; i++) {
- parts = results[i].split(" ");
+ parts = result.split(" ");
sensekey = parts[1];
if (parts[0].equalsIgnoreCase(WSDParameters.SenseSource.WORDNET
@@ -376,7 +371,6 @@
try {
print("sense "
- + i
+ " : "
+ sensekey
+ " : "
@@ -392,7 +386,6 @@
+ WSDHelper.getNonRelevWordsDef(sensekey));
}
- }
}
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
new file mode 100644
index 0000000..a117bb6
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.lang3.StringUtils;
+
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.BaseModel;
+
+public class WSDModel extends BaseModel {
+
+ private static final String COMPONENT_NAME = "WSD";
+ private static final String WSD_MODEL_ENTRY_NAME = "WSD.model";
+
+ private static final String WORDTAG = "wordtag";
+ private static final String WINSIZE = "winsize";
+ private static final String NGRAM = "ngram";
+ private static final String CONTEXT = "context";
+
+ private ArrayList<String> contextEntries = new ArrayList<String>();
+ private String wordTag;
+ private int windowSize;
+ private int ngram;
+
+ public ArrayList<String> getContextEntries() {
+ return contextEntries;
+ }
+
+ public int getWindowSize() {
+ return windowSize;
+ }
+
+ public void setWindowSize(int windowSize) {
+ this.windowSize = windowSize;
+ }
+
+ public int getNgram() {
+ return ngram;
+ }
+
+ public void setNgram(int ngram) {
+ this.ngram = ngram;
+ }
+
+ public void setContextEntries(ArrayList<String> contextEntries) {
+ this.contextEntries = contextEntries;
+ }
+
+ public String getWordTag() {
+ return wordTag;
+ }
+
+ public void setWordTag(String wordTag) {
+ this.wordTag = wordTag;
+ }
+
+ public WSDModel(String languageCode, String wordTag, int windowSize,
+ int ngram, MaxentModel wsdModel, ArrayList<String> contextEntries,
+ Map<String, String> manifestInfoEntries) {
+ super(COMPONENT_NAME, languageCode, manifestInfoEntries);
+
+ artifactMap.put(WSD_MODEL_ENTRY_NAME, wsdModel);
+ this.setManifestProperty(WORDTAG, wordTag);
+ this.setManifestProperty(WINSIZE, windowSize + "");
+ this.setManifestProperty(NGRAM, ngram + "");
+ this.setManifestProperty(CONTEXT, StringUtils.join(contextEntries, ","));
+
+ this.contextEntries = contextEntries;
+ checkArtifactMap();
+ }
+
+ public WSDModel(String languageCode, String wordTag, int windowSize,
+ int ngram, MaxentModel wsdModel, ArrayList<String> surroundingWords) {
+ this(languageCode, wordTag, windowSize, ngram, wsdModel, surroundingWords,
+ null);
+ }
+
+ public WSDModel(InputStream in) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, in);
+ updateAttributes();
+ }
+
+ public WSDModel(File modelFile) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelFile);
+ updateAttributes();
+ }
+
+ public WSDModel(URL modelURL) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelURL);
+ updateAttributes();
+ }
+
+ // path must include the word.tag i.e. : write.v
+ public boolean writeModel(String path) {
+ File outFile = new File(path + ".wsd.model");
+ CmdLineUtil.writeModel("wsd model", outFile, this);
+ return true;
+ }
+
+ @Override
+ protected void validateArtifactMap() throws InvalidFormatException {
+ super.validateArtifactMap();
+
+ if (!(artifactMap.get(WSD_MODEL_ENTRY_NAME) instanceof AbstractModel)) {
+ throw new InvalidFormatException("WSD model is incomplete!");
+ }
+ }
+
+ public MaxentModel getWSDMaxentModel() {
+ if (artifactMap.get(WSD_MODEL_ENTRY_NAME) instanceof MaxentModel) {
+ return (MaxentModel) artifactMap.get(WSD_MODEL_ENTRY_NAME);
+ } else {
+ return null;
+ }
+ }
+
+ public void updateAttributes() {
+ Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+ String surroundings = (String) manifest.get(CONTEXT);
+
+ this.contextEntries = new ArrayList<>(Arrays.asList(surroundings.split(",")));
+ this.wordTag = (String) manifest.get(WORDTAG);
+ this.windowSize = Integer.parseInt((String) manifest.get(WINSIZE));
+ this.ngram = Integer.parseInt((String) manifest.get(NGRAM));
+ }
+
+}
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
index d890ba0..7b9c7bc 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
@@ -25,10 +25,6 @@
*/
public abstract class WSDParameters {
- protected boolean isCoarseSense;
- public static boolean isStemCompare;
- protected boolean returnMultiple;
-
public static enum SenseSource {
WORDNET, WSDHELPER, OTHER;
}
@@ -38,21 +34,6 @@
/**
* @return if the disambiguation type is coarse grained or fine grained
*/
- public boolean isCoarseSense() {
- return isCoarseSense;
- }
-
- public void setCoarseSense(boolean isCoarseSense) {
- this.isCoarseSense = isCoarseSense;
- }
-
- public static boolean isStemCompare() {
- return isStemCompare;
- }
-
- public static void setStemCompare(boolean isStemCompare) {
- WSDParameters.isStemCompare = isStemCompare;
- }
public SenseSource getSenseSource() {
return senseSource;
@@ -62,20 +43,11 @@
this.senseSource = senseSource;
}
- public boolean isReturnMultiple() {
- return returnMultiple;
- }
-
- public void setReturnMultiple(boolean returnMultiple) {
- this.returnMultiple = returnMultiple;
- }
-
public WSDParameters() {
- this.isCoarseSense = false;
- this.returnMultiple = false;
+ this.senseSource = SenseSource.WORDNET;
}
- /**
+ /**
* @return checks if the parameters are valid or not
*/
public abstract boolean isValid();
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
index b8cc87c..83dbec2 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
@@ -31,73 +31,54 @@
public class WSDSample {
- private List<String> sentence;
- private List<String> tags;
- private List<String> lemmas;
+ private String[] sentence;
+ private String[] tags;
+ private String[] lemmas;
private int senseID;
- private List<String> senseIDs;
+ private String[] senseIDs;
private int targetPosition;
- public WSDSample(String sentence[], String tags[], String[] lemmas,
+ public WSDSample(String[] sentence, String[] tags, String[] lemmas,
int targetPosition, int senseID) {
- this.sentence = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(sentence)));
- this.tags = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(tags)));
+ this.sentence = sentence;
+ this.tags = tags;
this.targetPosition = targetPosition;
- this.lemmas = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(lemmas)));
- ;
+ this.lemmas = lemmas;
this.senseID = senseID;
checkArguments();
}
- public WSDSample(String sentence[], String tags[], String[] lemmas,
+ public WSDSample(String[] sentence, String[] tags, String[] lemmas,
int targetPosition) {
- this.sentence = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(sentence)));
- this.tags = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(tags)));
- this.targetPosition = targetPosition;
- this.lemmas = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(lemmas)));
- ;
- checkArguments();
+ this(sentence, tags, lemmas, targetPosition, null);
}
- public WSDSample(String sentence[], String tags[], String[] lemmas,
- int targetPosition, List<String> senseIDs) {
- this.sentence = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(sentence)));
- this.tags = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(tags)));
+ public WSDSample(String[] sentence, String[] tags, String[] lemmas,
+ int targetPosition, String[] senseIDs) {
+ this.sentence = sentence;
+ this.tags = tags;
this.targetPosition = targetPosition;
- this.lemmas = Collections.unmodifiableList(new ArrayList<String>(Arrays
- .asList(lemmas)));
+ this.lemmas = lemmas;
this.senseIDs = senseIDs;
checkArguments();
}
private void checkArguments() {
- if (sentence.size() != tags.size() || targetPosition < 0
- || targetPosition >= tags.size())
- throw new IllegalArgumentException(
- "There must be exactly one tag for each token!");
-
- if (sentence.contains(null) || tags.contains(null))
- throw new IllegalArgumentException("null elements are not allowed!");
+ if (sentence.length != tags.length || tags.length != lemmas.length
+ || targetPosition < 0 || targetPosition >= tags.length)
+ throw new IllegalArgumentException("Some inputs are not correct");
}
public String[] getSentence() {
- return sentence.toArray(new String[sentence.size()]);
+ return sentence;
}
public String[] getTags() {
- return tags.toArray(new String[tags.size()]);
+ return tags;
}
public String[] getLemmas() {
- return lemmas.toArray(new String[lemmas.size()]);
+ return lemmas;
}
public int getTargetPosition() {
@@ -108,27 +89,27 @@
return senseID;
}
- public List<String> getSenseIDs() {
+ public String[] getSenseIDs() {
return senseIDs;
}
public String getTargetWord() {
- return sentence.get(targetPosition);
+ return sentence[targetPosition];
}
public String getTargetTag() {
- return tags.get(targetPosition);
+ return tags[targetPosition];
}
- public void setSentence(List<String> sentence) {
+ public void setSentence(String[] sentence) {
this.sentence = sentence;
}
- public void setTags(List<String> tags) {
+ public void setTags(String[] tags) {
this.tags = tags;
}
- public void setLemmas(List<String> lemmas) {
+ public void setLemmas(String[] lemmas) {
this.lemmas = lemmas;
}
@@ -136,7 +117,7 @@
this.senseID = senseID;
}
- public void setSenseIDs(List<String> senseIDs) {
+ public void setSenseIDs(String[] senseIDs) {
this.senseIDs = senseIDs;
}
@@ -212,10 +193,10 @@
// Return the synsets (thus the senses) of the current target word
public List<Synset> getSynsets() {
try {
- return Dictionary
- .getDefaultResourceInstance()
+ return Dictionary.getDefaultResourceInstance()
.lookupIndexWord(WSDHelper.getPOS(this.getTargetTag()),
- this.getTargetWord()).getSenses();
+ this.getTargetWord())
+ .getSenses();
} catch (JWNLException e) {
e.printStackTrace();
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
index a825e11..bd98872 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
@@ -30,10 +30,8 @@
* A word sense disambiguator that determines which sense of a word is meant in
* a particular context. It is a classification task, where the classes are the
* different senses of the ambiguous word. Disambiguation can be achieved in
- * either supervised or un-supervised approaches. A disambiguator returns an
- * array of sense IDs ordered by their disambiguation score as well their
- * source. The first sense ID is the most probable sense in the set context. The
- * context is a sentence or a chunk of text where the target word exists.
+ * either supervised or unsupervised approaches. A disambiguator returns a
+ * sense ID.
*
* <b>How it works :<b> Just supply the context as an array of tokens and the
* index of the target word to the disambiguate method.
@@ -41,12 +39,13 @@
* Otherwise for multiple words, you can set a word span instead of simply one
* index. For the moment the source of sense definitions is from WordNet. *
* Please see {@link Lesk} for an un-supervised approach. Please see {@link IMS}
- * for a supervised approach.
+ * or {@link OSCC} for a supervised approach.
*
* Examples on how to use each approach are provided in the test section.
*
* @see Lesk
* @see IMS
+ * @see OSCC
*/
public abstract class WSDisambiguator {
@@ -75,10 +74,10 @@
* @param ambiguousTokenIndex
* @return result as an array of WordNet IDs
*/
- public String[] disambiguate(String[] tokenizedContext,
- String[] tokenTags, String[] lemmas, int ambiguousTokenIndex){
- return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
- ambiguousTokenIndex));
+ public String disambiguate(String[] tokenizedContext, String[] tokenTags,
+ String[] lemmas, int ambiguousTokenIndex) {
+ return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
+ ambiguousTokenIndex));
}
/**
@@ -90,9 +89,9 @@
* @param ambiguousTokenLemma
* @return result as an array of WordNet IDs
*/
- public List<String[]> disambiguate(String[] tokenizedContext,
+ public List<String> disambiguate(String[] tokenizedContext,
String[] tokenTags, String[] lemmas, Span ambiguousTokenIndexSpan) {
- List<String[]> senses = new ArrayList<String[]>();
+ List<String> senses = new ArrayList<String>();
int start = Math.max(0, ambiguousTokenIndexSpan.getStart());
@@ -102,16 +101,15 @@
for (int i = start; i < end + 1; i++) {
if (WSDHelper.isRelevantPOSTag(tokenTags[i])) {
- WSDSample sample = new WSDSample(tokenizedContext, tokenTags, lemmas, i);
- String[] sense = disambiguate(sample);
+ WSDSample sample = new WSDSample(tokenizedContext, tokenTags, lemmas,
+ i);
+ String sense = disambiguate(sample);
senses.add(sense);
} else {
if (WSDHelper.getNonRelevWordsDef(tokenTags[i]) != null) {
- String s = WSDParameters.SenseSource.WSDHELPER.name() + " "
+ String sense = WSDParameters.SenseSource.WSDHELPER.name() + " "
+ WSDHelper.getNonRelevWordsDef(tokenTags[i]);
- String[] sense = { s };
-
senses.add(sense);
} else {
senses.add(null);
@@ -135,24 +133,22 @@
* @return a List of arrays, each corresponding to the senses of each word of
* the context which are to be disambiguated
*/
- public List<String[]> disambiguate(String[] tokenizedContext,
+ public List<String> disambiguate(String[] tokenizedContext,
String[] tokenTags, String[] lemmas) {
- List<String[]> senses = new ArrayList<String[]>();
+ List<String> senses = new ArrayList<String>();
for (int i = 0; i < tokenizedContext.length; i++) {
if (WSDHelper.isRelevantPOSTag(tokenTags[i])) {
- WSDSample sample = new WSDSample(tokenizedContext, tokenTags, lemmas, i);
- String[] sense = disambiguate(sample);
- senses.add(sense);
+ WSDSample sample = new WSDSample(tokenizedContext, tokenTags, lemmas,
+ i);
+ senses.add(disambiguate(sample));
} else {
if (WSDHelper.getNonRelevWordsDef(tokenTags[i]) != null) {
- String s = IMSParameters.SenseSource.WSDHELPER.name() + " "
+ String sense = IMSParameters.SenseSource.WSDHELPER.name() + " "
+ WSDHelper.getNonRelevWordsDef(tokenTags[i]);
- String[] sense = { s };
-
senses.add(sense);
} else {
senses.add(null);
@@ -168,6 +164,6 @@
* @param WSDSample
* @return result as an array of WordNet IDs
*/
- public abstract String[] disambiguate(WSDSample sample);
+ public abstract String disambiguate(WSDSample sample);
}
\ No newline at end of file
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
new file mode 100644
index 0000000..166bcf4
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
@@ -0,0 +1,5 @@
+package opennlp.tools.disambiguator;
+
+public class WSDisambiguatorFactory {
+
+}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
index 664f7b3..c3ddd79 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
@@ -254,7 +254,7 @@
tags[i]);
}
- WSDSample wtd = new WSDSample(words, tags, lemmas, index, senses);
+ WSDSample wtd = new WSDSample(words, tags, lemmas, index, senses.toArray(new String[0]));
setInstances.add(wtd);
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
index 2da16e5..40884aa 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
@@ -260,7 +260,7 @@
}
WSDSample wtd = new WSDSample(words, tags, lemmas, index,
- senseIDs);
+ senseIDs.toArray(new String[0]));
setInstances.add(wtd);
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java
index b1e8a18..1755b33 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java
@@ -25,7 +25,7 @@
import opennlp.tools.disambiguator.WSDHelper;
import opennlp.tools.disambiguator.WSDSample;
import opennlp.tools.disambiguator.WSDisambiguator;
-import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.disambiguator.MFS;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.model.MaxentModel;
@@ -66,7 +66,8 @@
TrainingParameters mlParams, IMSParameters imsParams,
IMSFactory imsfactory) throws IOException {
- ArrayList<String> surroundingWordModel = buildSurroundingWords(samples, imsParams.getWindowSize());
+ ArrayList<String> surroundingWordModel = buildSurroundingWords(samples,
+ imsParams.getWindowSize());
HashMap<String, String> manifestInfoEntries = new HashMap<String, String>();
@@ -81,7 +82,7 @@
wordTag = sample.getTargetWordTag();
do {
- String sense = sample.getSenseIDs().get(0);
+ String sense = sample.getSenseIDs()[0];
String[] context = cg.getContext(sample, imsParams.ngram,
imsParams.windowSize, surroundingWordModel);
@@ -122,7 +123,7 @@
}
@Override
- public String[] disambiguate(WSDSample sample) {
+ public String disambiguate(WSDSample sample) {
if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
String wordTag = sample.getTargetWordTag();
@@ -157,12 +158,9 @@
if (outcome != null && !outcome.equals("")) {
- outcome = this.getParams().getSenseSource().name() + " "
+ return this.getParams().getSenseSource().name() + " "
+ wordTag.split("\\.")[0] + "%" + outcome;
- String[] s = { outcome };
-
- return s;
} else {
MFS mfs = new MFS();
return mfs.disambiguate(wordTag);
@@ -186,12 +184,9 @@
if (outcome != null && !outcome.equals("")) {
- outcome = this.getParams().getSenseSource().name() + " "
+ return this.getParams().getSenseSource().name() + " "
+ wordTag.split("\\.")[0] + "%" + outcome;
- String[] s = { outcome };
-
- return s;
} else {
MFS mfs = new MFS();
@@ -201,10 +196,8 @@
} else {
if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
- String s = IMSParameters.SenseSource.WSDHELPER.name() + " "
+ return IMSParameters.SenseSource.WSDHELPER.name() + " "
+ sample.getTargetTag();
- String[] sense = { s };
- return sense;
} else {
return null;
}
@@ -226,7 +219,7 @@
* : the index of the word to disambiguate
* @return an array of the senses of the word to disambiguate
*/
- public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
+ public String disambiguate(String[] tokenizedContext, String[] tokenTags,
String[] lemmas, int index) {
return disambiguate(
new WSDSample(tokenizedContext, tokenTags, lemmas, index));
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
index 1e540cf..2633a50 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
@@ -62,7 +62,6 @@
this.ngram = ngram;
this.senseSource = senseSource;
this.trainingDataDirectory = trainingDataDirectory;
- this.isCoarseSense = false;
File folder = new File(trainingDataDirectory);
if (!folder.exists())
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
index 8115472..32bb5da 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
@@ -34,7 +34,7 @@
protected String[] lemmas;
protected int wordIndex;
protected int sense;
- protected List<String> senseIDs;
+ protected String[] senseIDs;
// Attributes related to IMS features
protected String[] posOfSurroundingWords;
@@ -51,7 +51,7 @@
}
public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
- int wordIndex, List<String> senseIDs) {
+ int wordIndex, String[] senseIDs) {
this.sentence = sentence;
this.posTags = posTags;
this.wordIndex = wordIndex;
@@ -61,7 +61,7 @@
}
public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
- String word, List<String> senseIDs) {
+ String word, String[] senseIDs) {
super();
this.sentence = sentence;
@@ -87,7 +87,7 @@
this.senseIDs = sample.getSenseIDs();
}
-
+
public String[] getSentence() {
return sentence;
}
@@ -128,11 +128,11 @@
this.sense = sense;
}
- public List<String> getSenseIDs() {
+ public String[] getSenseIDs() {
return senseIDs;
}
- public void setSenseIDs(ArrayList<String> senseIDs) {
+ public void setSenseIDs(String[] senseIDs) {
this.senseIDs = senseIDs;
}
@@ -147,8 +147,8 @@
String ref = "";
if ((WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()]) != null)) {
- if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()]).equals(
- POS.VERB)) {
+ if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
+ .equals(POS.VERB)) {
ref = wordBaseForm + ".v";
} else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
.equals(POS.NOUN)) {
@@ -165,7 +165,6 @@
return ref;
}
-
public String[] getPosOfSurroundingWords() {
return posOfSurroundingWords;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java
index f2c67ba..f06f140 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java
@@ -25,7 +25,7 @@
import opennlp.tools.disambiguator.WSDHelper;
import opennlp.tools.disambiguator.WSDSample;
import opennlp.tools.disambiguator.WSDisambiguator;
-import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.disambiguator.MFS;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.model.MaxentModel;
@@ -103,7 +103,7 @@
if (sample != null) {
wordTag = sample.getTargetWordTag();
do {
- String sense = sample.getSenseIDs().get(0);
+ String sense = sample.getSenseIDs()[0];
String[] context = cg.getContext(sample, osccParams.windowSize,
surroundingClusterModel);
Event ev = new Event(sense + "", context);
@@ -143,7 +143,7 @@
}
@Override
- public String[] disambiguate(WSDSample sample) {
+ public String disambiguate(WSDSample sample) {
if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
String wordTag = sample.getTargetWordTag();
@@ -177,12 +177,9 @@
if (outcome != null && !outcome.equals("")) {
- outcome = this.getParams().getSenseSource().name() + " "
+ return this.getParams().getSenseSource().name() + " "
+ wordTag.split("\\.")[0] + "%" + outcome;
- String[] s = { outcome };
-
- return s;
} else {
MFS mfs = new MFS();
return mfs.disambiguate(wordTag);
@@ -205,12 +202,8 @@
if (outcome != null && !outcome.equals("")) {
- outcome = this.getParams().getSenseSource().name() + " "
+ return this.getParams().getSenseSource().name() + " "
+ wordTag.split("\\.")[0] + "%" + outcome;
-
- String[] s = { outcome };
-
- return s;
} else {
MFS mfs = new MFS();
@@ -220,10 +213,8 @@
} else {
if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
- String s = OSCCParameters.SenseSource.WSDHELPER.name() + " "
+ return OSCCParameters.SenseSource.WSDHELPER.name() + " "
+ sample.getTargetTag();
- String[] sense = { s };
- return sense;
} else {
return null;
}
@@ -245,7 +236,7 @@
* : the index of the word to disambiguate
* @return an array of the senses of the word to disambiguate
*/
- public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
+ public String disambiguate(String[] tokenizedContext, String[] tokenTags,
String[] lemmas, int index) {
return disambiguate(
new WSDSample(tokenizedContext, tokenTags, lemmas, index));
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java
index 15f1004..1c4fb21 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java
@@ -54,7 +54,6 @@
this.windowSize = windowSize;
this.senseSource = senseSource;
this.trainingDataDirectory = trainingDataDirectory;
- this.isCoarseSense = false;
File folder = new File(trainingDataDirectory);
if (!folder.exists())