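Updated WSD tests for the changed API: disambiguate() now returns a single sense String (List<String> for spans and whole sentences) instead of String[], getSenseIDs() is indexed as an array, Lesk/MFS classes are imported from opennlp.tools.disambiguator, and OSCCEvaluatorTest trains a model per word before evaluating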
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
index 220ad2c..c5e63cf 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
@@ -59,7 +59,7 @@
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
if (instance.getSenseIDs() != null
- && !instance.getSenseIDs().get(0).equals("null")) {
+ && !instance.getSenseIDs()[0].equals("null")) {
evaluator.evaluateSample(instance);
}
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
index ce0f86e..881de6a 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
@@ -43,8 +43,8 @@
*
* The scope of this test is to make sure that the IMS disambiguator code can be
* executed. This test can not detect mistakes which lead to incorrect feature
- * generation or other mistakes which decrease the disambiguation performance of the
- * disambiguator.
+ * generation or other mistakes which decrease the disambiguation performance of
+ * the disambiguator.
*
* In this test the {@link IMSME} is trained with Semcor and then the computed
* model is used to predict sentences from the training sentences.
@@ -154,9 +154,8 @@
*/
@Test
public void testOneWordDisambiguation() {
- String[] senses = ims.disambiguate(sentence1, tags1, lemmas1, 8);
-
- assertEquals("Check number of senses", 1, senses.length);
+ String sense = ims.disambiguate(sentence1, tags1, lemmas1, 8);
+ assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
}
/*
@@ -167,13 +166,15 @@
@Test
public void testWordSpanDisambiguation() {
Span span = new Span(3, 7);
- List<String[]> senses = ims.disambiguate(sentence2, tags2, lemmas2, span);
+ List<String> senses = ims.disambiguate(sentence2, tags2, lemmas2, span);
assertEquals("Check number of returned words", 5, senses.size());
- assertEquals("Check number of senses", 1, senses.get(0).length);
- assertEquals("Check monosemous word", 1, senses.get(1).length);
- assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
- assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
+ senses.get(0));
+ assertEquals("Check 'radioactive' sense ID",
+ "WORDNET radioactive%3:00:00::", senses.get(1));
+ assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+ assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
}
/*
@@ -181,11 +182,11 @@
*/
@Test
public void testAllWordsDisambiguation() {
- List<String[]> senses = ims.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses = ims.disambiguate(sentence3, tags3, lemmas3);
assertEquals("Check number of returned words", 15, senses.size());
assertEquals("Check preposition", "WSDHELPER personal pronoun",
- senses.get(6)[0]);
+ senses.get(6));
}
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
index fe5199c..737b8fa 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
@@ -22,8 +22,6 @@
import java.util.ArrayList;
import opennlp.tools.disambiguator.datareader.SensevalReader;
-import opennlp.tools.disambiguator.lesk.Lesk;
-import opennlp.tools.disambiguator.lesk.LeskParameters;
import org.junit.Test;
@@ -59,7 +57,7 @@
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
if (instance.getSenseIDs() != null
- && !instance.getSenseIDs().get(0).equals("null")) {
+ && !instance.getSenseIDs()[0].equals("null")) {
evaluator.evaluateSample(instance);
}
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
index edb1346..2aa3334 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
@@ -24,9 +24,7 @@
import java.util.ArrayList;
import java.util.List;
-import opennlp.tools.disambiguator.lesk.Lesk;
-import opennlp.tools.disambiguator.lesk.LeskParameters;
-import opennlp.tools.disambiguator.lesk.LeskParameters.LESK_TYPE;
+import opennlp.tools.disambiguator.LeskParameters.LESK_TYPE;
import opennlp.tools.util.Span;
import org.junit.BeforeClass;
@@ -35,10 +33,10 @@
/**
* This is the test class for {@link Lesk}.
*
- * The scope of this test is to make sure that the Lesk disambiguator code can be
- * executed. This test can not detect mistakes which lead to incorrect feature
- * generation or other mistakes which decrease the disambiguation performance of the
- * disambiguator.
+ * The scope of this test is to make sure that the Lesk disambiguator code can
+ * be executed. This test can not detect mistakes which lead to incorrect
+ * feature generation or other mistakes which decrease the disambiguation
+ * performance of the disambiguator.
*/
public class LeskTester {
// TODO write more tests
@@ -118,9 +116,8 @@
*/
@Test
public void testOneWordDisambiguation() {
- String[] senses = lesk.disambiguate(sentence1, tags1, lemmas1, 8);
-
- assertEquals("Check number of senses", 1, senses.length);
+ String sense = lesk.disambiguate(sentence1, tags1, lemmas1, 8);
+ assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00:: -1", sense);
}
/*
@@ -131,13 +128,15 @@
@Test
public void testWordSpanDisambiguation() {
Span span = new Span(3, 7);
- List<String[]> senses = lesk.disambiguate(sentence2, tags2, lemmas2, span);
+ List<String> senses = lesk.disambiguate(sentence2, tags2, lemmas2, span);
assertEquals("Check number of returned words", 5, senses.size());
- assertEquals("Check number of senses", 3, senses.get(0).length);
- assertEquals("Check monosemous word", 1, senses.get(1).length);
- assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
- assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01:: 4.8",
+ senses.get(0));
+ assertEquals("Check 'radioactive' sense ID",
+ "WORDNET radioactive%3:00:00:: 6.0", senses.get(1));
+ assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+ assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
}
/*
@@ -145,11 +144,11 @@
*/
@Test
public void testAllWordsDisambiguation() {
- List<String[]> senses = lesk.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses = lesk.disambiguate(sentence3, tags3, lemmas3);
assertEquals("Check number of returned words", 15, senses.size());
assertEquals("Check preposition", "WSDHELPER personal pronoun",
- senses.get(6)[0]);
+ senses.get(6));
}
}
\ No newline at end of file
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
index b71ca6e..0195cae 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
@@ -22,9 +22,7 @@
import java.util.ArrayList;
import opennlp.tools.disambiguator.datareader.SensevalReader;
-import opennlp.tools.disambiguator.ims.WTDIMS;
-import opennlp.tools.disambiguator.mfs.MFS;
-import opennlp.tools.disambiguator.mfs.MFSParameters;
+import opennlp.tools.disambiguator.MFS;
import org.junit.Test;
@@ -40,7 +38,6 @@
WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
MFS mfs = new MFS();
- WSDParameters.isStemCompare = true;
ArrayList<String> words = seReader.getSensevalWords();
@@ -56,7 +53,7 @@
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
if (instance.getSenseIDs() != null
- && !instance.getSenseIDs().get(0).equals("null")) {
+ && !instance.getSenseIDs()[0].equals("null")) {
evaluator.evaluateSample(instance);
}
}
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
index a675268..025261e 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
@@ -26,7 +26,7 @@
import org.junit.BeforeClass;
import org.junit.Test;
-import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.disambiguator.MFS;
import opennlp.tools.util.Span;
/**
@@ -34,8 +34,8 @@
*
* The scope of this test is to make sure that the MFS disambiguator code can be
* executed. This test can not detect mistakes which lead to incorrect feature
- * generation or other mistakes which decrease the disambiguation performance of the
- * disambiguator.
+ * generation or other mistakes which decrease the disambiguation performance of
+ * the disambiguator.
*/
public class MFSTester {
// TODO write more tests
@@ -110,9 +110,8 @@
*/
@Test
public void testOneWordDisambiguation() {
- String[] senses = mfs.disambiguate(sentence1, tags1, lemmas1, 8);
-
- assertEquals("Check number of senses", 1, senses.length);
+ String sense = mfs.disambiguate(sentence1, tags1, lemmas1, 8);
+ assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
}
/*
@@ -123,13 +122,15 @@
@Test
public void testWordSpanDisambiguation() {
Span span = new Span(3, 7);
- List<String[]> senses = mfs.disambiguate(sentence2, tags2, lemmas2, span);
+ List<String> senses = mfs.disambiguate(sentence2, tags2, lemmas2, span);
assertEquals("Check number of returned words", 5, senses.size());
- assertEquals("Check number of senses", 1, senses.get(0).length);
- assertEquals("Check monosemous word", 1, senses.get(1).length);
- assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
- assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
+ senses.get(0));
+ assertEquals("Check 'radioactive' sense ID",
+ "WORDNET radioactive%3:00:00::", senses.get(1));
+ assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+ assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
}
/*
@@ -137,10 +138,10 @@
*/
@Test
public void testAllWordsDisambiguation() {
- List<String[]> senses = mfs.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses = mfs.disambiguate(sentence3, tags3, lemmas3);
assertEquals("Check number of returned words", 15, senses.size());
assertEquals("Check preposition", "WSDHELPER personal pronoun",
- senses.get(6)[0]);
+ senses.get(6));
}
}
\ No newline at end of file
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java
index c9723fa..f46a58b 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java
@@ -19,11 +19,15 @@
package opennlp.tools.disambiguator;
+import java.io.IOException;
import java.util.ArrayList;
import opennlp.tools.disambiguator.datareader.SensevalReader;
+import opennlp.tools.disambiguator.oscc.OSCCFactory;
import opennlp.tools.disambiguator.oscc.OSCCME;
+import opennlp.tools.disambiguator.oscc.OSCCModel;
import opennlp.tools.disambiguator.oscc.OSCCParameters;
+import opennlp.tools.util.TrainingParameters;
import org.junit.Test;
@@ -39,28 +43,41 @@
// TODO write unit test
String modelsDir = "src\\test\\resources\\models\\";
+ String trainingDataDirectory = "src\\test\\resources\\supervised\\models\\";
WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
OSCCParameters OSCCParams = new OSCCParameters("");
+ OSCCParams.setTrainingDataDirectory(trainingDataDirectory);
OSCCME oscc = new OSCCME(OSCCParams);
-
+ OSCCModel model = null;
ArrayList<String> words = seReader.getSensevalWords();
for (String word : words) {
- WSDEvaluator evaluator = new WSDEvaluator(oscc);
-
// don't take verbs because they are not from WordNet
if (!word.split("\\.")[1].equals("v")) {
-
+ try {
+ model = OSCCME.train("en", seReader.getSensevalDataStream(word), new TrainingParameters(), OSCCParams,
+ new OSCCFactory());
+ model.writeModel(OSCCParams.getTrainingDataDirectory() + word);
+ oscc = new OSCCME(model, OSCCParams);
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ WSDHelper.print("skipped sample");
+ }
+
+ WSDEvaluator evaluator = new WSDEvaluator(oscc);
ArrayList<WSDSample> instances = seReader.getSensevalData(word);
if (instances != null) {
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
if (instance.getSenseIDs() != null
- && !instance.getSenseIDs().get(0).equals("null")) {
+ && !instance.getSenseIDs()[0].equals("null")) {
evaluator.evaluateSample(instance);
+ }else{
+ WSDHelper.print("skipped sample");
}
}
WSDHelper.print(evaluator.toString());
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
index d6f55a6..63fb07d 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
@@ -156,9 +156,8 @@
*/
@Test
public void testOneWordDisambiguation() {
- String[] senses = oscc.disambiguate(sentence1, tags1, lemmas1, 8);
-
- assertEquals("Check number of senses", 1, senses.length);
+ String sense = oscc.disambiguate(sentence1, tags1, lemmas1, 8);
+ assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
}
/*
@@ -169,13 +168,16 @@
@Test
public void testWordSpanDisambiguation() {
Span span = new Span(3, 7);
- List<String[]> senses = oscc.disambiguate(sentence2, tags2, lemmas2, span);
+ List<String> senses = oscc.disambiguate(sentence2, tags2, lemmas2, span);
+
assertEquals("Check number of returned words", 5, senses.size());
- assertEquals("Check number of senses", 1, senses.get(0).length);
- assertEquals("Check monosemous word", 1, senses.get(1).length);
- assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
- assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
+ senses.get(0));
+ assertEquals("Check 'radioactive' sense ID",
+ "WORDNET radioactive%3:00:00::", senses.get(1));
+ assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+ assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
}
/*
@@ -183,11 +185,11 @@
*/
@Test
public void testAllWordsDisambiguation() {
- List<String[]> senses = oscc.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses = oscc.disambiguate(sentence3, tags3, lemmas3);
assertEquals("Check number of returned words", 15, senses.size());
assertEquals("Check preposition", "WSDHELPER personal pronoun",
- senses.get(6)[0]);
+ senses.get(6));
}
}
\ No newline at end of file
diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
index d657f56..16172f8 100644
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
+++ b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
@@ -29,7 +29,7 @@
String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
// output
- List<String[]> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
for (int i = 0; i < sentence3.length; i++) {
System.out.print(sentence3[i] + " : ");
WSDHelper.printResults(ims, senses3.get(i));