OPENNLP-821: Mallet addon now builds and runs with OpenNLP 1.6.0
diff --git a/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java b/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
index 2980131..5772925 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
@@ -29,8 +29,11 @@
import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.ml.model.SequenceStream;
import cc.mallet.fst.CRF;
+import cc.mallet.fst.CRFOptimizableByLabelLikelihood;
import cc.mallet.fst.CRFTrainerByLabelLikelihood;
+import cc.mallet.fst.CRFTrainerByValueGradients;
import cc.mallet.fst.Transducer;
+import cc.mallet.optimize.Optimizable;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.FeatureVectorSequence;
@@ -45,11 +48,6 @@
// Dummy feature generator ?!
public class CRFTrainer extends AbstractSequenceTrainer {
- public CRFTrainer(Map<String, String> trainParams,
- Map<String, String> reportMap) {
- super(trainParams, reportMap);
- }
-
private int[] getOrders() {
String[] ordersString = "0,1".split(",");
int[] orders = new int[ordersString.length];
@@ -71,7 +69,8 @@
InstanceList trainingData = new InstanceList(dataAlphabet, targetAlphabet);
int nameIndex = 0;
- for (Sequence sequence : sequences) {
+ Sequence sequence;
+ while ((sequence = sequences.read()) != null) {
FeatureVector featureVectors[] = new FeatureVector[sequence.getEvents().length];
Label malletOutcomes[] = new Label[sequence.getEvents().length];
@@ -132,23 +131,22 @@
crf);
crfTrainer.setGaussianPriorVariance(1.0);
- // CRFOptimizableByLabelLikelihood optLabel = new
- // CRFOptimizableByLabelLikelihood(
- // crf, trainingData);
-
- // CRF trainer
- // Optimizable.ByGradientValue[] opts = new Optimizable.ByGradientValue[] {
- // optLabel };
+// CRFOptimizableByLabelLikelihood optLabel = new
+// CRFOptimizableByLabelLikelihood(crf, trainingData);
+//
+// // CRF trainer
+// Optimizable.ByGradientValue[] opts = new Optimizable.ByGradientValue[] {
+// optLabel };
// by default, use L-BFGS as the optimizer
- // CRFTrainerByValueGradients crfTrainer = new CRFTrainerByValueGradients(
- // crf, opts);
- // crfTrainer.setMaxResets(0);
+// CRFTrainerByValueGradients crfTrainer = new CRFTrainerByValueGradients(
+// crf, opts);
+// crfTrainer.setMaxResets(0);
// SNIP
crfTrainer.train(trainingData, Integer.MAX_VALUE);
-
+
// can be very similar to the other model
// one important difference is that the feature gen needs to be integrated
// ...
diff --git a/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java b/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
index 34f5f7c..2967bbc 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
@@ -37,11 +37,6 @@
public class MaxentTrainer extends AbstractEventTrainer {
- public MaxentTrainer(Map<String, String> trainParams,
- Map<String, String> reportMap) {
- super(trainParams, reportMap);
- }
-
@Override
public boolean isSortAndMerge() {
return true;
diff --git a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
index 52f2ce5..e713d83 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
@@ -27,7 +27,6 @@
import opennlp.tools.util.BeamSearchContextGenerator;
import opennlp.tools.util.SequenceValidator;
import opennlp.tools.util.model.SerializableArtifact;
-import cc.mallet.fst.CRF;
import cc.mallet.fst.MaxLatticeDefault;
import cc.mallet.fst.Transducer;
import cc.mallet.types.Alphabet;
@@ -53,6 +52,14 @@
return bestSequences(1, sequence, additionalContext, cg, validator)[0];
}
+  @Override
+  public opennlp.tools.util.Sequence[] bestSequences(int numSequences,
+      T[] sequence, Object[] additionalContext, double minSequenceScore,
+      BeamSearchContextGenerator<T> cg, SequenceValidator<T> validator) {
+    // TODO: minSequenceScore is not applied yet; the unfiltered search result is returned.
+    return this.bestSequences(numSequences, sequence, additionalContext, cg, validator);
+  }
+
public opennlp.tools.util.Sequence[] bestSequences(int numSequences,
T[] sequence, Object[] additionalContext,
BeamSearchContextGenerator<T> cg, SequenceValidator<T> validator) {
@@ -121,4 +128,20 @@
public Class<?> getArtifactSerializerClass() {
return TransducerModelSerializer.class;
}
+
+
+
+  /** Returns every label in the input pipe's target alphabet, in index order. */
+  @Override
+  public String[] getOutcomes() {
+    Alphabet targets = model.getInputPipe().getTargetAlphabet();
+    int outcomeCount = targets.size();
+
+    // Each alphabet entry is rendered through toString() to get its outcome name.
+    String[] labels = new String[outcomeCount];
+    for (int index = 0; index < outcomeCount; index++) {
+      labels[index] = targets.lookupObject(index).toString();
+    }
+    return labels;
+  }
}