blob: 427e3d9afaa4bfd02ed66baba9e914a001c334ba [file] [log] [blame]
package joshua.decoder;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.util.List;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.lm.StateMinimizingLanguageModel;
import joshua.decoder.hypergraph.HyperGraph;
import joshua.decoder.hypergraph.KBestExtractor;
import joshua.decoder.hypergraph.ViterbiExtractor;
import joshua.decoder.io.DeNormalize;
import joshua.decoder.segment_file.Sentence;
/**
* This class represents translated input objects (sentences or lattices). It is aware of the source
* sentence and id and contains the decoded hypergraph. Translation objects are returned by
* DecoderThread instances to the InputHandler, where they are assembled in order for output.
*
* @author Matt Post <post@cs.jhu.edu>
*/
public class Translation {

  /** The input this translation was produced for. */
  private final Sentence source;

  /**
   * This stores the output of the translation so we don't have to hold onto the hypergraph while we
   * wait for the outputs to be assembled.
   */
  private final String output;

  /**
   * Renders the result of decoding one input into its final output text, which is afterwards
   * available through {@link #toString()}.
   *
   * <ul>
   * <li>With a non-null hypergraph, the hypergraph is optionally dumped to a file, the Viterbi
   * string is logged, and then either a single formatted line (when {@code topN} is 0) or the
   * k-best list is written.</li>
   * <li>With a null hypergraph and an empty source, a single empty line is written.</li>
   * <li>With a null hypergraph and a non-empty source, the output format is filled with the
   * source text and empty/default values.</li>
   * </ul>
   *
   * Afterwards the first {@link StateMinimizingLanguageModel} among the feature functions, if
   * any, is asked to destroy its pool for this sentence. An {@link IOException} while writing
   * prints the stack trace and terminates the JVM with exit status 1.
   *
   * @param source the input that was decoded
   * @param hypergraph the decoded hypergraph, or {@code null} if there is none
   * @param featureFunctions the feature functions passed to hypergraph dumping and k-best
   *        extraction
   * @param joshuaConfiguration supplies the output format, {@code topN}, the hypergraph dump
   *        pattern and the forest-rescoring settings
   */
  public Translation(Sentence source, HyperGraph hypergraph,
      List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) {
    this.source = source;

    StringWriter sw = new StringWriter();
    BufferedWriter out = new BufferedWriter(sw);

    try {
      if (hypergraph != null) {
        if (!joshuaConfiguration.hypergraphFilePattern.equals("")) {
          hypergraph.dump(String.format(joshuaConfiguration.hypergraphFilePattern, source.id()),
              featureFunctions);
        }

        long startTime = System.currentTimeMillis();

        // We must put this weight as zero, otherwise we get an error when we try to retrieve it
        // without checking
        Decoder.weights.increment("BLEU", 0);

        String best = stripBoundaryTokens(
            ViterbiExtractor.extractViterbiString(hypergraph.goalNode).trim());

        Decoder.LOG(1, String.format("Translation %d: %.3f %s", source.id(),
            hypergraph.goalNode.getScore(), best));

        if (joshuaConfiguration.topN == 0) {
          /*
           * Setting topN to 0 turns off k-best extraction, in which case we need to parse through
           * the output-string, with the understanding that we can only substitute variables for
           * the output string, sentence number, and model score.
           */
          String translation = expandPlaceholders(joshuaConfiguration.outputFormat,
              "%s", best,
              "%S", DeNormalize.processSingleLine(best),
              "%c", String.format("%.3f", hypergraph.goalNode.getScore()),
              "%i", String.format("%d", source.id()));

          out.write(translation);
          out.newLine();
        } else {
          KBestExtractor kBestExtractor = new KBestExtractor(source, featureFunctions,
              Decoder.weights, false, joshuaConfiguration);
          kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);

          if (joshuaConfiguration.rescoreForest) {
            // NOTE(review): if increment() is additive, the two calls below cancel out and the
            // last extraction runs with the same BLEU weight as the first one -- confirm whether
            // a negative weight was intended for the final pass.
            Decoder.weights.increment("BLEU", joshuaConfiguration.rescoreForestWeight);
            kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);

            Decoder.weights.increment("BLEU", -joshuaConfiguration.rescoreForestWeight);
            kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
          }
        }

        float seconds = (float) (System.currentTimeMillis() - startTime) / 1000.0f;
        Decoder.LOG(1, String.format("Input %d: %d-best extraction took %.3f seconds",
            source.id(), joshuaConfiguration.topN, seconds));

      } else if (source.isEmpty()) {
        // Empty output just gets echoed back
        out.newLine();

      } else {
        // Failed translations get empty formatted outputs
        String outputString = expandPlaceholders(joshuaConfiguration.outputFormat,
            "%s", source.source(),
            "%e", "",
            "%S", "",
            "%t", "()",
            "%i", Integer.toString(source.id()),
            "%f", "",
            "%c", "0.000");

        out.write(outputString);
        out.newLine();
      }

      out.flush();
    } catch (IOException e) {
      e.printStackTrace();
      System.exit(1);
    } finally {
      /*
       * KenLM hack. If using KenLMFF, we need to tell KenLM to delete the pool used to create
       * chart objects for this sentence. Doing it in a finally block means the pool is also
       * released when extraction fails with an unchecked exception.
       */
      for (FeatureFunction feature : featureFunctions) {
        if (feature instanceof StateMinimizingLanguageModel) {
          ((StateMinimizingLanguageModel) feature).destroyPool(source.id());
          break;
        }
      }
    }

    this.output = sw.toString();
  }

  /**
   * Drops the first and the last space-delimited token of a string and returns what lies between
   * them. The caller uses this on the Viterbi string (presumably to remove sentence boundary
   * markers -- confirm against ViterbiExtractor).
   *
   * @param viterbi the trimmed Viterbi string
   * @return the text between the first and the last space, or the empty string when the input
   *         has fewer than three tokens and therefore nothing lies between them
   */
  private static String stripBoundaryTokens(String viterbi) {
    int firstSpace = viterbi.indexOf(' ');
    int lastSpace = viterbi.lastIndexOf(' ');
    // No space, or a single space: an unguarded substring(firstSpace + 1, lastSpace) would throw.
    if (firstSpace < 0 || lastSpace <= firstSpace) {
      return "";
    }
    return viterbi.substring(firstSpace + 1, lastSpace);
  }

  /**
   * Expands placeholders in a format string in a single left-to-right pass. Inserted values are
   * never rescanned, so a value that itself contains something that looks like a placeholder is
   * emitted verbatim.
   *
   * @param format the format string containing placeholders such as {@code %s} or {@code %i}
   * @param pairs alternating placeholder / replacement strings; earlier pairs take precedence
   *        when more than one placeholder matches at the same position
   * @return the format string with every listed placeholder replaced; unlisted text, including
   *         unknown placeholders, is copied unchanged
   */
  private static String expandPlaceholders(String format, String... pairs) {
    StringBuilder expanded = new StringBuilder(format.length() + 64);
    int pos = 0;
    while (pos < format.length()) {
      String replacement = null;
      int consumed = 0;
      if (format.charAt(pos) == '%') {
        for (int p = 0; p + 1 < pairs.length; p += 2) {
          if (format.startsWith(pairs[p], pos)) {
            replacement = pairs[p + 1];
            consumed = pairs[p].length();
            break;
          }
        }
      }
      if (replacement != null) {
        expanded.append(replacement);
        pos += consumed;
      } else {
        expanded.append(format.charAt(pos));
        pos++;
      }
    }
    return expanded.toString();
  }

  /**
   * @return the input sentence this translation belongs to
   */
  public Sentence getSourceSentence() {
    return this.source;
  }

  /**
   * @return the id of the source sentence
   */
  public int id() {
    return source.id();
  }

  /**
   * @return the output text assembled by the constructor
   */
  @Override
  public String toString() {
    return output;
  }
}