// blob: aaab13fb7bc9e24162bbdf58a6ca435f66248333 [file] [log] [blame]
/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.decoder;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.segment_file.Sentence;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.state_maintenance.StateComputer;
import joshua.decoder.ff.tm.GrammarFactory;
import joshua.decoder.hypergraph.HyperGraph;
import joshua.discriminative.FileUtilityOld;
import joshua.util.FileUtility;
import joshua.util.Regex;
import joshua.util.io.LineReader;
/**
 * Creates and drives the <code>DecoderThread</code> instances that do
 * the actual decoding:
 * (1) parallel decoding: build
 *     <code>JoshuaConfiguration.num_parallel_decoders</code> decoder
 *     threads over one shared <code>InputHandler</code>, start them,
 *     and wait for all of them to finish;
 * (2) non-parallel decoding is a special case of parallel decoding.
 *
 * @author Zhifei Li, <zhifei.work@gmail.com>
 * @version $LastChangedDate$
 */
public class DecoderFactory {

    private static final Logger logger =
        Logger.getLogger(DecoderFactory.class.getName());

    private final List<GrammarFactory> grammarFactories;
    private final List<FeatureFunction> featureFunctions;
    private final List<StateComputer> stateComputers;

    /** Stored by the constructor; not read anywhere inside this class. */
    private final boolean useMaxLMCostForOOV;

    /**
     * Shared symbol table for source language terminals, target
     * language terminals, and shared nonterminals.
     */
    private final SymbolTable symbolTable;

    /** Threads created by the most recent successful setup in {@link #decodeTestSet}. */
    private DecoderThread[] decoderThreads;

    /**
     * Stores the components that are handed to every
     * <code>DecoderThread</code> this factory creates.
     *
     * @param grammarFactories   grammar factories passed to each decoder thread
     * @param useMaxLMCostForOOV stored as given; not otherwise used by this class
     * @param featureFunctions   feature functions passed to each decoder thread
     * @param stateComputers     state computers passed to each decoder thread
     * @param symbolTable        symbol table passed to each decoder thread
     */
    public DecoderFactory(List<GrammarFactory> grammarFactories, boolean useMaxLMCostForOOV, List<FeatureFunction> featureFunctions,
            List<StateComputer> stateComputers, SymbolTable symbolTable) {
        this.grammarFactories = grammarFactories;
        this.useMaxLMCostForOOV = useMaxLMCostForOOV;
        this.featureFunctions = featureFunctions;
        this.stateComputers = stateComputers;
        this.symbolTable = symbolTable;
    }

    /**
     * This is the public-facing method to decode a set of
     * sentences. It creates
     * <code>JoshuaConfiguration.num_parallel_decoders</code> decoder
     * threads that all read from one <code>InputHandler</code> built
     * on <code>testFile</code>, starts them, and blocks until each
     * one has been joined.
     *
     * If the calling thread is interrupted while waiting, a warning
     * is logged, waiting continues with the next decoder thread, and
     * the caller's interrupt status is restored before returning.
     *
     * (Matt Post, August 2011) This needs to be rewritten. The
     * proper way to do it is to put all the sentences in a queue or
     * wrap access to them in a thread-safe class. Then start the
     * decoder threads. Each thread obtains the sentence to decode and
     * deposits it somewhere. Deposits are then accumulated and
     * output sequentially.
     *
     * @param testFile   input handed to the <code>InputHandler</code>
     * @param nbestFile  not used by the current implementation
     * @param oracleFile not used by the current implementation
     * @throws IllegalStateException if a decoder thread cannot be
     *         created; no thread has been started when this is thrown
     */
    public void decodeTestSet(String testFile, String nbestFile, String oracleFile) {
        // create the input manager; every decoder thread shares it
        InputHandler inputHandler = new InputHandler(testFile);

        // Build every thread before starting any of them. A failed
        // construction used to leave a null slot behind, which blew up
        // later with a NullPointerException after some threads were
        // already running; failing here keeps the failure clean.
        DecoderThread[] threads = new DecoderThread[JoshuaConfiguration.num_parallel_decoders];
        for (int threadno = 0; threadno < threads.length; threadno++) {
            try {
                threads[threadno] = new DecoderThread(
                    this.grammarFactories, this.featureFunctions, this.stateComputers,
                    this.symbolTable, inputHandler);
            } catch (IOException e) {
                throw new IllegalStateException(
                    "unable to create decoder thread " + threadno, e);
            }
        }
        this.decoderThreads = threads;

        // start them all
        for (DecoderThread thread : threads) {
            thread.start();
        }

        // wait for them to complete
        boolean interrupted = false;
        for (int threadno = 0; threadno < threads.length; threadno++) {
            try {
                threads[threadno].join();
            } catch (InterruptedException e) {
                // Remember the interruption instead of re-asserting it
                // right away: with the flag set, every remaining join()
                // would return immediately.
                interrupted = true;
                if (logger.isLoggable(Level.WARNING))
                    logger.warning("thread " + threadno + " was interupted");
            }
        }
        if (interrupted) {
            // hand the interruption back to the caller
            Thread.currentThread().interrupt();
        }

        // Disabled legacy code (non-parallel rule dump for disk hypergraphs):
        // if (JoshuaConfiguration.save_disk_hg) {
        //     pdecoder.hypergraphSerializer.writeRulesNonParallel(
        //         nbestFile + ".hg.rules");
        // }
    }

    /**
     * Decode a single sentence and return its hypergraph.
     *
     * A fresh <code>DecoderThread</code> is created without an input
     * handler and its <code>translate</code> method is called
     * directly on the calling thread; the thread is never started.
     *
     * @param sentence the text to decode; wrapped in a
     *        <code>Sentence</code> with id 0
     * @return the result of <code>translate</code>, or
     *         <code>null</code> if an <code>IOException</code> occurred
     *         (the exception is logged at SEVERE level)
     */
    public HyperGraph getHyperGraphForSentence(String sentence) {
        try {
            DecoderThread decoder = new DecoderThread(
                this.grammarFactories, this.featureFunctions, this.stateComputers,
                this.symbolTable, null);
            return decoder.translate(new Sentence(sentence, 0), null);
        } catch (IOException e) {
            logger.log(Level.SEVERE, "unable to decode sentence", e);
            return null;
        }
    }

    // Disabled legacy code: merge the grammar rules for disk hyper-graphs
    // if (JoshuaConfiguration.save_disk_hg) {
    //     HashMap<Integer,Integer> tblDone = new HashMap<Integer,Integer>();
    //     BufferedWriter rulesWriter = FileUtility.getWriteFileStream(nbestFile + ".hg.rules");
    //     for (DecoderThread decoder : this.decoderThreads) {
    //         decoder.hypergraphSerializer.writeRulesParallel(rulesWriter, tblDone);
    //         //decoder.hypergraphSerializer.closeReaders();
    //     }
    //     rulesWriter.flush();
    //     rulesWriter.close();
    // }
}