/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.decoder;
import joshua.util.Cache;
import joshua.util.Regex;
import joshua.util.io.LineReader;
import java.util.Arrays;
import java.util.Random;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.io.PrintWriter;
import java.io.FileWriter;
import java.io.File;
import java.io.IOException;
/**
* Configuration for the Joshua decoder, populated from a config file and/or command-line options.
* <p>
* When adding new features to Joshua, any new configurable parameters
* should be added to this class.
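* <p>
* A minimal usage sketch (the config file name here is illustrative, and
* readConfigFile throws IOException):
* <pre>
*   JoshuaConfiguration.readConfigFile("joshua.config");
*   int order = JoshuaConfiguration.lm_order;
* </pre>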
*
* @author Zhifei Li, <zhifei.work@gmail.com>
*/
public class JoshuaConfiguration {
//lm config
public static boolean use_srilm = false;
public static boolean use_kenlm = false;
public static boolean use_bloomfilter_lm = false;
public static boolean use_trie_lm = false;
public static double lm_ceiling_cost = 100;
public static boolean use_left_equivalent_state = false;
public static boolean use_right_equivalent_state = true;
public static int lm_order = 3;
public static boolean use_sent_specific_lm = false;
public static String g_sent_lm_file_name_prefix = "lm.";
public static String lm_file = null; // TODO
public static int ngramStateID = 0; // TODO: document what this field is for
//tm config
public static int span_limit = 10;
//note: owners should be distinct from each other; an owner may share its value with a word in the LM/TM
public static String phrase_owner = "pt";
public static String glue_owner = "glue_owner"; //if such a rule is applied, then no reordering is possible
public static String default_non_terminal = "PHRASE";
public static String goal_symbol = "S";
public static boolean use_sent_specific_tm = false;
public static boolean keep_sent_specific_tm = false;
public static String g_sent_tm_file_name_prefix = "tm.";
public static String tm_file = null;
public static String tm_format = null;
// TODO: default to glue grammar provided with Joshua
// TODO: support multiple glue grammars
public static String glue_file = null;
public static String glue_format = null;
// syntax-constrained decoding
public static boolean constrain_parse = false;
public static boolean use_pos_labels = false;
// oov-specific
public static float oov_feature_cost = 100;
public static boolean use_max_lm_cost_for_oov = false;
public static int oov_feature_index = -1;
// number of phrasal features, for correct oov rule creation
public static int num_phrasal_features = 0;
// Parameters for suffix array grammar
// /** File name prefix for source language binary training files. */
// public static String sa_source = null;
//
// /** File name prefix for source language binary training files. */
// public static String sa_target = null;
//
// /** File name of source-target training corpus alignments. */
// public static String sa_alignment = null;
public static int sa_max_phrase_span = 10;
public static int sa_max_phrase_length = 10;
public static int sa_max_nonterminals = 2;
public static int sa_min_nonterminal_span = 2;
public static int sa_lex_sample_size = 1000;
public static int sa_lex_cache_size = Cache.DEFAULT_CAPACITY;
public static boolean sa_precalculate_lexprobs = false;
public static int sa_rule_sample_size = 300;
public static int sa_rule_cache_size = 1000;
public static boolean sa_sentence_initial_X = true;
public static boolean sa_sentence_final_X = true;
public static boolean sa_edgeXMayViolatePhraseSpan = true;
public static float sa_lex_floor_prob = Float.MIN_VALUE;
// TODO: introduce the various corpus/tm file package formats
// public static String sa_vocab_suffix = "vocab";
// public static String sa_corpus_suffix = "corpus";
// public static String sa_suffixes_suffix = "suffixes";
//pruning config
//note: cube pruning and "beamAndThreshold" pruning can be used together
public static boolean useCubePrune = true;
public static boolean useBeamAndThresholdPrune = true;
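// For example, a config file may enable both pruning methods at once
// (these keys are parsed in readConfigFile below; values are illustrative):
//   useCubePrune=true
//   useBeamAndThresholdPrune=true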
public static double fuzz1 = 0.1;
public static double fuzz2 = 0.1;
public static int max_n_items = 30;
public static double relative_threshold = 10.0;
public static int max_n_rules = 50;
//nbest config
public static boolean use_unique_nbest = false;
public static boolean use_tree_nbest = false;
public static boolean include_align_index = false;
public static boolean add_combined_cost = true; //in the nbest file, compute the final score
public static int topN = 500;
public static boolean escape_trees = false;
//remote lm server
public static boolean use_remote_lm_server = false;
public static String remote_symbol_tbl = "null"; //this file will first be created by remote_lm_server, and read by remote_suffix_server and the decoder
public static int num_remote_lm_servers = 1;
public static String f_remote_server_list = "null";
//parallel decoding
public static String parallel_files_prefix = "/tmp/temp.parallel"; // C:\\Users\\zli\\Documents\\temp.parallel; used for parallel decoding
public static int num_parallel_decoders = 1; //number of decoder threads to run
//disk hg
public static boolean save_disk_hg = false; //if true, save three files: fnbest, fnbest.hg.items, fnbest.hg.rules
public static boolean use_kbest_hg = false;
public static boolean forest_pruning = false;
public static double forest_pruning_threshold = 10;
// hypergraph visualization
public static boolean visualize_hypergraph = false;
//variational decoding
public static boolean use_variational_decoding = false;
//debug
public static boolean extract_confusion_grammar = false; //non-parallel version
public static String f_confusion_grammar = "C:\\Users\\zli\\Documents\\confusion.hg.grammar";
//debug end
// do we use a LM feature?
public static boolean have_lm_model = false;
public static String segmentFileParserClass = null;//PlainSegmentParser, HackishSegmentParser, SAXSegmentParser
// discriminative model options
public static boolean useTMFeat = true;
public static boolean useRuleIDName = false;
public static boolean useLMFeat = true;
public static boolean useTMTargetFeat = true;
public static boolean useEdgeNgramOnly = false;
public static int startNgramOrder = 1;
public static int endNgramOrder = 2;
public static boolean useMicroTMFeat = true;
public static String wordMapFile; /* table for mapping rule words */
// use google linear corpus gain?
public static boolean useGoogleLinearCorpusGain = false;
public static double[] linearCorpusGainThetas = null;
public static boolean mark_oovs = true;
private static final Logger logger =
Logger.getLogger(JoshuaConfiguration.class.getName());
//===============================================================
// Methods
//===============================================================
/**
* To process command-line options, we write them to a file that
* looks like the config file, and then call readConfigFile() on
* it. It would be more general to define a class that sits on a
* stream and knows how to chop it up, but this was quicker to implement.
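* <p>
* For example (the flag names below are real config keys, the values are
* illustrative), the command-line options
* <pre>
*   -lm_file example.lm.gz -use_unique_nbest
* </pre>
* are rewritten as the config lines
* <pre>
*   lm_file=example.lm.gz
*   use_unique_nbest=true
* </pre>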
*/
public static void processCommandLineOptions(String[] options) {
try {
File tmpFile = File.createTempFile("options", null, null);
PrintWriter out = new PrintWriter(new FileWriter(tmpFile));
for (int i = 0; i < options.length; i++) {
String key = options[i].substring(1);
if (i + 1 == options.length || options[i+1].startsWith("-")) {
// if this is the last item, or if the next item
// is another flag, then this is an argument-less
// flag
out.println(key + "=true");
} else {
out.println(key + "=" + options[i+1]);
// skip the next item
i++;
}
}
out.close();
JoshuaConfiguration.readConfigFile(tmpFile.getCanonicalPath());
tmpFile.delete();
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
}
// This is static instead of a constructor because all the fields are static. Yuck.
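// The config file mixes "key=value" parameter lines with whitespace-separated
// feature-function lines; a small illustrative snippet (values are examples only):
//   lm_file=example.lm.gz
//   order=3
//   use_unique_nbest=true
//   lm 1.0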
public static void readConfigFile(String configFile) throws IOException {
LineReader configReader = new LineReader(configFile);
try { for (String line : configReader) {
line = line.trim(); // .toLowerCase();
if (Regex.commentOrEmptyLine.matches(line)) continue;
if (line.indexOf("=") != -1) { // parameter line (not a feature function)
String[] fds = Regex.equalsWithSpaces.split(line);
if (fds.length != 2) {
logger.severe("Wrong config line: " + line);
System.exit(1);
}
if ("lm_file".equals(fds[0])) {
lm_file = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("lm file: %s", lm_file));
} else if ("tm_file".equals(fds[0])) {
tm_file = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("tm file: %s", tm_file));
} else if ("glue_file".equals(fds[0])) {
glue_file = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("glue file: %s", glue_file));
} else if ("tm_format".equals(fds[0])) {
tm_format = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("tm format: %s", tm_format));
} else if ("glue_format".equals(fds[0])) {
glue_format = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("glue format: %s", glue_format));
// } else if ("sa_source".equals(fds[0])) {
// sa_source = fds[1].trim();
// if (logger.isLoggable(Level.FINEST))
// logger.finest(String.format("suffix array source file: %s", sa_source));
//
// } else if ("sa_target".equals(fds[0])) {
// sa_target = fds[1].trim();
// if (logger.isLoggable(Level.FINEST))
// logger.finest(String.format("suffix array target file: %s", sa_target));
//
// } else if ("sa_alignment".equals(fds[0])) {
// sa_alignment = fds[1].trim();
// if (logger.isLoggable(Level.FINEST))
// logger.finest(String.format("suffix array alignment file: %s", sa_alignment));
//
} else if ("sa_max_phrase_span".equals(fds[0])) {
sa_max_phrase_span = Integer.parseInt(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("suffix array maximum phrase span: %s", sa_max_phrase_span));
} else if ("sa_max_phrase_length".equals(fds[0])) {
sa_max_phrase_length = Integer.parseInt(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("suffix array maximum phrase length: %s", sa_max_phrase_length));
} else if ("sa_max_phrase_length".equals(fds[0])) {
sa_max_phrase_length = Integer.parseInt(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("suffix array maximum phrase length: %s", sa_max_phrase_length));
} else if ("sa_max_nonterminals".equals(fds[0])) {
sa_max_nonterminals = Integer.parseInt(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("suffix array maximum number of nonterminals: %s", sa_max_nonterminals));
} else if ("sa_min_nonterminal_span".equals(fds[0])) {
sa_min_nonterminal_span = Integer.parseInt(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("suffix array minimun nonterminal span: %s", sa_min_nonterminal_span));
} else if ("sa_lex_sample_size".equals(fds[0])) {
sa_lex_sample_size = Integer.parseInt(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("suffix array sample size for lexical probability calculation: %s", sa_lex_sample_size));
} else if ("sa_precalculate_lexprobs".equals(fds[0])) {
sa_precalculate_lexprobs = Boolean.valueOf(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("should lexical probabilities be precalculated: %s", sa_precalculate_lexprobs));
} else if ("sa_rule_sample_size".equals(fds[0])) {
sa_rule_sample_size = Integer.parseInt(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("suffix array sample size for rules: %s", sa_rule_sample_size));
} else if ("sa_rule_cache_size".equals(fds[0])) {
sa_rule_cache_size = Integer.parseInt(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("suffix array cache size for rules: %s", sa_rule_cache_size));
} else if ("sa_sentence_initial_X".equals(fds[0])) {
sa_sentence_initial_X = Boolean.valueOf(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("should suffix array rule extraction allow rules from sentence-initial X: %s", sa_sentence_initial_X));
} else if ("sa_sentence_final_X".equals(fds[0])) {
sa_sentence_final_X = Boolean.valueOf(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("should suffix array rule extraction allow rules from sentence-final X: %s", sa_sentence_final_X));
} else if ("sa_edgeXMayViolatePhraseSpan".equals(fds[0])) {
sa_edgeXMayViolatePhraseSpan = Boolean.valueOf(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("should suffix array rule extraction allow rules where sa_edgeXMayViolatePhraseSpan: %s", sa_edgeXMayViolatePhraseSpan));
} else if ("sa_lex_floor_prob".equals(fds[0])) {
sa_lex_floor_prob = Float.valueOf(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("floor value for probabilities returned as lexical transaltion probabilities: %s", sa_lex_floor_prob));
} else if ("use_srilm".equals(fds[0])) {
use_srilm = Boolean.valueOf(fds[1]);
if (use_srilm) {
use_kenlm = true;
System.err.println("WARNING: srilm no longer supported, will use KenLM instead");
}
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_srilm: %s", use_srilm));
} else if ("use_kenlm".equals(fds[0])) {
use_kenlm = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_kenlm: %s", use_kenlm));
} else if ("use_bloomfilter_lm".equals(fds[0])) {
use_bloomfilter_lm = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_bloomfilter_lm: %s", use_bloomfilter_lm));
} else if ("use_trie_lm".equals(fds[0])) {
use_trie_lm = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_trie_lm: %s", use_trie_lm));
} else if ("lm_ceiling_cost".equals(fds[0])) {
lm_ceiling_cost = Double.parseDouble(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("lm_ceiling_cost: %s", lm_ceiling_cost));
// BUG: accepting typos in config file is not acceptable
} else if ("use_left_euqivalent_state".equals(fds[0])) {
use_left_equivalent_state = Boolean.parseBoolean(fds[1]);
logger.warning("Misspelling in configuration file: 'use_right_euqivalent_state'");
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_left_equivalent_state: %s", use_left_equivalent_state));
// BUG: accepting typos in config file is not acceptable
} else if ("use_right_euqivalent_state".equals(fds[0])) {
use_right_equivalent_state = Boolean.parseBoolean(fds[1]);
logger.warning("Misspelling in configuration file: 'use_right_euqivalent_state'");
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_right_equivalent_state: %s", use_right_equivalent_state));
} else if ("use_left_equivalent_state".equals(fds[0])) {
use_left_equivalent_state = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_left_equivalent_state: %s", use_left_equivalent_state));
} else if ("use_right_equivalent_state".equals(fds[0])) {
use_right_equivalent_state = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_right_equivalent_state: %s", use_right_equivalent_state));
} else if ("order".equals(fds[0])) {
lm_order = Integer.parseInt(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("g_lm_order: %s", lm_order));
} else if ("use_sent_specific_lm".equals(fds[0])) {
use_sent_specific_lm = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_sent_specific_lm: %s", use_sent_specific_lm));
} else if ("sent_lm_file_name_prefix".equals(fds[0])) {
g_sent_lm_file_name_prefix = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("sent_lm_file_name_prefix: %s", g_sent_lm_file_name_prefix));
} else if ("use_sent_specific_tm".equals(fds[0])) {
use_sent_specific_tm = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_sent_specific_tm: %s", use_sent_specific_tm));
} else if ("keep_sent_specific_tm".equals(fds[0])) {
keep_sent_specific_tm = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("keep_sent_specific_tm: %s", use_sent_specific_tm));
} else if ("sent_tm_file_name_prefix".equals(fds[0])) {
g_sent_tm_file_name_prefix = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("sent_tm_file_name_prefix: %s", g_sent_tm_file_name_prefix));
} else if ("span_limit".equals(fds[0])) {
span_limit = Integer.parseInt(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("span_limit: %s", span_limit));
} else if ("phrase_owner".equals(fds[0])) {
phrase_owner = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("phrase_owner: %s", phrase_owner));
} else if ("glue_owner".equals(fds[0])) {
glue_owner = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("glue_owner: %s", glue_owner));
} else if ("default_non_terminal".equals(fds[0])) {
default_non_terminal = "[" + fds[1].trim() + "]";
// default_non_terminal = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("default_non_terminal: %s", default_non_terminal));
} else if ("goalSymbol".equals(fds[0]) || "goal_symbol".equals(fds[0]) ) {
goal_symbol = "[" + fds[1].trim() + "]";
// goal_symbol = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("goalSymbol: %s", goal_symbol));
} else if ("constrain_parse".equals(fds[0])) {
constrain_parse = Boolean.parseBoolean(fds[1]);
} else if ("oov_feature_index".equals(fds[0])) {
oov_feature_index = Integer.parseInt(fds[1]);
} else if ("use_pos_labels".equals(fds[0])) {
use_pos_labels = Boolean.parseBoolean(fds[1]);
} else if ("fuzz1".equals(fds[0])) {
fuzz1 = Double.parseDouble(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("fuzz1: %s", fuzz1));
} else if ("fuzz2".equals(fds[0])) {
fuzz2 = Double.parseDouble(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("fuzz2: %s", fuzz2));
} else if ("max_n_items".equals(fds[0])) {
max_n_items = Integer.parseInt(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("max_n_items: %s", max_n_items));
} else if ("relative_threshold".equals(fds[0])) {
relative_threshold = Double.parseDouble(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("relative_threshold: %s", relative_threshold));
} else if ("max_n_rules".equals(fds[0])) {
max_n_rules = Integer.parseInt(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("max_n_rules: %s", max_n_rules));
} else if ("use_unique_nbest".equals(fds[0])) {
use_unique_nbest = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_unique_nbest: %s", use_unique_nbest));
} else if ("add_combined_cost".equals(fds[0])) {
add_combined_cost = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("add_combined_cost: %s", add_combined_cost));
} else if ("use_tree_nbest".equals(fds[0])) {
use_tree_nbest = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_tree_nbest: %s", use_tree_nbest));
} else if ("escape_trees".equals(fds[0])) {
escape_trees = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("escape_trees: %s", escape_trees));
} else if ("include_align_index".equals(fds[0])) {
include_align_index = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("include_align_index: %s", include_align_index));
} else if ("top_n".equals(fds[0])) {
topN = Integer.parseInt(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("topN: %s", topN));
} else if ("use_remote_lm_server".equals(fds[0])) {
use_remote_lm_server = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_remote_lm_server: %s", use_remote_lm_server));
} else if ("f_remote_server_list".equals(fds[0])) {
f_remote_server_list = fds[1];
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("f_remote_server_list: %s", f_remote_server_list));
} else if ("num_remote_lm_servers".equals(fds[0])) {
num_remote_lm_servers = Integer.parseInt(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("num_remote_lm_servers: %s", num_remote_lm_servers));
} else if ("remote_symbol_tbl".equals(fds[0])) {
remote_symbol_tbl = fds[1];
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("remote_symbol_tbl: %s", remote_symbol_tbl));
} else if ("remote_lm_server_port".equals(fds[0])) {
//port = Integer.parseInt(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("remote_lm_server_port: not used"));
} else if ("parallel_files_prefix".equals(fds[0])) {
Random random = new Random();
int v = random.nextInt(10000000); // append a random number so the prefix differs between runs
parallel_files_prefix = fds[1] + v;
logger.info(String.format("parallel_files_prefix: %s", parallel_files_prefix));
} else if ("num_parallel_decoders".equals(fds[0]) || "threads".equals(fds[0]) ) {
num_parallel_decoders = Integer.parseInt(fds[1]);
if (num_parallel_decoders <= 0) {
throw new IllegalArgumentException("Must specify a positive number for num_parallel_decoders");
}
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("num_parallel_decoders: %s", num_parallel_decoders));
} else if ("save_disk_hg".equals(fds[0])) {
save_disk_hg = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("save_disk_hg: %s", save_disk_hg));
} else if ("use_kbest_hg".equals(fds[0])) {
use_kbest_hg = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("use_kbest_hg: %s", use_kbest_hg));
} else if ("forest_pruning".equals(fds[0])) {
forest_pruning = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("forest_pruning: %s", forest_pruning));
} else if ("forest_pruning_threshold".equals(fds[0])) {
forest_pruning_threshold = Double.parseDouble(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("forest_pruning_threshold: %s", forest_pruning_threshold));
} else if ("visualize_hypergraph".equals(fds[0])) {
visualize_hypergraph = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("visualize_hypergraph: %s", visualize_hypergraph));
} else if ("mark_oovs".equals(fds[0])) {
mark_oovs = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("mark_oovs: %s", mark_oovs));
} else if ("segment_file_parser_class".equals(fds[0])) {
segmentFileParserClass = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest("segmentFileParserClass: " + segmentFileParserClass);
} else if ("useCubePrune".equals(fds[0])) {
useCubePrune = Boolean.valueOf(fds[1]);
if (!useCubePrune)
logger.warning("useCubePrune=false");
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("useCubePrune: %s", useCubePrune));
} else if ("useBeamAndThresholdPrune".equals(fds[0])) {
useBeamAndThresholdPrune = Boolean.valueOf(fds[1]);
if (!useBeamAndThresholdPrune)
logger.warning("useBeamAndThresholdPrune=false");
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("useBeamAndThresholdPrune: %s", useBeamAndThresholdPrune));
} else if ("oovFeatureCost".equals(fds[0])) {
oov_feature_cost = Float.parseFloat(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("oovFeatureCost: %s", oov_feature_cost));
} else if ("useTMFeat".equals(fds[0])) {
useTMFeat = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("useTMFeat: %s", useTMFeat));
} else if ("useLMFeat".equals(fds[0])) {
useLMFeat = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("useLMFeat: %s", useLMFeat));
} else if ("useMicroTMFeat".equals(fds[0])) {
useMicroTMFeat = Boolean.valueOf(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("useMicroTMFeat: %s", useMicroTMFeat));
} else if ("wordMapFile".equals(fds[0])) {
wordMapFile = fds[1].trim();
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("wordMapFile: %s", wordMapFile));
} else if ("useRuleIDName".equals(fds[0])) {
useRuleIDName = Boolean.valueOf(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("useRuleIDName: %s", useRuleIDName));
}else if ("startNgramOrder".equals(fds[0])) {
startNgramOrder = Integer.parseInt(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("startNgramOrder: %s", startNgramOrder));
} else if ("endNgramOrder".equals(fds[0])) {
endNgramOrder = Integer.parseInt(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("endNgramOrder: %s", endNgramOrder));
}else if ("useEdgeNgramOnly".equals(fds[0])) {
useEdgeNgramOnly = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("useEdgeNgramOnly: %s", useEdgeNgramOnly));
}else if ("useTMTargetFeat".equals(fds[0])) {
useTMTargetFeat = Boolean.valueOf(fds[1]);
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("useTMTargetFeat: %s", useTMTargetFeat));
} else if ("useGoogleLinearCorpusGain".equals(fds[0])) {
useGoogleLinearCorpusGain = Boolean.valueOf(fds[1].trim());
if (logger.isLoggable(Level.FINEST))
logger.finest(String.format("useGoogleLinearCorpusGain: %s", useGoogleLinearCorpusGain));
} else if ("googleBLEUWeights".equals(fds[0])) {
// expects five semicolon-separated weights, e.g. "w0;w1;w2;w3;w4"
String[] googleWeights = fds[1].trim().split(";");
if (googleWeights.length != 5) {
logger.severe("Wrong config line: " + line);
System.exit(1);
}
linearCorpusGainThetas = new double[5];
for (int i = 0; i < 5; i++)
linearCorpusGainThetas[i] = Double.parseDouble(googleWeights[i]);
logger.finest(String.format("googleBLEUWeights: %s", Arrays.toString(linearCorpusGainThetas)));
} else {
logger.warning("Maybe Wrong config line: " + line);
}
} else { // feature function
String[] fds = Regex.spaces.split(line);
if ("lm".equals(fds[0]) && fds.length == 2) { // lm weight
have_lm_model = true;
if (Double.parseDouble(fds[1].trim()) != 0) {
use_max_lm_cost_for_oov = true;
}
logger.info("You are using an LM feature function, so make sure you have an LM grammar");
logger.info("useMaxLMCostForOOV=" + use_max_lm_cost_for_oov);
}
}
} } finally { configReader.close(); }
if (useGoogleLinearCorpusGain) {
if (linearCorpusGainThetas == null) {
logger.severe("linearCorpusGainThetas is null; did you set googleBLEUWeights properly?");
System.exit(1);
} else if (linearCorpusGainThetas.length != 5) {
logger.severe("linearCorpusGainThetas does not have five values; did you set googleBLEUWeights properly?");
System.exit(1);
}
}
}
}