| lm_file=example2/example2.4gram.lm.gz |
| #lm_file=C:/data_disk/java_work_space/sf_trunk/example/example.trigram.lm.gz |
| #lm_file=aexperiment/iwslt/models/corpus.en.lm.gz |
| |
| tm_file=example/example.hiero.tm.gz |
| #tm_file=aexperiment/iwslt/models/iwslt.grammar.combined.gz |
| tm_format=hiero |
| |
| glue_file=grammars/hiero.glue |
| glue_format=hiero |
| |
| #lm config |
| use_srilm=false |
| lm_ceiling_cost=100 |
| use_left_euqivalent_state=false |
| use_right_euqivalent_state=false |
| order=4 |
| |
| |
| #tm config |
| span_limit=10 |
| phrase_owner=pt |
| glue_owner=glue |
| default_non_terminal=X |
| goalSymbol=S |
| |
| #pruning config |
| useCubePrune=true |
| useBeamAndThresholdPrune=true |
| fuzz1=0.1 |
| fuzz2=0.1 |
| max_n_items=50 |
| relative_threshold=10.0 |
| max_n_rules=50 |
| rule_relative_threshold=10.0 |
| |
| #nbest config |
| use_unique_nbest=true |
| use_tree_nbest=false |
| add_combined_cost=true |
| include_align_index=false |
| top_n=300 |
| |
| |
| #remote lm server config: remote_symbol_tbl must be prepared first, before starting any jobs |
| use_remote_lm_server=false |
| remote_symbol_tbl=./voc.remote.sym |
| num_remote_lm_servers=4 |
| f_remote_server_list=./remote.lm.server.list |
| remote_lm_server_port=9000 |
| |
| |
| #parallel decoder: it cannot be used together with remote lm |
| num_parallel_decoders=1 |
| parallel_files_prefix=. |
| |
| #disk hg |
| save_disk_hg=true |
| use_kbest_hg=false |
| forest_pruning=false |
| forest_pruning_threshold=150 |
| |
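| #hyp_merge_mode values -- 0: no merge; 1: merge without de-dup; 2: merge with de-dup |
| #note that if use_kbest_hg=false, then we cannot set hyp_merge_mode=2 |
| #NOTE(review): this file sets use_kbest_hg=false together with hyp_merge_mode=2, which contradicts the note above; confirm which of the two values is intended |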
| hyp_merge_mode=2 |
| |
| ###### model weights |
| #lm order weight |
| lm 1.000000 |
| |
| #phrasemodel owner column(0-indexed) weight |
| phrasemodel pt 0 1.066893 |
| phrasemodel pt 1 0.752247 |
| phrasemodel pt 2 0.589793 |
| |
| #arityphrasepenalty owner start_arity end_arity weight |
| #arityphrasepenalty pt 0 0 1.0 |
| #arityphrasepenalty pt 1 1 -1.0 |
| #arityphrasepenalty pt 2 2 -2.0 |
| |
| #arityphrasepenalty glue 1 1 1.0 |
| #arityphrasepenalty glue 2 2 2.0 |
| |
| #phrasemodel mono 0 0.5 |
| |
| #wordpenalty weight |
| wordpenalty -2.844814 |
| #latticecost 1.0 |
| |
| #oracle example/example.test.ref.0 example/example.test.ref.1 example/example.test.ref.2 example/example.test.ref.3 1.0 |
| |
| #========================discriminative model options |
| #discriminative aexperiment/featureFile 1.0 |
| |
| #general |
| maxNumIter=15 |
| useSemiringV2=true |
| maxNumHGInQueue=100 |
| numThreads=10 |
| saveHGInMemory=false |
| printFirstN=10 |
| |
| #option for first feature (e.g., baseline feature) |
| normalizeByFirstFeature=true |
| fixFirstFeature=false |
| |
| #loss-augmented pruning |
| lossAugmentedPrune=false |
| startLossScale=10 |
| lossDecreaseConstant=2 |
| |
| |
| #google linear corpus gain |
| useGoogleLinearCorpusGain=true |
| #googleBLEUWeights=-1.0;0.10277777076514476;0.07949965001350584;0.6993000659479868;0.09565585699195878 |
| googleBLEUWeights=-1.0;0.2941176470588235;0.42016806722689076;0.6002400960384154;0.8574858514834507 |
| |
| #annealing? |
| #0: no annealing at all; 1: quenching only; 2: cooling and then quenching |
| annealingMode=0 |
| |
| isScalingFactorTunable=false |
| useL2Regula=false |
| varianceForL2=1 |
| useModelDivergenceRegula=false |
| lambda=-1 |
| |
| #feature related |
| #dense features |
| useBaseline=false |
| baselineFeatureName=baseline_lzf |
| baselineFeatureWeight=1.0 |
| |
| useIndividualBaselines=true |
| #baselineFeatIDsToTune=0;1;2;3;4;5;6;7;8;9 |
| baselineFeatIDsToTune=0;1;2;3;4 |
| |
| #sparse features |
| useSparseFeature=false |
| |
| useRuleIDName=true |
| |
| useTMFeat=true |
| useTMTargetFeat=false |
| |
| useMicroTMFeat=true |
| wordMapFile=src/joshua/discriminative/training/risk_annealer/data/wordMap |
| |
| useLMFeat=true |
| startNgramOrder=1 |
| endNgramOrder=2 |
| |
| |
| |
| |
| |
| |