Fix: out-of-memory error during tuning

When larger language models are used, the tuner needs more memory.
Although we allocate more memory in the training pipeline script,
the value is ignored in one place: the tuner's call to the decoder.

This patch fixes that by exporting JOSHUA_MEM as an environment
variable in pipeline.pl and passing it to the decoder's JVM via the
-m flag in run_tuner.py.
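
For example, with the default JOSHUA_MEM of 4g, the tuner's feature
query now runs roughly as follows (illustrative command; the config
path is hypothetical):

    $JOSHUA/bin/joshua-decoder -m 4g -c tune/joshua.config -show-weights -v 0
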
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index 4c6380c..b057dac 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -135,6 +135,8 @@
 # a lot more than this for SAMT decoding (though really it depends
 # mostly on your grammar size)
 my $JOSHUA_MEM = "4g";
+# export JOSHUA_MEM so child processes (e.g., the tuner) can read it
+$ENV{'JOSHUA_MEM'} = $JOSHUA_MEM;
 
 # the amount of memory available for hadoop processes (passed to
 # Hadoop via -Dmapred.child.java.opts
diff --git a/scripts/training/run_tuner.py b/scripts/training/run_tuner.py
index 38059fd..d548aee 100755
--- a/scripts/training/run_tuner.py
+++ b/scripts/training/run_tuner.py
@@ -348,7 +348,11 @@
     """Queries the decoder for all dense features that will be fired by the feature
     functions activated in the config file"""
 
-    output = check_output("%s/bin/joshua-decoder -c %s -show-weights -v 0" % (JOSHUA, config_file), shell=True)
+    # pick up the heap size exported by pipeline.pl, if any
+    mem_size = os.environ.get('JOSHUA_MEM')
+    mem_arg = '-m %s' % mem_size if mem_size else ''
+    decode_cmd = "%s/bin/joshua-decoder %s -c %s -show-weights -v 0" % (JOSHUA, mem_arg, config_file)
+    output = check_output(decode_cmd, shell=True)
     features = []
     for index, item in enumerate(output.split('\n'.encode(encoding='utf_8', errors='strict'))):
         item = item.decode()
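
As a standalone sketch of the fallback behavior introduced above
(assuming JOSHUA_MEM may be unset; values are illustrative):

    import os

    mem_size = os.environ.get('JOSHUA_MEM')           # None if pipeline.pl did not export it
    mem_arg = '-m %s' % mem_size if mem_size else ''  # empty string -> decoder's default heap

If JOSHUA_MEM is missing, mem_arg stays empty and the decoder falls
back to its built-in default heap size instead of failing.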