Fix: out-of-memory error during tuning

When larger language models are used, the tuner needs more memory.
Although we allocate more memory in the training pipeline script,
the value is ignored in one place: the tuner's call to the decoder.

This patch fixes that by exporting JOSHUA_MEM as an environment
variable in pipeline.pl and passing it to the decoder's JVM via the
-m flag in run_tuner.py.
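
For example, with the default JOSHUA_MEM of 4g, the tuner's feature
query now runs roughly as follows (illustrative command; the config
path is hypothetical):

    $JOSHUA/bin/joshua-decoder -m 4g -c tune/joshua.config -show-weights -v 0
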
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index 4c6380c..b057dac 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -135,6 +135,8 @@
 # a lot more than this for SAMT decoding (though really it depends
 # mostly on your grammar size)
 my $JOSHUA_MEM = "4g";
+# export JOSHUA_MEM so child processes (e.g., the tuner) can read it
+$ENV{'JOSHUA_MEM'} = $JOSHUA_MEM;
 
 # the amount of memory available for hadoop processes (passed to
 # Hadoop via -Dmapred.child.java.opts
diff --git a/scripts/training/run_tuner.py b/scripts/training/run_tuner.py
index 38059fd..d548aee 100755
--- a/scripts/training/run_tuner.py
+++ b/scripts/training/run_tuner.py
@@ -348,7 +348,11 @@
     """Queries the decoder for all dense features that will be fired by the feature
     functions activated in the config file"""
 
-    output = check_output("%s/bin/joshua-decoder -c %s -show-weights -v 0" % (JOSHUA, config_file), shell=True)
+    # pick up the heap size exported by pipeline.pl, if any
+    mem_size = os.environ.get('JOSHUA_MEM')
+    mem_arg = '-m %s' % mem_size if mem_size else ''
+    decode_cmd = "%s/bin/joshua-decoder %s -c %s -show-weights -v 0" % (JOSHUA, mem_arg, config_file)
+    output = check_output(decode_cmd, shell=True)
     features = []
     for index, item in enumerate(output.split('\n'.encode(encoding='utf_8', errors='strict'))):
         item = item.decode()
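
As a standalone sketch of the fallback behavior introduced above
(assuming JOSHUA_MEM may be unset; values are illustrative):

    import os

    mem_size = os.environ.get('JOSHUA_MEM')           # None if pipeline.pl did not export it
    mem_arg = '-m %s' % mem_size if mem_size else ''  # empty string -> decoder's default heap

If JOSHUA_MEM is missing, mem_arg stays empty and the decoder falls
back to its built-in default heap size instead of failing.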