fixed script moving, added new script
diff --git a/scripts/thrax/strip_label.py b/scripts/thrax/strip_label.py
new file mode 100755
index 0000000..e1b20a8
--- /dev/null
+++ b/scripts/thrax/strip_label.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+"""
+Removes labels (if present) from features.
+
+e.g.,
+
+    [X] ||| le ||| the ||| e_given_f_lex=1
+
+becomes
+
+    [X] ||| le ||| the ||| 1
+
+Reads grammar rules from STDIN and writes the stripped rules to STDOUT.
+Lines with fewer than four ' ||| '-delimited fields are passed through
+unchanged.
+"""
+
+import re
+import sys
+import codecs
+
+# Python 2 idiom: force UTF-8 for implicit str/unicode conversions and wrap
+# the standard streams so that lines are decoded and encoded as UTF-8.
+reload(sys)
+sys.setdefaultencoding('utf-8')
+sys.stdin = codecs.getreader('utf-8')(sys.stdin)
+sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
+sys.stdout.encoding = 'utf-8'
+
+FIELD_DELIM = ' ||| '
+
+# A label is a run of non-space characters ending in '=' that begins a
+# feature, i.e. sits at the start of the field or right after a space.
+# Anchoring on (^| ) is what lets the first feature's label be stripped too.
+LABEL = re.compile(r'(^| )\S*=')
+
+for line in sys.stdin:
+    tokens = line.split(FIELD_DELIM)
+
+    # The feature vector is the fourth field; leave shorter lines untouched
+    # rather than failing with an IndexError.
+    if len(tokens) > 3:
+        tokens[3] = LABEL.sub(r'\1', tokens[3])
+
+    # Each line still carries its own newline, so write it back verbatim.
+    sys.stdout.write(FIELD_DELIM.join(tokens))
diff --git a/scripts/training/run_thrax.py b/scripts/training/run_thrax.py
deleted file mode 100755
index 810844a..0000000
--- a/scripts/training/run_thrax.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-"""
-Runs Thrax.
-"""
-from __future__ import print_function
-from itertools import izip
-
-import codecs
-import argparse
-import tempfile
-import subprocess
-import sys
-import os
-
-JOSHUA = os.environ.get('JOSHUA')
-
-EXAMPLE = r"""
-Example invocation:
-
-$JOSHUA/scripts/support/run_thrax.py \
-  /path/to/thrax.config \
-  /path/to/corpus.SOURCE \
-  /path/to/corpus.TARGET \
-  /path/to/alignment \
-  /path/to/thrax.config \
-  [-o grammar.gz] \
-  [-T /tmp] \
-  [-v]
-"""
-parser = argparse.ArgumentParser(description='Run thrax')
-parser.add_argument('-o', dest='output_file', default='grammar.gz', help='Location of output grammar')
-parser.add_argument('-f', dest='force', default=False, action='store_true', help='Force overwrite')
-parser.add_argument('-T', dest='tmp_dir', default='/tmp', help='Temporary directory')
-parser.add_argument('-v', dest='verbose', default=False, action='store_true', help='Be verbose')
-parser.add_argument('-d', '--debug', dest='debug', default=False, action='store_true', help='Don\'t cleanup')
-parser.add_argument('corpora', nargs='+', help='Either (a) the Thrax input file or (b) the source, target, and aligned corpus files')
-args = parser.parse_args()
-
-HADOOP   = os.environ['HADOOP']
-THRAX_JAR = os.path.join(os.environ['JOSHUA'], 'thrax', 'bin', 'thrax.jar')
-
-THRAXDIR = 'pipeline-%s' % ( os.getcwd().replace('/','_') )
-
-def run(cmd):
-    if args.verbose:
-        print(cmd)
-    subprocess.call(cmd, shell=True)
-
-def utf8open(file, flags='r'):
-    return codecs.open(file, flags, 'utf-8')
-
-def paste(source, target, align, out_file): 
-    out = utf8open(out_file, 'w')
-    for s, t, a in izip(utf8open(source), utf8open(target), utf8open(align)):
-        out.write(' ||| '.join([s.strip(), t.strip(), a.strip()]) + '\n')
-    out.close()
-
-if os.path.exists(args.output_file) and not args.force:
-    sys.stderr.write('Fatal: output path "%s" already exists\n' % (args.output_file))
-    sys.stderr.write('  (use -f to force overwrite)\n')
-    sys.exit(1)
-
-if len(args.corpora) not in [1,3]:
-    sys.stderr.write('Fatal: corpora argument must be either')
-    sys.stderr.write('  (a) a single consolidated Thrax input file, or')
-    sys.stderr.write('  (b) three parallel files: source, target, and alignments')
-    sys.exit(2)
-
-# Cleanup 
-run('%s/bin/hadoop fs -rm -r %s' % (HADOOP, THRAXDIR))
-run('%s/bin/hadoop fs -mkdir %s' % (HADOOP, THRAXDIR))
-
-# Create thrax input file
-if len(args.corpora) == 3:
-    # Paste together the source, target, and alignment files
-    thrax_file = 'thrax.input_file'
-    paste(args.corpora[0], args.corpora[1], args.corpora[2], thrax_file)
-else:    
-    # Assume the thrax input file is already created
-    thrax_file = args.corpora[0]
-
-run('%s/bin/hadoop fs -put %s %s/input-file' % (HADOOP, thrax_file, THRAXDIR))
-
-# Copy the template
-conf_file = tempfile.NamedTemporaryFile(prefix='thrax.conf', delete=False)
-for line in open(args.thrax_config):
-    if not line.startswith('input-file'):
-        conf_file.write(line)
-conf_file.write('input-file %s/input-file\n' % (THRAXDIR))
-conf_file.close()
-conf_file_name = conf_file.name
-
-# Run Hadoop
-run('%s/bin/hadoop jar %s -D mapred.child.java.opts="-Xmx%s" -D hadoop.tmp.dir=%s %s %s > thrax.log 2>&1' % (HADOOP, THRAX_JAR, '4g', args.tmp_dir, conf_file_name, THRAXDIR))
-run('rm -f %s' % (args.output_file))
-run('%s/bin/hadoop fs -getmerge %s/final/ %s' % (HADOOP, THRAXDIR, args.output_file))
-
-# Cleanup
-if not args.debug:
-    os.remove(conf_file_name)
-    os.remove(thrax_file)
-    run('%s/bin/hadoop fs -rm -r %s' % (HADOOP, THRAXDIR))