blob: 072f6e3ab643e4fecb18628d5f210ce9a31a312a [file] [log] [blame]
#!/bin/bash
#$ -S /bin/bash
#$ -l h_vmem=10g,mem_free=8g,h_rt=24:00:00,num_proc=3
#$ -m a
#$ -M post@cs.jhu.edu
# Author: Damianos Karakos <damianos@jhu.edu>
input_grammar=$1
corpus=$2
output_grammar=$3
fast=$4
ngrams=$5
if [[ $fast -eq 1 ]]; then
fast="-f"
else
fast=""
fi
# extract the matching rules from the piece
if [[ ! -e $output_grammar ]]; then
$JOSHUA/scripts/training/scat $input_grammar | java -Xmx5g -Dfile.encoding=utf8 -cp $JOSHUA/thrax/bin/thrax.jar edu.jhu.thrax.util.TestSetFilter -v $fast -n $ngrams $corpus | $JOSHUA/scripts/training/remove-unary-abstract.pl | gzip -9 > $output_grammar
fi
# clean up
rm -f $input_grammar