blob: 69e43370723474d358abd2d26fd972e7b4e66ced [file] [log] [blame]
#!/bin/bash
# Strings together the preprocessing scripts. The stages run as one pipeline,
# in this order:
#   1. scripts/training/normalize-punctuation.pl   LANG
#   2. scripts/training/penn-treebank-tokenizer.perl -l LANG
#   3. scripts/lowercase.perl
#
# Usage: <this script> LANG
#   LANG    language identifier passed to the first stage as its only
#           argument and to the second stage through its -l option.
#
# Environment:
#   JOSHUA  directory under which the scripts/ tree above is located.
#
# The first stage inherits this script's stdin and the last stage writes to
# this script's stdout (presumably text in, preprocessed text out -- confirm
# against the individual stage scripts).

# -u: expanding an unset variable is an error.
# pipefail: the pipeline's exit status reflects a failure in ANY stage, not
# only the last one, so a crashing normalizer/tokenizer is not masked by a
# successful lowercase step.
set -u
set -o pipefail

# Fail early with a readable message instead of bash's bare
# "unbound variable" diagnostic.
if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s LANG\n' "${0##*/}" >&2
  exit 1
fi
: "${JOSHUA:?environment variable JOSHUA must be set and non-empty}"

lang=$1

# Every expansion is quoted so that a path or language value containing
# whitespace or glob characters is passed through as a single word.
"$JOSHUA/scripts/training/normalize-punctuation.pl" "$lang" \
  | "$JOSHUA/scripts/training/penn-treebank-tokenizer.perl" -l "$lang" \
  | "$JOSHUA/scripts/lowercase.perl"