blob: 023ae6f2291fe8083c0dfb12f243ddace307ffd0 [file] [log] [blame]
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This script trains and evaluates LSTM models. There is no
# discriminative training yet.
# In this recipe, MXNet directly reads Kaldi features and labels,
# which makes the whole pipeline much simpler.
# Strict mode: abort on any failing command (-e), on any failing stage
# of a pipeline (-o pipefail), and on any reference to an unset
# variable (-u).
set -euo pipefail

# Pull in the cluster/job-dispatch settings and the Kaldi tool PATH.
. ./cmd.sh
. ./path.sh
# Job-dispatch wrapper for parallel stages (run.pl = run locally;
# see cmd.sh for cluster alternatives such as queue.pl).
cmd=run.pl
# root folder for all experiment outputs,
expdir=exp_timit
##################################################
# Kaldi generated folder
##################################################
# alignment folder (forced alignments from the tri3 GMM system,
# used as training targets)
ali_src=/home/sooda/speech/kaldi/egs/timit/s5/exp/tri3_ali
# decoding graph (HCLG) built from the same tri3 system
graph_src=/home/sooda/speech/kaldi/egs/timit/s5/exp/tri3/graph
# feature/data directories (Kaldi data-dir format with feats.scp)
train_src=/home/sooda/speech/kaldi/egs/timit/s5/data/train
dev_src=/home/sooda/speech/kaldi/egs/timit/s5/data/dev
# config file consumed by the MXNet training/decoding python scripts
config=default_timit.cfg
# optional settings,
# number of parallel decoding jobs
njdec=8
# LM-weight range handed to the scoring script at decode time
scoring="--min-lmwt 5 --max-lmwt 19"
# The device number to run the training
# change to AUTO to select the card automatically
deviceNumber=gpu0
# decoding method (passed as --train_method to decode_mxnet.py)
method=simple
modelName=
# model
# prefix for the saved MXNet model checkpoints
prefix=timit
# which checkpoint epoch to load when decoding
num_epoch=12
# acoustic scale used during lattice generation/scoring
acwt=0.1
#smbr training variables
# NOTE(review): the sMBR variables below appear unused in this
# script ("no discriminative training yet") — confirm before removing.
num_utts_per_iter=40
smooth_factor=0.1
use_one_sil=true
# resume point: stages with a number lower than this are skipped
stage=4
# Allow any of the variables above to be overridden from the command
# line as --variable-name value.
. utils/parse_options.sh || exit 1;
###############################################
# Training
###############################################
mkdir -p "$expdir"
# Working directory holding the MXNet-facing data lists and labels.
dir=$expdir/data-for-mxnet

###############################################
# Stage 0: convert Kaldi alignments into per-frame
# posterior labels that MXNet can read directly.
###############################################
if [ "$stage" -le 0 ] ; then
  mkdir -p "$dir" "$dir/log" "$dir/rawpost"

  # Number of parallel jobs used when the (gzip-compressed)
  # alignments were generated.
  num=$(cat "$ali_src/num_jobs")

  # Map transition-id alignments to pdf-ids, then convert them to
  # posteriors, writing one ark/scp pair per original alignment job.
  # $cmd is intentionally unquoted: it may carry dispatcher options.
  $cmd JOB=1:$num "$dir/log/gen_post.JOB.log" \
    ali-to-pdf "$ali_src/final.mdl" "ark:gunzip -c $ali_src/ali.JOB.gz |" \
    ark:- \| ali-to-post ark:- \
    "ark,scp:$dir/rawpost/post.JOB.ark,$dir/rawpost/post.JOB.scp" || exit 1;

  # Concatenate the per-job scp lists into one label list.
  for n in $(seq "$num"); do
    cat "$dir/rawpost/post.${n}.scp" || exit 1;
  done > "$dir/post.scp"
fi
###############################################
# Stage 1: split the training data and write the
# feature/label list files consumed by the trainer.
###############################################
if [ "$stage" -le 1 ] ; then
  # split the data : 90% train and 10% held-out (skip if already done)
  if [ ! -e "${train_src}_tr90" ]; then
    utils/subset_data_dir_tr_cv.sh "$train_src" "${train_src}_tr90" "${train_src}_cv10"
  fi
  # Each list pairs a feature rspecifier with the posterior labels;
  # NO_FEATURE_TRANSFORM tells the reader to apply no feature transform.
  echo "NO_FEATURE_TRANSFORM scp:${train_src}_tr90/feats.scp" > "$dir/train.feats"
  echo "scp:$dir/post.scp" >> "$dir/train.feats"
  echo "NO_FEATURE_TRANSFORM scp:${train_src}_cv10/feats.scp" > "$dir/dev.feats"
  echo "scp:$dir/post.scp" >> "$dir/dev.feats"
  # Test list has no labels: decoding only needs features.
  echo "NO_FEATURE_TRANSFORM scp:${dev_src}/feats.scp" > "$dir/test.feats"
fi
###############################################
# Stage 2: generate label statistics (label mean),
# used to scale network outputs at decode time.
###############################################
if [ "$stage" -le 2 ] ; then
  # $cmd is intentionally unquoted: it may carry dispatcher options.
  $cmd JOB=1:1 "$dir/log/gen_label_mean.JOB.log" \
    python make_stats.py --configfile "$config" --data_train "$dir/train.feats" \
    \| copy-feats ark:- "ark:$dir/label_mean.ark" || exit 1;
  echo "NO_FEATURE_TRANSFORM ark:$dir/label_mean.ark" > "$dir/label_mean.feats"
fi
###############################################
# Stage 3: train the projected-LSTM acoustic model.
# Note: weight decay is for the whole batch
# (0.00001 * 20 (minibatch) * 40 (batch_size) = 0.008).
###############################################
if [ "$stage" -le 3 ] ; then
  python train_lstm_proj.py \
    --configfile "$config" \
    --data_train "$dir/train.feats" \
    --data_dev "$dir/dev.feats" \
    --train_prefix "$PWD/$expdir/$prefix" \
    --train_optimizer speechSGD \
    --train_learning_rate 1 \
    --train_context "$deviceNumber" \
    --train_weight_decay 0.008 \
    --train_show_every 1000
fi
###############################################
# Stage 4: decode the dev set and score it.
###############################################
if [ "$stage" -le 4 ] ; then
  # The decoder needs the transition model next to the checkpoints.
  cp "$ali_src/final.mdl" "$expdir"
  # Command decode_mxnet.sh will run to produce posteriors;
  # OMP_NUM_THREADS=1 keeps each decoding job single-threaded.
  # Decoding runs on CPU (--train_context cpu0) with batch size 1.
  mxnet_string="OMP_NUM_THREADS=1 python decode_mxnet.py --config $config --data_test $dir/test.feats --data_label_mean $dir/label_mean.feats --train_method $method --train_prefix $PWD/$expdir/$prefix --train_num_epoch $num_epoch --train_context cpu0 --train_batch_size 1"
  # "$cmd" is quoted here: decode_mxnet.sh expects the whole dispatcher
  # string as a single --cmd argument (it would otherwise word-split
  # a multi-word value such as "queue.pl -q all.q").
  ./decode_mxnet.sh --nj "$njdec" --cmd "$cmd" --acwt "$acwt" --scoring-opts "$scoring" \
    "$graph_src" "$dev_src" "$expdir/decode_${prefix}_$(basename "$dev_src")" "$mxnet_string" || exit 1;
fi