<!-- blob: ac9ed3672186f691fd3937a52c88504e353f1742 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>Speech LSTM — mxnet documentation</title>
<link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" rel="stylesheet"/>
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet"/>
<link href="../../_static/basic.css" rel="stylesheet" type="text/css"/>
<link href="../../_static/pygments.css" rel="stylesheet" type="text/css"/>
<link href="../../_static/mxnet.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript">
// Page-level settings published as a global object.
// NOTE(review): presumably consumed by the Sphinx helper scripts loaded
// after this block (doctools.js / searchtools_custom.js) — confirm there.
var DOCUMENTATION_OPTIONS = {
  "URL_ROOT": "../../",
  "VERSION": "",
  "COLLAPSE_INDEX": false,
  "FILE_SUFFIX": ".html",
  "HAS_SOURCE": true,
  "SOURCELINK_SUFFIX": ""
};
</script>
<script src="../../_static/jquery-1.11.1.js" type="text/javascript"></script>
<script src="../../_static/underscore.js" type="text/javascript"></script>
<script src="../../_static/searchtools_custom.js" type="text/javascript"></script>
<script src="../../_static/doctools.js" type="text/javascript"></script>
<script src="../../_static/selectlang.js" type="text/javascript"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>
<script type="text/javascript">
// When the DOM is ready, hand the search index location to the Search
// object and start it.
// NOTE(review): Search is not defined in this file; presumably it comes from
// searchtools_custom.js loaded above — confirm.
jQuery(document).ready(function () {
  Search.loadIndex("/searchindex.js");
  Search.init();
});
</script>
<script>
// Google Analytics loader.
// The immediately-invoked function below:
//   1. records the name 'ga' on window as 'GoogleAnalyticsObject',
//   2. defines window.ga (if not already defined) as a stub that appends each
//      call's arguments to the ga.q array,
//   3. stores the current timestamp in ga.l,
//   4. creates an async <script> element pointing at analytics.js and inserts
//      it before the first <script> element in the document.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new
Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Issue the 'create' command for property UA-96378503-1, then a 'send'
// 'pageview' command for this page load.
ga('create', 'UA-96378503-1', 'auto');
ga('send', 'pageview');
</script>
<!-- -->
<!-- <script type="text/javascript" src="../../_static/jquery.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../_static/underscore.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../_static/doctools.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> -->
<!-- -->
<link href="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-icon.png" rel="icon" type="image/png"/>
</head>
<body role="document"><!-- Previous Navbar Layout
<div class="navbar navbar-default navbar-fixed-top">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="../../" class="navbar-brand">
<img src="http://data.mxnet.io/theme/mxnet.png">
</a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul id="navbar" class="navbar navbar-left">
<li> <a href="../../get_started/index.html">Get Started</a> </li>
<li> <a href="../../tutorials/index.html">Tutorials</a> </li>
<li> <a href="../../how_to/index.html">How To</a> </li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Packages <span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="../../packages/python/index.html">
Python
</a></li>
<li><a href="../../packages/r/index.html">
R
</a></li>
<li><a href="../../packages/julia/index.html">
Julia
</a></li>
<li><a href="../../packages/c++/index.html">
C++
</a></li>
<li><a href="../../packages/scala/index.html">
Scala
</a></li>
<li><a href="../../packages/perl/index.html">
Perl
</a></li>
</ul>
</li>
<li> <a href="../../system/index.html">System</a> </li>
<li>
<form class="" role="search" action="../../search.html" method="get" autocomplete="off">
<div class="form-group inner-addon left-addon">
<i class="glyphicon glyphicon-search"></i>
<input type="text" name="q" class="form-control" placeholder="Search">
</div>
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form> </li>
</ul>
<ul id="navbar" class="navbar navbar-right">
<li> <a href="../../index.html"><span class="flag-icon flag-icon-us"></span></a> </li>
<li> <a href="../..//zh/index.html"><span class="flag-icon flag-icon-cn"></span></a> </li>
</ul>
</div>
</div>
</div>
Previous Navbar Layout End -->
<div class="navbar navbar-fixed-top">
<div class="container" id="navContainer">
<div class="innder" id="header-inner">
<h1 id="logo-wrap">
<a href="../../" id="logo"><img alt="MXNet home" src="http://data.mxnet.io/theme/mxnet.png"/></a>
</h1>
<nav class="nav-bar" id="main-nav">
<a class="main-nav-link" href="../../get_started/install.html">Install</a>
<a class="main-nav-link" href="../../tutorials/index.html">Tutorials</a>
<a class="main-nav-link" href="../../how_to/index.html">How To</a>
<span id="dropdown-menu-position-anchor">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">API <span class="caret"></span></a>
<ul class="dropdown-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="../../api/python/index.html">Python</a></li>
<li><a class="main-nav-link" href="../../api/scala/index.html">Scala</a></li>
<li><a class="main-nav-link" href="../../api/r/index.html">R</a></li>
<li><a class="main-nav-link" href="../../api/julia/index.html">Julia</a></li>
<li><a class="main-nav-link" href="../../api/c++/index.html">C++</a></li>
<li><a class="main-nav-link" href="../../api/perl/index.html">Perl</a></li>
</ul>
</span>
<a class="main-nav-link" href="../../architecture/index.html">Architecture</a>
<!-- <a class="main-nav-link" href="../../community/index.html">Community</a> -->
<a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a>
<span id="dropdown-menu-position-anchor-version" style="position: relative"><a href="#" class="main-nav-link dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Versions(master)<span class="caret"></span></a><ul id="package-dropdown-menu" class="dropdown-menu"><li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/test/">v0.10.14</a></li><li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/test/versions/0.10/index.html">0.10</a></li><li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/test/versions/master/index.html">master</a></li></ul></span></nav>
<script>
// Returns the relative path string "../../" used as this page's root path.
function getRootPath() {
  var rootPath = "../../";
  return rootPath;
}
</script>
<div class="burgerIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"></a>
<ul class="dropdown-menu dropdown-menu-right" id="burgerMenu">
<li><a href="../../get_started/install.html">Install</a></li>
<li><a href="../../tutorials/index.html">Tutorials</a></li>
<li><a href="../../how_to/index.html">How To</a></li>
<li class="dropdown-submenu">
<a href="#" tabindex="-1">API</a>
<ul class="dropdown-menu">
<li><a href="../../api/python/index.html" tabindex="-1">Python</a>
</li>
<li><a href="../../api/scala/index.html" tabindex="-1">Scala</a>
</li>
<li><a href="../../api/r/index.html" tabindex="-1">R</a>
</li>
<li><a href="../../api/julia/index.html" tabindex="-1">Julia</a>
</li>
<li><a href="../../api/c++/index.html" tabindex="-1">C++</a>
</li>
<li><a href="../../api/perl/index.html" tabindex="-1">Perl</a>
</li>
</ul>
</li>
<li><a href="../../architecture/index.html">Architecture</a></li>
<li><a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a></li>
<li id="dropdown-menu-position-anchor-version-mobile" class="dropdown-submenu" style="position: relative"><a href="#" tabindex="-1">Versions(master)</a><ul class="dropdown-menu"><li><a tabindex="-1" href="http://mxnet.incubator.apache.org/test/">v0.10.14</a></li><li><a tabindex="-1" href="http://mxnet.incubator.apache.org/test/versions/0.10/index.html">0.10</a></li><li><a tabindex="-1" href="http://mxnet.incubator.apache.org/test/versions/master/index.html">master</a></li></ul></li></ul>
</div>
<div class="plusIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"><span aria-hidden="true" class="glyphicon glyphicon-plus"></span></a>
<ul class="dropdown-menu dropdown-menu-right" id="plusMenu"></ul>
</div>
<div id="search-input-wrap">
<form action="../../search.html" autocomplete="off" class="" method="get" role="search">
<div class="form-group inner-addon left-addon">
<i class="glyphicon glyphicon-search"></i>
<input class="form-control" name="q" placeholder="Search" type="text"/>
</div>
<input name="check_keywords" type="hidden" value="yes"/>
<input name="area" type="hidden" value="default"/>
</form>
<div id="search-preview"></div>
</div>
<div id="searchIcon">
<span aria-hidden="true" class="glyphicon glyphicon-search"></span>
</div>
<!-- <div id="lang-select-wrap"> -->
<!-- <label id="lang-select-label"> -->
<!-- <\!-- <i class="fa fa-globe"></i> -\-> -->
<!-- <span></span> -->
<!-- </label> -->
<!-- <select id="lang-select"> -->
<!-- <option value="en">Eng</option> -->
<!-- <option value="zh">中文</option> -->
<!-- </select> -->
<!-- </div> -->
<!-- <a id="mobile-nav-toggle">
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
</a> -->
</div>
</div>
</div>
<div class="container">
<div class="row">
<div aria-label="main navigation" class="sphinxsidebar leftsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../api/python/index.html">Python Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/r/index.html">R Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/julia/index.html">Julia Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/c++/index.html">C++ Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/scala/index.html">Scala Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/perl/index.html">Perl Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../how_to/index.html">HowTo Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../architecture/index.html">System Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../index.html">Tutorials</a></li>
</ul>
</div>
</div>
<div class="content">
<div class="section" id="speech-lstm">
<span id="speech-lstm"></span><h1>Speech LSTM<a class="headerlink" href="#speech-lstm" title="Permalink to this headline"></a></h1>
<p>You can get the source code for these examples on <a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo">GitHub</a>.</p>
<div class="section" id="speech-acoustic-modeling-example">
<span id="speech-acoustic-modeling-example"></span><h2>Speech Acoustic Modeling Example<a class="headerlink" href="#speech-acoustic-modeling-example" title="Permalink to this headline"></a></h2>
<p>The examples folder contains examples for speech recognition:</p>
<ul class="simple">
<li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/lstm_proj.py">lstm_proj.py</a>: Functions for building an LSTM network with and without a projection layer.</li>
<li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/io_util.py">io_util.py</a>: Wrapper functions for <code class="docutils literal"><span class="pre">DataIter</span></code> over speech data.</li>
<li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/train_lstm_proj.py">train_lstm_proj.py</a>: A script for training an LSTM acoustic model.</li>
<li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/decode_mxnet.py">decode_mxnet.py</a>: A script for decoding an LSTMP acoustic model.</li>
<li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/default.cfg">default.cfg</a>: Configuration for training on the <code class="docutils literal"><span class="pre">AMI</span></code> SDM1 dataset. You can use it as a template for writing other configuration files.</li>
<li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/python_wrap">python_wrap</a>: C wrappers for Kaldi C++ code, built into an .so file. The Python code that loads the .so file and calls the C wrapper functions is in <code class="docutils literal"><span class="pre">io_func/feat_readers/reader_kaldi.py</span></code>.</li>
</ul>
<p>Connect to Kaldi:</p>
<ul class="simple">
<li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/decode_mxnet.sh">decode_mxnet.sh</a>: Called by Kaldi to decode an acoustic model trained by MXNet (select the <code class="docutils literal"><span class="pre">simple</span></code> method for decoding).</li>
</ul>
<p>A full recipe:</p>
<ul class="simple">
<li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/run_ami.sh">run_ami.sh</a>: A full recipe to train and decode an acoustic model on AMI. It takes features and alignments from Kaldi to train an acoustic model and decode it.</li>
</ul>
<p>To create the speech acoustic modeling example, use the following steps.</p>
<div class="section" id="build-kaldi">
<span id="build-kaldi"></span><h3>Build Kaldi<a class="headerlink" href="#build-kaldi" title="Permalink to this headline"></a></h3>
<p>Build Kaldi as shared libraries if you have not already done so.</p>
<div class="highlight-bash"><div class="highlight"><pre><span></span><span class="nb">cd</span> kaldi/src
./configure --shared <span class="c1"># and other options that you need</span>
make depend
make
</pre></div>
</div>
</div>
<div class="section" id="build-the-python-wrapper">
<span id="build-the-python-wrapper"></span><h3>Build the Python Wrapper<a class="headerlink" href="#build-the-python-wrapper" title="Permalink to this headline"></a></h3>
<ol class="simple">
<li>Copy or link the attached <code class="docutils literal"><span class="pre">python_wrap</span></code> folder to <code class="docutils literal"><span class="pre">kaldi/src</span></code>.</li>
<li>Compile python_wrap/.</li>
</ol>
<div class="highlight-python"><div class="highlight"><pre><span></span>cd kaldi/src/python_wrap/
make
</pre></div>
</div>
</div>
<div class="section" id="extract-features-and-prepare-frame-level-labels">
<span id="extract-features-and-prepare-frame-level-labels"></span><h3>Extract Features and Prepare Frame-level Labels<a class="headerlink" href="#extract-features-and-prepare-frame-level-labels" title="Permalink to this headline"></a></h3>
<p>The acoustic models use Mel filter-bank or MFCC as input features. They also need to use Kaldi to perform force-alignment to generate frame-level labels from the text transcriptions. For example, if you want to work on the <code class="docutils literal"><span class="pre">AMI</span></code> data <code class="docutils literal"><span class="pre">SDM1</span></code>, you can run <code class="docutils literal"><span class="pre">kaldi/egs/ami/s5/run_sdm.sh</span></code>. Before you can run the examples, you need to configure some paths in <code class="docutils literal"><span class="pre">kaldi/egs/ami/s5/cmd.sh</span></code> and <code class="docutils literal"><span class="pre">kaldi/egs/ami/s5/run_sdm.sh</span></code>. Refer to Kaldi’s documentation for details.</p>
<p>The default <code class="docutils literal"><span class="pre">run_sdm.sh</span></code> script generates the force-alignment labels in their stage 7, and saves the force-aligned labels in <code class="docutils literal"><span class="pre">exp/sdm1/tri3a_ali</span></code>. The default script generates MFCC features (13-dimensional). You can try training with the MFCC features, or you can create Mel filter-bank features by yourself. For example, you can use a script like this to compute Mel filter-bank features using Kaldi:</p>
<div class="highlight-bash"><div class="highlight"><pre><span></span><span class="ch">#!/bin/bash -u</span>
. ./cmd.sh
. ./path.sh
<span class="c1"># SDM - Single Distant Microphone</span>
<span class="nv">micid</span><span class="o">=</span><span class="m">1</span> <span class="c1">#which mic from array should be used?</span>
<span class="nv">mic</span><span class="o">=</span>sdm<span class="nv">$micid</span>
<span class="c1"># Set bash to 'debug' mode, it prints the commands (option '-x') and exits on :</span>
<span class="c1"># -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline',</span>
<span class="nb">set</span> -euxo pipefail
<span class="c1"># Path where AMI gets downloaded (or where locally available):</span>
<span class="nv">AMI_DIR</span><span class="o">=</span><span class="nv">$PWD</span>/wav_db <span class="c1"># Default,</span>
<span class="nv">data_dir</span><span class="o">=</span><span class="nv">$PWD</span>/data/<span class="nv">$mic</span>
<span class="c1"># make filter bank data</span>
<span class="k">for</span> dset in train dev eval<span class="p">;</span> <span class="k">do</span>
steps/make_fbank.sh --nj <span class="m">48</span> --cmd <span class="s2">"</span><span class="nv">$train_cmd</span><span class="s2">"</span> <span class="nv">$data_dir</span>/<span class="nv">$dset</span> <span class="se">\</span>
<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/log <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/data-fbank
steps/compute_cmvn_stats.sh <span class="nv">$data_dir</span>/<span class="nv">$dset</span> <span class="se">\</span>
<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/log <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/data
apply-cmvn --utt2spk<span class="o">=</span>ark:<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/utt2spk <span class="se">\</span>
scp:<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/cmvn.scp scp:<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats.scp <span class="se">\</span>
ark,scp:<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats-cmvn.ark,<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats-cmvn.scp
mv <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats-cmvn.scp <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats.scp
<span class="k">done</span>
</pre></div>
</div>
<p><code class="docutils literal"><span class="pre">apply-cmvn</span></code> provides mean-variance normalization. The default setup was applied per speaker. It’s more common to perform mean-variance normalization for the whole corpus, and then feed the results to the neural networks:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span> compute-cmvn-stats scp:data/sdm1/train_fbank/feats.scp data/sdm1/train_fbank/cmvn_g.ark
apply-cmvn --norm-vars=true data/sdm1/train_fbank/cmvn_g.ark scp:data/sdm1/train_fbank/feats.scp ark,scp:data/sdm1/train_fbank_gcmvn/feats.ark,data/sdm1/train_fbank_gcmvn/feats.scp
</pre></div>
</div>
<p>Note that Kaldi always tries to find features in <code class="docutils literal"><span class="pre">feats.scp</span></code>. Ensure that the normalized features are organized as Kaldi expects them during decoding.</p>
<p>Finally, put the features and labels together in a file so that MXNet can find them. More specifically, for each data set (train, dev, eval), you will need to create a file similar to <code class="docutils literal"><span class="pre">train_mxnet.feats</span></code>, with the following contents:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span>TRANSFORM scp:feat.scp
scp:label.scp
</pre></div>
</div>
<p><code class="docutils literal"><span class="pre">TRANSFORM</span></code> is the transformation you want to apply to the features. By default, we use <code class="docutils literal"><span class="pre">NO_FEATURE_TRANSFORM</span></code>. The <code class="docutils literal"><span class="pre">scp:</span></code> syntax is from Kaldi. <code class="docutils literal"><span class="pre">feat.scp</span></code> is typically the file from <code class="docutils literal"><span class="pre">data/sdm1/train/feats.scp</span></code>, and <code class="docutils literal"><span class="pre">label.scp</span></code> is converted from the force-aligned labels located in <code class="docutils literal"><span class="pre">exp/sdm1/tri3a_ali</span></code>. Because the force-alignments are generated only on the training data, we split the training set in two, using a 90/10 ratio, and then use the 1/10 holdout as the dev set (validation set). The script <a class="reference external" href="https://github.com/dmlc/mxnet/blob/master/example/speech-demo/run_ami.sh">run_ami.sh</a> automatically splits and formats the file for MXNet. Before running it, set the path in the script correctly. The <a class="reference external" href="https://github.com/dmlc/mxnet/blob/master/example/speech-demo/run_ami.sh">run_ami.sh</a> script actually runs the full pipeline, including training the acoustic model and decoding. If the scripts ran successfully, you can skip the following sections.</p>
</div>
<div class="section" id="run-mxnet-acoustic-model-training">
<span id="run-mxnet-acoustic-model-training"></span><h3>Run MXNet Acoustic Model Training<a class="headerlink" href="#run-mxnet-acoustic-model-training" title="Permalink to this headline"></a></h3>
<ol class="simple">
<li>Return to the speech demo directory in MXNet. Make a copy of <code class="docutils literal"><span class="pre">default.cfg</span></code>, and edit the necessary parameters, such as the path to the dataset you just prepared.</li>
<li>Run <code class="docutils literal"><span class="pre">python</span> <span class="pre">train_lstm_proj.py</span> <span class="pre">--configfile=your-config.cfg</span></code>. For help, use <code class="docutils literal"><span class="pre">python</span> <span class="pre">train_lstm_proj.py</span> <span class="pre">--help</span></code>. You can set all of the configuration parameters in <code class="docutils literal"><span class="pre">default.cfg</span></code>, the customized config file, and through the command line (e.g., using <code class="docutils literal"><span class="pre">--train_batch_size=50</span></code>). The latter values override the former ones.</li>
</ol>
<p>Here are some example outputs from training on the TIMIT dataset:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span>Example output for TIMIT:
Summary of dataset ==================
bucket of len 100 : 3 samples
bucket of len 200 : 346 samples
bucket of len 300 : 1496 samples
bucket of len 400 : 974 samples
bucket of len 500 : 420 samples
bucket of len 600 : 90 samples
bucket of len 700 : 11 samples
bucket of len 800 : 2 samples
Summary of dataset ==================
bucket of len 100 : 0 samples
bucket of len 200 : 28 samples
bucket of len 300 : 169 samples
bucket of len 400 : 107 samples
bucket of len 500 : 41 samples
bucket of len 600 : 6 samples
bucket of len 700 : 3 samples
bucket of len 800 : 0 samples
2016-04-21 20:02:40,904 Epoch[0] Train-Acc_exlude_padding=0.154763
2016-04-21 20:02:40,904 Epoch[0] Time cost=91.574
2016-04-21 20:02:44,419 Epoch[0] Validation-Acc_exlude_padding=0.353552
2016-04-21 20:04:17,290 Epoch[1] Train-Acc_exlude_padding=0.447318
2016-04-21 20:04:17,290 Epoch[1] Time cost=92.870
2016-04-21 20:04:20,738 Epoch[1] Validation-Acc_exlude_padding=0.506458
2016-04-21 20:05:53,127 Epoch[2] Train-Acc_exlude_padding=0.557543
2016-04-21 20:05:53,128 Epoch[2] Time cost=92.390
2016-04-21 20:05:56,568 Epoch[2] Validation-Acc_exlude_padding=0.548100
</pre></div>
</div>
<p>The final frame accuracy was approximately 62%.</p>
</div>
<div class="section" id="run-decode-on-the-trained-acoustic-model">
<span id="run-decode-on-the-trained-acoustic-model"></span><h3>Run Decode on the Trained Acoustic Model<a class="headerlink" href="#run-decode-on-the-trained-acoustic-model" title="Permalink to this headline"></a></h3>
<ol class="simple">
<li>Estimate senone priors by running <code class="docutils literal"><span class="pre">python</span> <span class="pre">make_stats.py</span> <span class="pre">--configfile=your-config.cfg</span> <span class="pre">|</span> <span class="pre">copy-feats</span> <span class="pre">ark:-</span> <span class="pre">ark:label_mean.ark</span></code> (edit necessary items, such as the path to the training dataset). This command generates the label counts in <code class="docutils literal"><span class="pre">label_mean.ark</span></code>.</li>
<li>Link to the necessary Kaldi decode setup, e.g., <code class="docutils literal"><span class="pre">local/</span></code> and <code class="docutils literal"><span class="pre">utils/</span></code> and run <code class="docutils literal"><span class="pre">./run_ami.sh</span> <span class="pre">--model</span> <span class="pre">prefix</span> <span class="pre">model</span> <span class="pre">--num_epoch</span> <span class="pre">num</span></code>.</li>
</ol>
<p>Here are the results for the TIMIT and AMI test sets (using the default setup, three-layer LSTM with projection layers):</p>
<table border="1" class="docutils">
<colgroup>
<col width="50%"/>
<col width="50%"/>
</colgroup>
<thead valign="bottom">
<tr class="row-odd"><th class="head">Corpus</th>
<th class="head">WER</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-even"><td>TIMIT</td>
<td>18.9</td>
</tr>
<tr class="row-odd"><td>AMI</td>
<td>51.7 (42.2)</td>
</tr>
</tbody>
</table>
<p>For AMI, the 42.2 figure in parentheses was evaluated on non-overlapped speech. The Kaldi-HMM baseline was 67.2%, and the DNN was 57.5%.</p>
</div>
</div>
<div class="section" id="next-steps">
<span id="next-steps"></span><h2>Next Steps<a class="headerlink" href="#next-steps" title="Permalink to this headline"></a></h2>
<div class="toctree-wrapper compound">
<ul>
<li class="toctree-l1"><a class="reference external" href="http://mxnet.io/tutorials/index.html">MXNet tutorials index</a></li>
</ul>
</div>
</div>
</div>
<div class="container">
<div class="footer">
<p> © 2015-2017 DMLC. All rights reserved. </p>
</div>
</div>
</div>
<div aria-label="main navigation" class="sphinxsidebar rightsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<h3><a href="../../index.html">Table Of Contents</a></h3>
<ul>
<li><a class="reference internal" href="#">Speech LSTM</a><ul>
<li><a class="reference internal" href="#speech-acoustic-modeling-example">Speech Acoustic Modeling Example</a><ul>
<li><a class="reference internal" href="#build-kaldi">Build Kaldi</a></li>
<li><a class="reference internal" href="#build-the-python-wrapper">Build the Python Wrapper</a></li>
<li><a class="reference internal" href="#extract-features-and-prepare-frame-level-labels">Extract Features and Prepare Frame-level Labels</a></li>
<li><a class="reference internal" href="#run-mxnet-acoustic-model-training">Run MXNet Acoustic Model Training</a></li>
<li><a class="reference internal" href="#run-decode-on-the-trained-acoustic-model">Run Decode on the Trained Acoustic Model</a></li>
</ul>
</li>
<li><a class="reference internal" href="#next-steps">Next Steps</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div> <!-- pagename != index -->
<script crossorigin="anonymous" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
<script src="../../_static/js/sidebar.js" type="text/javascript"></script>
<script src="../../_static/js/search.js" type="text/javascript"></script>
<script src="../../_static/js/navbar.js" type="text/javascript"></script>
<script src="../../_static/js/clipboard.min.js" type="text/javascript"></script>
<script src="../../_static/js/copycode.js" type="text/javascript"></script>
<script type="text/javascript">
// Once jQuery signals that the DOM is ready, set the body's CSS visibility
// to 'visible'.
// NOTE(review): presumably a stylesheet hides <body> until this runs — confirm
// against mxnet.css.
$('body').ready(function revealBody() {
  var pageBody = $('body');
  pageBody.css('visibility', 'visible');
});
</script>
</div></body>
</html>