versions/master/tutorials/speech_recognition/speech_lstm.html - mxnet-test - Git at Google

 <!DOCTYPE html>

 <html lang="en">
 <head>
 <meta charset="utf-8"/>
 <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
 <meta content="width=device-width, initial-scale=1" name="viewport"/>
 <title>Speech LSTM — mxnet  documentation</title>
 <link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" rel="stylesheet"/>
 <link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet"/>
 <link href="../../_static/basic.css" rel="stylesheet" type="text/css">
 <link href="../../_static/pygments.css" rel="stylesheet" type="text/css">
 <link href="../../_static/mxnet.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript">
       // Page-level settings published as a global variable.
       // NOTE(review): presumably read by the _static helper scripts loaded
       // after this block (e.g. doctools.js, searchtools_custom.js) — confirm.
       var DOCUMENTATION_OPTIONS = {
         // Relative prefix; the same '../../' prefix is used by this page's
         // links to _static assets and top-level sections.
         URL_ROOT:    '../../',
         // Left empty on this page (no version string is supplied).
         VERSION:     '',
         COLLAPSE_INDEX: false,
         FILE_SUFFIX: '.html',
         HAS_SOURCE:  true,
         SOURCELINK_SUFFIX: ''
       };
     </script>
 <script src="../../_static/jquery-1.11.1.js" type="text/javascript"></script>
 <script src="../../_static/underscore.js" type="text/javascript"></script>
 <script src="../../_static/searchtools_custom.js" type="text/javascript"></script>
 <script src="../../_static/doctools.js" type="text/javascript"></script>
 <script src="../../_static/selectlang.js" type="text/javascript"></script>
 <!-- MathJax is loaded from cdnjs at a pinned version: the original cdn.mathjax.org host was retired by the MathJax project in 2017. -->
 <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>
 <script type="text/javascript">
   // When the DOM is ready, point the search helper at the index file and
   // start it. NOTE(review): `Search` is presumably defined by
   // searchtools_custom.js, loaded above — confirm.
   jQuery(document).ready(function () {
     Search.loadIndex("/searchindex.js");
     Search.init();
   });
 </script>
 <script>
       // Google Analytics loader (vendor snippet, kept verbatim).
       // The immediately-invoked function:
       //   1. records the global name 'ga' in window.GoogleAnalyticsObject;
       //   2. defines window.ga, if not already defined, as a stub that pushes
       //      each call's arguments onto the queue window.ga.q;
       //   3. stores the current timestamp in window.ga.l;
       //   4. creates a <script> element with async set and src pointing at
       //      analytics.js, and inserts it before the first <script> in the page.
       (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
       (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new
       Date();a=s.createElement(o),
       m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
       })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

       // Queue the tracker-creation command for this property ID, then a
       // 'pageview' send command, via the ga function set up above.
       ga('create', 'UA-96378503-1', 'auto');
       ga('send', 'pageview');

     </script>
 <!-- -->
 <!-- <script type="text/javascript" src="../../_static/jquery.js"></script> -->
 <!-- -->
 <!-- <script type="text/javascript" src="../../_static/underscore.js"></script> -->
 <!-- -->
 <!-- <script type="text/javascript" src="../../_static/doctools.js"></script> -->
 <!-- -->
 <!-- <script type="text/javascript" src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> -->
 <!-- -->
 <link href="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-icon.png" rel="icon" type="image/png"/>
 </head>
 <body role="document"><!-- Previous Navbar Layout
 <div class="navbar navbar-default navbar-fixed-top">
   <div class="container">
     <div class="navbar-header">
       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
         <span class="sr-only">Toggle navigation</span>
         <span class="icon-bar"></span>
         <span class="icon-bar"></span>
         <span class="icon-bar"></span>
       </button>
       <a href="../../" class="navbar-brand">
         <img src="http://data.mxnet.io/theme/mxnet.png">
       </a>
     </div>
     <div id="navbar" class="navbar-collapse collapse">
       <ul id="navbar" class="navbar navbar-left">

         <li> <a href="../../get_started/index.html">Get Started</a> </li>

         <li> <a href="../../tutorials/index.html">Tutorials</a> </li>

         <li> <a href="../../how_to/index.html">How To</a> </li>


         <li class="dropdown">
           <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Packages <span class="caret"></span></a>
           <ul class="dropdown-menu">

             <li><a href="../../packages/python/index.html">
                 Python
             </a></li>

             <li><a href="../../packages/r/index.html">
                 R
             </a></li>

             <li><a href="../../packages/julia/index.html">
                 Julia
             </a></li>

             <li><a href="../../packages/c++/index.html">
                 C++
             </a></li>

             <li><a href="../../packages/scala/index.html">
                 Scala
             </a></li>

             <li><a href="../../packages/perl/index.html">
                 Perl
             </a></li>

           </ul>
         </li>

         <li> <a href="../../system/index.html">System</a> </li>
         <li>
 <form class="" role="search" action="../../search.html" method="get" autocomplete="off">
   <div class="form-group inner-addon left-addon">
     <i class="glyphicon glyphicon-search"></i>
     <input type="text" name="q" class="form-control" placeholder="Search">
   </div>
   <input type="hidden" name="check_keywords" value="yes" />
   <input type="hidden" name="area" value="default" />

 </form> </li>
       </ul>
       <ul id="navbar" class="navbar navbar-right">
         <li> <a href="../../index.html"><span class="flag-icon flag-icon-us"></span></a> </li>
         <li> <a href="../..//zh/index.html"><span class="flag-icon flag-icon-cn"></span></a> </li>
       </ul>
     </div>
   </div>
 </div>
 Previous Navbar Layout End -->
 <div class="navbar navbar-fixed-top">
 <div class="container" id="navContainer">
 <div class="innder" id="header-inner">
 <h1 id="logo-wrap">
 <!-- The link contains only an image, so the alt text is what gives the link its accessible name. -->
 <a href="../../" id="logo"><img alt="MXNet" src="http://data.mxnet.io/theme/mxnet.png"/></a>
 </h1>
 <nav class="nav-bar" id="main-nav">
 <a class="main-nav-link" href="../../get_started/install.html">Install</a>
 <a class="main-nav-link" href="../../tutorials/index.html">Tutorials</a>
 <a class="main-nav-link" href="../../how_to/index.html">How To</a>
 <span id="dropdown-menu-position-anchor">
 <a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">API <span class="caret"></span></a>
 <ul class="dropdown-menu" id="package-dropdown-menu">
 <li><a class="main-nav-link" href="../../api/python/index.html">Python</a></li>
 <li><a class="main-nav-link" href="../../api/scala/index.html">Scala</a></li>
 <li><a class="main-nav-link" href="../../api/r/index.html">R</a></li>
 <li><a class="main-nav-link" href="../../api/julia/index.html">Julia</a></li>
 <li><a class="main-nav-link" href="../../api/c++/index.html">C++</a></li>
 <li><a class="main-nav-link" href="../../api/perl/index.html">Perl</a></li>
 </ul>
 </span>
 <a class="main-nav-link" href="../../architecture/index.html">Architecture</a>
 <!-- <a class="main-nav-link" href="../../community/index.html">Community</a> -->
 <a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a>
 <span id="dropdown-menu-position-anchor-version" style="position: relative"><a href="#" class="main-nav-link dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Versions(master)<span class="caret"></span></a><ul id="package-dropdown-menu" class="dropdown-menu"><li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/test/">v0.10.14</a></li><li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/test/versions/0.10/index.html">0.10</a></li><li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/test/versions/master/index.html">master</a></li></ul></span></nav>
 <script>
   // Returns the relative path prefix '../../' used by this page.
   // NOTE(review): presumably called by the _static/js scripts loaded at the
   // end of the page to build links — confirm against those scripts.
   function getRootPath() {
     var rootPath = "../../";
     return rootPath;
   }
 </script>
 <div class="burgerIcon dropdown">
 <a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button">☰</a>
 <ul class="dropdown-menu dropdown-menu-right" id="burgerMenu">
 <li><a href="../../get_started/install.html">Install</a></li>
 <li><a href="../../tutorials/index.html">Tutorials</a></li>
 <li><a href="../../how_to/index.html">How To</a></li>
 <li class="dropdown-submenu">
 <a href="#" tabindex="-1">API</a>
 <ul class="dropdown-menu">
 <li><a href="../../api/python/index.html" tabindex="-1">Python</a>
 </li>
 <li><a href="../../api/scala/index.html" tabindex="-1">Scala</a>
 </li>
 <li><a href="../../api/r/index.html" tabindex="-1">R</a>
 </li>
 <li><a href="../../api/julia/index.html" tabindex="-1">Julia</a>
 </li>
 <li><a href="../../api/c++/index.html" tabindex="-1">C++</a>
 </li>
 <li><a href="../../api/perl/index.html" tabindex="-1">Perl</a>
 </li>
 </ul>
 </li>
 <li><a href="../../architecture/index.html">Architecture</a></li>
 <li><a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a></li>
 <li id="dropdown-menu-position-anchor-version-mobile" class="dropdown-submenu" style="position: relative"><a href="#" tabindex="-1">Versions(master)</a><ul class="dropdown-menu"><li><a tabindex="-1" href="http://mxnet.incubator.apache.org/test/">v0.10.14</a></li><li><a tabindex="-1" href="http://mxnet.incubator.apache.org/test/versions/0.10/index.html">0.10</a></li><li><a tabindex="-1" href="http://mxnet.incubator.apache.org/test/versions/master/index.html">master</a></li></ul></li></ul>
 </div>
 <div class="plusIcon dropdown">
 <a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"><span aria-hidden="true" class="glyphicon glyphicon-plus"></span></a>
 <ul class="dropdown-menu dropdown-menu-right" id="plusMenu"></ul>
 </div>
 <div id="search-input-wrap">
 <form action="../../search.html" autocomplete="off" class="" method="get" role="search">
 <div class="form-group inner-addon left-addon">
 <i class="glyphicon glyphicon-search"></i>
 <input class="form-control" name="q" placeholder="Search" type="text"/>
 </div>
 <input name="check_keywords" type="hidden" value="yes">
 <input name="area" type="hidden" value="default"/>
 </form>
 <div id="search-preview"></div>
 </div>
 <div id="searchIcon">
 <span aria-hidden="true" class="glyphicon glyphicon-search"></span>
 </div>
 <!-- <div id="lang-select-wrap"> -->
 <!--   <label id="lang-select-label"> -->
 <!--     <\!-- <i class="fa fa-globe"></i> -\-> -->
 <!--     <span></span> -->
 <!--   </label> -->
 <!--   <select id="lang-select"> -->
 <!--     <option value="en">Eng</option> -->
 <!--     <option value="zh">中文</option> -->
 <!--   </select> -->
 <!-- </div> -->
 <!--     <a id="mobile-nav-toggle">
         <span class="mobile-nav-toggle-bar"></span>
         <span class="mobile-nav-toggle-bar"></span>
         <span class="mobile-nav-toggle-bar"></span>
       </a> -->
 </div>
 </div>
 </div>
 <div class="container">
 <div class="row">
 <div aria-label="main navigation" class="sphinxsidebar leftsidebar" role="navigation">
 <div class="sphinxsidebarwrapper">
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="../../api/python/index.html">Python Documents</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../../api/r/index.html">R Documents</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../../api/julia/index.html">Julia Documents</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../../api/c++/index.html">C++ Documents</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../../api/scala/index.html">Scala Documents</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../../api/perl/index.html">Perl Documents</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../../how_to/index.html">HowTo Documents</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../../architecture/index.html">System Documents</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../index.html">Tutorials</a></li>
 </ul>
 </div>
 </div>
 <div class="content">
 <div class="section" id="speech-lstm">
 <span id="speech-lstm"></span><h1>Speech LSTM<a class="headerlink" href="#speech-lstm" title="Permalink to this headline">¶</a></h1>
 <p>You can get the source code for these examples on <a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo">GitHub</a>.</p>
 <div class="section" id="speech-acoustic-modeling-example">
 <span id="speech-acoustic-modeling-example"></span><h2>Speech Acoustic Modeling Example<a class="headerlink" href="#speech-acoustic-modeling-example" title="Permalink to this headline">¶</a></h2>
 <p>The examples folder contains examples for speech recognition:</p>
 <ul class="simple">
 <li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/lstm_proj.py">lstm_proj.py</a>: Functions for building an LSTM network with and without a projection layer.</li>
 <li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/io_util.py">io_util.py</a>: Wrapper functions for <code class="docutils literal"><span class="pre">DataIter</span></code> over speech data.</li>
 <li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/train_lstm_proj.py">train_lstm_proj.py</a>: A script for training an LSTM acoustic model.</li>
 <li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/decode_mxnet.py">decode_mxnet.py</a>: A script for decoding an LSTMP acoustic model.</li>
 <li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/default.cfg">default.cfg</a>: Configuration for training on the <code class="docutils literal"><span class="pre">AMI</span></code> SDM1 dataset. You can use it as a template for writing other configuration files.</li>
 <li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/python_wrap">python_wrap</a>: C wrappers for Kaldi C++ code, built into an .so file. The Python code that loads the .so file and calls the C wrapper functions is in <code class="docutils literal"><span class="pre">io_func/feat_readers/reader_kaldi.py</span></code>.</li>
 </ul>
 <p>Connect to Kaldi:</p>
 <ul class="simple">
 <li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/decode_mxnet.sh">decode_mxnet.sh</a>: Called by Kaldi to decode an acoustic model trained by MXNet (select the <code class="docutils literal"><span class="pre">simple</span></code> method for decoding).</li>
 </ul>
 <p>A full recipe:</p>
 <ul class="simple">
 <li><a class="reference external" href="https://github.com/dmlc/mxnet/tree/master/example/speech-demo/run_ami.sh">run_ami.sh</a>: A full recipe to train and decode an acoustic model on AMI. It takes features and alignment from Kaldi to train an acoustic model and decode it.</li>
 </ul>
 <p>To create the speech acoustic modeling example, use the following steps.</p>
 <div class="section" id="build-kaldi">
 <span id="build-kaldi"></span><h3>Build Kaldi<a class="headerlink" href="#build-kaldi" title="Permalink to this headline">¶</a></h3>
 <p>Build Kaldi as shared libraries if you have not already done so.</p>
 <div class="highlight-bash"><div class="highlight"><pre><span></span><span class="nb">cd</span> kaldi/src
 ./configure --shared <span class="c1"># and other options that you need</span>
 make depend
 make
 </pre></div>
 </div>
 </div>
 <div class="section" id="build-the-python-wrapper">
 <span id="build-the-python-wrapper"></span><h3>Build the Python Wrapper<a class="headerlink" href="#build-the-python-wrapper" title="Permalink to this headline">¶</a></h3>
 <ol class="simple">
 <li>Copy or link the attached <code class="docutils literal"><span class="pre">python_wrap</span></code> folder to <code class="docutils literal"><span class="pre">kaldi/src</span></code>.</li>
 <li>Compile python_wrap/.</li>
 </ol>
 <div class="highlight-python"><div class="highlight"><pre><span></span>cd kaldi/src/python_wrap/
 make
 </pre></div>
 </div>
 </div>
 <div class="section" id="extract-features-and-prepare-frame-level-labels">
 <span id="extract-features-and-prepare-frame-level-labels"></span><h3>Extract Features and Prepare Frame-level Labels<a class="headerlink" href="#extract-features-and-prepare-frame-level-labels" title="Permalink to this headline">¶</a></h3>
 <p>The acoustic models use Mel filter-bank or MFCC as input features. You also need to use Kaldi to perform force-alignment to generate frame-level labels from the text transcriptions. For example, if you want to work on the <code class="docutils literal"><span class="pre">AMI</span></code> data <code class="docutils literal"><span class="pre">SDM1</span></code>, you can run <code class="docutils literal"><span class="pre">kaldi/egs/ami/s5/run_sdm.sh</span></code>. Before you can run the examples, you need to configure some paths in <code class="docutils literal"><span class="pre">kaldi/egs/ami/s5/cmd.sh</span></code> and <code class="docutils literal"><span class="pre">kaldi/egs/ami/s5/run_sdm.sh</span></code>. Refer to Kaldi’s documentation for details.</p>
 <p>The default <code class="docutils literal"><span class="pre">run_sdm.sh</span></code> script generates the force-alignment labels in their stage 7, and saves the force-aligned labels in <code class="docutils literal"><span class="pre">exp/sdm1/tri3a_ali</span></code>. The default script generates MFCC features (13-dimensional). You can try training with the MFCC features, or you can create Mel filter-bank features by yourself. For example, you can use a script like this to compute Mel filter-bank features using Kaldi:</p>
 <div class="highlight-bash"><div class="highlight"><pre><span></span><span class="ch">#!/bin/bash -u</span>

 . ./cmd.sh
 . ./path.sh

 <span class="c1"># SDM - Single Distant Microphone</span>
 <span class="nv">micid</span><span class="o">=</span><span class="m">1</span> <span class="c1">#which mic from array should be used?</span>
 <span class="nv">mic</span><span class="o">=</span>sdm<span class="nv">$micid</span>

 <span class="c1"># Set bash to 'debug' mode, it prints the commands (option '-x') and exits on :</span>
 <span class="c1"># -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline',</span>
 <span class="nb">set</span> -euxo pipefail

 <span class="c1"># Path where AMI gets downloaded (or where locally available):</span>
 <span class="nv">AMI_DIR</span><span class="o">=</span><span class="nv">$PWD</span>/wav_db <span class="c1"># Default,</span>
 <span class="nv">data_dir</span><span class="o">=</span><span class="nv">$PWD</span>/data/<span class="nv">$mic</span>

 <span class="c1"># make filter bank data</span>
 <span class="k">for</span> dset in train dev eval<span class="p">;</span> <span class="k">do</span>
   steps/make_fbank.sh --nj <span class="m">48</span> --cmd <span class="s2">"</span><span class="nv">$train_cmd</span><span class="s2">"</span> <span class="nv">$data_dir</span>/<span class="nv">$dset</span> <span class="se">\</span>
     <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/log <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/data-fbank
   steps/compute_cmvn_stats.sh <span class="nv">$data_dir</span>/<span class="nv">$dset</span> <span class="se">\</span>
     <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/log <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/data

   apply-cmvn --utt2spk<span class="o">=</span>ark:<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/utt2spk <span class="se">\</span>
     scp:<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/cmvn.scp scp:<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats.scp <span class="se">\</span>
     ark,scp:<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats-cmvn.ark,<span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats-cmvn.scp

   mv <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats-cmvn.scp <span class="nv">$data_dir</span>/<span class="nv">$dset</span>/feats.scp
 <span class="k">done</span>
 </pre></div>
 </div>
 <p><code class="docutils literal"><span class="pre">apply-cmvn</span></code> provides mean-variance normalization. The default setup was applied per speaker. It’s more common to perform mean-variance normalization for the whole corpus, and then feed the results to the neural networks:</p>
 <div class="highlight-python"><div class="highlight"><pre><span></span> compute-cmvn-stats scp:data/sdm1/train_fbank/feats.scp data/sdm1/train_fbank/cmvn_g.ark
  apply-cmvn --norm-vars=true data/sdm1/train_fbank/cmvn_g.ark scp:data/sdm1/train_fbank/feats.scp ark,scp:data/sdm1/train_fbank_gcmvn/feats.ark,data/sdm1/train_fbank_gcmvn/feats.scp
 </pre></div>
 </div>
 <p>Note that Kaldi always tries to find features in <code class="docutils literal"><span class="pre">feats.scp</span></code>. Ensure that the normalized features are organized as Kaldi expects them during decoding.</p>
 <p>Finally, put the features and labels together in a file so that MXNet can find them. More specifically, for each data set (train, dev, eval), you will need to create a file similar to <code class="docutils literal"><span class="pre">train_mxnet.feats</span></code>, with the following contents:</p>
 <div class="highlight-python"><div class="highlight"><pre><span></span>TRANSFORM scp:feat.scp
 scp:label.scp
 </pre></div>
 </div>
 <p><code class="docutils literal"><span class="pre">TRANSFORM</span></code> is the transformation you want to apply to the features. By default, we use <code class="docutils literal"><span class="pre">NO_FEATURE_TRANSFORM</span></code>. The <code class="docutils literal"><span class="pre">scp:</span></code> syntax is from Kaldi. <code class="docutils literal"><span class="pre">feat.scp</span></code> is typically the file from <code class="docutils literal"><span class="pre">data/sdm1/train/feats.scp</span></code>, and <code class="docutils literal"><span class="pre">label.scp</span></code> is converted from the force-aligned labels located in <code class="docutils literal"><span class="pre">exp/sdm1/tri3a_ali</span></code>. Because the force-alignments are generated only on the training data, we split the training set in two, using a 90/10 ratio, and then use the 1/10 holdout as the dev set (validation set). The script <a class="reference external" href="https://github.com/dmlc/mxnet/blob/master/example/speech-demo/run_ami.sh">run_ami.sh</a> automatically splits and formats the file for MXNet. Before running it, set the path in the script correctly. The <a class="reference external" href="https://github.com/dmlc/mxnet/blob/master/example/speech-demo/run_ami.sh">run_ami.sh</a> script actually runs the full pipeline, including training the acoustic model and decoding. If the scripts ran successfully, you can skip the following sections.</p>
 </div>
 <div class="section" id="run-mxnet-acoustic-model-training">
 <span id="run-mxnet-acoustic-model-training"></span><h3>Run MXNet Acoustic Model Training<a class="headerlink" href="#run-mxnet-acoustic-model-training" title="Permalink to this headline">¶</a></h3>
 <ol class="simple">
 <li>Return to the speech demo directory in MXNet. Make a copy of <code class="docutils literal"><span class="pre">default.cfg</span></code>, and edit the necessary parameters, such as the path to the dataset you just prepared.</li>
 <li>Run <code class="docutils literal"><span class="pre">python</span> <span class="pre">train_lstm.py</span> <span class="pre">--configfile=your-config.cfg</span></code>. For help, use <code class="docutils literal"><span class="pre">python</span> <span class="pre">train_lstm.py</span> <span class="pre">--help</span></code>. You can set all of the configuration parameters in <code class="docutils literal"><span class="pre">default.cfg</span></code>, the customized config file, and through the command line (e.g., using <code class="docutils literal"><span class="pre">--train_batch_size=50</span></code>). The latter values overwrite the former ones.</li>
 </ol>
 <p>Here are some example outputs from training on the TIMIT dataset:</p>
 <div class="highlight-python"><div class="highlight"><pre><span></span>Example output for TIMIT:
 Summary of dataset ==================
 bucket of len 100 : 3 samples
 bucket of len 200 : 346 samples
 bucket of len 300 : 1496 samples
 bucket of len 400 : 974 samples
 bucket of len 500 : 420 samples
 bucket of len 600 : 90 samples
 bucket of len 700 : 11 samples
 bucket of len 800 : 2 samples
 Summary of dataset ==================
 bucket of len 100 : 0 samples
 bucket of len 200 : 28 samples
 bucket of len 300 : 169 samples
 bucket of len 400 : 107 samples
 bucket of len 500 : 41 samples
 bucket of len 600 : 6 samples
 bucket of len 700 : 3 samples
 bucket of len 800 : 0 samples
 2016-04-21 20:02:40,904 Epoch[0] Train-Acc_exlude_padding=0.154763
 2016-04-21 20:02:40,904 Epoch[0] Time cost=91.574
 2016-04-21 20:02:44,419 Epoch[0] Validation-Acc_exlude_padding=0.353552
 2016-04-21 20:04:17,290 Epoch[1] Train-Acc_exlude_padding=0.447318
 2016-04-21 20:04:17,290 Epoch[1] Time cost=92.870
 2016-04-21 20:04:20,738 Epoch[1] Validation-Acc_exlude_padding=0.506458
 2016-04-21 20:05:53,127 Epoch[2] Train-Acc_exlude_padding=0.557543
 2016-04-21 20:05:53,128 Epoch[2] Time cost=92.390
 2016-04-21 20:05:56,568 Epoch[2] Validation-Acc_exlude_padding=0.548100
 </pre></div>
 </div>
 <p>The final frame accuracy was approximately 62%.</p>
 </div>
 <div class="section" id="run-decode-on-the-trained-acoustic-model">
 <span id="run-decode-on-the-trained-acoustic-model"></span><h3>Run Decode on the Trained Acoustic Model<a class="headerlink" href="#run-decode-on-the-trained-acoustic-model" title="Permalink to this headline">¶</a></h3>
 <ol class="simple">
 <li>Estimate senone priors by running <code class="docutils literal"><span class="pre">python</span> <span class="pre">make_stats.py</span> <span class="pre">--configfile=your-config.cfg</span> <span class="pre">|</span> <span class="pre">copy-feats</span> <span class="pre">ark:-</span> <span class="pre">ark:label_mean.ark</span></code> (edit necessary items, such as the path to the training dataset). This command generates the label counts in <code class="docutils literal"><span class="pre">label_mean.ark</span></code>.</li>
 <li>Link to the necessary Kaldi decode setup, e.g., <code class="docutils literal"><span class="pre">local/</span></code> and <code class="docutils literal"><span class="pre">utils/</span></code> and run <code class="docutils literal"><span class="pre">./run_ami.sh</span> <span class="pre">--model</span> <span class="pre">prefix</span> <span class="pre">model</span> <span class="pre">--num_epoch</span> <span class="pre">num</span></code>.</li>
 </ol>
 <p>Here are the results for the TIMIT and AMI test sets (using the default setup, three-layer LSTM with projection layers):</p>
 <table border="1" class="docutils">
 <colgroup>
 <col width="50%"/>
 <col width="50%"/>
 </colgroup>
 <thead valign="bottom">
 <tr class="row-odd"><th class="head">Corpus</th>
 <th class="head">WER</th>
 </tr>
 </thead>
 <tbody valign="top">
 <tr class="row-even"><td>TIMIT</td>
 <td>18.9</td>
 </tr>
 <tr class="row-odd"><td>AMI</td>
 <td>51.7 (42.2)</td>
 </tr>
 </tbody>
 </table>
 <p>For AMI, 42.2 was evaluated on non-overlapped speech. The Kaldi-HMM baseline was 67.2%, and DNN was 57.5%.</p>
 </div>
 </div>
 <div class="section" id="next-steps">
 <span id="next-steps"></span><h2>Next Steps<a class="headerlink" href="#next-steps" title="Permalink to this headline">¶</a></h2>
 <div class="toctree-wrapper compound">
 <ul>
 <li class="toctree-l1"><a class="reference external" href="http://mxnet.io/tutorials/index.html">MXNet tutorials index</a></li>
 </ul>
 </div>
 </div>
 </div>
 <div class="container">
 <div class="footer">
 <p> © 2015-2017 DMLC. All rights reserved. </p>
 </div>
 </div>
 </div>
 <div aria-label="main navigation" class="sphinxsidebar rightsidebar" role="navigation">
 <div class="sphinxsidebarwrapper">
 <h3><a href="../../index.html">Table Of Contents</a></h3>
 <ul>
 <li><a class="reference internal" href="#">Speech LSTM</a><ul>
 <li><a class="reference internal" href="#speech-acoustic-modeling-example">Speech Acoustic Modeling Example</a><ul>
 <li><a class="reference internal" href="#build-kaldi">Build Kaldi</a></li>
 <li><a class="reference internal" href="#build-the-python-wrapper">Build the Python Wrapper</a></li>
 <li><a class="reference internal" href="#extract-features-and-prepare-frame-level-labels">Extract Features and Prepare Frame-level Labels</a></li>
 <li><a class="reference internal" href="#run-mxnet-acoustic-model-training">Run MXNet Acoustic Model Training</a></li>
 <li><a class="reference internal" href="#run-decode-on-the-trained-acoustic-model">Run Decode on the Trained Acoustic Model</a></li>
 </ul>
 </li>
 <li><a class="reference internal" href="#next-steps">Next Steps</a></li>
 </ul>
 </li>
 </ul>
 </div>
 </div>
 </div> <!-- pagename != index -->
 <script crossorigin="anonymous" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
 <script src="../../_static/js/sidebar.js" type="text/javascript"></script>
 <script src="../../_static/js/search.js" type="text/javascript"></script>
 <script src="../../_static/js/navbar.js" type="text/javascript"></script>
 <script src="../../_static/js/clipboard.min.js" type="text/javascript"></script>
 <script src="../../_static/js/copycode.js" type="text/javascript"></script>
 <script type="text/javascript">
         // Set <body> to visible once the DOM is ready.
         // NOTE(review): presumably a stylesheet hides <body> until this
         // runs, to avoid showing a half-built layout — confirm in mxnet.css.
         $(document).ready(function () {
             var pageBody = $('body');
             pageBody.css('visibility', 'visible');
         });
     </script>
 </div></body>
 </html>