blob: 50f309eefed221194c769adaff0c5b3e43bf7ea6 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<meta content="Developing a Character-level Language model" property="og:title">
<meta content="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/og-logo.png" property="og:image">
<meta content="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/og-logo.png" property="og:image:secure_url">
<meta content="Developing a Character-level Language model" property="og:description"/>
<title>Developing a Character-level Language model — mxnet documentation</title>
<link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" rel="stylesheet"/>
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet"/>
<link href="../../_static/basic.css" rel="stylesheet" type="text/css">
<link href="../../_static/pygments.css" rel="stylesheet" type="text/css">
<link href="../../_static/mxnet.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript">
// Page-level settings published as a global object.
// NOTE(review): presumably consumed by the Sphinx static scripts loaded below
// (_static/doctools.js, _static/searchtools_custom.js) — confirm against those files.
// URL_ROOT holds the same relative prefix ('../../') this page uses for its _static/ assets.
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../../',
VERSION: '',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: '.txt'
};
</script>
<script src="https://code.jquery.com/jquery-1.11.1.min.js" type="text/javascript"></script>
<script src="../../_static/underscore.js" type="text/javascript"></script>
<script src="../../_static/searchtools_custom.js" type="text/javascript"></script>
<script src="../../_static/doctools.js" type="text/javascript"></script>
<script src="../../_static/selectlang.js" type="text/javascript"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>
<script type="text/javascript">
// Passing a function to jQuery() defers it until the DOM is ready.
// At that point: request the 1.2.1 search index, then initialise the search UI.
jQuery(function () {
Search.loadIndex("/versions/1.2.1/searchindex.js");
Search.init();
});
</script>
<script>
// Google Analytics (analytics.js) bootstrap.
// The self-invoking function below:
//   1. records the name 'ga' in window.GoogleAnalyticsObject,
//   2. defines window.ga (unless it already exists) as a stub that pushes each
//      call's arguments onto ga.q,
//   3. stamps ga.l with the current time in milliseconds,
//   4. creates an async <script> pointing at analytics.js and inserts it before
//      the first <script> element in the document.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new
Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Queue the tracker setup for property UA-96378503-1, then a page-view hit.
ga('create', 'UA-96378503-1', 'auto');
ga('send', 'pageview');
</script>
<!-- -->
<!-- <script type="text/javascript" src="../../_static/jquery.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../_static/underscore.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../_static/doctools.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> -->
<!-- -->
<link href="../../genindex.html" rel="index" title="Index">
<link href="../../search.html" rel="search" title="Search"/>
<link href="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-icon.png" rel="icon" type="image/png"/>
</head>
<body background="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-background-compressed.jpeg" role="document">
<div class="content-block"><div class="navbar navbar-fixed-top">
<div class="container" id="navContainer">
<div class="innder" id="header-inner">
<h1 id="logo-wrap">
<a href="../../" id="logo"><img alt="MXNet" src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet_logo.png"/></a>
</h1>
<nav class="nav-bar" id="main-nav">
<a class="main-nav-link" href="/versions/1.2.1/install/index.html">Install</a>
<span id="dropdown-menu-position-anchor">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Gluon <span class="caret"></span></a>
<ul class="dropdown-menu navbar-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="/versions/1.2.1/tutorials/gluon/gluon.html">About</a></li>
<li><a class="main-nav-link" href="https://www.d2l.ai/">Dive into Deep Learning</a></li>
<li><a class="main-nav-link" href="https://gluon-cv.mxnet.io">GluonCV Toolkit</a></li>
<li><a class="main-nav-link" href="https://gluon-nlp.mxnet.io/">GluonNLP Toolkit</a></li>
</ul>
</span>
<span id="dropdown-menu-position-anchor">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">API <span class="caret"></span></a>
<ul class="dropdown-menu navbar-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="/versions/1.2.1/api/python/index.html">Python</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/c++/index.html">C++</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/julia/index.html">Julia</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/perl/index.html">Perl</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/r/index.html">R</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/scala/index.html">Scala</a></li>
</ul>
</span>
<span id="dropdown-menu-position-anchor-docs">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Docs <span class="caret"></span></a>
<ul class="dropdown-menu navbar-menu" id="package-dropdown-menu-docs">
<li><a class="main-nav-link" href="/versions/1.2.1/faq/index.html">FAQ</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/tutorials/index.html">Tutorials</a></li>
<li><a class="main-nav-link" href="https://github.com/apache/incubator-mxnet/tree/1.2.1/example">Examples</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/architecture/index.html">Architecture</a></li>
<li><a class="main-nav-link" href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home">Developer Wiki</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/model_zoo/index.html">Model Zoo</a></li>
<li><a class="main-nav-link" href="https://github.com/onnx/onnx-mxnet">ONNX</a></li>
</ul>
</span>
<span id="dropdown-menu-position-anchor-community">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Community <span class="caret"></span></a>
<ul class="dropdown-menu navbar-menu" id="package-dropdown-menu-community">
<li><a class="main-nav-link" href="http://discuss.mxnet.io">Forum</a></li>
<li><a class="main-nav-link" href="https://github.com/apache/incubator-mxnet/tree/1.2.1">Github</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/community/contribute.html">Contribute</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/community/powered_by.html">Powered By</a></li>
</ul>
</span>
<span id="dropdown-menu-position-anchor-version" style="position: relative"><a href="#" class="main-nav-link dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">1.2.1<span class="caret"></span></a><ul id="package-dropdown-menu" class="dropdown-menu"><li><a href="/">master</a></li><li><a href="/versions/1.7.0/">1.7.0</a></li><li><a href=/versions/1.6.0/>1.6.0</a></li><li><a href=/versions/1.5.0/>1.5.0</a></li><li><a href=/versions/1.4.1/>1.4.1</a></li><li><a href=/versions/1.3.1/>1.3.1</a></li><li><a href=/versions/1.2.1/>1.2.1</a></li><li><a href=/versions/1.1.0/>1.1.0</a></li><li><a href=/versions/1.0.0/>1.0.0</a></li><li><a href=/versions/0.12.1/>0.12.1</a></li><li><a href=/versions/0.11.0/>0.11.0</a></li></ul></span></nav>
<script>
// Returns the relative URL prefix ("../../") for this page.
// NOTE(review): presumably used by the shared static scripts to build
// root-relative links — confirm against the callers.
function getRootPath() {
return "../../";
}
</script>
<div class="burgerIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"></a>
<ul class="dropdown-menu" id="burgerMenu">
<li><a href="/versions/1.2.1/install/index.html">Install</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/tutorials/index.html">Tutorials</a></li>
<li class="dropdown-submenu dropdown">
<a aria-expanded="true" aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" tabindex="-1">Gluon</a>
<ul class="dropdown-menu navbar-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="/versions/1.2.1/tutorials/gluon/gluon.html">About</a></li>
<li><a class="main-nav-link" href="http://gluon.mxnet.io">The Straight Dope (Tutorials)</a></li>
<li><a class="main-nav-link" href="https://gluon-cv.mxnet.io">GluonCV Toolkit</a></li>
<li><a class="main-nav-link" href="https://gluon-nlp.mxnet.io/">GluonNLP Toolkit</a></li>
</ul>
</li>
<li class="dropdown-submenu">
<a aria-expanded="true" aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" tabindex="-1">API</a>
<ul class="dropdown-menu">
<li><a class="main-nav-link" href="/versions/1.2.1/api/python/index.html">Python</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/c++/index.html">C++</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/julia/index.html">Julia</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/perl/index.html">Perl</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/r/index.html">R</a></li>
<li><a class="main-nav-link" href="/versions/1.2.1/api/scala/index.html">Scala</a></li>
</ul>
</li>
<li class="dropdown-submenu">
<a aria-expanded="true" aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" tabindex="-1">Docs</a>
<ul class="dropdown-menu">
<li><a href="/versions/1.2.1/faq/index.html" tabindex="-1">FAQ</a></li>
<li><a href="/versions/1.2.1/tutorials/index.html" tabindex="-1">Tutorials</a></li>
<li><a href="https://github.com/apache/incubator-mxnet/tree/1.2.1/example" tabindex="-1">Examples</a></li>
<li><a href="/versions/1.2.1/architecture/index.html" tabindex="-1">Architecture</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home" tabindex="-1">Developer Wiki</a></li>
<li><a href="/versions/1.2.1/model_zoo/index.html" tabindex="-1">Gluon Model Zoo</a></li>
<li><a href="https://github.com/onnx/onnx-mxnet" tabindex="-1">ONNX</a></li>
</ul>
</li>
<li class="dropdown-submenu dropdown">
<a aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" role="button" tabindex="-1">Community</a>
<ul class="dropdown-menu">
<li><a href="http://discuss.mxnet.io" tabindex="-1">Forum</a></li>
<li><a href="https://github.com/apache/incubator-mxnet/tree/1.2.1" tabindex="-1">Github</a></li>
<li><a href="/versions/1.2.1/community/contribute.html" tabindex="-1">Contribute</a></li>
<li><a href="/versions/1.2.1/community/powered_by.html" tabindex="-1">Powered By</a></li>
</ul>
</li>
<li id="dropdown-menu-position-anchor-version-mobile" class="dropdown-submenu" style="position: relative"><a href="#" tabindex="-1">1.2.1</a><ul class="dropdown-menu"><li><a tabindex="-1" href="/">master</a></li><li><a tabindex="-1" href="/versions/1.7.0/">1.7.0</a></li><li><a tabindex="-1" href="/versions/1.6.0/">1.6.0</a></li><li><a tabindex="-1" href="/versions/1.5.0/">1.5.0</a></li><li><a tabindex="-1" href="/versions/1.4.1/">1.4.1</a></li><li><a tabindex="-1" href="/versions/1.3.1/">1.3.1</a></li><li><a tabindex="-1" href="/versions/1.2.1/">1.2.1</a></li><li><a tabindex="-1" href="/versions/1.1.0/">1.1.0</a></li><li><a tabindex="-1" href="/versions/1.0.0/">1.0.0</a></li><li><a tabindex="-1" href="/versions/0.12.1/">0.12.1</a></li><li><a tabindex="-1" href="/versions/0.11.0/">0.11.0</a></li></ul></li></ul>
</div>
<div class="plusIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"><span aria-hidden="true" class="glyphicon glyphicon-plus"></span></a>
<ul class="dropdown-menu dropdown-menu-right" id="plusMenu"></ul>
</div>
<div id="search-input-wrap">
<form action="../../search.html" autocomplete="off" class="" method="get" role="search">
<div class="form-group inner-addon left-addon">
<i class="glyphicon glyphicon-search"></i>
<input class="form-control" name="q" placeholder="Search" type="text"/>
</div>
<input name="check_keywords" type="hidden" value="yes"/>
<input name="area" type="hidden" value="default"/>
</form>
<div id="search-preview"></div>
</div>
<div id="searchIcon">
<span aria-hidden="true" class="glyphicon glyphicon-search"></span>
</div>
<!-- <div id="lang-select-wrap"> -->
<!-- <label id="lang-select-label"> -->
<!-- <\!-- <i class="fa fa-globe"></i> -\-> -->
<!-- <span></span> -->
<!-- </label> -->
<!-- <select id="lang-select"> -->
<!-- <option value="en">Eng</option> -->
<!-- <option value="zh">中文</option> -->
<!-- </select> -->
<!-- </div> -->
<!-- <a id="mobile-nav-toggle">
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
</a> -->
</div>
</div>
</div>
<script type="text/javascript">
// Replace the page background with plain white; this overrides the image
// supplied through the background attribute on the <body> tag.
$('body').css('background', 'white');
</script>
<div class="container">
<div class="row">
<div aria-label="main navigation" class="sphinxsidebar leftsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../api/python/index.html">Python Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/r/index.html">R Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/julia/index.html">Julia Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/c++/index.html">C++ Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/scala/index.html">Scala Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/perl/index.html">Perl Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../faq/index.html">HowTo Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../architecture/index.html">System Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../index.html">Tutorials</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../community/index.html">Community</a></li>
</ul>
</div>
</div>
<div class="content">
<div class="page-tracker"></div>
<div class="section" id="developing-a-character-level-language-model">
<span id="developing-a-character-level-language-model"></span><h1>Developing a Character-level Language model<a class="headerlink" href="#developing-a-character-level-language-model" title="Permalink to this headline"></a></h1>
<p>This tutorial shows how to train a character-level language model with a multilayer recurrent neural network (RNN) using Scala. This model takes one text file as input and trains an RNN that learns to predict the next character in the sequence. In this tutorial, you train a multilayer LSTM (Long Short-Term Memory) network that generates relevant text using Barack Obama’s speech patterns.</p>
<p>There are many documents that explain LSTM concepts. If you aren’t familiar with LSTM, refer to the following before you proceed:</p>
<ul class="simple">
<li>Christopher Olah’s <a class="reference external" href="http://colah.github.io/posts/2015-08-Understanding-LSTMs/">Understanding LSTM blog post</a></li>
<li><a class="reference external" href="http://dmlc.ml/mxnet/2015/11/15/char-lstm-in-julia.html">Training a LSTM char-rnn in Julia to Generate Random Sentences</a></li>
<li><a class="reference external" href="https://github.com/dmlc/mxnet-notebooks/blob/1.2.1/python/tutorials/char_lstm.ipynb">Bucketing in MXNet in Python</a></li>
<li><a class="reference external" href="/versions/1.2.1/faq/bucketing.html">Bucketing in MXNet</a></li>
</ul>
<div class="section" id="how-to-use-this-tutorial">
<span id="how-to-use-this-tutorial"></span><h2>How to Use This Tutorial<a class="headerlink" href="#how-to-use-this-tutorial" title="Permalink to this headline"></a></h2>
<p>There are three ways to use this tutorial:</p>
<ol class="simple">
<li>Run it by copying the provided code snippets and pasting them into the Scala command line, making the appropriate changes to the input file path.</li>
<li>Reuse the code by making changes to relevant parameters and running it from command line.</li>
<li><a class="reference external" href="https://github.com/apache/incubator-mxnet/tree/1.2.1/scala-package/examples/src/main/scala/org/apache/mxnetexamples/rnn">Run the source code directly</a> by running the <a class="reference external" href="https://github.com/apache/incubator-mxnet/tree/1.2.1/scala-package/examples/scripts/rnn">provided scripts</a>.</li>
</ol>
<p>To run the scripts:</p>
<ul class="simple">
<li>Build and train the model with the <a class="reference external" href="https://github.com/apache/incubator-mxnet/tree/1.2.1/scala-package/examples/scripts/rnn/run_train_charrnn.sh">run_train_charrnn.sh script</a>. Edit the script as follows:</li>
</ul>
<p>Edit the CLASS_PATH variable in the script to include your operating system-specific folder (e.g., linux-x86_64-cpu/linux-x86_64-gpu/osx-x86_64-cpu) in the path. Run the script with the following command:</p>
<div class="highlight-bash"><div class="highlight"><pre><span></span> bash run_train_charrnn.sh &lt;which GPU card to use<span class="p">;</span> -1 means CPU&gt; &lt;input data path&gt; &lt;location to save the model&gt;
e.g.,
bash run_train_charrnn.sh -1 ./datas/obama.txt ./models/obama
</pre></div>
</div>
<ul class="simple">
<li>Run inference with the <a class="reference external" href="https://github.com/dmlc/mxnet/blob/1.2.1/scala-package/examples/scripts/rnn/run_test_charrnn.sh">run_test_charrnn.sh script</a>. Edit the script as follows:</li>
</ul>
<p>Edit the CLASS_PATH variable in the script to include your operating system-specific folder (e.g., linux-x86_64-cpu/linux-x86_64-gpu/osx-x86_64-cpu) in the path. Run the script with the following command:</p>
<div class="highlight-bash"><div class="highlight"><pre><span></span> bash run_test_charrnn.sh &lt;input data path&gt; &lt;trained model from previous script&gt;
e.g.,
bash run_test_charrnn.sh ./datas/obama.txt ./models/obama
</pre></div>
</div>
<p>In this tutorial, you will accomplish the following:</p>
<ul class="simple">
<li>Build an LSTM network that learns speech patterns from Barack Obama’s speeches at the character level. At each time interval, the input is a character.</li>
<li>Clean up the dataset.</li>
<li>Train a model.</li>
<li>Fit the model.</li>
<li>Build the inference model.</li>
</ul>
</div>
<div class="section" id="prerequisites">
<span id="prerequisites"></span><h2>Prerequisites<a class="headerlink" href="#prerequisites" title="Permalink to this headline"></a></h2>
<p>To complete this tutorial, you need:</p>
<ul class="simple">
<li>MXNet. See the instructions for your operating system in <a class="reference external" href="/versions/1.2.1/install/index.html">Setup and Installation</a></li>
<li><a class="reference external" href="https://www.scala-lang.org/download/2.11.8.html">Scala 2.11.8</a></li>
<li><a class="reference external" href="https://maven.apache.org/install.html">Maven 3</a></li>
</ul>
</div>
<div class="section" id="download-the-data">
<span id="download-the-data"></span><h2>Download the Data<a class="headerlink" href="#download-the-data" title="Permalink to this headline"></a></h2>
<p>First, download the data, which contains Barack Obama’s speeches. The data is stored in a file called obama.txt and is available on <a class="reference external" href="http://data.mxnet.io/data/char_lstm.zip">mxnet.io</a>.</p>
<p>To download the data which contains Barack Obama’s speeches:</p>
<ol class="simple">
<li>Download the dataset with the following command:</li>
</ol>
<div class="highlight-bash"><div class="highlight"><pre><span></span> wget http://data.mxnet.io/data/char_lstm.zip
</pre></div>
</div>
<ol class="simple" start="2">
<li>Unzip the dataset with the following command:</li>
</ol>
<div class="highlight-bash"><div class="highlight"><pre><span></span> unzip char_lstm.zip -d char_lstm/
</pre></div>
</div>
<ol class="simple" start="3">
<li>The downloaded data contains President Obama’s speeches. You can have a sneak peek at the dataset with the following command:</li>
</ol>
<div class="highlight-bash"><div class="highlight"><pre><span></span> head -10 obama.txt
</pre></div>
</div>
<p>Output:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span> <span class="n">Call</span> <span class="n">to</span> <span class="n">Renewal</span> <span class="n">Keynote</span> <span class="n">Address</span> <span class="n">Call</span> <span class="n">to</span> <span class="n">Renewal</span> <span class="n">Pt</span> <span class="mi">1</span><span class="n">Call</span> <span class="n">to</span> <span class="n">Renewal</span> <span class="n">Part</span> <span class="mi">2</span> <span class="n">TOPIC</span><span class="p">:</span> <span class="n">Our</span> <span class="n">Past</span><span class="p">,</span> <span class="n">Our</span> <span class="n">Future</span> <span class="o">&amp;</span> <span class="n">Vision</span> <span class="k">for</span> <span class="n">America</span> <span class="n">June</span>
<span class="mi">28</span><span class="p">,</span> <span class="mi">2006</span> <span class="n">Call</span> <span class="n">to</span> <span class="n">Renewal</span><span class="s1">' Keynote Address Complete Text Good morning. I appreciate the opportunity to speak here at the Call to R</span>
<span class="n">enewal</span><span class="s1">'s Building a Covenant for a New America conference. I'</span><span class="n">ve</span> <span class="n">had</span> <span class="n">the</span> <span class="n">opportunity</span> <span class="n">to</span> <span class="n">take</span> <span class="n">a</span> <span class="n">look</span> <span class="n">at</span> <span class="n">your</span> <span class="n">Covenant</span> <span class="k">for</span> <span class="n">a</span> <span class="n">New</span> <span class="n">Ame</span>
<span class="n">rica</span><span class="o">.</span> <span class="n">It</span> <span class="ow">is</span> <span class="n">filled</span> <span class="k">with</span> <span class="n">outstanding</span> <span class="n">policies</span> <span class="ow">and</span> <span class="n">prescriptions</span> <span class="k">for</span> <span class="n">much</span> <span class="n">of</span> <span class="n">what</span> <span class="n">ails</span> <span class="n">this</span> <span class="n">country</span><span class="o">.</span> <span class="n">So</span> <span class="n">I</span><span class="s1">'d like to congratulate yo</span>
<span class="n">u</span> <span class="nb">all</span> <span class="n">on</span> <span class="n">the</span> <span class="n">thoughtful</span> <span class="n">presentations</span> <span class="n">you</span><span class="s1">'ve given so far about poverty and justice in America, and for putting fire under the fe</span>
<span class="n">et</span> <span class="n">of</span> <span class="n">the</span> <span class="n">political</span> <span class="n">leadership</span> <span class="n">here</span> <span class="ow">in</span> <span class="n">Washington</span><span class="o">.</span><span class="n">But</span> <span class="n">today</span> <span class="n">I</span><span class="s1">'d like to talk about the connection between religion and politics a</span>
<span class="n">nd</span> <span class="n">perhaps</span> <span class="n">offer</span> <span class="n">some</span> <span class="n">thoughts</span> <span class="n">about</span> <span class="n">how</span> <span class="n">we</span> <span class="n">can</span> <span class="n">sort</span> <span class="n">through</span> <span class="n">some</span> <span class="n">of</span> <span class="n">the</span> <span class="n">often</span> <span class="n">bitter</span> <span class="n">arguments</span> <span class="n">that</span> <span class="n">we</span><span class="s1">'ve been seeing over the l</span>
<span class="n">ast</span> <span class="n">several</span> <span class="n">years</span><span class="o">.</span><span class="n">I</span> <span class="n">do</span> <span class="n">so</span> <span class="n">because</span><span class="p">,</span> <span class="k">as</span> <span class="n">you</span> <span class="nb">all</span> <span class="n">know</span><span class="p">,</span> <span class="n">we</span> <span class="n">can</span> <span class="n">affirm</span> <span class="n">the</span> <span class="n">importance</span> <span class="n">of</span> <span class="n">poverty</span> <span class="ow">in</span> <span class="n">the</span> <span class="n">Bible</span><span class="p">;</span> <span class="ow">and</span> <span class="n">we</span> <span class="n">can</span> <span class="k">raise</span> <span class="n">up</span> <span class="ow">and</span>
<span class="k">pass</span> <span class="n">out</span> <span class="n">this</span> <span class="n">Covenant</span> <span class="k">for</span> <span class="n">a</span> <span class="n">New</span> <span class="n">America</span><span class="o">.</span> <span class="n">We</span> <span class="n">can</span> <span class="n">talk</span> <span class="n">to</span> <span class="n">the</span> <span class="n">press</span><span class="p">,</span> <span class="ow">and</span> <span class="n">we</span> <span class="n">can</span> <span class="n">discuss</span> <span class="n">the</span> <span class="n">religious</span> <span class="n">call</span> <span class="n">to</span> <span class="n">address</span> <span class="n">poverty</span> <span class="ow">and</span>
<span class="n">environmental</span> <span class="n">stewardship</span> <span class="nb">all</span> <span class="n">we</span> <span class="n">want</span><span class="p">,</span> <span class="n">but</span> <span class="n">it</span> <span class="n">won</span><span class="s1">'t have an impact unless we tackle head-on the mutual suspicion that sometimes</span>
</pre></div>
</div>
</div>
<div class="section" id="prepare-the-data">
<span id="prepare-the-data"></span><h2>Prepare the Data<a class="headerlink" href="#prepare-the-data" title="Permalink to this headline"></a></h2>
<p>To preprocess the dataset, define the following utility functions:</p>
<ul class="simple">
<li><code class="docutils literal"><span class="pre">readContent</span></code> - Reads data from the data file.</li>
<li><code class="docutils literal"><span class="pre">buildVocab</span></code> - Maps each character to a unique Integer ID, i.e., builds a vocabulary.</li>
<li><code class="docutils literal"><span class="pre">text2Id</span></code> - Encodes each sentence with an Integer ID.</li>
</ul>
<p>Then, use these utility functions to generate vocabulary from the input text file (obama.txt).</p>
<p>To prepare the data:</p>
<ol class="simple">
<li>Read the dataset with the following function:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">scala.io.Source</span>
<span class="k">import</span> <span class="nn">scala.io.Source</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// Read file</span>
<span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">readContent</span><span class="o">(</span><span class="n">path</span><span class="k">:</span> <span class="kt">String</span><span class="o">)</span><span class="k">:</span> <span class="kt">String</span> <span class="o">=</span> <span class="nc">Source</span><span class="o">.</span><span class="n">fromFile</span><span class="o">(</span><span class="n">path</span><span class="o">).</span><span class="n">mkString</span>
<span class="n">readContent</span><span class="k">:</span> <span class="o">(</span><span class="kt">path:</span> <span class="kt">String</span><span class="o">)</span><span class="kt">String</span>
</pre></div>
</div>
<ol class="simple" start="2">
<li>Build a vocabulary with the following function:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="c1">// Build a vocabulary of what char we have in the content</span>
<!-- Tutorial snippet: buildVocab reads the file named by its path parameter and gives each distinct character an ID starting at 1 (0 is kept for zero padding). It uses path, not the dataPath value that is only defined in a later step, so the snippet works when pasted in order. Generator arrows are written as &lt;- so the markup is valid HTML text. --><span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">buildVocab</span><span class="o">(</span><span class="n">path</span><span class="k">:</span> <span class="kt">String</span><span class="o">)</span><span class="k">:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>, <span class="kt">Int</span><span class="o">]</span> <span class="k">=</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">content</span> <span class="k">=</span> <span class="n">readContent</span><span class="o">(</span><span class="n">path</span><span class="o">).</span><span class="n">split</span><span class="o">(</span><span class="s">"\n"</span><span class="o">)</span>
<span class="k">var</span> <span class="n">idx</span> <span class="k">=</span> <span class="mi">1</span> <span class="c1">// 0 is left for zero padding</span>
<span class="k">var</span> <span class="n">theVocab</span> <span class="k">=</span> <span class="nc">Map</span><span class="o">[</span><span class="kt">String</span>, <span class="kt">Int</span><span class="o">]()</span>
<span class="k">for</span> <span class="o">(</span><span class="n">line</span> <span class="k">&lt;-</span> <span class="n">content</span><span class="o">)</span> <span class="o">{</span>
<span class="k">for</span> <span class="o">(</span><span class="n">char</span> <span class="k">&lt;-</span> <span class="n">line</span><span class="o">)</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">key</span> <span class="k">=</span> <span class="s">s"</span><span class="si">$char</span><span class="s">"</span>
<span class="k">if</span> <span class="o">(!</span><span class="n">theVocab</span><span class="o">.</span><span class="n">contains</span><span class="o">(</span><span class="n">key</span><span class="o">))</span> <span class="o">{</span>
<span class="n">theVocab</span> <span class="k">=</span> <span class="n">theVocab</span> <span class="o">+</span> <span class="o">(</span><span class="n">key</span> <span class="o">-></span> <span class="n">idx</span><span class="o">)</span>
<span class="n">idx</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="n">theVocab</span>
<span class="o">}</span>
<span class="n">buildVocab</span><span class="k">:</span> <span class="o">(</span><span class="kt">path:</span> <span class="kt">String</span><span class="o">)</span><span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>,<span class="kt">Int</span><span class="o">]</span>
</pre></div>
</div>
<ol class="simple" start="3">
<li>To assign each character a unique numerical ID, use the following function:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">text2Id</span><span class="o">(</span><span class="n">sentence</span><span class="k">:</span> <span class="kt">String</span><span class="o">,</span> <span class="n">theVocab</span><span class="k">:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>, <span class="kt">Int</span><span class="o">])</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Int</span><span class="o">]</span> <span class="k">=</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">words</span> <span class="k">=</span> <span class="k">for</span> <span class="o">(</span><span class="n">char</span> <span class="k">&lt;-</span> <span class="n">sentence</span><span class="o">)</span> <span class="k">yield</span> <span class="n">theVocab</span><span class="o">(</span><span class="s">s"</span><span class="si">$char</span><span class="s">"</span><span class="o">)</span>
<span class="n">words</span><span class="o">.</span><span class="n">toArray</span>
<span class="o">}</span>
<span class="n">text2Id</span><span class="k">:</span> <span class="o">(</span><span class="kt">sentence:</span> <span class="kt">String</span><span class="o">,</span> <span class="kt">theVocab:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>,<span class="kt">Int</span><span class="o">])</span><span class="nc">Array</span><span class="o">[</span><span class="kt">Int</span><span class="o">]</span>
</pre></div>
</div>
<ol class="simple">
<li>Now, build a character vocabulary from the dataset (obama.txt). Change the input filepath (dataPath) to reflect your settings.</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="c1">// Give your system path to the "obama.txt" we have downloaded using previous steps.</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">dataPath</span> <span class="k">=</span> <span class="s">"obama.txt"</span>
<span class="n">dataPath</span><span class="k">:</span> <span class="kt">String</span> <span class="o">=</span> <span class="n">obama</span><span class="o">.</span><span class="n">txt</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">vocab</span> <span class="k">=</span> <span class="n">buildVocab</span><span class="o">(</span><span class="n">dataPath</span><span class="o">)</span>
<span class="n">scala</span><span class="o">></span> <span class="n">vocab</span><span class="o">.</span><span class="n">size</span>
<span class="n">res23</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">82</span>
</pre></div>
</div>
</div>
<div class="section" id="build-a-multi-layer-lstm-model">
<h2>Build a Multi-layer LSTM model<a class="headerlink" href="#build-a-multi-layer-lstm-model" title="Permalink to this headline"></a></h2>
<p>Now, create a multi-layer LSTM model.</p>
<p>To create the model:</p>
<ol class="simple">
<li>Load the helper files (<code class="docutils literal"><span class="pre">Lstm.scala</span></code>, <code class="docutils literal"><span class="pre">BucketIo.scala</span></code> and <code class="docutils literal"><span class="pre">RnnModel.scala</span></code>).
<code class="docutils literal"><span class="pre">Lstm.scala</span></code> contains the definition of the LSTM cell. <code class="docutils literal"><span class="pre">BucketIo.scala</span></code> creates a sentence iterator. <code class="docutils literal"><span class="pre">RnnModel.scala</span></code> is used for model inference. The helper files are available on the <a class="reference external" href="https://github.com/apache/incubator-mxnet/tree/1.2.1/scala-package/examples/src/main/scala/org/apache/mxnetexamples/rnn">MXNet site</a>.
To load them, at the Scala command prompt type:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="k">:</span><span class="kt">load</span> <span class="kt">../../../scala-package/examples/src/main/scala/org/apache/mxnetexamples/rnn/Lstm.scala</span>
<span class="n">scala</span><span class="o">></span> <span class="k">:</span><span class="kt">load</span> <span class="kt">../../../scala-package/examples/src/main/scala/org/apache/mxnetexamples/rnn/BucketIo.scala</span>
<span class="n">scala</span><span class="o">></span> <span class="k">:</span><span class="kt">load</span> <span class="kt">../../../scala-package/examples/src/main/scala/org/apache/mxnetexamples/rnn/RnnModel.scala</span>
</pre></div>
</div>
<ol class="simple" start="2">
<li>Set the LSTM hyperparameters as follows:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="c1">// We can support various input lengths.</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// For this problem, we cut each input sentence to a length of 129 characters.</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// So we only need a single fixed-length bucket.</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">buckets</span> <span class="k">=</span> <span class="nc">Array</span><span class="o">(</span><span class="mi">129</span><span class="o">)</span>
<span class="n">buckets</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Int</span><span class="o">]</span> <span class="k">=</span> <span class="nc">Array</span><span class="o">(</span><span class="mi">129</span><span class="o">)</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// The number of hidden units in each LSTM cell</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">numHidden</span> <span class="k">=</span> <span class="mi">512</span>
<span class="n">numHidden</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">512</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// The embedding dimension, which maps a char to a 256 dim vector</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">numEmbed</span> <span class="k">=</span> <span class="mi">256</span>
<span class="n">numEmbed</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">256</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// The number of lstm layers</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">numLstmLayer</span> <span class="k">=</span> <span class="mi">3</span>
<span class="n">numLstmLayer</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">3</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// The batch size for training</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">batchSize</span> <span class="k">=</span> <span class="mi">32</span>
<span class="n">batchSize</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">32</span>
</pre></div>
</div>
<ol class="simple" start="3">
<li>Now, construct the LSTM network as a symbolic computation graph. Type the following to create a graph in which the model is unrolled for a fixed length explicitly in time.</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="c1">// generate symbol for a length</span>
<span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">symGen</span><span class="o">(</span><span class="n">seqLen</span><span class="k">:</span> <span class="kt">Int</span><span class="o">)</span><span class="k">:</span> <span class="kt">Symbol</span> <span class="o">=</span> <span class="o">{</span>
<span class="nc">Lstm</span><span class="o">.</span><span class="n">lstmUnroll</span><span class="o">(</span><span class="n">numLstmLayer</span><span class="o">,</span> <span class="n">seqLen</span><span class="o">,</span> <span class="n">vocab</span><span class="o">.</span><span class="n">size</span> <span class="o">+</span> <span class="mi">1</span><span class="o">,</span>
<span class="n">numHidden</span> <span class="k">=</span> <span class="n">numHidden</span><span class="o">,</span> <span class="n">numEmbed</span> <span class="k">=</span> <span class="n">numEmbed</span><span class="o">,</span>
<span class="n">numLabel</span> <span class="k">=</span> <span class="n">vocab</span><span class="o">.</span><span class="n">size</span> <span class="o">+</span> <span class="mi">1</span><span class="o">,</span> <span class="n">dropout</span> <span class="k">=</span> <span class="mf">0.2f</span><span class="o">)</span>
<span class="o">}</span>
<span class="n">symGen</span><span class="k">:</span> <span class="o">(</span><span class="kt">seqLen:</span> <span class="kt">Int</span><span class="o">)</span><span class="kt">org.apache.mxnet.Symbol</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// create the network symbol</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">symbol</span> <span class="k">=</span> <span class="n">symGen</span><span class="o">(</span><span class="n">buckets</span><span class="o">(</span><span class="mi">0</span><span class="o">))</span>
<span class="n">symbol</span><span class="k">:</span> <span class="kt">org.apache.mxnet.Symbol</span> <span class="o">=</span> <span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">mxnet</span><span class="o">.</span><span class="nc">Symbol</span><span class="k">@</span><span class="mi">3</span><span class="n">a589eed</span>
</pre></div>
</div>
<ol class="simple" start="4">
<li>To train the model, initialize states for the LSTM and create a data iterator, which groups the data into buckets.
Note: The BucketSentenceIter data iterator supports variable-length examples; however, we use only the fixed-length version in this tutorial.</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="c1">// initialize states for LSTM</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">initC</span> <span class="k">=</span> <span class="k">for</span> <span class="o">(</span><span class="n">l</span> <span class="k"><-</span> <span class="mi">0</span> <span class="n">until</span> <span class="n">numLstmLayer</span><span class="o">)</span> <span class="k">yield</span> <span class="o">(</span><span class="s">s"l</span><span class="si">${</span><span class="n">l</span><span class="si">}</span><span class="s">_init_c"</span><span class="o">,</span> <span class="o">(</span><span class="n">batchSize</span><span class="o">,</span> <span class="n">numHidden</span><span class="o">))</span>
<span class="n">initC</span><span class="k">:</span> <span class="kt">scala.collection.immutable.IndexedSeq</span><span class="o">[(</span><span class="kt">String</span>, <span class="o">(</span><span class="kt">Int</span>, <span class="kt">Int</span><span class="o">))]</span> <span class="k">=</span> <span class="nc">Vector</span><span class="o">((</span><span class="n">l0_init_c</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)),</span>
<span class="o">(</span><span class="n">l1_init_c</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)),</span> <span class="o">(</span><span class="n">l2_init_c</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)))</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">initH</span> <span class="k">=</span> <span class="k">for</span> <span class="o">(</span><span class="n">l</span> <span class="k"><-</span> <span class="mi">0</span> <span class="n">until</span> <span class="n">numLstmLayer</span><span class="o">)</span> <span class="k">yield</span> <span class="o">(</span><span class="s">s"l</span><span class="si">${</span><span class="n">l</span><span class="si">}</span><span class="s">_init_h"</span><span class="o">,</span> <span class="o">(</span><span class="n">batchSize</span><span class="o">,</span> <span class="n">numHidden</span><span class="o">))</span>
<span class="n">initH</span><span class="k">:</span> <span class="kt">scala.collection.immutable.IndexedSeq</span><span class="o">[(</span><span class="kt">String</span>, <span class="o">(</span><span class="kt">Int</span>, <span class="kt">Int</span><span class="o">))]</span> <span class="k">=</span> <span class="nc">Vector</span><span class="o">((</span><span class="n">l0_init_h</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)),</span>
<span class="o">(</span><span class="n">l1_init_h</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)),</span> <span class="o">(</span><span class="n">l2_init_h</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)))</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">initStates</span> <span class="k">=</span> <span class="n">initC</span> <span class="o">++</span> <span class="n">initH</span>
<span class="n">initStates</span><span class="k">:</span> <span class="kt">scala.collection.immutable.IndexedSeq</span><span class="o">[(</span><span class="kt">String</span>, <span class="o">(</span><span class="kt">Int</span>, <span class="kt">Int</span><span class="o">))]</span> <span class="k">=</span>
<span class="nc">Vector</span><span class="o">((</span><span class="n">l0_init_c</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)),</span> <span class="o">(</span><span class="n">l1_init_c</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)),</span> <span class="o">(</span><span class="n">l2_init_c</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)),</span> <span class="o">(</span><span class="n">l0_init_h</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)),</span>
<span class="o">(</span><span class="n">l1_init_h</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)),</span> <span class="o">(</span><span class="n">l2_init_h</span><span class="o">,(</span><span class="mi">32</span><span class="o">,</span><span class="mi">512</span><span class="o">)))</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">dataTrain</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">BucketIo</span><span class="o">.</span><span class="nc">BucketSentenceIter</span><span class="o">(</span><span class="n">dataPath</span><span class="o">,</span> <span class="n">vocab</span><span class="o">,</span> <span class="n">buckets</span><span class="o">,</span>
<span class="n">batchSize</span><span class="o">,</span> <span class="n">initStates</span><span class="o">,</span> <span class="n">seperateChar</span> <span class="k">=</span> <span class="s">"\n"</span><span class="o">,</span>
<span class="n">text2Id</span> <span class="k">=</span> <span class="n">text2Id</span><span class="o">,</span> <span class="n">readContent</span> <span class="k">=</span> <span class="n">readContent</span><span class="o">)</span>
<span class="n">dataTrain</span><span class="k">:</span> <span class="kt">BucketIo.BucketSentenceIter</span> <span class="o">=</span> <span class="n">non</span><span class="o">-</span><span class="n">empty</span> <span class="n">iterator</span>
</pre></div>
</div>
<ol class="simple" start="5">
<li>You can set more than 100 epochs, but for this tutorial, specify 75 epochs. Each epoch can take as long as 4 minutes on a GPU. In this tutorial, you will use the <a class="reference external" href="/versions/1.2.1/api/scala/docs/index.html#org.apache.mxnet.optimizer.Adam">ADAM optimizer</a>:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">org.apache.mxnet._</span>
<span class="k">import</span> <span class="nn">org.apache.mxnet._</span>
<span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">org.apache.mxnet.Callback.Speedometer</span>
<span class="k">import</span> <span class="nn">org.apache.mxnet.Callback.Speedometer</span>
<span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">org.apache.mxnet.optimizer.Adam</span>
<span class="k">import</span> <span class="nn">org.apache.mxnet.optimizer.Adam</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// and we will see result by training 75 epochs</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">numEpoch</span> <span class="k">=</span> <span class="mi">75</span>
<span class="n">numEpoch</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">75</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// learning rate</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">learningRate</span> <span class="k">=</span> <span class="mf">0.001f</span>
<span class="n">learningRate</span><span class="k">:</span> <span class="kt">Float</span> <span class="o">=</span> <span class="mf">0.001</span>
</pre></div>
</div>
<ol class="simple" start="6">
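<li>Define the perplexity utility function, which is used as the evaluation metric and calculates the average negative log-likelihood during training. Also define a <code class="docutils literal"><span class="pre">doCheckpoint</span></code> callback that saves a model checkpoint at the end of each epoch.</li>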
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">perplexity</span><span class="o">(</span><span class="n">label</span><span class="k">:</span> <span class="kt">NDArray</span><span class="o">,</span> <span class="n">pred</span><span class="k">:</span> <span class="kt">NDArray</span><span class="o">)</span><span class="k">:</span> <span class="kt">Float</span> <span class="o">=</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">shape</span> <span class="k">=</span> <span class="n">label</span><span class="o">.</span><span class="n">shape</span>
<span class="k">val</span> <span class="n">size</span> <span class="k">=</span> <span class="n">shape</span><span class="o">(</span><span class="mi">0</span><span class="o">)</span> <span class="o">*</span> <span class="n">shape</span><span class="o">(</span><span class="mi">1</span><span class="o">)</span>
<span class="k">val</span> <span class="n">labelT</span> <span class="k">=</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">tmp</span> <span class="k">=</span> <span class="n">label</span><span class="o">.</span><span class="n">toArray</span><span class="o">.</span><span class="n">grouped</span><span class="o">(</span><span class="n">shape</span><span class="o">(</span><span class="mi">1</span><span class="o">)).</span><span class="n">toArray</span>
<span class="k">val</span> <span class="n">result</span> <span class="k">=</span> <span class="nc">Array</span><span class="o">.</span><span class="n">fill</span><span class="o">[</span><span class="kt">Float</span><span class="o">](</span><span class="n">size</span><span class="o">)(</span><span class="mi">0</span><span class="n">f</span><span class="o">)</span>
<span class="k">var</span> <span class="n">idx</span> <span class="k">=</span> <span class="mi">0</span>
<span class="k">for</span> <span class="o">(</span><span class="n">i</span> <span class="k"><-</span> <span class="mi">0</span> <span class="n">until</span> <span class="n">shape</span><span class="o">(</span><span class="mi">1</span><span class="o">))</span> <span class="o">{</span>
<span class="k">for</span> <span class="o">(</span><span class="n">j</span> <span class="k"><-</span> <span class="mi">0</span> <span class="n">until</span> <span class="n">shape</span><span class="o">(</span><span class="mi">0</span><span class="o">))</span> <span class="o">{</span>
<span class="n">result</span><span class="o">(</span><span class="n">idx</span><span class="o">)</span> <span class="k">=</span> <span class="n">tmp</span><span class="o">(</span><span class="n">j</span><span class="o">)(</span><span class="n">i</span><span class="o">)</span>
<span class="n">idx</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="n">result</span>
<span class="o">}</span>
<span class="k">var</span> <span class="n">loss</span> <span class="k">=</span> <span class="mi">0</span><span class="n">f</span>
<span class="k">val</span> <span class="n">predArray</span> <span class="k">=</span> <span class="n">pred</span><span class="o">.</span><span class="n">toArray</span><span class="o">.</span><span class="n">grouped</span><span class="o">(</span><span class="n">pred</span><span class="o">.</span><span class="n">shape</span><span class="o">(</span><span class="mi">1</span><span class="o">)).</span><span class="n">toArray</span>
<span class="k">for</span> <span class="o">(</span><span class="n">i</span> <span class="k"><-</span> <span class="mi">0</span> <span class="n">until</span> <span class="n">pred</span><span class="o">.</span><span class="n">shape</span><span class="o">(</span><span class="mi">0</span><span class="o">))</span> <span class="o">{</span>
<span class="n">loss</span> <span class="o">+=</span> <span class="o">-</span><span class="nc">Math</span><span class="o">.</span><span class="n">log</span><span class="o">(</span><span class="nc">Math</span><span class="o">.</span><span class="n">max</span><span class="o">(</span><span class="mi">1</span><span class="n">e</span><span class="o">-</span><span class="mi">10</span><span class="o">,</span> <span class="n">predArray</span><span class="o">(</span><span class="n">i</span><span class="o">)(</span><span class="n">labelT</span><span class="o">(</span><span class="n">i</span><span class="o">).</span><span class="n">toInt</span><span class="o">)).</span><span class="n">toFloat</span><span class="o">).</span><span class="n">toFloat</span>
<span class="o">}</span>
<span class="n">loss</span> <span class="o">/</span> <span class="n">size</span>
<span class="o">}</span>
<span class="n">perplexity</span><span class="k">:</span> <span class="o">(</span><span class="kt">label:</span> <span class="kt">org.apache.mxnet.NDArray</span><span class="o">,</span> <span class="kt">pred:</span> <span class="kt">org.apache.mxnet.NDArray</span><span class="o">)</span><span class="nc">Float</span>
<span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">doCheckpoint</span><span class="o">(</span><span class="n">prefix</span><span class="k">:</span> <span class="kt">String</span><span class="o">)</span><span class="k">:</span> <span class="kt">EpochEndCallback</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">EpochEndCallback</span> <span class="o">{</span>
<span class="k">override</span> <span class="k">def</span> <span class="n">invoke</span><span class="o">(</span><span class="n">epoch</span><span class="k">:</span> <span class="kt">Int</span><span class="o">,</span> <span class="n">symbol</span><span class="k">:</span> <span class="kt">Symbol</span><span class="o">,</span>
<span class="n">argParams</span><span class="k">:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>, <span class="kt">NDArray</span><span class="o">],</span>
<span class="n">auxStates</span><span class="k">:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>, <span class="kt">NDArray</span><span class="o">])</span><span class="k">:</span> <span class="kt">Unit</span> <span class="o">=</span> <span class="o">{</span>
<span class="nc">Model</span><span class="o">.</span><span class="n">saveCheckpoint</span><span class="o">(</span><span class="n">prefix</span><span class="o">,</span> <span class="n">epoch</span> <span class="o">+</span> <span class="mi">1</span><span class="o">,</span> <span class="n">symbol</span><span class="o">,</span> <span class="n">argParams</span><span class="o">,</span> <span class="n">auxStates</span><span class="o">)</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="n">doCheckpoint</span><span class="k">:</span> <span class="o">(</span><span class="kt">prefix:</span> <span class="kt">String</span><span class="o">)</span><span class="kt">org.apache.mxnet.EpochEndCallback</span>
</pre></div>
</div>
<ol class="simple" start="7">
<li>Define the initializer that is required for creating a model, as follows:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">initializer</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">Xavier</span><span class="o">(</span><span class="n">factorType</span> <span class="k">=</span> <span class="s">"in"</span><span class="o">,</span> <span class="n">magnitude</span> <span class="k">=</span> <span class="mf">2.34f</span><span class="o">)</span>
<span class="n">initializer</span><span class="k">:</span> <span class="kt">org.apache.mxnet.Xavier</span> <span class="o">=</span> <span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">mxnet</span><span class="o">.</span><span class="nc">Xavier</span><span class="k">@</span><span class="mi">54</span><span class="n">e8f10a</span>
</pre></div>
</div>
<ol class="simple" start="8">
<li>Now, you have implemented all the supporting infrastructure for the char-lstm model. To train the model, use the standard <a class="reference external" href="/versions/1.2.1/api/scala/docs/index.html#org.apache.mxnet.FeedForward">MXNet high-level API</a>. You can train the model on a single GPU or CPU, rather than on multiple GPUs or CPUs, by changing <code class="docutils literal"><span class="pre">.setContext(Array(Context.gpu(0),Context.gpu(1),Context.gpu(2),Context.gpu(3)))</span></code> to, for example, <code class="docutils literal"><span class="pre">.setContext(Array(Context.gpu(0)))</span></code>:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">model</span> <span class="k">=</span> <span class="nc">FeedForward</span><span class="o">.</span><span class="n">newBuilder</span><span class="o">(</span><span class="n">symbol</span><span class="o">)</span>
<span class="o">.</span><span class="n">setContext</span><span class="o">(</span><span class="nc">Array</span><span class="o">(</span><span class="nc">Context</span><span class="o">.</span><span class="n">gpu</span><span class="o">(</span><span class="mi">0</span><span class="o">),</span><span class="nc">Context</span><span class="o">.</span><span class="n">gpu</span><span class="o">(</span><span class="mi">1</span><span class="o">),</span><span class="nc">Context</span><span class="o">.</span><span class="n">gpu</span><span class="o">(</span><span class="mi">2</span><span class="o">),</span><span class="nc">Context</span><span class="o">.</span><span class="n">gpu</span><span class="o">(</span><span class="mi">3</span><span class="o">)))</span>
<span class="o">.</span><span class="n">setNumEpoch</span><span class="o">(</span><span class="n">numEpoch</span><span class="o">)</span>
<span class="o">.</span><span class="n">setOptimizer</span><span class="o">(</span><span class="k">new</span> <span class="nc">Adam</span><span class="o">(</span><span class="n">learningRate</span> <span class="k">=</span> <span class="n">learningRate</span><span class="o">,</span> <span class="n">wd</span> <span class="k">=</span> <span class="mf">0.00001f</span><span class="o">))</span>
<span class="o">.</span><span class="n">setInitializer</span><span class="o">(</span><span class="n">initializer</span><span class="o">)</span>
<span class="o">.</span><span class="n">setTrainData</span><span class="o">(</span><span class="n">dataTrain</span><span class="o">)</span>
<span class="o">.</span><span class="n">setEvalMetric</span><span class="o">(</span><span class="k">new</span> <span class="nc">CustomMetric</span><span class="o">(</span><span class="n">perplexity</span><span class="o">,</span> <span class="n">name</span> <span class="k">=</span> <span class="s">"perplexity"</span><span class="o">))</span>
<span class="o">.</span><span class="n">setBatchEndCallback</span><span class="o">(</span><span class="k">new</span> <span class="nc">Speedometer</span><span class="o">(</span><span class="n">batchSize</span><span class="o">,</span> <span class="mi">20</span><span class="o">))</span>
<span class="o">.</span><span class="n">setEpochEndCallback</span><span class="o">(</span><span class="n">doCheckpoint</span><span class="o">(</span><span class="s">"obama"</span><span class="o">))</span>
<span class="o">.</span><span class="n">build</span><span class="o">()</span>
<span class="n">model</span><span class="k">:</span> <span class="kt">org.apache.mxnet.FeedForward</span> <span class="o">=</span> <span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">mxnet</span><span class="o">.</span><span class="nc">FeedForward</span><span class="k">@</span><span class="mi">4926</span><span class="n">f6c7</span>
</pre></div>
</div>
<p>Now you have a trained LSTM model. Use this model to perform inference.</p>
</div>
<div class="section" id="build-the-inference-model">
<h2>Build the Inference Model<a class="headerlink" href="#build-the-inference-model" title="Permalink to this headline"></a></h2>
<p>You can now sample sentences from the trained model. The sampler works as follows:</p>
<ul class="simple">
<li>Takes some fixed character set (e.g., “The United States”) and feeds it into the LSTM as the starting input.</li>
<li>In the first time step, the LSTM produces an output distribution over the vocabulary and a state. The sampler then samples a character from the output distribution and fixes it as the second character.</li>
<li>In the next time step, feeds the previously sampled character as input.</li>
<li>Continues running until it has sampled enough characters. Note that because we are running mini-batches, several sentences can be sampled simultaneously.</li>
</ul>
<p>To build the inference model, define the following utility functions that help MXNet make inferences:</p>
<ul class="simple">
<li><code class="docutils literal"><span class="pre">makeRevertVocab</span></code> - Reverses the key-value pairs of the vocabulary dictionary (so that IDs map to characters) for easy access to characters while predicting</li>
<li><code class="docutils literal"><span class="pre">makeInput</span></code> - Uses a given character as input</li>
<li><code class="docutils literal"><span class="pre">cdf</span></code>, <code class="docutils literal"><span class="pre">choice</span></code> - <code class="docutils literal"><span class="pre">cdf</span></code> is a helper function for the <code class="docutils literal"><span class="pre">choice</span></code> function, which is used to create random samples</li>
<li><code class="docutils literal"><span class="pre">makeOutput</span></code> - Directs the model to use either random output or fixed output by choosing the option with the greatest probability.</li>
</ul>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="k">import</span> <span class="nn">scala.util.Random</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// helper structure for prediction</span>
<span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">makeRevertVocab</span><span class="o">(</span><span class="n">vocab</span><span class="k">:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>, <span class="kt">Int</span><span class="o">])</span><span class="k">:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">Int</span>, <span class="kt">String</span><span class="o">]</span> <span class="k">=</span> <span class="o">{</span>
<span class="k">var</span> <span class="n">dic</span> <span class="k">=</span> <span class="nc">Map</span><span class="o">[</span><span class="kt">Int</span>, <span class="kt">String</span><span class="o">]()</span>
<span class="n">vocab</span><span class="o">.</span><span class="n">foreach</span> <span class="o">{</span> <span class="k">case</span> <span class="o">(</span><span class="n">k</span><span class="o">,</span> <span class="n">v</span><span class="o">)</span> <span class="k">=></span>
<span class="n">dic</span> <span class="k">=</span> <span class="n">dic</span> <span class="o">+</span> <span class="o">(</span><span class="n">v</span> <span class="o">-></span> <span class="n">k</span><span class="o">)</span>
<span class="o">}</span>
<span class="n">dic</span>
<span class="o">}</span>
<span class="n">makeRevertVocab</span><span class="k">:</span> <span class="o">(</span><span class="kt">vocab:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>,<span class="kt">Int</span><span class="o">])</span><span class="nc">Map</span><span class="o">[</span><span class="kt">Int</span>,<span class="kt">String</span><span class="o">]</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// make input from char</span>
<span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">makeInput</span><span class="o">(</span><span class="n">char</span><span class="k">:</span> <span class="kt">Char</span><span class="o">,</span> <span class="n">vocab</span><span class="k">:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>, <span class="kt">Int</span><span class="o">],</span> <span class="n">arr</span><span class="k">:</span> <span class="kt">NDArray</span><span class="o">)</span><span class="k">:</span> <span class="kt">Unit</span> <span class="o">=</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">idx</span> <span class="k">=</span> <span class="n">vocab</span><span class="o">(</span><span class="s">s"</span><span class="si">$char</span><span class="s">"</span><span class="o">)</span>
<span class="k">val</span> <span class="n">tmp</span> <span class="k">=</span> <span class="nc">NDArray</span><span class="o">.</span><span class="n">zeros</span><span class="o">(</span><span class="mi">1</span><span class="o">)</span>
<span class="n">tmp</span><span class="o">.</span><span class="n">set</span><span class="o">(</span><span class="n">idx</span><span class="o">)</span>
<span class="n">arr</span><span class="o">.</span><span class="n">set</span><span class="o">(</span><span class="n">tmp</span><span class="o">)</span>
<span class="o">}</span>
<span class="n">makeInput</span><span class="k">:</span> <span class="o">(</span><span class="kt">char:</span> <span class="kt">Char</span><span class="o">,</span> <span class="kt">vocab:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">String</span>,<span class="kt">Int</span><span class="o">],</span> <span class="n">arr</span><span class="k">:</span> <span class="kt">org.apache.mxnet.NDArray</span><span class="o">)</span><span class="nc">Unit</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// helper function for random sample</span>
<span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">cdf</span><span class="o">(</span><span class="n">weights</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Float</span><span class="o">])</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Float</span><span class="o">]</span> <span class="k">=</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">total</span> <span class="k">=</span> <span class="n">weights</span><span class="o">.</span><span class="n">sum</span>
<span class="k">var</span> <span class="n">result</span> <span class="k">=</span> <span class="nc">Array</span><span class="o">[</span><span class="kt">Float</span><span class="o">]()</span>
<span class="k">var</span> <span class="n">cumsum</span> <span class="k">=</span> <span class="mi">0</span><span class="n">f</span>
<span class="k">for</span> <span class="o">(</span><span class="n">w</span> <span class="k"><-</span> <span class="n">weights</span><span class="o">)</span> <span class="o">{</span>
<span class="n">cumsum</span> <span class="o">+=</span> <span class="n">w</span>
<span class="n">result</span> <span class="k">=</span> <span class="n">result</span> <span class="o">:+</span> <span class="o">(</span><span class="n">cumsum</span> <span class="o">/</span> <span class="n">total</span><span class="o">)</span>
<span class="o">}</span>
<span class="n">result</span>
<span class="o">}</span>
<span class="n">cdf</span><span class="k">:</span> <span class="o">(</span><span class="kt">weights:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Float</span><span class="o">])</span><span class="nc">Array</span><span class="o">[</span><span class="kt">Float</span><span class="o">]</span>
<span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">choice</span><span class="o">(</span><span class="n">population</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">String</span><span class="o">],</span> <span class="n">weights</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Float</span><span class="o">])</span><span class="k">:</span> <span class="kt">String</span> <span class="o">=</span> <span class="o">{</span>
<span class="n">assert</span><span class="o">(</span><span class="n">population</span><span class="o">.</span><span class="n">length</span> <span class="o">==</span> <span class="n">weights</span><span class="o">.</span><span class="n">length</span><span class="o">)</span>
<span class="k">val</span> <span class="n">cdfVals</span> <span class="k">=</span> <span class="n">cdf</span><span class="o">(</span><span class="n">weights</span><span class="o">)</span>
<span class="k">val</span> <span class="n">x</span> <span class="k">=</span> <span class="nc">Random</span><span class="o">.</span><span class="n">nextFloat</span><span class="o">()</span>
<span class="k">var</span> <span class="n">idx</span> <span class="k">=</span> <span class="mi">0</span>
<span class="k">var</span> <span class="n">found</span> <span class="k">=</span> <span class="kc">false</span>
<span class="k">for</span> <span class="o">(</span><span class="n">i</span> <span class="k"><-</span> <span class="mi">0</span> <span class="n">until</span> <span class="n">cdfVals</span><span class="o">.</span><span class="n">length</span><span class="o">)</span> <span class="o">{</span>
<span class="k">if</span> <span class="o">(</span><span class="n">cdfVals</span><span class="o">(</span><span class="n">i</span><span class="o">)</span> <span class="o">>=</span> <span class="n">x</span> <span class="o">&amp;&amp;</span> <span class="o">!</span><span class="n">found</span><span class="o">)</span> <span class="o">{</span>
<span class="n">idx</span> <span class="k">=</span> <span class="n">i</span>
<span class="n">found</span> <span class="k">=</span> <span class="kc">true</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="n">population</span><span class="o">(</span><span class="n">idx</span><span class="o">)</span>
<span class="o">}</span>
<span class="n">choice</span><span class="k">:</span> <span class="o">(</span><span class="kt">population:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">String</span><span class="o">],</span> <span class="n">weights</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Float</span><span class="o">])</span><span class="nc">String</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// we can use random output or fixed output by choosing largest probability</span>
<span class="n">scala</span><span class="o">></span> <span class="k">def</span> <span class="n">makeOutput</span><span class="o">(</span><span class="n">prob</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Float</span><span class="o">],</span> <span class="n">vocab</span><span class="k">:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">Int</span>, <span class="kt">String</span><span class="o">],</span>
<span class="n">sample</span><span class="k">:</span> <span class="kt">Boolean</span> <span class="o">=</span> <span class="kc">false</span><span class="o">,</span> <span class="n">temperature</span><span class="k">:</span> <span class="kt">Float</span> <span class="o">=</span> <span class="mi">1</span><span class="n">f</span><span class="o">)</span><span class="k">:</span> <span class="kt">String</span> <span class="o">=</span> <span class="o">{</span>
<span class="k">var</span> <span class="n">idx</span> <span class="k">=</span> <span class="o">-</span><span class="mi">1</span>
<span class="k">val</span> <span class="n">char</span> <span class="k">=</span> <span class="k">if</span> <span class="o">(</span><span class="n">sample</span> <span class="o">==</span> <span class="kc">false</span><span class="o">)</span> <span class="o">{</span>
<span class="n">idx</span> <span class="k">=</span> <span class="o">((-</span><span class="mi">1</span><span class="n">f</span><span class="o">,</span> <span class="o">-</span><span class="mi">1</span><span class="o">)</span> <span class="o">/:</span> <span class="n">prob</span><span class="o">.</span><span class="n">zipWithIndex</span><span class="o">)</span> <span class="o">{</span> <span class="o">(</span><span class="n">max</span><span class="o">,</span> <span class="n">elem</span><span class="o">)</span> <span class="k">=></span>
<span class="k">if</span> <span class="o">(</span><span class="n">max</span><span class="o">.</span><span class="n">_1</span> <span class="o"><</span> <span class="n">elem</span><span class="o">.</span><span class="n">_1</span><span class="o">)</span> <span class="n">elem</span> <span class="k">else</span> <span class="n">max</span>
<span class="o">}.</span><span class="n">_2</span>
<span class="k">if</span> <span class="o">(</span><span class="n">vocab</span><span class="o">.</span><span class="n">contains</span><span class="o">(</span><span class="n">idx</span><span class="o">))</span> <span class="n">vocab</span><span class="o">(</span><span class="n">idx</span><span class="o">)</span>
<span class="k">else</span> <span class="s">""</span>
<span class="o">}</span> <span class="k">else</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">fixDict</span> <span class="k">=</span> <span class="nc">Array</span><span class="o">(</span><span class="s">""</span><span class="o">)</span> <span class="o">++</span> <span class="o">(</span><span class="mi">1</span> <span class="n">until</span> <span class="n">vocab</span><span class="o">.</span><span class="n">size</span> <span class="o">+</span> <span class="mi">1</span><span class="o">).</span><span class="n">map</span><span class="o">(</span><span class="n">i</span> <span class="k">=></span> <span class="n">vocab</span><span class="o">(</span><span class="n">i</span><span class="o">))</span>
<span class="k">var</span> <span class="n">scaleProb</span> <span class="k">=</span> <span class="n">prob</span><span class="o">.</span><span class="n">map</span><span class="o">(</span><span class="n">x</span> <span class="k">=></span> <span class="k">if</span> <span class="o">(</span><span class="n">x</span> <span class="o"><</span> <span class="mi">1</span><span class="n">e</span><span class="o">-</span><span class="mi">6</span><span class="o">)</span> <span class="mi">1</span><span class="n">e</span><span class="o">-</span><span class="mi">6</span> <span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">x</span> <span class="o">></span> <span class="mi">1</span> <span class="o">-</span> <span class="mi">1</span><span class="n">e</span><span class="o">-</span><span class="mi">6</span><span class="o">)</span> <span class="mi">1</span> <span class="o">-</span> <span class="mi">1</span><span class="n">e</span><span class="o">-</span><span class="mi">6</span> <span class="k">else</span> <span class="n">x</span><span class="o">)</span>
<span class="k">var</span> <span class="n">rescale</span> <span class="k">=</span> <span class="n">scaleProb</span><span class="o">.</span><span class="n">map</span><span class="o">(</span><span class="n">x</span> <span class="k">=></span> <span class="nc">Math</span><span class="o">.</span><span class="n">exp</span><span class="o">(</span><span class="nc">Math</span><span class="o">.</span><span class="n">log</span><span class="o">(</span><span class="n">x</span><span class="o">)</span> <span class="o">/</span> <span class="n">temperature</span><span class="o">).</span><span class="n">toFloat</span><span class="o">)</span>
<span class="k">val</span> <span class="n">sum</span> <span class="k">=</span> <span class="n">rescale</span><span class="o">.</span><span class="n">sum</span><span class="o">.</span><span class="n">toFloat</span>
<span class="n">rescale</span> <span class="k">=</span> <span class="n">rescale</span><span class="o">.</span><span class="n">map</span><span class="o">(</span><span class="k">_</span> <span class="o">/</span> <span class="n">sum</span><span class="o">)</span>
<span class="n">choice</span><span class="o">(</span><span class="n">fixDict</span><span class="o">,</span> <span class="n">rescale</span><span class="o">)</span>
<span class="o">}</span>
<span class="n">char</span>
<span class="o">}</span>
<span class="n">makeOutput</span><span class="k">:</span> <span class="o">(</span><span class="kt">prob:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Float</span><span class="o">],</span> <span class="n">vocab</span><span class="k">:</span> <span class="kt">Map</span><span class="o">[</span><span class="kt">Int</span>,<span class="kt">String</span><span class="o">],</span> <span class="n">sample</span><span class="k">:</span> <span class="kt">Boolean</span><span class="o">,</span> <span class="n">temperature</span><span class="k">:</span> <span class="kt">Float</span><span class="o">)</span><span class="nc">String</span>
</pre></div>
</div>
<ol class="simple">
<li>Build the inference model:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="c1">// load from check-point</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="o">(</span><span class="k">_</span><span class="o">,</span> <span class="n">argParams</span><span class="o">,</span> <span class="k">_</span><span class="o">)</span> <span class="k">=</span> <span class="nc">Model</span><span class="o">.</span><span class="n">loadCheckpoint</span><span class="o">(</span><span class="s">"obama"</span><span class="o">,</span> <span class="mi">75</span><span class="o">)</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// build an inference model</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">model</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">RnnModel</span><span class="o">.</span><span class="nc">LSTMInferenceModel</span><span class="o">(</span><span class="n">numLstmLayer</span><span class="o">,</span> <span class="n">vocab</span><span class="o">.</span><span class="n">size</span> <span class="o">+</span> <span class="mi">1</span><span class="o">,</span> <span class="o">\</span>
<span class="n">numHidden</span> <span class="k">=</span> <span class="n">numHidden</span><span class="o">,</span> <span class="n">numEmbed</span> <span class="k">=</span> <span class="n">numEmbed</span><span class="o">,</span> <span class="o">\</span>
<span class="n">numLabel</span> <span class="k">=</span> <span class="n">vocab</span><span class="o">.</span><span class="n">size</span> <span class="o">+</span> <span class="mi">1</span><span class="o">,</span> <span class="n">argParams</span> <span class="k">=</span> <span class="n">argParams</span><span class="o">,</span> <span class="o">\</span>
<span class="n">ctx</span> <span class="k">=</span> <span class="nc">Context</span><span class="o">.</span><span class="n">cpu</span><span class="o">(),</span> <span class="n">dropout</span> <span class="k">=</span> <span class="mf">0.2f</span><span class="o">)</span>
<span class="n">model</span><span class="k">:</span> <span class="kt">RnnModel.LSTMInferenceModel</span> <span class="o">=</span> <span class="nc">RnnModel$LSTMInferenceModel</span><span class="k">@</span><span class="mi">2</span><span class="n">f0c0319</span>
</pre></div>
</div>
<ol class="simple">
<li>Now you can generate a sequence of 1200 characters (you can select any number of characters you want) starting with “The United States” as follows:</li>
</ol>
<div class="highlight-scala"><div class="highlight"><pre><span></span> <span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">seqLength</span> <span class="k">=</span> <span class="mi">1200</span>
<span class="n">seqLength</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">1200</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">inputNdarray</span> <span class="k">=</span> <span class="nc">NDArray</span><span class="o">.</span><span class="n">zeros</span><span class="o">(</span><span class="mi">1</span><span class="o">)</span>
<span class="n">inputNdarray</span><span class="k">:</span> <span class="kt">org.apache.mxnet.NDArray</span> <span class="o">=</span> <span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">mxnet</span><span class="o">.</span><span class="nc">NDArray</span><span class="k">@</span><span class="mi">9</span><span class="n">c231a24</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">revertVocab</span> <span class="k">=</span> <span class="n">makeRevertVocab</span><span class="o">(</span><span class="n">vocab</span><span class="o">)</span>
<span class="n">scala</span><span class="o">></span> <span class="c1">// Feel free to change the starter sentence</span>
<span class="n">scala</span><span class="o">></span> <span class="k">var</span> <span class="n">output</span> <span class="k">=</span> <span class="s">"The United States"</span>
<span class="n">output</span><span class="k">:</span> <span class="kt">String</span> <span class="o">=</span> <span class="nc">The</span> <span class="nc">United</span> <span class="nc">States</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">randomSample</span> <span class="k">=</span> <span class="kc">true</span>
<span class="n">randomSample</span><span class="k">:</span> <span class="kt">Boolean</span> <span class="o">=</span> <span class="kc">true</span>
<span class="n">scala</span><span class="o">></span> <span class="k">var</span> <span class="n">newSentence</span> <span class="k">=</span> <span class="kc">true</span>
<span class="n">newSentence</span><span class="k">:</span> <span class="kt">Boolean</span> <span class="o">=</span> <span class="kc">true</span>
<span class="n">scala</span><span class="o">></span> <span class="k">val</span> <span class="n">ignoreLength</span> <span class="k">=</span> <span class="n">output</span><span class="o">.</span><span class="n">length</span><span class="o">()</span>
<span class="n">ignoreLength</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="mi">17</span>
<span class="n">scala</span><span class="o">></span> <span class="k">for</span> <span class="o">(</span><span class="n">i</span> <span class="k"><-</span> <span class="mi">0</span> <span class="n">until</span> <span class="n">seqLength</span><span class="o">)</span> <span class="o">{</span>
<span class="k">if</span> <span class="o">(</span><span class="n">i</span> <span class="o"><=</span> <span class="n">ignoreLength</span> <span class="o">-</span> <span class="mi">1</span><span class="o">)</span> <span class="n">makeInput</span><span class="o">(</span><span class="n">output</span><span class="o">(</span><span class="n">i</span><span class="o">),</span> <span class="n">vocab</span><span class="o">,</span> <span class="n">inputNdarray</span><span class="o">)</span>
<span class="k">else</span> <span class="n">makeInput</span><span class="o">(</span><span class="n">output</span><span class="o">.</span><span class="n">takeRight</span><span class="o">(</span><span class="mi">1</span><span class="o">)(</span><span class="mi">0</span><span class="o">),</span> <span class="n">vocab</span><span class="o">,</span> <span class="n">inputNdarray</span><span class="o">)</span>
<span class="k">val</span> <span class="n">prob</span> <span class="k">=</span> <span class="n">model</span><span class="o">.</span><span class="n">forward</span><span class="o">(</span><span class="n">inputNdarray</span><span class="o">,</span> <span class="n">newSentence</span><span class="o">)</span>
<span class="n">newSentence</span> <span class="k">=</span> <span class="kc">false</span>
<span class="k">val</span> <span class="n">nextChar</span> <span class="k">=</span> <span class="n">makeOutput</span><span class="o">(</span><span class="n">prob</span><span class="o">,</span> <span class="n">revertVocab</span><span class="o">,</span> <span class="n">randomSample</span><span class="o">)</span>
<span class="k">if</span> <span class="o">(</span><span class="n">nextChar</span> <span class="o">==</span> <span class="s">""</span><span class="o">)</span> <span class="n">newSentence</span> <span class="k">=</span> <span class="kc">true</span>
<span class="k">if</span> <span class="o">(</span><span class="n">i</span> <span class="o">>=</span> <span class="n">ignoreLength</span><span class="o">)</span> <span class="n">output</span> <span class="k">=</span> <span class="n">output</span> <span class="o">++</span> <span class="n">nextChar</span>
<span class="o">}</span>
<span class="n">scala</span><span class="o">></span> <span class="n">output</span>
<span class="n">res7</span><span class="k">:</span> <span class="kt">String</span> <span class="o">=</span> <span class="nc">The</span> <span class="nc">United</span> <span class="nc">States</span> <span class="n">who</span> <span class="n">have</span> <span class="n">been</span> <span class="n">blessed</span> <span class="n">no</span> <span class="n">companies</span> <span class="n">would</span> <span class="n">be</span> <span class="n">proud</span> <span class="n">that</span> <span class="n">the</span> <span class="n">challenges</span> <span class="n">we</span> <span class="n">face</span><span class="o">,</span> <span class="n">it</span><span class=" -Symbol">'s</span> <span class="n">not</span> <span class="n">as</span> <span class="n">directly</span> <span class="n">untelle</span> <span class="n">are</span> <span class="n">in</span> <span class="n">my</span> <span class="n">daughters</span> <span class="o">-</span> <span class="n">you</span> <span class="n">can</span> <span class="n">afford</span> <span class="o">--</span> <span class="n">life</span><span class="o">-</span><span class="n">saving</span> <span class="n">march</span> <span class="n">care</span> <span class="n">and</span> <span class="n">poor</span> <span class="n">information</span> <span class="n">and</span> <span class="n">receiving</span> <span class="n">battle</span> <span class="n">against</span> <span class="n">other</span> <span class="n">speeces</span> <span class="n">and</span> <span class="n">lead</span> <span class="n">its</span> <span class="n">people</span><span class="o">.</span> <span class="nc">After</span> <span class="n">champions</span> <span class="n">of</span> <span class="mi">2006</span><span class="o">,</span> <span class="n">and</span> <span class="n">because</span> <span class="nc">Africa</span> <span class="n">in</span> <span class="nc">America</span><span class="o">,</span> <span class="n">separate</span> <span class="n">has</span> <span class="n">been</span> <span 
class="n">conferenced</span> <span class="n">by</span> <span class="n">children</span> <span class="n">ation</span> <span class="n">of</span> <span class="n">discrimination</span><span class="o">,</span> <span class="n">we</span> <span class="n">remember</span> <span class="n">all</span> <span class="n">of</span> <span class="k">this</span><span class="o">,</span> <span class="n">succeeded</span> <span class="n">in</span> <span class="n">any</span> <span class="n">other</span> <span class="n">feelings</span> <span class="n">of</span> <span class="n">a</span> <span class="n">palently</span> <span class="n">better</span> <span class="n">political</span> <span class="n">process</span> <span class="o">-</span> <span class="n">at</span> <span class="n">lliims</span> <span class="n">being</span> <span class="n">disability</span> <span class="n">payment</span><span class="o">.</span> <span class="nc">All</span> <span class="n">across</span> <span class="n">all</span> <span class="n">different</span> <span class="n">mights</span> <span class="n">of</span> <span class="n">a</span> <span class="n">more</span> <span class="n">just</span> <span class="n">a</span> <span class="n">few</span> <span class="n">global</span> <span class="n">personal</span> <span class="n">morality</span> <span class="n">and</span> <span class="n">industrialized</span> <span class="n">ready</span> <span class="n">to</span> <span class="n">succeed</span><span class="o">.</span><span class="nc">One</span> <span class="n">can</span> <span class="n">afford</span> <span class="n">when</span> <span class="n">the</span> <span class="n">earliest</span> <span class="n">days</span> <span class="n">of</span> <span class="n">a</span> <span class="n">pension</span> <span class="n">you</span> <span class="n">can</span> <span class="n">add</span> <span class="n">to</span> <span class="n">the</span> <span class="n">system</span> <span class="n">be</span> <span class="n">confructive</span> <span 
class="n">despair</span><span class="o">.</span> <span class="nc">They</span> <span class="n">have</span> <span class="n">starting</span> <span class="n">in</span> <span class="n">the</span> <span class="n">demand</span> <span class="k">for</span><span class="o">...</span>
</pre></div>
</div>
<p>You can see the output generated by the model trained on Obama’s speeches. All of the line breaks, punctuation, and uppercase and lowercase letters were produced by the sampler (no post-processing was performed).</p>
</div>
<div class="section" id="next-steps">
<span id="next-steps"></span><h2>Next Steps<a class="headerlink" href="#next-steps" title="Permalink to this headline"></a></h2>
<div class="toctree-wrapper compound">
<ul>
<li class="toctree-l1"><a class="reference external" href="/versions/1.2.1/api/scala/">Scala API</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/dmlc/mxnet/tree/1.2.1/scala-package/examples/">More Scala Examples</a></li>
<li class="toctree-l1"><a class="reference external" href="/versions/1.2.1/tutorials/index.html">MXNet tutorials index</a></li>
</ul>
</div>
</div>
</div>
</div>
</div>
<div aria-label="main navigation" class="sphinxsidebar rightsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<h3><a href="../../index.html">Table Of Contents</a></h3>
<ul>
<li><a class="reference internal" href="#">Developing a Character-level Language model</a><ul>
<li><a class="reference internal" href="#how-to-use-this-tutorial">How to Use This Tutorial</a></li>
<li><a class="reference internal" href="#prerequisites">Prerequisites</a></li>
<li><a class="reference internal" href="#download-the-data">Download the Data</a></li>
<li><a class="reference internal" href="#prepare-the-data">Prepare the Data</a></li>
<li><a class="reference internal" href="#build-a-multi-layer-lstm-model">Build a Multi-layer LSTM model</a></li>
<li><a class="reference internal" href="#build-the-inference-model">Build the Inference Model</a></li>
<li><a class="reference internal" href="#next-steps">Next Steps</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div><div class="footer">
<div class="section-disclaimer">
<div class="container">
<div>
<img alt="Apache Incubator logo" height="60" src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/apache_incubator_logo.png"/>
<p>
Apache MXNet is an effort undergoing incubation at The Apache Software Foundation (ASF), <strong>sponsored by the <i>Apache Incubator</i></strong>. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
</p>
<p>
Copyright © 2017-2018, The Apache Software Foundation.
Apache MXNet, MXNet, Apache, the Apache feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the Apache Software Foundation.
</p>
</div>
</div>
</div>
</div> <!-- pagename != index -->
</div>
<script crossorigin="anonymous" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
<script src="../../_static/js/sidebar.js" type="text/javascript"></script>
<script src="../../_static/js/search.js" type="text/javascript"></script>
<script src="../../_static/js/navbar.js" type="text/javascript"></script>
<script src="../../_static/js/clipboard.min.js" type="text/javascript"></script>
<script src="../../_static/js/copycode.js" type="text/javascript"></script>
<script src="../../_static/js/page.js" type="text/javascript"></script>
<script src="../../_static/js/docversion.js" type="text/javascript"></script>
<script type="text/javascript">
// Make the <body> visible once jQuery reports that the DOM is ready.
// NOTE(review): presumably a stylesheet hides <body> up front to avoid a
// flash of unstyled content — confirm against the theme CSS.
$(document).ready(function () {
var $body = $('body');
$body.css('visibility', 'visible');
});
</script>
</body>
</html>