blob: 36a8f7ddeb2ae99241540e0f1fd9355e90f9c5bc [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>Optimization: initialize and update weights — mxnet documentation</title>
<link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" rel="stylesheet"/>
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet"/>
<link href="../../_static/basic.css" rel="stylesheet" type="text/css">
<link href="../../_static/pygments.css" rel="stylesheet" type="text/css">
<link href="../../_static/mxnet.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../../',
VERSION: '',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: ''
};
</script>
<script src="../../_static/jquery-1.11.1.js" type="text/javascript"></script>
<script src="../../_static/underscore.js" type="text/javascript"></script>
<script src="../../_static/searchtools_custom.js" type="text/javascript"></script>
<script src="../../_static/doctools.js" type="text/javascript"></script>
<script src="../../_static/selectlang.js" type="text/javascript"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>
<script type="text/javascript"> jQuery(function() { Search.loadIndex("/searchindex.js"); Search.init();}); </script>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new
Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-96378503-1', 'auto');
ga('send', 'pageview');
</script>
<!-- -->
<!-- <script type="text/javascript" src="../../_static/jquery.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../_static/underscore.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../_static/doctools.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> -->
<!-- -->
<link href="index.html" rel="up" title="MXNet - Python API">
<link href="callback.html" rel="next" title="Callback API"/>
<link href="io.html" rel="prev" title="Data Loading API"/>
<link href="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-icon.png" rel="icon" type="image/png"/>
</link></link></link></head>
<body role="document"><!-- Previous Navbar Layout
<div class="navbar navbar-default navbar-fixed-top">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="../../" class="navbar-brand">
<img src="http://data.mxnet.io/theme/mxnet.png">
</a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul id="navbar" class="navbar navbar-left">
<li> <a href="../../get_started/index.html">Get Started</a> </li>
<li> <a href="../../tutorials/index.html">Tutorials</a> </li>
<li> <a href="../../how_to/index.html">How To</a> </li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Packages <span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="../../packages/python/index.html">
Python
</a></li>
<li><a href="../../packages/r/index.html">
R
</a></li>
<li><a href="../../packages/julia/index.html">
Julia
</a></li>
<li><a href="../../packages/c++/index.html">
C++
</a></li>
<li><a href="../../packages/scala/index.html">
Scala
</a></li>
<li><a href="../../packages/perl/index.html">
Perl
</a></li>
</ul>
</li>
<li> <a href="../../system/index.html">System</a> </li>
<li>
<form class="" role="search" action="../../search.html" method="get" autocomplete="off">
<div class="form-group inner-addon left-addon">
<i class="glyphicon glyphicon-search"></i>
<input type="text" name="q" class="form-control" placeholder="Search">
</div>
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form> </li>
</ul>
<ul id="navbar" class="navbar navbar-right">
<li> <a href="../../index.html"><span class="flag-icon flag-icon-us"></span></a> </li>
<li> <a href="../..//zh/index.html"><span class="flag-icon flag-icon-cn"></span></a> </li>
</ul>
</div>
</div>
</div>
Previous Navbar Layout End -->
<div class="navbar navbar-fixed-top">
<div class="container" id="navContainer">
<div class="innder" id="header-inner">
<h1 id="logo-wrap">
<a href="../../" id="logo"><img src="http://data.mxnet.io/theme/mxnet.png"/></a>
</h1>
<nav class="nav-bar" id="main-nav">
<a class="main-nav-link" href="../../get_started/install.html">Install</a>
<a class="main-nav-link" href="../../tutorials/index.html">Tutorials</a>
<a class="main-nav-link" href="../../how_to/index.html">How To</a>
<span id="dropdown-menu-position-anchor">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">API <span class="caret"></span></a>
<ul class="dropdown-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="../../api/python/index.html">Python</a></li>
<li><a class="main-nav-link" href="../../api/scala/index.html">Scala</a></li>
<li><a class="main-nav-link" href="../../api/r/index.html">R</a></li>
<li><a class="main-nav-link" href="../../api/julia/index.html">Julia</a></li>
<li><a class="main-nav-link" href="../../api/c++/index.html">C++</a></li>
<li><a class="main-nav-link" href="../../api/perl/index.html">Perl</a></li>
</ul>
</span>
<a class="main-nav-link" href="../../architecture/index.html">Architecture</a>
<!-- <a class="main-nav-link" href="../../community/index.html">Community</a> -->
<a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a>
<span id="dropdown-menu-position-anchor-version" style="position: relative"><a href="#" class="main-nav-link dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Versions(master)<span class="caret"></span></a><ul id="package-dropdown-menu" class="dropdown-menu"><li><a class="main-nav-link" href=http://mxnet.incubator.apache.org/test/>v0.10.14</a></li><li><a class="main-nav-link" href=http://mxnet.incubator.apache.org/test/versions/0.10/index.html>0.10</a></li><li><a class="main-nav-link" href=http://mxnet.incubator.apache.org/test/versions/master/index.html>master</a></li></ul></span></nav>
<script> function getRootPath(){ return "../../" } </script>
<div class="burgerIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"></a>
<ul class="dropdown-menu dropdown-menu-right" id="burgerMenu">
<li><a href="../../get_started/install.html">Install</a></li>
<li><a href="../../tutorials/index.html">Tutorials</a></li>
<li><a href="../../how_to/index.html">How To</a></li>
<li class="dropdown-submenu">
<a href="#" tabindex="-1">API</a>
<ul class="dropdown-menu">
<li><a href="../../api/python/index.html" tabindex="-1">Python</a>
</li>
<li><a href="../../api/scala/index.html" tabindex="-1">Scala</a>
</li>
<li><a href="../../api/r/index.html" tabindex="-1">R</a>
</li>
<li><a href="../../api/julia/index.html" tabindex="-1">Julia</a>
</li>
<li><a href="../../api/c++/index.html" tabindex="-1">C++</a>
</li>
<li><a href="../../api/perl/index.html" tabindex="-1">Perl</a>
</li>
</ul>
</li>
<li><a href="../../architecture/index.html">Architecture</a></li>
<li><a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a></li>
<li id="dropdown-menu-position-anchor-version-mobile" class="dropdown-submenu" style="position: relative"><a href="#" tabindex="-1">Versions(master)</a><ul class="dropdown-menu"><li><a tabindex="-1" href=http://mxnet.incubator.apache.org/test/>v0.10.14</a></li><li><a tabindex="-1" href=http://mxnet.incubator.apache.org/test/versions/0.10/index.html>0.10</a></li><li><a tabindex="-1" href=http://mxnet.incubator.apache.org/test/versions/master/index.html>master</a></li></ul></li></ul>
</div>
<div class="plusIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"><span aria-hidden="true" class="glyphicon glyphicon-plus"></span></a>
<ul class="dropdown-menu dropdown-menu-right" id="plusMenu"></ul>
</div>
<div id="search-input-wrap">
<form action="../../search.html" autocomplete="off" class="" method="get" role="search">
<div class="form-group inner-addon left-addon">
<i class="glyphicon glyphicon-search"></i>
<input class="form-control" name="q" placeholder="Search" type="text"/>
</div>
<input name="check_keywords" type="hidden" value="yes">
<input name="area" type="hidden" value="default"/>
</input></form>
<div id="search-preview"></div>
</div>
<div id="searchIcon">
<span aria-hidden="true" class="glyphicon glyphicon-search"></span>
</div>
<!-- <div id="lang-select-wrap"> -->
<!-- <label id="lang-select-label"> -->
<!-- <\!-- <i class="fa fa-globe"></i> -\-> -->
<!-- <span></span> -->
<!-- </label> -->
<!-- <select id="lang-select"> -->
<!-- <option value="en">Eng</option> -->
<!-- <option value="zh">中文</option> -->
<!-- </select> -->
<!-- </div> -->
<!-- <a id="mobile-nav-toggle">
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
</a> -->
</div>
</div>
</div>
<div class="container">
<div class="row">
<div aria-label="main navigation" class="sphinxsidebar leftsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Python Documents</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="index.html#table-of-contents">Table of contents</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="ndarray.html">NDArray API</a></li>
<li class="toctree-l3"><a class="reference internal" href="symbol.html">Symbol API</a></li>
<li class="toctree-l3"><a class="reference internal" href="module.html">Module API</a></li>
<li class="toctree-l3"><a class="reference internal" href="rnn.html">RNN Cell API</a></li>
<li class="toctree-l3"><a class="reference internal" href="kvstore.html">KVStore API</a></li>
<li class="toctree-l3"><a class="reference internal" href="io.html">Data Loading API</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="">Optimization: initialize and update weights</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#overview">Overview</a></li>
<li class="toctree-l4"><a class="reference internal" href="#the-mxnet-initializer-package">The <code class="docutils literal"><span class="pre">mxnet.initializer</span></code> package</a></li>
<li class="toctree-l4"><a class="reference internal" href="#the-mxnet-optimizer-package">The <code class="docutils literal"><span class="pre">mxnet.optimizer</span></code> package</a></li>
<li class="toctree-l4"><a class="reference internal" href="#the-mxnet-lr-scheduler-package">The <code class="docutils literal"><span class="pre">mxnet.lr_scheduler</span></code> package</a></li>
<li class="toctree-l4"><a class="reference internal" href="#implement-a-new-algorithm">Implement a new algorithm</a></li>
<li class="toctree-l4"><a class="reference internal" href="#api-reference">API Reference</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="callback.html">Callback API</a></li>
<li class="toctree-l3"><a class="reference internal" href="metric.html">Evaluation Metric API</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../r/index.html">R Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../julia/index.html">Julia Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../c++/index.html">C++ Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../scala/index.html">Scala Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../perl/index.html">Perl Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../how_to/index.html">HowTo Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../architecture/index.html">System Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../tutorials/index.html">Tutorials</a></li>
</ul>
</div>
</div>
<div class="content">
<div class="section" id="optimization-initialize-and-update-weights">
<span id="optimization-initialize-and-update-weights"></span><h1>Optimization: initialize and update weights<a class="headerlink" href="#optimization-initialize-and-update-weights" title="Permalink to this headline"></a></h1>
<div class="section" id="overview">
<span id="overview"></span><h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline"></a></h2>
<p>This document summaries the APIs used to initialize and update the model weights
during training</p>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#module-mxnet.initializer" title="mxnet.initializer"><code class="xref py py-obj docutils literal"><span class="pre">mxnet.initializer</span></code></a></td>
<td>Weight initializer.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#module-mxnet.optimizer" title="mxnet.optimizer"><code class="xref py py-obj docutils literal"><span class="pre">mxnet.optimizer</span></code></a></td>
<td>Weight updating functions.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#module-mxnet.lr_scheduler" title="mxnet.lr_scheduler"><code class="xref py py-obj docutils literal"><span class="pre">mxnet.lr_scheduler</span></code></a></td>
<td>Scheduling learning rate.</td>
</tr>
</tbody>
</table>
<p>and how to develop a new optimization algorithm in MXNet.</p>
<p>Assume there there is a pre-defined <code class="docutils literal"><span class="pre">Symbol</span></code> and a <code class="docutils literal"><span class="pre">Module</span></code> is created for
it</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">Variable</span><span class="p">(</span><span class="s1">'data'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">label</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">Variable</span><span class="p">(</span><span class="s1">'softmax_label'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">fc</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">FullyConnected</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'fc'</span><span class="p">,</span> <span class="n">num_hidden</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">loss</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">SoftmaxOutput</span><span class="p">(</span><span class="n">fc</span><span class="p">,</span> <span class="n">label</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'softmax'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">mod</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">mod</span><span class="o">.</span><span class="n">Module</span><span class="p">(</span><span class="n">loss</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">mod</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">data_shapes</span><span class="o">=</span><span class="p">[(</span><span class="s1">'data'</span><span class="p">,</span> <span class="p">(</span><span class="mi">128</span><span class="p">,</span><span class="mi">20</span><span class="p">))],</span> <span class="n">label_shapes</span><span class="o">=</span><span class="p">[(</span><span class="s1">'softmax_label'</span><span class="p">,</span> <span class="p">(</span><span class="mi">128</span><span class="p">,))])</span>
</pre></div>
</div>
<p>Next we can initialize the weights with values sampled uniformly from
<code class="docutils literal"><span class="pre">[-1,1]</span></code>:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">mod</span><span class="o">.</span><span class="n">init_params</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">initializer</span><span class="o">.</span><span class="n">Uniform</span><span class="p">(</span><span class="n">scale</span><span class="o">=</span><span class="mf">1.0</span><span class="p">))</span>
</pre></div>
</div>
<p>Then we will train a model with standard SGD which decreases the learning rate
by multiplying 0.9 for each 100 batches.</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">lr_sch</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">lr_scheduler</span><span class="o">.</span><span class="n">FactorScheduler</span><span class="p">(</span><span class="n">step</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">factor</span><span class="o">=</span><span class="mf">0.9</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">mod</span><span class="o">.</span><span class="n">init_optimizer</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">optimizer</span><span class="o">=</span><span class="s1">'sgd'</span><span class="p">,</span> <span class="n">optimizer_params</span><span class="o">=</span><span class="p">((</span><span class="s1">'learning_rate'</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">),</span> <span class="p">(</span><span class="s1">'lr_scheduler'</span><span class="p">,</span> <span class="n">lr_sch</span><span class="p">)))</span>
</pre></div>
</div>
<p>Finally run <code class="docutils literal"><span class="pre">mod.fit(...)</span></code> to start training.</p>
</div>
<div class="section" id="the-mxnet-initializer-package">
<span id="the-mxnet-initializer-package"></span><h2>The <code class="docutils literal"><span class="pre">mxnet.initializer</span></code> package<a class="headerlink" href="#the-mxnet-initializer-package" title="Permalink to this headline"></a></h2>
<p>The base class <code class="docutils literal"><span class="pre">Initializer</span></code> defines the default behaviors to initialize
various parameters, such as set bias to 1, except for the weight. Other classes
then defines how to initialize the weight.</p>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><code class="xref py py-obj docutils literal"><span class="pre">Initializer</span></code></a></td>
<td>The base class of an initializer.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.initializer.Uniform" title="mxnet.initializer.Uniform"><code class="xref py py-obj docutils literal"><span class="pre">Uniform</span></code></a></td>
<td>Initializes weights with random values uniformly sampled from a given range.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.initializer.Normal" title="mxnet.initializer.Normal"><code class="xref py py-obj docutils literal"><span class="pre">Normal</span></code></a></td>
<td>Initializes weights with random values sampled from a normal distribution with a mean of zero and standard deviation of <cite>sigma</cite>.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.initializer.Load" title="mxnet.initializer.Load"><code class="xref py py-obj docutils literal"><span class="pre">Load</span></code></a></td>
<td>Initializes variables by loading data from file or dict.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.initializer.Mixed" title="mxnet.initializer.Mixed"><code class="xref py py-obj docutils literal"><span class="pre">Mixed</span></code></a></td>
<td>Initialize parameters using multiple initializers.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.initializer.Zero" title="mxnet.initializer.Zero"><code class="xref py py-obj docutils literal"><span class="pre">Zero</span></code></a></td>
<td>Initializes weights to zero.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.initializer.One" title="mxnet.initializer.One"><code class="xref py py-obj docutils literal"><span class="pre">One</span></code></a></td>
<td>Initializes weights to one.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.initializer.Constant" title="mxnet.initializer.Constant"><code class="xref py py-obj docutils literal"><span class="pre">Constant</span></code></a></td>
<td>Initializes the weights to a scalar value.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.initializer.Orthogonal" title="mxnet.initializer.Orthogonal"><code class="xref py py-obj docutils literal"><span class="pre">Orthogonal</span></code></a></td>
<td>Initialize weight as orthogonal matrix.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.initializer.Xavier" title="mxnet.initializer.Xavier"><code class="xref py py-obj docutils literal"><span class="pre">Xavier</span></code></a></td>
<td>Returns an initializer performing “Xavier” initialization for weights.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.initializer.MSRAPrelu" title="mxnet.initializer.MSRAPrelu"><code class="xref py py-obj docutils literal"><span class="pre">MSRAPrelu</span></code></a></td>
<td>Initialize the weight according to a MSRA paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.initializer.Bilinear" title="mxnet.initializer.Bilinear"><code class="xref py py-obj docutils literal"><span class="pre">Bilinear</span></code></a></td>
<td>Initialize weight for upsampling layers.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.initializer.FusedRNN" title="mxnet.initializer.FusedRNN"><code class="xref py py-obj docutils literal"><span class="pre">FusedRNN</span></code></a></td>
<td>Initialize parameters for fused rnn layers.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="the-mxnet-optimizer-package">
<span id="the-mxnet-optimizer-package"></span><h2>The <code class="docutils literal"><span class="pre">mxnet.optimizer</span></code> package<a class="headerlink" href="#the-mxnet-optimizer-package" title="Permalink to this headline"></a></h2>
<p>The base class <code class="docutils literal"><span class="pre">Optimizer</span></code> accepts commonly shared arguments such as
<code class="docutils literal"><span class="pre">learning_rate</span></code> and defines the interface. Each other class in this package
implements one weight updating function.</p>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><code class="xref py py-obj docutils literal"><span class="pre">Optimizer</span></code></a></td>
<td>The base class inherited by all optimizers.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.optimizer.SGD" title="mxnet.optimizer.SGD"><code class="xref py py-obj docutils literal"><span class="pre">SGD</span></code></a></td>
<td>The SGD optimizer with momentum and weight decay.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.optimizer.NAG" title="mxnet.optimizer.NAG"><code class="xref py py-obj docutils literal"><span class="pre">NAG</span></code></a></td>
<td>Nesterov accelerated SGD.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.optimizer.RMSProp" title="mxnet.optimizer.RMSProp"><code class="xref py py-obj docutils literal"><span class="pre">RMSProp</span></code></a></td>
<td>The RMSProp optimizer.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.optimizer.Adam" title="mxnet.optimizer.Adam"><code class="xref py py-obj docutils literal"><span class="pre">Adam</span></code></a></td>
<td>The Adam optimizer.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.optimizer.AdaGrad" title="mxnet.optimizer.AdaGrad"><code class="xref py py-obj docutils literal"><span class="pre">AdaGrad</span></code></a></td>
<td>AdaGrad optimizer.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.optimizer.AdaDelta" title="mxnet.optimizer.AdaDelta"><code class="xref py py-obj docutils literal"><span class="pre">AdaDelta</span></code></a></td>
<td>The AdaDelta optimizer.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.optimizer.DCASGD" title="mxnet.optimizer.DCASGD"><code class="xref py py-obj docutils literal"><span class="pre">DCASGD</span></code></a></td>
<td>The DCASGD optimizer.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.optimizer.SGLD" title="mxnet.optimizer.SGLD"><code class="xref py py-obj docutils literal"><span class="pre">SGLD</span></code></a></td>
<td>Stochastic Gradient Riemannian Langevin Dynamics.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="the-mxnet-lr-scheduler-package">
<span id="the-mxnet-lr-scheduler-package"></span><h2>The <code class="docutils literal"><span class="pre">mxnet.lr_scheduler</span></code> package<a class="headerlink" href="#the-mxnet-lr-scheduler-package" title="Permalink to this headline"></a></h2>
<p>The base class <code class="docutils literal"><span class="pre">LRScheduler</span></code> defines the interface, while other classes
implement various schemes to change the learning rate during training.</p>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.lr_scheduler.LRScheduler" title="mxnet.lr_scheduler.LRScheduler"><code class="xref py py-obj docutils literal"><span class="pre">LRScheduler</span></code></a></td>
<td>Base class of a learning rate scheduler.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.lr_scheduler.FactorScheduler" title="mxnet.lr_scheduler.FactorScheduler"><code class="xref py py-obj docutils literal"><span class="pre">FactorScheduler</span></code></a></td>
<td>Reduce the learning rate by a factor for every <em>n</em> steps.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.lr_scheduler.MultiFactorScheduler" title="mxnet.lr_scheduler.MultiFactorScheduler"><code class="xref py py-obj docutils literal"><span class="pre">MultiFactorScheduler</span></code></a></td>
<td>Reduce the learning rate by given a list of steps.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="implement-a-new-algorithm">
<span id="implement-a-new-algorithm"></span><h2>Implement a new algorithm<a class="headerlink" href="#implement-a-new-algorithm" title="Permalink to this headline"></a></h2>
<p>Most classes listed in this document are implemented in Python by using <code class="docutils literal"><span class="pre">NDArray</span></code>.
So implementing new weight updating or initialization functions is
straightforward.</p>
<p>For <code class="docutils literal"><span class="pre">initializer</span></code>, create a subclass of <code class="docutils literal"><span class="pre">Initializer</span></code> and define the
<code class="docutils literal"><span class="pre">_init_weight</span></code> method. We can also change the default behaviors to initialize
other parameters such as <code class="docutils literal"><span class="pre">_init_bias</span></code>. See
<a class="reference external" href="https://github.com/dmlc/mxnet/blob/master/python/mxnet/initializer.py"><code class="docutils literal"><span class="pre">initializer.py</span></code></a>
for examples.</p>
<p>For <code class="docutils literal"><span class="pre">optimizer</span></code>, create a subclass of <code class="docutils literal"><span class="pre">Optimizer</span></code>
and implement two methods <code class="docutils literal"><span class="pre">create_state</span></code> and <code class="docutils literal"><span class="pre">update</span></code>. Also add
<code class="docutils literal"><span class="pre">@mx.optimizer.Optimizer.register</span></code> before this class. See
<a class="reference external" href="https://github.com/dmlc/mxnet/blob/master/python/mxnet/optimizer.py"><code class="docutils literal"><span class="pre">optimizer.py</span></code></a>
for examples.</p>
<p>For <code class="docutils literal"><span class="pre">lr_scheduler</span></code>, create a subclass of <code class="docutils literal"><span class="pre">LRScheduler</span></code> and then implement the
<code class="docutils literal"><span class="pre">__call__</span></code> method. See
<a class="reference external" href="https://github.com/dmlc/mxnet/blob/master/python/mxnet/lr_scheduler.py"><code class="docutils literal"><span class="pre">lr_scheduler.py</span></code></a>
for examples.</p>
</div>
<div class="section" id="api-reference">
<span id="api-reference"></span><h2>API Reference<a class="headerlink" href="#api-reference" title="Permalink to this headline"></a></h2>
<script src="../../_static/js/auto_module_index.js" type="text/javascript"></script><span class="target" id="module-mxnet.optimizer"></span><p>Weight updating functions.</p>
<dl class="class">
<dt id="mxnet.optimizer.Optimizer">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">Optimizer</code><span class="sig-paren">(</span><em>rescale_grad=1.0</em>, <em>param_idx2name=None</em>, <em>wd=0.0</em>, <em>clip_gradient=None</em>, <em>learning_rate=0.01</em>, <em>lr_scheduler=None</em>, <em>sym=None</em>, <em>begin_num_update=0</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Optimizer" title="Permalink to this definition"></a></dt>
<dd><p>The base class inherited by all optimizers.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>rescale_grad</strong> (<em>float, optional</em>) – Multiply the gradient with <cite>rescale_grad</cite> before updating. Often
choose to be <code class="docutils literal"><span class="pre">1.0/batch_size</span></code>.</li>
<li><strong>param_idx2name</strong> (<em>dict from int to string, optional</em>) – A dictionary that maps int index to string name.</li>
<li><strong>clip_gradient</strong> (<em>float, optional</em>) – Clip the gradient by projecting onto the box <code class="docutils literal"><span class="pre">[-clip_gradient,</span> <span class="pre">clip_gradient]</span></code>.</li>
<li><strong>learning_rate</strong> (<em>float, optional</em>) – The initial learning rate.</li>
<li><strong>lr_scheduler</strong> (<em>LRScheduler, optional</em>) – The learning rate scheduler.</li>
<li><strong>wd</strong> (<em>float, optional</em>) – The weight decay (or L2 regularization) coefficient. Modifies objective
by adding a penalty for having large weights.</li>
<li><strong>sym</strong> (<em>Symbol, optional</em>) – The Symbol this optimizer is applying to.</li>
<li><strong>begin_num_update</strong> (<em>int, optional</em>) – The initial number of updates.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="staticmethod">
<dt id="mxnet.optimizer.Optimizer.register">
<em class="property">static </em><code class="descname">register</code><span class="sig-paren">(</span><em>klass</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Optimizer.register" title="Permalink to this definition"></a></dt>
<dd><p>Registers a new optimizer.</p>
<p>Once an optimizer is registered, we can create an instance of this
optimizer with <cite>create_optimizer</cite> later.</p>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nd">@mx.optimizer.Optimizer.register</span>
<span class="gp">... </span><span class="k">class</span> <span class="nc">MyOptimizer</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">Optimizer</span><span class="p">):</span>
<span class="gp">... </span> <span class="k">pass</span>
<span class="gp">>>> </span><span class="n">optim</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">Optimizer</span><span class="o">.</span><span class="n">create_optimizer</span><span class="p">(</span><span class="s1">'MyOptimizer'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="k">print</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">optim</span><span class="p">))</span>
<span class="go"><class '__main__.MyOptimizer'></span>
</pre></div>
</div>
</dd></dl>
<dl class="staticmethod">
<dt id="mxnet.optimizer.Optimizer.create_optimizer">
<em class="property">static </em><code class="descname">create_optimizer</code><span class="sig-paren">(</span><em>name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Optimizer.create_optimizer" title="Permalink to this definition"></a></dt>
<dd><p>Instantiates an optimizer with a given name and kwargs.</p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">We can use the alias <cite>create</cite> for <code class="docutils literal"><span class="pre">Optimizer.create_optimizer</span></code>.</p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>name</strong> (<em>str</em>) – Name of the optimizer. Should be the name
of a subclass of Optimizer. Case insensitive.</li>
<li><strong>kwargs</strong> (<em>dict</em>) – Parameters for the optimizer.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">An instantiated optimizer.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer">Optimizer</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sgd</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">Optimizer</span><span class="o">.</span><span class="n">create_optimizer</span><span class="p">(</span><span class="s1">'sgd'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="nb">type</span><span class="p">(</span><span class="n">sgd</span><span class="p">)</span>
<span class="go"><class 'mxnet.optimizer.SGD'></span>
<span class="gp">>>> </span><span class="n">adam</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="s1">'adam'</span><span class="p">,</span> <span class="n">learning_rate</span><span class="o">=.</span><span class="mi">1</span><span class="p">)</span>
<span class="gp">>>> </span><span class="nb">type</span><span class="p">(</span><span class="n">adam</span><span class="p">)</span>
<span class="go"><class 'mxnet.optimizer.Adam'></span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="mxnet.optimizer.Optimizer.create_state">
<code class="descname">create_state</code><span class="sig-paren">(</span><em>index</em>, <em>weight</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Optimizer.create_state" title="Permalink to this definition"></a></dt>
<dd><p>Creates auxiliary state for a given weight.</p>
<p>Some optimizers require additional states, e.g. as momentum, in addition
to gradients in order to update weights. This function creates state
for a given weight which will be used in <cite>update</cite>. This function is
called only once for each weight.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>index</strong> (<em>int</em>) – An unique index to identify the weight.</li>
<li><strong>weight</strong> (<a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – The weight.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>state</strong>
The state associated with the weight.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">any obj</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.optimizer.Optimizer.update">
<code class="descname">update</code><span class="sig-paren">(</span><em>index</em>, <em>weight</em>, <em>grad</em>, <em>state</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Optimizer.update" title="Permalink to this definition"></a></dt>
<dd><p>Updates the given parameter using the corresponding gradient and state.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>index</strong> (<em>int</em>) – The unique index of the parameter into the individual learning
rates and weight decays. Learning rates and weight decay
may be set via <cite>set_lr_mult()</cite> and <cite>set_wd_mult()</cite>, respectively.</li>
<li><strong>weight</strong> (<a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – The parameter to be updated.</li>
<li><strong>grad</strong> (<a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – The gradient of the objective with respect to this parameter.</li>
<li><strong>state</strong> (<em>any obj</em>) – The state returned by <cite>create_state()</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.optimizer.Optimizer.set_lr_scale">
<code class="descname">set_lr_scale</code><span class="sig-paren">(</span><em>args_lrscale</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Optimizer.set_lr_scale" title="Permalink to this definition"></a></dt>
<dd><p>[DEPRECATED] Sets lr scale. Use set_lr_mult instead.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.optimizer.Optimizer.set_lr_mult">
<code class="descname">set_lr_mult</code><span class="sig-paren">(</span><em>args_lr_mult</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Optimizer.set_lr_mult" title="Permalink to this definition"></a></dt>
<dd><p>Sets an individual learning rate multiplier for each parameter.</p>
<p>If you specify a learning rate multiplier for a parameter, then
the learning rate for the parameter will be set as the product of
the global learning rate <cite>self.lr</cite> and its multiplier.</p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">The default learning rate multiplier of a <cite>Variable</cite>
can be set with <cite>lr_mult</cite> argument in the constructor.</p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>args_lr_mult</strong> (<em>dict of str/int to float</em>) – <p>For each of its key-value entries, the learning rate multipler for the
parameter specified in the key will be set as the given value.</p>
<p>You can specify the parameter with either its name or its index.
If you use the name, you should pass <cite>sym</cite> in the constructor,
and the name you specified in the key of <cite>args_lr_mult</cite> should match
the name of the parameter in <cite>sym</cite>. If you use the index, it should
correspond to the index of the parameter used in the <cite>update</cite> method.</p>
<p>Specifying a parameter by its index is only supported for backward
compatibility, and we recommend to use the name instead.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.optimizer.Optimizer.set_wd_mult">
<code class="descname">set_wd_mult</code><span class="sig-paren">(</span><em>args_wd_mult</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Optimizer.set_wd_mult" title="Permalink to this definition"></a></dt>
<dd><p>Sets an individual weight decay multiplier for each parameter.</p>
<p>By default, if <cite>param_idx2name</cite> was provided in the
constructor, the weight decay multipler is set as 0 for all
parameters whose name don’t end with <code class="docutils literal"><span class="pre">_weight</span></code> or
<code class="docutils literal"><span class="pre">_gamma</span></code>.</p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">The default weight decay multiplier for a <cite>Variable</cite>
can be set with its <cite>wd_mult</cite> argument in the constructor.</p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>args_wd_mult</strong> (<em>dict of string/int to float</em>) – <p>For each of its key-value entries, the weight decay multipler for the
parameter specified in the key will be set as the given value.</p>
<p>You can specify the parameter with either its name or its index.
If you use the name, you should pass <cite>sym</cite> in the constructor,
and the name you specified in the key of <cite>args_lr_mult</cite> should match
the name of the parameter in <cite>sym</cite>. If you use the index, it should
correspond to the index of the parameter used in the <cite>update</cite> method.</p>
<p>Specifying a parameter by its index is only supported for backward
compatibility, and we recommend to use the name instead.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<dl class="function">
<dt id="mxnet.optimizer.register">
<code class="descclassname">mxnet.optimizer.</code><code class="descname">register</code><span class="sig-paren">(</span><em>klass</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.register" title="Permalink to this definition"></a></dt>
<dd><p>Registers a new optimizer.</p>
<p>Once an optimizer is registered, we can create an instance of this
optimizer with <cite>create_optimizer</cite> later.</p>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nd">@mx.optimizer.Optimizer.register</span>
<span class="gp">... </span><span class="k">class</span> <span class="nc">MyOptimizer</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">Optimizer</span><span class="p">):</span>
<span class="gp">... </span> <span class="k">pass</span>
<span class="gp">>>> </span><span class="n">optim</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">Optimizer</span><span class="o">.</span><span class="n">create_optimizer</span><span class="p">(</span><span class="s1">'MyOptimizer'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="k">print</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">optim</span><span class="p">))</span>
<span class="go"><class '__main__.MyOptimizer'></span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.SGD">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">SGD</code><span class="sig-paren">(</span><em>momentum=0.0</em>, <em>multi_precision=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.SGD" title="Permalink to this definition"></a></dt>
<dd><p>The SGD optimizer with momentum and weight decay.</p>
<p>The optimizer updates the weight by:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">state</span> <span class="o">=</span> <span class="n">momentum</span> <span class="o">*</span> <span class="n">state</span> <span class="o">+</span> <span class="n">lr</span> <span class="o">*</span> <span class="n">rescale_grad</span> <span class="o">*</span> <span class="n">clip</span><span class="p">(</span><span class="n">grad</span><span class="p">,</span> <span class="n">clip_gradient</span><span class="p">)</span> <span class="o">+</span> <span class="n">wd</span> <span class="o">*</span> <span class="n">weight</span>
<span class="n">weight</span> <span class="o">=</span> <span class="n">weight</span> <span class="o">-</span> <span class="n">state</span>
</pre></div>
</div>
<p>For details of the update algorithm see <a class="reference internal" href="ndarray.html#mxnet.ndarray.sgd_update" title="mxnet.ndarray.sgd_update"><code class="xref py py-class docutils literal"><span class="pre">sgd_update</span></code></a> and
<a class="reference internal" href="ndarray.html#mxnet.ndarray.sgd_mom_update" title="mxnet.ndarray.sgd_mom_update"><code class="xref py py-class docutils literal"><span class="pre">sgd_mom_update</span></code></a>.</p>
<p>This optimizer accepts the following parameters in addition to those accepted
by <a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><code class="xref py py-class docutils literal"><span class="pre">Optimizer</span></code></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>momentum</strong> (<em>float, optional</em>) – The momentum value.</li>
<li><strong>multi_precision</strong> (<em>bool, optional</em>) – <p>Flag to control the internal precision of the optimizer.
<code class="docutils literal"><span class="pre">False</span></code> results in using the same precision as the weights (default),
<code class="docutils literal"><span class="pre">True</span></code> makes internal 32-bit copy of the weights and applies gradients</p>
<blockquote>
<div>in 32-bit precision even if actual weights used in the model have lower precision.
Turning this on can improve convergence and accuracy when training with float16.</div></blockquote>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.DCASGD">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">DCASGD</code><span class="sig-paren">(</span><em>momentum=0.0</em>, <em>lamda=0.04</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.DCASGD" title="Permalink to this definition"></a></dt>
<dd><p>The DCASGD optimizer.</p>
<p>This class implements the optimizer described in <em>Asynchronous Stochastic Gradient Descent
with Delay Compensation for Distributed Deep Learning</em>,
available at <a class="reference external" href="https://arxiv.org/abs/1609.08326">https://arxiv.org/abs/1609.08326</a>.</p>
<p>This optimizer accepts the following parameters in addition to those accepted
by <a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><code class="xref py py-class docutils literal"><span class="pre">Optimizer</span></code></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>momentum</strong> (<em>float, optional</em>) – The momentum value.</li>
<li><strong>lamda</strong> (<em>float, optional</em>) – Scale DC value.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.NAG">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">NAG</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.NAG" title="Permalink to this definition"></a></dt>
<dd><p>Nesterov accelerated SGD.</p>
<p>This optimizer updates each weight by:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">state</span> <span class="o">=</span> <span class="n">momentum</span> <span class="o">*</span> <span class="n">state</span> <span class="o">+</span> <span class="n">grad</span> <span class="o">+</span> <span class="n">wd</span> <span class="o">*</span> <span class="n">weight</span>
<span class="n">weight</span> <span class="o">=</span> <span class="n">weight</span> <span class="o">-</span> <span class="p">(</span><span class="n">lr</span> <span class="o">*</span> <span class="p">(</span><span class="n">grad</span> <span class="o">+</span> <span class="n">momentum</span> <span class="o">*</span> <span class="n">state</span><span class="p">))</span>
</pre></div>
</div>
<p>This optimizer accepts the same arguments as <a class="reference internal" href="#mxnet.optimizer.SGD" title="mxnet.optimizer.SGD"><code class="xref py py-class docutils literal"><span class="pre">SGD</span></code></a>.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.SGLD">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">SGLD</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.SGLD" title="Permalink to this definition"></a></dt>
<dd><p>Stochastic Gradient Riemannian Langevin Dynamics.</p>
<p>This class implements the optimizer described in the paper <em>Stochastic Gradient
Riemannian Langevin Dynamics on the Probability Simplex</em>, available at
<a class="reference external" href="https://papers.nips.cc/paper/4883-stochastic-gradient-riemannian-langevin-dynamics-on-the-probability-simplex.pdf">https://papers.nips.cc/paper/4883-stochastic-gradient-riemannian-langevin-dynamics-on-the-probability-simplex.pdf</a>.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.ccSGD">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">ccSGD</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.ccSGD" title="Permalink to this definition"></a></dt>
<dd><p>[DEPRECATED] Same as <cite>SGD</cite>. Left here for backward compatibility.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.Adam">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">Adam</code><span class="sig-paren">(</span><em>learning_rate=0.001</em>, <em>beta1=0.9</em>, <em>beta2=0.999</em>, <em>epsilon=1e-08</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Adam" title="Permalink to this definition"></a></dt>
<dd><p>The Adam optimizer.</p>
<p>This class implements the optimizer described in <em>Adam: A Method for
Stochastic Optimization</em>, available at <a class="reference external" href="http://arxiv.org/abs/1412.6980">http://arxiv.org/abs/1412.6980</a>.</p>
<p>This optimizer accepts the following parameters in addition to those accepted
by <a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><code class="xref py py-class docutils literal"><span class="pre">Optimizer</span></code></a>.</p>
<p>For details of the update algorithm, see <code class="xref py py-class docutils literal"><span class="pre">ndarray.adam_update</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>beta1</strong> (<em>float, optional</em>) – Exponential decay rate for the first moment estimates.</li>
<li><strong>beta2</strong> (<em>float, optional</em>) – Exponential decay rate for the second moment estimates.</li>
<li><strong>epsilon</strong> (<em>float, optional</em>) – Small value to avoid division by 0.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.AdaGrad">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">AdaGrad</code><span class="sig-paren">(</span><em>eps=1e-07</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.AdaGrad" title="Permalink to this definition"></a></dt>
<dd><p>AdaGrad optimizer.</p>
<p>This class implements the AdaGrad optimizer described in <em>Adaptive Subgradient
Methods for Online Learning and Stochastic Optimization</em>, and available at
<a class="reference external" href="http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf">http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf</a>.</p>
<p>This optimizer accepts the following parameters in addition to those accepted
by <a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><code class="xref py py-class docutils literal"><span class="pre">Optimizer</span></code></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>eps</strong> (<em>float, optional</em>) – Small value to avoid division by 0.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.RMSProp">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">RMSProp</code><span class="sig-paren">(</span><em>learning_rate=0.001</em>, <em>gamma1=0.9</em>, <em>gamma2=0.9</em>, <em>epsilon=1e-08</em>, <em>centered=False</em>, <em>clip_weights=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.RMSProp" title="Permalink to this definition"></a></dt>
<dd><p>The RMSProp optimizer.</p>
<p>Two versions of RMSProp are implemented:</p>
<p>If <code class="docutils literal"><span class="pre">centered=False</span></code>, we follow
<a class="reference external" href="http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf">http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf</a> by
Tieleman &amp; Hinton, 2012.
For details of the update algorithm see <a class="reference internal" href="ndarray.html#mxnet.ndarray.rmsprop_update" title="mxnet.ndarray.rmsprop_update"><code class="xref py py-class docutils literal"><span class="pre">rmsprop_update</span></code></a>.</p>
<p>If <code class="docutils literal"><span class="pre">centered=True</span></code>, we follow <a class="reference external" href="http://arxiv.org/pdf/1308.0850v5.pdf">http://arxiv.org/pdf/1308.0850v5.pdf</a> (38)-(45)
by Alex Graves, 2013.
For details of the update algorithm see <a class="reference internal" href="ndarray.html#mxnet.ndarray.rmspropalex_update" title="mxnet.ndarray.rmspropalex_update"><code class="xref py py-class docutils literal"><span class="pre">rmspropalex_update</span></code></a>.</p>
<p>This optimizer accepts the following parameters in addition to those accepted
by <a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><code class="xref py py-class docutils literal"><span class="pre">Optimizer</span></code></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>gamma1</strong> (<em>float, optional</em>) – A decay factor of moving average over past squared gradient.</li>
<li><strong>gamma2</strong> (<em>float, optional</em>) – A “momentum” factor. Only used if <cite>centered`=``True`</cite>.</li>
<li><strong>epsilon</strong> (<em>float, optional</em>) – Small value to avoid division by 0.</li>
<li><strong>centered</strong> (<em>bool, optional</em>) – Flag to control which version of RMSProp to use.
<code class="docutils literal"><span class="pre">True</span></code> will use Graves’s version of <cite>RMSProp</cite>,
<code class="docutils literal"><span class="pre">False</span></code> will use Tieleman &amp; Hinton’s version of <cite>RMSProp</cite>.</li>
<li><strong>clip_weights</strong> (<em>float, optional</em>) – Clips weights into range <code class="docutils literal"><span class="pre">[-clip_weights,</span> <span class="pre">clip_weights]</span></code>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.AdaDelta">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">AdaDelta</code><span class="sig-paren">(</span><em>rho=0.9</em>, <em>epsilon=1e-05</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.AdaDelta" title="Permalink to this definition"></a></dt>
<dd><p>The AdaDelta optimizer.</p>
<p>This class implements AdaDelta, an optimizer described in <em>ADADELTA: An adaptive
learning rate method</em>, available at <a class="reference external" href="https://arxiv.org/abs/1212.5701">https://arxiv.org/abs/1212.5701</a>.</p>
<p>This optimizer accepts the following parameters in addition to those accepted
by <a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><code class="xref py py-class docutils literal"><span class="pre">Optimizer</span></code></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>rho</strong> (<em>float</em>) – Decay rate for both squared gradients and delta.</li>
<li><strong>epsilon</strong> (<em>float</em>) – Small value to avoid division by 0.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.Ftrl">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">Ftrl</code><span class="sig-paren">(</span><em>lamda1=0.01</em>, <em>learning_rate=0.1</em>, <em>beta=1</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Ftrl" title="Permalink to this definition"></a></dt>
<dd><p>The Ftrl optimizer.</p>
<p>Referenced from <em>Ad Click Prediction: a View from the Trenches</em>, available at
<a class="reference external" href="http://dl.acm.org/citation.cfm?id=2488200">http://dl.acm.org/citation.cfm?id=2488200</a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>lamda1</strong> (<em>float, optional</em>) – L1 regularization coefficient.</li>
<li><strong>learning_rate</strong> (<em>float, optional</em>) – The initial learning rate.</li>
<li><strong>beta</strong> (<em>float, optional</em>) – Per-coordinate learning rate correlation parameter.</li>
<li><strong>eta</strong><div class="math">
</div>
<p>eta_{t,i} = frac{learningrate}{beta+sqrt{sum_{s=1}^tg_{s,i}^t}}</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.Adamax">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">Adamax</code><span class="sig-paren">(</span><em>learning_rate=0.002</em>, <em>beta1=0.9</em>, <em>beta2=0.999</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Adamax" title="Permalink to this definition"></a></dt>
<dd><p>The AdaMax optimizer.</p>
<p>It is a variant of Adam based on the infinity norm
available at <a class="reference external" href="http://arxiv.org/abs/1412.6980">http://arxiv.org/abs/1412.6980</a> Section 7.</p>
<p>This optimizer accepts the following parameters in addition to those accepted
by <a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><code class="xref py py-class docutils literal"><span class="pre">Optimizer</span></code></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>beta1</strong> (<em>float, optional</em>) – Exponential decay rate for the first moment estimates.</li>
<li><strong>beta2</strong> (<em>float, optional</em>) – Exponential decay rate for the second moment estimates.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.Nadam">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">Nadam</code><span class="sig-paren">(</span><em>learning_rate=0.001</em>, <em>beta1=0.9</em>, <em>beta2=0.999</em>, <em>epsilon=1e-08</em>, <em>schedule_decay=0.004</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Nadam" title="Permalink to this definition"></a></dt>
<dd><p>The Nesterov Adam optimizer.</p>
<p>Much like Adam is essentially RMSprop with momentum,
Nadam is Adam RMSprop with Nesterov momentum available
at <a class="reference external" href="http://cs229.stanford.edu/proj2015/054_report.pdf">http://cs229.stanford.edu/proj2015/054_report.pdf</a>.</p>
<p>This optimizer accepts the following parameters in addition to those accepted
by <a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><code class="xref py py-class docutils literal"><span class="pre">Optimizer</span></code></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>beta1</strong> (<em>float, optional</em>) – Exponential decay rate for the first moment estimates.</li>
<li><strong>beta2</strong> (<em>float, optional</em>) – Exponential decay rate for the second moment estimates.</li>
<li><strong>epsilon</strong> (<em>float, optional</em>) – Small value to avoid division by 0.</li>
<li><strong>schedule_decay</strong> (<em>float, optional</em>) – Exponential decay rate for the momentum schedule</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.Test">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">Test</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Test" title="Permalink to this definition"></a></dt>
<dd><p>The Test optimizer</p>
<dl class="method">
<dt id="mxnet.optimizer.Test.create_state">
<code class="descname">create_state</code><span class="sig-paren">(</span><em>index</em>, <em>weight</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Test.create_state" title="Permalink to this definition"></a></dt>
<dd><p>Creates a state to duplicate weight.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.optimizer.Test.update">
<code class="descname">update</code><span class="sig-paren">(</span><em>index</em>, <em>weight</em>, <em>grad</em>, <em>state</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Test.update" title="Permalink to this definition"></a></dt>
<dd><p>Performs w += rescale_grad * grad.</p>
</dd></dl>
</dd></dl>
<dl class="function">
<dt id="mxnet.optimizer.create">
<code class="descclassname">mxnet.optimizer.</code><code class="descname">create</code><span class="sig-paren">(</span><em>name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.create" title="Permalink to this definition"></a></dt>
<dd><p>Instantiates an optimizer with a given name and kwargs.</p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">We can use the alias <cite>create</cite> for <code class="docutils literal"><span class="pre">Optimizer.create_optimizer</span></code>.</p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>name</strong> (<em>str</em>) – Name of the optimizer. Should be the name
of a subclass of Optimizer. Case insensitive.</li>
<li><strong>kwargs</strong> (<em>dict</em>) – Parameters for the optimizer.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">An instantiated optimizer.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer">Optimizer</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sgd</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">Optimizer</span><span class="o">.</span><span class="n">create_optimizer</span><span class="p">(</span><span class="s1">'sgd'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="nb">type</span><span class="p">(</span><span class="n">sgd</span><span class="p">)</span>
<span class="go"><class 'mxnet.optimizer.SGD'></span>
<span class="gp">>>> </span><span class="n">adam</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="s1">'adam'</span><span class="p">,</span> <span class="n">learning_rate</span><span class="o">=.</span><span class="mi">1</span><span class="p">)</span>
<span class="gp">>>> </span><span class="nb">type</span><span class="p">(</span><span class="n">adam</span><span class="p">)</span>
<span class="go"><class 'mxnet.optimizer.Adam'></span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.optimizer.Updater">
<em class="property">class </em><code class="descclassname">mxnet.optimizer.</code><code class="descname">Updater</code><span class="sig-paren">(</span><em>optimizer</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Updater" title="Permalink to this definition"></a></dt>
<dd><p>Updater for kvstore.</p>
<dl class="method">
<dt id="mxnet.optimizer.Updater.__call__">
<code class="descname">__call__</code><span class="sig-paren">(</span><em>index</em>, <em>grad</em>, <em>weight</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Updater.__call__" title="Permalink to this definition"></a></dt>
<dd><p>Updates weight given gradient and index.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.optimizer.Updater.set_states">
<code class="descname">set_states</code><span class="sig-paren">(</span><em>states</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Updater.set_states" title="Permalink to this definition"></a></dt>
<dd><p>Sets updater states.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.optimizer.Updater.get_states">
<code class="descname">get_states</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.Updater.get_states" title="Permalink to this definition"></a></dt>
<dd><p>Gets updater states.</p>
</dd></dl>
</dd></dl>
<dl class="function">
<dt id="mxnet.optimizer.get_updater">
<code class="descclassname">mxnet.optimizer.</code><code class="descname">get_updater</code><span class="sig-paren">(</span><em>optimizer</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.optimizer.get_updater" title="Permalink to this definition"></a></dt>
<dd><p>Returns a closure of the updater needed for kvstore.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>optimizer</strong> (<a class="reference internal" href="#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><em>Optimizer</em></a>) – The optimizer.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><strong>updater</strong>
The closure of the updater.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">function</td>
</tr>
</tbody>
</table>
</dd></dl>
<span class="target" id="module-mxnet.lr_scheduler"></span><p>Scheduling learning rate.</p>
<dl class="class">
<dt id="mxnet.lr_scheduler.LRScheduler">
<em class="property">class </em><code class="descclassname">mxnet.lr_scheduler.</code><code class="descname">LRScheduler</code><span class="sig-paren">(</span><em>base_lr=0.01</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.lr_scheduler.LRScheduler" title="Permalink to this definition"></a></dt>
<dd><p>Base class of a learning rate scheduler.</p>
<p>A scheduler returns a new learning rate based on the number of updates that have
been performed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>base_lr</strong> (<em>float, optional</em>) – The initial learning rate.</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="mxnet.lr_scheduler.LRScheduler.__call__">
<code class="descname">__call__</code><span class="sig-paren">(</span><em>num_update</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.lr_scheduler.LRScheduler.__call__" title="Permalink to this definition"></a></dt>
<dd><p>Return a new learning rate.</p>
<p>The <code class="docutils literal"><span class="pre">num_update</span></code> is the upper bound of the number of updates applied to
every weight.</p>
<p>Assume the optimizer has udpated <em>i</em>-th weight by <em>k_i</em> times, namely
<code class="docutils literal"><span class="pre">optimizer.update(i,</span> <span class="pre">weight_i)</span></code> is called by <em>k_i</em> times. Then:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span>num_update = max([k_i for all i])
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>num_update</strong> (<em>int</em>) – the maximal number of updates applied to a weight.</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.lr_scheduler.FactorScheduler">
<em class="property">class </em><code class="descclassname">mxnet.lr_scheduler.</code><code class="descname">FactorScheduler</code><span class="sig-paren">(</span><em>step</em>, <em>factor=1</em>, <em>stop_factor_lr=1e-08</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.lr_scheduler.FactorScheduler" title="Permalink to this definition"></a></dt>
<dd><p>Reduce the learning rate by a factor for every <em>n</em> steps.</p>
<p>It returns a new learning rate by:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">base_lr</span> <span class="o">*</span> <span class="nb">pow</span><span class="p">(</span><span class="n">factor</span><span class="p">,</span> <span class="n">floor</span><span class="p">(</span><span class="n">num_update</span><span class="o">/</span><span class="n">step</span><span class="p">))</span>
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>step</strong> (<em>int</em>) – Changes the learning rate for every n updates.</li>
<li><strong>factor</strong> (<em>float, optional</em>) – The factor to change the learning rate.</li>
<li><strong>stop_factor_lr</strong> (<em>float, optional</em>) – Stop updating the learning rate if it is less than this value.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.lr_scheduler.MultiFactorScheduler">
<em class="property">class </em><code class="descclassname">mxnet.lr_scheduler.</code><code class="descname">MultiFactorScheduler</code><span class="sig-paren">(</span><em>step</em>, <em>factor=1</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.lr_scheduler.MultiFactorScheduler" title="Permalink to this definition"></a></dt>
<dd><p>Reduce the learning rate by given a list of steps.</p>
<p>Assume there exists <em>k</em> such that:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">step</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o"><=</span> <span class="n">num_update</span> <span class="ow">and</span> <span class="n">num_update</span> <span class="o"><</span> <span class="n">step</span><span class="p">[</span><span class="n">k</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span>
</pre></div>
</div>
<p>Then calculate the new learning rate by:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">base_lr</span> <span class="o">*</span> <span class="nb">pow</span><span class="p">(</span><span class="n">factor</span><span class="p">,</span> <span class="n">k</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>step</strong> (<em>list of int</em>) – The list of steps to schedule a change</li>
<li><strong>factor</strong> (<em>float</em>) – The factor to change the learning rate.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<span class="target" id="module-mxnet.initializer"></span><p>Weight initializer.</p>
<dl class="class">
<dt id="mxnet.initializer.InitDesc">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">InitDesc</code><a class="headerlink" href="#mxnet.initializer.InitDesc" title="Permalink to this definition"></a></dt>
<dd><p>Descriptor for the initialization pattern.</p>
<dl class="docutils">
<dt>name <span class="classifier-delimiter">:</span> <span class="classifier">str</span></dt>
<dd>Name of variable.</dd>
<dt>attrs <span class="classifier-delimiter">:</span> <span class="classifier">dict of str to str</span></dt>
<dd>Attributes of this variable taken from <code class="docutils literal"><span class="pre">Symbol.attr_dict</span></code>.</dd>
<dt>global_init <span class="classifier-delimiter">:</span> <span class="classifier">Initializer</span></dt>
<dd>Global initializer to fallback to.</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Initializer">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Initializer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Initializer" title="Permalink to this definition"></a></dt>
<dd><p>The base class of an initializer.</p>
<dl class="method">
<dt id="mxnet.initializer.Initializer.set_verbosity">
<code class="descname">set_verbosity</code><span class="sig-paren">(</span><em>verbose=False</em>, <em>print_func=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Initializer.set_verbosity" title="Permalink to this definition"></a></dt>
<dd><p>Switch on/off verbose mode</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>verbose</strong> (<em>bool</em>) – switch on/off verbose mode</li>
<li><strong>print_func</strong> (<em>function</em>) – A function that computes statistics of initialized arrays.
Takes an <cite>NDArray</cite> and returns an <cite>str</cite>. Defaults to mean
absolute value str((<a href="#id1"><span class="problematic" id="id2">|x|</span></a>/size(x)).asscalar()).</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.initializer.Initializer.dumps">
<code class="descname">dumps</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Initializer.dumps" title="Permalink to this definition"></a></dt>
<dd><p>Saves the initializer to string</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">JSON formatted string that describes the initializer.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">str</td>
</tr>
</tbody>
</table>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="c1"># Create initializer and retrieve its parameters</span>
<span class="gp">...</span>
<span class="gp">>>> </span><span class="n">init</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Normal</span><span class="p">(</span><span class="mf">0.5</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">init</span><span class="o">.</span><span class="n">dumps</span><span class="p">()</span>
<span class="go">'["normal", {"sigma": 0.5}]'</span>
<span class="gp">>>> </span><span class="n">init</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Xavier</span><span class="p">(</span><span class="n">factor_type</span><span class="o">=</span><span class="s2">"in"</span><span class="p">,</span> <span class="n">magnitude</span><span class="o">=</span><span class="mf">2.34</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">init</span><span class="o">.</span><span class="n">dumps</span><span class="p">()</span>
<span class="go">'["xavier", {"rnd_type": "uniform", "magnitude": 2.34, "factor_type": "in"}]'</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="mxnet.initializer.Initializer.__call__">
<code class="descname">__call__</code><span class="sig-paren">(</span><em>desc</em>, <em>arr</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Initializer.__call__" title="Permalink to this definition"></a></dt>
<dd><p>Initialize an array</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>desc</strong> (<a class="reference internal" href="#mxnet.initializer.InitDesc" title="mxnet.initializer.InitDesc"><em>InitDesc</em></a>) – Initialization pattern descriptor.</li>
<li><strong>arr</strong> (<a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – The array to be initialized.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<dl class="function">
<dt id="mxnet.initializer.register">
<code class="descclassname">mxnet.initializer.</code><code class="descname">register</code><span class="sig-paren">(</span><em>klass</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.register" title="Permalink to this definition"></a></dt>
<dd><p>Registers a custom initializer.</p>
<p>Custom initializers can be created by extending <cite>mx.init.Initializer</cite> and implementing the
required functions like <cite>_init_weight</cite> and <cite>_init_bias</cite>. The created initializer must be
registered using <cite>mx.init.register</cite> before it can be called by name.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>klass</strong> (<em>class</em>) – A subclass of <cite>mx.init.Initializer</cite> that needs to be registered as a custom initializer.</td>
</tr>
</tbody>
</table>
<p class="rubric">Example</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="c1"># Create and register a custom initializer that</span>
<span class="gp">... </span><span class="c1"># initializes weights to 0.1 and biases to 1.</span>
<span class="gp">...</span>
<span class="gp">>>> </span><span class="nd">@mx.init.register</span>
<span class="gp">... </span><span class="nd">@alias</span><span class="p">(</span><span class="s1">'myinit'</span><span class="p">)</span>
<span class="gp">... </span><span class="k">class</span> <span class="nc">CustomInit</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Initializer</span><span class="p">):</span>
<span class="gp">... </span> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="gp">... </span> <span class="nb">super</span><span class="p">(</span><span class="n">CustomInit</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="gp">... </span> <span class="k">def</span> <span class="nf">_init_weight</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">arr</span><span class="p">):</span>
<span class="gp">... </span> <span class="n">arr</span><span class="p">[:]</span> <span class="o">=</span> <span class="mf">0.1</span>
<span class="gp">... </span> <span class="k">def</span> <span class="nf">_init_bias</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">arr</span><span class="p">):</span>
<span class="gp">... </span> <span class="n">arr</span><span class="p">[:]</span> <span class="o">=</span> <span class="mi">1</span>
<span class="gp">...</span>
<span class="gp">>>> </span><span class="c1"># Module is an instance of 'mxnet.module.Module'</span>
<span class="gp">...</span>
<span class="gp">>>> </span><span class="n">module</span><span class="o">.</span><span class="n">init_params</span><span class="p">(</span><span class="s2">"custominit"</span><span class="p">)</span>
<span class="gp">>>> </span><span class="c1"># module.init_params("myinit")</span>
<span class="gp">>>> </span><span class="c1"># module.init_params(CustomInit())</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Load">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Load</code><span class="sig-paren">(</span><em>param</em>, <em>default_init=None</em>, <em>verbose=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Load" title="Permalink to this definition"></a></dt>
<dd><p>Initializes variables by loading data from file or dict.</p>
<p><strong>Note</strong> Load will drop <code class="docutils literal"><span class="pre">arg:</span></code> or <code class="docutils literal"><span class="pre">aux:</span></code> from name and
initialize the variables that match with the prefix dropped.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>param</strong> (<em>str or dict of str->`NDArray`</em>) – Parameter file or dict mapping name to NDArray.</li>
<li><strong>default_init</strong> (<a class="reference internal" href="#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Default initializer when name is not found in <cite>param</cite>.</li>
<li><strong>verbose</strong> (<em>bool</em>) – Flag for enabling logging of source when initializing.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Mixed">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Mixed</code><span class="sig-paren">(</span><em>patterns</em>, <em>initializers</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Mixed" title="Permalink to this definition"></a></dt>
<dd><p>Initialize parameters using multiple initializers.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>patterns</strong> (<em>list of str</em>) – List of regular expressions matching parameter names.</li>
<li><strong>initializers</strong> (<em>list of Initializer</em>) – List of initializers corresponding to <cite>patterns</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Example</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="c1"># Given 'module', an instance of 'mxnet.module.Module', initialize biases to zero</span>
<span class="gp">... </span><span class="c1"># and every other parameter to random values with uniform distribution.</span>
<span class="gp">...</span>
<span class="gp">>>> </span><span class="n">init</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">initializer</span><span class="o">.</span><span class="n">Mixed</span><span class="p">([</span><span class="s1">'bias'</span><span class="p">,</span> <span class="s1">'.*'</span><span class="p">],</span> <span class="p">[</span><span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Zero</span><span class="p">(),</span> <span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Uniform</span><span class="p">(</span><span class="mf">0.1</span><span class="p">)])</span>
<span class="gp">>>> </span><span class="n">module</span><span class="o">.</span><span class="n">init_params</span><span class="p">(</span><span class="n">init</span><span class="p">)</span>
<span class="go">>>></span>
<span class="gp">>>> </span><span class="k">for</span> <span class="n">dictionary</span> <span class="ow">in</span> <span class="n">module</span><span class="o">.</span><span class="n">get_params</span><span class="p">():</span>
<span class="gp">... </span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">dictionary</span><span class="p">:</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">dictionary</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">())</span>
<span class="gp">...</span>
<span class="go">fullyconnected1_weight</span>
<span class="go">[[ 0.0097627 0.01856892 0.04303787]]</span>
<span class="go">fullyconnected1_bias</span>
<span class="go">[ 0.]</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Zero">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Zero</code><a class="headerlink" href="#mxnet.initializer.Zero" title="Permalink to this definition"></a></dt>
<dd><p>Initializes weights to zero.</p>
<p class="rubric">Example</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="c1"># Given 'module', an instance of 'mxnet.module.Module', initialize weights to zero.</span>
<span class="gp">...</span>
<span class="gp">>>> </span><span class="n">init</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">initializer</span><span class="o">.</span><span class="n">Zero</span><span class="p">()</span>
<span class="gp">>>> </span><span class="n">module</span><span class="o">.</span><span class="n">init_params</span><span class="p">(</span><span class="n">init</span><span class="p">)</span>
<span class="gp">>>> </span><span class="k">for</span> <span class="n">dictionary</span> <span class="ow">in</span> <span class="n">module</span><span class="o">.</span><span class="n">get_params</span><span class="p">():</span>
<span class="gp">... </span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">dictionary</span><span class="p">:</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">dictionary</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">())</span>
<span class="gp">...</span>
<span class="go">fullyconnected0_weight</span>
<span class="go">[[ 0. 0. 0.]]</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.One">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">One</code><a class="headerlink" href="#mxnet.initializer.One" title="Permalink to this definition"></a></dt>
<dd><p>Initializes weights to one.</p>
<p class="rubric">Example</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="c1"># Given 'module', an instance of 'mxnet.module.Module', initialize weights to one.</span>
<span class="gp">...</span>
<span class="gp">>>> </span><span class="n">init</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">initializer</span><span class="o">.</span><span class="n">One</span><span class="p">()</span>
<span class="gp">>>> </span><span class="n">module</span><span class="o">.</span><span class="n">init_params</span><span class="p">(</span><span class="n">init</span><span class="p">)</span>
<span class="gp">>>> </span><span class="k">for</span> <span class="n">dictionary</span> <span class="ow">in</span> <span class="n">module</span><span class="o">.</span><span class="n">get_params</span><span class="p">():</span>
<span class="gp">... </span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">dictionary</span><span class="p">:</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">dictionary</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">())</span>
<span class="gp">...</span>
<span class="go">fullyconnected0_weight</span>
<span class="go">[[ 1. 1. 1.]]</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Constant">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Constant</code><span class="sig-paren">(</span><em>value</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Constant" title="Permalink to this definition"></a></dt>
<dd><p>Initializes the weights to a scalar value.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>value</strong> (<em>float</em>) – Fill value.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Uniform">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Uniform</code><span class="sig-paren">(</span><em>scale=0.07</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Uniform" title="Permalink to this definition"></a></dt>
<dd><p>Initializes weights with random values uniformly sampled from a given range.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>scale</strong> (<em>float, optional</em>) – The bound on the range of the generated random values.
Values are generated from the range [-<cite>scale</cite>, <cite>scale</cite>].
Default scale is 0.07.</td>
</tr>
</tbody>
</table>
<p class="rubric">Example</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="c1"># Given 'module', an instance of 'mxnet.module.Module', initialize weights</span>
<span class="gp">>>> </span><span class="c1"># to random values uniformly sampled between -0.1 and 0.1.</span>
<span class="gp">...</span>
<span class="gp">>>> </span><span class="n">init</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Uniform</span><span class="p">(</span><span class="mf">0.1</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">module</span><span class="o">.</span><span class="n">init_params</span><span class="p">(</span><span class="n">init</span><span class="p">)</span>
<span class="gp">>>> </span><span class="k">for</span> <span class="n">dictionary</span> <span class="ow">in</span> <span class="n">module</span><span class="o">.</span><span class="n">get_params</span><span class="p">():</span>
<span class="gp">... </span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">dictionary</span><span class="p">:</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">dictionary</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">())</span>
<span class="gp">...</span>
<span class="go">fullyconnected0_weight</span>
<span class="go">[[ 0.01360891 -0.02144304 0.08511933]]</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Normal">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Normal</code><span class="sig-paren">(</span><em>sigma=0.01</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Normal" title="Permalink to this definition"></a></dt>
<dd><p>Initializes weights with random values sampled from a normal distribution
with a mean of zero and standard deviation of <cite>sigma</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>sigma</strong> (<em>float, optional</em>) – Standard deviation of the normal distribution.
Default standard deviation is 0.01.</td>
</tr>
</tbody>
</table>
<p class="rubric">Example</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="c1"># Given 'module', an instance of 'mxnet.module.Module', initialize weights</span>
<span class="gp">>>> </span><span class="c1"># to random values sampled from a normal distribution.</span>
<span class="gp">...</span>
<span class="gp">>>> </span><span class="n">init</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Normal</span><span class="p">(</span><span class="mf">0.5</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">module</span><span class="o">.</span><span class="n">init_params</span><span class="p">(</span><span class="n">init</span><span class="p">)</span>
<span class="gp">>>> </span><span class="k">for</span> <span class="n">dictionary</span> <span class="ow">in</span> <span class="n">module</span><span class="o">.</span><span class="n">get_params</span><span class="p">():</span>
<span class="gp">... </span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">dictionary</span><span class="p">:</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
<span class="gp">... </span> <span class="k">print</span><span class="p">(</span><span class="n">dictionary</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">())</span>
<span class="gp">...</span>
<span class="go">fullyconnected0_weight</span>
<span class="go">[[-0.3214761 -0.12660924 0.53789419]]</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Orthogonal">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Orthogonal</code><span class="sig-paren">(</span><em>scale=1.414</em>, <em>rand_type='uniform'</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Orthogonal" title="Permalink to this definition"></a></dt>
<dd><p>Initialize weight as orthogonal matrix.</p>
<p>This initializer implements <em>Exact solutions to the nonlinear dynamics of
learning in deep linear neural networks</em>, available at
<a class="reference external" href="https://arxiv.org/abs/1312.6120">https://arxiv.org/abs/1312.6120</a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>scale</strong> (<em>float optional</em>) – Scaling factor of weight.</li>
<li><strong>rand_type</strong> (<em>string optional</em>) – Use “uniform” or “normal” random number to initialize weight.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Xavier">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Xavier</code><span class="sig-paren">(</span><em>rnd_type='uniform'</em>, <em>factor_type='avg'</em>, <em>magnitude=3</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.Xavier" title="Permalink to this definition"></a></dt>
<dd><p>Returns an initializer performing “Xavier” initialization for weights.</p>
<p>This initializer is designed to keep the scale of gradients roughly the same
in all layers.</p>
<p>By default, <cite>rnd_type</cite> is <code class="docutils literal"><span class="pre">'uniform'</span></code> and <cite>factor_type</cite> is <code class="docutils literal"><span class="pre">'avg'</span></code>,
the initializer fills the weights with random numbers in the range
of <span class="math">\([-c, c]\)</span>, where <span class="math">\(c = \sqrt{\frac{3.}{0.5 * (n_{in} + n_{out})}}\)</span>.
<span class="math">\(n_{in}\)</span> is the number of neurons feeding into weights, and <span class="math">\(n_{out}\)</span> is
the number of neurons the result is fed to.</p>
<p>If <cite>rnd_type</cite> is <code class="docutils literal"><span class="pre">'uniform'</span></code> and <cite>factor_type</cite> is <code class="docutils literal"><span class="pre">'in'</span></code>,
the <span class="math">\(c = \sqrt{\frac{3.}{n_{in}}}\)</span>.
Similarly when <cite>factor_type</cite> is <code class="docutils literal"><span class="pre">'out'</span></code>, the <span class="math">\(c = \sqrt{\frac{3.}{n_{out}}}\)</span>.</p>
<p>If <cite>rnd_type</cite> is <code class="docutils literal"><span class="pre">'gaussian'</span></code> and <cite>factor_type</cite> is <code class="docutils literal"><span class="pre">'avg'</span></code>,
the initializer fills the weights with numbers from normal distribution with
a standard deviation of <span class="math">\(\sqrt{\frac{3.}{0.5 * (n_{in} + n_{out})}}\)</span>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>rnd_type</strong> (<em>str, optional</em>) – Random generator type, can be <code class="docutils literal"><span class="pre">'gaussian'</span></code> or <code class="docutils literal"><span class="pre">'uniform'</span></code>.</li>
<li><strong>factor_type</strong> (<em>str, optional</em>) – Can be <code class="docutils literal"><span class="pre">'avg'</span></code>, <code class="docutils literal"><span class="pre">'in'</span></code>, or <code class="docutils literal"><span class="pre">'out'</span></code>.</li>
<li><strong>magnitude</strong> (<em>float, optional</em>) – Scale of random number.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.MSRAPrelu">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">MSRAPrelu</code><span class="sig-paren">(</span><em>factor_type='avg'</em>, <em>slope=0.25</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.MSRAPrelu" title="Permalink to this definition"></a></dt>
<dd><p>Initialize the weight according to a MSRA paper.</p>
<p>This initializer implements <em>Delving Deep into Rectifiers: Surpassing
Human-Level Performance on ImageNet Classification</em>, available at
<a class="reference external" href="https://arxiv.org/abs/1502.01852">https://arxiv.org/abs/1502.01852</a>.</p>
<p>This initializer is proposed for initialization related to ReLu activation,
it maked some changes on top of Xavier method.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>factor_type</strong> (<em>str, optional</em>) – Can be <code class="docutils literal"><span class="pre">'avg'</span></code>, <code class="docutils literal"><span class="pre">'in'</span></code>, or <code class="docutils literal"><span class="pre">'out'</span></code>.</li>
<li><strong>slope</strong> (<em>float, optional</em>) – initial slope of any PReLU (or similar) nonlinearities.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.Bilinear">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">Bilinear</code><a class="headerlink" href="#mxnet.initializer.Bilinear" title="Permalink to this definition"></a></dt>
<dd><p>Initialize weight for upsampling layers.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.LSTMBias">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">LSTMBias</code><span class="sig-paren">(</span><em>forget_bias</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.LSTMBias" title="Permalink to this definition"></a></dt>
<dd><p>Initialize all bias of an LSTMCell to 0.0 except for
the forget gate whose bias is set to custom value.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>forget_bias</strong> (<em>float, bias for the forget gate.</em>) – Jozefowicz et al. 2015 recommends setting this to 1.0.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.initializer.FusedRNN">
<em class="property">class </em><code class="descclassname">mxnet.initializer.</code><code class="descname">FusedRNN</code><span class="sig-paren">(</span><em>init</em>, <em>num_hidden</em>, <em>num_layers</em>, <em>mode</em>, <em>bidirectional=False</em>, <em>forget_bias=1.0</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.initializer.FusedRNN" title="Permalink to this definition"></a></dt>
<dd><p>Initialize parameters for fused rnn layers.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>init</strong> (<a class="reference internal" href="#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – initializer applied to unpacked weights. Fall back to global
initializer if None.</li>
<li><strong>num_hidden</strong> (<em>int</em>) – should be the same with arguments passed to FusedRNNCell.</li>
<li><strong>num_layers</strong> (<em>int</em>) – should be the same with arguments passed to FusedRNNCell.</li>
<li><strong>mode</strong> (<em>str</em>) – should be the same with arguments passed to FusedRNNCell.</li>
<li><strong>bidirectional</strong> (<em>bool</em>) – should be the same with arguments passed to FusedRNNCell.</li>
<li><strong>forget_bias</strong> (<em>float</em>) – should be the same with arguments passed to FusedRNNCell.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<script>auto_index("api-reference");</script></div>
</div>
<div class="container">
<div class="footer">
<p> © 2015-2017 DMLC. All rights reserved. </p>
</div>
</div>
</div>
<div aria-label="main navigation" class="sphinxsidebar rightsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<h3><a href="../../index.html">Table Of Contents</a></h3>
<ul>
<li><a class="reference internal" href="#">Optimization: initialize and update weights</a><ul>
<li><a class="reference internal" href="#overview">Overview</a></li>
<li><a class="reference internal" href="#the-mxnet-initializer-package">The <code class="docutils literal"><span class="pre">mxnet.initializer</span></code> package</a></li>
<li><a class="reference internal" href="#the-mxnet-optimizer-package">The <code class="docutils literal"><span class="pre">mxnet.optimizer</span></code> package</a></li>
<li><a class="reference internal" href="#the-mxnet-lr-scheduler-package">The <code class="docutils literal"><span class="pre">mxnet.lr_scheduler</span></code> package</a></li>
<li><a class="reference internal" href="#implement-a-new-algorithm">Implement a new algorithm</a></li>
<li><a class="reference internal" href="#api-reference">API Reference</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div> <!-- pagename != index -->
<script crossorigin="anonymous" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
<script src="../../_static/js/sidebar.js" type="text/javascript"></script>
<script src="../../_static/js/search.js" type="text/javascript"></script>
<script src="../../_static/js/navbar.js" type="text/javascript"></script>
<script src="../../_static/js/clipboard.min.js" type="text/javascript"></script>
<script src="../../_static/js/copycode.js" type="text/javascript"></script>
<script type="text/javascript">
$('body').ready(function () {
$('body').css('visibility', 'visible');
});
</script>
</div></body>
</html>