| <!DOCTYPE html> |
| |
| <html lang="en"> |
| <head> |
| <meta charset="utf-8"/> |
| <meta content="IE=edge" http-equiv="X-UA-Compatible"/> |
| <meta content="width=device-width, initial-scale=1" name="viewport"/> |
| <title>Gluon Package — mxnet documentation</title> |
| <link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" rel="stylesheet"/> |
| <link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet"/> |
| <link href="../../_static/basic.css" rel="stylesheet" type="text/css"/> |
| <link href="../../_static/pygments.css" rel="stylesheet" type="text/css"/> |
| <link href="../../_static/mxnet.css" rel="stylesheet" type="text/css"> |
| <!-- Global page options object. URL_ROOT is the relative path from this page back to the site root; FILE_SUFFIX and SOURCELINK_SUFFIX are the suffixes used for generated pages and source links. NOTE(review): presumably read by the doctools/searchtools scripts loaded right after this block; confirm before renaming any key. --> |
| <script type="text/javascript"> |
| var DOCUMENTATION_OPTIONS = { |
| URL_ROOT: '../../', |
| VERSION: '', |
| COLLAPSE_INDEX: false, |
| FILE_SUFFIX: '.html', |
| HAS_SOURCE: true, |
| SOURCELINK_SUFFIX: '.txt' |
| }; |
| </script> |
| <script src="../../_static/jquery-1.11.1.js" type="text/javascript"></script> |
| <script src="../../_static/underscore.js" type="text/javascript"></script> |
| <script src="../../_static/searchtools_custom.js" type="text/javascript"></script> |
| <script src="../../_static/doctools.js" type="text/javascript"></script> |
| <script src="../../_static/selectlang.js" type="text/javascript"></script> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script> |
| <script type="text/javascript"> jQuery(function() { Search.loadIndex("/searchindex.js"); Search.init();}); </script> |
| <!-- Google Analytics (analytics.js) loader: defines the global ga command queue, injects the analytics.js script asynchronously ahead of the first script element, then creates a tracker for property UA-96378503-1 and sends a pageview hit. Vendor snippet; keep verbatim. --> |
| <script> |
| (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new |
| Date();a=s.createElement(o), |
| m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); |
| |
| ga('create', 'UA-96378503-1', 'auto'); |
| ga('send', 'pageview'); |
| |
| </script> |
| <!-- --> |
| <!-- <script type="text/javascript" src="../../_static/jquery.js"></script> --> |
| <!-- --> |
| <!-- <script type="text/javascript" src="../../_static/underscore.js"></script> --> |
| <!-- --> |
| <!-- <script type="text/javascript" src="../../_static/doctools.js"></script> --> |
| <!-- --> |
| <!-- <script type="text/javascript" src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> --> |
| <!-- --> |
| <link href="../../genindex.html" rel="index" title="Index"/> |
| <link href="../../search.html" rel="search" title="Search"> |
| <link href="index.html" rel="up" title="MXNet - Python API"> |
| <link href="rnn.html" rel="next" title="RNN Cell API"/> |
| <link href="module.html" rel="prev" title="Module API"/> |
| <link href="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-icon.png" rel="icon" type="image/png"/> |
| </head> |
| <body role="document"><!-- Previous Navbar Layout |
| <div class="navbar navbar-default navbar-fixed-top"> |
| <div class="container"> |
| <div class="navbar-header"> |
| <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar"> |
| <span class="sr-only">Toggle navigation</span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| </button> |
| <a href="../../" class="navbar-brand"> |
| <img src="http://data.mxnet.io/theme/mxnet.png"> |
| </a> |
| </div> |
| <div id="navbar" class="navbar-collapse collapse"> |
| <ul id="navbar" class="navbar navbar-left"> |
| |
| <li> <a href="../../get_started/index.html">Get Started</a> </li> |
| |
| <li> <a href="../../tutorials/index.html">Tutorials</a> </li> |
| |
| <li> <a href="../../how_to/index.html">How To</a> </li> |
| |
| |
| <li class="dropdown"> |
| <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Packages <span class="caret"></span></a> |
| <ul class="dropdown-menu"> |
| |
| <li><a href="../../packages/python/index.html"> |
| Python |
| </a></li> |
| |
| <li><a href="../../packages/r/index.html"> |
| R |
| </a></li> |
| |
| <li><a href="../../packages/julia/index.html"> |
| Julia |
| </a></li> |
| |
| <li><a href="../../packages/c++/index.html"> |
| C++ |
| </a></li> |
| |
| <li><a href="../../packages/scala/index.html"> |
| Scala |
| </a></li> |
| |
| <li><a href="../../packages/perl/index.html"> |
| Perl |
| </a></li> |
| |
| </ul> |
| </li> |
| |
| <li> <a href="../../system/index.html">System</a> </li> |
| <li> |
| <form class="" role="search" action="../../search.html" method="get" autocomplete="off"> |
| <div class="form-group inner-addon left-addon"> |
| <i class="glyphicon glyphicon-search"></i> |
| <input type="text" name="q" class="form-control" placeholder="Search"> |
| </div> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| |
| </form> </li> |
| </ul> |
| <ul id="navbar" class="navbar navbar-right"> |
| <li> <a href="../../index.html"><span class="flag-icon flag-icon-us"></span></a> </li> |
| <li> <a href="../..//zh/index.html"><span class="flag-icon flag-icon-cn"></span></a> </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| Previous Navbar Layout End --> |
| <div class="navbar navbar-fixed-top"> |
| <div class="container" id="navContainer"> |
| <div class="innder" id="header-inner"> |
| <h1 id="logo-wrap"> |
| <a href="../../" id="logo"><img alt="MXNet" src="http://data.mxnet.io/theme/mxnet.png"/></a> |
| </h1> |
| <nav class="nav-bar" id="main-nav"> |
| <a class="main-nav-link" href="../../get_started/install.html">Install</a> |
| <a class="main-nav-link" href="../../tutorials/index.html">Tutorials</a> |
| <a class="main-nav-link" href="../../how_to/index.html">How To</a> |
| <span id="dropdown-menu-position-anchor"> |
| <a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">API <span class="caret"></span></a> |
| <ul class="dropdown-menu" id="package-dropdown-menu"> |
| <li><a class="main-nav-link" href="../../api/python/index.html">Python</a></li> |
| <li><a class="main-nav-link" href="../../api/scala/index.html">Scala</a></li> |
| <li><a class="main-nav-link" href="../../api/r/index.html">R</a></li> |
| <li><a class="main-nav-link" href="../../api/julia/index.html">Julia</a></li> |
| <li><a class="main-nav-link" href="../../api/c++/index.html">C++</a></li> |
| <li><a class="main-nav-link" href="../../api/perl/index.html">Perl</a></li> |
| </ul> |
| </span> |
| <a class="main-nav-link" href="../../architecture/index.html">Architecture</a> |
| <!-- <a class="main-nav-link" href="../../community/index.html">Community</a> --> |
| <a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a> |
| <span id="dropdown-menu-position-anchor-version" style="position: relative"><a href="#" class="main-nav-link dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Versions(master)<span class="caret"></span></a><ul id="package-dropdown-menu" class="dropdown-menu"><li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/test/">v0.10.14</a></li><li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/test/versions/0.10/index.html">0.10</a></li><li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/test/versions/master/index.html">master</a></li></ul></span></nav> |
| <!-- getRootPath() returns the relative path from this page to the site root ("../../", the same value as DOCUMENTATION_OPTIONS.URL_ROOT). NOTE(review): callers are not visible in this file; presumably used by the navbar/version-menu scripts. --> |
| <script> function getRootPath(){ return "../../" } </script> |
| <div class="burgerIcon dropdown"> |
| <a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button">☰</a> |
| <ul class="dropdown-menu dropdown-menu-right" id="burgerMenu"> |
| <li><a href="../../get_started/install.html">Install</a></li> |
| <li><a href="../../tutorials/index.html">Tutorials</a></li> |
| <li><a href="../../how_to/index.html">How To</a></li> |
| <li class="dropdown-submenu"> |
| <a href="#" tabindex="-1">API</a> |
| <ul class="dropdown-menu"> |
| <li><a href="../../api/python/index.html" tabindex="-1">Python</a> |
| </li> |
| <li><a href="../../api/scala/index.html" tabindex="-1">Scala</a> |
| </li> |
| <li><a href="../../api/r/index.html" tabindex="-1">R</a> |
| </li> |
| <li><a href="../../api/julia/index.html" tabindex="-1">Julia</a> |
| </li> |
| <li><a href="../../api/c++/index.html" tabindex="-1">C++</a> |
| </li> |
| <li><a href="../../api/perl/index.html" tabindex="-1">Perl</a> |
| </li> |
| </ul> |
| </li> |
| <li><a href="../../architecture/index.html">Architecture</a></li> |
| <li><a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a></li> |
| <li id="dropdown-menu-position-anchor-version-mobile" class="dropdown-submenu" style="position: relative"><a href="#" tabindex="-1">Versions(master)</a><ul class="dropdown-menu"><li><a tabindex="-1" href="http://mxnet.incubator.apache.org/test/">v0.10.14</a></li><li><a tabindex="-1" href="http://mxnet.incubator.apache.org/test/versions/0.10/index.html">0.10</a></li><li><a tabindex="-1" href="http://mxnet.incubator.apache.org/test/versions/master/index.html">master</a></li></ul></li></ul> |
| </div> |
| <div class="plusIcon dropdown"> |
| <a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"><span aria-hidden="true" class="glyphicon glyphicon-plus"></span></a> |
| <ul class="dropdown-menu dropdown-menu-right" id="plusMenu"></ul> |
| </div> |
| <div id="search-input-wrap"> |
| <form action="../../search.html" autocomplete="off" class="" method="get" role="search"> |
| <div class="form-group inner-addon left-addon"> |
| <i class="glyphicon glyphicon-search"></i> |
| <input class="form-control" name="q" placeholder="Search" type="text"/> |
| </div> |
| <input name="check_keywords" type="hidden" value="yes"/> |
| <input name="area" type="hidden" value="default"/> |
| </form> |
| <div id="search-preview"></div> |
| </div> |
| <div id="searchIcon"> |
| <span aria-hidden="true" class="glyphicon glyphicon-search"></span> |
| </div> |
| <!-- <div id="lang-select-wrap"> --> |
| <!-- <label id="lang-select-label"> --> |
| <!-- <\!-- <i class="fa fa-globe"></i> -\-> --> |
| <!-- <span></span> --> |
| <!-- </label> --> |
| <!-- <select id="lang-select"> --> |
| <!-- <option value="en">Eng</option> --> |
| <!-- <option value="zh">中文</option> --> |
| <!-- </select> --> |
| <!-- </div> --> |
| <!-- <a id="mobile-nav-toggle"> |
| <span class="mobile-nav-toggle-bar"></span> |
| <span class="mobile-nav-toggle-bar"></span> |
| <span class="mobile-nav-toggle-bar"></span> |
| </a> --> |
| </div> |
| </div> |
| </div> |
| <div class="container"> |
| <div class="row"> |
| <div aria-label="main navigation" class="sphinxsidebar leftsidebar" role="navigation"> |
| <div class="sphinxsidebarwrapper"> |
| <ul class="current"> |
| <li class="toctree-l1 current"><a class="reference internal" href="index.html">Python Documents</a><ul class="current"> |
| <li class="toctree-l2 current"><a class="reference internal" href="index.html#table-of-contents">Table of contents</a><ul class="current"> |
| <li class="toctree-l3"><a class="reference internal" href="ndarray.html">NDArray API</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="symbol.html">Symbol API</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="module.html">Module API</a></li> |
| <li class="toctree-l3 current"><a class="current reference internal" href="#">Gluon Package</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#overview">Overview</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#parameter">Parameter</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#containers">Containers</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#neural-network-layers">Neural Network Layers</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#recurrent-layers">Recurrent Layers</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#trainer">Trainer</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#loss-functions">Loss functions</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#utilities">Utilities</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="rnn.html">RNN Cell API</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="kvstore.html">KVStore API</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="io.html">Data Loading API</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="image.html">Image API</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="optimization.html">Optimization: initialize and update weights</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="callback.html">Callback API</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="metric.html">Evaluation Metric API</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="../r/index.html">R Documents</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../julia/index.html">Julia Documents</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../c++/index.html">C++ Documents</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../scala/index.html">Scala Documents</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../perl/index.html">Perl Documents</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../how_to/index.html">HowTo Documents</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../architecture/index.html">System Documents</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../tutorials/index.html">Tutorials</a></li> |
| </ul> |
| </div> |
| </div> |
| <div class="content"> |
| <div class="section" id="gluon-package"> |
| <span id="gluon-package"></span><h1>Gluon Package<a class="headerlink" href="#gluon-package" title="Permalink to this headline">¶</a></h1> |
| <div class="admonition warning"> |
| <p class="first admonition-title">Warning</p> |
| <p class="last">This package is currently experimental and may change in the near future.</p> |
| </div> |
| <script src="../../_static/js/auto_module_index.js" type="text/javascript"></script><div class="section" id="overview"> |
| <span id="overview"></span><h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2> |
| <p>The Gluon package is a high-level interface for MXNet designed to be easy to use while |
| keeping most of the flexibility of the low-level API. Gluon supports both imperative |
| and symbolic programming, making it easy to train complex models imperatively |
| in Python and then deploy them with a symbolic graph in C++ and Scala.</p> |
| </div> |
| <div class="section" id="parameter"> |
| <span id="parameter"></span><h2>Parameter<a class="headerlink" href="#parameter" title="Permalink to this headline">¶</a></h2> |
| <dl class="class"> |
| <dt id="mxnet.gluon.Parameter"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">Parameter</code><span class="sig-paren">(</span><em>name</em>, <em>grad_req='write'</em>, <em>shape=None</em>, <em>dtype=&lt;type 'numpy.float32'&gt;</em>, <em>lr_mult=1.0</em>, <em>wd_mult=1.0</em>, <em>init=None</em>, <em>allow_deferred_init=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter" title="Permalink to this definition">¶</a></dt> |
| <dd><p>A Container holding parameters (weights) of <cite>Block</cite>s.</p> |
| <p><cite>Parameter</cite> holds a copy of the parameter on each <cite>Context</cite> after |
| it is initialized with <cite>Parameter.initialize(...)</cite>. If <cite>grad_req</cite> is |
| not <cite>null</cite>, it will also hold a gradient array on each <cite>Context</cite>:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">ctx</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> |
| <span class="n">x</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">16</span><span class="p">,</span> <span class="mi">100</span><span class="p">),</span> <span class="n">ctx</span><span class="o">=</span><span class="n">ctx</span><span class="p">)</span> |
| <span class="n">w</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="s1">'fc_weight'</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">64</span><span class="p">,</span> <span class="mi">100</span><span class="p">),</span> <span class="n">init</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Xavier</span><span class="p">())</span> |
| <span class="n">b</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="s1">'fc_bias'</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">64</span><span class="p">,),</span> <span class="n">init</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Zero</span><span class="p">())</span> |
| <span class="n">w</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="n">ctx</span><span class="p">)</span> |
| <span class="n">b</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="n">ctx</span><span class="p">)</span> |
| <span class="n">out</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">FullyConnected</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">w</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">ctx</span><span class="p">),</span> <span class="n">b</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">ctx</span><span class="p">),</span> <span class="n">num_hidden</span><span class="o">=</span><span class="mi">64</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>name</strong> (<em>str</em>) – Name of this parameter.</li> |
| <li><strong>grad_req</strong> (<em>{'write'</em><em>, </em><em>'add'</em><em>, </em><em>'null'}</em><em>, </em><em>default 'write'</em>) – <p>Specifies how to update gradient to grad arrays.</p> |
| <ul> |
| <li>‘write’ means the gradient is written to the grad <cite>NDArray</cite> every time.</li> |
| <li>‘add’ means the gradient is added to the grad <cite>NDArray</cite> every time. You need |
| to manually call <cite>zero_grad()</cite> to clear the gradient buffer before each |
| iteration when using this option.</li> |
| <li>‘null’ means gradient is not requested for this parameter. gradient arrays |
| will not be allocated.</li> |
| </ul> |
| </li> |
| <li><strong>shape</strong> (<em>tuple of int</em><em>, </em><em>default None</em>) – Shape of this parameter. By default shape is not specified. Parameter with |
| unknown shape can be used for <cite>Symbol</cite> API, but <cite>init</cite> will throw an error |
| when using <cite>NDArray</cite> API.</li> |
| <li><strong>dtype</strong> (<em>numpy.dtype</em><em> or </em><em>str</em><em>, </em><em>default 'float32'</em>) – Data type of this parameter. For example, numpy.float32 or ‘float32’.</li> |
| <li><strong>lr_mult</strong> (<em>float</em><em>, </em><em>default 1.0</em>) – Learning rate multiplier. Learning rate will be multiplied by lr_mult |
| when updating this parameter with optimizer.</li> |
| <li><strong>wd_mult</strong> (<em>float</em><em>, </em><em>default 1.0</em>) – Weight decay multiplier (L2 regularizer coefficient). Works similar to lr_mult.</li> |
| <li><strong>init</strong> (<a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a><em>, </em><em>default None</em>) – Initializer of this parameter. Will use the global initializer by default.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Parameter.initialize"> |
| <code class="descname">initialize</code><span class="sig-paren">(</span><em>init=None</em>, <em>ctx=None</em>, <em>default_init=&lt;mxnet.initializer.Uniform object&gt;</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.initialize" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Initializes parameter and gradient arrays. Only used for <cite>NDArray</cite> API.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>init</strong> (<a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – The initializer to use. Overrides <cite>Parameter.init</cite> and default_init.</li> |
| <li><strong>ctx</strong> (Context or list of Context, defaults to <cite>context.current_context()</cite>.) – <p>Initialize Parameter on given context. If ctx is a list of Context, a |
| copy will be made for each context.</p> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">Copies are independent arrays. User is responsible for keeping |
| their values consistent when updating. Normally <cite>gluon.Trainer</cite> does this for you.</p> |
| </div> |
| </li> |
| <li><strong>default_init</strong> (<a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Default initializer is used when both <cite>init</cite> and <cite>Parameter.init</cite> are <cite>None</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p class="rubric">Examples</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">weight</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="s1">'weight'</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">))</span> |
| <span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> |
| <span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">data</span><span class="p">()</span> |
| <span class="go">[[-0.01068833 0.01729892]</span> |
| <span class="go"> [ 0.02042518 -0.01618656]]</span> |
| <span class="go">&lt;NDArray 2x2 @cpu(0)&gt;</span> |
| <span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">grad</span><span class="p">()</span> |
| <span class="go">[[ 0. 0.]</span> |
| <span class="go"> [ 0. 0.]]</span> |
| <span class="go">&lt;NDArray 2x2 @cpu(0)&gt;</span> |
| <span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="p">[</span><span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">1</span><span class="p">)])</span> |
| <span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> |
| <span class="go">[[-0.00873779 -0.02834515]</span> |
| <span class="go"> [ 0.05484822 -0.06206018]]</span> |
| <span class="go">&lt;NDArray 2x2 @gpu(0)&gt;</span> |
| <span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span> |
| <span class="go">[[-0.00873779 -0.02834515]</span> |
| <span class="go"> [ 0.05484822 -0.06206018]]</span> |
| <span class="go">&lt;NDArray 2x2 @gpu(1)&gt;</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Parameter.set_data"> |
| <code class="descname">set_data</code><span class="sig-paren">(</span><em>data</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.set_data" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets this parameter’s value on all contexts to data.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Parameter.data"> |
| <code class="descname">data</code><span class="sig-paren">(</span><em>ctx=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.data" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a copy of this parameter on one context. Must have been |
| initialized on this context before.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>ctx</strong> (<em>Context</em>) – Desired context.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"></td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">NDArray on ctx</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Parameter.list_data"> |
| <code class="descname">list_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.list_data" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns copies of this parameter on all contexts, in the same order |
| as creation.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Parameter.grad"> |
| <code class="descname">grad</code><span class="sig-paren">(</span><em>ctx=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.grad" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a gradient buffer for this parameter on one context.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>ctx</strong> (<em>Context</em>) – Desired context.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Parameter.list_grad"> |
| <code class="descname">list_grad</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.list_grad" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns gradient buffers on all contexts, in the same order |
| as <cite>values</cite>.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Parameter.list_ctx"> |
| <code class="descname">list_ctx</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.list_ctx" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a list of contexts this parameter is initialized on.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Parameter.zero_grad"> |
| <code class="descname">zero_grad</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.zero_grad" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets gradient buffer on all contexts to 0. No action is taken if |
| parameter is uninitialized or doesn’t require gradient.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Parameter.var"> |
| <code class="descname">var</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.var" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a symbol representing this parameter.</p> |
| </dd></dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.ParameterDict"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">ParameterDict</code><span class="sig-paren">(</span><em>prefix=''</em>, <em>shared=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict" title="Permalink to this definition">¶</a></dt> |
| <dd><p>A dictionary managing a set of parameters.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>prefix</strong> (<em>str</em><em>, </em><em>default ''</em>) – The prefix to be prepended to all Parameters’ name created by this dict.</li> |
| <li><strong>shared</strong> (<a class="reference internal" href="#mxnet.gluon.ParameterDict" title="mxnet.gluon.ParameterDict"><em>ParameterDict</em></a><em> or </em><em>None</em>) – If not <cite>None</cite>, when this dict’s <cite>get</cite> method creates a new parameter, will |
| first try to retrieve it from <cite>shared</cite> dict. Usually used for sharing |
| parameters with another <cite>Block</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="attribute"> |
| <dt id="mxnet.gluon.ParameterDict.prefix"> |
| <code class="descname">prefix</code><a class="headerlink" href="#mxnet.gluon.ParameterDict.prefix" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Prefix of this dict. It will be prepended to Parameters’ name created |
| with <cite>get</cite>.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.ParameterDict.get"> |
| <code class="descname">get</code><span class="sig-paren">(</span><em>name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.get" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves a <cite>Parameter</cite> with name <cite>self.prefix+name</cite>. If not found, |
| <cite>get</cite> will first try to retrieve it from <cite>shared</cite> dict. If still not |
| found, <cite>get</cite> will create a new <cite>Parameter</cite> with keyword arguments and |
| insert it into self.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>name</strong> (<em>str</em>) – Name of the desired Parameter. It will be prepended with this dictionary’s |
| prefix.</li> |
| <li><strong>**kwargs</strong> (<em>dict</em>) – The rest of key-word arguments for the created <cite>Parameter</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The created or retrieved <cite>Parameter</cite>.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#mxnet.gluon.Parameter" title="mxnet.gluon.Parameter">Parameter</a></p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.ParameterDict.update"> |
| <code class="descname">update</code><span class="sig-paren">(</span><em>other</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.update" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Copies all Parameters in <cite>other</cite> to self.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.ParameterDict.initialize"> |
| <code class="descname">initialize</code><span class="sig-paren">(</span><em>init=&lt;mxnet.initializer.Uniform object&gt;</em>, <em>ctx=None</em>, <em>verbose=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.initialize" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Initializes all Parameters managed by this dictionary to be used for <cite>NDArray</cite> |
| API. It has no effect when using <cite>Symbol</cite> API.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>init</strong> (<a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Global default Initializer to be used when <cite>Parameter.init</cite> is <cite>None</cite>. |
| Otherwise, <cite>Parameter.init</cite> takes precedence.</li> |
| <li><strong>ctx</strong> (<em>Context</em><em> or </em><em>list of Context</em>) – Keeps a copy of Parameters on one or many context(s).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.ParameterDict.zero_grad"> |
| <code class="descname">zero_grad</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.zero_grad" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets all Parameters’ gradient buffer to 0.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.ParameterDict.save"> |
| <code class="descname">save</code><span class="sig-paren">(</span><em>filename</em>, <em>strip_prefix=''</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.save" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Save parameters to file.</p> |
| <dl class="docutils"> |
| <dt>filename <span class="classifier-delimiter">:</span> <span class="classifier">str</span></dt> |
| <dd>Path to parameter file.</dd> |
| <dt>strip_prefix <span class="classifier-delimiter">:</span> <span class="classifier">str, default ‘’</span></dt> |
| <dd>Strip prefix from parameter names before saving.</dd> |
| </dl> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.ParameterDict.load"> |
| <code class="descname">load</code><span class="sig-paren">(</span><em>filename</em>, <em>ctx</em>, <em>allow_missing=False</em>, <em>ignore_extra=False</em>, <em>restore_prefix=''</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.load" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Load parameters from file.</p> |
| <dl class="docutils"> |
| <dt>filename <span class="classifier-delimiter">:</span> <span class="classifier">str</span></dt> |
| <dd>Path to parameter file.</dd> |
| <dt>ctx <span class="classifier-delimiter">:</span> <span class="classifier">Context or list of Context</span></dt> |
| <dd>Context(s) to initialize loaded parameters on.</dd> |
| <dt>allow_missing <span class="classifier-delimiter">:</span> <span class="classifier">bool, default False</span></dt> |
| <dd>Whether to silently skip loading parameters not represented in the file.</dd> |
| <dt>ignore_extra <span class="classifier-delimiter">:</span> <span class="classifier">bool, default False</span></dt> |
| <dd>Whether to silently ignore parameters from the file that are not |
| present in this ParameterDict.</dd> |
| <dt>restore_prefix <span class="classifier-delimiter">:</span> <span class="classifier">str, default ‘’</span></dt> |
| <dd>Prepend prefix to names of stored parameters before loading.</dd> |
| </dl> |
| </dd></dl> |
| </dd></dl> |
| </div> |
| <div class="section" id="containers"> |
| <span id="containers"></span><h2>Containers<a class="headerlink" href="#containers" title="Permalink to this headline">¶</a></h2> |
| <dl class="class"> |
| <dt id="mxnet.gluon.Block"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">Block</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Base class for all neural network layers and models. Your models should |
| subclass this class.</p> |
| <p><cite>Block</cite> can be nested recursively in a tree structure. You can create and |
| assign child <cite>Block</cite> as regular attributes:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">mxnet.gluon</span> <span class="k">import</span> <span class="n">Block</span><span class="p">,</span> <span class="n">nn</span> |
| <span class="kn">from</span> <span class="nn">mxnet</span> <span class="k">import</span> <span class="n">ndarray</span> <span class="k">as</span> <span class="n">F</span> |
| |
| <span class="k">class</span> <span class="nc">Model</span><span class="p">(</span><span class="n">Block</span><span class="p">):</span> |
| <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">Model</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| <span class="c1"># use name_scope to give child Blocks appropriate names.</span> |
| <span class="c1"># It also allows sharing Parameters between Blocks recursively.</span> |
| <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">dense0</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">dense1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span> |
| <span class="n">x</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dense0</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> |
| <span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dense1</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> |
| |
| <span class="n">model</span> <span class="o">=</span> <span class="n">Model</span><span class="p">()</span> |
| <span class="n">model</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> |
| <span class="n">model</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">10</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> <span class="n">ctx</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(</span><span class="mi">0</span><span class="p">)))</span> |
| </pre></div> |
| </div> |
| <p>Child <cite>Block</cite> assigned this way will be registered and <cite>collect_params</cite> |
| will collect their Parameters recursively.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>prefix</strong> (<em>str</em>) – Prefix acts like a name space. It will be prepended to the names of all |
| Parameters and child <cite>Block</cite>s in this <cite>Block</cite>’s <cite>name_scope</cite>. Prefix |
| should be unique within one model to prevent name collisions.</li> |
| <li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.ParameterDict" title="mxnet.gluon.ParameterDict"><em>ParameterDict</em></a><em> or </em><em>None</em>) – <p><cite>ParameterDict</cite> for sharing weights with the new <cite>Block</cite>. For example, |
| if you want <cite>dense1</cite> to share <cite>dense0</cite>’s weights, you can do:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">dense0</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span> |
| <span class="n">dense1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="n">params</span><span class="o">=</span><span class="n">dense0</span><span class="o">.</span><span class="n">collect_params</span><span class="p">())</span> |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Block.forward"> |
| <code class="descname">forward</code><span class="sig-paren">(</span><em>*args</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.forward" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Overrides to implement forward computation using <cite>NDArray</cite>. Only |
| accepts positional arguments.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>*args</strong> (<em>list of NDArray</em>) – Input tensors.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="attribute"> |
| <dt id="mxnet.gluon.Block.prefix"> |
| <code class="descname">prefix</code><a class="headerlink" href="#mxnet.gluon.Block.prefix" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Prefix of this <cite>Block</cite>.</p> |
| </dd></dl> |
| <dl class="attribute"> |
| <dt id="mxnet.gluon.Block.name"> |
| <code class="descname">name</code><a class="headerlink" href="#mxnet.gluon.Block.name" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Name of this <cite>Block</cite>, without ‘_’ at the end.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Block.name_scope"> |
| <code class="descname">name_scope</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.name_scope" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a name space object managing a child <cite>Block</cite> and parameter |
| names. Should be used within a <cite>with</cite> statement:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">dense</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| <dl class="attribute"> |
| <dt id="mxnet.gluon.Block.params"> |
| <code class="descname">params</code><a class="headerlink" href="#mxnet.gluon.Block.params" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns this <cite>Block</cite>’s parameter dictionary (does not include its |
| children’s parameters).</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Block.collect_params"> |
| <code class="descname">collect_params</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.collect_params" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a <cite>ParameterDict</cite> containing this <cite>Block</cite> and all of its |
| children’s Parameters.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Block.save_params"> |
| <code class="descname">save_params</code><span class="sig-paren">(</span><em>filename</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.save_params" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Save parameters to file.</p> |
| <dl class="docutils"> |
| <dt>filename <span class="classifier-delimiter">:</span> <span class="classifier">str</span></dt> |
| <dd>Path to file.</dd> |
| </dl> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Block.load_params"> |
| <code class="descname">load_params</code><span class="sig-paren">(</span><em>filename</em>, <em>ctx</em>, <em>allow_missing=False</em>, <em>ignore_extra=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.load_params" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Load parameters from file.</p> |
| <dl class="docutils"> |
| <dt>filename <span class="classifier-delimiter">:</span> <span class="classifier">str</span></dt> |
| <dd>Path to parameter file.</dd> |
| <dt>ctx <span class="classifier-delimiter">:</span> <span class="classifier">Context or list of Context</span></dt> |
| <dd>Context(s) to initialize loaded parameters on.</dd> |
| <dt>allow_missing <span class="classifier-delimiter">:</span> <span class="classifier">bool, default False</span></dt> |
| <dd>Whether to silently skip loading parameters not represented in the file.</dd> |
| <dt>ignore_extra <span class="classifier-delimiter">:</span> <span class="classifier">bool, default False</span></dt> |
| <dd>Whether to silently ignore parameters from the file that are not |
| present in this Block.</dd> |
| </dl> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Block.register_child"> |
| <code class="descname">register_child</code><span class="sig-paren">(</span><em>block</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.register_child" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Registers block as a child of self. <cite>Block</cite>s assigned to self as |
| attributes will be registered automatically.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Block.initialize"> |
| <code class="descname">initialize</code><span class="sig-paren">(</span><em>init=&lt;mxnet.initializer.Uniform object&gt;</em>, <em>ctx=None</em>, <em>verbose=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.initialize" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Initializes <cite>Parameter</cite>s of this <cite>Block</cite> and its children.</p> |
| <p>Equivalent to <cite>block.collect_params().initialize(...)</cite></p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Block.hybridize"> |
| <code class="descname">hybridize</code><span class="sig-paren">(</span><em>active=True</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.hybridize" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Activates or deactivates <cite>HybridBlock</cite>s recursively. Has no effect on |
| non-hybrid children.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>active</strong> (<em>bool</em><em>, </em><em>default True</em>) – Whether to turn hybrid on or off.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt> |
| <code class="descname">forward</code><span class="sig-paren">(</span><em>*args</em><span class="sig-paren">)</span></dt> |
| <dd><p>Overrides to implement forward computation using <cite>NDArray</cite>. Only |
| accepts positional arguments.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>*args</strong> (<em>list of NDArray</em>) – Input tensors.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.HybridBlock"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">HybridBlock</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.HybridBlock" title="Permalink to this definition">¶</a></dt> |
| <dd><p><cite>HybridBlock</cite> supports forwarding with both Symbol and NDArray.</p> |
| <p>Forward computation in <cite>HybridBlock</cite> must be static to work with <cite>Symbol</cite>s, |
| i.e. you cannot call <cite>.asnumpy()</cite>, <cite>.shape</cite>, <cite>.dtype</cite>, etc. on tensors. |
| Also, you cannot use branching or loop logic that is based on non-constant |
| expressions like random numbers or intermediate results, since they change |
| the graph structure for each iteration.</p> |
| <p>Before activating with <cite>hybridize()</cite>, <cite>HybridBlock</cite> works just like normal |
| <cite>Block</cite>. After activation, <cite>HybridBlock</cite> will create a symbolic graph |
| representing the forward computation and cache it. On subsequent forwards, |
| the cached graph will be used instead of <cite>hybrid_forward</cite>.</p> |
| <p>Refer to the <a class="reference external" href="http://mxnet.io/tutorials/gluon/hybrid.html">Hybrid tutorial</a> to see |
| the end-to-end usage.</p> |
| <dl class="method"> |
| <dt id="mxnet.gluon.HybridBlock.hybrid_forward"> |
| <code class="descname">hybrid_forward</code><span class="sig-paren">(</span><em>F</em>, <em>x</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.HybridBlock.hybrid_forward" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Overrides to construct symbolic graph for this <cite>Block</cite>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>x</strong> (<a class="reference internal" href="symbol.html#mxnet.symbol.Symbol" title="mxnet.symbol.Symbol"><em>Symbol</em></a><em> or </em><a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – The first input tensor.</li> |
| <li><strong>*args</strong> (<em>list of Symbol</em><em> or </em><em>list of NDArray</em>) – Additional input tensors.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.HybridBlock.infer_shape"> |
| <code class="descname">infer_shape</code><span class="sig-paren">(</span><em>*args</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.HybridBlock.infer_shape" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Infers shape of Parameters from inputs.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.HybridBlock.forward"> |
| <code class="descname">forward</code><span class="sig-paren">(</span><em>x</em>, <em>*args</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.HybridBlock.forward" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Defines the forward computation. Arguments can be either |
| <cite>NDArray</cite> or <cite>Symbol</cite>.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt> |
| <code class="descname">hybrid_forward</code><span class="sig-paren">(</span><em>F</em>, <em>x</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span></dt> |
| <dd><p>Overrides to construct symbolic graph for this <cite>Block</cite>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>x</strong> (<a class="reference internal" href="symbol.html#mxnet.symbol.Symbol" title="mxnet.symbol.Symbol"><em>Symbol</em></a><em> or </em><a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – The first input tensor.</li> |
| <li><strong>*args</strong> (<em>list of Symbol</em><em> or </em><em>list of NDArray</em>) – Additional input tensors.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| </dd></dl> |
| </div> |
| <div class="section" id="neural-network-layers"> |
| <span id="neural-network-layers"></span><h2>Neural Network Layers<a class="headerlink" href="#neural-network-layers" title="Permalink to this headline">¶</a></h2> |
| <div class="section" id="nn-containers"> |
| <span id="id1"></span><h3>Containers<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Sequential"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Sequential</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Sequential" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Stacks <cite>Block</cite>s sequentially.</p> |
| <p>Example:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">net</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">()</span> |
| <span class="c1"># use net's name_scope to give child Blocks appropriate names.</span> |
| <span class="k">with</span> <span class="n">net</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span> |
| <span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">'relu'</span><span class="p">))</span> |
| <span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">))</span> |
| </pre></div> |
| </div> |
| <dl class="method"> |
| <dt id="mxnet.gluon.nn.Sequential.add"> |
| <code class="descname">add</code><span class="sig-paren">(</span><em>block</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Sequential.add" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Adds block on top of the stack.</p> |
| </dd></dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.HybridSequential"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">HybridSequential</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.HybridSequential" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Stacks <cite>HybridBlock</cite>s sequentially.</p> |
| <p>Example:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">net</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">HybridSequential</span><span class="p">()</span> |
| <span class="c1"># use net's name_scope to give child Blocks appropriate names.</span> |
| <span class="k">with</span> <span class="n">net</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span> |
| <span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">'relu'</span><span class="p">))</span> |
| <span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">))</span> |
| </pre></div> |
| </div> |
| <dl class="method"> |
| <dt id="mxnet.gluon.nn.HybridSequential.add"> |
| <code class="descname">add</code><span class="sig-paren">(</span><em>block</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.HybridSequential.add" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Adds block on top of the stack.</p> |
| </dd></dl> |
| </dd></dl> |
| </div> |
| <div class="section" id="basic-layers"> |
| <span id="basic-layers"></span><h3>Basic Layers<a class="headerlink" href="#basic-layers" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Dense"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Dense</code><span class="sig-paren">(</span><em>units</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_units=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Dense" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Just your regular densely-connected NN layer.</p> |
| <p><cite>Dense</cite> implements the operation: |
| <cite>output = activation(dot(input, weight) + bias)</cite> |
| where <cite>activation</cite> is the element-wise activation function |
| passed as the <cite>activation</cite> argument, <cite>weight</cite> is a weights matrix |
| created by the layer, and <cite>bias</cite> is a bias vector created by the layer |
| (only applicable if <cite>use_bias</cite> is <cite>True</cite>).</p> |
| <p>Note: the input must be a tensor with rank 2. Use <cite>flatten</cite> to convert it |
| to rank 2 manually if necessary.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>units</strong> (<em>int</em>) – Dimensionality of the output space.</li> |
| <li><strong>activation</strong> (<em>str</em>) – Activation function to use. See help on <cite>Activation</cite> layer. |
| If you don’t specify anything, no activation is applied |
| (i.e. “linear” activation: <cite>a(x) = x</cite>).</li> |
| <li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li> |
| <li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>kernel</cite> weights matrix.</li> |
| <li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li> |
| <li><strong>in_units</strong> (<em>int</em><em>, </em><em>optional</em>) – Size of the input data. If not specified, initialization will be |
| deferred to the first time <cite>forward</cite> is called and <cite>in_units</cite> |
| will be inferred from the shape of input data.</li> |
| <li><strong>prefix</strong> (<em>str</em><em> or </em><em>None</em>) – See document of <cite>Block</cite>.</li> |
| <li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.ParameterDict" title="mxnet.gluon.ParameterDict"><em>ParameterDict</em></a><em> or </em><em>None</em>) – See document of <cite>Block</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>A 2D input with shape <cite>(batch_size, in_units)</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd>The output would have shape <cite>(batch_size, units)</cite>.</dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Activation"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Activation</code><span class="sig-paren">(</span><em>activation</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Activation" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Applies an activation function to input.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>activation</strong> (<em>str</em>) – Name of activation function to use. |
| See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a> for available choices.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>Arbitrary.</dd> |
| <dt>Output shape:</dt> |
| <dd>Same shape as input.</dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Dropout"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Dropout</code><span class="sig-paren">(</span><em>rate</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Dropout" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Applies Dropout to the input.</p> |
| <p>Dropout consists in randomly setting a fraction <cite>rate</cite> of input units |
| to 0 at each update during training time, which helps prevent overfitting.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>rate</strong> (<em>float</em>) – Fraction of the input units to drop. Must be a number between 0 and 1.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>Arbitrary.</dd> |
| <dt>Output shape:</dt> |
| <dd>Same shape as input.</dd> |
| </dl> |
| <p class="rubric">References</p> |
| <p><a class="reference external" href="http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf">Dropout: A Simple Way to Prevent Neural Networks from Overfitting</a></p> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.BatchNorm"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">BatchNorm</code><span class="sig-paren">(</span><em>axis=1</em>, <em>momentum=0.9</em>, <em>epsilon=0.001</em>, <em>center=True</em>, <em>scale=True</em>, <em>beta_initializer='zeros'</em>, <em>gamma_initializer='ones'</em>, <em>running_mean_initializer='zeros'</em>, <em>running_variance_initializer='ones'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.BatchNorm" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Batch normalization layer (Ioffe and Szegedy, 2014). |
| Normalizes the input at each batch, i.e. applies a transformation |
| that maintains the mean activation close to 0 and the activation |
| standard deviation close to 1.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>axis</strong> (<em>int</em><em>, </em><em>default 1</em>) – The axis that should be normalized. This is typically the channels |
| (C) axis. For instance, after a <cite>Conv2D</cite> layer with <cite>layout=’NCHW’</cite>, |
| set <cite>axis=1</cite> in <cite>BatchNorm</cite>. If <cite>layout=’NHWC’</cite>, then set <cite>axis=3</cite>.</li> |
| <li><strong>momentum</strong> (<em>float</em><em>, </em><em>default 0.9</em>) – Momentum for the moving average.</li> |
| <li><strong>epsilon</strong> (<em>float</em><em>, </em><em>default 1e-3</em>) – Small float added to variance to avoid dividing by zero.</li> |
| <li><strong>center</strong> (<em>bool</em><em>, </em><em>default True</em>) – If True, add offset of <cite>beta</cite> to normalized tensor. |
| If False, <cite>beta</cite> is ignored.</li> |
| <li><strong>scale</strong> (<em>bool</em><em>, </em><em>default True</em>) – If True, multiply by <cite>gamma</cite>. If False, <cite>gamma</cite> is not used. |
| When the next layer is linear (also e.g. <cite>nn.relu</cite>), |
| this can be disabled since the scaling |
| will be done by the next layer.</li> |
| <li><strong>beta_initializer</strong> (str or <cite>Initializer</cite>, default ‘zeros’) – Initializer for the beta weight.</li> |
| <li><strong>gamma_initializer</strong> (str or <cite>Initializer</cite>, default ‘ones’) – Initializer for the gamma weight.</li> |
| <li><strong>running_mean_initializer</strong> (str or <cite>Initializer</cite>, default ‘zeros’) – Initializer for the moving mean.</li> |
| <li><strong>running_variance_initializer</strong> (str or <cite>Initializer</cite>, default ‘ones’) – Initializer for the moving variance.</li> |
| <li><strong>in_channels</strong> (<em>int</em><em>, </em><em>default 0</em>) – Number of channels (feature maps) in input data. If not specified, |
| initialization will be deferred to the first time <cite>forward</cite> is called |
| and <cite>in_channels</cite> will be inferred from the shape of input data.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>Arbitrary.</dd> |
| <dt>Output shape:</dt> |
| <dd>Same shape as input.</dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.LeakyReLU"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">LeakyReLU</code><span class="sig-paren">(</span><em>alpha</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.LeakyReLU" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Leaky version of a Rectified Linear Unit.</p> |
| <p>It allows a small gradient when the unit is not active:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span>`f(x) = alpha * x for x &lt; 0`, |
| `f(x) = x for x &gt;= 0`. |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>alpha</strong> (<em>float</em>) – Slope coefficient for the negative half axis. Must be &gt;= 0.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>Arbitrary.</dd> |
| <dt>Output shape:</dt> |
| <dd>Same shape as input.</dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Embedding"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Embedding</code><span class="sig-paren">(</span><em>input_dim</em>, <em>output_dim</em>, <em>dtype='float32'</em>, <em>weight_initializer=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Embedding" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Turns non-negative integers (indexes/tokens) into dense vectors |
| of fixed size. e.g. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>input_dim</strong> (<em>int</em>) – Size of the vocabulary, i.e. maximum integer index + 1.</li> |
| <li><strong>output_dim</strong> (<em>int</em>) – Dimension of the dense embedding.</li> |
| <li><strong>dtype</strong> (<em>str</em><em> or </em><em>np.dtype</em><em>, </em><em>default 'float32'</em>) – Data type of output embeddings.</li> |
| <li><strong>weight_initializer</strong> (<a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the <cite>embeddings</cite> matrix.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>2D tensor with shape: <cite>(N, M)</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd>3D tensor with shape: <cite>(N, M, output_dim)</cite>.</dd> |
| </dl> |
| </dd></dl> |
| </div> |
| <div class="section" id="convolutional-layers"> |
| <span id="convolutional-layers"></span><h3>Convolutional Layers<a class="headerlink" href="#convolutional-layers" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Conv1D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv1D</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=1</em>, <em>padding=0</em>, <em>dilation=1</em>, <em>groups=1</em>, <em>layout='NCW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv1D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>1D convolution layer (e.g. temporal convolution).</p> |
| <p>This layer creates a convolution kernel that is convolved |
| with the layer input over a single spatial (or temporal) dimension |
| to produce a tensor of outputs. |
| If <cite>use_bias</cite> is True, a bias vector is created and added to the outputs. |
| Finally, if <cite>activation</cite> is not <cite>None</cite>, |
| it is applied to the outputs as well.</p> |
| <p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be |
| deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be |
| inferred from the shape of input data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output |
| channels (filters) in the convolution.</li> |
| <li><strong>kernel_size</strong> (<em>int</em><em> or </em><em>tuple/list of 1 int</em>) – Specifies the dimensions of the convolution window.</li> |
| <li><strong>strides</strong> (<em>int</em><em> or </em><em>tuple/list of 1 int</em>) – Specifies the strides of the convolution.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>a tuple/list of 1 int</em>) – If padding is non-zero, then the input is implicitly zero-padded |
| on both sides by <cite>padding</cite> number of points.</li> |
| <li><strong>dilation</strong> (<em>int</em><em> or </em><em>tuple/list of 1 int</em>) – Specifies the dilation rate to use for dilated convolution.</li> |
| <li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs. |
| At groups=1, all inputs are convolved to all outputs. |
| At groups=2, the operation becomes equivalent to having two conv |
| layers side by side, each seeing half the input channels, and producing |
| half the output channels, and both subsequently concatenated.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCW'</em>) – Dimension ordering of data and weight. Can be ‘NCW’, ‘NWC’, etc. |
| ‘N’, ‘C’, ‘W’ stands for batch, channel, and width (time) dimensions |
| respectively. Convolution is applied on the ‘W’ dimension.</li> |
| <li><strong>in_channels</strong> (<em>int</em><em>, </em><em>default 0</em>) – The number of input channels to this layer. If not specified, |
| initialization will be deferred to the first time <cite>forward</cite> is called |
| and <cite>in_channels</cite> will be inferred from the shape of input data.</li> |
| <li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>. |
| If you don’t specify anything, no activation is applied |
| (i.e. “linear” activation: <cite>a(x) = x</cite>).</li> |
| <li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li> |
| <li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li> |
| <li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 3D array of shape |
| (batch_size, in_channels, width) if <cite>layout</cite> is <cite>NCW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 3D array of shape |
| (batch_size, channels, out_width) if <cite>layout</cite> is <cite>NCW</cite>. |
| out_width is calculated as:</p> |
| <div class="last highlight-default"><div class="highlight"><pre><span></span><span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="o">-</span><span class="n">dilation</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Conv2D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv2D</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=(1</em>, <em>1)</em>, <em>padding=(0</em>, <em>0)</em>, <em>dilation=(1</em>, <em>1)</em>, <em>groups=1</em>, <em>layout='NCHW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv2D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>2D convolution layer (e.g. spatial convolution over images).</p> |
| <p>This layer creates a convolution kernel that is convolved |
| with the layer input to produce a tensor of |
| outputs. If <cite>use_bias</cite> is True, |
| a bias vector is created and added to the outputs. Finally, if |
| <cite>activation</cite> is not <cite>None</cite>, it is applied to the outputs as well.</p> |
| <p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be |
| deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be |
| inferred from the shape of input data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output |
| channels (filters) in the convolution.</li> |
| <li><strong>kernel_size</strong> (<em>int</em><em> or </em><em>tuple/list of 2 int</em>) – Specifies the dimensions of the convolution window.</li> |
| <li><strong>strides</strong> (<em>int</em><em> or </em><em>tuple/list of 2 int</em>) – Specifies the strides of the convolution.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>a tuple/list of 2 int</em>) – If padding is non-zero, then the input is implicitly zero-padded |
| on both sides by <cite>padding</cite> number of points.</li> |
| <li><strong>dilation</strong> (<em>int</em><em> or </em><em>tuple/list of 2 int</em>) – Specifies the dilation rate to use for dilated convolution.</li> |
| <li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs. |
| At groups=1, all inputs are convolved to all outputs. |
| At groups=2, the operation becomes equivalent to having two conv |
| layers side by side, each seeing half the input channels, and producing |
| half the output channels, and both subsequently concatenated.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCHW'</em>) – Dimension ordering of data and weight. Can be ‘NCHW’, ‘NHWC’, etc. |
| ‘N’, ‘C’, ‘H’, ‘W’ stands for batch, channel, height, and width |
| dimensions respectively. Convolution is applied on the ‘H’ and |
| ‘W’ dimensions.</li> |
| <li><strong>in_channels</strong> (<em>int</em><em>, </em><em>default 0</em>) – The number of input channels to this layer. If not specified, |
| initialization will be deferred to the first time <cite>forward</cite> is called |
| and <cite>in_channels</cite> will be inferred from the shape of input data.</li> |
| <li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>. |
| If you don’t specify anything, no activation is applied |
| (i.e. “linear” activation: <cite>a(x) = x</cite>).</li> |
| <li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li> |
| <li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li> |
| <li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 4D array of shape |
| (batch_size, in_channels, height, width) if <cite>layout</cite> is <cite>NCHW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 4D array of shape |
| (batch_size, channels, out_height, out_width) if <cite>layout</cite> is <cite>NCHW</cite>.</p> |
| <p>out_height and out_width are calculated as:</p> |
| <div class="last highlight-default"><div class="highlight"><pre><span></span><span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| <span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Conv3D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv3D</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=(1</em>, <em>1</em>, <em>1)</em>, <em>padding=(0</em>, <em>0</em>, <em>0)</em>, <em>dilation=(1</em>, <em>1</em>, <em>1)</em>, <em>groups=1</em>, <em>layout='NCDHW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv3D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>3D convolution layer (e.g. spatial convolution over volumes).</p> |
| <p>This layer creates a convolution kernel that is convolved |
| with the layer input to produce a tensor of |
| outputs. If <cite>use_bias</cite> is <cite>True</cite>, |
| a bias vector is created and added to the outputs. Finally, if |
| <cite>activation</cite> is not <cite>None</cite>, it is applied to the outputs as well.</p> |
| <p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be |
| deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be |
| inferred from the shape of input data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output |
| channels (filters) in the convolution.</li> |
| <li><strong>kernel_size</strong> (<em>int</em><em> or </em><em>tuple/list of 3 int</em>) – Specifies the dimensions of the convolution window.</li> |
| <li><strong>strides</strong> (<em>int</em><em> or </em><em>tuple/list of 3 int</em>) – Specifies the strides of the convolution.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>a tuple/list of 3 int</em>) – If padding is non-zero, then the input is implicitly zero-padded |
| on both sides by <cite>padding</cite> number of points.</li> |
| <li><strong>dilation</strong> (<em>int</em><em> or </em><em>tuple/list of 3 int</em>) – Specifies the dilation rate to use for dilated convolution.</li> |
| <li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs. |
| At groups=1, all inputs are convolved to all outputs. |
| At groups=2, the operation becomes equivalent to having two conv |
| layers side by side, each seeing half the input channels, and producing |
| half the output channels, and both subsequently concatenated.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCDHW'</em>) – Dimension ordering of data and weight. Can be ‘NCDHW’, ‘NDHWC’, etc. |
| ‘N’, ‘C’, ‘H’, ‘W’, ‘D’ stands for batch, channel, height, width and |
| depth dimensions respectively. Convolution is applied on the ‘D’, |
| ‘H’ and ‘W’ dimensions.</li> |
| <li><strong>in_channels</strong> (<em>int</em><em>, </em><em>default 0</em>) – The number of input channels to this layer. If not specified, |
| initialization will be deferred to the first time <cite>forward</cite> is called |
| and <cite>in_channels</cite> will be inferred from the shape of input data.</li> |
| <li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>. |
| If you don’t specify anything, no activation is applied |
| (i.e. “linear” activation: <cite>a(x) = x</cite>).</li> |
| <li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li> |
| <li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li> |
| <li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 5D array of shape |
| (batch_size, in_channels, depth, height, width) if <cite>layout</cite> is <cite>NCDHW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 5D array of shape |
| (batch_size, channels, out_depth, out_height, out_width) if <cite>layout</cite> is |
| <cite>NCDHW</cite>.</p> |
| <p>out_depth, out_height and out_width are calculated as:</p> |
| <div class="last highlight-default"><div class="highlight"><pre><span></span><span class="n">out_depth</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">depth</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| <span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| <span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Conv1DTranspose"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv1DTranspose</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=1</em>, <em>padding=0</em>, <em>output_padding=0</em>, <em>dilation=1</em>, <em>groups=1</em>, <em>layout='NCW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv1DTranspose" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Transposed 1D convolution layer (sometimes called Deconvolution).</p> |
| <p>The need for transposed convolutions generally arises |
| from the desire to use a transformation going in the opposite direction |
| of a normal convolution, i.e., from something that has the shape of the |
| output of some convolution to something that has the shape of its input |
| while maintaining a connectivity pattern that is compatible with |
| said convolution.</p> |
| <p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be |
| deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be |
| inferred from the shape of input data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output |
| channels (filters) in the convolution.</li> |
| <li><strong>kernel_size</strong> (<em>int</em><em> or </em><em>tuple/list of 1 int</em>) – Specifies the dimensions of the convolution window.</li> |
| <li><strong>strides</strong> (<em>int</em><em> or </em><em>tuple/list of 1 int</em>) – Specifies the strides of the convolution.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>a tuple/list of 1 int</em>) – If padding is non-zero, then the input is implicitly zero-padded |
| on both sides by <cite>padding</cite> number of points.</li> |
| <li><strong>dilation</strong> (<em>int</em><em> or </em><em>tuple/list of 1 int</em>) – Specifies the dilation rate to use for dilated convolution.</li> |
| <li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs. |
| At groups=1, all inputs are convolved to all outputs. |
| At groups=2, the operation becomes equivalent to having two conv |
| layers side by side, each seeing half the input channels, and producing |
| half the output channels, and both subsequently concatenated.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCW'</em>) – Dimension ordering of data and weight. Can be ‘NCW’, ‘NWC’, etc. |
| ‘N’, ‘C’, ‘W’ stands for batch, channel, and width (time) dimensions |
| respectively. Convolution is applied on the ‘W’ dimension.</li> |
| <li><strong>in_channels</strong> (<em>int</em><em>, </em><em>default 0</em>) – The number of input channels to this layer. If not specified, |
| initialization will be deferred to the first time <cite>forward</cite> is called |
| and <cite>in_channels</cite> will be inferred from the shape of input data.</li> |
| <li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>. |
| If you don’t specify anything, no activation is applied |
| (i.e. “linear” activation: <cite>a(x) = x</cite>).</li> |
| <li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li> |
| <li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li> |
| <li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 3D array of shape |
| (batch_size, in_channels, width) if <cite>layout</cite> is <cite>NCW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 3D array of shape |
| (batch_size, channels, out_width) if <cite>layout</cite> is <cite>NCW</cite>.</p> |
| <p>out_width is calculated as:</p> |
| <div class="last highlight-default"><div class="highlight"><pre><span></span><span class="n">out_width</span> <span class="o">=</span> <span class="p">(</span><span class="n">width</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="o">+</span><span class="n">kernel_size</span><span class="o">+</span><span class="n">output_padding</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Conv2DTranspose"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv2DTranspose</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=(1</em>, <em>1)</em>, <em>padding=(0</em>, <em>0)</em>, <em>output_padding=(0</em>, <em>0)</em>, <em>dilation=(1</em>, <em>1)</em>, <em>groups=1</em>, <em>layout='NCHW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv2DTranspose" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Transposed 2D convolution layer (sometimes called Deconvolution).</p> |
| <p>The need for transposed convolutions generally arises |
| from the desire to use a transformation going in the opposite direction |
| of a normal convolution, i.e., from something that has the shape of the |
| output of some convolution to something that has the shape of its input |
| while maintaining a connectivity pattern that is compatible with |
| said convolution.</p> |
| <p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be |
| deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be |
| inferred from the shape of input data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output |
| channels (filters) in the convolution.</li> |
| <li><strong>kernel_size</strong> (<em>int</em><em> or </em><em>tuple/list of 2 int</em>) – Specifies the dimensions of the convolution window.</li> |
| <li><strong>strides</strong> (<em>int</em><em> or </em><em>tuple/list of 2 int</em>) – Specify the strides of the convolution.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>a tuple/list of 2 int</em>) – If padding is non-zero, then the input is implicitly zero-padded |
| on both sides for padding number of points.</li> |
| <li><strong>dilation</strong> (<em>int</em><em> or </em><em>tuple/list of 2 int</em>) – Specifies the dilation rate to use for dilated convolution.</li> |
| <li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs. |
| At groups=1, all inputs are convolved to all outputs. |
| At groups=2, the operation becomes equivalent to having two conv |
| layers side by side, each seeing half the input channels, and producing |
| half the output channels, and both subsequently concatenated.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCHW'</em>) – Dimension ordering of data and weight. Can be ‘NCHW’, ‘NHWC’, etc. |
| ‘N’, ‘C’, ‘H’, ‘W’ stands for batch, channel, height, and width |
| dimensions respectively. Convolution is applied on the ‘H’ and |
| ‘W’ dimensions.</li> |
| <li><strong>in_channels</strong> (<em>int</em><em>, </em><em>default 0</em>) – The number of input channels to this layer. If not specified, |
| initialization will be deferred to the first time <cite>forward</cite> is called |
| and <cite>in_channels</cite> will be inferred from the shape of input data.</li> |
| <li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>. |
| If you don’t specify anything, no activation is applied |
| (i.e. “linear” activation: <cite>a(x) = x</cite>).</li> |
| <li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li> |
| <li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li> |
| <li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 4D array of shape |
| (batch_size, in_channels, height, width) if <cite>layout</cite> is <cite>NCHW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 4D array of shape |
| (batch_size, channels, out_height, out_width) if <cite>layout</cite> is <cite>NCHW</cite>.</p> |
| <p>out_height and out_width are calculated as:</p> |
| <div class="last highlight-default"><div class="highlight"><pre><span></span><span class="n">out_height</span> <span class="o">=</span> <span class="p">(</span><span class="n">height</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <span class="n">out_width</span> <span class="o">=</span> <span class="p">(</span><span class="n">width</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.Conv3DTranspose"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv3DTranspose</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=(1</em>, <em>1</em>, <em>1)</em>, <em>padding=(0</em>, <em>0</em>, <em>0)</em>, <em>output_padding=(0</em>, <em>0</em>, <em>0)</em>, <em>dilation=(1</em>, <em>1</em>, <em>1)</em>, <em>groups=1</em>, <em>layout='NCDHW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv3DTranspose" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Transposed 3D convolution layer (sometimes called Deconvolution).</p> |
| <p>The need for transposed convolutions generally arises |
| from the desire to use a transformation going in the opposite direction |
| of a normal convolution, i.e., from something that has the shape of the |
| output of some convolution to something that has the shape of its input |
| while maintaining a connectivity pattern that is compatible with |
| said convolution.</p> |
| <p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be |
| deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be |
| inferred from the shape of input data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output |
| channels (filters) in the convolution.</li> |
| <li><strong>kernel_size</strong> (<em>int</em><em> or </em><em>tuple/list of 3 int</em>) – Specifies the dimensions of the convolution window.</li> |
| <li><strong>strides</strong> (<em>int</em><em> or </em><em>tuple/list of 3 int</em><em>,</em><em></em>) – Specify the strides of the convolution.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>a tuple/list of 3 int</em><em>,</em><em></em>) – If padding is non-zero, then the input is implicitly zero-padded |
| on both sides for padding number of points</li> |
| <li><strong>dilation</strong> (<em>int</em><em> or </em><em>tuple/list of 3 int</em>) – Specifies the dilation rate to use for dilated convolution.</li> |
| <li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs. |
| At groups=1, all inputs are convolved to all outputs. |
| At groups=2, the operation becomes equivalent to having two conv |
| layers side by side, each seeing half the input channels, and producing |
| half the output channels, and both subsequently concatenated.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCDHW'</em>) – Dimension ordering of data and weight. Can be ‘NCDHW’, ‘NDHWC’, etc. |
| ‘N’, ‘C’, ‘H’, ‘W’, ‘D’ stands for batch, channel, height, width and |
| depth dimensions respectively. Convolution is applied on the ‘D’, |
| ‘H’, and ‘W’ dimensions.</li> |
| <li><strong>in_channels</strong> (<em>int</em><em>, </em><em>default 0</em>) – The number of input channels to this layer. If not specified, |
| initialization will be deferred to the first time <cite>forward</cite> is called |
| and <cite>in_channels</cite> will be inferred from the shape of input data.</li> |
| <li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>. |
| If you don’t specify anything, no activation is applied |
| (i.e. “linear” activation: <cite>a(x) = x</cite>).</li> |
| <li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li> |
| <li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li> |
| <li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 5D array of shape |
| (batch_size, in_channels, depth, height, width) if <cite>layout</cite> is <cite>NCDHW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 5D array of shape |
| (batch_size, channels, out_depth, out_height, out_width) if <cite>layout</cite> is <cite>NCDHW</cite>. |
| out_depth, out_height and out_width are calculated as:</p> |
| <div class="last highlight-default"><div class="highlight"><pre><span></span><span class="n">out_depth</span> <span class="o">=</span> <span class="p">(</span><span class="n">depth</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <span class="n">out_height</span> <span class="o">=</span> <span class="p">(</span><span class="n">height</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> |
| <span class="n">out_width</span> <span class="o">=</span> <span class="p">(</span><span class="n">width</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| </div> |
| <div class="section" id="pooling-layers"> |
| <span id="pooling-layers"></span><h3>Pooling Layers<a class="headerlink" href="#pooling-layers" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.MaxPool1D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">MaxPool1D</code><span class="sig-paren">(</span><em>pool_size=2</em>, <em>strides=None</em>, <em>padding=0</em>, <em>layout='NCW'</em>, <em>ceil_mode=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.MaxPool1D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Max pooling operation for one dimensional data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>pool_size</strong> (<em>int</em>) – Size of the max pooling windows.</li> |
| <li><strong>strides</strong> (<em>int</em><em>, or </em><em>None</em>) – Factor by which to downscale. E.g. 2 will halve the input size. |
| If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li> |
| <li><strong>padding</strong> (<em>int</em>) – If padding is non-zero, then the input is implicitly |
| zero-padded on both sides for padding number of points.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCW'</em>) – Dimension ordering of data and weight. Can be ‘NCW’, ‘NWC’, etc. |
| ‘N’, ‘C’, ‘W’ stands for batch, channel, and width (time) dimensions |
| respectively. Pooling is applied on the W dimension.</li> |
| <li><strong>ceil_mode</strong> (<em>bool</em><em>, </em><em>default False</em>) – When <cite>True</cite>, will use ceil instead of floor to compute the output shape.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 3D array of shape |
| (batch_size, channels, width) if <cite>layout</cite> is <cite>NCW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 3D array of shape |
| (batch_size, channels, out_width) if <cite>layout</cite> is <cite>NCW</cite>.</p> |
| <p>out_width is calculated as:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="o">-</span><span class="n">pool_size</span><span class="p">)</span><span class="o">/</span><span class="n">strides</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span> |
| </pre></div> |
| </div> |
| <p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this |
| equation.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.MaxPool2D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">MaxPool2D</code><span class="sig-paren">(</span><em>pool_size=(2</em>, <em>2)</em>, <em>strides=None</em>, <em>padding=0</em>, <em>layout='NCHW'</em>, <em>ceil_mode=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.MaxPool2D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Max pooling operation for two dimensional (spatial) data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>pool_size</strong> (<em>int</em><em> or </em><em>list/tuple of 2 ints</em><em>,</em><em></em>) – Size of the max pooling windows.</li> |
| <li><strong>strides</strong> (<em>int</em><em>, </em><em>list/tuple of 2 ints</em><em>, or </em><em>None.</em>) – Factor by which to downscale. E.g. 2 will halve the input size. |
| If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>list/tuple of 2 ints</em><em>,</em><em></em>) – If padding is non-zero, then the input is implicitly |
| zero-padded on both sides for padding number of points.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCHW'</em>) – Dimension ordering of data and weight. Can be ‘NCHW’, ‘NHWC’, etc. |
| ‘N’, ‘C’, ‘H’, ‘W’ stands for batch, channel, height, and width |
| dimensions respectively. Pooling is applied on the ‘H’ and ‘W’ dimensions.</li> |
| <li><strong>ceil_mode</strong> (<em>bool</em><em>, </em><em>default False</em>) – When <cite>True</cite>, will use ceil instead of floor to compute the output shape.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 4D array of shape |
| (batch_size, channels, height, width) if <cite>layout</cite> is <cite>NCHW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 4D array of shape |
| (batch_size, channels, out_height, out_width) if <cite>layout</cite> is <cite>NCHW</cite>.</p> |
| <p>out_height and out_width are calculated as:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| <span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| </pre></div> |
| </div> |
| <p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this |
| equation.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.MaxPool3D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">MaxPool3D</code><span class="sig-paren">(</span><em>pool_size=(2</em>, <em>2</em>, <em>2)</em>, <em>strides=None</em>, <em>padding=0</em>, <em>ceil_mode=False</em>, <em>layout='NCDHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.MaxPool3D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Max pooling operation for 3D data (spatial or spatio-temporal).</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>pool_size</strong> (<em>int</em><em> or </em><em>list/tuple of 3 ints</em><em>,</em><em></em>) – Size of the max pooling windows.</li> |
| <li><strong>strides</strong> (<em>int</em><em>, </em><em>list/tuple of 3 ints</em><em>, or </em><em>None.</em>) – Factor by which to downscale. E.g. 2 will halve the input size. |
| If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>list/tuple of 3 ints</em><em>,</em><em></em>) – If padding is non-zero, then the input is implicitly |
| zero-padded on both sides for padding number of points.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCDHW'</em>) – Dimension ordering of data and weight. Can be ‘NCDHW’, ‘NDHWC’, etc. |
| ‘N’, ‘C’, ‘H’, ‘W’, ‘D’ stands for batch, channel, height, width and |
| depth dimensions respectively. Pooling is applied on the ‘D’, ‘H’ and ‘W’ |
| dimensions.</li> |
| <li><strong>ceil_mode</strong> (<em>bool</em><em>, </em><em>default False</em>) – When <cite>True</cite>, will use ceil instead of floor to compute the output shape.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 5D array of shape |
| (batch_size, channels, depth, height, width) if <cite>layout</cite> is <cite>NCDHW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 5D array of shape |
| (batch_size, channels, out_depth, out_height, out_width) if <cite>layout</cite> |
| is <cite>NCDHW</cite>.</p> |
| <p>out_depth, out_height and out_width are calculated as:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">out_depth</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">depth</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| <span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| <span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| </pre></div> |
| </div> |
| <p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this |
| equation.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.AvgPool1D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">AvgPool1D</code><span class="sig-paren">(</span><em>pool_size=2</em>, <em>strides=None</em>, <em>padding=0</em>, <em>layout='NCW'</em>, <em>ceil_mode=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.AvgPool1D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Average pooling operation for temporal data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>pool_size</strong> (<em>int</em>) – Size of the average pooling windows.</li> |
| <li><strong>strides</strong> (<em>int</em><em>, or </em><em>None</em>) – Factor by which to downscale. E.g. 2 will halve the input size. |
| If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li> |
| <li><strong>padding</strong> (<em>int</em>) – If padding is non-zero, then the input is implicitly |
| zero-padded on both sides for padding number of points.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCW'</em>) – Dimension ordering of data and weight. Can be ‘NCW’, ‘NWC’, etc. |
| ‘N’, ‘C’, ‘W’ stands for batch, channel, and width (time) dimensions |
| respectively. Pooling is applied on the ‘W’ dimension.</li> |
| <li><strong>ceil_mode</strong> (<em>bool</em><em>, </em><em>default False</em>) – When <cite>True</cite>, will use ceil instead of floor to compute the output shape.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 3D array of shape |
| (batch_size, channels, width) if <cite>layout</cite> is <cite>NCW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 3D array of shape |
| (batch_size, channels, out_width) if <cite>layout</cite> is <cite>NCW</cite>.</p> |
| <p>out_width is calculated as:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="o">-</span><span class="n">pool_size</span><span class="p">)</span><span class="o">/</span><span class="n">strides</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span> |
| </pre></div> |
| </div> |
| <p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this |
| equation.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.AvgPool2D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">AvgPool2D</code><span class="sig-paren">(</span><em>pool_size=(2</em>, <em>2)</em>, <em>strides=None</em>, <em>padding=0</em>, <em>ceil_mode=False</em>, <em>layout='NCHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.AvgPool2D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Average pooling operation for spatial data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>pool_size</strong> (<em>int</em><em> or </em><em>list/tuple of 2 ints</em>) – Size of the average pooling windows.</li> |
| <li><strong>strides</strong> (<em>int</em><em>, </em><em>list/tuple of 2 ints</em><em>, or </em><em>None.</em>) – Factor by which to downscale. E.g. 2 will halve the input size. |
| If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>list/tuple of 2 ints</em><em>,</em><em></em>) – If padding is non-zero, then the input is implicitly |
| zero-padded on both sides for padding number of points.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCHW'</em>) – Dimension ordering of data and weight. Can be ‘NCHW’, ‘NHWC’, etc. |
| ‘N’, ‘C’, ‘H’, ‘W’ stand for batch, channel, height, and width |
| dimensions respectively. Padding is applied on the ‘H’ and ‘W’ dimensions.</li> |
| <li><strong>ceil_mode</strong> (<em>bool</em><em>, </em><em>default False</em>) – When True, will use ceil instead of floor to compute the output shape.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 4D array of shape |
| (batch_size, channels, height, width) if <cite>layout</cite> is <cite>NCHW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 4D array of shape |
| (batch_size, channels, out_height, out_width) if <cite>layout</cite> is <cite>NCHW</cite>.</p> |
| <p>out_height and out_width are calculated as:</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| <span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| </pre></div> |
| </div> |
| <p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this |
| equation.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.AvgPool3D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">AvgPool3D</code><span class="sig-paren">(</span><em>pool_size=(2</em>, <em>2</em>, <em>2)</em>, <em>strides=None</em>, <em>padding=0</em>, <em>ceil_mode=False</em>, <em>layout='NCDHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.AvgPool3D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Average pooling operation for 3D data (spatial or spatio-temporal).</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>pool_size</strong> (<em>int</em><em> or </em><em>list/tuple of 3 ints</em>) – Size of the average pooling windows.</li> |
| <li><strong>strides</strong> (<em>int</em><em>, </em><em>list/tuple of 3 ints</em><em>, or </em><em>None.</em>) – Factor by which to downscale. E.g. 2 will halve the input size. |
| If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li> |
| <li><strong>padding</strong> (<em>int</em><em> or </em><em>list/tuple of 3 ints</em><em>,</em><em></em>) – If padding is non-zero, then the input is implicitly |
| zero-padded on both sides for padding number of points.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'NCDHW'</em>) – Dimension ordering of data and weight. Can be ‘NCDHW’, ‘NDHWC’, etc. |
| ‘N’, ‘C’, ‘H’, ‘W’, ‘D’ stand for batch, channel, height, width and |
| depth dimensions respectively. Padding is applied on the ‘D’, ‘H’ and ‘W’ |
| dimensions.</li> |
| <li><strong>ceil_mode</strong> (<em>bool</em><em>, </em><em>default False</em>) – When True, will use ceil instead of floor to compute the output shape.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shape:</dt> |
| <dd>This depends on the <cite>layout</cite> parameter. Input is 5D array of shape |
| (batch_size, channels, depth, height, width) if <cite>layout</cite> is <cite>NCDHW</cite>.</dd> |
| <dt>Output shape:</dt> |
| <dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 5D array of shape |
| (batch_size, channels, out_depth, out_height, out_width) if <cite>layout</cite> |
| is <cite>NCDHW</cite>.</p> |
| <p>out_depth, out_height and out_width are calculated as</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">out_depth</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">depth</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| <span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| <span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span> |
| </pre></div> |
| </div> |
| <p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this |
| equation.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.GlobalMaxPool1D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalMaxPool1D</code><span class="sig-paren">(</span><em>layout='NCW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalMaxPool1D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Global max pooling operation for temporal data.</p> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.GlobalMaxPool2D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalMaxPool2D</code><span class="sig-paren">(</span><em>layout='NCHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalMaxPool2D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Global max pooling operation for spatial data.</p> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.GlobalMaxPool3D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalMaxPool3D</code><span class="sig-paren">(</span><em>layout='NCDHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalMaxPool3D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Global max pooling operation for 3D data.</p> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.GlobalAvgPool1D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalAvgPool1D</code><span class="sig-paren">(</span><em>layout='NCW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalAvgPool1D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Global average pooling operation for temporal data.</p> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.GlobalAvgPool2D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalAvgPool2D</code><span class="sig-paren">(</span><em>layout='NCHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalAvgPool2D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Global average pooling operation for spatial data.</p> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.nn.GlobalAvgPool3D"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalAvgPool3D</code><span class="sig-paren">(</span><em>layout='NCDHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalAvgPool3D" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Global average pooling operation for 3D data.</p> |
| </dd></dl> |
| </div> |
| </div> |
| <div class="section" id="recurrent-layers"> |
| <h2>Recurrent Layers<a class="headerlink" href="#recurrent-layers" title="Permalink to this headline">¶</a></h2> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.RecurrentCell"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">RecurrentCell</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Abstract base class for RNN cells</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>prefix</strong> (<em>str</em><em>, </em><em>optional</em>) – Prefix for names of <cite>Block</cite>s |
| (this prefix is also used for names of weights if <cite>params</cite> is <cite>None</cite>, |
| i.e. if <cite>params</cite> are being created and not reused).</li> |
| <li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.Parameter" title="mxnet.gluon.Parameter"><em>Parameter</em></a><em> or </em><em>None</em><em>, </em><em>optional</em>) – Container for weight sharing between cells. |
| A new Parameter container is created if <cite>params</cite> is <cite>None</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="mxnet.gluon.rnn.RecurrentCell.__call__"> |
| <code class="descname">__call__</code><span class="sig-paren">(</span><em>*args</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.__call__" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Calls forward. Only accepts positional arguments.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.rnn.RecurrentCell.reset"> |
| <code class="descname">reset</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.reset" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Reset before re-using the cell for another graph.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.rnn.RecurrentCell.state_info"> |
| <code class="descname">state_info</code><span class="sig-paren">(</span><em>batch_size=0</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.state_info" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Shape and layout information of states.</p> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.rnn.RecurrentCell.begin_state"> |
| <code class="descname">begin_state</code><span class="sig-paren">(</span><em>batch_size=0</em>, <em>func=&lt;function zeros&gt;</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.begin_state" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Initial state for this cell.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>func</strong> (<em>callable</em><em>, </em><em>default symbol.zeros</em>) – <p>Function for creating initial state.</p> |
| <p>For Symbol API, func can be <cite>symbol.zeros</cite>, <cite>symbol.uniform</cite>, |
| <cite>symbol.var</cite>, etc. Use <cite>symbol.var</cite> if you want to directly |
| feed input as states.</p> |
| <p>For NDArray API, func can be <cite>ndarray.zeros</cite>, <cite>ndarray.ones</cite>, etc.</p> |
| </li> |
| <li><strong>batch_size</strong> (<em>int</em><em>, </em><em>default 0</em>) – Only required for NDArray API. Size of the batch (‘N’ in layout) |
| dimension of input.</li> |
| <li><strong>**kwargs</strong> – Additional keyword arguments passed to func. For example |
| <cite>mean</cite>, <cite>std</cite>, <cite>dtype</cite>, etc.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>states</strong> – Starting states for the first RNN step.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">nested list of Symbol</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.rnn.RecurrentCell.unroll"> |
| <code class="descname">unroll</code><span class="sig-paren">(</span><em>length</em>, <em>inputs</em>, <em>begin_state=None</em>, <em>layout='NTC'</em>, <em>merge_outputs=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.unroll" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Unrolls an RNN cell across time steps.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>length</strong> (<em>int</em>) – Number of steps to unroll.</li> |
| <li><strong>inputs</strong> (<a class="reference internal" href="symbol.html#mxnet.symbol.Symbol" title="mxnet.symbol.Symbol"><em>Symbol</em></a><em>, </em><em>list of Symbol</em><em>, or </em><em>None</em>) – <p>If <cite>inputs</cite> is a single Symbol (usually the output |
| of Embedding symbol), it should have shape |
| (batch_size, length, ...) if <cite>layout</cite> is ‘NTC’, |
| or (length, batch_size, ...) if <cite>layout</cite> is ‘TNC’.</p> |
| <p>If <cite>inputs</cite> is a list of symbols (usually output of |
| previous unroll), they should all have shape |
| (batch_size, ...).</p> |
| </li> |
| <li><strong>begin_state</strong> (<em>nested list of Symbol</em><em>, </em><em>optional</em>) – Input states created by <cite>begin_state()</cite> |
| or output state of another cell. |
| Created from <cite>begin_state()</cite> if <cite>None</cite>.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>optional</em>) – <cite>layout</cite> of input symbol. Only used if inputs |
| is a single Symbol.</li> |
| <li><strong>merge_outputs</strong> (<em>bool</em><em>, </em><em>optional</em>) – If <cite>False</cite>, returns outputs as a list of Symbols. |
| If <cite>True</cite>, concatenates output across time steps |
| and returns a single symbol with shape |
| (batch_size, length, ...) if layout is ‘NTC’, |
| or (length, batch_size, ...) if layout is ‘TNC’. |
| If <cite>None</cite>, output whatever is faster.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>outputs</strong> (<em>list of Symbol or Symbol</em>) – Symbol (if <cite>merge_outputs</cite> is True) or list of Symbols |
| (if <cite>merge_outputs</cite> is False) corresponding to the output from |
| the RNN from this unrolling.</li> |
| <li><strong>states</strong> (<em>list of Symbol</em>) – The new state of this RNN after this unrolling. |
| The type of this symbol is same as the output of <cite>begin_state()</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.rnn.RecurrentCell.forward"> |
| <code class="descname">forward</code><span class="sig-paren">(</span><em>inputs</em>, <em>states</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.forward" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Unrolls the recurrent cell for one time step.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>inputs</strong> (<em>sym.Variable</em>) – Input symbol, 2D, of shape (batch_size, num_units).</li> |
| <li><strong>states</strong> (<em>list of sym.Variable</em>) – RNN state from previous step or the output of begin_state().</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>output</strong> (<em>Symbol</em>) – Symbol corresponding to the output from the RNN when unrolling |
| for a single time step.</li> |
| <li><strong>states</strong> (<em>list of Symbol</em>) – The new state of this RNN after this unrolling. |
| The type of this symbol is same as the output of <cite>begin_state()</cite>. |
| This can be used as an input state to the next time step |
| of this RNN.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <dl class="last docutils"> |
| <dt><a class="reference internal" href="#mxnet.gluon.rnn.RecurrentCell.begin_state" title="mxnet.gluon.rnn.RecurrentCell.begin_state"><code class="xref py py-meth docutils literal"><span class="pre">begin_state()</span></code></a></dt> |
| <dd>This function can provide the states for the first time step.</dd> |
| <dt><a class="reference internal" href="#mxnet.gluon.rnn.RecurrentCell.unroll" title="mxnet.gluon.rnn.RecurrentCell.unroll"><code class="xref py py-meth docutils literal"><span class="pre">unroll()</span></code></a></dt> |
| <dd>This function unrolls an RNN for a given number of (>=1) time steps.</dd> |
| </dl> |
| </div> |
| </dd></dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.RNN"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">RNN</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>num_layers=1</em>, <em>activation='relu'</em>, <em>layout='TNC'</em>, <em>dropout=0</em>, <em>bidirectional=False</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>input_size=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RNN" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Applies a multi-layer Elman RNN with <cite>tanh</cite> or <cite>ReLU</cite> non-linearity to an input sequence.</p> |
| <p>For each element in the input sequence, each layer computes the following |
| function:</p> |
| <div class="math"> |
| \[h_t = \tanh(w_{ih} * x_t + b_{ih} + w_{hh} * h_{(t-1)} + b_{hh})\]</div> |
| <p>where <span class="math">\(h_t\)</span> is the hidden state at time <cite>t</cite>, and <span class="math">\(x_t\)</span> is the hidden |
| state of the previous layer at time <cite>t</cite> or <span class="math">\(input_t\)</span> for the first layer. |
| If activation=’relu’, then <cite>ReLU</cite> is used instead of <cite>tanh</cite>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>hidden_size</strong> (<em>int</em>) – The number of features in the hidden state h.</li> |
| <li><strong>num_layers</strong> (<em>int</em><em>, </em><em>default 1</em>) – Number of recurrent layers.</li> |
| <li><strong>activation</strong> (<em>{'relu'</em><em> or </em><em>'tanh'}</em><em>, </em><em>default 'relu'</em>) – The activation function to use.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'TNC'</em>) – The format of input and output tensors. T, N and C stand for |
| sequence length, batch size, and feature dimensions respectively.</li> |
| <li><strong>dropout</strong> (<em>float</em><em>, </em><em>default 0</em>) – If non-zero, introduces a dropout layer on the outputs of each |
| RNN layer except the last layer.</li> |
| <li><strong>bidirectional</strong> (<em>bool</em><em>, </em><em>default False</em>) – If <cite>True</cite>, becomes a bidirectional RNN.</li> |
| <li><strong>i2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the input weights matrix, used for the linear |
| transformation of the inputs.</li> |
| <li><strong>h2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the recurrent weights matrix, used for the linear |
| transformation of the recurrent state.</li> |
| <li><strong>i2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>h2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>input_size</strong> (<em>int</em><em>, </em><em>default 0</em>) – The number of expected features in the input x. |
| If not specified, it will be inferred from input.</li> |
| <li><strong>prefix</strong> (<em>str</em><em> or </em><em>None</em>) – Prefix of this <cite>Block</cite>.</li> |
| <li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.ParameterDict" title="mxnet.gluon.ParameterDict"><em>ParameterDict</em></a><em> or </em><em>None</em>) – Shared Parameters for this <cite>Block</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shapes:</dt> |
| <dd>The input shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the |
| input has shape <cite>(sequence_length, batch_size, input_size)</cite></dd> |
| <dt>Output shape:</dt> |
| <dd>The output shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the |
| output has shape <cite>(sequence_length, batch_size, num_hidden)</cite>. |
| If <cite>bidirectional</cite> is True, output shape will instead be |
| <cite>(sequence_length, batch_size, 2*num_hidden)</cite></dd> |
| <dt>Recurrent state shape:</dt> |
| <dd>The recurrent state’s shape is <cite>(num_layers, batch_size, num_hidden)</cite>. |
| If <cite>bidirectional</cite> is True, state shape will instead be |
| <cite>(2*num_layers, batch_size, num_hidden)</cite></dd> |
| </dl> |
| <p class="rubric">Examples</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">layer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">rnn</span><span class="o">.</span><span class="n">RNN</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">layer</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="nb">input</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random_uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">10</span><span class="p">))</span> |
| <span class="gp">>>> </span><span class="n">h0</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random_uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">100</span><span class="p">))</span> |
| <span class="gp">>>> </span><span class="n">output</span><span class="p">,</span> <span class="n">hn</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">h0</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.LSTM"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">LSTM</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>num_layers=1</em>, <em>layout='TNC'</em>, <em>dropout=0</em>, <em>bidirectional=False</em>, <em>input_size=0</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.LSTM" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.</p> |
| <p>For each element in the input sequence, each layer computes the following |
| function:</p> |
| <div class="math"> |
| \[\begin{split}\begin{array}{ll} |
| i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ |
| f_t = sigmoid(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\ |
| g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\ |
| o_t = sigmoid(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\ |
| c_t = f_t * c_{(t-1)} + i_t * g_t \\ |
| h_t = o_t * \tanh(c_t) |
| \end{array}\end{split}\]</div> |
| <p>where <span class="math">\(h_t\)</span> is the hidden state at time <cite>t</cite>, <span class="math">\(c_t\)</span> is the |
| cell state at time <cite>t</cite>, <span class="math">\(x_t\)</span> is the hidden state of the previous |
| layer at time <cite>t</cite> or <span class="math">\(input_t\)</span> for the first layer, and <span class="math">\(i_t\)</span>, |
| <span class="math">\(f_t\)</span>, <span class="math">\(g_t\)</span>, <span class="math">\(o_t\)</span> are the input, forget, cell, and |
| out gates, respectively.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>hidden_size</strong> (<em>int</em>) – The number of features in the hidden state h.</li> |
| <li><strong>num_layers</strong> (<em>int</em><em>, </em><em>default 1</em>) – Number of recurrent layers.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'TNC'</em>) – The format of input and output tensors. T, N and C stand for |
| sequence length, batch size, and feature dimensions respectively.</li> |
| <li><strong>dropout</strong> (<em>float</em><em>, </em><em>default 0</em>) – If non-zero, introduces a dropout layer on the outputs of each |
| RNN layer except the last layer.</li> |
| <li><strong>bidirectional</strong> (<em>bool</em><em>, </em><em>default False</em>) – If <cite>True</cite>, becomes a bidirectional RNN.</li> |
| <li><strong>i2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the input weights matrix, used for the linear |
| transformation of the inputs.</li> |
| <li><strong>h2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the recurrent weights matrix, used for the linear |
| transformation of the recurrent state.</li> |
| <li><strong>i2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a><em>, </em><em>default 'lstmbias'</em>) – Initializer for the bias vector. By default, bias for the forget |
| gate is initialized to 1 while all other biases are initialized |
| to zero.</li> |
| <li><strong>h2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>input_size</strong> (<em>int</em><em>, </em><em>default 0</em>) – The number of expected features in the input x. |
| If not specified, it will be inferred from input.</li> |
| <li><strong>prefix</strong> (<em>str</em><em> or </em><em>None</em>) – Prefix of this <cite>Block</cite>.</li> |
| <li><strong>params</strong> (<cite>ParameterDict</cite> or <cite>None</cite>) – Shared Parameters for this <cite>Block</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shapes:</dt> |
| <dd>The input shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the |
| input has shape <cite>(sequence_length, batch_size, input_size)</cite></dd> |
| <dt>Output shape:</dt> |
| <dd>The output shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the |
| output has shape <cite>(sequence_length, batch_size, num_hidden)</cite>. |
| If <cite>bidirectional</cite> is True, output shape will instead be |
| <cite>(sequence_length, batch_size, 2*num_hidden)</cite></dd> |
| <dt>Recurrent state shape:</dt> |
| <dd>The recurrent state is a list of two NDArrays. Both have shape |
| <cite>(num_layers, batch_size, num_hidden)</cite>. |
| If <cite>bidirectional</cite> is True, state shape will instead be |
| <cite>(num_layers, batch_size, 2*num_hidden)</cite>.</dd> |
| </dl> |
| <p class="rubric">Examples</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">layer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">rnn</span><span class="o">.</span><span class="n">LSTM</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">layer</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="nb">input</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random_uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">10</span><span class="p">))</span> |
| <span class="gp">>>> </span><span class="n">h0</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random_uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">100</span><span class="p">))</span> |
| <span class="gp">>>> </span><span class="n">c0</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random_uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">100</span><span class="p">))</span> |
| <span class="gp">>>> </span><span class="n">output</span><span class="p">,</span> <span class="n">hn</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="p">[</span><span class="n">h0</span><span class="p">,</span> <span class="n">c0</span><span class="p">])</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.GRU"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">GRU</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>num_layers=1</em>, <em>layout='TNC'</em>, <em>dropout=0</em>, <em>bidirectional=False</em>, <em>input_size=0</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.GRU" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.</p> |
| <p>For each element in the input sequence, each layer computes the following |
| function:</p> |
| <div class="math"> |
| \[\begin{split}\begin{array}{ll} |
| r_t = sigmoid(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ |
| i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ |
| n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\ |
| h_t = (1 - i_t) * n_t + i_t * h_{(t-1)} \\ |
| \end{array}\end{split}\]</div> |
| <p>where <span class="math">\(h_t\)</span> is the hidden state at time <cite>t</cite>, <span class="math">\(x_t\)</span> is the hidden |
| state of the previous layer at time <cite>t</cite> or <span class="math">\(input_t\)</span> for the first layer, |
| and <span class="math">\(r_t\)</span>, <span class="math">\(i_t\)</span>, <span class="math">\(n_t\)</span> are the reset, input, and new gates, respectively.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>hidden_size</strong> (<em>int</em>) – The number of features in the hidden state h</li> |
| <li><strong>num_layers</strong> (<em>int</em><em>, </em><em>default 1</em>) – Number of recurrent layers.</li> |
| <li><strong>layout</strong> (<em>str</em><em>, </em><em>default 'TNC'</em>) – The format of input and output tensors. T, N and C stand for |
| sequence length, batch size, and feature dimensions respectively.</li> |
| <li><strong>dropout</strong> (<em>float</em><em>, </em><em>default 0</em>) – If non-zero, introduces a dropout layer on the outputs of each |
| RNN layer except the last layer</li> |
| <li><strong>bidirectional</strong> (<em>bool</em><em>, </em><em>default False</em>) – If True, becomes a bidirectional RNN.</li> |
| <li><strong>i2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the input weights matrix, used for the linear |
| transformation of the inputs.</li> |
| <li><strong>h2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the recurrent weights matrix, used for the linear |
| transformation of the recurrent state.</li> |
| <li><strong>i2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>h2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>input_size</strong> (<em>int</em><em>, </em><em>default 0</em>) – The number of expected features in the input x. |
| If not specified, it will be inferred from input.</li> |
| <li><strong>prefix</strong> (<em>str</em><em> or </em><em>None</em>) – Prefix of this <cite>Block</cite>.</li> |
| <li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.ParameterDict" title="mxnet.gluon.ParameterDict"><em>ParameterDict</em></a><em> or </em><em>None</em>) – Shared Parameters for this <cite>Block</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Input shapes:</dt> |
| <dd>The input shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the |
| input has shape <cite>(sequence_length, batch_size, input_size)</cite></dd> |
| <dt>Output shape:</dt> |
| <dd>The output shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the |
| output has shape <cite>(sequence_length, batch_size, num_hidden)</cite>. |
| If <cite>bidirectional</cite> is True, output shape will instead be |
| <cite>(sequence_length, batch_size, 2*num_hidden)</cite></dd> |
| <dt>Recurrent state shape:</dt> |
| <dd>The recurrent state’s shape is <cite>(num_layers, batch_size, num_hidden)</cite>. |
| If <cite>bidirectional</cite> is True, state shape will instead be |
| <cite>(num_layers, batch_size, 2*num_hidden)</cite></dd> |
| </dl> |
| <p class="rubric">Examples</p> |
| <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">layer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">rnn</span><span class="o">.</span><span class="n">GRU</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">layer</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="nb">input</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random_uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">10</span><span class="p">))</span> |
| <span class="gp">>>> </span><span class="n">h0</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random_uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">100</span><span class="p">))</span> |
| <span class="gp">>>> </span><span class="n">output</span><span class="p">,</span> <span class="n">hn</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">h0</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.RNNCell"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">RNNCell</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>activation='tanh'</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>input_size=0</em>, <em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RNNCell" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Simple recurrent neural network cell.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>hidden_size</strong> (<em>int</em>) – Number of units in output symbol</li> |
| <li><strong>activation</strong> (<em>str</em><em> or </em><a class="reference internal" href="symbol.html#mxnet.symbol.Symbol" title="mxnet.symbol.Symbol"><em>Symbol</em></a><em>, </em><em>default 'tanh'</em>) – Type of activation function.</li> |
| <li><strong>i2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the input weights matrix, used for the linear |
| transformation of the inputs.</li> |
| <li><strong>h2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the recurrent weights matrix, used for the linear |
| transformation of the recurrent state.</li> |
| <li><strong>i2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>h2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>prefix</strong> (<em>str</em><em>, </em><em>default 'rnn_'</em>) – Prefix for name of <cite>Block</cite>s |
| (and name of weight if params is <cite>None</cite>).</li> |
| <li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.Parameter" title="mxnet.gluon.Parameter"><em>Parameter</em></a><em> or </em><em>None</em>) – Container for weight sharing between cells. |
| Created if <cite>None</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.LSTMCell"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">LSTMCell</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>input_size=0</em>, <em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.LSTMCell" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Long-Short Term Memory (LSTM) network cell.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>hidden_size</strong> (<em>int</em>) – Number of units in output symbol.</li> |
| <li><strong>i2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the input weights matrix, used for the linear |
| transformation of the inputs.</li> |
| <li><strong>h2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the recurrent weights matrix, used for the linear |
| transformation of the recurrent state.</li> |
| <li><strong>i2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a><em>, </em><em>default 'lstmbias'</em>) – Initializer for the bias vector. By default, bias for the forget |
| gate is initialized to 1 while all other biases are initialized |
| to zero.</li> |
| <li><strong>h2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>prefix</strong> (<em>str</em><em>, </em><em>default 'lstm_'</em>) – Prefix for name of <cite>Block</cite>s |
| (and name of weight if params is <cite>None</cite>).</li> |
| <li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.Parameter" title="mxnet.gluon.Parameter"><em>Parameter</em></a><em> or </em><em>None</em>) – Container for weight sharing between cells. |
| Created if <cite>None</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.GRUCell"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">GRUCell</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>input_size=0</em>, <em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.GRUCell" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gated Recurrent Unit (GRU) network cell. |
| Note: this is an implementation of the cuDNN version of GRUs |
| (slight modification compared to Cho et al. 2014).</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>hidden_size</strong> (<em>int</em>) – Number of units in output symbol.</li> |
| <li><strong>i2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the input weights matrix, used for the linear |
| transformation of the inputs.</li> |
| <li><strong>h2h_weight_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the recurrent weights matrix, used for the linear |
| transformation of the recurrent state.</li> |
| <li><strong>i2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>h2h_bias_initializer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the bias vector.</li> |
| <li><strong>prefix</strong> (<em>str</em><em>, </em><em>default 'gru_'</em>) – Prefix for name of <cite>Block</cite>s |
| (and name of weight if params is <cite>None</cite>).</li> |
| <li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.Parameter" title="mxnet.gluon.Parameter"><em>Parameter</em></a><em> or </em><em>None</em>) – Container for weight sharing between cells. |
| Created if <cite>None</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.SequentialRNNCell"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">SequentialRNNCell</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.SequentialRNNCell" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sequentially stacking multiple RNN cells.</p> |
| <dl class="method"> |
| <dt id="mxnet.gluon.rnn.SequentialRNNCell.add"> |
| <code class="descname">add</code><span class="sig-paren">(</span><em>cell</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.SequentialRNNCell.add" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Appends a cell into the stack.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cell</strong> (<em>rnn cell</em>) – </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.BidirectionalCell"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">BidirectionalCell</code><span class="sig-paren">(</span><em>l_cell</em>, <em>r_cell</em>, <em>output_prefix='bi_'</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.BidirectionalCell" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bidirectional RNN cell.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>l_cell</strong> (<a class="reference internal" href="#mxnet.gluon.rnn.RecurrentCell" title="mxnet.gluon.rnn.RecurrentCell"><em>RecurrentCell</em></a>) – Cell for forward unrolling</li> |
| <li><strong>r_cell</strong> (<a class="reference internal" href="#mxnet.gluon.rnn.RecurrentCell" title="mxnet.gluon.rnn.RecurrentCell"><em>RecurrentCell</em></a>) – Cell for backward unrolling</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.DropoutCell"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">DropoutCell</code><span class="sig-paren">(</span><em>dropout</em>, <em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.DropoutCell" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Applies dropout on input.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>dropout</strong> (<em>float</em>) – Percentage of elements to drop out, which |
| is 1 - percentage to retain.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.ZoneoutCell"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">ZoneoutCell</code><span class="sig-paren">(</span><em>base_cell</em>, <em>zoneout_outputs=0.0</em>, <em>zoneout_states=0.0</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.ZoneoutCell" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Applies Zoneout on base cell.</p> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.rnn.ResidualCell"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">ResidualCell</code><span class="sig-paren">(</span><em>base_cell</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.ResidualCell" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Adds residual connection as described in Wu et al., 2016 |
| (<a class="reference external" href="https://arxiv.org/abs/1609.08144">https://arxiv.org/abs/1609.08144</a>). |
| Output of the cell is output of the base cell plus input.</p> |
| </dd></dl> |
| </div> |
| <div class="section" id="trainer"> |
| <h2>Trainer<a class="headerlink" href="#trainer" title="Permalink to this headline">¶</a></h2> |
| <dl class="class"> |
| <dt id="mxnet.gluon.Trainer"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">Trainer</code><span class="sig-paren">(</span><em>params</em>, <em>optimizer</em>, <em>optimizer_params</em>, <em>kvstore='device'</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Trainer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Applies an <cite>Optimizer</cite> on a set of Parameters. Trainer should |
| be used together with <cite>autograd</cite>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.ParameterDict" title="mxnet.gluon.ParameterDict"><em>ParameterDict</em></a>) – The set of parameters to optimize.</li> |
| <li><strong>optimizer</strong> (<em>str</em><em> or </em><a class="reference internal" href="optimization.html#mxnet.optimizer.Optimizer" title="mxnet.optimizer.Optimizer"><em>Optimizer</em></a>) – The optimizer to use.</li> |
| <li><strong>optimizer_params</strong> (<em>dict</em>) – Keyword arguments to be passed to optimizer constructor. For example, |
| <cite>{‘learning_rate’: 0.1}</cite></li> |
| <li><strong>kvstore</strong> (<em>str</em><em> or </em><a class="reference internal" href="kvstore.html#mxnet.kvstore.KVStore" title="mxnet.kvstore.KVStore"><em>KVStore</em></a>) – kvstore type for multi-gpu and distributed training.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="mxnet.gluon.Trainer.step"> |
| <code class="descname">step</code><span class="sig-paren">(</span><em>batch_size</em>, <em>ignore_stale_grad=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Trainer.step" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Makes one step of parameter update. Should be called after |
| <cite>autograd.compute_gradient</cite> and outside of <cite>record()</cite> scope.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>batch_size</strong> (<em>int</em>) – Batch size of data processed. Gradient will be normalized by <cite>1/batch_size</cite>. |
| Set this to 1 if you normalized loss manually with <cite>loss = mean(loss)</cite>.</li> |
| <li><strong>ignore_stale_grad</strong> (<em>bool</em><em>, </em><em>optional</em><em>, </em><em>default=False</em>) – If true, ignores Parameters with stale gradient (gradient that has not |
| been updated by <cite>backward</cite> after last step) and skips update.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| </dd></dl> |
| </div> |
| <div class="section" id="loss-functions"> |
| <h2>Loss functions<a class="headerlink" href="#loss-functions" title="Permalink to this headline">¶</a></h2> |
| <dl class="class"> |
| <dt id="mxnet.gluon.loss.L2Loss"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.loss.</code><code class="descname">L2Loss</code><span class="sig-paren">(</span><em>weight=1.0</em>, <em>batch_axis=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.loss.L2Loss" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Calculates the mean squared error between output and label:</p> |
| <div class="math"> |
| \[L = \frac{1}{2}\sum_i \Vert {output}_i - {label}_i \Vert^2.\]</div> |
| <p>Output and label can have arbitrary shape as long as they have the same |
| number of elements.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>weight</strong> (<em>float</em><em> or </em><em>None</em>) – Global scalar weight for loss.</li> |
| <li><strong>sample_weight</strong> (<a class="reference internal" href="symbol.html#mxnet.symbol.Symbol" title="mxnet.symbol.Symbol"><em>Symbol</em></a><em> or </em><em>None</em>) – Per sample weighting. Must be broadcastable to |
| the same shape as loss. For example, if loss has |
| shape (64, 10) and you want to weight each sample |
| in the batch, <cite>sample_weight</cite> should have shape (64, 1).</li> |
| <li><strong>batch_axis</strong> (<em>int</em><em>, </em><em>default 0</em>) – The axis that represents mini-batch.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.loss.L1Loss"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.loss.</code><code class="descname">L1Loss</code><span class="sig-paren">(</span><em>weight=None</em>, <em>batch_axis=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.loss.L1Loss" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Calculates the mean absolute error between output and label:</p> |
| <div class="math"> |
| \[L = \sum_i \vert {output}_i - {label}_i \vert.\]</div> |
| <p>Output and label must have the same shape.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>weight</strong> (<em>float</em><em> or </em><em>None</em>) – Global scalar weight for loss.</li> |
| <li><strong>sample_weight</strong> (<a class="reference internal" href="symbol.html#mxnet.symbol.Symbol" title="mxnet.symbol.Symbol"><em>Symbol</em></a><em> or </em><em>None</em>) – Per sample weighting. Must be broadcastable to |
| the same shape as loss. For example, if loss has |
| shape (64, 10) and you want to weight each sample |
| in the batch, <cite>sample_weight</cite> should have shape (64, 1).</li> |
| <li><strong>batch_axis</strong> (<em>int</em><em>, </em><em>default 0</em>) – The axis that represents mini-batch.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.loss.SoftmaxCrossEntropyLoss"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.loss.</code><code class="descname">SoftmaxCrossEntropyLoss</code><span class="sig-paren">(</span><em>axis=-1</em>, <em>sparse_label=True</em>, <em>from_logits=False</em>, <em>weight=None</em>, <em>batch_axis=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.loss.SoftmaxCrossEntropyLoss" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Computes the softmax cross entropy loss.</p> |
| <p>If <cite>sparse_label</cite> is <cite>True</cite>, label should contain integer category indicators:</p> |
| <div class="math"> |
| \[ \begin{align}\begin{aligned}p = {softmax}({output})\\L = -\sum_i {log}(p_{i,{label}_i})\end{aligned}\end{align} \]</div> |
| <p>Label’s shape should be output’s shape without the <cite>axis</cite> dimension. For example, for |
| <cite>output.shape</cite> = (1,2,3,4) and axis = 2, <cite>label.shape</cite> should be (1,2,4).</p> |
| <p>If <cite>sparse_label</cite> is <cite>False</cite>, label should contain probability distribution |
| with the same shape as output:</p> |
| <div class="math"> |
| \[ \begin{align}\begin{aligned}p = {softmax}({output})\\L = -\sum_i \sum_j {label}_j {log}(p_{ij})\end{aligned}\end{align} \]</div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>axis</strong> (<em>int</em><em>, </em><em>default -1</em>) – The axis to sum over when computing softmax and entropy.</li> |
| <li><strong>sparse_label</strong> (<em>bool</em><em>, </em><em>default True</em>) – Whether label is an integer array instead of probability distribution.</li> |
| <li><strong>from_logits</strong> (<em>bool</em><em>, </em><em>default False</em>) – Whether input is a log probability (usually from log_softmax) instead |
| of unnormalized numbers.</li> |
| <li><strong>weight</strong> (<em>float</em><em> or </em><em>None</em>) – Global scalar weight for loss.</li> |
| <li><strong>sample_weight</strong> (<a class="reference internal" href="symbol.html#mxnet.symbol.Symbol" title="mxnet.symbol.Symbol"><em>Symbol</em></a><em> or </em><em>None</em>) – Per sample weighting. Must be broadcastable to |
| the same shape as loss. For example, if loss has |
| shape (64, 10) and you want to weight each sample |
| in the batch, <cite>sample_weight</cite> should have shape (64, 1).</li> |
| <li><strong>batch_axis</strong> (<em>int</em><em>, </em><em>default 0</em>) – The axis that represents mini-batch.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="class"> |
| <dt id="mxnet.gluon.loss.KLDivLoss"> |
| <em class="property">class </em><code class="descclassname">mxnet.gluon.loss.</code><code class="descname">KLDivLoss</code><span class="sig-paren">(</span><em>from_logits=True</em>, <em>weight=None</em>, <em>batch_axis=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.loss.KLDivLoss" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The Kullback-Leibler divergence loss.</p> |
| <p>KL divergence is a useful distance measure for continuous distributions |
| and is often useful when performing direct regression over the space of |
| (discretely sampled) continuous output distributions.</p> |
| <div class="math"> |
| \[L = 1/n \sum_i (label_i * (log(label_i) - output_i))\]</div> |
| <p>Label’s shape should be the same as output’s.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>from_logits</strong> (bool, default is <cite>True</cite>) – Whether the input is log probability (usually from log_softmax) instead |
| of unnormalized numbers.</li> |
| <li><strong>weight</strong> (<em>float</em><em> or </em><em>None</em>) – Global scalar weight for loss.</li> |
| <li><strong>sample_weight</strong> (<a class="reference internal" href="symbol.html#mxnet.symbol.Symbol" title="mxnet.symbol.Symbol"><em>Symbol</em></a><em> or </em><em>None</em>) – Per sample weighting. Must be broadcastable to |
| the same shape as loss. For example, if loss has |
| shape (64, 10) and you want to weight each sample |
| in the batch, <cite>sample_weight</cite> should have shape (64, 1).</li> |
| <li><strong>batch_axis</strong> (<em>int</em><em>, </em><em>default 0</em>) – The axis that represents mini-batch.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| </div> |
| <div class="section" id="utilities"> |
| <h2>Utilities<a class="headerlink" href="#utilities" title="Permalink to this headline">¶</a></h2> |
| <dl class="method"> |
| <dt id="mxnet.gluon.utils.split_data"> |
| <code class="descclassname">utils.</code><code class="descname">split_data</code><span class="sig-paren">(</span><em>data</em>, <em>num_slice</em>, <em>batch_axis=0</em>, <em>even_split=True</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.utils.split_data" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Splits an NDArray into <cite>num_slice</cite> slices along <cite>batch_axis</cite>. |
| Usually used for data parallelism where each slice is sent |
| to one device (e.g. a GPU).</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>data</strong> (<a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – A batch of data.</li> |
| <li><strong>num_slice</strong> (<em>int</em>) – Number of desired slices.</li> |
| <li><strong>batch_axis</strong> (<em>int</em><em>, </em><em>default 0</em>) – The axis along which to slice.</li> |
| <li><strong>even_split</strong> (<em>bool</em><em>, </em><em>default True</em>) – Whether to force all slices to have the same number of elements. |
| If <cite>True</cite>, an error will be raised when <cite>num_slice</cite> does not evenly |
| divide <cite>data.shape[batch_axis]</cite>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Return value is a list even if <cite>num_slice</cite> is 1.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">list of NDArray</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.utils.split_and_load"> |
| <code class="descclassname">utils.</code><code class="descname">split_and_load</code><span class="sig-paren">(</span><em>data</em>, <em>ctx_list</em>, <em>batch_axis=0</em>, <em>even_split=True</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.utils.split_and_load" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Splits an NDArray into <cite>len(ctx_list)</cite> slices along <cite>batch_axis</cite> and loads |
| each slice to one context in <cite>ctx_list</cite>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name"/> |
| <col class="field-body"/> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>data</strong> (<a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – A batch of data.</li> |
| <li><strong>ctx_list</strong> (<em>list of Context</em>) – A list of Contexts.</li> |
| <li><strong>batch_axis</strong> (<em>int</em><em>, </em><em>default 0</em>) – The axis along which to slice.</li> |
| <li><strong>even_split</strong> (<em>bool</em><em>, </em><em>default True</em>) – Whether to force all slices to have the same number of elements.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Each corresponds to a context in <cite>ctx_list</cite>.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">list of NDArray</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| <dl class="method"> |
| <dt id="mxnet.gluon.utils.clip_global_norm"> |
| <code class="descclassname">utils.</code><code class="descname">clip_global_norm</code><span class="sig-paren">(</span><em>arrays</em>, <em>max_norm</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.utils.clip_global_norm" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Rescales NDArrays so that the sum of their 2-norm is smaller than <cite>max_norm</cite>.</p> |
| </dd></dl> |
| <script>auto_index("api-reference");</script></div> |
| </div> |
| <div class="container"> |
| <div class="footer"> |
| <p> © 2015-2017 DMLC. All rights reserved. </p> |
| </div> |
| </div> |
| </div> |
| <div aria-label="main navigation" class="sphinxsidebar rightsidebar" role="navigation"> |
| <div class="sphinxsidebarwrapper"> |
| <h3><a href="../../index.html">Table Of Contents</a></h3> |
| <ul> |
| <li><a class="reference internal" href="#">Gluon Package</a><ul> |
| <li><a class="reference internal" href="#overview">Overview</a></li> |
| <li><a class="reference internal" href="#parameter">Parameter</a></li> |
| <li><a class="reference internal" href="#containers">Containers</a></li> |
| <li><a class="reference internal" href="#neural-network-layers">Neural Network Layers</a><ul> |
| <li><a class="reference internal" href="#containers">Containers</a></li> |
| <li><a class="reference internal" href="#basic-layers">Basic Layers</a></li> |
| <li><a class="reference internal" href="#convolutional-layers">Convolutional Layers</a></li> |
| <li><a class="reference internal" href="#pooling-layers">Pooling Layers</a></li> |
| </ul> |
| </li> |
| <li><a class="reference internal" href="#recurrent-layers">Recurrent Layers</a></li> |
| <li><a class="reference internal" href="#trainer">Trainer</a></li> |
| <li><a class="reference internal" href="#loss-functions">Loss functions</a></li> |
| <li><a class="reference internal" href="#utilities">Utilities</a></li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> <!-- pagename != index --> |
| <script crossorigin="anonymous" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script> |
| <script src="../../_static/js/sidebar.js" type="text/javascript"></script> |
| <script src="../../_static/js/search.js" type="text/javascript"></script> |
| <script src="../../_static/js/navbar.js" type="text/javascript"></script> |
| <script src="../../_static/js/clipboard.min.js" type="text/javascript"></script> |
| <script src="../../_static/js/copycode.js" type="text/javascript"></script> |
| <script type="text/javascript"> |
| // Make <body> visible once the DOM is ready. |
| // NOTE(review): presumably a stylesheet hides <body> until this runs - confirm in mxnet.css. |
| // Uses $(handler) instead of the deprecated $('body').ready(handler); jQuery ignores the |
| // selector for .ready(), so the handler fires at exactly the same moment as before. |
| $(function () { |
| $('body').css('visibility', 'visible'); |
| }); |
| </script> |
| </div></body> |
| </html> |