<!-- blob: a0902d3e9208ba8b626aebc5be896ce35238e72d [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>Gluon Package — mxnet documentation</title>
<link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" rel="stylesheet"/>
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet"/>
<link href="../../_static/basic.css" rel="stylesheet" type="text/css"/>
<link href="../../_static/pygments.css" rel="stylesheet" type="text/css"/>
<link href="../../_static/mxnet.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript">
// Sphinx client-side configuration, read by doctools.js and searchtools.
var DOCUMENTATION_OPTIONS = {
// Relative path from this page back to the documentation root.
URL_ROOT: '../../',
VERSION: '',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: ''
};
</script>
<script src="../../_static/jquery-1.11.1.js" type="text/javascript"></script>
<script src="../../_static/underscore.js" type="text/javascript"></script>
<script src="../../_static/searchtools_custom.js" type="text/javascript"></script>
<script src="../../_static/doctools.js" type="text/javascript"></script>
<script src="../../_static/selectlang.js" type="text/javascript"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>
<script type="text/javascript"> /* Once the DOM is ready, load the site search index and initialise search. */ jQuery(function() { Search.loadIndex("/searchindex.js"); Search.init();}); </script>
<script>
// Standard Google Analytics (analytics.js) bootstrap snippet: installs a
// stub `ga` command queue, then injects analytics.js asynchronously.
// Kept verbatim from Google's reference snippet.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new
Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Register the MXNet docs tracking property and record this page view.
ga('create', 'UA-96378503-1', 'auto');
ga('send', 'pageview');
</script>
<!-- -->
<!-- <script type="text/javascript" src="../../_static/jquery.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../_static/underscore.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../_static/doctools.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> -->
<!-- -->
<link href="index.html" rel="up" title="MXNet - Python API"/>
<link href="rnn.html" rel="next" title="RNN Cell API"/>
<link href="autograd.html" rel="prev" title="Autograd Package"/>
<link href="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-icon.png" rel="icon" type="image/png"/>
</head>
<body role="document"><div class="navbar navbar-fixed-top">
<div class="container" id="navContainer">
<div class="innder" id="header-inner">
<h1 id="logo-wrap">
<a href="../../" id="logo"><img src="../../_static/mxnet.png"/></a>
</h1>
<nav class="nav-bar" id="main-nav">
<a class="main-nav-link" href="../../get_started/install.html">Install</a>
<a class="main-nav-link" href="../../tutorials/index.html">Tutorials</a>
<span id="dropdown-menu-position-anchor">
<a aria-expanded="false" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Gluon <span class="caret"></span></a>
<ul class="dropdown-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="../../gluon/index.html">About</a></li>
<li><a class="main-nav-link" href="http://gluon.mxnet.io/">Tutorials</a></li>
</ul>
</span>
<a class="main-nav-link" href="../../how_to/index.html">How To</a>
<span id="dropdown-menu-position-anchor">
<a aria-expanded="false" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">API <span class="caret"></span></a>
<ul class="dropdown-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="../../api/python/index.html">Python</a></li>
<li><a class="main-nav-link" href="../../api/scala/index.html">Scala</a></li>
<li><a class="main-nav-link" href="../../api/r/index.html">R</a></li>
<li><a class="main-nav-link" href="../../api/julia/index.html">Julia</a></li>
<li><a class="main-nav-link" href="../../api/c++/index.html">C++</a></li>
<li><a class="main-nav-link" href="../../api/perl/index.html">Perl</a></li>
</ul>
</span>
<a class="main-nav-link" href="../../architecture/index.html">Architecture</a>
<!-- <a class="main-nav-link" href="../../community/index.html">Community</a> -->
<a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a>
<span id="dropdown-menu-position-anchor-version" style="position: relative"><a href="#" class="main-nav-link dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Versions(0.11.0)<span class="caret"></span></a><ul id="package-dropdown-menu" class="dropdown-menu"><li><a class="main-nav-link" href="https://mxnet.incubator.apache.org/">1.1.0</a></li><li><a class="main-nav-link" href="https://mxnet.incubator.apache.org/versions/1.0.0/index.html">1.0.0</a></li><li><a class="main-nav-link" href="https://mxnet.incubator.apache.org/versions/0.12.1/index.html">0.12.1</a></li><li><a class="main-nav-link" href="https://mxnet.incubator.apache.org/versions/0.12.0/index.html">0.12.0</a></li><li><a class="main-nav-link" href="https://mxnet.incubator.apache.org/versions/0.11.0/index.html">0.11.0</a></li><li><a class="main-nav-link" href="https://mxnet.incubator.apache.org/versions/master/index.html">master</a></li></ul></span></nav>
<script> /* Returns the relative path from this page to the documentation root. */ function getRootPath(){ return "../../" } </script>
<div class="burgerIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"></a>
<ul class="dropdown-menu dropdown-menu-right" id="burgerMenu">
<li><a href="../../get_started/install.html">Install</a></li>
<li><a href="../../tutorials/index.html">Tutorials</a></li>
<li><a href="../../how_to/index.html">How To</a></li>
<li class="dropdown-submenu">
<a href="#" tabindex="-1">API</a>
<ul class="dropdown-menu">
<li><a href="../../api/python/index.html" tabindex="-1">Python</a>
</li>
<li><a href="../../api/scala/index.html" tabindex="-1">Scala</a>
</li>
<li><a href="../../api/r/index.html" tabindex="-1">R</a>
</li>
<li><a href="../../api/julia/index.html" tabindex="-1">Julia</a>
</li>
<li><a href="../../api/c++/index.html" tabindex="-1">C++</a>
</li>
<li><a href="../../api/perl/index.html" tabindex="-1">Perl</a>
</li>
</ul>
</li>
<li><a href="../../architecture/index.html">Architecture</a></li>
<li><a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a></li>
<li id="dropdown-menu-position-anchor-version-mobile" class="dropdown-submenu" style="position: relative"><a href="#" tabindex="-1">Versions(0.11.0)</a><ul class="dropdown-menu"><li><a tabindex="-1" href="https://mxnet.incubator.apache.org/">1.1.0</a></li><li><a tabindex="-1" href="https://mxnet.incubator.apache.org/versions/1.0.0/index.html">1.0.0</a></li><li><a tabindex="-1" href="https://mxnet.incubator.apache.org/versions/0.12.1/index.html">0.12.1</a></li><li><a tabindex="-1" href="https://mxnet.incubator.apache.org/versions/0.12.0/index.html">0.12.0</a></li><li><a tabindex="-1" href="https://mxnet.incubator.apache.org/versions/0.11.0/index.html">0.11.0</a></li><li><a tabindex="-1" href="https://mxnet.incubator.apache.org/versions/master/index.html">master</a></li></ul></li></ul>
</div>
<div class="plusIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"><span aria-hidden="true" class="glyphicon glyphicon-plus"></span></a>
<ul class="dropdown-menu dropdown-menu-right" id="plusMenu"></ul>
</div>
<div id="search-input-wrap">
<form action="../../search.html" autocomplete="off" class="" method="get" role="search">
<div class="form-group inner-addon left-addon">
<i class="glyphicon glyphicon-search"></i>
<input class="form-control" name="q" placeholder="Search" type="text"/>
</div>
<input name="check_keywords" type="hidden" value="yes"/>
<input name="area" type="hidden" value="default"/>
</form>
<div id="search-preview"></div>
</div>
<div id="searchIcon">
<span aria-hidden="true" class="glyphicon glyphicon-search"></span>
</div>
<!-- <div id="lang-select-wrap"> -->
<!-- <label id="lang-select-label"> -->
<!-- <\!-- <i class="fa fa-globe"></i> -\-> -->
<!-- <span></span> -->
<!-- </label> -->
<!-- <select id="lang-select"> -->
<!-- <option value="en">Eng</option> -->
<!-- <option value="zh">中文</option> -->
<!-- </select> -->
<!-- </div> -->
<!-- <a id="mobile-nav-toggle">
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
</a> -->
</div>
</div>
</div>
<div class="container">
<div class="row">
<div aria-label="main navigation" class="sphinxsidebar leftsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Python Documents</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="index.html#table-of-contents">Table of contents</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="ndarray.html">NDArray API</a></li>
<li class="toctree-l3"><a class="reference internal" href="symbol.html">Symbol API</a></li>
<li class="toctree-l3"><a class="reference internal" href="module.html">Module API</a></li>
<li class="toctree-l3"><a class="reference internal" href="autograd.html">Autograd Package</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="">Gluon Package</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#overview">Overview</a></li>
<li class="toctree-l4"><a class="reference internal" href="#parameter">Parameter</a></li>
<li class="toctree-l4"><a class="reference internal" href="#containers">Containers</a></li>
<li class="toctree-l4"><a class="reference internal" href="#neural-network-layers">Neural Network Layers</a></li>
<li class="toctree-l4"><a class="reference internal" href="#recurrent-layers">Recurrent Layers</a></li>
<li class="toctree-l4"><a class="reference internal" href="#trainer">Trainer</a></li>
<li class="toctree-l4"><a class="reference internal" href="#loss-functions">Loss functions</a></li>
<li class="toctree-l4"><a class="reference internal" href="#utilities">Utilities</a></li>
<li class="toctree-l4"><a class="reference internal" href="#data">Data</a></li>
<li class="toctree-l4"><a class="reference internal" href="#model-zoo">Model Zoo</a></li>
<li class="toctree-l4"><a class="reference internal" href="#api-reference">API Reference</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="rnn.html">RNN Cell API</a></li>
<li class="toctree-l3"><a class="reference internal" href="kvstore.html">KVStore API</a></li>
<li class="toctree-l3"><a class="reference internal" href="io.html">Data Loading API</a></li>
<li class="toctree-l3"><a class="reference internal" href="image.html">Image API</a></li>
<li class="toctree-l3"><a class="reference internal" href="optimization.html">Optimization: initialize and update weights</a></li>
<li class="toctree-l3"><a class="reference internal" href="callback.html">Callback API</a></li>
<li class="toctree-l3"><a class="reference internal" href="metric.html">Evaluation Metric API</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../r/index.html">R Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../julia/index.html">Julia Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../c++/index.html">C++ Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../scala/index.html">Scala Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../perl/index.html">Perl Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../how_to/index.html">HowTo Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../architecture/index.html">System Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../tutorials/index.html">Tutorials</a></li>
</ul>
</div>
</div>
<div class="content">
<div class="section" id="gluon-package">
<span id="gluon-package"></span><h1>Gluon Package<a class="headerlink" href="#gluon-package" title="Permalink to this headline"></a></h1>
<div class="admonition warning">
<p class="first admonition-title">Warning</p>
<p class="last">This package is currently experimental and may change in the near future.</p>
</div>
<script src="../../_static/js/auto_module_index.js" type="text/javascript"></script><div class="section" id="overview">
<span id="overview"></span><h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline"></a></h2>
<p>The Gluon package is a high-level interface for MXNet designed to be easy to use while
keeping most of the flexibility of the low-level API. Gluon supports both imperative
and symbolic programming, making it easy to train complex models imperatively
in Python and then deploy them with a symbolic graph in C++ and Scala.</p>
</div>
<div class="section" id="parameter">
<span id="parameter"></span><h2>Parameter<a class="headerlink" href="#parameter" title="Permalink to this headline"></a></h2>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.Parameter" title="mxnet.gluon.Parameter"><code class="xref py py-obj docutils literal"><span class="pre">Parameter</span></code></a></td>
<td>A Container holding parameters (weights) of <cite>Block</cite>s.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.ParameterDict" title="mxnet.gluon.ParameterDict"><code class="xref py py-obj docutils literal"><span class="pre">ParameterDict</span></code></a></td>
<td>A dictionary managing a set of parameters.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="containers">
<span id="containers"></span><h2>Containers<a class="headerlink" href="#containers" title="Permalink to this headline"></a></h2>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.Block" title="mxnet.gluon.Block"><code class="xref py py-obj docutils literal"><span class="pre">Block</span></code></a></td>
<td>Base class for all neural network layers and models.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.HybridBlock" title="mxnet.gluon.HybridBlock"><code class="xref py py-obj docutils literal"><span class="pre">HybridBlock</span></code></a></td>
<td><cite>HybridBlock</cite> supports forwarding with both Symbol and NDArray.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.SymbolBlock" title="mxnet.gluon.SymbolBlock"><code class="xref py py-obj docutils literal"><span class="pre">SymbolBlock</span></code></a></td>
<td>Construct block from symbol.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="neural-network-layers">
<span id="neural-network-layers"></span><h2>Neural Network Layers<a class="headerlink" href="#neural-network-layers" title="Permalink to this headline"></a></h2>
<div class="section" id="containers">
<span id="id1"></span><h3>Containers<a class="headerlink" href="#containers" title="Permalink to this headline"></a></h3>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.Sequential" title="mxnet.gluon.nn.Sequential"><code class="xref py py-obj docutils literal"><span class="pre">Sequential</span></code></a></td>
<td>Stacks <cite>Block</cite>s sequentially.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.HybridSequential" title="mxnet.gluon.nn.HybridSequential"><code class="xref py py-obj docutils literal"><span class="pre">HybridSequential</span></code></a></td>
<td>Stacks <cite>HybridBlock</cite>s sequentially.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="basic-layers">
<span id="basic-layers"></span><h3>Basic Layers<a class="headerlink" href="#basic-layers" title="Permalink to this headline"></a></h3>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.Dense" title="mxnet.gluon.nn.Dense"><code class="xref py py-obj docutils literal"><span class="pre">Dense</span></code></a></td>
<td>Just your regular densely-connected NN layer.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.Activation" title="mxnet.gluon.nn.Activation"><code class="xref py py-obj docutils literal"><span class="pre">Activation</span></code></a></td>
<td>Applies an activation function to input.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.Dropout" title="mxnet.gluon.nn.Dropout"><code class="xref py py-obj docutils literal"><span class="pre">Dropout</span></code></a></td>
<td>Applies Dropout to the input.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.BatchNorm" title="mxnet.gluon.nn.BatchNorm"><code class="xref py py-obj docutils literal"><span class="pre">BatchNorm</span></code></a></td>
<td>Batch normalization layer (Ioffe and Szegedy, 2014).</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.LeakyReLU" title="mxnet.gluon.nn.LeakyReLU"><code class="xref py py-obj docutils literal"><span class="pre">LeakyReLU</span></code></a></td>
<td>Leaky version of a Rectified Linear Unit.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.Embedding" title="mxnet.gluon.nn.Embedding"><code class="xref py py-obj docutils literal"><span class="pre">Embedding</span></code></a></td>
<td>Turns non-negative integers (indexes/tokens) into dense vectors of fixed size.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="convolutional-layers">
<span id="convolutional-layers"></span><h3>Convolutional Layers<a class="headerlink" href="#convolutional-layers" title="Permalink to this headline"></a></h3>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.Conv1D" title="mxnet.gluon.nn.Conv1D"><code class="xref py py-obj docutils literal"><span class="pre">Conv1D</span></code></a></td>
<td>1D convolution layer (e.g. temporal convolution).</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.Conv2D" title="mxnet.gluon.nn.Conv2D"><code class="xref py py-obj docutils literal"><span class="pre">Conv2D</span></code></a></td>
<td>2D convolution layer (e.g. spatial convolution over images).</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.Conv3D" title="mxnet.gluon.nn.Conv3D"><code class="xref py py-obj docutils literal"><span class="pre">Conv3D</span></code></a></td>
<td>3D convolution layer (e.g. spatial convolution over volumes).</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.Conv1DTranspose" title="mxnet.gluon.nn.Conv1DTranspose"><code class="xref py py-obj docutils literal"><span class="pre">Conv1DTranspose</span></code></a></td>
<td>Transposed 1D convolution layer (sometimes called Deconvolution).</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.Conv2DTranspose" title="mxnet.gluon.nn.Conv2DTranspose"><code class="xref py py-obj docutils literal"><span class="pre">Conv2DTranspose</span></code></a></td>
<td>Transposed 2D convolution layer (sometimes called Deconvolution).</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.Conv3DTranspose" title="mxnet.gluon.nn.Conv3DTranspose"><code class="xref py py-obj docutils literal"><span class="pre">Conv3DTranspose</span></code></a></td>
<td>Transposed 3D convolution layer (sometimes called Deconvolution).</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="pooling-layers">
<span id="pooling-layers"></span><h3>Pooling Layers<a class="headerlink" href="#pooling-layers" title="Permalink to this headline"></a></h3>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.MaxPool1D" title="mxnet.gluon.nn.MaxPool1D"><code class="xref py py-obj docutils literal"><span class="pre">MaxPool1D</span></code></a></td>
<td>Max pooling operation for one dimensional data.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.MaxPool2D" title="mxnet.gluon.nn.MaxPool2D"><code class="xref py py-obj docutils literal"><span class="pre">MaxPool2D</span></code></a></td>
<td>Max pooling operation for two dimensional (spatial) data.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.MaxPool3D" title="mxnet.gluon.nn.MaxPool3D"><code class="xref py py-obj docutils literal"><span class="pre">MaxPool3D</span></code></a></td>
<td>Max pooling operation for 3D data (spatial or spatio-temporal).</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.AvgPool1D" title="mxnet.gluon.nn.AvgPool1D"><code class="xref py py-obj docutils literal"><span class="pre">AvgPool1D</span></code></a></td>
<td>Average pooling operation for temporal data.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.AvgPool2D" title="mxnet.gluon.nn.AvgPool2D"><code class="xref py py-obj docutils literal"><span class="pre">AvgPool2D</span></code></a></td>
<td>Average pooling operation for spatial data.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.AvgPool3D" title="mxnet.gluon.nn.AvgPool3D"><code class="xref py py-obj docutils literal"><span class="pre">AvgPool3D</span></code></a></td>
<td>Average pooling operation for 3D data (spatial or spatio-temporal).</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.GlobalMaxPool1D" title="mxnet.gluon.nn.GlobalMaxPool1D"><code class="xref py py-obj docutils literal"><span class="pre">GlobalMaxPool1D</span></code></a></td>
<td>Global max pooling operation for temporal data.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.GlobalMaxPool2D" title="mxnet.gluon.nn.GlobalMaxPool2D"><code class="xref py py-obj docutils literal"><span class="pre">GlobalMaxPool2D</span></code></a></td>
<td>Global max pooling operation for spatial data.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.GlobalMaxPool3D" title="mxnet.gluon.nn.GlobalMaxPool3D"><code class="xref py py-obj docutils literal"><span class="pre">GlobalMaxPool3D</span></code></a></td>
<td>Global max pooling operation for 3D data.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.GlobalAvgPool1D" title="mxnet.gluon.nn.GlobalAvgPool1D"><code class="xref py py-obj docutils literal"><span class="pre">GlobalAvgPool1D</span></code></a></td>
<td>Global average pooling operation for temporal data.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.nn.GlobalAvgPool2D" title="mxnet.gluon.nn.GlobalAvgPool2D"><code class="xref py py-obj docutils literal"><span class="pre">GlobalAvgPool2D</span></code></a></td>
<td>Global average pooling operation for spatial data.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.nn.GlobalAvgPool3D" title="mxnet.gluon.nn.GlobalAvgPool3D"><code class="xref py py-obj docutils literal"><span class="pre">GlobalAvgPool3D</span></code></a></td>
<td>Global average pooling operation for 3D data.</td>
</tr>
</tbody>
</table>
</div>
</div>
<div class="section" id="recurrent-layers">
<span id="recurrent-layers"></span><h2>Recurrent Layers<a class="headerlink" href="#recurrent-layers" title="Permalink to this headline"></a></h2>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.rnn.RecurrentCell" title="mxnet.gluon.rnn.RecurrentCell"><code class="xref py py-obj docutils literal"><span class="pre">RecurrentCell</span></code></a></td>
<td>Abstract base class for RNN cells</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.rnn.RNN" title="mxnet.gluon.rnn.RNN"><code class="xref py py-obj docutils literal"><span class="pre">RNN</span></code></a></td>
<td>Applies a multi-layer Elman RNN with <cite>tanh</cite> or <cite>ReLU</cite> non-linearity to an input sequence.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.rnn.LSTM" title="mxnet.gluon.rnn.LSTM"><code class="xref py py-obj docutils literal"><span class="pre">LSTM</span></code></a></td>
<td>Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.rnn.GRU" title="mxnet.gluon.rnn.GRU"><code class="xref py py-obj docutils literal"><span class="pre">GRU</span></code></a></td>
<td>Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.rnn.RNNCell" title="mxnet.gluon.rnn.RNNCell"><code class="xref py py-obj docutils literal"><span class="pre">RNNCell</span></code></a></td>
<td>Simple recurrent neural network cell.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.rnn.LSTMCell" title="mxnet.gluon.rnn.LSTMCell"><code class="xref py py-obj docutils literal"><span class="pre">LSTMCell</span></code></a></td>
<td>Long Short-Term Memory (LSTM) network cell.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.rnn.GRUCell" title="mxnet.gluon.rnn.GRUCell"><code class="xref py py-obj docutils literal"><span class="pre">GRUCell</span></code></a></td>
<td>Gated Recurrent Unit (GRU) network cell.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.rnn.SequentialRNNCell" title="mxnet.gluon.rnn.SequentialRNNCell"><code class="xref py py-obj docutils literal"><span class="pre">SequentialRNNCell</span></code></a></td>
<td>Sequentially stacking multiple RNN cells.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.rnn.BidirectionalCell" title="mxnet.gluon.rnn.BidirectionalCell"><code class="xref py py-obj docutils literal"><span class="pre">BidirectionalCell</span></code></a></td>
<td>Bidirectional RNN cell.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.rnn.DropoutCell" title="mxnet.gluon.rnn.DropoutCell"><code class="xref py py-obj docutils literal"><span class="pre">DropoutCell</span></code></a></td>
<td>Applies dropout on input.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.rnn.ZoneoutCell" title="mxnet.gluon.rnn.ZoneoutCell"><code class="xref py py-obj docutils literal"><span class="pre">ZoneoutCell</span></code></a></td>
<td>Applies Zoneout on base cell.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.rnn.ResidualCell" title="mxnet.gluon.rnn.ResidualCell"><code class="xref py py-obj docutils literal"><span class="pre">ResidualCell</span></code></a></td>
<td>Adds residual connection as described in Wu et al, 2016 (<a class="reference external" href="https://arxiv.org/abs/1609.08144">https://arxiv.org/abs/1609.08144</a>).</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="trainer">
<span id="trainer"></span><h2>Trainer<a class="headerlink" href="#trainer" title="Permalink to this headline"></a></h2>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.Trainer" title="mxnet.gluon.Trainer"><code class="xref py py-obj docutils literal"><span class="pre">Trainer</span></code></a></td>
<td>Applies an <cite>Optimizer</cite> on a set of Parameters.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="loss-functions">
<span id="loss-functions"></span><h2>Loss functions<a class="headerlink" href="#loss-functions" title="Permalink to this headline"></a></h2>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.loss.L2Loss" title="mxnet.gluon.loss.L2Loss"><code class="xref py py-obj docutils literal"><span class="pre">L2Loss</span></code></a></td>
<td>Calculates the mean squared error between output and label:</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.loss.L1Loss" title="mxnet.gluon.loss.L1Loss"><code class="xref py py-obj docutils literal"><span class="pre">L1Loss</span></code></a></td>
<td>Calculates the mean absolute error between output and label:</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.loss.SoftmaxCrossEntropyLoss" title="mxnet.gluon.loss.SoftmaxCrossEntropyLoss"><code class="xref py py-obj docutils literal"><span class="pre">SoftmaxCrossEntropyLoss</span></code></a></td>
<td>Computes the softmax cross entropy loss.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.loss.KLDivLoss" title="mxnet.gluon.loss.KLDivLoss"><code class="xref py py-obj docutils literal"><span class="pre">KLDivLoss</span></code></a></td>
<td>The Kullback-Leibler divergence loss.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="utilities">
<span id="utilities"></span><h2>Utilities<a class="headerlink" href="#utilities" title="Permalink to this headline"></a></h2>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.utils.split_data" title="mxnet.gluon.utils.split_data"><code class="xref py py-obj docutils literal"><span class="pre">split_data</span></code></a></td>
<td>Splits an NDArray into <cite>num_slice</cite> slices along <cite>batch_axis</cite>.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.utils.split_and_load" title="mxnet.gluon.utils.split_and_load"><code class="xref py py-obj docutils literal"><span class="pre">split_and_load</span></code></a></td>
<td>Splits an NDArray into <cite>len(ctx_list)</cite> slices along <cite>batch_axis</cite> and loads each slice to one context in <cite>ctx_list</cite>.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.utils.clip_global_norm" title="mxnet.gluon.utils.clip_global_norm"><code class="xref py py-obj docutils literal"><span class="pre">clip_global_norm</span></code></a></td>
<td>Rescales NDArrays so that the sum of their 2-norm is smaller than <cite>max_norm</cite>.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="data">
<span id="data"></span><h2>Data<a class="headerlink" href="#data" title="Permalink to this headline"></a></h2>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.data.Dataset" title="mxnet.gluon.data.Dataset"><code class="xref py py-obj docutils literal"><span class="pre">Dataset</span></code></a></td>
<td>Abstract dataset class.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.data.ArrayDataset" title="mxnet.gluon.data.ArrayDataset"><code class="xref py py-obj docutils literal"><span class="pre">ArrayDataset</span></code></a></td>
<td>A dataset with a data array and a label array.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.data.RecordFileDataset" title="mxnet.gluon.data.RecordFileDataset"><code class="xref py py-obj docutils literal"><span class="pre">RecordFileDataset</span></code></a></td>
<td>A dataset wrapping over a RecordIO (.rec) file.</td>
</tr>
<tr class="row-even"><td><code class="xref py py-obj docutils literal"><span class="pre">ImageRecordDataset</span></code></td>
<td></td>
</tr>
</tbody>
</table>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.data.Sampler" title="mxnet.gluon.data.Sampler"><code class="xref py py-obj docutils literal"><span class="pre">Sampler</span></code></a></td>
<td>Base class for samplers.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.data.SequentialSampler" title="mxnet.gluon.data.SequentialSampler"><code class="xref py py-obj docutils literal"><span class="pre">SequentialSampler</span></code></a></td>
<td>Samples elements from [0, length) sequentially.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.data.RandomSampler" title="mxnet.gluon.data.RandomSampler"><code class="xref py py-obj docutils literal"><span class="pre">RandomSampler</span></code></a></td>
<td>Samples elements from [0, length) randomly without replacement.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.data.BatchSampler" title="mxnet.gluon.data.BatchSampler"><code class="xref py py-obj docutils literal"><span class="pre">BatchSampler</span></code></a></td>
<td>Wraps over another <cite>Sampler</cite> and return mini-batches of samples.</td>
</tr>
</tbody>
</table>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.data.DataLoader" title="mxnet.gluon.data.DataLoader"><code class="xref py py-obj docutils literal"><span class="pre">DataLoader</span></code></a></td>
<td>Loads data from a dataset and returns mini-batches of data.</td>
</tr>
</tbody>
</table>
<div class="section" id="vision">
<span id="vision"></span><h3>Vision<a class="headerlink" href="#vision" title="Permalink to this headline"></a></h3>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.data.vision.MNIST" title="mxnet.gluon.data.vision.MNIST"><code class="xref py py-obj docutils literal"><span class="pre">MNIST</span></code></a></td>
<td>MNIST handwritten digits dataset from <a class="reference external" href="http://yann.lecun.com/exdb/mnist">http://yann.lecun.com/exdb/mnist</a>.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.data.vision.FashionMNIST" title="mxnet.gluon.data.vision.FashionMNIST"><code class="xref py py-obj docutils literal"><span class="pre">FashionMNIST</span></code></a></td>
<td>A dataset of Zalando’s article images consisting of fashion products, a drop-in replacement of the original MNIST dataset from <a class="reference external" href="https://github.com/zalandoresearch/fashion-mnist">https://github.com/zalandoresearch/fashion-mnist</a>.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.data.vision.CIFAR10" title="mxnet.gluon.data.vision.CIFAR10"><code class="xref py py-obj docutils literal"><span class="pre">CIFAR10</span></code></a></td>
<td>CIFAR10 image classification dataset from <a class="reference external" href="https://www.cs.toronto.edu/~kriz/cifar.html">https://www.cs.toronto.edu/~kriz/cifar.html</a>.</td>
</tr>
</tbody>
</table>
</div>
</div>
<div class="section" id="model-zoo">
<span id="model-zoo"></span><h2>Model Zoo<a class="headerlink" href="#model-zoo" title="Permalink to this headline"></a></h2>
<p>Model zoo provides pre-defined and pre-trained models to help bootstrap machine learning applications.</p>
<div class="section" id="vision">
<span id="id2"></span><h3>Vision<a class="headerlink" href="#vision" title="Permalink to this headline"></a></h3>
<span class="target" id="module-mxnet.gluon.model_zoo.vision"></span><p>Module for pre-defined neural network models.</p>
<p>This module contains definitions for the following model architectures:
- <a class="reference external" href="https://arxiv.org/abs/1404.5997">AlexNet</a>
- <a class="reference external" href="https://arxiv.org/abs/1608.06993">DenseNet</a>
- <a class="reference external" href="http://arxiv.org/abs/1512.00567">Inception V3</a>
- <a class="reference external" href="https://arxiv.org/abs/1512.03385">ResNet V1</a>
- <a class="reference external" href="https://arxiv.org/abs/1512.03385">ResNet V2</a>
- <a class="reference external" href="https://arxiv.org/abs/1602.07360">SqueezeNet</a>
- <a class="reference external" href="https://arxiv.org/abs/1409.1556">VGG</a></p>
<p>You can construct a model with random weights by calling its constructor:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">mxnet.gluon.model_zoo.vision</span> <span class="kn">as</span> <span class="nn">models</span>
<span class="n">resnet18</span> <span class="o">=</span> <span class="n">models</span><span class="o">.</span><span class="n">resnet18_v1</span><span class="p">()</span>
<span class="n">alexnet</span> <span class="o">=</span> <span class="n">models</span><span class="o">.</span><span class="n">alexnet</span><span class="p">()</span>
<span class="n">squeezenet</span> <span class="o">=</span> <span class="n">models</span><span class="o">.</span><span class="n">squeezenet1_0</span><span class="p">()</span>
<span class="n">densenet</span> <span class="o">=</span> <span class="n">models</span><span class="o">.</span><span class="n">densenet161</span><span class="p">()</span>
</pre></div>
</div>
<p>We provide pre-trained models for all the models except ResNet V2.
These can be constructed by passing
<code class="docutils literal"><span class="pre">pretrained=True</span></code>:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">mxnet.gluon.model_zoo.vision</span> <span class="kn">as</span> <span class="nn">models</span>
<span class="n">resnet18</span> <span class="o">=</span> <span class="n">models</span><span class="o">.</span><span class="n">resnet18_v1</span><span class="p">(</span><span class="n">pretrained</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
<span class="n">alexnet</span> <span class="o">=</span> <span class="n">models</span><span class="o">.</span><span class="n">alexnet</span><span class="p">(</span><span class="n">pretrained</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
</pre></div>
</div>
<p>Pretrained models are converted from torchvision.
All pre-trained models expect input images normalized in the same way,
i.e. mini-batches of 3-channel RGB images of shape (N x 3 x H x W),
where N is the batch size, and H and W are expected to be at least 224.
The images have to be loaded in to a range of [0, 1] and then normalized
using <code class="docutils literal"><span class="pre">mean</span> <span class="pre">=</span> <span class="pre">[0.485,</span> <span class="pre">0.456,</span> <span class="pre">0.406]</span></code> and <code class="docutils literal"><span class="pre">std</span> <span class="pre">=</span> <span class="pre">[0.229,</span> <span class="pre">0.224,</span> <span class="pre">0.225]</span></code>.
The transformation should preferably happen at preprocessing. You can use
<code class="docutils literal"><span class="pre">mx.image.color_normalize</span></code> for such transformation:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">image</span> <span class="o">=</span> <span class="n">image</span><span class="o">/</span><span class="mi">255</span>
<span class="n">normalized</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">image</span><span class="o">.</span><span class="n">color_normalize</span><span class="p">(</span><span class="n">image</span><span class="p">,</span>
<span class="n">mean</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mf">0.485</span><span class="p">,</span> <span class="mf">0.456</span><span class="p">,</span> <span class="mf">0.406</span><span class="p">]),</span>
<span class="n">std</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mf">0.229</span><span class="p">,</span> <span class="mf">0.224</span><span class="p">,</span> <span class="mf">0.225</span><span class="p">]))</span>
</pre></div>
</div>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.get_model" title="mxnet.gluon.model_zoo.vision.get_model"><code class="xref py py-obj docutils literal"><span class="pre">get_model</span></code></a></td>
<td>Returns a pre-defined model by name</td>
</tr>
</tbody>
</table>
<div class="section" id="resnet">
<span id="resnet"></span><h4>ResNet<a class="headerlink" href="#resnet" title="Permalink to this headline"></a></h4>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet18_v1" title="mxnet.gluon.model_zoo.vision.resnet18_v1"><code class="xref py py-obj docutils literal"><span class="pre">resnet18_v1</span></code></a></td>
<td>ResNet-18 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet34_v1" title="mxnet.gluon.model_zoo.vision.resnet34_v1"><code class="xref py py-obj docutils literal"><span class="pre">resnet34_v1</span></code></a></td>
<td>ResNet-34 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet50_v1" title="mxnet.gluon.model_zoo.vision.resnet50_v1"><code class="xref py py-obj docutils literal"><span class="pre">resnet50_v1</span></code></a></td>
<td>ResNet-50 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet101_v1" title="mxnet.gluon.model_zoo.vision.resnet101_v1"><code class="xref py py-obj docutils literal"><span class="pre">resnet101_v1</span></code></a></td>
<td>ResNet-101 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet152_v1" title="mxnet.gluon.model_zoo.vision.resnet152_v1"><code class="xref py py-obj docutils literal"><span class="pre">resnet152_v1</span></code></a></td>
<td>ResNet-152 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet18_v2" title="mxnet.gluon.model_zoo.vision.resnet18_v2"><code class="xref py py-obj docutils literal"><span class="pre">resnet18_v2</span></code></a></td>
<td>ResNet-18 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet34_v2" title="mxnet.gluon.model_zoo.vision.resnet34_v2"><code class="xref py py-obj docutils literal"><span class="pre">resnet34_v2</span></code></a></td>
<td>ResNet-34 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet50_v2" title="mxnet.gluon.model_zoo.vision.resnet50_v2"><code class="xref py py-obj docutils literal"><span class="pre">resnet50_v2</span></code></a></td>
<td>ResNet-50 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet101_v2" title="mxnet.gluon.model_zoo.vision.resnet101_v2"><code class="xref py py-obj docutils literal"><span class="pre">resnet101_v2</span></code></a></td>
<td>ResNet-101 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.resnet152_v2" title="mxnet.gluon.model_zoo.vision.resnet152_v2"><code class="xref py py-obj docutils literal"><span class="pre">resnet152_v2</span></code></a></td>
<td>ResNet-152 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</td>
</tr>
</tbody>
</table>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.ResNetV1" title="mxnet.gluon.model_zoo.vision.ResNetV1"><code class="xref py py-obj docutils literal"><span class="pre">ResNetV1</span></code></a></td>
<td>ResNet V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.ResNetV2" title="mxnet.gluon.model_zoo.vision.ResNetV2"><code class="xref py py-obj docutils literal"><span class="pre">ResNetV2</span></code></a></td>
<td>ResNet V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.BasicBlockV1" title="mxnet.gluon.model_zoo.vision.BasicBlockV1"><code class="xref py py-obj docutils literal"><span class="pre">BasicBlockV1</span></code></a></td>
<td>BasicBlock V1 from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.BasicBlockV2" title="mxnet.gluon.model_zoo.vision.BasicBlockV2"><code class="xref py py-obj docutils literal"><span class="pre">BasicBlockV2</span></code></a></td>
<td>BasicBlock V2 from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.BottleneckV1" title="mxnet.gluon.model_zoo.vision.BottleneckV1"><code class="xref py py-obj docutils literal"><span class="pre">BottleneckV1</span></code></a></td>
<td>Bottleneck V1 from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.BottleneckV2" title="mxnet.gluon.model_zoo.vision.BottleneckV2"><code class="xref py py-obj docutils literal"><span class="pre">BottleneckV2</span></code></a></td>
<td>Bottleneck V2 from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.get_resnet" title="mxnet.gluon.model_zoo.vision.get_resnet"><code class="xref py py-obj docutils literal"><span class="pre">get_resnet</span></code></a></td>
<td>ResNet V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="vgg">
<span id="vgg"></span><h4>VGG<a class="headerlink" href="#vgg" title="Permalink to this headline"></a></h4>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.vgg11" title="mxnet.gluon.model_zoo.vision.vgg11"><code class="xref py py-obj docutils literal"><span class="pre">vgg11</span></code></a></td>
<td>VGG-11 model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.vgg13" title="mxnet.gluon.model_zoo.vision.vgg13"><code class="xref py py-obj docutils literal"><span class="pre">vgg13</span></code></a></td>
<td>VGG-13 model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.vgg16" title="mxnet.gluon.model_zoo.vision.vgg16"><code class="xref py py-obj docutils literal"><span class="pre">vgg16</span></code></a></td>
<td>VGG-16 model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.vgg19" title="mxnet.gluon.model_zoo.vision.vgg19"><code class="xref py py-obj docutils literal"><span class="pre">vgg19</span></code></a></td>
<td>VGG-19 model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.vgg11_bn" title="mxnet.gluon.model_zoo.vision.vgg11_bn"><code class="xref py py-obj docutils literal"><span class="pre">vgg11_bn</span></code></a></td>
<td>VGG-11 model with batch normalization from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.vgg13_bn" title="mxnet.gluon.model_zoo.vision.vgg13_bn"><code class="xref py py-obj docutils literal"><span class="pre">vgg13_bn</span></code></a></td>
<td>VGG-13 model with batch normalization from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.vgg16_bn" title="mxnet.gluon.model_zoo.vision.vgg16_bn"><code class="xref py py-obj docutils literal"><span class="pre">vgg16_bn</span></code></a></td>
<td>VGG-16 model with batch normalization from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.vgg19_bn" title="mxnet.gluon.model_zoo.vision.vgg19_bn"><code class="xref py py-obj docutils literal"><span class="pre">vgg19_bn</span></code></a></td>
<td>VGG-19 model with batch normalization from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
</tbody>
</table>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.VGG" title="mxnet.gluon.model_zoo.vision.VGG"><code class="xref py py-obj docutils literal"><span class="pre">VGG</span></code></a></td>
<td>VGG model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.get_vgg" title="mxnet.gluon.model_zoo.vision.get_vgg"><code class="xref py py-obj docutils literal"><span class="pre">get_vgg</span></code></a></td>
<td>VGG model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="alexnet">
<span id="alexnet"></span><h4>Alexnet<a class="headerlink" href="#alexnet" title="Permalink to this headline"></a></h4>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.alexnet" title="mxnet.gluon.model_zoo.vision.alexnet"><code class="xref py py-obj docutils literal"><span class="pre">alexnet</span></code></a></td>
<td>AlexNet model from the <a class="reference external" href="https://arxiv.org/abs/1404.5997">“One weird trick...”</a> paper.</td>
</tr>
</tbody>
</table>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.AlexNet" title="mxnet.gluon.model_zoo.vision.AlexNet"><code class="xref py py-obj docutils literal"><span class="pre">AlexNet</span></code></a></td>
<td>AlexNet model from the <a class="reference external" href="https://arxiv.org/abs/1404.5997">“One weird trick...”</a> paper.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="densenet">
<span id="densenet"></span><h4>DenseNet<a class="headerlink" href="#densenet" title="Permalink to this headline"></a></h4>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.densenet121" title="mxnet.gluon.model_zoo.vision.densenet121"><code class="xref py py-obj docutils literal"><span class="pre">densenet121</span></code></a></td>
<td>Densenet-BC 121-layer model from the <a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.densenet161" title="mxnet.gluon.model_zoo.vision.densenet161"><code class="xref py py-obj docutils literal"><span class="pre">densenet161</span></code></a></td>
<td>Densenet-BC 161-layer model from the <a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.densenet169" title="mxnet.gluon.model_zoo.vision.densenet169"><code class="xref py py-obj docutils literal"><span class="pre">densenet169</span></code></a></td>
<td>Densenet-BC 169-layer model from the <a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.densenet201" title="mxnet.gluon.model_zoo.vision.densenet201"><code class="xref py py-obj docutils literal"><span class="pre">densenet201</span></code></a></td>
<td>Densenet-BC 201-layer model from the <a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</td>
</tr>
</tbody>
</table>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.DenseNet" title="mxnet.gluon.model_zoo.vision.DenseNet"><code class="xref py py-obj docutils literal"><span class="pre">DenseNet</span></code></a></td>
<td>Densenet-BC model from the <a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="squeezenet">
<span id="squeezenet"></span><h4>SqueezeNet<a class="headerlink" href="#squeezenet" title="Permalink to this headline"></a></h4>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.squeezenet1_0" title="mxnet.gluon.model_zoo.vision.squeezenet1_0"><code class="xref py py-obj docutils literal"><span class="pre">squeezenet1_0</span></code></a></td>
<td>SqueezeNet 1.0 model from the <a class="reference external" href="https://arxiv.org/abs/1602.07360">“SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &lt;0.5MB model size”</a> paper.</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.squeezenet1_1" title="mxnet.gluon.model_zoo.vision.squeezenet1_1"><code class="xref py py-obj docutils literal"><span class="pre">squeezenet1_1</span></code></a></td>
<td>SqueezeNet 1.1 model from the <a class="reference external" href="https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1">official SqueezeNet repo</a>.</td>
</tr>
</tbody>
</table>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.SqueezeNet" title="mxnet.gluon.model_zoo.vision.SqueezeNet"><code class="xref py py-obj docutils literal"><span class="pre">SqueezeNet</span></code></a></td>
<td>SqueezeNet model from the <a class="reference external" href="https://arxiv.org/abs/1602.07360">“SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &lt;0.5MB model size”</a> paper.</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="inception">
<span id="inception"></span><h4>Inception<a class="headerlink" href="#inception" title="Permalink to this headline"></a></h4>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.inception_v3" title="mxnet.gluon.model_zoo.vision.inception_v3"><code class="xref py py-obj docutils literal"><span class="pre">inception_v3</span></code></a></td>
<td>Inception v3 model from <a class="reference external" href="http://arxiv.org/abs/1512.00567">“Rethinking the Inception Architecture for Computer Vision”</a> paper.</td>
</tr>
</tbody>
</table>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%"/>
<col width="90%"/>
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#mxnet.gluon.model_zoo.vision.Inception3" title="mxnet.gluon.model_zoo.vision.Inception3"><code class="xref py py-obj docutils literal"><span class="pre">Inception3</span></code></a></td>
<td>Inception v3 model from <a class="reference external" href="http://arxiv.org/abs/1512.00567">“Rethinking the Inception Architecture for Computer Vision”</a> paper.</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<div class="section" id="api-reference">
<span id="api-reference"></span><h2>API Reference<a class="headerlink" href="#api-reference" title="Permalink to this headline"></a></h2>
<script src="../../_static/js/auto_module_index.js" type="text/javascript"></script><dl class="class">
<dt id="mxnet.gluon.Parameter">
<em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">Parameter</code><span class="sig-paren">(</span><em>name</em>, <em>grad_req='write'</em>, <em>shape=None</em>, <em>dtype=&lt;type 'numpy.float32'&gt;</em>, <em>lr_mult=1.0</em>, <em>wd_mult=1.0</em>, <em>init=None</em>, <em>allow_deferred_init=False</em>, <em>differentiable=True</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter" title="Permalink to this definition"></a></dt>
<dd><p>A Container holding parameters (weights) of <code class="docutils literal"><span class="pre">Block</span></code>s.</p>
<p><cite>Parameter</cite> holds a copy of the parameter on each <cite>Context</cite> after
it is initialized with <cite>Parameter.initialize(...)</cite>. If <cite>grad_req</cite> is
not <cite>null</cite>, it will also hold a gradient array on each <cite>Context</cite>:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">ctx</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">16</span><span class="p">,</span> <span class="mi">100</span><span class="p">),</span> <span class="n">ctx</span><span class="o">=</span><span class="n">ctx</span><span class="p">)</span>
<span class="n">w</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="s1">'fc_weight'</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">64</span><span class="p">,</span> <span class="mi">100</span><span class="p">),</span> <span class="n">init</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Xavier</span><span class="p">())</span>
<span class="n">b</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="s1">'fc_bias'</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">64</span><span class="p">,),</span> <span class="n">init</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Zero</span><span class="p">())</span>
<span class="n">w</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="n">ctx</span><span class="p">)</span>
<span class="n">b</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="n">ctx</span><span class="p">)</span>
<span class="n">out</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">FullyConnected</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">w</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">ctx</span><span class="p">),</span> <span class="n">b</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">ctx</span><span class="p">),</span> <span class="n">num_hidden</span><span class="o">=</span><span class="mi">64</span><span class="p">)</span>
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>name</strong> (<em>str</em>) – Name of this parameter.</li>
<li><strong>grad_req</strong> (<em>{'write', 'add', 'null'}, default 'write'</em>) – <p>Specifies how to update gradient to grad arrays.</p>
<ul>
<li>‘write’ means every time the gradient is written to the grad <cite>NDArray</cite>.</li>
<li>‘add’ means every time the gradient is added to the grad <cite>NDArray</cite>. You need
to manually call <cite>zero_grad()</cite> to clear the gradient buffer before each
iteration when using this option.</li>
<li>‘null’ means gradient is not requested for this parameter. Gradient arrays
will not be allocated.</li>
</ul>
</li>
<li><strong>shape</strong> (<em>tuple of int, default None</em>) – Shape of this parameter. By default shape is not specified. Parameter with
unknown shape can be used for <cite>Symbol</cite> API, but <cite>init</cite> will throw an error
when using <cite>NDArray</cite> API.</li>
<li><strong>dtype</strong> (<em>numpy.dtype or str, default 'float32'</em>) – Data type of this parameter. For example, numpy.float32 or ‘float32’.</li>
<li><strong>lr_mult</strong> (<em>float, default 1.0</em>) – Learning rate multiplier. Learning rate will be multiplied by lr_mult
when updating this parameter with optimizer.</li>
<li><strong>wd_mult</strong> (<em>float, default 1.0</em>) – Weight decay multiplier (L2 regularizer coefficient). Works similar to lr_mult.</li>
<li><strong>init</strong> (<em>Initializer, default None</em>) – Initializer of this parameter. Will use the global initializer by default.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="attribute">
<dt id="mxnet.gluon.Parameter.grad_req">
<code class="descname">grad_req</code><a class="headerlink" href="#mxnet.gluon.Parameter.grad_req" title="Permalink to this definition"></a></dt>
<dd><p><em>{‘write’, ‘add’, ‘null’}</em></p>
<p>This can be set before or after initialization. Setting grad_req to null
with <cite>x.grad_req = ‘null’</cite> saves memory and computation when you don’t
need gradient w.r.t x.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.initialize">
<code class="descname">initialize</code><span class="sig-paren">(</span><em>init=None</em>, <em>ctx=None</em>, <em>default_init=&lt;mxnet.initializer.Uniform object&gt;</em>, <em>force_reinit=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.initialize" title="Permalink to this definition"></a></dt>
<dd><p>Initializes parameter and gradient arrays. Only used for <cite>NDArray</cite> API.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>init</strong> (<a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – The initializer to use. Overrides <cite>Parameter.init</cite> and default_init.</li>
<li><strong>ctx</strong> (Context or list of Context, defaults to <cite>context.current_context()</cite>.) – <p>Initialize Parameter on given context. If ctx is a list of Context, a
copy will be made for each context.</p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">Copies are independent arrays. User is responsible for keeping
their values consistent when updating. Normally <cite>gluon.Trainer</cite> does this for you.</p>
</div>
</li>
<li><strong>default_init</strong> (<a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Default initializer is used when both <cite>init</cite> and <cite>Parameter.init</cite> are <cite>None</cite>.</li>
<li><strong>force_reinit</strong> (<em>bool, default False</em>) – Whether to force re-initialization if parameter is already initialized.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">weight</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="s1">'weight'</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">))</span>
<span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span>
<span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">data</span><span class="p">()</span>
<span class="go">[[-0.01068833 0.01729892]</span>
<span class="go"> [ 0.02042518 -0.01618656]]</span>
<span class="go">&lt;NDArray 2x2 @cpu(0)&gt;</span>
<span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">grad</span><span class="p">()</span>
<span class="go">[[ 0. 0.]</span>
<span class="go"> [ 0. 0.]]</span>
<span class="go">&lt;NDArray 2x2 @cpu(0)&gt;</span>
<span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="p">[</span><span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">1</span><span class="p">)])</span>
<span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span>
<span class="go">[[-0.00873779 -0.02834515]</span>
<span class="go"> [ 0.05484822 -0.06206018]]</span>
<span class="go">&lt;NDArray 2x2 @gpu(0)&gt;</span>
<span class="gp">>>> </span><span class="n">weight</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
<span class="go">[[-0.00873779 -0.02834515]</span>
<span class="go"> [ 0.05484822 -0.06206018]]</span>
<span class="go">&lt;NDArray 2x2 @gpu(1)&gt;</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.reset_ctx">
<code class="descname">reset_ctx</code><span class="sig-paren">(</span><em>ctx</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.reset_ctx" title="Permalink to this definition"></a></dt>
<dd><p>Re-assign Parameter to other contexts.</p>
<dl class="docutils">
<dt>ctx <span class="classifier-delimiter">:</span> <span class="classifier">Context or list of Context, default <cite>context.current_context()</cite>.</span></dt>
<dd>Assign Parameter to given context. If ctx is a list of Context, a
copy will be made for each context.</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.set_data">
<code class="descname">set_data</code><span class="sig-paren">(</span><em>data</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.set_data" title="Permalink to this definition"></a></dt>
<dd><p>Sets this parameter’s value on all contexts to data.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.data">
<code class="descname">data</code><span class="sig-paren">(</span><em>ctx=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.data" title="Permalink to this definition"></a></dt>
<dd><p>Returns a copy of this parameter on one context. Must have been
initialized on this context before.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>ctx</strong> (<em>Context</em>) – Desired context.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"></td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">NDArray on ctx</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.list_data">
<code class="descname">list_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.list_data" title="Permalink to this definition"></a></dt>
<dd><p>Returns copies of this parameter on all contexts, in the same order
as creation.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.grad">
<code class="descname">grad</code><span class="sig-paren">(</span><em>ctx=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.grad" title="Permalink to this definition"></a></dt>
<dd><p>Returns a gradient buffer for this parameter on one context.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>ctx</strong> (<em>Context</em>) – Desired context.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.list_grad">
<code class="descname">list_grad</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.list_grad" title="Permalink to this definition"></a></dt>
<dd><p>Returns gradient buffers on all contexts, in the same order
as <cite>values</cite>.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.list_ctx">
<code class="descname">list_ctx</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.list_ctx" title="Permalink to this definition"></a></dt>
<dd><p>Returns a list of contexts this parameter is initialized on.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.zero_grad">
<code class="descname">zero_grad</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.zero_grad" title="Permalink to this definition"></a></dt>
<dd><p>Sets gradient buffer on all contexts to 0. No action is taken if
parameter is uninitialized or doesn’t require gradient.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Parameter.var">
<code class="descname">var</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Parameter.var" title="Permalink to this definition"></a></dt>
<dd><p>Returns a symbol representing this parameter.</p>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.ParameterDict">
<em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">ParameterDict</code><span class="sig-paren">(</span><em>prefix=''</em>, <em>shared=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict" title="Permalink to this definition"></a></dt>
<dd><p>A dictionary managing a set of parameters.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>prefix</strong> (<em>str, default ''</em>) – The prefix to be prepended to all Parameters’ names created by this dict.</li>
<li><strong>shared</strong> (<em>ParameterDict or None</em>) – If not <cite>None</cite>, when this dict’s <cite>get</cite> method creates a new parameter, will
first try to retrieve it from <cite>shared</cite> dict. Usually used for sharing
parameters with another <cite>Block</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="attribute">
<dt id="mxnet.gluon.ParameterDict.prefix">
<code class="descname">prefix</code><a class="headerlink" href="#mxnet.gluon.ParameterDict.prefix" title="Permalink to this definition"></a></dt>
<dd><p>Prefix of this dict. It will be prepended to Parameters’ name created
with <cite>get</cite>.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.ParameterDict.get">
<code class="descname">get</code><span class="sig-paren">(</span><em>name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.get" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves a <cite>Parameter</cite> with name <cite>self.prefix+name</cite>. If not found,
<cite>get</cite> will first try to retrieve it from <cite>shared</cite> dict. If still not
found, <cite>get</cite> will create a new <cite>Parameter</cite> with key-word arguments and
insert it to self.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>name</strong> (<em>str</em>) – Name of the desired Parameter. It will be prepended with this dictionary’s
prefix.</li>
<li><strong>**kwargs</strong><p>The rest of key-word arguments for the created <cite>Parameter</cite>.</p>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The created or retrieved <cite>Parameter</cite>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#mxnet.gluon.Parameter" title="mxnet.gluon.Parameter">Parameter</a></p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.ParameterDict.update">
<code class="descname">update</code><span class="sig-paren">(</span><em>other</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.update" title="Permalink to this definition"></a></dt>
<dd><p>Copies all Parameters in <cite>other</cite> to self.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.ParameterDict.initialize">
<code class="descname">initialize</code><span class="sig-paren">(</span><em>init=&lt;mxnet.initializer.Uniform object&gt;</em>, <em>ctx=None</em>, <em>verbose=False</em>, <em>force_reinit=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.initialize" title="Permalink to this definition"></a></dt>
<dd><p>Initializes all Parameters managed by this dictionary to be used for <cite>NDArray</cite>
API. It has no effect when using <cite>Symbol</cite> API.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>init</strong> (<a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Global default Initializer to be used when <cite>Parameter.init</cite> is <cite>None</cite>.
Otherwise, <cite>Parameter.init</cite> takes precedence.</li>
<li><strong>ctx</strong> (<em>Context or list of Context</em>) – Keeps a copy of Parameters on one or many context(s).</li>
<li><strong>force_reinit</strong> (<em>bool, default False</em>) – Whether to force re-initialization if parameter is already initialized.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.ParameterDict.zero_grad">
<code class="descname">zero_grad</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.zero_grad" title="Permalink to this definition"></a></dt>
<dd><p>Sets all Parameters’ gradient buffer to 0.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.ParameterDict.reset_ctx">
<code class="descname">reset_ctx</code><span class="sig-paren">(</span><em>ctx</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.reset_ctx" title="Permalink to this definition"></a></dt>
<dd><p>Re-assign all Parameters to other contexts.</p>
<dl class="docutils">
<dt>ctx <span class="classifier-delimiter">:</span> <span class="classifier">Context or list of Context, default <cite>context.current_context()</cite>.</span></dt>
<dd>Assign Parameter to given context. If ctx is a list of Context, a
copy will be made for each context.</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.ParameterDict.setattr">
<code class="descname">setattr</code><span class="sig-paren">(</span><em>name</em>, <em>value</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.setattr" title="Permalink to this definition"></a></dt>
<dd><p>Set an attribute to a new value for all Parameters.</p>
<p>For example, set grad_req to null if you don’t need gradient w.r.t a
model’s Parameters:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">model</span><span class="o">.</span><span class="n">collect_params</span><span class="p">()</span><span class="o">.</span><span class="n">setattr</span><span class="p">(</span><span class="s1">'grad_req'</span><span class="p">,</span> <span class="s1">'null'</span><span class="p">)</span>
</pre></div>
</div>
<p>or change the learning rate multiplier:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">model</span><span class="o">.</span><span class="n">collect_params</span><span class="p">()</span><span class="o">.</span><span class="n">setattr</span><span class="p">(</span><span class="s1">'lr_mult'</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">)</span>
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>name</strong> (<em>str</em>) – Name of the attribute.</li>
<li><strong>value</strong> (<em>valid type for attribute name</em>) – The new value for the attribute.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.ParameterDict.save">
<code class="descname">save</code><span class="sig-paren">(</span><em>filename</em>, <em>strip_prefix=''</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.save" title="Permalink to this definition"></a></dt>
<dd><p>Save parameters to file.</p>
<dl class="docutils">
<dt>filename <span class="classifier-delimiter">:</span> <span class="classifier">str</span></dt>
<dd>Path to parameter file.</dd>
<dt>strip_prefix <span class="classifier-delimiter">:</span> <span class="classifier">str, default ‘’</span></dt>
<dd>Strip prefix from parameter names before saving.</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.ParameterDict.load">
<code class="descname">load</code><span class="sig-paren">(</span><em>filename</em>, <em>ctx</em>, <em>allow_missing=False</em>, <em>ignore_extra=False</em>, <em>restore_prefix=''</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.ParameterDict.load" title="Permalink to this definition"></a></dt>
<dd><p>Load parameters from file.</p>
<dl class="docutils">
<dt>filename <span class="classifier-delimiter">:</span> <span class="classifier">str</span></dt>
<dd>Path to parameter file.</dd>
<dt>ctx <span class="classifier-delimiter">:</span> <span class="classifier">Context or list of Context</span></dt>
<dd>Context(s) initialize loaded parameters on.</dd>
<dt>allow_missing <span class="classifier-delimiter">:</span> <span class="classifier">bool, default False</span></dt>
<dd>Whether to silently skip loading parameters that are not present in the file.</dd>
<dt>ignore_extra <span class="classifier-delimiter">:</span> <span class="classifier">bool, default False</span></dt>
<dd>Whether to silently ignore parameters from the file that are not
present in this ParameterDict.</dd>
<dt>restore_prefix <span class="classifier-delimiter">:</span> <span class="classifier">str, default ‘’</span></dt>
<dd>Prepend this prefix to names of stored parameters before loading.</dd>
</dl>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.Block">
<em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">Block</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block" title="Permalink to this definition"></a></dt>
<dd><p>Base class for all neural network layers and models. Your models should
subclass this class.</p>
<p><cite>Block</cite> can be nested recursively in a tree structure. You can create and
assign child <cite>Block</cite> as regular attributes:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">mxnet.gluon</span> <span class="kn">import</span> <span class="n">Block</span><span class="p">,</span> <span class="n">nn</span>
<span class="kn">from</span> <span class="nn">mxnet</span> <span class="kn">import</span> <span class="n">ndarray</span> <span class="k">as</span> <span class="n">F</span>
<span class="k">class</span> <span class="nc">Model</span><span class="p">(</span><span class="n">Block</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">Model</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="c1"># use name_scope to give child Blocks appropriate names.</span>
<span class="c1"># It also allows sharing Parameters between Blocks recursively.</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dense0</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dense1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dense0</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dense1</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">Model</span><span class="p">()</span>
<span class="n">model</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">ctx</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span>
<span class="n">model</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">10</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> <span class="n">ctx</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(</span><span class="mi">0</span><span class="p">)))</span>
</pre></div>
</div>
<p>Child <cite>Block</cite> assigned this way will be registered and <cite>collect_params</cite>
will collect their Parameters recursively.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>prefix</strong> (<em>str</em>) – Prefix acts like a name space. It will be prepended to the names of all
Parameters and child <cite>Block</cite>s in this <cite>Block</cite>’s <cite>name_scope</cite>. Prefix
should be unique within one model to prevent name collisions.</li>
<li><strong>params</strong> (<em>ParameterDict or None</em>) – <p><cite>ParameterDict</cite> for sharing weights with the new <cite>Block</cite>. For example,
if you want <cite>dense1</cite> to share <cite>dense0</cite>‘s weights, you can do:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">dense0</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span>
<span class="n">dense1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="n">params</span><span class="o">=</span><span class="n">dense0</span><span class="o">.</span><span class="n">collect_params</span><span class="p">())</span>
</pre></div>
</div>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="mxnet.gluon.Block.__setattr__">
<code class="descname">__setattr__</code><span class="sig-paren">(</span><em>name</em>, <em>value</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.__setattr__" title="Permalink to this definition"></a></dt>
<dd><p>Registers parameters.</p>
</dd></dl>
<dl class="attribute">
<dt id="mxnet.gluon.Block.prefix">
<code class="descname">prefix</code><a class="headerlink" href="#mxnet.gluon.Block.prefix" title="Permalink to this definition"></a></dt>
<dd><p>Prefix of this <cite>Block</cite>.</p>
</dd></dl>
<dl class="attribute">
<dt id="mxnet.gluon.Block.name">
<code class="descname">name</code><a class="headerlink" href="#mxnet.gluon.Block.name" title="Permalink to this definition"></a></dt>
<dd><p>Name of this <cite>Block</cite>, without ‘_’ in the end.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Block.name_scope">
<code class="descname">name_scope</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.name_scope" title="Permalink to this definition"></a></dt>
<dd><p>Returns a name space object managing a child <cite>Block</cite> and parameter
names. Should be used within a <cite>with</cite> statement:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dense</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="mxnet.gluon.Block.params">
<code class="descname">params</code><a class="headerlink" href="#mxnet.gluon.Block.params" title="Permalink to this definition"></a></dt>
<dd><p>Returns this <cite>Block</cite>‘s parameter dictionary (does not include its
children’s parameters).</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Block.collect_params">
<code class="descname">collect_params</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.collect_params" title="Permalink to this definition"></a></dt>
<dd><p>Returns a <cite>ParameterDict</cite> containing this <cite>Block</cite> and all of its
children’s Parameters.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Block.save_params">
<code class="descname">save_params</code><span class="sig-paren">(</span><em>filename</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.save_params" title="Permalink to this definition"></a></dt>
<dd><p>Save parameters to file.</p>
<dl class="docutils">
<dt>filename <span class="classifier-delimiter">:</span> <span class="classifier">str</span></dt>
<dd>Path to file.</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Block.load_params">
<code class="descname">load_params</code><span class="sig-paren">(</span><em>filename</em>, <em>ctx</em>, <em>allow_missing=False</em>, <em>ignore_extra=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.load_params" title="Permalink to this definition"></a></dt>
<dd><p>Load parameters from file.</p>
<dl class="docutils">
<dt>filename <span class="classifier-delimiter">:</span> <span class="classifier">str</span></dt>
<dd>Path to parameter file.</dd>
<dt>ctx <span class="classifier-delimiter">:</span> <span class="classifier">Context or list of Context</span></dt>
<dd>Context(s) to initialize loaded parameters on.</dd>
<dt>allow_missing <span class="classifier-delimiter">:</span> <span class="classifier">bool, default False</span></dt>
<dd>Whether to silently skip loading parameters not represented in the file.</dd>
<dt>ignore_extra <span class="classifier-delimiter">:</span> <span class="classifier">bool, default False</span></dt>
<dd>Whether to silently ignore parameters from the file that are not
present in this Block.</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Block.register_child">
<code class="descname">register_child</code><span class="sig-paren">(</span><em>block</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.register_child" title="Permalink to this definition"></a></dt>
<dd><p>Registers block as a child of self. <cite>Block</cite>s assigned to self as
attributes will be registered automatically.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Block.initialize">
<code class="descname">initialize</code><span class="sig-paren">(</span><em>init=<mxnet.initializer.uniform object=""></mxnet.initializer.uniform></em>, <em>ctx=None</em>, <em>verbose=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.initialize" title="Permalink to this definition"></a></dt>
<dd><p>Initializes <cite>Parameter</cite>s of this <cite>Block</cite> and its children.</p>
<p>Equivalent to <cite>block.collect_params().initialize(...)</cite></p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Block.hybridize">
<code class="descname">hybridize</code><span class="sig-paren">(</span><em>active=True</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.hybridize" title="Permalink to this definition"></a></dt>
<dd><p>Activates or deactivates <cite>HybridBlock</cite>s recursively. Has no effect on
non-hybrid children.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>active</strong> (<em>bool, default True</em>) – Whether to turn hybrid on or off.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Block.__call__">
<code class="descname">__call__</code><span class="sig-paren">(</span><em>*args</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.__call__" title="Permalink to this definition"></a></dt>
<dd><p>Calls forward. Only accepts positional arguments.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Block.forward">
<code class="descname">forward</code><span class="sig-paren">(</span><em>*args</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Block.forward" title="Permalink to this definition"></a></dt>
<dd><p>Overrides to implement forward computation using <cite>NDArray</cite>. Only
accepts positional arguments.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>*args</strong><p>Input tensors.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.HybridBlock">
<em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">HybridBlock</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.HybridBlock" title="Permalink to this definition"></a></dt>
<dd><p><cite>HybridBlock</cite> supports forwarding with both Symbol and NDArray.</p>
<p>Forward computation in <cite>HybridBlock</cite> must be static to work with <cite>Symbol</cite>s,
i.e. you cannot call <cite>.asnumpy()</cite>, <cite>.shape</cite>, <cite>.dtype</cite>, etc on tensors.
Also, you cannot use branching or loop logic that bases on non-constant
expressions like random numbers or intermediate results, since they change
the graph structure for each iteration.</p>
<p>Before activating with <cite>hybridize()</cite>, <cite>HybridBlock</cite> works just like normal
<cite>Block</cite>. After activation, <cite>HybridBlock</cite> will create a symbolic graph
representing the forward computation and cache it. On subsequent forwards,
the cached graph will be used instead of <cite>hybrid_forward</cite>.</p>
<p>Refer to the <a class="reference external" href="https://mxnet.incubator.apache.org/tutorials/gluon/hybrid.html">Hybrid tutorial</a> to see
the end-to-end usage.</p>
<dl class="method">
<dt id="mxnet.gluon.HybridBlock.__setattr__">
<code class="descname">__setattr__</code><span class="sig-paren">(</span><em>name</em>, <em>value</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.HybridBlock.__setattr__" title="Permalink to this definition"></a></dt>
<dd><p>Registers parameters.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.HybridBlock.infer_shape">
<code class="descname">infer_shape</code><span class="sig-paren">(</span><em>*args</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.HybridBlock.infer_shape" title="Permalink to this definition"></a></dt>
<dd><p>Infers shape of Parameters from inputs.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.HybridBlock.forward">
<code class="descname">forward</code><span class="sig-paren">(</span><em>x</em>, <em>*args</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.HybridBlock.forward" title="Permalink to this definition"></a></dt>
<dd><p>Defines the forward computation. Arguments can be either
<cite>NDArray</cite> or <cite>Symbol</cite>.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.HybridBlock.hybrid_forward">
<code class="descname">hybrid_forward</code><span class="sig-paren">(</span><em>F</em>, <em>x</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.HybridBlock.hybrid_forward" title="Permalink to this definition"></a></dt>
<dd><p>Overrides to construct symbolic graph for this <cite>Block</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>x</strong> (<em>Symbol or NDArray</em>) – The first input tensor.</li>
<li><strong>*args</strong><p>Additional input tensors.</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.SymbolBlock">
<em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">SymbolBlock</code><span class="sig-paren">(</span><em>outputs</em>, <em>inputs</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.SymbolBlock" title="Permalink to this definition"></a></dt>
<dd><p>Construct block from symbol. This is useful for using pre-trained models
as feature extractors. For example, you may want to extract the output
from fc2 layer in AlexNet.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>outputs</strong> (<em>Symbol or list of Symbol</em>) – The desired output for SymbolBlock.</li>
<li><strong>inputs</strong> (<em>Symbol or list of Symbol</em>) – The Variables in output’s argument that should be used as inputs.</li>
<li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.ParameterDict" title="mxnet.gluon.ParameterDict"><em>ParameterDict</em></a>) – Parameter dictionary for arguments and auxiliary states of outputs
that are not inputs.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="c1"># To extract the feature from fc1 and fc2 layers of AlexNet:</span>
<span class="gp">>>> </span><span class="n">alexnet</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">model_zoo</span><span class="o">.</span><span class="n">vision</span><span class="o">.</span><span class="n">alexnet</span><span class="p">(</span><span class="n">pretrained</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">ctx</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(),</span>
<span class="go"> prefix='model_')</span>
<span class="gp">>>> </span><span class="n">inputs</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">sym</span><span class="o">.</span><span class="n">var</span><span class="p">(</span><span class="s1">'data'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">out</span> <span class="o">=</span> <span class="n">alexnet</span><span class="p">(</span><span class="n">inputs</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">internals</span> <span class="o">=</span> <span class="n">out</span><span class="o">.</span><span class="n">get_internals</span><span class="p">()</span>
<span class="gp">>>> </span><span class="k">print</span><span class="p">(</span><span class="n">internals</span><span class="o">.</span><span class="n">list_outputs</span><span class="p">())</span>
<span class="go">['data', ..., 'model_dense0_relu_fwd_output', ..., 'model_dense1_relu_fwd_output', ...]</span>
<span class="gp">>>> </span><span class="n">outputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">internals</span><span class="p">[</span><span class="s1">'model_dense0_relu_fwd_output'</span><span class="p">],</span>
<span class="go"> internals['model_dense1_relu_fwd_output']]</span>
<span class="gp">>>> </span><span class="c1"># Create SymbolBlock that shares parameters with alexnet</span>
<span class="gp">>>> </span><span class="n">feat_model</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">SymbolBlock</span><span class="p">(</span><span class="n">outputs</span><span class="p">,</span> <span class="n">inputs</span><span class="p">,</span> <span class="n">params</span><span class="o">=</span><span class="n">alexnet</span><span class="o">.</span><span class="n">collect_params</span><span class="p">())</span>
<span class="gp">>>> </span><span class="n">x</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">16</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">224</span><span class="p">,</span> <span class="mi">224</span><span class="p">))</span>
<span class="gp">>>> </span><span class="k">print</span><span class="p">(</span><span class="n">feat_model</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Sequential">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Sequential</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Sequential" title="Permalink to this definition"></a></dt>
<dd><p>Stacks <cite>Block</cite>s sequentially.</p>
<p>Example:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">net</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">()</span>
<span class="c1"># use net's name_scope to give child Blocks appropriate names.</span>
<span class="k">with</span> <span class="n">net</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">'relu'</span><span class="p">))</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">))</span>
</pre></div>
</div>
<dl class="method">
<dt id="mxnet.gluon.nn.Sequential.add">
<code class="descname">add</code><span class="sig-paren">(</span><em>block</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Sequential.add" title="Permalink to this definition"></a></dt>
<dd><p>Adds block on top of the stack.</p>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.HybridSequential">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">HybridSequential</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.HybridSequential" title="Permalink to this definition"></a></dt>
<dd><p>Stacks <cite>HybridBlock</cite>s sequentially.</p>
<p>Example:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">net</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">HybridSequential</span><span class="p">()</span>
<span class="c1"># use net's name_scope to give child Blocks appropriate names.</span>
<span class="k">with</span> <span class="n">net</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">'relu'</span><span class="p">))</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">20</span><span class="p">))</span>
</pre></div>
</div>
<dl class="method">
<dt id="mxnet.gluon.nn.HybridSequential.add">
<code class="descname">add</code><span class="sig-paren">(</span><em>block</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.HybridSequential.add" title="Permalink to this definition"></a></dt>
<dd><p>Adds block on top of the stack.</p>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Dense">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Dense</code><span class="sig-paren">(</span><em>units</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>flatten=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_units=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Dense" title="Permalink to this definition"></a></dt>
<dd><p>Just your regular densely-connected NN layer.</p>
<p><cite>Dense</cite> implements the operation:
<cite>output = activation(dot(input, weight) + bias)</cite>
where <cite>activation</cite> is the element-wise activation function
passed as the <cite>activation</cite> argument, <cite>weight</cite> is a weights matrix
created by the layer, and <cite>bias</cite> is a bias vector created by the layer
(only applicable if <cite>use_bias</cite> is <cite>True</cite>).</p>
<p>Note: the input must be a tensor with rank 2. Use <cite>flatten</cite> to convert it
to rank 2 manually if necessary.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>units</strong> (<em>int</em>) – Dimensionality of the output space.</li>
<li><strong>activation</strong> (<em>str</em>) – Activation function to use. See help on <cite>Activation</cite> layer.
If you don’t specify anything, no activation is applied
(ie. “linear” activation: <cite>a(x) = x</cite>).</li>
<li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li>
<li><strong>flatten</strong> (<em>bool</em>) – Whether the input tensor should be flattened.
If true, all but the first axis of input data are collapsed together.
If false, all but the last axis of input data are kept the same, and the transformation
applies on the last axis.</li>
<li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>kernel</cite> weights matrix.</li>
<li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li>
<li><strong>in_units</strong> (<em>int, optional</em>) – Size of the input data. If not specified, initialization will be
deferred to the first time <cite>forward</cite> is called and <cite>in_units</cite>
will be inferred from the shape of input data.</li>
<li><strong>prefix</strong> (<em>str or None</em>) – See document of <cite>Block</cite>.</li>
<li><strong>params</strong> (<em>ParameterDict or None</em>) – See document of <cite>Block</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p>If <code class="docutils literal"><span class="pre">flatten</span></code> is set to be True, then the shapes are:
Input shape:</p>
<blockquote>
<div>An N-D input with shape
<cite>(batch_size, x1, x2, ..., xn) with x1 * x2 * ... * xn equal to in_units</cite>.</div></blockquote>
<dl class="docutils">
<dt>Output shape:</dt>
<dd>The output would have shape <cite>(batch_size, units)</cite>.</dd>
</dl>
<p>If <code class="docutils literal"><span class="pre">flatten</span></code> is set to be false, then the shapes are:
Input shape:</p>
<blockquote>
<div>An N-D input with shape
<cite>(x1, x2, ..., xn, in_units)</cite>.</div></blockquote>
<dl class="docutils">
<dt>Output shape:</dt>
<dd>The output would have shape <cite>(x1, x2, ..., xn, units)</cite>.</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Activation">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Activation</code><span class="sig-paren">(</span><em>activation</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Activation" title="Permalink to this definition"></a></dt>
<dd><p>Applies an activation function to input.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>activation</strong> (<em>str</em>) – Name of activation function to use.
See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a> for available choices.</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>Arbitrary.</dd>
<dt>Output shape:</dt>
<dd>Same shape as input.</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Dropout">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Dropout</code><span class="sig-paren">(</span><em>rate</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Dropout" title="Permalink to this definition"></a></dt>
<dd><p>Applies Dropout to the input.</p>
<p>Dropout consists in randomly setting a fraction <cite>rate</cite> of input units
to 0 at each update during training time, which helps prevent overfitting.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>rate</strong> (<em>float</em>) – Fraction of the input units to drop. Must be a number between 0 and 1.</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>Arbitrary.</dd>
<dt>Output shape:</dt>
<dd>Same shape as input.</dd>
</dl>
<p class="rubric">References</p>
<p><a class="reference external" href="http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf">Dropout: A Simple Way to Prevent Neural Networks from Overfitting</a></p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.BatchNorm">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">BatchNorm</code><span class="sig-paren">(</span><em>axis=1</em>, <em>momentum=0.9</em>, <em>epsilon=1e-05</em>, <em>center=True</em>, <em>scale=True</em>, <em>beta_initializer='zeros'</em>, <em>gamma_initializer='ones'</em>, <em>running_mean_initializer='zeros'</em>, <em>running_variance_initializer='ones'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.BatchNorm" title="Permalink to this definition"></a></dt>
<dd><p>Batch normalization layer (Ioffe and Szegedy, 2014).
Normalizes the input at each batch, i.e. applies a transformation
that maintains the mean activation close to 0 and the activation
standard deviation close to 1.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>axis</strong> (<em>int, default 1</em>) – The axis that should be normalized. This is typically the channels
(C) axis. For instance, after a <cite>Conv2D</cite> layer with <cite>layout=’NCHW’</cite>,
set <cite>axis=1</cite> in <cite>BatchNorm</cite>. If <cite>layout=’NHWC’</cite>, then set <cite>axis=3</cite>.</li>
<li><strong>momentum</strong> (<em>float, default 0.9</em>) – Momentum for the moving average.</li>
<li><strong>epsilon</strong> (<em>float, default 1e-5</em>) – Small float added to variance to avoid dividing by zero.</li>
<li><strong>center</strong> (<em>bool, default True</em>) – If True, add offset of <cite>beta</cite> to normalized tensor.
If False, <cite>beta</cite> is ignored.</li>
<li><strong>scale</strong> (<em>bool, default True</em>) – If True, multiply by <cite>gamma</cite>. If False, <cite>gamma</cite> is not used.
When the next layer is linear (also e.g. <cite>nn.relu</cite>),
this can be disabled since the scaling
will be done by the next layer.</li>
<li><strong>beta_initializer</strong> (str or <cite>Initializer</cite>, default ‘zeros’) – Initializer for the beta weight.</li>
<li><strong>gamma_initializer</strong> (str or <cite>Initializer</cite>, default ‘ones’) – Initializer for the gamma weight.</li>
<li><strong>moving_mean_initializer</strong> (str or <cite>Initializer</cite>, default ‘zeros’) – Initializer for the moving mean.</li>
<li><strong>moving_variance_initializer</strong> (str or <cite>Initializer</cite>, default ‘ones’) – Initializer for the moving variance.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – Number of channels (feature maps) in input data. If not specified,
initialization will be deferred to the first time <cite>forward</cite> is called
and <cite>in_channels</cite> will be inferred from the shape of input data.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>Arbitrary.</dd>
<dt>Output shape:</dt>
<dd>Same shape as input.</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.LeakyReLU">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">LeakyReLU</code><span class="sig-paren">(</span><em>alpha</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.LeakyReLU" title="Permalink to this definition"></a></dt>
<dd><p>Leaky version of a Rectified Linear Unit.</p>
<p>It allows a small gradient when the unit is not active:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span>`f(x) = alpha * x for x < 0`,
`f(x) = x for x >= 0`.
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>alpha</strong> (<em>float</em>) – slope coefficient for the negative half axis. Must be >= 0.</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>Arbitrary.</dd>
<dt>Output shape:</dt>
<dd>Same shape as input.</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Embedding">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Embedding</code><span class="sig-paren">(</span><em>input_dim</em>, <em>output_dim</em>, <em>dtype='float32'</em>, <em>weight_initializer=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Embedding" title="Permalink to this definition"></a></dt>
<dd><p>Turns non-negative integers (indexes/tokens) into dense vectors
of fixed size, e.g. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>input_dim</strong> (<em>int</em>) – Size of the vocabulary, i.e. maximum integer index + 1.</li>
<li><strong>output_dim</strong> (<em>int</em>) – Dimension of the dense embedding.</li>
<li><strong>dtype</strong> (<em>str or np.dtype, default 'float32'</em>) – Data type of output embeddings.</li>
<li><strong>weight_initializer</strong> (<a class="reference internal" href="optimization.html#mxnet.initializer.Initializer" title="mxnet.initializer.Initializer"><em>Initializer</em></a>) – Initializer for the <cite>embeddings</cite> matrix.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>2D tensor with shape: <cite>(N, M)</cite>.</dd>
<dt>Output shape:</dt>
<dd>3D tensor with shape: <cite>(N, M, output_dim)</cite>.</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Conv1D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv1D</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=1</em>, <em>padding=0</em>, <em>dilation=1</em>, <em>groups=1</em>, <em>layout='NCW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv1D" title="Permalink to this definition"></a></dt>
<dd><p>1D convolution layer (e.g. temporal convolution).</p>
<p>This layer creates a convolution kernel that is convolved
with the layer input over a single spatial (or temporal) dimension
to produce a tensor of outputs.
If <cite>use_bias</cite> is True, a bias vector is created and added to the outputs.
Finally, if <cite>activation</cite> is not <cite>None</cite>,
it is applied to the outputs as well.</p>
<p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be
deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be
inferred from the shape of input data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output
channels (filters) in the convolution.</li>
<li><strong>kernel_size</strong> (<em>int or tuple/list of 1 int</em>) – Specifies the dimensions of the convolution window.</li>
<li><strong>strides</strong> (<em>int or tuple/list of 1 int,</em>) – Specify the strides of the convolution.</li>
<li><strong>padding</strong> (<em>int or a tuple/list of 1 int,</em>) – If padding is non-zero, then the input is implicitly zero-padded
on both sides for padding number of points</li>
<li><strong>dilation</strong> (<em>int or tuple/list of 1 int</em>) – Specifies the dilation rate to use for dilated convolution.</li>
<li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs.
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv
layers side by side, each seeing half the input channels, and producing
half the output channels, and both subsequently concatenated.</li>
<li><strong>layout</strong> (<em>str, default 'NCW'</em>) – Dimension ordering of data and weight. Can be ‘NCW’, ‘NWC’, etc.
‘N’, ‘C’, ‘W’ stands for batch, channel, and width (time) dimensions
respectively. Convolution is applied on the ‘W’ dimension.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – The number of input channels to this layer. If not specified,
initialization will be deferred to the first time <cite>forward</cite> is called
and <cite>in_channels</cite> will be inferred from the shape of input data.</li>
<li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>.
If you don’t specify anything, no activation is applied
(ie. “linear” activation: <cite>a(x) = x</cite>).</li>
<li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li>
<li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li>
<li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 3D array of shape
(batch_size, in_channels, width) if <cite>layout</cite> is <cite>NCW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 3D array of shape
(batch_size, channels, out_width) if <cite>layout</cite> is <cite>NCW</cite>.
out_width is calculated as:</p>
<div class="last highlight-python"><div class="highlight"><pre><span></span><span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="o">-</span><span class="n">dilation</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Conv2D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv2D</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=(1</em>, <em>1)</em>, <em>padding=(0</em>, <em>0)</em>, <em>dilation=(1</em>, <em>1)</em>, <em>groups=1</em>, <em>layout='NCHW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv2D" title="Permalink to this definition"></a></dt>
<dd><p>2D convolution layer (e.g. spatial convolution over images).</p>
<p>This layer creates a convolution kernel that is convolved
with the layer input to produce a tensor of
outputs. If <cite>use_bias</cite> is True,
a bias vector is created and added to the outputs. Finally, if
<cite>activation</cite> is not <cite>None</cite>, it is applied to the outputs as well.</p>
<p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be
deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be
inferred from the shape of input data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output
channels (filters) in the convolution.</li>
<li><strong>kernel_size</strong> (<em>int or tuple/list of 2 int</em>) – Specifies the dimensions of the convolution window.</li>
<li><strong>strides</strong> (<em>int or tuple/list of 2 int,</em>) – Specify the strides of the convolution.</li>
<li><strong>padding</strong> (<em>int or a tuple/list of 2 int,</em>) – If padding is non-zero, then the input is implicitly zero-padded
on both sides for padding number of points</li>
<li><strong>dilation</strong> (<em>int or tuple/list of 2 int</em>) – Specifies the dilation rate to use for dilated convolution.</li>
<li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs.
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv
layers side by side, each seeing half the input channels, and producing
half the output channels, and both subsequently concatenated.</li>
<li><strong>layout</strong> (<em>str, default 'NCHW'</em>) – Dimension ordering of data and weight. Can be ‘NCHW’, ‘NHWC’, etc.
‘N’, ‘C’, ‘H’, ‘W’ stands for batch, channel, height, and width
dimensions respectively. Convolution is applied on the ‘H’ and
‘W’ dimensions.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – The number of input channels to this layer. If not specified,
initialization will be deferred to the first time <cite>forward</cite> is called
and <cite>in_channels</cite> will be inferred from the shape of input data.</li>
<li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>.
If you don’t specify anything, no activation is applied
(ie. “linear” activation: <cite>a(x) = x</cite>).</li>
<li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li>
<li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li>
<li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 4D array of shape
(batch_size, in_channels, height, width) if <cite>layout</cite> is <cite>NCHW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 4D array of shape
(batch_size, channels, out_height, out_width) if <cite>layout</cite> is <cite>NCHW</cite>.</p>
<p>out_height and out_width are calculated as:</p>
<div class="last highlight-python"><div class="highlight"><pre><span></span><span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
<span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Conv3D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv3D</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=(1</em>, <em>1</em>, <em>1)</em>, <em>padding=(0</em>, <em>0</em>, <em>0)</em>, <em>dilation=(1</em>, <em>1</em>, <em>1)</em>, <em>groups=1</em>, <em>layout='NCDHW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv3D" title="Permalink to this definition"></a></dt>
<dd><p>3D convolution layer (e.g. spatial convolution over volumes).</p>
<p>This layer creates a convolution kernel that is convolved
with the layer input to produce a tensor of
outputs. If <cite>use_bias</cite> is <cite>True</cite>,
a bias vector is created and added to the outputs. Finally, if
<cite>activation</cite> is not <cite>None</cite>, it is applied to the outputs as well.</p>
<p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be
deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be
inferred from the shape of input data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output
channels (filters) in the convolution.</li>
<li><strong>kernel_size</strong> (<em>int or tuple/list of 3 int</em>) – Specifies the dimensions of the convolution window.</li>
<li><strong>strides</strong> (<em>int or tuple/list of 3 int,</em>) – Specify the strides of the convolution.</li>
<li><strong>padding</strong> (<em>int or a tuple/list of 3 int,</em>) – If padding is non-zero, then the input is implicitly zero-padded
on both sides for padding number of points</li>
<li><strong>dilation</strong> (<em>int or tuple/list of 3 int</em>) – Specifies the dilation rate to use for dilated convolution.</li>
<li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs.
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv
layers side by side, each seeing half the input channels, and producing
half the output channels, and both subsequently concatenated.</li>
<li><strong>layout</strong> (<em>str, default 'NCDHW'</em>) – Dimension ordering of data and weight. Can be ‘NCDHW’, ‘NDHWC’, etc.
‘N’, ‘C’, ‘D’, ‘H’, ‘W’ stands for batch, channel, depth, height and
width dimensions respectively. Convolution is applied on the ‘D’,
‘H’ and ‘W’ dimensions.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – The number of input channels to this layer. If not specified,
initialization will be deferred to the first time <cite>forward</cite> is called
and <cite>in_channels</cite> will be inferred from the shape of input data.</li>
<li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>.
If you don’t specify anything, no activation is applied
(ie. “linear” activation: <cite>a(x) = x</cite>).</li>
<li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li>
<li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li>
<li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 5D array of shape
(batch_size, in_channels, depth, height, width) if <cite>layout</cite> is <cite>NCDHW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 5D array of shape
(batch_size, channels, out_depth, out_height, out_width) if <cite>layout</cite> is
<cite>NCDHW</cite>.</p>
<p>out_depth, out_height and out_width are calculated as:</p>
<div class="last highlight-python"><div class="highlight"><pre><span></span><span class="n">out_depth</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">depth</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
<span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
<span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="n">dilation</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">*</span><span class="p">(</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="n">stride</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Conv1DTranspose">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv1DTranspose</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=1</em>, <em>padding=0</em>, <em>output_padding=0</em>, <em>dilation=1</em>, <em>groups=1</em>, <em>layout='NCW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv1DTranspose" title="Permalink to this definition"></a></dt>
<dd><p>Transposed 1D convolution layer (sometimes called Deconvolution).</p>
<p>The need for transposed convolutions generally arises
from the desire to use a transformation going in the opposite direction
of a normal convolution, i.e., from something that has the shape of the
output of some convolution to something that has the shape of its input
while maintaining a connectivity pattern that is compatible with
said convolution.</p>
<p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be
deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be
inferred from the shape of input data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output
channels (filters) in the convolution.</li>
<li><strong>kernel_size</strong> (<em>int or tuple/list of 1 int</em>) – Specifies the dimensions of the convolution window.</li>
<li><strong>strides</strong> (<em>int or tuple/list of 1 int,</em>) – Specify the strides of the convolution.</li>
<li><strong>padding</strong> (<em>int or a tuple/list of 1 int,</em>) – If padding is non-zero, then the input is implicitly zero-padded
on both sides for padding number of points</li>
<li><strong>dilation</strong> (<em>int or tuple/list of 1 int</em>) – Specifies the dilation rate to use for dilated convolution.</li>
<li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs.
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv
layers side by side, each seeing half the input channels, and producing
half the output channels, and both subsequently concatenated.</li>
<li><strong>layout</strong> (<em>str, default 'NCW'</em>) – Dimension ordering of data and weight. Can be ‘NCW’, ‘NWC’, etc.
‘N’, ‘C’, ‘W’ stands for batch, channel, and width (time) dimensions
respectively. Convolution is applied on the ‘W’ dimension.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – The number of input channels to this layer. If not specified,
initialization will be deferred to the first time <cite>forward</cite> is called
and <cite>in_channels</cite> will be inferred from the shape of input data.</li>
<li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>.
If you don’t specify anything, no activation is applied
(ie. “linear” activation: <cite>a(x) = x</cite>).</li>
<li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li>
<li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li>
<li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 3D array of shape
(batch_size, in_channels, width) if <cite>layout</cite> is <cite>NCW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 3D array of shape
(batch_size, channels, out_width) if <cite>layout</cite> is <cite>NCW</cite>.</p>
<p>out_width is calculated as:</p>
<div class="last highlight-python"><div class="highlight"><pre><span></span><span class="n">out_width</span> <span class="o">=</span> <span class="p">(</span><span class="n">width</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="o">+</span><span class="n">kernel_size</span><span class="o">+</span><span class="n">output_padding</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Conv2DTranspose">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv2DTranspose</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=(1</em>, <em>1)</em>, <em>padding=(0</em>, <em>0)</em>, <em>output_padding=(0</em>, <em>0)</em>, <em>dilation=(1</em>, <em>1)</em>, <em>groups=1</em>, <em>layout='NCHW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv2DTranspose" title="Permalink to this definition"></a></dt>
<dd><p>Transposed 2D convolution layer (sometimes called Deconvolution).</p>
<p>The need for transposed convolutions generally arises
from the desire to use a transformation going in the opposite direction
of a normal convolution, i.e., from something that has the shape of the
output of some convolution to something that has the shape of its input
while maintaining a connectivity pattern that is compatible with
said convolution.</p>
<p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be
deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be
inferred from the shape of input data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output
channels (filters) in the convolution.</li>
<li><strong>kernel_size</strong> (<em>int or tuple/list of 2 int</em>) – Specifies the dimensions of the convolution window.</li>
<li><strong>strides</strong> (<em>int or tuple/list of 2 int,</em>) – Specify the strides of the convolution.</li>
<li><strong>padding</strong> (<em>int or a tuple/list of 2 int,</em>) – If padding is non-zero, then the input is implicitly zero-padded
on both sides for padding number of points</li>
<li><strong>dilation</strong> (<em>int or tuple/list of 2 int</em>) – Specifies the dilation rate to use for dilated convolution.</li>
<li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs.
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv
layers side by side, each seeing half the input channels, and producing
half the output channels, and both subsequently concatenated.</li>
<li><strong>layout</strong> (<em>str, default 'NCHW'</em>) – Dimension ordering of data and weight. Can be ‘NCHW’, ‘NHWC’, etc.
‘N’, ‘C’, ‘H’, ‘W’ stands for batch, channel, height, and width
dimensions respectively. Convolution is applied on the ‘H’ and
‘W’ dimensions.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – The number of input channels to this layer. If not specified,
initialization will be deferred to the first time <cite>forward</cite> is called
and <cite>in_channels</cite> will be inferred from the shape of input data.</li>
<li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>.
If you don’t specify anything, no activation is applied
(ie. “linear” activation: <cite>a(x) = x</cite>).</li>
<li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li>
<li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li>
<li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 4D array of shape
(batch_size, in_channels, height, width) if <cite>layout</cite> is <cite>NCHW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 4D array of shape
(batch_size, channels, out_height, out_width) if <cite>layout</cite> is <cite>NCHW</cite>.</p>
<p>out_height and out_width are calculated as:</p>
<div class="last highlight-python"><div class="highlight"><pre><span></span><span class="n">out_height</span> <span class="o">=</span> <span class="p">(</span><span class="n">height</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">out_width</span> <span class="o">=</span> <span class="p">(</span><span class="n">width</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.Conv3DTranspose">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">Conv3DTranspose</code><span class="sig-paren">(</span><em>channels</em>, <em>kernel_size</em>, <em>strides=(1</em>, <em>1</em>, <em>1)</em>, <em>padding=(0</em>, <em>0</em>, <em>0)</em>, <em>output_padding=(0</em>, <em>0</em>, <em>0)</em>, <em>dilation=(1</em>, <em>1</em>, <em>1)</em>, <em>groups=1</em>, <em>layout='NCDHW'</em>, <em>activation=None</em>, <em>use_bias=True</em>, <em>weight_initializer=None</em>, <em>bias_initializer='zeros'</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.Conv3DTranspose" title="Permalink to this definition"></a></dt>
<dd><p>Transposed 3D convolution layer (sometimes called Deconvolution).</p>
<p>The need for transposed convolutions generally arises
from the desire to use a transformation going in the opposite direction
of a normal convolution, i.e., from something that has the shape of the
output of some convolution to something that has the shape of its input
while maintaining a connectivity pattern that is compatible with
said convolution.</p>
<p>If <cite>in_channels</cite> is not specified, <cite>Parameter</cite> initialization will be
deferred to the first time <cite>forward</cite> is called and <cite>in_channels</cite> will be
inferred from the shape of input data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – The dimensionality of the output space, i.e. the number of output
channels (filters) in the convolution.</li>
<li><strong>kernel_size</strong> (<em>int or tuple/list of 3 int</em>) – Specifies the dimensions of the convolution window.</li>
<li><strong>strides</strong> (<em>int or tuple/list of 3 int,</em>) – Specify the strides of the convolution.</li>
<li><strong>padding</strong> (<em>int or a tuple/list of 3 int,</em>) – If padding is non-zero, then the input is implicitly zero-padded
on both sides for padding number of points</li>
<li><strong>dilation</strong> (<em>int or tuple/list of 3 int</em>) – Specifies the dilation rate to use for dilated convolution.</li>
<li><strong>groups</strong> (<em>int</em>) – Controls the connections between inputs and outputs.
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv
layers side by side, each seeing half the input channels, and producing
half the output channels, and both subsequently concatenated.</li>
<li><strong>layout</strong> (<em>str, default 'NCDHW'</em>) – Dimension ordering of data and weight. Can be ‘NCDHW’, ‘NDHWC’, etc.
‘N’, ‘C’, ‘H’, ‘W’, ‘D’ stands for batch, channel, height, width and
depth dimensions respectively. Convolution is applied on the ‘D’,
‘H’, and ‘W’ dimensions.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – The number of input channels to this layer. If not specified,
initialization will be deferred to the first time <cite>forward</cite> is called
and <cite>in_channels</cite> will be inferred from the shape of input data.</li>
<li><strong>activation</strong> (<em>str</em>) – Activation function to use. See <a class="reference internal" href="ndarray.html#mxnet.ndarray.Activation" title="mxnet.ndarray.Activation"><code class="xref py py-func docutils literal"><span class="pre">Activation()</span></code></a>.
If you don’t specify anything, no activation is applied
(ie. “linear” activation: <cite>a(x) = x</cite>).</li>
<li><strong>use_bias</strong> (<em>bool</em>) – Whether the layer uses a bias vector.</li>
<li><strong>weight_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the <cite>weight</cite> weights matrix.</li>
<li><strong>bias_initializer</strong> (str or <cite>Initializer</cite>) – Initializer for the bias vector.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 5D array of shape
(batch_size, in_channels, depth, height, width) if <cite>layout</cite> is <cite>NCDHW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 5D array of shape
(batch_size, channels, out_depth, out_height, out_width) if <cite>layout</cite> is <cite>NCDHW</cite>.
out_depth, out_height and out_width are calculated as:</p>
<div class="last highlight-python"><div class="highlight"><pre><span></span><span class="n">out_depth</span> <span class="o">=</span> <span class="p">(</span><span class="n">depth</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">out_height</span> <span class="o">=</span> <span class="p">(</span><span class="n">height</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">out_width</span> <span class="o">=</span> <span class="p">(</span><span class="n">width</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">strides</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">+</span><span class="n">kernel_size</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">+</span><span class="n">output_padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.MaxPool1D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">MaxPool1D</code><span class="sig-paren">(</span><em>pool_size=2</em>, <em>strides=None</em>, <em>padding=0</em>, <em>layout='NCW'</em>, <em>ceil_mode=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.MaxPool1D" title="Permalink to this definition"></a></dt>
<dd><p>Max pooling operation for one dimensional data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pool_size</strong> (<em>int</em>) – Size of the max pooling windows.</li>
<li><strong>strides</strong> (<em>int, or None</em>) – Factor by which to downscale. E.g. 2 will halve the input size.
If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li>
<li><strong>padding</strong> (<em>int</em>) – If padding is non-zero, then the input is implicitly
zero-padded on both sides for padding number of points.</li>
<li><strong>layout</strong> (<em>str, default 'NCW'</em>) – Dimension ordering of data and weight. Can be ‘NCW’, ‘NWC’, etc.
‘N’, ‘C’, ‘W’ stands for batch, channel, and width (time) dimensions
respectively. Pooling is applied on the W dimension.</li>
<li><strong>ceil_mode</strong> (<em>bool, default False</em>) – When <cite>True</cite>, will use ceil instead of floor to compute the output shape.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 3D array of shape
(batch_size, channels, width) if <cite>layout</cite> is <cite>NCW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 3D array of shape
(batch_size, channels, out_width) if <cite>layout</cite> is <cite>NCW</cite>.</p>
<p>out_width is calculated as:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="o">-</span><span class="n">pool_size</span><span class="p">)</span><span class="o">/</span><span class="n">strides</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span>
</pre></div>
</div>
<p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this
equation.</p>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.MaxPool2D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">MaxPool2D</code><span class="sig-paren">(</span><em>pool_size=(2</em>, <em>2)</em>, <em>strides=None</em>, <em>padding=0</em>, <em>layout='NCHW'</em>, <em>ceil_mode=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.MaxPool2D" title="Permalink to this definition"></a></dt>
<dd><p>Max pooling operation for two dimensional (spatial) data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pool_size</strong> (<em>int or list/tuple of 2 ints,</em>) – Size of the max pooling windows.</li>
<li><strong>strides</strong> (<em>int, list/tuple of 2 ints, or None.</em>) – Factor by which to downscale. E.g. 2 will halve the input size.
If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li>
<li><strong>padding</strong> (<em>int or list/tuple of 2 ints,</em>) – If padding is non-zero, then the input is implicitly
zero-padded on both sides for padding number of points.</li>
<li><strong>layout</strong> (<em>str, default 'NCHW'</em>) – Dimension ordering of data and weight. Can be ‘NCHW’, ‘NHWC’, etc.
‘N’, ‘C’, ‘H’, ‘W’ stands for batch, channel, height, and width
dimensions respectively. padding is applied on ‘H’ and ‘W’ dimension.</li>
<li><strong>ceil_mode</strong> (<em>bool, default False</em>) – When <cite>True</cite>, will use ceil instead of floor to compute the output shape.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 4D array of shape
(batch_size, channels, height, width) if <cite>layout</cite> is <cite>NCHW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 4D array of shape
(batch_size, channels, out_height, out_width) if <cite>layout</cite> is <cite>NCHW</cite>.</p>
<p>out_height and out_width are calculated as:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
<span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
</pre></div>
</div>
<p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this
equation.</p>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.MaxPool3D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">MaxPool3D</code><span class="sig-paren">(</span><em>pool_size=(2</em>, <em>2</em>, <em>2)</em>, <em>strides=None</em>, <em>padding=0</em>, <em>ceil_mode=False</em>, <em>layout='NCDHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.MaxPool3D" title="Permalink to this definition"></a></dt>
<dd><p>Max pooling operation for 3D data (spatial or spatio-temporal).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pool_size</strong> (<em>int or list/tuple of 3 ints,</em>) – Size of the max pooling windows.</li>
<li><strong>strides</strong> (<em>int, list/tuple of 3 ints, or None.</em>) – Factor by which to downscale. E.g. 2 will halve the input size.
If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li>
<li><strong>padding</strong> (<em>int or list/tuple of 3 ints,</em>) – If padding is non-zero, then the input is implicitly
zero-padded on both sides for padding number of points.</li>
<li><strong>layout</strong> (<em>str, default 'NCDHW'</em>) – Dimension ordering of data and weight. Can be ‘NCDHW’, ‘NDHWC’, etc.
‘N’, ‘C’, ‘H’, ‘W’, ‘D’ stands for batch, channel, height, width and
depth dimensions respectively. padding is applied on ‘D’, ‘H’ and ‘W’
dimension.</li>
<li><strong>ceil_mode</strong> (<em>bool, default False</em>) – When <cite>True</cite>, will use ceil instead of floor to compute the output shape.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 5D array of shape
(batch_size, channels, depth, height, width) if <cite>layout</cite> is <cite>NCDHW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 5D array of shape
(batch_size, channels, out_depth, out_height, out_width) if <cite>layout</cite>
is <cite>NCDHW</cite>.</p>
<p>out_depth, out_height and out_width are calculated as</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">out_depth</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">depth</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
<span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
<span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
</pre></div>
</div>
<p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this
equation.</p>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.AvgPool1D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">AvgPool1D</code><span class="sig-paren">(</span><em>pool_size=2</em>, <em>strides=None</em>, <em>padding=0</em>, <em>layout='NCW'</em>, <em>ceil_mode=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.AvgPool1D" title="Permalink to this definition"></a></dt>
<dd><p>Average pooling operation for temporal data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pool_size</strong> (<em>int</em>) – Size of the average pooling windows.</li>
<li><strong>strides</strong> (<em>int, or None</em>) – Factor by which to downscale. E.g. 2 will halve the input size.
If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li>
<li><strong>padding</strong> (<em>int</em>) – If padding is non-zero, then the input is implicitly
zero-padded on both sides for padding number of points.</li>
<li><strong>layout</strong> (<em>str, default 'NCW'</em>) – Dimension ordering of data and weight. Can be ‘NCW’, ‘NWC’, etc.
‘N’, ‘C’, ‘W’ stands for batch, channel, and width (time) dimensions
respectively. padding is applied on ‘W’ dimension.</li>
<li><strong>ceil_mode</strong> (<em>bool, default False</em>) – When <cite>True</cite>, will use ceil instead of floor to compute the output shape.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 3D array of shape
(batch_size, channels, width) if <cite>layout</cite> is <cite>NCW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 3D array of shape
(batch_size, channels, out_width) if <cite>layout</cite> is <cite>NCW</cite>.</p>
<p>out_width is calculated as:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="o">-</span><span class="n">pool_size</span><span class="p">)</span><span class="o">/</span><span class="n">strides</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span>
</pre></div>
</div>
<p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this
equation.</p>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.AvgPool2D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">AvgPool2D</code><span class="sig-paren">(</span><em>pool_size=(2</em>, <em>2)</em>, <em>strides=None</em>, <em>padding=0</em>, <em>ceil_mode=False</em>, <em>layout='NCHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.AvgPool2D" title="Permalink to this definition"></a></dt>
<dd><p>Average pooling operation for spatial data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pool_size</strong> (<em>int or list/tuple of 2 ints,</em>) – Size of the average pooling windows.</li>
<li><strong>strides</strong> (<em>int, list/tuple of 2 ints, or None.</em>) – Factor by which to downscale. E.g. 2 will halve the input size.
If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li>
<li><strong>padding</strong> (<em>int or list/tuple of 2 ints,</em>) – If padding is non-zero, then the input is implicitly
zero-padded on both sides for padding number of points.</li>
<li><strong>layout</strong> (<em>str, default 'NCHW'</em>) – Dimension ordering of data and weight. Can be ‘NCHW’, ‘NHWC’, etc.
‘N’, ‘C’, ‘H’, ‘W’ stands for batch, channel, height, and width
dimensions respectively. padding is applied on ‘H’ and ‘W’ dimension.</li>
<li><strong>ceil_mode</strong> (<em>bool, default False</em>) – When True, will use ceil instead of floor to compute the output shape.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 4D array of shape
(batch_size, channels, height, width) if <cite>layout</cite> is <cite>NCHW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 4D array of shape
(batch_size, channels, out_height, out_width) if <cite>layout</cite> is <cite>NCHW</cite>.</p>
<p>out_height and out_width are calculated as:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
<span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
</pre></div>
</div>
<p class="last">When <cite>ceil_mode</cite> is <cite>True</cite>, ceil will be used instead of floor in this
equation.</p>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.AvgPool3D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">AvgPool3D</code><span class="sig-paren">(</span><em>pool_size=(2</em>, <em>2</em>, <em>2)</em>, <em>strides=None</em>, <em>padding=0</em>, <em>ceil_mode=False</em>, <em>layout='NCDHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.AvgPool3D" title="Permalink to this definition"></a></dt>
<dd><p>Average pooling operation for 3D data (spatial or spatio-temporal).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pool_size</strong> (<em>int or list/tuple of 3 ints,</em>) – Size of the average pooling windows.</li>
<li><strong>strides</strong> (<em>int, list/tuple of 3 ints, or None.</em>) – Factor by which to downscale. E.g. 2 will halve the input size.
If <cite>None</cite>, it will default to <cite>pool_size</cite>.</li>
<li><strong>padding</strong> (<em>int or list/tuple of 3 ints,</em>) – If padding is non-zero, then the input is implicitly
zero-padded on both sides for padding number of points.</li>
<li><strong>layout</strong> (<em>str, default 'NCDHW'</em>) – Dimension ordering of data and weight. Can be ‘NCDHW’, ‘NDHWC’, etc.
‘N’, ‘C’, ‘H’, ‘W’, ‘D’ stands for batch, channel, height, width and
depth dimensions respectively. padding is applied on ‘D’, ‘H’ and ‘W’
dimension.</li>
<li><strong>ceil_mode</strong> (<em>bool, default False</em>) – When True, will use ceil instead of floor to compute the output shape.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shape:</dt>
<dd>This depends on the <cite>layout</cite> parameter. Input is 5D array of shape
(batch_size, channels, depth, height, width) if <cite>layout</cite> is <cite>NCDHW</cite>.</dd>
<dt>Output shape:</dt>
<dd><p class="first">This depends on the <cite>layout</cite> parameter. Output is 5D array of shape
(batch_size, channels, out_depth, out_height, out_width) if <cite>layout</cite>
is <cite>NCDHW</cite>.</p>
<p>out_depth, out_height and out_width are calculated as</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">out_depth</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">depth</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
<span class="n">out_height</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">height</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
<span class="n">out_width</span> <span class="o">=</span> <span class="n">floor</span><span class="p">((</span><span class="n">width</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">padding</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="n">pool_size</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">/</span><span class="n">strides</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="o">+</span><span class="mi">1</span>
</pre></div>
</div>
<p class="last">When <cite>ceil_mode</cite> is <cite>True,</cite> ceil will be used instead of floor in this
equation.</p>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.GlobalMaxPool1D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalMaxPool1D</code><span class="sig-paren">(</span><em>layout='NCW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalMaxPool1D" title="Permalink to this definition"></a></dt>
<dd><p>Global max pooling operation for temporal data.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.GlobalMaxPool2D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalMaxPool2D</code><span class="sig-paren">(</span><em>layout='NCHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalMaxPool2D" title="Permalink to this definition"></a></dt>
<dd><p>Global max pooling operation for spatial data.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.GlobalMaxPool3D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalMaxPool3D</code><span class="sig-paren">(</span><em>layout='NCDHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalMaxPool3D" title="Permalink to this definition"></a></dt>
<dd><p>Global max pooling operation for 3D data.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.GlobalAvgPool1D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalAvgPool1D</code><span class="sig-paren">(</span><em>layout='NCW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalAvgPool1D" title="Permalink to this definition"></a></dt>
<dd><p>Global average pooling operation for temporal data.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.GlobalAvgPool2D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalAvgPool2D</code><span class="sig-paren">(</span><em>layout='NCHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalAvgPool2D" title="Permalink to this definition"></a></dt>
<dd><p>Global average pooling operation for spatial data.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.nn.GlobalAvgPool3D">
<em class="property">class </em><code class="descclassname">mxnet.gluon.nn.</code><code class="descname">GlobalAvgPool3D</code><span class="sig-paren">(</span><em>layout='NCDHW'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.nn.GlobalAvgPool3D" title="Permalink to this definition"></a></dt>
<dd><p>Global average pooling operation for 3D data.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.RecurrentCell">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">RecurrentCell</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell" title="Permalink to this definition"></a></dt>
<dd><p>Abstract base class for RNN cells</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>prefix</strong> (<em>str, optional</em>) – Prefix for names of <cite>Block</cite>s
(this prefix is also used for names of weights if <cite>params</cite> is <cite>None</cite>,
i.e. if <cite>params</cite> are being created and not reused)</li>
<li><strong>params</strong> (<em>Parameter or None, optional</em>) – Container for weight sharing between cells.
A new Parameter container is created if <cite>params</cite> is <cite>None</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="mxnet.gluon.rnn.RecurrentCell.reset">
<code class="descname">reset</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.reset" title="Permalink to this definition"></a></dt>
<dd><p>Reset before re-using the cell for another graph.</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.rnn.RecurrentCell.state_info">
<code class="descname">state_info</code><span class="sig-paren">(</span><em>batch_size=0</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.state_info" title="Permalink to this definition"></a></dt>
<dd><p>shape and layout information of states</p>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.rnn.RecurrentCell.begin_state">
<code class="descname">begin_state</code><span class="sig-paren">(</span><em>batch_size=0</em>, <em>func=<function zeros=""></function></em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.begin_state" title="Permalink to this definition"></a></dt>
<dd><p>Initial state for this cell.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>callable, default symbol.zeros</em>) – <p>Function for creating initial state.</p>
<p>For Symbol API, func can be <cite>symbol.zeros</cite>, <cite>symbol.uniform</cite>,
<cite>symbol.var</cite>, etc. Use <cite>symbol.var</cite> if you want to directly
feed input as states.</p>
<p>For NDArray API, func can be <cite>ndarray.zeros</cite>, <cite>ndarray.ones</cite>, etc.</p>
</li>
<li><strong>batch_size</strong> (<em>int, default 0</em>) – Only required for NDArray API. Size of the batch (‘N’ in layout)
dimension of input.</li>
<li><strong>**kwargs</strong><p>Additional keyword arguments passed to func. For example
<cite>mean</cite>, <cite>std</cite>, <cite>dtype</cite>, etc.</p>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>states</strong>
Starting states for the first RNN step.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">nested list of Symbol</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.rnn.RecurrentCell.unroll">
<code class="descname">unroll</code><span class="sig-paren">(</span><em>length</em>, <em>inputs</em>, <em>begin_state=None</em>, <em>layout='NTC'</em>, <em>merge_outputs=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.unroll" title="Permalink to this definition"></a></dt>
<dd><p>Unrolls an RNN cell across time steps.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>length</strong> (<em>int</em>) – Number of steps to unroll.</li>
<li><strong>inputs</strong> (<em>Symbol, list of Symbol, or None</em>) – <p>If <cite>inputs</cite> is a single Symbol (usually the output
of Embedding symbol), it should have shape
(batch_size, length, ...) if <cite>layout</cite> is ‘NTC’,
or (length, batch_size, ...) if <cite>layout</cite> is ‘TNC’.</p>
<p>If <cite>inputs</cite> is a list of symbols (usually output of
previous unroll), they should all have shape
(batch_size, ...).</p>
</li>
<li><strong>begin_state</strong> (<em>nested list of Symbol, optional</em>) – Input states created by <cite>begin_state()</cite>
or output state of another cell.
Created from <cite>begin_state()</cite> if <cite>None</cite>.</li>
<li><strong>layout</strong> (<em>str, optional</em>) – <cite>layout</cite> of input symbol. Only used if inputs
is a single Symbol.</li>
<li><strong>merge_outputs</strong> (<em>bool, optional</em>) – If <cite>False</cite>, returns outputs as a list of Symbols.
If <cite>True</cite>, concatenates output across time steps
and returns a single symbol with shape
(batch_size, length, ...) if layout is ‘NTC’,
or (length, batch_size, ...) if layout is ‘TNC’.
If <cite>None</cite>, output whatever is faster.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"><ul class="simple">
<li><strong>outputs</strong> (<em>list of Symbol or Symbol</em>) –
Symbol (if <cite>merge_outputs</cite> is True) or list of Symbols
(if <cite>merge_outputs</cite> is False) corresponding to the output from
the RNN from this unrolling.</li>
<li><strong>states</strong> (<em>list of Symbol</em>) –
The new state of this RNN after this unrolling.
The type of this symbol is same as the output of <cite>begin_state()</cite>.</li>
</ul>
</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.rnn.RecurrentCell.forward">
<code class="descname">forward</code><span class="sig-paren">(</span><em>inputs</em>, <em>states</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RecurrentCell.forward" title="Permalink to this definition"></a></dt>
<dd><p>Unrolls the recurrent cell for one time step.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>inputs</strong> (<em>sym.Variable</em>) – Input symbol, 2D, of shape (batch_size * num_units).</li>
<li><strong>states</strong> (<em>list of sym.Variable</em>) – RNN state from previous step or the output of begin_state().</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"><ul class="simple">
<li><strong>output</strong> (<em>Symbol</em>) –
Symbol corresponding to the output from the RNN when unrolling
for a single time step.</li>
<li><strong>states</strong> (<em>list of Symbol</em>) –
The new state of this RNN after this unrolling.
The type of this symbol is same as the output of <cite>begin_state()</cite>.
This can be used as an input state to the next time step
of this RNN.</li>
</ul>
</p>
</td>
</tr>
</tbody>
</table>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal"><span class="pre">begin_state()</span></code></dt>
<dd>This function can provide the states for the first time step.</dd>
<dt><code class="xref py py-meth docutils literal"><span class="pre">unroll()</span></code></dt>
<dd>This function unrolls an RNN for a given number of (>=1) time steps.</dd>
</dl>
</div>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.RNN">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">RNN</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>num_layers=1</em>, <em>activation='relu'</em>, <em>layout='TNC'</em>, <em>dropout=0</em>, <em>bidirectional=False</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>input_size=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RNN" title="Permalink to this definition"></a></dt>
<dd><p>Applies a multi-layer Elman RNN with <cite>tanh</cite> or <cite>ReLU</cite> non-linearity to an input sequence.</p>
<p>For each element in the input sequence, each layer computes the following
function:</p>
<div class="math">
\[h_t = \tanh(w_{ih} * x_t + b_{ih} + w_{hh} * h_{(t-1)} + b_{hh})\]</div>
<p>where <span class="math">\(h_t\)</span> is the hidden state at time <cite>t</cite>, and <span class="math">\(x_t\)</span> is the hidden
state of the previous layer at time <cite>t</cite> or <span class="math">\(input_t\)</span> for the first layer.
If <cite>activation</cite> is ‘relu’, then <cite>ReLU</cite> is used instead of <cite>tanh</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>hidden_size</strong> (<em>int</em>) – The number of features in the hidden state h.</li>
<li><strong>num_layers</strong> (<em>int, default 1</em>) – Number of recurrent layers.</li>
<li><strong>activation</strong> (<em>{'relu' or 'tanh'}, default 'relu'</em>) – The activation function to use.</li>
<li><strong>layout</strong> (<em>str, default 'TNC'</em>) – The format of input and output tensors. T, N and C stand for
sequence length, batch size, and feature dimensions respectively.</li>
<li><strong>dropout</strong> (<em>float, default 0</em>) – If non-zero, introduces a dropout layer on the outputs of each
RNN layer except the last layer.</li>
<li><strong>bidirectional</strong> (<em>bool, default False</em>) – If <cite>True</cite>, becomes a bidirectional RNN.</li>
<li><strong>i2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the input weights matrix, used for the linear
transformation of the inputs.</li>
<li><strong>h2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the recurrent weights matrix, used for the linear
transformation of the recurrent state.</li>
<li><strong>i2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>h2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>input_size</strong> (<em>int, default 0</em>) – The number of expected features in the input x.
If not specified, it will be inferred from input.</li>
<li><strong>prefix</strong> (<em>str or None</em>) – Prefix of this <cite>Block</cite>.</li>
<li><strong>params</strong> (<em>ParameterDict or None</em>) – Shared Parameters for this <cite>Block</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shapes:</dt>
<dd>The input shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the
input has shape <cite>(sequence_length, batch_size, input_size)</cite></dd>
<dt>Output shape:</dt>
<dd>The output shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the
output has shape <cite>(sequence_length, batch_size, num_hidden)</cite>.
If <cite>bidirectional</cite> is True, output shape will instead be
<cite>(sequence_length, batch_size, 2*num_hidden)</cite></dd>
<dt>Recurrent state:</dt>
<dd>The recurrent state is an NDArray with shape <cite>(num_layers, batch_size, num_hidden)</cite>.
If <cite>bidirectional</cite> is True, the recurrent state shape will instead be
<cite>(2*num_layers, batch_size, num_hidden)</cite>
If input recurrent state is None, zeros are used as default begin states,
and the output recurrent state is omitted.</dd>
</dl>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">layer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">rnn</span><span class="o">.</span><span class="n">RNN</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">layer</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span>
<span class="gp">>>> </span><span class="nb">input</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">10</span><span class="p">))</span>
<span class="gp">>>> </span><span class="c1"># by default zeros are used as begin state</span>
<span class="gp">>>> </span><span class="n">output</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
<span class="gp">>>> </span><span class="c1"># manually specify begin state.</span>
<span class="gp">>>> </span><span class="n">h0</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">100</span><span class="p">))</span>
<span class="gp">>>> </span><span class="n">output</span><span class="p">,</span> <span class="n">hn</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">h0</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.LSTM">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">LSTM</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>num_layers=1</em>, <em>layout='TNC'</em>, <em>dropout=0</em>, <em>bidirectional=False</em>, <em>input_size=0</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.LSTM" title="Permalink to this definition"></a></dt>
<dd><p>Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.</p>
<p>For each element in the input sequence, each layer computes the following
function:</p>
<div class="math">
\[\begin{split}\begin{array}{ll}
i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
f_t = sigmoid(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\
g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\
o_t = sigmoid(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\
c_t = f_t * c_{(t-1)} + i_t * g_t \\
h_t = o_t * \tanh(c_t)
\end{array}\end{split}\]</div>
<p>where <span class="math">\(h_t\)</span> is the hidden state at time <cite>t</cite>, <span class="math">\(c_t\)</span> is the
cell state at time <cite>t</cite>, <span class="math">\(x_t\)</span> is the hidden state of the previous
layer at time <cite>t</cite> or <span class="math">\(input_t\)</span> for the first layer, and <span class="math">\(i_t\)</span>,
<span class="math">\(f_t\)</span>, <span class="math">\(g_t\)</span>, <span class="math">\(o_t\)</span> are the input, forget, cell, and
out gates, respectively.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>hidden_size</strong> (<em>int</em>) – The number of features in the hidden state h.</li>
<li><strong>num_layers</strong> (<em>int, default 1</em>) – Number of recurrent layers.</li>
<li><strong>layout</strong> (<em>str, default 'TNC'</em>) – The format of input and output tensors. T, N and C stand for
sequence length, batch size, and feature dimensions respectively.</li>
<li><strong>dropout</strong> (<em>float, default 0</em>) – If non-zero, introduces a dropout layer on the outputs of each
RNN layer except the last layer.</li>
<li><strong>bidirectional</strong> (<em>bool, default False</em>) – If <cite>True</cite>, becomes a bidirectional RNN.</li>
<li><strong>i2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the input weights matrix, used for the linear
transformation of the inputs.</li>
<li><strong>h2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the recurrent weights matrix, used for the linear
transformation of the recurrent state.</li>
<li><strong>i2h_bias_initializer</strong> (<em>str or Initializer, default 'lstmbias'</em>) – Initializer for the bias vector. By default, bias for the forget
gate is initialized to 1 while all other biases are initialized
to zero.</li>
<li><strong>h2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>input_size</strong> (<em>int, default 0</em>) – The number of expected features in the input x.
If not specified, it will be inferred from input.</li>
<li><strong>prefix</strong> (<em>str or None</em>) – Prefix of this <cite>Block</cite>.</li>
<li><strong>params</strong> (<em>ParameterDict or None</em>) – Shared Parameters for this <cite>Block</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shapes:</dt>
<dd>The input shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the
input has shape <cite>(sequence_length, batch_size, input_size)</cite></dd>
<dt>Output shape:</dt>
<dd>The output shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the
output has shape <cite>(sequence_length, batch_size, num_hidden)</cite>.
If <cite>bidirectional</cite> is True, output shape will instead be
<cite>(sequence_length, batch_size, 2*num_hidden)</cite></dd>
<dt>Recurrent state:</dt>
<dd>The recurrent state is a list of two NDArrays. Both have shape
<cite>(num_layers, batch_size, num_hidden)</cite>.
If <cite>bidirectional</cite> is True, each recurrent state will instead have shape
<cite>(2*num_layers, batch_size, num_hidden)</cite>.
If input recurrent state is None, zeros are used as default begin states,
and the output recurrent state is omitted.</dd>
</dl>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">layer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">rnn</span><span class="o">.</span><span class="n">LSTM</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">layer</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span>
<span class="gp">>>> </span><span class="nb">input</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">10</span><span class="p">))</span>
<span class="gp">>>> </span><span class="c1"># by default zeros are used as begin state</span>
<span class="gp">>>> </span><span class="n">output</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
<span class="gp">>>> </span><span class="c1"># manually specify begin state.</span>
<span class="gp">>>> </span><span class="n">h0</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">100</span><span class="p">))</span>
<span class="gp">>>> </span><span class="n">c0</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">100</span><span class="p">))</span>
<span class="gp">>>> </span><span class="n">output</span><span class="p">,</span> <span class="n">hn</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="p">[</span><span class="n">h0</span><span class="p">,</span> <span class="n">c0</span><span class="p">])</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.GRU">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">GRU</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>num_layers=1</em>, <em>layout='TNC'</em>, <em>dropout=0</em>, <em>bidirectional=False</em>, <em>input_size=0</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.GRU" title="Permalink to this definition"></a></dt>
<dd><p>Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.</p>
<p>For each element in the input sequence, each layer computes the following
function:</p>
<div class="math">
\[\begin{split}\begin{array}{ll}
r_t = sigmoid(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\
i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\
h_t = (1 - i_t) * n_t + i_t * h_{(t-1)} \\
\end{array}\end{split}\]</div>
<p>where <span class="math">\(h_t\)</span> is the hidden state at time <cite>t</cite>, <span class="math">\(x_t\)</span> is the hidden
state of the previous layer at time <cite>t</cite> or <span class="math">\(input_t\)</span> for the first layer,
and <span class="math">\(r_t\)</span>, <span class="math">\(i_t\)</span>, <span class="math">\(n_t\)</span> are the reset, input, and new gates, respectively.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>hidden_size</strong> (<em>int</em>) – The number of features in the hidden state h</li>
<li><strong>num_layers</strong> (<em>int, default 1</em>) – Number of recurrent layers.</li>
<li><strong>layout</strong> (<em>str, default 'TNC'</em>) – The format of input and output tensors. T, N and C stand for
sequence length, batch size, and feature dimensions respectively.</li>
<li><strong>dropout</strong> (<em>float, default 0</em>) – If non-zero, introduces a dropout layer on the outputs of each
RNN layer except the last layer</li>
<li><strong>bidirectional</strong> (<em>bool, default False</em>) – If True, becomes a bidirectional RNN.</li>
<li><strong>i2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the input weights matrix, used for the linear
transformation of the inputs.</li>
<li><strong>h2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the recurrent weights matrix, used for the linear
transformation of the recurrent state.</li>
<li><strong>i2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>h2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>input_size</strong> (<em>int, default 0</em>) – The number of expected features in the input x.
If not specified, it will be inferred from input.</li>
<li><strong>prefix</strong> (<em>str or None</em>) – Prefix of this <cite>Block</cite>.</li>
<li><strong>params</strong> (<em>ParameterDict or None</em>) – Shared Parameters for this <cite>Block</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Input shapes:</dt>
<dd>The input shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the
input has shape <cite>(sequence_length, batch_size, input_size)</cite></dd>
<dt>Output shape:</dt>
<dd>The output shape depends on <cite>layout</cite>. For <cite>layout=’TNC’</cite>, the
output has shape <cite>(sequence_length, batch_size, num_hidden)</cite>.
If <cite>bidirectional</cite> is True, output shape will instead be
<cite>(sequence_length, batch_size, 2*num_hidden)</cite></dd>
<dt>Recurrent state:</dt>
<dd>The recurrent state is an NDArray with shape <cite>(num_layers, batch_size, num_hidden)</cite>.
If <cite>bidirectional</cite> is True, the recurrent state shape will instead be
<cite>(2*num_layers, batch_size, num_hidden)</cite>
If input recurrent state is None, zeros are used as default begin states,
and the output recurrent state is omitted.</dd>
</dl>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">layer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">rnn</span><span class="o">.</span><span class="n">GRU</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">layer</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span>
<span class="gp">>>> </span><span class="nb">input</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">10</span><span class="p">))</span>
<span class="gp">>>> </span><span class="c1"># by default zeros are used as begin state</span>
<span class="gp">>>> </span><span class="n">output</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
<span class="gp">>>> </span><span class="c1"># manually specify begin state.</span>
<span class="gp">>>> </span><span class="n">h0</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">100</span><span class="p">))</span>
<span class="gp">>>> </span><span class="n">output</span><span class="p">,</span> <span class="n">hn</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">h0</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.RNNCell">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">RNNCell</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>activation='tanh'</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>input_size=0</em>, <em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.RNNCell" title="Permalink to this definition"></a></dt>
<dd><p>Simple recurrent neural network cell.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>hidden_size</strong> (<em>int</em>) – Number of units in output symbol</li>
<li><strong>activation</strong> (<em>str or Symbol, default 'tanh'</em>) – Type of activation function.</li>
<li><strong>i2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the input weights matrix, used for the linear
transformation of the inputs.</li>
<li><strong>h2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the recurrent weights matrix, used for the linear
transformation of the recurrent state.</li>
<li><strong>i2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>h2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>prefix</strong> (<em>str, default 'rnn_'</em>) – Prefix for name of <cite>Block</cite>s
(and name of weight if <cite>params</cite> is <cite>None</cite>).</li>
<li><strong>params</strong> (<em>Parameter or None</em>) – Container for weight sharing between cells.
Created if <cite>None</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.LSTMCell">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">LSTMCell</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>input_size=0</em>, <em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.LSTMCell" title="Permalink to this definition"></a></dt>
<dd><p>Long-Short Term Memory (LSTM) network cell.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>hidden_size</strong> (<em>int</em>) – Number of units in output symbol.</li>
<li><strong>i2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the input weights matrix, used for the linear
transformation of the inputs.</li>
<li><strong>h2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the recurrent weights matrix, used for the linear
transformation of the recurrent state.</li>
<li><strong>i2h_bias_initializer</strong> (<em>str or Initializer, default 'lstmbias'</em>) – Initializer for the bias vector. By default, bias for the forget
gate is initialized to 1 while all other biases are initialized
to zero.</li>
<li><strong>h2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>prefix</strong> (<em>str, default 'lstm_'</em>) – Prefix for name of <cite>Block</cite>s
(and name of weight if <cite>params</cite> is <cite>None</cite>).</li>
<li><strong>params</strong> (<em>Parameter or None</em>) – Container for weight sharing between cells.
Created if <cite>None</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.GRUCell">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">GRUCell</code><span class="sig-paren">(</span><em>hidden_size</em>, <em>i2h_weight_initializer=None</em>, <em>h2h_weight_initializer=None</em>, <em>i2h_bias_initializer='zeros'</em>, <em>h2h_bias_initializer='zeros'</em>, <em>input_size=0</em>, <em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.GRUCell" title="Permalink to this definition"></a></dt>
<dd><p>Gated Recurrent Unit (GRU) network cell.
Note: this is an implementation of the cuDNN version of GRUs
(slight modification compared to Cho et al. 2014).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>hidden_size</strong> (<em>int</em>) – Number of units in output symbol.</li>
<li><strong>i2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the input weights matrix, used for the linear
transformation of the inputs.</li>
<li><strong>h2h_weight_initializer</strong> (<em>str or Initializer</em>) – Initializer for the recurrent weights matrix, used for the linear
transformation of the recurrent state.</li>
<li><strong>i2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>h2h_bias_initializer</strong> (<em>str or Initializer</em>) – Initializer for the bias vector.</li>
<li><strong>prefix</strong> (<em>str, default 'gru_'</em>) – Prefix for name of <cite>Block</cite>s
(and name of weight if params is <cite>None</cite>).</li>
<li><strong>params</strong> (<em>Parameter or None</em>) – Container for weight sharing between cells.
Created if <cite>None</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.SequentialRNNCell">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">SequentialRNNCell</code><span class="sig-paren">(</span><em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.SequentialRNNCell" title="Permalink to this definition"></a></dt>
<dd><p>Sequentially stacking multiple RNN cells.</p>
<dl class="method">
<dt id="mxnet.gluon.rnn.SequentialRNNCell.add">
<code class="descname">add</code><span class="sig-paren">(</span><em>cell</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.SequentialRNNCell.add" title="Permalink to this definition"></a></dt>
<dd><p>Appends a cell into the stack.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cell</strong> (<em>rnn cell</em>) – </td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.BidirectionalCell">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">BidirectionalCell</code><span class="sig-paren">(</span><em>l_cell</em>, <em>r_cell</em>, <em>output_prefix='bi_'</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.BidirectionalCell" title="Permalink to this definition"></a></dt>
<dd><p>Bidirectional RNN cell.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>l_cell</strong> (<a class="reference internal" href="#mxnet.gluon.rnn.RecurrentCell" title="mxnet.gluon.rnn.RecurrentCell"><em>RecurrentCell</em></a>) – Cell for forward unrolling</li>
<li><strong>r_cell</strong> (<a class="reference internal" href="#mxnet.gluon.rnn.RecurrentCell" title="mxnet.gluon.rnn.RecurrentCell"><em>RecurrentCell</em></a>) – Cell for backward unrolling</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.DropoutCell">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">DropoutCell</code><span class="sig-paren">(</span><em>rate</em>, <em>prefix=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.DropoutCell" title="Permalink to this definition"></a></dt>
<dd><p>Applies dropout on input.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>rate</strong> (<em>float</em>) – Percentage of elements to drop out, which
is 1 - percentage to retain.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.ZoneoutCell">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">ZoneoutCell</code><span class="sig-paren">(</span><em>base_cell</em>, <em>zoneout_outputs=0.0</em>, <em>zoneout_states=0.0</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.ZoneoutCell" title="Permalink to this definition"></a></dt>
<dd><p>Applies Zoneout on base cell.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.rnn.ResidualCell">
<em class="property">class </em><code class="descclassname">mxnet.gluon.rnn.</code><code class="descname">ResidualCell</code><span class="sig-paren">(</span><em>base_cell</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.rnn.ResidualCell" title="Permalink to this definition"></a></dt>
<dd><p>Adds residual connection as described in Wu et al, 2016
(<a class="reference external" href="https://arxiv.org/abs/1609.08144">https://arxiv.org/abs/1609.08144</a>).
Output of the cell is output of the base cell plus input.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.Trainer">
<em class="property">class </em><code class="descclassname">mxnet.gluon.</code><code class="descname">Trainer</code><span class="sig-paren">(</span><em>params</em>, <em>optimizer</em>, <em>optimizer_params=None</em>, <em>kvstore='device'</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Trainer" title="Permalink to this definition"></a></dt>
<dd><p>Applies an <cite>Optimizer</cite> on a set of Parameters. Trainer should
be used together with <cite>autograd</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>params</strong> (<a class="reference internal" href="#mxnet.gluon.ParameterDict" title="mxnet.gluon.ParameterDict"><em>ParameterDict</em></a>) – The set of parameters to optimize.</li>
<li><strong>optimizer</strong> (<em>str or Optimizer</em>) – The optimizer to use. See
<a class="reference external" href="https://mxnet.incubator.apache.org/api/python/optimization.html#the-mxnet-optimizer-package">help</a>
on Optimizer for a list of available optimizers.</li>
<li><strong>optimizer_params</strong> (<em>dict</em>) – Key-word arguments to be passed to optimizer constructor. For example,
<cite>{‘learning_rate’: 0.1}</cite>. All optimizers accept learning_rate, wd (weight decay),
clip_gradient, and lr_scheduler. See each optimizer’s
constructor for a list of additional supported arguments.</li>
<li><strong>kvstore</strong> (<em>str or KVStore</em>) – kvstore type for multi-gpu and distributed training. See help on
<a class="reference internal" href="kvstore.html#mxnet.kvstore.create" title="mxnet.kvstore.create"><code class="xref any py py-func docutils literal"><span class="pre">mxnet.kvstore.create</span></code></a> for more information.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="mxnet.gluon.Trainer.step">
<code class="descname">step</code><span class="sig-paren">(</span><em>batch_size</em>, <em>ignore_stale_grad=False</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Trainer.step" title="Permalink to this definition"></a></dt>
<dd><p>Makes one step of parameter update. Should be called after
<cite>autograd.compute_gradient</cite> and outside of <cite>record()</cite> scope.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>batch_size</strong> (<em>int</em>) – Batch size of data processed. Gradient will be normalized by <cite>1/batch_size</cite>.
Set this to 1 if you normalized loss manually with <cite>loss = mean(loss)</cite>.</li>
<li><strong>ignore_stale_grad</strong> (<em>bool, optional, default=False</em>) – If true, ignores Parameters with stale gradient (gradient that has not
been updated by <cite>backward</cite> after last step) and skip update.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Trainer.save_states">
<code class="descname">save_states</code><span class="sig-paren">(</span><em>fname</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Trainer.save_states" title="Permalink to this definition"></a></dt>
<dd><p>Saves trainer states (e.g. optimizer, momentum) to a file.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>fname</strong> (<em>str</em>) – Path to output states file.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.Trainer.load_states">
<code class="descname">load_states</code><span class="sig-paren">(</span><em>fname</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.Trainer.load_states" title="Permalink to this definition"></a></dt>
<dd><p>Loads trainer states (e.g. optimizer, momentum) from a file.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>fname</strong> (<em>str</em>) – Path to input states file.</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.loss.L2Loss">
<em class="property">class </em><code class="descclassname">mxnet.gluon.loss.</code><code class="descname">L2Loss</code><span class="sig-paren">(</span><em>weight=1.0</em>, <em>batch_axis=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.loss.L2Loss" title="Permalink to this definition"></a></dt>
<dd><p>Calculates the mean squared error between output and label:</p>
<div class="math">
\[L = \frac{1}{2}\sum_i \Vert {output}_i - {label}_i \Vert^2.\]</div>
<p>Output and label can have arbitrary shape as long as they have the same
number of elements.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>weight</strong> (<em>float or None</em>) – Global scalar weight for loss.</li>
<li><strong>sample_weight</strong> (<em>Symbol or None</em>) – Per sample weighting. Must be broadcastable to
the same shape as loss. For example, if loss has
shape (64, 10) and you want to weight each sample
in the batch, <cite>sample_weight</cite> should have shape (64, 1).</li>
<li><strong>batch_axis</strong> (<em>int, default 0</em>) – The axis that represents mini-batch.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.loss.L1Loss">
<em class="property">class </em><code class="descclassname">mxnet.gluon.loss.</code><code class="descname">L1Loss</code><span class="sig-paren">(</span><em>weight=None</em>, <em>batch_axis=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.loss.L1Loss" title="Permalink to this definition"></a></dt>
<dd><p>Calculates the mean absolute error between output and label:</p>
<div class="math">
\[L = \frac{1}{2}\sum_i \vert {output}_i - {label}_i \vert.\]</div>
<p>Output and label must have the same shape.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>weight</strong> (<em>float or None</em>) – Global scalar weight for loss.</li>
<li><strong>sample_weight</strong> (<em>Symbol or None</em>) – Per sample weighting. Must be broadcastable to
the same shape as loss. For example, if loss has
shape (64, 10) and you want to weight each sample
in the batch, <cite>sample_weight</cite> should have shape (64, 1).</li>
<li><strong>batch_axis</strong> (<em>int, default 0</em>) – The axis that represents mini-batch.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.loss.SoftmaxCrossEntropyLoss">
<em class="property">class </em><code class="descclassname">mxnet.gluon.loss.</code><code class="descname">SoftmaxCrossEntropyLoss</code><span class="sig-paren">(</span><em>axis=-1</em>, <em>sparse_label=True</em>, <em>from_logits=False</em>, <em>weight=None</em>, <em>batch_axis=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.loss.SoftmaxCrossEntropyLoss" title="Permalink to this definition"></a></dt>
<dd><p>Computes the softmax cross entropy loss. (alias: SoftmaxCELoss)</p>
<p>If <cite>sparse_label</cite> is <cite>True</cite>, label should contain integer category indicators:</p>
<div class="math">
\[p = {softmax}({output})\]\[L = -\sum_i {log}(p_{i,{label}_i})\]</div>
<p>Label’s shape should be output’s shape without the <cite>axis</cite> dimension. i.e. for
<cite>output.shape</cite> = (1,2,3,4) and axis = 2, <cite>label.shape</cite> should be (1,2,4).</p>
<p>If <cite>sparse_label</cite> is <cite>False</cite>, label should contain probability distribution
with the same shape as output:</p>
<div class="math">
\[p = {softmax}({output})\]\[L = -\sum_i \sum_j {label}_j {log}(p_{ij})\]</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>axis</strong> (<em>int, default -1</em>) – The axis to sum over when computing softmax and entropy.</li>
<li><strong>sparse_label</strong> (<em>bool, default True</em>) – Whether label is an integer array instead of probability distribution.</li>
<li><strong>from_logits</strong> (<em>bool, default False</em>) – Whether input is a log probability (usually from log_softmax) instead
of unnormalized numbers.</li>
<li><strong>weight</strong> (<em>float or None</em>) – Global scalar weight for loss.</li>
<li><strong>sample_weight</strong> (<em>Symbol or None</em>) – Per sample weighting. Must be broadcastable to
the same shape as loss. For example, if loss has
shape (64, 10) and you want to weight each sample
in the batch, <cite>sample_weight</cite> should have shape (64, 1).</li>
<li><strong>batch_axis</strong> (<em>int, default 0</em>) – The axis that represents mini-batch.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.loss.KLDivLoss">
<em class="property">class </em><code class="descclassname">mxnet.gluon.loss.</code><code class="descname">KLDivLoss</code><span class="sig-paren">(</span><em>from_logits=True</em>, <em>weight=None</em>, <em>batch_axis=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.loss.KLDivLoss" title="Permalink to this definition"></a></dt>
<dd><p>The Kullback-Leibler divergence loss.</p>
<p>KL divergence is a useful distance measure for continuous distributions
and is often useful when performing direct regression over the space of
(discretely sampled) continuous output distributions.</p>
<div class="math">
\[L = 1/n \sum_i (label_i * (log(label_i) - output_i))\]</div>
<p>Label’s shape should be the same as output’s.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>from_logits</strong> (bool, default is <cite>True</cite>) – Whether the input is log probability (usually from log_softmax) instead
of unnormalized numbers.</li>
<li><strong>weight</strong> (<em>float or None</em>) – Global scalar weight for loss.</li>
<li><strong>sample_weight</strong> (<em>Symbol or None</em>) – Per sample weighting. Must be broadcastable to
the same shape as loss. For example, if loss has
shape (64, 10) and you want to weight each sample
in the batch, <cite>sample_weight</cite> should have shape (64, 1).</li>
<li><strong>batch_axis</strong> (<em>int, default 0</em>) – The axis that represents mini-batch.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.utils.split_data">
<code class="descclassname">utils.</code><code class="descname">split_data</code><span class="sig-paren">(</span><em>data</em>, <em>num_slice</em>, <em>batch_axis=0</em>, <em>even_split=True</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.utils.split_data" title="Permalink to this definition"></a></dt>
<dd><p>Splits an NDArray into <cite>num_slice</cite> slices along <cite>batch_axis</cite>.
Usually used for data parallelism where each slice is sent
to one device (i.e. GPU).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>data</strong> (<a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – A batch of data.</li>
<li><strong>num_slice</strong> (<em>int</em>) – Number of desired slices.</li>
<li><strong>batch_axis</strong> (<em>int, default 0</em>) – The axis along which to slice.</li>
<li><strong>even_split</strong> (<em>bool, default True</em>) – Whether to force all slices to have the same number of elements.
If <cite>True</cite>, an error will be raised when <cite>num_slice</cite> does not evenly
divide <cite>data.shape[batch_axis]</cite>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Return value is a list even if <cite>num_slice</cite> is 1.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">list of NDArray</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.utils.split_and_load">
<code class="descclassname">utils.</code><code class="descname">split_and_load</code><span class="sig-paren">(</span><em>data</em>, <em>ctx_list</em>, <em>batch_axis=0</em>, <em>even_split=True</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.utils.split_and_load" title="Permalink to this definition"></a></dt>
<dd><p>Splits an NDArray into <cite>len(ctx_list)</cite> slices along <cite>batch_axis</cite> and loads
each slice to one context in <cite>ctx_list</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>data</strong> (<a class="reference internal" href="ndarray.html#mxnet.ndarray.NDArray" title="mxnet.ndarray.NDArray"><em>NDArray</em></a>) – A batch of data.</li>
<li><strong>ctx_list</strong> (<em>list of Context</em>) – A list of Contexts.</li>
<li><strong>batch_axis</strong> (<em>int, default 0</em>) – The axis along which to slice.</li>
<li><strong>even_split</strong> (<em>bool, default True</em>) – Whether to force all slices to have the same number of elements.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Each corresponds to a context in <cite>ctx_list</cite>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">list of NDArray</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.utils.clip_global_norm">
<code class="descclassname">utils.</code><code class="descname">clip_global_norm</code><span class="sig-paren">(</span><em>arrays</em>, <em>max_norm</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.utils.clip_global_norm" title="Permalink to this definition"></a></dt>
<dd><p>Rescales NDArrays so that the sum of their 2-norm is smaller than <cite>max_norm</cite>.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.Dataset">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.</code><code class="descname">Dataset</code><a class="headerlink" href="#mxnet.gluon.data.Dataset" title="Permalink to this definition"></a></dt>
<dd><p>Abstract dataset class. All datasets should have this interface.</p>
<p>Subclasses need to override <cite>__getitem__</cite>, which returns the i-th
element, and <cite>__len__</cite>, which returns the total number of elements.</p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">An mxnet or numpy array can be directly used as a dataset.</p>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.ArrayDataset">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.</code><code class="descname">ArrayDataset</code><span class="sig-paren">(</span><em>data</em>, <em>label</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.ArrayDataset" title="Permalink to this definition"></a></dt>
<dd><p>A dataset with a data array and a label array.</p>
<p>The i-th sample is <cite>(data[i], label[i])</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>data</strong> (<em>array-like object</em>) – The data array. Can be mxnet or numpy array.</li>
<li><strong>label</strong> (<em>array-like object</em>) – The label array. Can be mxnet or numpy array.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.RecordFileDataset">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.</code><code class="descname">RecordFileDataset</code><span class="sig-paren">(</span><em>filename</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.RecordFileDataset" title="Permalink to this definition"></a></dt>
<dd><p>A dataset wrapping over a RecordIO (.rec) file.</p>
<p>Each sample is a string representing the raw content of a record.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>filename</strong> (<em>str</em>) – Path to rec file.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.Sampler">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.</code><code class="descname">Sampler</code><a class="headerlink" href="#mxnet.gluon.data.Sampler" title="Permalink to this definition"></a></dt>
<dd><p>Base class for samplers.</p>
<p>All samplers should subclass <cite>Sampler</cite> and define <cite>__iter__</cite> and <cite>__len__</cite>
methods.</p>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.SequentialSampler">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.</code><code class="descname">SequentialSampler</code><span class="sig-paren">(</span><em>length</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.SequentialSampler" title="Permalink to this definition"></a></dt>
<dd><p>Samples elements from [0, length) sequentially.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>length</strong> (<em>int</em>) – Length of the sequence.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.RandomSampler">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.</code><code class="descname">RandomSampler</code><span class="sig-paren">(</span><em>length</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.RandomSampler" title="Permalink to this definition"></a></dt>
<dd><p>Samples elements from [0, length) randomly without replacement.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>length</strong> (<em>int</em>) – Length of the sequence.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.BatchSampler">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.</code><code class="descname">BatchSampler</code><span class="sig-paren">(</span><em>sampler</em>, <em>batch_size</em>, <em>last_batch='keep'</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.BatchSampler" title="Permalink to this definition"></a></dt>
<dd><p>Wraps over another <cite>Sampler</cite> and returns mini-batches of samples.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>sampler</strong> (<a class="reference internal" href="#mxnet.gluon.data.Sampler" title="mxnet.gluon.data.Sampler"><em>Sampler</em></a>) – The source Sampler.</li>
<li><strong>batch_size</strong> (<em>int</em>) – Size of mini-batch.</li>
<li><strong>last_batch</strong> (<em>{'keep', 'discard', 'rollover'}</em>) – <p>Specifies how the last batch is handled if batch_size does not evenly
divide sequence length.</p>
<p>If ‘keep’, the last batch will be returned directly, but will contain
fewer elements than <cite>batch_size</cite> requires.</p>
<p>If ‘discard’, the last batch will be discarded.</p>
<p>If ‘rollover’, the remaining elements will be rolled over to the next
iteration.</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Examples</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sampler</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">SequentialSampler</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="gp">>>> </span><span class="n">batch_sampler</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">BatchSampler</span><span class="p">(</span><span class="n">sampler</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="s1">'keep'</span><span class="p">)</span>
<span class="gp">>>> </span><span class="nb">list</span><span class="p">(</span><span class="n">batch_sampler</span><span class="p">)</span>
<span class="go">[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]</span>
</pre></div>
</div>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.DataLoader">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.</code><code class="descname">DataLoader</code><span class="sig-paren">(</span><em>dataset</em>, <em>batch_size=None</em>, <em>shuffle=False</em>, <em>sampler=None</em>, <em>last_batch=None</em>, <em>batch_sampler=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.DataLoader" title="Permalink to this definition"></a></dt>
<dd><p>Loads data from a dataset and returns mini-batches of data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>dataset</strong> (<a class="reference internal" href="#mxnet.gluon.data.Dataset" title="mxnet.gluon.data.Dataset"><em>Dataset</em></a>) – Source dataset. Note that numpy and mxnet arrays can be directly used
as a Dataset.</li>
<li><strong>batch_size</strong> (<em>int</em>) – Size of mini-batch.</li>
<li><strong>shuffle</strong> (<em>bool</em>) – Whether to shuffle the samples.</li>
<li><strong>sampler</strong> (<a class="reference internal" href="#mxnet.gluon.data.Sampler" title="mxnet.gluon.data.Sampler"><em>Sampler</em></a>) – The sampler to use. Either specify sampler or shuffle, not both.</li>
<li><strong>last_batch</strong> (<em>{'keep', 'discard', 'rollover'}</em>) – <p>How to handle the last batch if batch_size does not evenly divide
<cite>len(dataset)</cite>.</p>
<p>keep - A batch with fewer samples than previous batches is returned.
discard - The last batch is discarded if it is incomplete.
rollover - The remaining samples are rolled over to the next epoch.</p>
</li>
<li><strong>batch_sampler</strong> (<a class="reference internal" href="#mxnet.gluon.data.Sampler" title="mxnet.gluon.data.Sampler"><em>Sampler</em></a>) – A sampler that returns mini-batches. Do not specify batch_size,
shuffle, sampler, and last_batch if batch_sampler is specified.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<span class="target" id="module-mxnet.gluon.data.vision"></span><p>Dataset container.</p>
<dl class="class">
<dt id="mxnet.gluon.data.vision.MNIST">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.vision.</code><code class="descname">MNIST</code><span class="sig-paren">(</span><em>root='~/.mxnet/datasets/mnist'</em>, <em>train=True</em>, <em>transform=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.vision.MNIST" title="Permalink to this definition"></a></dt>
<dd><p>MNIST handwritten digits dataset from <a class="reference external" href="http://yann.lecun.com/exdb/mnist">http://yann.lecun.com/exdb/mnist</a>.</p>
<p>Each sample is an image (in 3D NDArray) with shape (28, 28, 1).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>root</strong> (<em>str</em>) – Path to temp folder for storing data.</li>
<li><strong>train</strong> (<em>bool</em>) – Whether to load the training or testing set.</li>
<li><strong>transform</strong> (<em>function</em>) – <p>A user defined callback that transforms each instance. For example:</p>
<p>transform=lambda data, label: (data.astype(np.float32)/255, label)</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.vision.FashionMNIST">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.vision.</code><code class="descname">FashionMNIST</code><span class="sig-paren">(</span><em>root='~/.mxnet/datasets/fashion-mnist'</em>, <em>train=True</em>, <em>transform=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.vision.FashionMNIST" title="Permalink to this definition"></a></dt>
<dd><p>A dataset of Zalando’s article images consisting of fashion products,
a drop-in replacement of the original MNIST dataset from
<a class="reference external" href="https://github.com/zalandoresearch/fashion-mnist">https://github.com/zalandoresearch/fashion-mnist</a>.</p>
<p>Each sample is an image (in 3D NDArray) with shape (28, 28, 1).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>root</strong> (<em>str</em>) – Path to temp folder for storing data.</li>
<li><strong>train</strong> (<em>bool</em>) – Whether to load the training or testing set.</li>
<li><strong>transform</strong> (<em>function</em>) – <p>A user defined callback that transforms each instance. For example:</p>
<p>transform=lambda data, label: (data.astype(np.float32)/255, label)</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.vision.CIFAR10">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.vision.</code><code class="descname">CIFAR10</code><span class="sig-paren">(</span><em>root='~/.mxnet/datasets/cifar10'</em>, <em>train=True</em>, <em>transform=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.vision.CIFAR10" title="Permalink to this definition"></a></dt>
<dd><p>CIFAR10 image classification dataset from <a class="reference external" href="https://www.cs.toronto.edu/~kriz/cifar.html">https://www.cs.toronto.edu/~kriz/cifar.html</a>.</p>
<p>Each sample is an image (in 3D NDArray) with shape (32, 32, 3).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>root</strong> (<em>str</em>) – Path to temp folder for storing data.</li>
<li><strong>train</strong> (<em>bool</em>) – Whether to load the training or testing set.</li>
<li><strong>transform</strong> (<em>function</em>) – <p>A user defined callback that transforms each instance. For example:</p>
<p>transform=lambda data, label: (data.astype(np.float32)/255, label)</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.vision.ImageRecordDataset">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.vision.</code><code class="descname">ImageRecordDataset</code><span class="sig-paren">(</span><em>filename</em>, <em>flag=1</em>, <em>transform=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.vision.ImageRecordDataset" title="Permalink to this definition"></a></dt>
<dd><p>A dataset wrapping over a RecordIO file containing images.</p>
<p>Each sample is an image and its corresponding label.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>filename</strong> (<em>str</em>) – Path to rec file.</li>
<li><strong>flag</strong> (<em>{0, 1}, default 1</em>) – <p>If 0, always convert images to greyscale.</p>
<p>If 1, always convert images to colored (RGB).</p>
</li>
<li><strong>transform</strong> (<em>function</em>) – <p>A user defined callback that transforms each instance. For example:</p>
<p>transform=lambda data, label: (data.astype(np.float32)/255, label)</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.data.vision.ImageFolderDataset">
<em class="property">class </em><code class="descclassname">mxnet.gluon.data.vision.</code><code class="descname">ImageFolderDataset</code><span class="sig-paren">(</span><em>root</em>, <em>flag=1</em>, <em>transform=None</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.data.vision.ImageFolderDataset" title="Permalink to this definition"></a></dt>
<dd><p>A dataset for loading image files stored in a folder structure like:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span>root/car/0001.jpg
root/car/xxxa.jpg
root/car/yyyb.jpg
root/bus/123.jpg
root/bus/023.jpg
root/bus/wwww.jpg
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>root</strong> (<em>str</em>) – Path to root directory.</li>
<li><strong>flag</strong> (<em>{0, 1}, default 1</em>) – If 0, always convert loaded images to greyscale (1 channel).
If 1, always convert loaded images to colored (3 channels).</li>
<li><strong>transform</strong> (<em>callable</em>) – <p>A function that takes data and label and transforms them:</p>
<p>transform = lambda data, label: (data.astype(np.float32)/255, label)</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="attribute">
<dt id="mxnet.gluon.data.vision.ImageFolderDataset.synsets">
<code class="descname">synsets</code><a class="headerlink" href="#mxnet.gluon.data.vision.ImageFolderDataset.synsets" title="Permalink to this definition"></a></dt>
<dd><p><em>list</em></p>
<p>List of class names. <cite>synsets[i]</cite> is the name for the integer label <cite>i</cite></p>
</dd></dl>
<dl class="attribute">
<dt id="mxnet.gluon.data.vision.ImageFolderDataset.items">
<code class="descname">items</code><a class="headerlink" href="#mxnet.gluon.data.vision.ImageFolderDataset.items" title="Permalink to this definition"></a></dt>
<dd><p><em>list of tuples</em></p>
<p>List of all images in (filename, label) pairs.</p>
</dd></dl>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.get_model">
<code class="descclassname">vision.</code><code class="descname">get_model</code><span class="sig-paren">(</span><em>name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.get_model" title="Permalink to this definition"></a></dt>
<dd><p>Returns a pre-defined model by name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>name</strong> (<em>str</em>) – Name of the model.</li>
<li><strong>pretrained</strong> (<em>bool</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>classes</strong> (<em>int</em>) – Number of classes for the output layer.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The model.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#mxnet.gluon.HybridBlock" title="mxnet.gluon.HybridBlock">HybridBlock</a></p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet18_v1">
<code class="descclassname">vision.</code><code class="descname">resnet18_v1</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet18_v1" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-18 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet34_v1">
<code class="descclassname">vision.</code><code class="descname">resnet34_v1</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet34_v1" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-34 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet50_v1">
<code class="descclassname">vision.</code><code class="descname">resnet50_v1</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet50_v1" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-50 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet101_v1">
<code class="descclassname">vision.</code><code class="descname">resnet101_v1</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet101_v1" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-101 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet152_v1">
<code class="descclassname">vision.</code><code class="descname">resnet152_v1</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet152_v1" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-152 V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet18_v2">
<code class="descclassname">vision.</code><code class="descname">resnet18_v2</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet18_v2" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-18 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet34_v2">
<code class="descclassname">vision.</code><code class="descname">resnet34_v2</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet34_v2" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-34 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet50_v2">
<code class="descclassname">vision.</code><code class="descname">resnet50_v2</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet50_v2" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-50 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet101_v2">
<code class="descclassname">vision.</code><code class="descname">resnet101_v2</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet101_v2" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-101 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.resnet152_v2">
<code class="descclassname">vision.</code><code class="descname">resnet152_v2</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.resnet152_v2" title="Permalink to this definition"></a></dt>
<dd><p>ResNet-152 V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.get_resnet">
<code class="descclassname">vision.</code><code class="descname">get_resnet</code><span class="sig-paren">(</span><em>version</em>, <em>num_layers</em>, <em>pretrained=False</em>, <em>ctx=cpu(0)</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.get_resnet" title="Permalink to this definition"></a></dt>
<dd><p>ResNet V1 model from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.
ResNet V2 model from <a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>version</strong> (<em>int</em>) – Version of ResNet. Options are 1, 2.</li>
<li><strong>num_layers</strong> (<em>int</em>) – Numbers of layers. Options are 18, 34, 50, 101, 152.</li>
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.ResNetV1">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">ResNetV1</code><span class="sig-paren">(</span><em>block</em>, <em>layers</em>, <em>channels</em>, <em>classes=1000</em>, <em>thumbnail=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.ResNetV1" title="Permalink to this definition"></a></dt>
<dd><p>ResNet V1 model from
<a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>block</strong> (<a class="reference internal" href="#mxnet.gluon.HybridBlock" title="mxnet.gluon.HybridBlock"><em>HybridBlock</em></a>) – Class for the residual block. Options are BasicBlockV1, BottleneckV1.</li>
<li><strong>layers</strong> (<em>list of int</em>) – Numbers of layers in each block</li>
<li><strong>channels</strong> (<em>list of int</em>) – Numbers of channels in each block. Length should be one larger than layers list.</li>
<li><strong>classes</strong> (<em>int, default 1000</em>) – Number of classification classes.</li>
<li><strong>thumbnail</strong> (<em>bool, default False</em>) – Enable thumbnail.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.BasicBlockV1">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">BasicBlockV1</code><span class="sig-paren">(</span><em>channels</em>, <em>stride</em>, <em>downsample=False</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.BasicBlockV1" title="Permalink to this definition"></a></dt>
<dd><p>BasicBlock V1 from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.
This is used for ResNet V1 for 18, 34 layers.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – Number of output channels.</li>
<li><strong>stride</strong> (<em>int</em>) – Stride size.</li>
<li><strong>downsample</strong> (<em>bool, default False</em>) – Whether to downsample the input.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – Number of input channels. Default is 0, to infer from the graph.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.BottleneckV1">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">BottleneckV1</code><span class="sig-paren">(</span><em>channels</em>, <em>stride</em>, <em>downsample=False</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.BottleneckV1" title="Permalink to this definition"></a></dt>
<dd><p>Bottleneck V1 from <a class="reference external" href="http://arxiv.org/abs/1512.03385">“Deep Residual Learning for Image Recognition”</a> paper.
This is used for ResNet V1 for 50, 101, 152 layers.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – Number of output channels.</li>
<li><strong>stride</strong> (<em>int</em>) – Stride size.</li>
<li><strong>downsample</strong> (<em>bool, default False</em>) – Whether to downsample the input.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – Number of input channels. Default is 0, to infer from the graph.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.ResNetV2">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">ResNetV2</code><span class="sig-paren">(</span><em>block</em>, <em>layers</em>, <em>channels</em>, <em>classes=1000</em>, <em>thumbnail=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.ResNetV2" title="Permalink to this definition"></a></dt>
<dd><p>ResNet V2 model from
<a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>block</strong> (<a class="reference internal" href="#mxnet.gluon.HybridBlock" title="mxnet.gluon.HybridBlock"><em>HybridBlock</em></a>) – Class for the residual block. Options are BasicBlockV2, BottleneckV2.</li>
<li><strong>layers</strong> (<em>list of int</em>) – Numbers of layers in each block</li>
<li><strong>channels</strong> (<em>list of int</em>) – Numbers of channels in each block. Length should be one larger than layers list.</li>
<li><strong>classes</strong> (<em>int, default 1000</em>) – Number of classification classes.</li>
<li><strong>thumbnail</strong> (<em>bool, default False</em>) – Enable thumbnail.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.BasicBlockV2">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">BasicBlockV2</code><span class="sig-paren">(</span><em>channels</em>, <em>stride</em>, <em>downsample=False</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.BasicBlockV2" title="Permalink to this definition"></a></dt>
<dd><p>BasicBlock V2 from
<a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.
This is used for ResNet V2 for 18, 34 layers.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – Number of output channels.</li>
<li><strong>stride</strong> (<em>int</em>) – Stride size.</li>
<li><strong>downsample</strong> (<em>bool, default False</em>) – Whether to downsample the input.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – Number of input channels. Default is 0, to infer from the graph.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.BottleneckV2">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">BottleneckV2</code><span class="sig-paren">(</span><em>channels</em>, <em>stride</em>, <em>downsample=False</em>, <em>in_channels=0</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.BottleneckV2" title="Permalink to this definition"></a></dt>
<dd><p>Bottleneck V2 from
<a class="reference external" href="https://arxiv.org/abs/1603.05027">“Identity Mappings in Deep Residual Networks”</a> paper.
This is used for ResNet V2 for 50, 101, 152 layers.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>channels</strong> (<em>int</em>) – Number of output channels.</li>
<li><strong>stride</strong> (<em>int</em>) – Stride size.</li>
<li><strong>downsample</strong> (<em>bool, default False</em>) – Whether to downsample the input.</li>
<li><strong>in_channels</strong> (<em>int, default 0</em>) – Number of input channels. Default is 0, to infer from the graph.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.vgg11">
<code class="descclassname">vision.</code><code class="descname">vgg11</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.vgg11" title="Permalink to this definition"></a></dt>
<dd><p>VGG-11 model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.vgg13">
<code class="descclassname">vision.</code><code class="descname">vgg13</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.vgg13" title="Permalink to this definition"></a></dt>
<dd><p>VGG-13 model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.vgg16">
<code class="descclassname">vision.</code><code class="descname">vgg16</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.vgg16" title="Permalink to this definition"></a></dt>
<dd><p>VGG-16 model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.vgg19">
<code class="descclassname">vision.</code><code class="descname">vgg19</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.vgg19" title="Permalink to this definition"></a></dt>
<dd><p>VGG-19 model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.vgg11_bn">
<code class="descclassname">vision.</code><code class="descname">vgg11_bn</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.vgg11_bn" title="Permalink to this definition"></a></dt>
<dd><p>VGG-11 model with batch normalization from the
<a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.vgg13_bn">
<code class="descclassname">vision.</code><code class="descname">vgg13_bn</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.vgg13_bn" title="Permalink to this definition"></a></dt>
<dd><p>VGG-13 model with batch normalization from the
<a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.vgg16_bn">
<code class="descclassname">vision.</code><code class="descname">vgg16_bn</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.vgg16_bn" title="Permalink to this definition"></a></dt>
<dd><p>VGG-16 model with batch normalization from the
<a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.vgg19_bn">
<code class="descclassname">vision.</code><code class="descname">vgg19_bn</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.vgg19_bn" title="Permalink to this definition"></a></dt>
<dd><p>VGG-19 model with batch normalization from the
<a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.get_vgg">
<code class="descclassname">vision.</code><code class="descname">get_vgg</code><span class="sig-paren">(</span><em>num_layers</em>, <em>pretrained=False</em>, <em>ctx=cpu(0)</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.get_vgg" title="Permalink to this definition"></a></dt>
<dd><p>VGG model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>num_layers</strong> (<em>int</em>) – Number of layers for the variant of VGG. Options are 11, 13, 16, 19.</li>
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.VGG">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">VGG</code><span class="sig-paren">(</span><em>layers</em>, <em>filters</em>, <em>classes=1000</em>, <em>batch_norm=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.VGG" title="Permalink to this definition"></a></dt>
<dd><p>VGG model from the <a class="reference external" href="https://arxiv.org/abs/1409.1556">“Very Deep Convolutional Networks for Large-Scale Image Recognition”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>layers</strong> (<em>list of int</em>) – Numbers of layers in each feature block.</li>
<li><strong>filters</strong> (<em>list of int</em>) – Numbers of filters in each feature block. List length should match the layers.</li>
<li><strong>classes</strong> (<em>int, default 1000</em>) – Number of classification classes.</li>
<li><strong>batch_norm</strong> (<em>bool, default False</em>) – Use batch normalization.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.alexnet">
<code class="descclassname">vision.</code><code class="descname">alexnet</code><span class="sig-paren">(</span><em>pretrained=False</em>, <em>ctx=cpu(0)</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.alexnet" title="Permalink to this definition"></a></dt>
<dd><p>AlexNet model from the <a class="reference external" href="https://arxiv.org/abs/1404.5997">“One weird trick...”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.AlexNet">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">AlexNet</code><span class="sig-paren">(</span><em>classes=1000</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.AlexNet" title="Permalink to this definition"></a></dt>
<dd><p>AlexNet model from the <a class="reference external" href="https://arxiv.org/abs/1404.5997">“One weird trick...”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>classes</strong> (<em>int, default 1000</em>) – Number of classes for the output layer.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.densenet121">
<code class="descclassname">vision.</code><code class="descname">densenet121</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.densenet121" title="Permalink to this definition"></a></dt>
<dd><p>Densenet-BC 121-layer model from the
<a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.densenet161">
<code class="descclassname">vision.</code><code class="descname">densenet161</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.densenet161" title="Permalink to this definition"></a></dt>
<dd><p>Densenet-BC 161-layer model from the
<a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.densenet169">
<code class="descclassname">vision.</code><code class="descname">densenet169</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.densenet169" title="Permalink to this definition"></a></dt>
<dd><p>Densenet-BC 169-layer model from the
<a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.densenet201">
<code class="descclassname">vision.</code><code class="descname">densenet201</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.densenet201" title="Permalink to this definition"></a></dt>
<dd><p>Densenet-BC 201-layer model from the
<a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.DenseNet">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">DenseNet</code><span class="sig-paren">(</span><em>num_init_features</em>, <em>growth_rate</em>, <em>block_config</em>, <em>bn_size=4</em>, <em>dropout=0</em>, <em>classes=1000</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.DenseNet" title="Permalink to this definition"></a></dt>
<dd><p>Densenet-BC model from the
<a class="reference external" href="https://arxiv.org/pdf/1608.06993.pdf">“Densely Connected Convolutional Networks”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>num_init_features</strong> (<em>int</em>) – Number of filters to learn in the first convolution layer.</li>
<li><strong>growth_rate</strong> (<em>int</em>) – Number of filters to add each layer (<cite>k</cite> in the paper).</li>
<li><strong>block_config</strong> (<em>list of int</em>) – List of integers for numbers of layers in each pooling block.</li>
<li><strong>bn_size</strong> (<em>int, default 4</em>) – Multiplicative factor for number of bottle neck layers.
(i.e. bn_size * k features in the bottleneck layer)</li>
<li><strong>dropout</strong> (<em>float, default 0</em>) – Rate of dropout after each dense layer.</li>
<li><strong>classes</strong> (<em>int, default 1000</em>) – Number of classification classes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.squeezenet1_0">
<code class="descclassname">vision.</code><code class="descname">squeezenet1_0</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.squeezenet1_0" title="Permalink to this definition"></a></dt>
<dd><p>SqueezeNet 1.0 model from the <a class="reference external" href="https://arxiv.org/abs/1602.07360">“SqueezeNet: AlexNet-level accuracy with 50x fewer parameters
and &lt;0.5MB model size”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.squeezenet1_1">
<code class="descclassname">vision.</code><code class="descname">squeezenet1_1</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.squeezenet1_1" title="Permalink to this definition"></a></dt>
<dd><p>SqueezeNet 1.1 model from the <a class="reference external" href="https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1">official SqueezeNet repo</a>.
SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
than SqueezeNet 1.0, without sacrificing accuracy.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.SqueezeNet">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">SqueezeNet</code><span class="sig-paren">(</span><em>version</em>, <em>classes=1000</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.SqueezeNet" title="Permalink to this definition"></a></dt>
<dd><p>SqueezeNet model from the <a class="reference external" href="https://arxiv.org/abs/1602.07360">“SqueezeNet: AlexNet-level accuracy with 50x fewer parameters
and &lt;0.5MB model size”</a> paper.
SqueezeNet 1.1 model from the <a class="reference external" href="https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1">official SqueezeNet repo</a>.
SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
than SqueezeNet 1.0, without sacrificing accuracy.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>version</strong> (<em>str</em>) – Version of squeezenet. Options are ‘1.0’, ‘1.1’.</li>
<li><strong>classes</strong> (<em>int, default 1000</em>) – Number of classification classes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="mxnet.gluon.model_zoo.vision.inception_v3">
<code class="descclassname">vision.</code><code class="descname">inception_v3</code><span class="sig-paren">(</span><em>pretrained=False</em>, <em>ctx=cpu(0)</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.inception_v3" title="Permalink to this definition"></a></dt>
<dd><p>Inception v3 model from
<a class="reference external" href="http://arxiv.org/abs/1512.00567">“Rethinking the Inception Architecture for Computer Vision”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>pretrained</strong> (<em>bool, default False</em>) – Whether to load the pretrained weights for model.</li>
<li><strong>ctx</strong> (<em>Context, default CPU</em>) – The context in which to load the pretrained weights.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="mxnet.gluon.model_zoo.vision.Inception3">
<em class="property">class </em><code class="descclassname">mxnet.gluon.model_zoo.vision.</code><code class="descname">Inception3</code><span class="sig-paren">(</span><em>classes=1000</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#mxnet.gluon.model_zoo.vision.Inception3" title="Permalink to this definition"></a></dt>
<dd><p>Inception v3 model from
<a class="reference external" href="http://arxiv.org/abs/1512.00567">“Rethinking the Inception Architecture for Computer Vision”</a> paper.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name"/>
<col class="field-body"/>
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>classes</strong> (<em>int, default 1000</em>) – Number of classification classes.</td>
</tr>
</tbody>
</table>
</dd></dl>
<script>auto_index("api-reference");</script></div>
</div>
<div class="container">
<div class="footer">
<p> </p>
</div>
</div>
</div>
<div aria-label="main navigation" class="sphinxsidebar rightsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<h3><a href="../../index.html">Table Of Contents</a></h3>
<ul>
<li><a class="reference internal" href="#">Gluon Package</a><ul>
<li><a class="reference internal" href="#overview">Overview</a></li>
<li><a class="reference internal" href="#parameter">Parameter</a></li>
<li><a class="reference internal" href="#containers">Containers</a></li>
<li><a class="reference internal" href="#neural-network-layers">Neural Network Layers</a><ul>
<li><a class="reference internal" href="#containers">Containers</a></li>
<li><a class="reference internal" href="#basic-layers">Basic Layers</a></li>
<li><a class="reference internal" href="#convolutional-layers">Convolutional Layers</a></li>
<li><a class="reference internal" href="#pooling-layers">Pooling Layers</a></li>
</ul>
</li>
<li><a class="reference internal" href="#recurrent-layers">Recurrent Layers</a></li>
<li><a class="reference internal" href="#trainer">Trainer</a></li>
<li><a class="reference internal" href="#loss-functions">Loss functions</a></li>
<li><a class="reference internal" href="#utilities">Utilities</a></li>
<li><a class="reference internal" href="#data">Data</a><ul>
<li><a class="reference internal" href="#vision">Vision</a></li>
</ul>
</li>
<li><a class="reference internal" href="#model-zoo">Model Zoo</a><ul>
<li><a class="reference internal" href="#vision">Vision</a><ul>
<li><a class="reference internal" href="#resnet">ResNet</a></li>
<li><a class="reference internal" href="#vgg">VGG</a></li>
<li><a class="reference internal" href="#alexnet">Alexnet</a></li>
<li><a class="reference internal" href="#densenet">DenseNet</a></li>
<li><a class="reference internal" href="#squeezenet">SqueezeNet</a></li>
<li><a class="reference internal" href="#inception">Inception</a></li>
</ul>
</li>
</ul>
</li>
<li><a class="reference internal" href="#api-reference">API Reference</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div> <!-- pagename != index -->
<script crossorigin="anonymous" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
<script src="../../_static/js/sidebar.js" type="text/javascript"></script>
<script src="../../_static/js/search.js" type="text/javascript"></script>
<script src="../../_static/js/navbar.js" type="text/javascript"></script>
<script src="../../_static/js/clipboard.min.js" type="text/javascript"></script>
<script src="../../_static/js/copycode.js" type="text/javascript"></script>
<script type="text/javascript">
$('body').ready(function () {
$('body').css('visibility', 'visible');
});
</script>
</div></body>
</html>