blob: ee91f6d435402c6f515d66afdebbde9f9a36a9a6 [file] [log] [blame]
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8" />
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta http-equiv="x-ua-compatible" content="ie=edge">
<style>
.dropdown {
position: relative;
display: inline-block;
}
.dropdown-content {
display: none;
position: absolute;
background-color: #f9f9f9;
min-width: 160px;
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
padding: 12px 16px;
z-index: 1;
text-align: left;
}
.dropdown:hover .dropdown-content {
display: block;
}
.dropdown-option:hover {
color: #FF4500;
}
.dropdown-option-active {
color: #FF4500;
font-weight: lighter;
}
.dropdown-option {
color: #000000;
font-weight: lighter;
}
.dropdown-header {
color: #FFFFFF;
display: inline-flex;
}
.dropdown-caret {
width: 18px;
height: 54px;
}
.dropdown-caret-path {
fill: #FFFFFF;
}
</style>
<title>Optimizers &#8212; Apache MXNet documentation</title>
<link rel="stylesheet" href="../../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../_static/mxnet.css" />
<link rel="stylesheet" href="../../../_static/material-design-lite-1.3.0/material.blue-deep_orange.min.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/sphinx_materialdesign_theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/fontawesome/all.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/fonts.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/feedback.css" type="text/css" />
<script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../_static/matomo_analytics.js"></script>
<script src="../../../_static/autodoc.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<script src="../../../_static/sphinx_materialdesign_theme.js"></script>
<link rel="shortcut icon" href="../../../_static/mxnet-icon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<link rel="next" title="Visualization" href="../viz/index.html" />
<link rel="prev" title="Running inference on MXNet/Gluon from an ONNX model" href="../onnx/inference_on_onnx_model.html" />
</head>
<body><header class="site-header" role="banner">
<div class="wrapper">
<a class="site-title" rel="author" href="/"><img
src="../../../_static/mxnet_logo.png" class="site-header-logo"></a>
<nav class="site-nav">
<input type="checkbox" id="nav-trigger" class="nav-trigger"/>
<label for="nav-trigger">
<span class="menu-icon">
<svg viewBox="0 0 18 15" width="18px" height="15px">
<path d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.032C17.335,0,18,0.665,18,1.484L18,1.484z M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.032C17.335,6.031,18,6.696,18,7.516L18,7.516z M18,13.516C18,14.335,17.335,15,16.516,15H1.484 C0.665,15,0,14.335,0,13.516l0,0c0-0.82,0.665-1.483,1.484-1.483h15.032C17.335,12.031,18,12.695,18,13.516L18,13.516z"/>
</svg>
</span>
</label>
<div class="trigger">
<a class="page-link" href="/get_started">Get Started</a>
<a class="page-link" href="/features">Features</a>
<a class="page-link" href="/ecosystem">Ecosystem</a>
<a class="page-link page-current" href="/api">Docs & Tutorials</a>
<a class="page-link" href="/trusted_by">Trusted By</a>
<a class="page-link" href="https://github.com/apache/mxnet">GitHub</a>
<div class="dropdown" style="min-width:100px">
<span class="dropdown-header">Apache
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content" style="min-width:250px">
<a href="https://www.apache.org/foundation/">Apache Software Foundation</a>
<a href="https://incubator.apache.org/">Apache Incubator</a>
<a href="https://www.apache.org/licenses/">License</a>
<a href="/versions/1.9.1/api/faq/security.html">Security</a>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy</a>
<a href="https://www.apache.org/events/current-event">Events</a>
<a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</div>
</div>
<div class="dropdown">
<span class="dropdown-header">master
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content">
<a class="dropdown-option-active" href="/versions/master/">master</a><br>
<a class="dropdown-option" href="/versions/1.9.1/">1.9.1</a><br>
<a class="dropdown-option" href="/versions/1.8.0/">1.8.0</a><br>
<a class="dropdown-option" href="/versions/1.7.0/">1.7.0</a><br>
<a class="dropdown-option" href="/versions/1.6.0/">1.6.0</a><br>
<a class="dropdown-option" href="/versions/1.5.0/">1.5.0</a><br>
<a class="dropdown-option" href="/versions/1.4.1/">1.4.1</a><br>
<a class="dropdown-option" href="/versions/1.3.1/">1.3.1</a><br>
<a class="dropdown-option" href="/versions/1.2.1/">1.2.1</a><br>
<a class="dropdown-option" href="/versions/1.1.0/">1.1.0</a><br>
<a class="dropdown-option" href="/versions/1.0.0/">1.0.0</a><br>
<a class="dropdown-option" href="/versions/0.12.1/">0.12.1</a><br>
<a class="dropdown-option" href="/versions/0.11.0/">0.11.0</a>
</div>
</div>
</div>
</nav>
</div>
</header>
<div class="mdl-layout mdl-js-layout mdl-layout--fixed-header mdl-layout--fixed-drawer"><header class="mdl-layout__header mdl-layout__header--waterfall ">
<div class="mdl-layout__header-row">
<nav class="mdl-navigation breadcrumb">
<a class="mdl-navigation__link" href="../../index.html">Python Tutorials</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link" href="../index.html">Packages</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link is-active">Optimizers</a>
</nav>
<div class="mdl-layout-spacer"></div>
<nav class="mdl-navigation">
<form class="form-inline pull-sm-right" action="../../../search.html" method="get">
<div class="mdl-textfield mdl-js-textfield mdl-textfield--expandable mdl-textfield--floating-label mdl-textfield--align-right">
<label id="quick-search-icon" class="mdl-button mdl-js-button mdl-button--icon" for="waterfall-exp">
<i class="material-icons">search</i>
</label>
<div class="mdl-textfield__expandable-holder">
<input class="mdl-textfield__input" type="text" name="q" id="waterfall-exp" placeholder="Search" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</div>
</div>
<div class="mdl-tooltip" data-mdl-for="quick-search-icon">
Quick search
</div>
</form>
<a id="button-show-github"
href="https://github.com/apache/mxnet/edit/master/docs/python_docs/python/tutorials/packages/optimizer/index.md" class="mdl-button mdl-js-button mdl-button--icon">
<i class="material-icons">edit</i>
</a>
<div class="mdl-tooltip" data-mdl-for="button-show-github">
Edit on Github
</div>
</nav>
</div>
<div class="mdl-layout__header-row header-links">
<div class="mdl-layout-spacer"></div>
<nav class="mdl-navigation">
</nav>
</div>
</header><header class="mdl-layout__drawer">
<div class="globaltoc">
<span class="mdl-layout-title toc">Table Of Contents</span>
<nav class="mdl-navigation">
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="../../index.html">Python Tutorials</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../../getting-started/index.html">Getting Started</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/crash-course/index.html">Crash Course</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/0-introduction.html">Introduction</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/1-nparray.html">Step 1: Manipulate data with NP on MXNet</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/2-create-nn.html">Step 2: Create a neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/3-autograd.html">Step 3: Automatic differentiation with autograd</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/4-components.html">Step 4: Necessary components that are not in the network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/5-datasets.html">Step 5: <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s and <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/5-datasets.html#Using-own-data-with-included-Datasets">Using own data with included <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/5-datasets.html#Using-your-own-data-with-custom-Datasets">Using your own data with custom <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders">New in MXNet 2.0: faster C++ backend dataloaders</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/6-train-nn.html">Step 6: Train a Neural Network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/7-use-gpus.html">Step 7: Load and Run a NN using GPU</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/to-mxnet/index.html">Moving to MXNet from Other Frameworks</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/to-mxnet/pytorch.html">PyTorch vs Apache MXNet</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/gluon_from_experiment_to_deployment.html">Gluon: from experiment to deployment</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/gluon_migration_guide.html">Gluon2.0: Migration Guide</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/logistic_regression_explained.html">Logistic regression explained</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/image/mnist.html">MNIST</a></li>
</ul>
</li>
<li class="toctree-l2 current"><a class="reference internal" href="../index.html">Packages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../autograd/index.html">Automatic Differentiation</a></li>
<li class="toctree-l3"><a class="reference internal" href="../gluon/index.html">Gluon</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../gluon/blocks/index.html">Blocks</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/custom-layer.html">Custom Layers</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/hybridize.html">Hybridize</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/init.html">Initialization</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/naming.html">Parameter and Block Naming</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/nn.html">Layers and Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/parameters.html">Parameter Management</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/save_load_params.html">Saving and Loading Gluon Models</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/activations/activations.html">Activation Blocks</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/data/index.html">Data Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/data_augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/datasets.html">Gluon <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s and <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/datasets.html#Using-own-data-with-included-Datasets">Using own data with included <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/datasets.html#Using-own-data-with-custom-Datasets">Using own data with custom <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/datasets.html#Appendix:-Upgrading-from-Module-DataIter-to-Gluon-DataLoader">Appendix: Upgrading from Module <code class="docutils literal notranslate"><span class="pre">DataIter</span></code> to Gluon <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/image/index.html">Image Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/image/info_gan.html">Image similarity search with InfoGAN</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/image/mnist.html">Handwritten Digit Recognition</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/loss/index.html">Losses</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/loss/custom-loss.html">Custom Loss Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/loss/kl_divergence.html">Kullback-Leibler (KL) Divergence</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/loss/loss.html">Loss functions</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/text/index.html">Text Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/text/gnmt.html">Google Neural Machine Translation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/text/transformer.html">Machine Translation with Transformer</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/training/index.html">Training</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/training/fit_api_tutorial.html">MXNet Gluon Fit API</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/training/trainer.html">Trainer</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/training/learning_rates/index.html">Learning Rates</a><ul>
<li class="toctree-l6"><a class="reference internal" href="../gluon/training/learning_rates/learning_rate_finder.html">Learning Rate Finder</a></li>
<li class="toctree-l6"><a class="reference internal" href="../gluon/training/learning_rates/learning_rate_schedules.html">Learning Rate Schedules</a></li>
<li class="toctree-l6"><a class="reference internal" href="../gluon/training/learning_rates/learning_rate_schedules_advanced.html">Advanced Learning Rate Schedules</a></li>
</ul>
</li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/training/normalization/index.html">Normalization Blocks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../kvstore/index.html">KVStore</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../kvstore/kvstore.html">Distributed Key-Value Store</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../legacy/index.html">Legacy</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../legacy/ndarray/index.html">NDArray</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/01-ndarray-intro.html">An Intro: Manipulate Data the MXNet Way with NDArray</a></li>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/02-ndarray-operations.html">NDArray Operations</a></li>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/03-ndarray-contexts.html">NDArray Contexts</a></li>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/gotchas_numpy_in_mxnet.html">Gotchas using NumPy in Apache MXNet</a></li>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/sparse/index.html">Tutorials</a><ul>
<li class="toctree-l6"><a class="reference internal" href="../legacy/ndarray/sparse/csr.html">CSRNDArray - NDArray in Compressed Sparse Row Storage Format</a></li>
<li class="toctree-l6"><a class="reference internal" href="../legacy/ndarray/sparse/row_sparse.html">RowSparseNDArray - NDArray for Sparse Gradient Updates</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../np/index.html">What is NP on MXNet</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../np/cheat-sheet.html">The NP on MXNet cheat sheet</a></li>
<li class="toctree-l4"><a class="reference internal" href="../np/np-vs-numpy.html">Differences between NP on MXNet and NumPy</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../onnx/index.html">ONNX</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../onnx/fine_tuning_gluon.html">Fine-tuning an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../onnx/inference_on_onnx_model.html">Running inference on MXNet/Gluon from an ONNX model</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/deploy/export/onnx.html">Export ONNX Models</a></li>
</ul>
</li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">Optimizers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../viz/index.html">Visualization</a><ul>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/visualize_graph">Visualize networks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../performance/index.html">Performance</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../performance/compression/index.html">Compression</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../performance/compression/int8.html">Deploy with int-8</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/float16">Float16</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/gradient_compression">Gradient Compression</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/int8_inference.html">GluonCV with Quantized Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../performance/backend/index.html">Accelerated Backend Tools</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../performance/backend/dnnl/index.html">oneDNN</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../performance/backend/dnnl/dnnl_readme.html">Install MXNet with oneDNN</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../performance/backend/dnnl/dnnl_quantization.html">oneDNN Quantization</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../performance/backend/dnnl/dnnl_quantization_inc.html">Improving accuracy with Intel® Neural Compressor</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../performance/backend/tvm.html">Use TVM</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../performance/backend/profiler.html">Profiling MXNet Models</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../performance/backend/amp.html">Using AMP: Automatic Mixed Precision</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../deploy/index.html">Deployment</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../deploy/export/index.html">Export</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/export/onnx.html">Exporting to ONNX format</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/export_network.html">Export Gluon CV Models</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/save_load_params.html">Save / Load Parameters</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../deploy/inference/index.html">Inference</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/inference/cpp.html">Deploy into C++</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/inference/image_classification_jetson.html">Image Classication using pretrained ResNet-50 model on Jetson module</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../deploy/run-on-aws/index.html">Run on AWS</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/run-on-aws/use_ec2.html">Run on an EC2 Instance</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/run-on-aws/use_sagemaker.html">Run on Amazon SageMaker</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/run-on-aws/cloud.html">MXNet on the Cloud</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../extend/index.html">Extend</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../extend/customop.html">Custom Numpy Operators</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/new_op">New Operator Creation</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/add_op_in_backend">New Operator in MXNet Backend</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/using_rtc">Using RTC for CUDA kernels</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../api/index.html">Python API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../api/np/index.html">mxnet.np</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/np/arrays.html">Array objects</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/arrays.ndarray.html">The N-dimensional array (<code class="xref py py-class docutils literal notranslate"><span class="pre">ndarray</span></code>)</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/arrays.indexing.html">Indexing</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/np/routines.html">Routines</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.array-creation.html">Array creation routines</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.eye.html">mxnet.np.eye</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.empty.html">mxnet.np.empty</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.full.html">mxnet.np.full</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.identity.html">mxnet.np.identity</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ones.html">mxnet.np.ones</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ones_like.html">mxnet.np.ones_like</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.zeros.html">mxnet.np.zeros</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.zeros_like.html">mxnet.np.zeros_like</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.array.html">mxnet.np.array</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.copy.html">mxnet.np.copy</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arange.html">mxnet.np.arange</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linspace.html">mxnet.np.linspace</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.logspace.html">mxnet.np.logspace</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.meshgrid.html">mxnet.np.meshgrid</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tril.html">mxnet.np.tril</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.array-manipulation.html">Array manipulation routines</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.reshape.html">mxnet.np.reshape</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ravel.html">mxnet.np.ravel</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ndarray.flatten.html">mxnet.np.ndarray.flatten</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.swapaxes.html">mxnet.np.swapaxes</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ndarray.T.html">mxnet.np.ndarray.T</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.transpose.html">mxnet.np.transpose</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.moveaxis.html">mxnet.np.moveaxis</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.rollaxis.html">mxnet.np.rollaxis</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.expand_dims.html">mxnet.np.expand_dims</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.squeeze.html">mxnet.np.squeeze</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.broadcast_to.html">mxnet.np.broadcast_to</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.broadcast_arrays.html">mxnet.np.broadcast_arrays</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.atleast_1d.html">mxnet.np.atleast_1d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.atleast_2d.html">mxnet.np.atleast_2d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.atleast_3d.html">mxnet.np.atleast_3d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.concatenate.html">mxnet.np.concatenate</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.stack.html">mxnet.np.stack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.dstack.html">mxnet.np.dstack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.vstack.html">mxnet.np.vstack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.column_stack.html">mxnet.np.column_stack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.hstack.html">mxnet.np.hstack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.split.html">mxnet.np.split</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.hsplit.html">mxnet.np.hsplit</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.vsplit.html">mxnet.np.vsplit</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.array_split.html">mxnet.np.array_split</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.dsplit.html">mxnet.np.dsplit</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tile.html">mxnet.np.tile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.repeat.html">mxnet.np.repeat</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.unique.html">mxnet.np.unique</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.delete.html">mxnet.np.delete</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.insert.html">mxnet.np.insert</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.append.html">mxnet.np.append</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.resize.html">mxnet.np.resize</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.trim_zeros.html">mxnet.np.trim_zeros</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.reshape.html">mxnet.np.reshape</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.flip.html">mxnet.np.flip</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.roll.html">mxnet.np.roll</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.rot90.html">mxnet.np.rot90</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fliplr.html">mxnet.np.fliplr</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.flipud.html">mxnet.np.flipud</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.io.html">Input and output</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.genfromtxt.html">mxnet.np.genfromtxt</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ndarray.tolist.html">mxnet.np.ndarray.tolist</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.set_printoptions.html">mxnet.np.set_printoptions</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.linalg.html">Linear algebra (<code class="xref py py-mod docutils literal notranslate"><span class="pre">numpy.linalg</span></code>)</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.dot.html">mxnet.np.dot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.vdot.html">mxnet.np.vdot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.inner.html">mxnet.np.inner</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.outer.html">mxnet.np.outer</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tensordot.html">mxnet.np.tensordot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.einsum.html">mxnet.np.einsum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.multi_dot.html">mxnet.np.linalg.multi_dot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.matmul.html">mxnet.np.matmul</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.matrix_power.html">mxnet.np.linalg.matrix_power</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.kron.html">mxnet.np.kron</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.svd.html">mxnet.np.linalg.svd</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.cholesky.html">mxnet.np.linalg.cholesky</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.qr.html">mxnet.np.linalg.qr</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.eig.html">mxnet.np.linalg.eig</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.eigh.html">mxnet.np.linalg.eigh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.eigvals.html">mxnet.np.linalg.eigvals</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.eigvalsh.html">mxnet.np.linalg.eigvalsh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.norm.html">mxnet.np.linalg.norm</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.trace.html">mxnet.np.trace</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.cond.html">mxnet.np.linalg.cond</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.det.html">mxnet.np.linalg.det</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.matrix_rank.html">mxnet.np.linalg.matrix_rank</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.slogdet.html">mxnet.np.linalg.slogdet</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.solve.html">mxnet.np.linalg.solve</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.tensorsolve.html">mxnet.np.linalg.tensorsolve</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.lstsq.html">mxnet.np.linalg.lstsq</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.inv.html">mxnet.np.linalg.inv</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.pinv.html">mxnet.np.linalg.pinv</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.tensorinv.html">mxnet.np.linalg.tensorinv</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.math.html">Mathematical functions</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sin.html">mxnet.np.sin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cos.html">mxnet.np.cos</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tan.html">mxnet.np.tan</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arcsin.html">mxnet.np.arcsin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arccos.html">mxnet.np.arccos</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arctan.html">mxnet.np.arctan</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.degrees.html">mxnet.np.degrees</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.radians.html">mxnet.np.radians</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.hypot.html">mxnet.np.hypot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arctan2.html">mxnet.np.arctan2</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.deg2rad.html">mxnet.np.deg2rad</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.rad2deg.html">mxnet.np.rad2deg</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.unwrap.html">mxnet.np.unwrap</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sinh.html">mxnet.np.sinh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cosh.html">mxnet.np.cosh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tanh.html">mxnet.np.tanh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arcsinh.html">mxnet.np.arcsinh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arccosh.html">mxnet.np.arccosh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arctanh.html">mxnet.np.arctanh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.rint.html">mxnet.np.rint</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fix.html">mxnet.np.fix</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.floor.html">mxnet.np.floor</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ceil.html">mxnet.np.ceil</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.trunc.html">mxnet.np.trunc</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.around.html">mxnet.np.around</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.round_.html">mxnet.np.round_</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sum.html">mxnet.np.sum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.prod.html">mxnet.np.prod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cumsum.html">mxnet.np.cumsum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanprod.html">mxnet.np.nanprod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nansum.html">mxnet.np.nansum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cumprod.html">mxnet.np.cumprod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nancumprod.html">mxnet.np.nancumprod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nancumsum.html">mxnet.np.nancumsum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.diff.html">mxnet.np.diff</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ediff1d.html">mxnet.np.ediff1d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cross.html">mxnet.np.cross</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.trapz.html">mxnet.np.trapz</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.exp.html">mxnet.np.exp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.expm1.html">mxnet.np.expm1</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.log.html">mxnet.np.log</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.log10.html">mxnet.np.log10</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.log2.html">mxnet.np.log2</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.log1p.html">mxnet.np.log1p</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.logaddexp.html">mxnet.np.logaddexp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.i0.html">mxnet.np.i0</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ldexp.html">mxnet.np.ldexp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.signbit.html">mxnet.np.signbit</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.copysign.html">mxnet.np.copysign</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.frexp.html">mxnet.np.frexp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.spacing.html">mxnet.np.spacing</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.lcm.html">mxnet.np.lcm</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.gcd.html">mxnet.np.gcd</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.add.html">mxnet.np.add</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.reciprocal.html">mxnet.np.reciprocal</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.negative.html">mxnet.np.negative</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.divide.html">mxnet.np.divide</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.power.html">mxnet.np.power</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.subtract.html">mxnet.np.subtract</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.mod.html">mxnet.np.mod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.multiply.html">mxnet.np.multiply</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.true_divide.html">mxnet.np.true_divide</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.remainder.html">mxnet.np.remainder</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.positive.html">mxnet.np.positive</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.float_power.html">mxnet.np.float_power</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fmod.html">mxnet.np.fmod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.modf.html">mxnet.np.modf</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.divmod.html">mxnet.np.divmod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.floor_divide.html">mxnet.np.floor_divide</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.clip.html">mxnet.np.clip</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sqrt.html">mxnet.np.sqrt</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cbrt.html">mxnet.np.cbrt</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.square.html">mxnet.np.square</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.absolute.html">mxnet.np.absolute</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sign.html">mxnet.np.sign</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.maximum.html">mxnet.np.maximum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.minimum.html">mxnet.np.minimum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fabs.html">mxnet.np.fabs</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.heaviside.html">mxnet.np.heaviside</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fmax.html">mxnet.np.fmax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fmin.html">mxnet.np.fmin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nan_to_num.html">mxnet.np.nan_to_num</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.interp.html">mxnet.np.interp</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/random/index.html">np.random</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.choice.html">mxnet.np.random.choice</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.shuffle.html">mxnet.np.random.shuffle</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.normal.html">mxnet.np.random.normal</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.uniform.html">mxnet.np.random.uniform</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.rand.html">mxnet.np.random.rand</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.randint.html">mxnet.np.random.randint</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.beta.html">mxnet.np.random.beta</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.chisquare.html">mxnet.np.random.chisquare</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.exponential.html">mxnet.np.random.exponential</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.f.html">mxnet.np.random.f</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.gamma.html">mxnet.np.random.gamma</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.gumbel.html">mxnet.np.random.gumbel</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.laplace.html">mxnet.np.random.laplace</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.logistic.html">mxnet.np.random.logistic</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.lognormal.html">mxnet.np.random.lognormal</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.multinomial.html">mxnet.np.random.multinomial</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.multivariate_normal.html">mxnet.np.random.multivariate_normal</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.pareto.html">mxnet.np.random.pareto</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.power.html">mxnet.np.random.power</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.rayleigh.html">mxnet.np.random.rayleigh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.weibull.html">mxnet.np.random.weibull</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.sort.html">Sorting, searching, and counting</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ndarray.sort.html">mxnet.np.ndarray.sort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sort.html">mxnet.np.sort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.lexsort.html">mxnet.np.lexsort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argsort.html">mxnet.np.argsort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.msort.html">mxnet.np.msort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.partition.html">mxnet.np.partition</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argpartition.html">mxnet.np.argpartition</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argmax.html">mxnet.np.argmax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argmin.html">mxnet.np.argmin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanargmax.html">mxnet.np.nanargmax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanargmin.html">mxnet.np.nanargmin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argwhere.html">mxnet.np.argwhere</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nonzero.html">mxnet.np.nonzero</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.flatnonzero.html">mxnet.np.flatnonzero</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.where.html">mxnet.np.where</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.searchsorted.html">mxnet.np.searchsorted</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.extract.html">mxnet.np.extract</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.count_nonzero.html">mxnet.np.count_nonzero</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.statistics.html">Statistics</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.min.html">mxnet.np.min</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.max.html">mxnet.np.max</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.amin.html">mxnet.np.amin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.amax.html">mxnet.np.amax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanmin.html">mxnet.np.nanmin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanmax.html">mxnet.np.nanmax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ptp.html">mxnet.np.ptp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.percentile.html">mxnet.np.percentile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanpercentile.html">mxnet.np.nanpercentile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.quantile.html">mxnet.np.quantile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanquantile.html">mxnet.np.nanquantile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.mean.html">mxnet.np.mean</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.std.html">mxnet.np.std</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.var.html">mxnet.np.var</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.median.html">mxnet.np.median</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.average.html">mxnet.np.average</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanmedian.html">mxnet.np.nanmedian</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanstd.html">mxnet.np.nanstd</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanvar.html">mxnet.np.nanvar</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.corrcoef.html">mxnet.np.corrcoef</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.correlate.html">mxnet.np.correlate</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cov.html">mxnet.np.cov</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.histogram.html">mxnet.np.histogram</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.histogram2d.html">mxnet.np.histogram2d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.histogramdd.html">mxnet.np.histogramdd</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.bincount.html">mxnet.np.bincount</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.histogram_bin_edges.html">mxnet.np.histogram_bin_edges</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.digitize.html">mxnet.np.digitize</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/npx/index.html">NPX: NumPy Neural Network Extension</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.set_np.html">mxnet.npx.set_np</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.reset_np.html">mxnet.npx.reset_np</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.cpu.html">mxnet.npx.cpu</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.cpu_pinned.html">mxnet.npx.cpu_pinned</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.gpu.html">mxnet.npx.gpu</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.gpu_memory_info.html">mxnet.npx.gpu_memory_info</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.current_device.html">mxnet.npx.current_device</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.num_gpus.html">mxnet.npx.num_gpus</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.activation.html">mxnet.npx.activation</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.batch_norm.html">mxnet.npx.batch_norm</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.convolution.html">mxnet.npx.convolution</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.dropout.html">mxnet.npx.dropout</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.embedding.html">mxnet.npx.embedding</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.fully_connected.html">mxnet.npx.fully_connected</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.layer_norm.html">mxnet.npx.layer_norm</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.pooling.html">mxnet.npx.pooling</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.rnn.html">mxnet.npx.rnn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.leaky_relu.html">mxnet.npx.leaky_relu</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.multibox_detection.html">mxnet.npx.multibox_detection</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.multibox_prior.html">mxnet.npx.multibox_prior</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.multibox_target.html">mxnet.npx.multibox_target</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.roi_pooling.html">mxnet.npx.roi_pooling</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.sigmoid.html">mxnet.npx.sigmoid</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.relu.html">mxnet.npx.relu</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.smooth_l1.html">mxnet.npx.smooth_l1</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.softmax.html">mxnet.npx.softmax</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.log_softmax.html">mxnet.npx.log_softmax</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.topk.html">mxnet.npx.topk</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.waitall.html">mxnet.npx.waitall</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.load.html">mxnet.npx.load</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.save.html">mxnet.npx.save</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.one_hot.html">mxnet.npx.one_hot</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.pick.html">mxnet.npx.pick</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.reshape_like.html">mxnet.npx.reshape_like</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.batch_flatten.html">mxnet.npx.batch_flatten</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.batch_dot.html">mxnet.npx.batch_dot</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.gamma.html">mxnet.npx.gamma</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.sequence_mask.html">mxnet.npx.sequence_mask</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/gluon/index.html">mxnet.gluon</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/block.html">gluon.Block</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/hybrid_block.html">gluon.HybridBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/symbol_block.html">gluon.SymbolBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/constant.html">gluon.Constant</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/parameter.html">gluon.Parameter</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/trainer.html">gluon.Trainer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/contrib/index.html">gluon.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/data/index.html">gluon.data</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/gluon/data/vision/index.html">data.vision</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/gluon/data/vision/datasets/index.html">vision.datasets</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/gluon/data/vision/transforms/index.html">vision.transforms</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/loss/index.html">gluon.loss</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/metric/index.html">gluon.metric</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/model_zoo/index.html">gluon.model_zoo.vision</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/nn/index.html">gluon.nn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/rnn/index.html">gluon.rnn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/utils/index.html">gluon.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/autograd/index.html">mxnet.autograd</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/initializer/index.html">mxnet.initializer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/optimizer/index.html">mxnet.optimizer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/lr_scheduler/index.html">mxnet.lr_scheduler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore/index.html">KVStore: Communication for Distributed Training</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore/index.html#horovod">Horovod</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.Horovod.html">mxnet.kvstore.Horovod</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore/index.html#byteps">BytePS</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.BytePS.html">mxnet.kvstore.BytePS</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore/index.html#kvstore-interface">KVStore Interface</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.KVStore.html">mxnet.kvstore.KVStore</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.KVStoreBase.html">mxnet.kvstore.KVStoreBase</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.KVStoreServer.html">mxnet.kvstore.KVStoreServer</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/contrib/index.html">mxnet.contrib</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/io/index.html">contrib.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/ndarray/index.html">contrib.ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/onnx/index.html">contrib.onnx</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/quantization/index.html">contrib.quantization</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/symbol/index.html">contrib.symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/tensorboard/index.html">contrib.tensorboard</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/tensorrt/index.html">contrib.tensorrt</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/text/index.html">contrib.text</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/legacy/index.html">Legacy</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/callback/index.html">mxnet.callback</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/image/index.html">mxnet.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/io/index.html">mxnet.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/ndarray/index.html">mxnet.ndarray</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/ndarray.html">ndarray</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/contrib/index.html">ndarray.contrib</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/image/index.html">ndarray.image</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/linalg/index.html">ndarray.linalg</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/op/index.html">ndarray.op</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/random/index.html">ndarray.random</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/register/index.html">ndarray.register</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/sparse/index.html">ndarray.sparse</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/utils/index.html">ndarray.utils</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/recordio/index.html">mxnet.recordio</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/symbol/index.html">mxnet.symbol</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/symbol.html">symbol</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/contrib/index.html">symbol.contrib</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/image/index.html">symbol.image</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/linalg/index.html">symbol.linalg</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/op/index.html">symbol.op</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/random/index.html">symbol.random</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/register/index.html">symbol.register</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/sparse/index.html">symbol.sparse</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/visualization/index.html">mxnet.visualization</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/device/index.html">mxnet.device</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/engine/index.html">mxnet.engine</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/executor/index.html">mxnet.executor</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore_server/index.html">mxnet.kvstore_server</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/profiler/index.html">mxnet.profiler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/rtc/index.html">mxnet.rtc</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/runtime/index.html">mxnet.runtime</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/runtime/generated/mxnet.runtime.Feature.html">mxnet.runtime.Feature</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/runtime/generated/mxnet.runtime.Features.html">mxnet.runtime.Features</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/runtime/generated/mxnet.runtime.feature_list.html">mxnet.runtime.feature_list</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/test_utils/index.html">mxnet.test_utils</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/util/index.html">mxnet.util</a></li>
</ul>
</li>
</ul>
</nav>
</div>
</header>
<main class="mdl-layout__content" tabIndex="0">
<header class="mdl-layout__drawer">
<div class="globaltoc">
<span class="mdl-layout-title toc">Table Of Contents</span>
<nav class="mdl-navigation">
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="../../index.html">Python Tutorials</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../../getting-started/index.html">Getting Started</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/crash-course/index.html">Crash Course</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/0-introduction.html">Introduction</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/1-nparray.html">Step 1: Manipulate data with NP on MXNet</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/2-create-nn.html">Step 2: Create a neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/3-autograd.html">Step 3: Automatic differentiation with autograd</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/4-components.html">Step 4: Necessary components that are not in the network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/5-datasets.html">Step 5: <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s and <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/5-datasets.html#Using-own-data-with-included-Datasets">Using own data with included <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/5-datasets.html#Using-your-own-data-with-custom-Datasets">Using your own data with custom <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/5-datasets.html#New-in-MXNet-2.0:-faster-C++-backend-dataloaders">New in MXNet 2.0: faster C++ backend dataloaders</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/6-train-nn.html">Step 6: Train a Neural Network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/crash-course/7-use-gpus.html">Step 7: Load and Run a NN using GPU</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/to-mxnet/index.html">Moving to MXNet from Other Frameworks</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../getting-started/to-mxnet/pytorch.html">PyTorch vs Apache MXNet</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/gluon_from_experiment_to_deployment.html">Gluon: from experiment to deployment</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/gluon_migration_guide.html">Gluon2.0: Migration Guide</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getting-started/logistic_regression_explained.html">Logistic regression explained</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/image/mnist.html">MNIST</a></li>
</ul>
</li>
<li class="toctree-l2 current"><a class="reference internal" href="../index.html">Packages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../autograd/index.html">Automatic Differentiation</a></li>
<li class="toctree-l3"><a class="reference internal" href="../gluon/index.html">Gluon</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../gluon/blocks/index.html">Blocks</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/custom-layer.html">Custom Layers</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/hybridize.html">Hybridize</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/init.html">Initialization</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/naming.html">Parameter and Block Naming</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/nn.html">Layers and Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/parameters.html">Parameter Management</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/save_load_params.html">Saving and Loading Gluon Models</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/blocks/activations/activations.html">Activation Blocks</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/data/index.html">Data Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/data_augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/datasets.html">Gluon <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s and <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/datasets.html#Using-own-data-with-included-Datasets">Using own data with included <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/datasets.html#Using-own-data-with-custom-Datasets">Using own data with custom <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/data/datasets.html#Appendix:-Upgrading-from-Module-DataIter-to-Gluon-DataLoader">Appendix: Upgrading from Module <code class="docutils literal notranslate"><span class="pre">DataIter</span></code> to Gluon <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/image/index.html">Image Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/image/info_gan.html">Image similarity search with InfoGAN</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/image/mnist.html">Handwritten Digit Recognition</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/loss/index.html">Losses</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/loss/custom-loss.html">Custom Loss Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/loss/kl_divergence.html">Kullback-Leibler (KL) Divergence</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/loss/loss.html">Loss functions</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/text/index.html">Text Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/text/gnmt.html">Google Neural Machine Translation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/text/transformer.html">Machine Translation with Transformer</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../gluon/training/index.html">Training</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../gluon/training/fit_api_tutorial.html">MXNet Gluon Fit API</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/training/trainer.html">Trainer</a></li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/training/learning_rates/index.html">Learning Rates</a><ul>
<li class="toctree-l6"><a class="reference internal" href="../gluon/training/learning_rates/learning_rate_finder.html">Learning Rate Finder</a></li>
<li class="toctree-l6"><a class="reference internal" href="../gluon/training/learning_rates/learning_rate_schedules.html">Learning Rate Schedules</a></li>
<li class="toctree-l6"><a class="reference internal" href="../gluon/training/learning_rates/learning_rate_schedules_advanced.html">Advanced Learning Rate Schedules</a></li>
</ul>
</li>
<li class="toctree-l5"><a class="reference internal" href="../gluon/training/normalization/index.html">Normalization Blocks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../kvstore/index.html">KVStore</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../kvstore/kvstore.html">Distributed Key-Value Store</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../legacy/index.html">Legacy</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../legacy/ndarray/index.html">NDArray</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/01-ndarray-intro.html">An Intro: Manipulate Data the MXNet Way with NDArray</a></li>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/02-ndarray-operations.html">NDArray Operations</a></li>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/03-ndarray-contexts.html">NDArray Contexts</a></li>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/gotchas_numpy_in_mxnet.html">Gotchas using NumPy in Apache MXNet</a></li>
<li class="toctree-l5"><a class="reference internal" href="../legacy/ndarray/sparse/index.html">Tutorials</a><ul>
<li class="toctree-l6"><a class="reference internal" href="../legacy/ndarray/sparse/csr.html">CSRNDArray - NDArray in Compressed Sparse Row Storage Format</a></li>
<li class="toctree-l6"><a class="reference internal" href="../legacy/ndarray/sparse/row_sparse.html">RowSparseNDArray - NDArray for Sparse Gradient Updates</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../np/index.html">What is NP on MXNet</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../np/cheat-sheet.html">The NP on MXNet cheat sheet</a></li>
<li class="toctree-l4"><a class="reference internal" href="../np/np-vs-numpy.html">Differences between NP on MXNet and NumPy</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../onnx/index.html">ONNX</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../onnx/fine_tuning_gluon.html">Fine-tuning an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../onnx/inference_on_onnx_model.html">Running inference on MXNet/Gluon from an ONNX model</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/deploy/export/onnx.html">Export ONNX Models</a></li>
</ul>
</li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">Optimizers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../viz/index.html">Visualization</a><ul>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/visualize_graph">Visualize networks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../performance/index.html">Performance</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../performance/compression/index.html">Compression</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../performance/compression/int8.html">Deploy with int-8</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/float16">Float16</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/gradient_compression">Gradient Compression</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/int8_inference.html">GluonCV with Quantized Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../performance/backend/index.html">Accelerated Backend Tools</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../performance/backend/dnnl/index.html">oneDNN</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../performance/backend/dnnl/dnnl_readme.html">Install MXNet with oneDNN</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../performance/backend/dnnl/dnnl_quantization.html">oneDNN Quantization</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../performance/backend/dnnl/dnnl_quantization_inc.html">Improving accuracy with Intel® Neural Compressor</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../performance/backend/tvm.html">Use TVM</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../performance/backend/profiler.html">Profiling MXNet Models</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../performance/backend/amp.html">Using AMP: Automatic Mixed Precision</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../deploy/index.html">Deployment</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../deploy/export/index.html">Export</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/export/onnx.html">Exporting to ONNX format</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/export_network.html">Export Gluon CV Models</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/save_load_params.html">Save / Load Parameters</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../deploy/inference/index.html">Inference</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/inference/cpp.html">Deploy into C++</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/inference/image_classification_jetson.html">Image Classication using pretrained ResNet-50 model on Jetson module</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../deploy/run-on-aws/index.html">Run on AWS</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/run-on-aws/use_ec2.html">Run on an EC2 Instance</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/run-on-aws/use_sagemaker.html">Run on Amazon SageMaker</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../deploy/run-on-aws/cloud.html">MXNet on the Cloud</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../extend/index.html">Extend</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../extend/customop.html">Custom Numpy Operators</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/new_op">New Operator Creation</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/add_op_in_backend">New Operator in MXNet Backend</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/using_rtc">Using RTC for CUDA kernels</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../api/index.html">Python API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../api/np/index.html">mxnet.np</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/np/arrays.html">Array objects</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/arrays.ndarray.html">The N-dimensional array (<code class="xref py py-class docutils literal notranslate"><span class="pre">ndarray</span></code>)</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/arrays.indexing.html">Indexing</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/np/routines.html">Routines</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.array-creation.html">Array creation routines</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.eye.html">mxnet.np.eye</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.empty.html">mxnet.np.empty</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.full.html">mxnet.np.full</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.identity.html">mxnet.np.identity</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ones.html">mxnet.np.ones</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ones_like.html">mxnet.np.ones_like</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.zeros.html">mxnet.np.zeros</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.zeros_like.html">mxnet.np.zeros_like</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.array.html">mxnet.np.array</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.copy.html">mxnet.np.copy</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arange.html">mxnet.np.arange</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linspace.html">mxnet.np.linspace</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.logspace.html">mxnet.np.logspace</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.meshgrid.html">mxnet.np.meshgrid</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tril.html">mxnet.np.tril</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.array-manipulation.html">Array manipulation routines</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.reshape.html">mxnet.np.reshape</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ravel.html">mxnet.np.ravel</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ndarray.flatten.html">mxnet.np.ndarray.flatten</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.swapaxes.html">mxnet.np.swapaxes</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ndarray.T.html">mxnet.np.ndarray.T</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.transpose.html">mxnet.np.transpose</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.moveaxis.html">mxnet.np.moveaxis</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.rollaxis.html">mxnet.np.rollaxis</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.expand_dims.html">mxnet.np.expand_dims</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.squeeze.html">mxnet.np.squeeze</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.broadcast_to.html">mxnet.np.broadcast_to</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.broadcast_arrays.html">mxnet.np.broadcast_arrays</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.atleast_1d.html">mxnet.np.atleast_1d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.atleast_2d.html">mxnet.np.atleast_2d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.atleast_3d.html">mxnet.np.atleast_3d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.concatenate.html">mxnet.np.concatenate</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.stack.html">mxnet.np.stack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.dstack.html">mxnet.np.dstack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.vstack.html">mxnet.np.vstack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.column_stack.html">mxnet.np.column_stack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.hstack.html">mxnet.np.hstack</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.split.html">mxnet.np.split</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.hsplit.html">mxnet.np.hsplit</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.vsplit.html">mxnet.np.vsplit</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.array_split.html">mxnet.np.array_split</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.dsplit.html">mxnet.np.dsplit</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tile.html">mxnet.np.tile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.repeat.html">mxnet.np.repeat</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.unique.html">mxnet.np.unique</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.delete.html">mxnet.np.delete</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.insert.html">mxnet.np.insert</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.append.html">mxnet.np.append</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.resize.html">mxnet.np.resize</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.trim_zeros.html">mxnet.np.trim_zeros</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.reshape.html">mxnet.np.reshape</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.flip.html">mxnet.np.flip</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.roll.html">mxnet.np.roll</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.rot90.html">mxnet.np.rot90</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fliplr.html">mxnet.np.fliplr</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.flipud.html">mxnet.np.flipud</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.io.html">Input and output</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.genfromtxt.html">mxnet.np.genfromtxt</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ndarray.tolist.html">mxnet.np.ndarray.tolist</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.set_printoptions.html">mxnet.np.set_printoptions</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.linalg.html">Linear algebra (<code class="xref py py-mod docutils literal notranslate"><span class="pre">numpy.linalg</span></code>)</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.dot.html">mxnet.np.dot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.vdot.html">mxnet.np.vdot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.inner.html">mxnet.np.inner</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.outer.html">mxnet.np.outer</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tensordot.html">mxnet.np.tensordot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.einsum.html">mxnet.np.einsum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.multi_dot.html">mxnet.np.linalg.multi_dot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.matmul.html">mxnet.np.matmul</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.matrix_power.html">mxnet.np.linalg.matrix_power</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.kron.html">mxnet.np.kron</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.svd.html">mxnet.np.linalg.svd</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.cholesky.html">mxnet.np.linalg.cholesky</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.qr.html">mxnet.np.linalg.qr</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.eig.html">mxnet.np.linalg.eig</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.eigh.html">mxnet.np.linalg.eigh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.eigvals.html">mxnet.np.linalg.eigvals</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.eigvalsh.html">mxnet.np.linalg.eigvalsh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.norm.html">mxnet.np.linalg.norm</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.trace.html">mxnet.np.trace</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.cond.html">mxnet.np.linalg.cond</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.det.html">mxnet.np.linalg.det</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.matrix_rank.html">mxnet.np.linalg.matrix_rank</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.slogdet.html">mxnet.np.linalg.slogdet</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.solve.html">mxnet.np.linalg.solve</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.tensorsolve.html">mxnet.np.linalg.tensorsolve</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.lstsq.html">mxnet.np.linalg.lstsq</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.inv.html">mxnet.np.linalg.inv</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.pinv.html">mxnet.np.linalg.pinv</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.linalg.tensorinv.html">mxnet.np.linalg.tensorinv</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.math.html">Mathematical functions</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sin.html">mxnet.np.sin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cos.html">mxnet.np.cos</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tan.html">mxnet.np.tan</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arcsin.html">mxnet.np.arcsin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arccos.html">mxnet.np.arccos</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arctan.html">mxnet.np.arctan</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.degrees.html">mxnet.np.degrees</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.radians.html">mxnet.np.radians</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.hypot.html">mxnet.np.hypot</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arctan2.html">mxnet.np.arctan2</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.deg2rad.html">mxnet.np.deg2rad</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.rad2deg.html">mxnet.np.rad2deg</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.unwrap.html">mxnet.np.unwrap</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sinh.html">mxnet.np.sinh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cosh.html">mxnet.np.cosh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.tanh.html">mxnet.np.tanh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arcsinh.html">mxnet.np.arcsinh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arccosh.html">mxnet.np.arccosh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.arctanh.html">mxnet.np.arctanh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.rint.html">mxnet.np.rint</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fix.html">mxnet.np.fix</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.floor.html">mxnet.np.floor</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ceil.html">mxnet.np.ceil</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.trunc.html">mxnet.np.trunc</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.around.html">mxnet.np.around</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.round_.html">mxnet.np.round_</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sum.html">mxnet.np.sum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.prod.html">mxnet.np.prod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cumsum.html">mxnet.np.cumsum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanprod.html">mxnet.np.nanprod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nansum.html">mxnet.np.nansum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cumprod.html">mxnet.np.cumprod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nancumprod.html">mxnet.np.nancumprod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nancumsum.html">mxnet.np.nancumsum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.diff.html">mxnet.np.diff</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ediff1d.html">mxnet.np.ediff1d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cross.html">mxnet.np.cross</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.trapz.html">mxnet.np.trapz</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.exp.html">mxnet.np.exp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.expm1.html">mxnet.np.expm1</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.log.html">mxnet.np.log</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.log10.html">mxnet.np.log10</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.log2.html">mxnet.np.log2</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.log1p.html">mxnet.np.log1p</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.logaddexp.html">mxnet.np.logaddexp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.i0.html">mxnet.np.i0</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ldexp.html">mxnet.np.ldexp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.signbit.html">mxnet.np.signbit</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.copysign.html">mxnet.np.copysign</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.frexp.html">mxnet.np.frexp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.spacing.html">mxnet.np.spacing</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.lcm.html">mxnet.np.lcm</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.gcd.html">mxnet.np.gcd</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.add.html">mxnet.np.add</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.reciprocal.html">mxnet.np.reciprocal</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.negative.html">mxnet.np.negative</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.divide.html">mxnet.np.divide</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.power.html">mxnet.np.power</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.subtract.html">mxnet.np.subtract</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.mod.html">mxnet.np.mod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.multiply.html">mxnet.np.multiply</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.true_divide.html">mxnet.np.true_divide</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.remainder.html">mxnet.np.remainder</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.positive.html">mxnet.np.positive</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.float_power.html">mxnet.np.float_power</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fmod.html">mxnet.np.fmod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.modf.html">mxnet.np.modf</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.divmod.html">mxnet.np.divmod</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.floor_divide.html">mxnet.np.floor_divide</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.clip.html">mxnet.np.clip</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sqrt.html">mxnet.np.sqrt</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cbrt.html">mxnet.np.cbrt</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.square.html">mxnet.np.square</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.absolute.html">mxnet.np.absolute</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sign.html">mxnet.np.sign</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.maximum.html">mxnet.np.maximum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.minimum.html">mxnet.np.minimum</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fabs.html">mxnet.np.fabs</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.heaviside.html">mxnet.np.heaviside</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fmax.html">mxnet.np.fmax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.fmin.html">mxnet.np.fmin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nan_to_num.html">mxnet.np.nan_to_num</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.interp.html">mxnet.np.interp</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/random/index.html">np.random</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.choice.html">mxnet.np.random.choice</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.shuffle.html">mxnet.np.random.shuffle</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.normal.html">mxnet.np.random.normal</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.uniform.html">mxnet.np.random.uniform</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.rand.html">mxnet.np.random.rand</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.randint.html">mxnet.np.random.randint</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.beta.html">mxnet.np.random.beta</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.chisquare.html">mxnet.np.random.chisquare</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.exponential.html">mxnet.np.random.exponential</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.f.html">mxnet.np.random.f</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.gamma.html">mxnet.np.random.gamma</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.gumbel.html">mxnet.np.random.gumbel</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.laplace.html">mxnet.np.random.laplace</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.logistic.html">mxnet.np.random.logistic</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.lognormal.html">mxnet.np.random.lognormal</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.multinomial.html">mxnet.np.random.multinomial</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.multivariate_normal.html">mxnet.np.random.multivariate_normal</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.pareto.html">mxnet.np.random.pareto</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.power.html">mxnet.np.random.power</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.rayleigh.html">mxnet.np.random.rayleigh</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/random/generated/mxnet.np.random.weibull.html">mxnet.np.random.weibull</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.sort.html">Sorting, searching, and counting</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ndarray.sort.html">mxnet.np.ndarray.sort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.sort.html">mxnet.np.sort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.lexsort.html">mxnet.np.lexsort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argsort.html">mxnet.np.argsort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.msort.html">mxnet.np.msort</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.partition.html">mxnet.np.partition</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argpartition.html">mxnet.np.argpartition</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argmax.html">mxnet.np.argmax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argmin.html">mxnet.np.argmin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanargmax.html">mxnet.np.nanargmax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanargmin.html">mxnet.np.nanargmin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.argwhere.html">mxnet.np.argwhere</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nonzero.html">mxnet.np.nonzero</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.flatnonzero.html">mxnet.np.flatnonzero</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.where.html">mxnet.np.where</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.searchsorted.html">mxnet.np.searchsorted</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.extract.html">mxnet.np.extract</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.count_nonzero.html">mxnet.np.count_nonzero</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/np/routines.statistics.html">Statistics</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.min.html">mxnet.np.min</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.max.html">mxnet.np.max</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.amin.html">mxnet.np.amin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.amax.html">mxnet.np.amax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanmin.html">mxnet.np.nanmin</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanmax.html">mxnet.np.nanmax</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.ptp.html">mxnet.np.ptp</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.percentile.html">mxnet.np.percentile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanpercentile.html">mxnet.np.nanpercentile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.quantile.html">mxnet.np.quantile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanquantile.html">mxnet.np.nanquantile</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.mean.html">mxnet.np.mean</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.std.html">mxnet.np.std</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.var.html">mxnet.np.var</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.median.html">mxnet.np.median</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.average.html">mxnet.np.average</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanmedian.html">mxnet.np.nanmedian</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanstd.html">mxnet.np.nanstd</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.nanvar.html">mxnet.np.nanvar</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.corrcoef.html">mxnet.np.corrcoef</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.correlate.html">mxnet.np.correlate</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.cov.html">mxnet.np.cov</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.histogram.html">mxnet.np.histogram</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.histogram2d.html">mxnet.np.histogram2d</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.histogramdd.html">mxnet.np.histogramdd</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.bincount.html">mxnet.np.bincount</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.histogram_bin_edges.html">mxnet.np.histogram_bin_edges</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/np/generated/mxnet.np.digitize.html">mxnet.np.digitize</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/npx/index.html">NPX: NumPy Neural Network Extension</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.set_np.html">mxnet.npx.set_np</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.reset_np.html">mxnet.npx.reset_np</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.cpu.html">mxnet.npx.cpu</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.cpu_pinned.html">mxnet.npx.cpu_pinned</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.gpu.html">mxnet.npx.gpu</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.gpu_memory_info.html">mxnet.npx.gpu_memory_info</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.current_device.html">mxnet.npx.current_device</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.num_gpus.html">mxnet.npx.num_gpus</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.activation.html">mxnet.npx.activation</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.batch_norm.html">mxnet.npx.batch_norm</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.convolution.html">mxnet.npx.convolution</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.dropout.html">mxnet.npx.dropout</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.embedding.html">mxnet.npx.embedding</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.fully_connected.html">mxnet.npx.fully_connected</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.layer_norm.html">mxnet.npx.layer_norm</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.pooling.html">mxnet.npx.pooling</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.rnn.html">mxnet.npx.rnn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.leaky_relu.html">mxnet.npx.leaky_relu</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.multibox_detection.html">mxnet.npx.multibox_detection</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.multibox_prior.html">mxnet.npx.multibox_prior</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.multibox_target.html">mxnet.npx.multibox_target</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.roi_pooling.html">mxnet.npx.roi_pooling</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.sigmoid.html">mxnet.npx.sigmoid</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.relu.html">mxnet.npx.relu</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.smooth_l1.html">mxnet.npx.smooth_l1</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.softmax.html">mxnet.npx.softmax</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.log_softmax.html">mxnet.npx.log_softmax</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.topk.html">mxnet.npx.topk</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.waitall.html">mxnet.npx.waitall</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.load.html">mxnet.npx.load</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.save.html">mxnet.npx.save</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.one_hot.html">mxnet.npx.one_hot</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.pick.html">mxnet.npx.pick</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.reshape_like.html">mxnet.npx.reshape_like</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.batch_flatten.html">mxnet.npx.batch_flatten</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.batch_dot.html">mxnet.npx.batch_dot</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.gamma.html">mxnet.npx.gamma</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/npx/generated/mxnet.npx.sequence_mask.html">mxnet.npx.sequence_mask</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/gluon/index.html">mxnet.gluon</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/block.html">gluon.Block</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/hybrid_block.html">gluon.HybridBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/symbol_block.html">gluon.SymbolBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/constant.html">gluon.Constant</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/parameter.html">gluon.Parameter</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/trainer.html">gluon.Trainer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/contrib/index.html">gluon.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/data/index.html">gluon.data</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/gluon/data/vision/index.html">data.vision</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../api/gluon/data/vision/datasets/index.html">vision.datasets</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../api/gluon/data/vision/transforms/index.html">vision.transforms</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/loss/index.html">gluon.loss</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/metric/index.html">gluon.metric</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/model_zoo/index.html">gluon.model_zoo.vision</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/nn/index.html">gluon.nn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/rnn/index.html">gluon.rnn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/gluon/utils/index.html">gluon.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/autograd/index.html">mxnet.autograd</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/initializer/index.html">mxnet.initializer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/optimizer/index.html">mxnet.optimizer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/lr_scheduler/index.html">mxnet.lr_scheduler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore/index.html">KVStore: Communication for Distributed Training</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore/index.html#horovod">Horovod</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.Horovod.html">mxnet.kvstore.Horovod</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore/index.html#byteps">BytePS</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.BytePS.html">mxnet.kvstore.BytePS</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore/index.html#kvstore-interface">KVStore Interface</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.KVStore.html">mxnet.kvstore.KVStore</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.KVStoreBase.html">mxnet.kvstore.KVStoreBase</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/kvstore/generated/mxnet.kvstore.KVStoreServer.html">mxnet.kvstore.KVStoreServer</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/contrib/index.html">mxnet.contrib</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/io/index.html">contrib.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/ndarray/index.html">contrib.ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/onnx/index.html">contrib.onnx</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/quantization/index.html">contrib.quantization</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/symbol/index.html">contrib.symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/tensorboard/index.html">contrib.tensorboard</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/tensorrt/index.html">contrib.tensorrt</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/contrib/text/index.html">contrib.text</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/legacy/index.html">Legacy</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/callback/index.html">mxnet.callback</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/image/index.html">mxnet.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/io/index.html">mxnet.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/ndarray/index.html">mxnet.ndarray</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/ndarray.html">ndarray</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/contrib/index.html">ndarray.contrib</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/image/index.html">ndarray.image</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/linalg/index.html">ndarray.linalg</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/op/index.html">ndarray.op</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/random/index.html">ndarray.random</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/register/index.html">ndarray.register</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/sparse/index.html">ndarray.sparse</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/ndarray/utils/index.html">ndarray.utils</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/recordio/index.html">mxnet.recordio</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/symbol/index.html">mxnet.symbol</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/symbol.html">symbol</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/contrib/index.html">symbol.contrib</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/image/index.html">symbol.image</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/linalg/index.html">symbol.linalg</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/op/index.html">symbol.op</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/random/index.html">symbol.random</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/register/index.html">symbol.register</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../api/legacy/symbol/sparse/index.html">symbol.sparse</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/legacy/visualization/index.html">mxnet.visualization</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/device/index.html">mxnet.device</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/engine/index.html">mxnet.engine</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/executor/index.html">mxnet.executor</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/kvstore_server/index.html">mxnet.kvstore_server</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/profiler/index.html">mxnet.profiler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/rtc/index.html">mxnet.rtc</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/runtime/index.html">mxnet.runtime</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../api/runtime/generated/mxnet.runtime.Feature.html">mxnet.runtime.Feature</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/runtime/generated/mxnet.runtime.Features.html">mxnet.runtime.Features</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../api/runtime/generated/mxnet.runtime.feature_list.html">mxnet.runtime.feature_list</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/test_utils/index.html">mxnet.test_utils</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../api/util/index.html">mxnet.util</a></li>
</ul>
</li>
</ul>
</nav>
</div>
</header>
<div class="document">
<div class="page-content" role="main">
<style>
/* CSS for nbsphinx extension */
/* remove conflicting styling from Sphinx themes */
div.nbinput,
div.nbinput div.prompt,
div.nbinput div.input_area,
div.nbinput div[class*=highlight],
div.nbinput div[class*=highlight] pre,
div.nboutput,
div.nbinput div.prompt,
div.nbinput div.output_area,
div.nboutput div[class*=highlight],
div.nboutput div[class*=highlight] pre {
background: none;
border: none;
padding: 0 0;
margin: 0;
box-shadow: none;
}
/* avoid gaps between output lines */
div.nboutput div[class*=highlight] pre {
line-height: normal;
}
/* input/output containers */
div.nbinput,
div.nboutput {
display: -webkit-flex;
display: flex;
align-items: flex-start;
margin: 0;
width: 100%;
}
@media (max-width: 540px) {
div.nbinput,
div.nboutput {
flex-direction: column;
}
}
/* input container */
div.nbinput {
padding-top: 5px;
}
/* last container */
div.nblast {
padding-bottom: 5px;
}
/* input prompt */
div.nbinput div.prompt pre {
color: #307FC1;
}
/* output prompt */
div.nboutput div.prompt pre {
color: #BF5B3D;
}
/* all prompts */
div.nbinput div.prompt,
div.nboutput div.prompt {
min-width: 7ex;
padding-top: 0.4em;
padding-right: 0.4em;
text-align: right;
flex: 0;
}
@media (max-width: 540px) {
div.nbinput div.prompt,
div.nboutput div.prompt {
text-align: left;
padding: 0.4em;
}
div.nboutput div.prompt.empty {
padding: 0;
}
}
/* disable scrollbars on prompts */
div.nbinput div.prompt pre,
div.nboutput div.prompt pre {
overflow: hidden;
}
/* input/output area */
div.nbinput div.input_area,
div.nboutput div.output_area {
padding: 0.4em;
-webkit-flex: 1;
flex: 1;
overflow: auto;
}
@media (max-width: 540px) {
div.nbinput div.input_area,
div.nboutput div.output_area {
width: 100%;
}
}
/* input area */
div.nbinput div.input_area {
border: 1px solid #e0e0e0;
border-radius: 2px;
background: #f5f5f5;
}
/* override MathJax center alignment in output cells */
div.nboutput div[class*=MathJax] {
text-align: left !important;
}
/* override sphinx.ext.imgmath center alignment in output cells */
div.nboutput div.math p {
text-align: left;
}
/* standard error */
div.nboutput div.output_area.stderr {
background: #fdd;
}
/* ANSI colors */
.ansi-black-fg { color: #3E424D; }
.ansi-black-bg { background-color: #3E424D; }
.ansi-black-intense-fg { color: #282C36; }
.ansi-black-intense-bg { background-color: #282C36; }
.ansi-red-fg { color: #E75C58; }
.ansi-red-bg { background-color: #E75C58; }
.ansi-red-intense-fg { color: #B22B31; }
.ansi-red-intense-bg { background-color: #B22B31; }
.ansi-green-fg { color: #00A250; }
.ansi-green-bg { background-color: #00A250; }
.ansi-green-intense-fg { color: #007427; }
.ansi-green-intense-bg { background-color: #007427; }
.ansi-yellow-fg { color: #DDB62B; }
.ansi-yellow-bg { background-color: #DDB62B; }
.ansi-yellow-intense-fg { color: #B27D12; }
.ansi-yellow-intense-bg { background-color: #B27D12; }
.ansi-blue-fg { color: #208FFB; }
.ansi-blue-bg { background-color: #208FFB; }
.ansi-blue-intense-fg { color: #0065CA; }
.ansi-blue-intense-bg { background-color: #0065CA; }
.ansi-magenta-fg { color: #D160C4; }
.ansi-magenta-bg { background-color: #D160C4; }
.ansi-magenta-intense-fg { color: #A03196; }
.ansi-magenta-intense-bg { background-color: #A03196; }
.ansi-cyan-fg { color: #60C6C8; }
.ansi-cyan-bg { background-color: #60C6C8; }
.ansi-cyan-intense-fg { color: #258F8F; }
.ansi-cyan-intense-bg { background-color: #258F8F; }
.ansi-white-fg { color: #C5C1B4; }
.ansi-white-bg { background-color: #C5C1B4; }
.ansi-white-intense-fg { color: #A1A6B2; }
.ansi-white-intense-bg { background-color: #A1A6B2; }
.ansi-default-inverse-fg { color: #FFFFFF; }
.ansi-default-inverse-bg { background-color: #000000; }
.ansi-bold { font-weight: bold; }
.ansi-underline { text-decoration: underline; }
/* Some additional styling taken form the Jupyter notebook CSS */
div.rendered_html table {
border: none;
border-collapse: collapse;
border-spacing: 0;
color: black;
font-size: 12px;
table-layout: fixed;
}
div.rendered_html thead {
border-bottom: 1px solid black;
vertical-align: bottom;
}
div.rendered_html tr,
div.rendered_html th,
div.rendered_html td {
text-align: right;
vertical-align: middle;
padding: 0.5em 0.5em;
line-height: normal;
white-space: normal;
max-width: none;
border: none;
}
div.rendered_html th {
font-weight: bold;
}
div.rendered_html tbody tr:nth-child(odd) {
background: #f5f5f5;
}
div.rendered_html tbody tr:hover {
background: rgba(66, 165, 245, 0.2);
}
</style>
<!--- Licensed to the Apache Software Foundation (ASF) under one --><!--- or more contributor license agreements. See the NOTICE file --><!--- distributed with this work for additional information --><!--- regarding copyright ownership. The ASF licenses this file --><!--- to you under the Apache License, Version 2.0 (the --><!--- "License"); you may not use this file except in compliance --><!--- with the License. You may obtain a copy of the License at --><!--- http://www.apache.org/licenses/LICENSE-2.0 --><!--- Unless required by applicable law or agreed to in writing, --><!--- software distributed under the License is distributed on an --><!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --><!--- KIND, either express or implied. See the License for the --><!--- specific language governing permissions and limitations --><!--- under the License. --><div class="section" id="Optimizers">
<h1>Optimizers<a class="headerlink" href="#Optimizers" title="Permalink to this headline"></a></h1>
<p>Deep learning models are comprised of a model architecture and the model parameters. The model architecture is chosen based on the task - for example Convolutional Neural Networks (CNNs) are very successful in handling image based tasks and Recurrent Neural Networks (RNNs) are better suited for sequential prediction tasks. However, the values of the model parameters are learned by solving an optimization problem during model training.</p>
<p>To learn the parameters, we start with an initialization scheme and iteratively refine the parameter initial values by moving them along a direction that is opposite to the (approximate) gradient of the loss function. The extent to which the parameters are updated in this direction is governed by a hyperparameter called the learning rate. This process, known as gradient descent, is the backbone of optimization algorithms in deep learning. In MXNet, this functionality is abstracted by the
<a class="reference internal" href="../../../api/optimizer/index.html"><span class="doc">Optimizer API</span></a>.</p>
<p>When training a deep learning model using the MXNet <a class="reference internal" href="../gluon/index.html"><span class="doc">Gluon API</span></a>, a Gluon <a class="reference internal" href="../gluon/training/trainer.html"><span class="doc">Trainer</span></a> is initialized with the all the learnable parameters and the optimizer to be used to learn those parameters. A single step of iterative refinement of model parameters in MXNet is achieved by calling <a class="reference internal" href="../../../api/gluon/trainer.html#mxnet.gluon.Trainer.step"><span class="std std-ref">Trainer.step</span></a> which in turn uses the gradient (and perhaps some state information) to update the
parameters by calling <code class="docutils literal notranslate"><span class="pre">optimizer.update</span></code>.</p>
<p>Here is an example of how a trainer with an optimizer is created for, a simple Linear (Dense) Network.</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[1]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">mxnet</span> <span class="kn">import</span> <span class="n">gluon</span><span class="p">,</span> <span class="n">optimizer</span>
<span class="n">net</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="n">net</span><span class="o">.</span><span class="n">initialize</span><span class="p">()</span>
<span class="n">optim</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">SGD</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.1</span><span class="p">)</span>
<span class="n">trainer</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">Trainer</span><span class="p">(</span><span class="n">net</span><span class="o">.</span><span class="n">collect_params</span><span class="p">(),</span> <span class="n">optimizer</span><span class="o">=</span><span class="n">optim</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area stderr docutils container">
<div class="highlight"><pre>
[04:51:40] /work/mxnet/src/storage/storage.cc:202: Using Pooled (Naive) StorageManager for CPU
</pre></div></div>
</div>
<p>In model training, the code snippet above would be followed by a training loop which, at every iteration performs a forward pass (to compute the loss), a backward pass (to compute the gradient of the loss with respect to the parameters) and a trainer step (which updates the parameters using the gradient). See the <a class="reference internal" href="../gluon/training/trainer.html"><span class="doc">Gluon Trainer guide</span></a> for a complete example.</p>
<p>We can also create the trainer by passing in the optimizer name and optimizer params into the trainer constructor directly, as shown below.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[2]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">trainer</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">Trainer</span><span class="p">(</span><span class="n">net</span><span class="o">.</span><span class="n">collect_params</span><span class="p">(),</span> <span class="n">optimizer</span><span class="o">=</span><span class="s1">&#39;adam&#39;</span><span class="p">,</span> <span class="n">optimizer_params</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;learning_rate&#39;</span><span class="p">:</span><span class="mi">1</span><span class="p">})</span>
</pre></div>
</div>
</div>
<div class="section" id="What-should-I-use?">
<h2>What should I use?<a class="headerlink" href="#What-should-I-use?" title="Permalink to this headline"></a></h2>
<p>For many deep learning model architectures, the <code class="docutils literal notranslate"><span class="pre">sgd</span></code> and <code class="docutils literal notranslate"><span class="pre">adam</span></code> optimizers are a really good place to start. If you are implementing a deep learning model and trying to pick an optimizer, start with <a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.SGD"><span class="std std-ref">SGD</span></a> as you will often get good enough results as long as your learning problem is tractable. If you already have a trainable model and you want to improve the convergence then you can try
<a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.Adam"><span class="std std-ref">Adam</span></a>. If you would like to improve your model training process further, there are a number of specialized optimizers out there with many of them already implemented in MXNet. This guide walks through these optimizers in some detail.</p>
<div class="section" id="Stochastic-Gradient-Descent">
<h3>Stochastic Gradient Descent<a class="headerlink" href="#Stochastic-Gradient-Descent" title="Permalink to this headline"></a></h3>
<p><a class="reference external" href="https://en.wikipedia.org/wiki/Gradient_descent">Gradient descent</a> is a general purpose algorithm for minimizing a function using information from the gradient of the function with respect to its parameters. In deep learning, the function we are interested in minimizing is the <a class="reference internal" href="../gluon/loss/loss.html"><span class="doc">loss function</span></a>. Our model accepts training data as inputs and the loss function tells us how good our model predictions are. Since the training data can routinely consist of millions of
examples, computing the loss gradient on the full batch of training data is very computationally expensive. Luckily, we can effectively approximate the full gradient with the gradient of the loss function on randomly chosen minibatches of our training data. This variant of gradient descent is <a class="reference external" href="https://en.wikipedia.org/wiki/Stochastic_gradient_descent">stochastic gradient descent</a>.</p>
<p>Technically, stochastic gradient descent (SGD) refers to an online approximation of the gradient descent algorithm that computes the gradient of the loss function applied to a <em>single datapoint</em>, instead of your entire dataset, and uses this approximate gradient to update the model parameter values. However, in MXNet, and other deep learning frameworks, the SGD optimizer is agnostic to how many datapoints the loss function is applied to, and it is more effective to use a mini-batch loss
gradient, as described earlier, instead of a single datapoint loss gradient.</p>
</div>
</div>
<div class="section" id="SGD-optimizer">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.SGD"><span class="std std-ref">SGD optimizer</span></a><a class="headerlink" href="#SGD-optimizer" title="Permalink to this headline"></a></h2>
<p>For an SGD optimizer initialized with learning rate <span class="math notranslate nohighlight">\(lr\)</span>, the update function accepts parameters (weights) <span class="math notranslate nohighlight">\(w_i\)</span>, and their gradients <span class="math notranslate nohighlight">\(grad(w_i)\)</span>, and performs the single update step:</p>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + lr\cdot -grad(w_i)\]</div>
<p>visualized in the diagram shown below.</p>
<p align="center"></p></div>
<div class="section" id="Weight-decay">
<h2>Weight decay<a class="headerlink" href="#Weight-decay" title="Permalink to this headline"></a></h2>
<p>The SGD update step can be modified by introducing an extra term that enforces a penalty on the size of the parameters. This is achieved by subtracting a fraction of the weight <span class="math notranslate nohighlight">\(\delta\cdot w\)</span> during the weight update as shown below.</p>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + lr\cdot (-grad(w_i) -\delta\cdot w_i)\]</div>
<p>Introducing weight decay modifies the objective of the optimization problem by adding an implicit regularization term to penalizes large weights. Weight decay is discussed more extensively in this <a class="reference external" href="https://papers.nips.cc/paper/563-a-simple-weight-decay-can-improve-generalization.pdf">paper</a>.</p>
</div>
<div class="section" id="Momentum">
<h2>Momentum<a class="headerlink" href="#Momentum" title="Permalink to this headline"></a></h2>
<p>The convergence of the SGD optimizer can be accelerated by incorporating momentum. Originally proposed by <a class="reference external" href="https://www.sciencedirect.com/science/article/abs/pii/0041555364901375">Polyak (1964)</a>, SGD with momentum improves the approximation of the gradient term by incorporating the gradients from previous update steps. To achieve this, SGD with momentum stores and ‘remembers’ the update at each iteration to be included in the next iteration. In the equations below we denote the momentum
history as <span class="math notranslate nohighlight">\(v\)</span>.</p>
<p>For the first update the SGD optimizer with momentum performs the single update step:</p>
<div class="math notranslate nohighlight">
\[v_1= lr\cdot -grad(w_0)\]</div>
<div class="math notranslate nohighlight">
\[w_1= w_0 + v_1\]</div>
<p>For subsequent updates, SGD with momentum, with momentum parameter <span class="math notranslate nohighlight">\(\gamma\)</span>, performs the update step:</p>
<div class="math notranslate nohighlight">
\[v_{i+1} = \gamma \cdot v_{i} + lr\cdot -grad(w_{i})\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + v_{i+1}\]</div>
<p>This is also shown in the diagram below.</p>
<p align="center"></p><p>The use of SGD with momentum for learning in neural networks was introduced by Rumelhart, Hinton and Williams in <a class="reference external" href="https://dl.acm.org/citation.cfm?id=104279.104293">Learning Internal Representations by Error Propagation</a>.</p>
<p>To create an SGD optimizer with momentum <span class="math notranslate nohighlight">\(\gamma\)</span> and weight decay in MXNet simply use the following code.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[3]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">sgd_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">SGD</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">wd</span><span class="o">=</span><span class="mf">0.</span><span class="p">,</span> <span class="n">momentum</span><span class="o">=</span><span class="mf">0.8</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="Nesterov-Accelerated-Stochastic-Gradient-Descent">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.NAG"><span class="std std-ref">Nesterov Accelerated Stochastic Gradient Descent</span></a><a class="headerlink" href="#Nesterov-Accelerated-Stochastic-Gradient-Descent" title="Permalink to this headline"></a></h2>
<p>The momentum method of [Nesterov] is a modification to SGD with momentum that allows for even faster convergence in practice. With Nesterov accelerated gradient (NAG) descent, the update term is derived from the gradient of the loss function with respect to <em>refined parameter values</em>. These refined parameter values are computed by performing a SGD update step using the momentum history as the gradient term.</p>
<p>Alternatively, you can think of the NAG optimizer as performing two update steps: * The first (internal) update step approximates uses the current momentum history <span class="math notranslate nohighlight">\(v_i\)</span> to calculate the refined parameter values <span class="math notranslate nohighlight">\((w_i + \gamma \cdot v_i)\)</span>. This is also known as the lookahead step. * The second (actual) step uses the gradient of the loss function with respect to the lookahead parameter values from the first step and the current momentum history <span class="math notranslate nohighlight">\(v_i\)</span> to obtain a new direction
to update our original parameter values, like classical momentum.</p>
<p>The NAG optimizer with momentum parameter <span class="math notranslate nohighlight">\(\gamma\)</span> performs the update step:</p>
<div class="math notranslate nohighlight">
\[v_{i+1} = \gamma \cdot v_{i} + lr\cdot -grad(w_{i} + \gamma \cdot v_i)\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + v_{i+1}\]</div>
<p align="center"></p><p>The effects of using NAG over SGD and classical momentum are discussed in this <a class="reference external" href="http://proceedings.mlr.press/v28/sutskever13.pdf">paper</a> by Sutskever et al.</p>
<p>The NAG optimizer can be initialized in MXNet by using the code snippet below or by creating a trainer with argument <code class="docutils literal notranslate"><span class="pre">optimizer='nag'</span></code>.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[4]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">nag_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">NAG</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">momentum</span><span class="o">=</span><span class="mf">0.8</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="Adaptive-Learning-Rate-Methods">
<h3>Adaptive Learning Rate Methods<a class="headerlink" href="#Adaptive-Learning-Rate-Methods" title="Permalink to this headline"></a></h3>
<p>The gradient methods implemented by the optimizers described above use a global learning rate hyperparameter for all parameter updates. This has a well-documented shortcoming in that it makes the training process and convergence of the optimization algorithm really sensitive to the choice of the global learning rate. Adaptive learning rate methods avoid this pitfall by incorporating some history of the gradients observed in earlier iterations to scale step sizes (learning rates) to each
learnable parameter in the model.</p>
</div>
</div>
<div class="section" id="AdaGrad">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.AdaGrad"><span class="std std-ref">AdaGrad</span></a><a class="headerlink" href="#AdaGrad" title="Permalink to this headline"></a></h2>
<p>The AdaGrad optimizer, which implements the optimization method originally described by <a class="reference external" href="http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf">Duchi et al</a>, multiplies the global learning rate by the <span class="math notranslate nohighlight">\(L_2\)</span> norm of the preceeding gradient estimates for each paramater to obtain the per-parameter learning rate. To achieve this, AdaGrad introduces a new term which we’ll denote as <span class="math notranslate nohighlight">\(g^2\)</span> - the accumulated square of the gradient of the loss function with respect to the parameters.</p>
<p>Thus the AdaGrad optimizer update function performs the update steps below to obtain <span class="math notranslate nohighlight">\(i+1\)</span>th refinement.</p>
<div class="math notranslate nohighlight">
\[g^2_{i+1} = g^2_{i} + grad(w_i)^2\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + \dfrac{lr}{\sqrt{g^2 + \epsilon}}\cdot -grad(w_i)\]</div>
<p>The <span class="math notranslate nohighlight">\(\epsilon\)</span> term is a tiny positive value introduced to avoid division by zero due to floating point issues.</p>
<p>The overaching benefit of AdaGrad over SGD is that it ensures the overall convergence is more resilient to the choice of the global learning rate <span class="math notranslate nohighlight">\(lr\)</span> especially in tasks, such as natural language processing some data is sparse but the parameters influenced by the sparse data are quite informative.</p>
<p>To instantiate the Adagrad optimizer in MXNet you can use the following line of code.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[5]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">adagrad_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">AdaGrad</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-07</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="RMSProp">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.RMSProp"><span class="std std-ref">RMSProp</span></a><a class="headerlink" href="#RMSProp" title="Permalink to this headline"></a></h2>
<p>RMSProp, introduced by <a class="reference external" href="http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf">Tielemen and Hinton</a>, is similar to AdaGrad described above, but, instead of accumulating the sum of historical square gradients, maintains an exponential decaying average of the historical square gradients, in order to give more weighting to more recent gradients.</p>
<p>For rmsprop, we introduce the term <span class="math notranslate nohighlight">\(\mathbb{E}[g^2]\)</span> - the decaying average over past squared gradients and <span class="math notranslate nohighlight">\(\beta\)</span> as the forgetting factor. The rmsprop optimizer performs the update given below.</p>
<div class="math notranslate nohighlight">
\[\mathbb{E}[g^2]_{i+1} = \beta\cdot\mathbb{E}[g^2]_{i} + (1-\beta)\cdot [grad(w_{i})]^2\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + \dfrac{lr}{\sqrt{\mathbb{E}[g^2]_{i+1} + \epsilon}}\cdot -grad(w_i)\]</div>
<p>The <span class="math notranslate nohighlight">\(\epsilon\)</span> term is included, as in AdaGrad, for numerical stability.</p>
<p>RMSProp was derived independently of AdaGrad and the name RMSProp derives from a combination of <a class="reference external" href="https://en.wikipedia.org/wiki/Rprop">RProp</a> and the RMS, root mean square, operation in the denominator of the weight update.</p>
</div>
<div class="section" id="RMSProp-(Centered)">
<h2>RMSProp (Centered)<a class="headerlink" href="#RMSProp-(Centered)" title="Permalink to this headline"></a></h2>
<p>The MXNet RMSProp optimizer with the <code class="docutils literal notranslate"><span class="pre">centered=True</span></code> argument implements a variant of the RMSProp update described by <a class="reference external" href="https://arxiv.org/pdf/1308.0850v5.pdf">Alex Graves</a>, which centres the second moment <span class="math notranslate nohighlight">\(\mathbb{E}[g^2]\)</span> or decaying average of square gradients by subtracting the square of decaying average of gradients. It also adds an explicit momentum term to weight past update steps. Representing the decaying average of gradients as <span class="math notranslate nohighlight">\(\mathbb{E}[g]\)</span> and momentum parameter as
<span class="math notranslate nohighlight">\(\gamma\)</span>, we add another equation to the non-centered rmsprop update described above.</p>
<p>The centered RMSProp optimizer performs the update step:</p>
<div class="math notranslate nohighlight">
\[\mathbb{E}[g]_{i+1} = \beta\cdot\mathbb{E}[g]_{i} + (1-\beta)\cdot [grad(w_{i})]\]</div>
<div class="math notranslate nohighlight">
\[\mathbb{E}[g^2]_{i+1} = \beta\cdot\mathbb{E}[g^2]_{i} + (1-\beta)\cdot [grad(w_{i})]^2\]</div>
<div class="math notranslate nohighlight">
\[v_{i+1} = \gamma \cdot v_{i} + \dfrac{lr}{\sqrt{\mathbb{E}[g^2]_{i+1} - \mathbb{E}[g]^2_{i+1}+ \epsilon}}\cdot -grad(w_{i})\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + v_{i+1}\]</div>
<p>Here is an example snippet creating the RMSProp optimizer in MXNet.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[6]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">rmsprop_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">RMSProp</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.001</span><span class="p">,</span> <span class="n">rho</span><span class="o">=</span><span class="mf">0.9</span><span class="p">,</span> <span class="n">momentum</span><span class="o">=</span><span class="mf">0.9</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-07</span><span class="p">,</span> <span class="n">centered</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
</pre></div>
</div>
</div>
<p>In the code snippet above, <code class="docutils literal notranslate"><span class="pre">rho</span></code> is <span class="math notranslate nohighlight">\(\beta\)</span> in the equations above and <code class="docutils literal notranslate"><span class="pre">momentum</span></code> is <span class="math notranslate nohighlight">\(\gamma\)</span>, which is only used where <code class="docutils literal notranslate"><span class="pre">centered=True</span></code>.</p>
</div>
<div class="section" id="AdaDelta">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.AdaDelta"><span class="std std-ref">AdaDelta</span></a><a class="headerlink" href="#AdaDelta" title="Permalink to this headline"></a></h2>
<p>AdaDelta was introduced to address some remaining lingering issues with AdaGrad and RMSProp - the selection of a global learning rate. AdaGrad and RMSProp assign each parameter its own learning rate but the per-parameter learning rate are still calculated using the global learning rate. In contrast, AdaDelta does not require a global learning rate, instead, it tracks the square of previous update steps, represented below as <span class="math notranslate nohighlight">\(\mathbb{E}[\Delta w^2]\)</span> and uses the root mean square of the
previous update steps as an estimate of the learning rate.</p>
<p>The AdaDelta optimizer performs the following equations in its update step:</p>
<div class="math notranslate nohighlight">
\[\mathbb{E}[\Delta w^2]_{i+1} = \beta\cdot\mathbb{E}[\Delta w^2]_i + (1 - \beta) \cdot (w_i - w_{i-1})^2\]</div>
<div class="math notranslate nohighlight">
\[\mathbb{E}[g^2]_{i+1} = \beta\cdot\mathbb{E}[g^2]_{i} + (1-\beta)\cdot [grad(w_{i})]^2\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + \dfrac{\sqrt{\mathbb{E}[\Delta w^2] + \epsilon}}{\sqrt{\mathbb{E}[g^2]_{i+1} + \epsilon}} \cdot -grad(w_i)\]</div>
<p>As evident from the above equations, AdaDelta is similar to RMSProp but does not require you to specify <span class="math notranslate nohighlight">\(lr\)</span> and instead uses <span class="math notranslate nohighlight">\(\sqrt{\mathbb{E}[\Delta w^2] + \epsilon}\)</span> as the estimated learning rate. AdaDelta was introduced by Zeiler in this <a class="reference external" href="https://arxiv.org/abs/1212.5701">paper</a>.</p>
<p>Here is the code snippet creating the AdaDelta optimizer in MXNet. The argument <code class="docutils literal notranslate"><span class="pre">rho</span></code> in the code is <span class="math notranslate nohighlight">\(\beta\)</span> in the update equations. Notice there is no learning rate argument in the code.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[7]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">adadelta_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">AdaDelta</span><span class="p">(</span><span class="n">rho</span><span class="o">=</span><span class="mf">0.9</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-07</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="Adam">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.Adam"><span class="std std-ref">Adam</span></a><a class="headerlink" href="#Adam" title="Permalink to this headline"></a></h2>
<p>Adam, introduced by <a class="reference external" href="https://arxiv.org/abs/1412.6980">Kingma and Ba</a>, is one of the popular adaptive algorithms for deep learning. It combines elements of RMSProp with momentum SGD. Like RMSProp, Adam uses the RootMeanSquare of decaying average of historical gradients but also explicitly keeps track of a decaying average of momentum and uses that for the update step direction. Thus, Adam accepts two hyperparameters <span class="math notranslate nohighlight">\(\beta_1\)</span> and <span class="math notranslate nohighlight">\(\beta_2\)</span> for momentum weighting and gradient RMS
weighting respectively. Adam also accepts a global learning rate that’s adaptively tuned to each parameter with the gradient RootMeanSquare. Finally, Adam also includes bias correction steps within the update that transform the biased estimates of first and second order moments, <span class="math notranslate nohighlight">\(v_{i+1}\)</span> and <span class="math notranslate nohighlight">\(\mathbb{E}[g^2]_{i+1}\)</span> to their unbiased counterparts <span class="math notranslate nohighlight">\(\tilde{v}_{i+1}\)</span> and <span class="math notranslate nohighlight">\(\tilde{\mathbb{E}[g^2]}_{i+1}\)</span></p>
<p>The Adam optimizer performs the update step described the following equations:</p>
<div class="math notranslate nohighlight">
\[v_{i+1} = \beta_1 \cdot v_{i} + (1 - \beta_1) \cdot grad(w_i)\]</div>
<div class="math notranslate nohighlight">
\[\mathbb{E}[g^2]_{i+1} = \beta_2\cdot\mathbb{E}[g^2]_{i} + (1-\beta_2)\cdot [grad(w_{i})]^2\]</div>
<div class="math notranslate nohighlight">
\[\tilde{v}_{i+1} = \dfrac{v_{i+1}}{1 - (\beta_1)^{i+1}}\]</div>
<div class="math notranslate nohighlight">
\[\tilde{\mathbb{E}[g^2]}_{i+1} = \dfrac{\mathbb{E}[g^2]_{i+1}}{1 - (\beta_2)^{i+1}}\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + \dfrac{lr}{\sqrt{\tilde{\mathbb{E}[g^2]}_{i+1}} + \epsilon} \cdot -\tilde{v}_{i+1}\]</div>
<p>In MXNet, you can construct the Adam optimizer with the following line of code.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[8]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">adam_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">Adam</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.001</span><span class="p">,</span> <span class="n">beta1</span><span class="o">=</span><span class="mf">0.9</span><span class="p">,</span> <span class="n">beta2</span><span class="o">=</span><span class="mf">0.999</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-08</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="Adamax">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.Adamax"><span class="std std-ref">Adamax</span></a><a class="headerlink" href="#Adamax" title="Permalink to this headline"></a></h2>
<p>Adamax is a variant of Adam also included in the original paper by <a class="reference external" href="https://arxiv.org/abs/1412.6980">Kingma and Ba</a>. Like Adam, Adamax maintains a moving average for first and second moments but Adamax uses the <span class="math notranslate nohighlight">\(L_{\infty}\)</span> norm for the exponentially weighted average of the gradients, instead of the <span class="math notranslate nohighlight">\(L_2\)</span> norm used in Adam used to keep track of the gradient second moment. The <span class="math notranslate nohighlight">\(L_{\infty}\)</span> norm of a vector is equivalent to take the maximum absolute value of elements in that
vector.</p>
<div class="math notranslate nohighlight">
\[v_{i+1} = \beta_1 \cdot v_{i} + (1 - \beta_1) \cdot grad(w_i)\]</div>
<div class="math notranslate nohighlight">
\[g^\infty_{i+1} = \mathtt{max}(\beta_2\cdot g^\infty_{i}, |{grad(w_i)}|)\]</div>
<div class="math notranslate nohighlight">
\[\tilde{v}_{i+1} = \dfrac{v_{i+1}}{1 - \beta_1^{i+1}}\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + \dfrac{lr}{g^\infty_{i+1} + \epsilon} \cdot - \tilde{v}_{i+1}\]</div>
<p>See the code snippet below for how to construct Adamax in MXNet.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[9]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">adamax_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">Adamax</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.002</span><span class="p">,</span> <span class="n">beta1</span><span class="o">=</span><span class="mf">0.9</span><span class="p">,</span> <span class="n">beta2</span><span class="o">=</span><span class="mf">0.999</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="Nadam">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.Nadam"><span class="std std-ref">Nadam</span></a><a class="headerlink" href="#Nadam" title="Permalink to this headline"></a></h2>
<p>Nadam is also a variant of Adam and draws from the perspective that Adam can be viewed as a combination of RMSProp and classical Momentum (or Polyak Momentum). Nadam replaces the classical Momentum component of Adam with Nesterov Momentum (See <a class="reference external" href="http://cs229.stanford.edu/proj2015/054_report.pdf">paper</a> by Dozat). The consequence of this is that the gradient used to update the weighted average of the momentum term is a lookahead gradient as is the case with NAG.</p>
<p>The Nadam optimizer performs the update step:</p>
<div class="math notranslate nohighlight">
\[v_{i+1} = \beta_1 \cdot v_{i} + (1 - \beta_1) \cdot grad(w_i + \beta_1 \cdot v_{i})\]</div>
<div class="math notranslate nohighlight">
\[\mathbb{E}[g^2]_{i+1} = \beta_2\cdot\mathbb{E}[g^2]_{i} + (1-\beta_2)\cdot [grad(w_{i})]^2\]</div>
<div class="math notranslate nohighlight">
\[\tilde{v}_{i+1} = \dfrac{v_{i+1}}{1 - \beta_1^{i+1}}\]</div>
<div class="math notranslate nohighlight">
\[\tilde{\mathbb{E}[g^2]}_{i+1} = \dfrac{\mathbb{E}[g^2]_{i+1}}{1 - \beta_2^{i+1}}\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + \dfrac{lr}{\sqrt{\tilde{\mathbb{E}[g^2]}_{i+1}} + \epsilon}\cdot - \tilde{v}_{i+1}\]</div>
<p>Here is the line of code to create the NAdam optimizer in MXNet.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[10]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">nadam_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">Nadam</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.001</span><span class="p">,</span> <span class="n">beta1</span><span class="o">=</span><span class="mf">0.9</span><span class="p">,</span> <span class="n">beta2</span><span class="o">=</span><span class="mf">0.999</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-08</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="SGD-optimized-for-large-scale-distributed-training">
<h3>SGD optimized for large scale distributed training<a class="headerlink" href="#SGD-optimized-for-large-scale-distributed-training" title="Permalink to this headline"></a></h3>
<p>Training very deep neural networks can be time consuming and as such it is very common now to see practitioners turn to distributed training on multiple processors on the same machine or even across a fleet of machines to parallelize network training because this can reduce neural network training time from days to minutes.</p>
<p>While all the preceding optimizers, from SGD to Adam, can be readily used in the distributed setting, the following optimizers in MXNet provide extra features targeted at alleviating some of the problems associated with distributed training.</p>
</div>
</div>
<div class="section" id="Signum">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.Signum"><span class="std std-ref">Signum</span></a><a class="headerlink" href="#Signum" title="Permalink to this headline"></a></h2>
<p>In distributed training, communicating gradients across multiple worker nodes can be expensive and create a performance bottleneck. The Signum optimizer addresses this problem by transmitting just the sign of each minibatch gradient instead of the full precision gradient. In MXNet, the signum optimizer implements two variants of compressed gradients described in the paper by <a class="reference external" href="https://arxiv.org/pdf/1802.04434.pdf">Bernstein et al</a>.</p>
<p>The first variant, achieved by constructing the Signum optimizer with <code class="docutils literal notranslate"><span class="pre">momentum=0</span></code>, implements SignSGD update which performs the update below.</p>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i - lr \cdot sign(grad(w_i))\]</div>
<p>The second variant, achieved by passing a non-zero momentum parameter implements the Signum update which is equivalent to SignSGD and momentum. For momentum parameter <span class="math notranslate nohighlight">\(\gamma \in [0, 1]\)</span>, the Signum optimizer performs the following update:</p>
<div class="math notranslate nohighlight">
\[v_{i+1} = \gamma \cdot v_i + (1 - \gamma) \cdot grad(w_i)\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i - lr \cdot sign(v_{i+1})\]</div>
<p>Here is how to create the signum optimizer in MXNet.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[11]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">signum_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">Signum</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> <span class="n">momentum</span><span class="o">=</span><span class="mf">0.9</span><span class="p">,</span> <span class="n">wd_lh</span><span class="o">=</span><span class="mf">0.0</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="DCASGD">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.DCASGD"><span class="std std-ref">DCASGD</span></a><a class="headerlink" href="#DCASGD" title="Permalink to this headline"></a></h2>
<p>The DCASGD optimizer implements Delay Compensated Asynchronous Stochastic Gradient Descent by <a class="reference external" href="https://arxiv.org/pdf/1609.08326.pdf">Zheng et al</a>. In asynchronous distributed SGD, it is possible that a training worker node add its gradients too late to the global (parameter) server resulting in a delayed gradient being used to update the current parameters. DCASGD addresses this issue of delayed gradients by compensating for this delay in the parameter update steps.</p>
<p>If <span class="math notranslate nohighlight">\(grad(w_i)\)</span> denotes the delayed gradient, <span class="math notranslate nohighlight">\(w_{i+\tau}\)</span> denotes the parameter values at the current iteration, and <span class="math notranslate nohighlight">\(\lambda\)</span> is the delay scale factor, the DCASGD optimizer update function performs the update:</p>
<div class="math notranslate nohighlight">
\[w_{i+\tau+1} = w_{i+\tau} − lr \cdot (grad(w_i) + \lambda \cdot grad(w_i)^2 \cdot (w_{i+\tau} − w_i))\]</div>
<p>The DCASGD optimizer in MXNet can be initialized using the code below.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[12]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">dcasgd_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">DCASGD</span><span class="p">(</span><span class="n">momentum</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> <span class="n">lamda</span><span class="o">=</span><span class="mf">0.04</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="Online-Learning-Algorithms">
<h3>Online Learning Algorithms<a class="headerlink" href="#Online-Learning-Algorithms" title="Permalink to this headline"></a></h3>
<p>Before deep neural networks became popular post 2012, people were already solving large scale optimization problems to train (shallow) machine learning models. One particular area this was done was active or online learning where the model is continually learning and updating its parameters after it is deployed to production. In online learning, the model has to make predictions on new inputs but moments later may become aware of the true value of what it tried to predict and use this
information to update its parameters.</p>
<p>The class of optimization algorithms designed to tackle online learning problems have also seen some success in offline training of deep neural models. The following optimizers are algorithms taken from online learning that have been implemented in MXNet.</p>
</div>
</div>
<div class="section" id="FTRL">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.Ftrl"><span class="std std-ref">FTRL</span></a><a class="headerlink" href="#FTRL" title="Permalink to this headline"></a></h2>
<p>FTRL stands for Follow the Regularized Leader and describes a family of algorithms originally designed for online learning tasks.</p>
<p>For each iteration, FTRL algorithms finds the next parameter by solving the following optimization problem which minimizes the total regret i.e the sum of the inner product all preceding gradients and next parameter. The optimization objective is regularized so that the next parameter is close (proximal) in <span class="math notranslate nohighlight">\(L2\)</span> norm to the preceding parameter values and is sparse which is enforced by the <span class="math notranslate nohighlight">\(L1\)</span> norm.</p>
<div class="math notranslate nohighlight">
\[w_{i+1} = \texttt{argmin}_{w} \left[\sum_{j=1}^{i} grad(w_i)\cdot w + \dfrac{1}{2}\sum_{j=1}^{i} \sigma_j \cdot ||w - w_j||_2^2 + \lambda ||w||_1\right]\]</div>
<p>Due to the similarity of online learning and neural network training, there is an <a class="reference external" href="https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/37013.pdf">equivalence</a> between variants of gradient descent and FTRL algorithms. In fact, the <span class="math notranslate nohighlight">\(w\)</span> that minimizes FTRL with only <span class="math notranslate nohighlight">\(L_2\)</span> regularization (i.e <span class="math notranslate nohighlight">\(\lambda\)</span> in the equation above is set to 0) is exactly the <span class="math notranslate nohighlight">\(w\)</span> derived from stochastic gradient descent update.</p>
<p>The version of FTRL implemented as an MXNet optimizer is from <a class="reference external" href="https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41159.pdf">McMahan et al</a> and encourages sparse parameters due to <span class="math notranslate nohighlight">\(L_1\)</span> regularization. It performs the following update:</p>
<div class="math notranslate nohighlight">
\[z_{i+1} = z_i + \dfrac{\left(\sqrt{\eta_i + grad(w_i)^2} - \sqrt{\eta_i}\right) \cdot w_i}{lr}\]</div>
<div class="math notranslate nohighlight">
\[\eta_{i+1} = \eta_i + grad(w_i)^2\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = (|z_{i+1}| &gt; \lambda) \cdot \left[ \dfrac{-lr}{\beta + \sqrt{\eta_{i+1}}} (z_{i+1} - \lambda \cdot sign(z_{i+1}))\right]\]</div>
<p>Here is how to initialize the FTRL optimizer in MXNet</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[13]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">ftrl_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">Ftrl</span><span class="p">(</span><span class="n">lamda1</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> <span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">beta</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="FTML">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.FTML"><span class="std std-ref">FTML</span></a><a class="headerlink" href="#FTML" title="Permalink to this headline"></a></h2>
<p>FTML stands for Follow the Moving Leader and is a variant of the FTRL family of algorithms adapted specifically to deep learning. Regular FTRL algorithms, described above, solve an optimization problem every update that involves the sum of all previous gradients. This is not well suited for the non-convex loss functions in deep learning. In the non-convex settings, older gradients are likely uninformative as the parameter updates can move to converge towards different local minima at different
iterations. FTML addresses this problem by reweighing the learning subproblems in each iteration as shown below.</p>
<p><span class="math">\begin{equation*}
w_{i+1} = \texttt{argmin}_{w} \left[\sum_{j=1}^{i} (1 − \beta_1)\beta_1^{i−j} grad(w_i)\cdot w + \dfrac{1}{2}\sum_{j=1}^{i} \sigma_j \cdot ||w - w_j||_2^2 \right]
\end{equation*}</span></p>
<p><span class="math notranslate nohighlight">\(\beta_1\)</span> is introduced to compute the exponential moving average of the previous accumulated gradient. The improvements of FTML over FTRL can be compared to the improvements of RMSProp/Adam to AdaGrad. According to <a class="reference external" href="http://proceedings.mlr.press/v70/zheng17a/zheng17a.pdf">Zheng et al</a>, FTML enjoys some of the nice properties of RMSProp and Adam while avoiding their pitfalls.</p>
<p>The FTML optimizer performs the following update:</p>
<div class="math notranslate nohighlight">
\[v_{i+1} = \beta_2 \cdot v_i + (1 - \beta_2) \cdot grad(w_i)^2\]</div>
<div class="math notranslate nohighlight">
\[d_{i+1} = \dfrac{1 - \beta_1^{i+1}}{lr} \big(\sqrt{\dfrac{v_{i+1}}{1 - \beta_2^{i+1}}} + \epsilon\big)\]</div>
<div class="math notranslate nohighlight">
\[z_{i+1} = \beta_1 \cdot z_i + (1 - \beta_1)\cdot grad(w_i) - (d_{i+1} - \beta_1 \cdot d_i) \cdot w_i\]</div>
<div class="math notranslate nohighlight">
\[w_{i+1} = \dfrac{-z_{i+1}}{d_{i+1}}\]</div>
<p>In MXNet, you can initialize the FTML optimizer using</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[14]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">ftml_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">FTML</span><span class="p">(</span><span class="n">beta1</span><span class="o">=</span><span class="mf">0.6</span><span class="p">,</span> <span class="n">beta2</span><span class="o">=</span><span class="mf">0.999</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-08</span><span class="p">)</span>
</pre></div>
</div>
</div>
<p>Here <code class="docutils literal notranslate"><span class="pre">beta1</span></code> and <code class="docutils literal notranslate"><span class="pre">beta2</span></code> are similar to the arguments in the Adam optimizer.</p>
<div class="section" id="Bayesian-SGD">
<h3>Bayesian SGD<a class="headerlink" href="#Bayesian-SGD" title="Permalink to this headline"></a></h3>
<p>A notable shortcoming of deep learning is that the model parameters learned after training are only point estimates, therefore deep learning model predictions have no information about uncertainty or confidence bounds. This is in contrast to a fully Bayesian approach which incorporates prior distributions on the model parameters and estimates the model parameters as belonging to a posterior distribution. This approach allows the predictions of a bayesian model to have information about
uncertainty, as you can sample different values from the posterior distribution to obtain different model parameters. One approach to close the bayesian gap in deep learning is to incorporate the gradient descent algorithm with properties that allow the model parameters to converge to a distribution instead of a single value or point estimate.</p>
</div>
</div>
<div class="section" id="SGLD">
<h2><a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.SGLD"><span class="std std-ref">SGLD</span></a><a class="headerlink" href="#SGLD" title="Permalink to this headline"></a></h2>
<p>Stochastic Gradient Langevin Dynamics or SGLD was introduced to allow uncertainties around model parameters to be captured directly during model training. With every update in SGLD, the learning rate decreases to zero and a gaussian noise of known variances is injected into the SGD step. This has the effect of having the training parameters converge to a sufficient statistic for a posterior distribution instead of simply a point estimate of the model parameters.</p>
<p>SGLD performs the parameter update:</p>
<div class="math notranslate nohighlight">
\[w_{i+1} = w_i + \dfrac{lr_{i+1}}{2}\cdot -grad(w_i) + \eta_{i+1}\]</div>
<p>where $ <span class="math">\eta</span><em>{i+1} :nbsphinx-math:`sim `N(0, lr</em>{i+1})$ i.e <span class="math notranslate nohighlight">\(\eta_{i+1}\)</span> is drawn from a zero centered gaussian with variance <span class="math notranslate nohighlight">\(lr_{i+1}\)</span></p>
<p>SGLD was introduced by <a class="reference external" href="https://papers.nips.cc/paper/4883-stochastic-gradient-riemannian-langevin-dynamics-on-the-probability-simplex.pdf">Patterson and Teh</a> and the optimizer can be created in MXNet with the following line of code.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[15]:
</pre></div>
</div>
<div class="input_area highlight-python notranslate"><div class="highlight"><pre>
<span></span><span class="n">sgld_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span><span class="o">.</span><span class="n">SGLD</span><span class="p">()</span>
</pre></div>
</div>
</div>
<div class="section" id="Custom-Optimizer">
<h3>Custom Optimizer<a class="headerlink" href="#Custom-Optimizer" title="Permalink to this headline"></a></h3>
<p>If you would like to use a particular optimizer that is not yet implemented in MXNet or you have a custom optimization algorithm of your own that you would like to use to train your model, it is very straightforward to create a custom optimizer.</p>
<p>Step 1: First create a function that is able to perform your desired updates given the weights, gradients and other state information.</p>
<p>Step 2: You will have to write your own optimizer class that extends the <a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.Optimizer"><span class="std std-ref">base optimizer class</span></a> and override the following functions * <code class="docutils literal notranslate"><span class="pre">__init__</span></code>: accepts the parameters of your optimizer algorithm as inputs as saves them as member variables. * <code class="docutils literal notranslate"><span class="pre">create_state</span></code>: If your custom optimizer uses some additional state information besides the gradient, then you should implement a function that accepts the weights and returns the state.
* <code class="docutils literal notranslate"><span class="pre">update</span></code>: Implement your optimizer update function using the function in Step 1</p>
<p>Step 3: Register your optimizer with <code class="docutils literal notranslate"><span class="pre">&#64;register</span></code> decorator on your optimizer class.</p>
<p>See the <a class="reference internal" href="../../../api/optimizer/index.html#mxnet.optimizer.NAG"><span class="std std-ref">source code</span></a> for the NAG optimizer for a concrete example.</p>
</div>
<div class="section" id="Summary">
<h3>Summary<a class="headerlink" href="#Summary" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><p>MXNet implements many state-of-the-art optimizers which can be passed directly into a gluon trainer object. Calling <code class="docutils literal notranslate"><span class="pre">trainer.step</span></code> during model training uses the optimizers to update the model parameters.</p></li>
<li><p>Gradient descent algorithms minimize the loss function by using information from the gradient of the loss function and a learning rate hyperparameter.</p></li>
<li><p>Stochastic Gradient Descent is the backbone of deep learning optimization algorithms and simple SGD optimizers can be made really powerful by incorporating momentum, for example <code class="docutils literal notranslate"><span class="pre">sgd</span></code> with momentum and <code class="docutils literal notranslate"><span class="pre">nag</span></code>.</p></li>
<li><p>Adaptive learning rate methods compute per-parameter learning rates to make optimization less sensitive to the choice of global learning rate. <code class="docutils literal notranslate"><span class="pre">adam</span></code> is a popular adaptive learning rate optimizer.</p></li>
<li><p>Certain MXNet optimizers like <code class="docutils literal notranslate"><span class="pre">Signum</span></code> and Large Batch SGD are well suited for large scale distributed training as they consider challenges specific these tasks.</p></li>
<li><p>MXNet also implements optimizers from active learning like <code class="docutils literal notranslate"><span class="pre">FTML</span></code>, <code class="docutils literal notranslate"><span class="pre">FTRL</span></code>, and optimizers for bayesian learning like <code class="docutils literal notranslate"><span class="pre">SGLD</span></code>.</p></li>
<li><p>Finally, it is easy to create a custom optimizer by following the patterns in the source code implementation for the optimizers that already exist in MXNet.</p></li>
</ul>
</div>
<div class="section" id="Next-Steps">
<h3>Next Steps<a class="headerlink" href="#Next-Steps" title="Permalink to this headline"></a></h3>
<p>While optimization and optimizers play a significant role in deep learning model training, there are still other important components to model training. Here are a few suggestions about where to look next. * The <a class="reference internal" href="../../../api/gluon/trainer.html"><span class="doc">trainer API</span></a> and <a class="reference internal" href="../gluon/training/trainer.html"><span class="doc">guide</span></a> have information about how to construct the trainer that encapsulate the optimizers and will actually be used in your model training loop. * Check out the guide to MXNet gluon <a class="reference internal" href="../gluon/loss/loss.html"><span class="doc">Loss
functions</span></a> and <a class="reference internal" href="../gluon/loss/custom-loss.html"><span class="doc">custom losses</span></a> to learn about the loss functions optimized by these optimizers, see what loss functions are already implemented in MXNet and understand how to write your own custom loss functions. * Take a look at the <a class="reference internal" href="../gluon/blocks/init.html"><span class="doc">guide to parameter initialization</span></a> in MXNet to learn about what initialization schemes are already implemented, and how to implement your custom initialization schemes.
* Also check out the <a class="reference internal" href="../autograd/index.html"><span class="doc">autograd guide</span></a> to learn about automatic differentiation and how gradients are automatically computed in MXNet. * Make sure to take a look at the <a class="reference internal" href="../gluon/training/learning_rates/learning_rate_schedules.html"><span class="doc">guide to scheduling learning rates</span></a> to learn how to create learning rate schedules to supercharge the convergence of your optimizer. * Finally take a look at the <a class="reference internal" href="../kvstore/index.html"><span class="doc">KVStore API</span></a> to learn how parameter values
are synchronized over multiple devices.</p>
</div>
</div>
</div>
<hr class="feedback-hr-top" />
<div class="feedback-container">
<div class="feedback-question">Did this page help you?</div>
<div class="feedback-answer-container">
<div class="feedback-answer yes-link" data-response="yes">Yes</div>
<div class="feedback-answer no-link" data-response="no">No</div>
</div>
<div class="feedback-thank-you">Thanks for your feedback!</div>
</div>
<hr class="feedback-hr-bottom" />
</div>
<div class="side-doc-outline">
<div class="side-doc-outline--content">
<div class="localtoc">
<p class="caption">
<span class="caption-text">Table Of Contents</span>
</p>
<ul>
<li><a class="reference internal" href="#">Optimizers</a><ul>
<li><a class="reference internal" href="#What-should-I-use?">What should I use?</a><ul>
<li><a class="reference internal" href="#Stochastic-Gradient-Descent">Stochastic Gradient Descent</a></li>
</ul>
</li>
<li><a class="reference internal" href="#SGD-optimizer">SGD optimizer</a></li>
<li><a class="reference internal" href="#Weight-decay">Weight decay</a></li>
<li><a class="reference internal" href="#Momentum">Momentum</a></li>
<li><a class="reference internal" href="#Nesterov-Accelerated-Stochastic-Gradient-Descent">Nesterov Accelerated Stochastic Gradient Descent</a><ul>
<li><a class="reference internal" href="#Adaptive-Learning-Rate-Methods">Adaptive Learning Rate Methods</a></li>
</ul>
</li>
<li><a class="reference internal" href="#AdaGrad">AdaGrad</a></li>
<li><a class="reference internal" href="#RMSProp">RMSProp</a></li>
<li><a class="reference internal" href="#RMSProp-(Centered)">RMSProp (Centered)</a></li>
<li><a class="reference internal" href="#AdaDelta">AdaDelta</a></li>
<li><a class="reference internal" href="#Adam">Adam</a></li>
<li><a class="reference internal" href="#Adamax">Adamax</a></li>
<li><a class="reference internal" href="#Nadam">Nadam</a><ul>
<li><a class="reference internal" href="#SGD-optimized-for-large-scale-distributed-training">SGD optimized for large scale distributed training</a></li>
</ul>
</li>
<li><a class="reference internal" href="#Signum">Signum</a></li>
<li><a class="reference internal" href="#DCASGD">DCASGD</a><ul>
<li><a class="reference internal" href="#Online-Learning-Algorithms">Online Learning Algorithms</a></li>
</ul>
</li>
<li><a class="reference internal" href="#FTRL">FTRL</a></li>
<li><a class="reference internal" href="#FTML">FTML</a><ul>
<li><a class="reference internal" href="#Bayesian-SGD">Bayesian SGD</a></li>
</ul>
</li>
<li><a class="reference internal" href="#SGLD">SGLD</a><ul>
<li><a class="reference internal" href="#Custom-Optimizer">Custom Optimizer</a></li>
<li><a class="reference internal" href="#Summary">Summary</a></li>
<li><a class="reference internal" href="#Next-Steps">Next Steps</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
</div>
<div class="clearer"></div>
</div><div class="pagenation">
<a id="button-prev" href="../onnx/inference_on_onnx_model.html" class="mdl-button mdl-js-button mdl-js-ripple-effect mdl-button--colored" role="botton" accesskey="P">
<i class="pagenation-arrow-L fas fa-arrow-left fa-lg"></i>
<div class="pagenation-text">
<span class="pagenation-direction">Previous</span>
<div>Running inference on MXNet/Gluon from an ONNX model</div>
</div>
</a>
<a id="button-next" href="../viz/index.html" class="mdl-button mdl-js-button mdl-js-ripple-effect mdl-button--colored" role="botton" accesskey="N">
<i class="pagenation-arrow-R fas fa-arrow-right fa-lg"></i>
<div class="pagenation-text">
<span class="pagenation-direction">Next</span>
<div>Visualization</div>
</div>
</a>
</div>
<footer class="site-footer h-card">
<div class="wrapper">
<div class="row">
<div class="col-4">
<h4 class="footer-category-title">Resources</h4>
<ul class="contact-list">
<li><a href="https://lists.apache.org/list.html?dev@mxnet.apache.org">Mailing list</a> <a class="u-email" href="mailto:dev-subscribe@mxnet.apache.org">(subscribe)</a></li>
<li><a href="https://discuss.mxnet.io">MXNet Discuss forum</a></li>
<li><a href="https://github.com/apache/mxnet/issues">Github Issues</a></li>
<li><a href="https://github.com/apache/mxnet/projects">Projects</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home">Developer Wiki</a></li>
<li><a href="/community">Contribute To MXNet</a></li>
</ul>
</div>
<div class="col-4"><ul class="social-media-list"><li><a href="https://github.com/apache/mxnet"><svg class="svg-icon"><use xlink:href="../../../_static/minima-social-icons.svg#github"></use></svg> <span class="username">apache/mxnet</span></a></li><li><a href="https://www.twitter.com/apachemxnet"><svg class="svg-icon"><use xlink:href="../../../_static/minima-social-icons.svg#twitter"></use></svg> <span class="username">apachemxnet</span></a></li><li><a href="https://youtube.com/apachemxnet"><svg class="svg-icon"><use xlink:href="../../../_static/minima-social-icons.svg#youtube"></use></svg> <span class="username">apachemxnet</span></a></li></ul>
</div>
<div class="col-4 footer-text">
<p>A flexible and efficient library for deep learning.</p>
</div>
</div>
</div>
</footer>
<footer class="site-footer2">
<div class="wrapper">
<div class="row">
<div class="col-3">
<img src="../../../_static/apache_incubator_logo.png" class="footer-logo col-2">
</div>
<div class="footer-bottom-warning col-9">
<p>Apache MXNet is an effort undergoing incubation at <a href="http://www.apache.org/">The Apache Software Foundation</a> (ASF), <span style="font-weight:bold">sponsored by the <i>Apache Incubator</i></span>. Incubation is required
of all newly accepted projects until a further review indicates that the infrastructure,
communications, and decision making process have stabilized in a manner consistent with other
successful ASF projects. While incubation status is not necessarily a reflection of the completeness
or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
</p><p>"Copyright © 2017-2018, The Apache Software Foundation Apache MXNet, MXNet, Apache, the Apache
feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the
Apache Software Foundation."</p>
</div>
</div>
</div>
</footer>
</body>
</html>