<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8" />
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta http-equiv="x-ua-compatible" content="ie=edge">
<style>
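/* Inline styles for the site-header dropdown menus (the "Apache" links and
   the version picker): the menu body (.dropdown-content) is hidden by
   default and revealed on hover of its .dropdown wrapper; the active
   version entry is highlighted in orange (#FF4500). */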
.dropdown {
position: relative;
display: inline-block;
}
.dropdown-content {
display: none;
position: absolute;
background-color: #f9f9f9;
min-width: 160px;
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
padding: 12px 16px;
z-index: 1;
text-align: left;
}
.dropdown:hover .dropdown-content {
display: block;
}
.dropdown-option:hover {
color: #FF4500;
}
.dropdown-option-active {
color: #FF4500;
font-weight: lighter;
}
.dropdown-option {
color: #000000;
font-weight: lighter;
}
.dropdown-header {
color: #FFFFFF;
display: inline-flex;
}
.dropdown-caret {
width: 18px;
}
.dropdown-caret-path {
fill: #FFFFFF;
}
</style>
<title>Learning Rate Schedules &#8212; Apache MXNet documentation</title>
<link rel="stylesheet" href="../../../../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../../../_static/mxnet.css" />
<link rel="stylesheet" href="../../../../../_static/material-design-lite-1.3.0/material.blue-deep_orange.min.css" type="text/css" />
<link rel="stylesheet" href="../../../../../_static/sphinx_materialdesign_theme.css" type="text/css" />
<link rel="stylesheet" href="../../../../../_static/fontawesome/all.css" type="text/css" />
<link rel="stylesheet" href="../../../../../_static/fonts.css" type="text/css" />
<link rel="stylesheet" href="../../../../../_static/feedback.css" type="text/css" />
<script id="documentation_options" data-url_root="../../../../../" src="../../../../../_static/documentation_options.js"></script>
<script src="../../../../../_static/jquery.js"></script>
<script src="../../../../../_static/underscore.js"></script>
<script src="../../../../../_static/doctools.js"></script>
<script src="../../../../../_static/language_data.js"></script>
<script src="../../../../../_static/matomo_analytics.js"></script>
<script src="../../../../../_static/autodoc.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="shortcut icon" href="../../../../../_static/mxnet-icon.png"/>
<link rel="index" title="Index" href="../../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../../search.html" />
<link rel="next" title="Advanced Learning Rate Schedules" href="learning_rate_schedules_advanced.html" />
<link rel="prev" title="Learning Rate Finder" href="learning_rate_finder.html" />
</head>
<body><header class="site-header" role="banner">
<div class="wrapper">
<a class="site-title" rel="author" href="/versions/1.9.1/"><img
src="../../../../../_static/mxnet_logo.png" class="site-header-logo"></a>
<nav class="site-nav">
<input type="checkbox" id="nav-trigger" class="nav-trigger"/>
<label for="nav-trigger">
<span class="menu-icon">
<svg viewBox="0 0 18 15" width="18px" height="15px">
<path d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.032C17.335,0,18,0.665,18,1.484L18,1.484z M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.032C17.335,6.031,18,6.696,18,7.516L18,7.516z M18,13.516C18,14.335,17.335,15,16.516,15H1.484 C0.665,15,0,14.335,0,13.516l0,0c0-0.82,0.665-1.483,1.484-1.483h15.032C17.335,12.031,18,12.695,18,13.516L18,13.516z"/>
</svg>
</span>
</label>
<div class="trigger">
<a class="page-link" href="/versions/1.9.1/get_started">Get Started</a>
<a class="page-link" href="/versions/1.9.1/features">Features</a>
<a class="page-link" href="/versions/1.9.1/ecosystem">Ecosystem</a>
<a class="page-link page-current" href="/versions/1.9.1/api">Docs & Tutorials</a>
<a class="page-link" href="/versions/1.9.1/trusted_by">Trusted By</a>
<a class="page-link" href="https://github.com/apache/mxnet">GitHub</a>
<div class="dropdown" style="min-width:100px">
<span class="dropdown-header">Apache
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content" style="min-width:250px">
<a href="https://www.apache.org/foundation/">Apache Software Foundation</a>
<a href="https://incubator.apache.org/">Apache Incubator</a>
<a href="https://www.apache.org/licenses/">License</a>
<a href="/versions/1.9.1/api/faq/security.html">Security</a>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy</a>
<a href="https://www.apache.org/events/current-event">Events</a>
<a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</div>
</div>
<div class="dropdown">
<span class="dropdown-header">1.9.1
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content">
<a class="dropdown-option" href="/">master</a><br>
<a class="dropdown-option-active" href="/versions/1.9.1/">1.9.1</a><br>
<a class="dropdown-option" href="/versions/1.8.0/">1.8.0</a><br>
<a class="dropdown-option" href="/versions/1.7.0/">1.7.0</a><br>
<a class="dropdown-option" href="/versions/1.6.0/">1.6.0</a><br>
<a class="dropdown-option" href="/versions/1.5.0/">1.5.0</a><br>
<a class="dropdown-option" href="/versions/1.4.1/">1.4.1</a><br>
<a class="dropdown-option" href="/versions/1.3.1/">1.3.1</a><br>
<a class="dropdown-option" href="/versions/1.2.1/">1.2.1</a><br>
<a class="dropdown-option" href="/versions/1.1.0/">1.1.0</a><br>
<a class="dropdown-option" href="/versions/1.0.0/">1.0.0</a><br>
<a class="dropdown-option" href="/versions/0.12.1/">0.12.1</a><br>
<a class="dropdown-option" href="/versions/0.11.0/">0.11.0</a>
</div>
</div>
</div>
</nav>
</div>
</header>
<div class="mdl-layout mdl-js-layout mdl-layout--fixed-header mdl-layout--fixed-drawer"><header class="mdl-layout__header mdl-layout__header--waterfall ">
<div class="mdl-layout__header-row">
<nav class="mdl-navigation breadcrumb">
<a class="mdl-navigation__link" href="../../../../index.html">Python Tutorials</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link" href="../../../index.html">Packages</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link" href="../../index.html">Gluon</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link" href="../index.html">Training</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link" href="index.html">Learning Rates</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link is-active">Learning Rate Schedules</a>
</nav>
<div class="mdl-layout-spacer"></div>
<nav class="mdl-navigation">
<form class="form-inline pull-sm-right" action="../../../../../search.html" method="get">
<div class="mdl-textfield mdl-js-textfield mdl-textfield--expandable mdl-textfield--floating-label mdl-textfield--align-right">
<label id="quick-search-icon" class="mdl-button mdl-js-button mdl-button--icon" for="waterfall-exp">
<i class="material-icons">search</i>
</label>
<div class="mdl-textfield__expandable-holder">
<input class="mdl-textfield__input" type="text" name="q" id="waterfall-exp" placeholder="Search" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</div>
</div>
<div class="mdl-tooltip" data-mdl-for="quick-search-icon">
Quick search
</div>
</form>
<a id="button-show-source"
class="mdl-button mdl-js-button mdl-button--icon"
href="../../../../../_sources/tutorials/packages/gluon/training/learning_rates/learning_rate_schedules.ipynb" rel="nofollow">
<i class="material-icons">code</i>
</a>
<div class="mdl-tooltip" data-mdl-for="button-show-source">
Show Source
</div>
</nav>
</div>
<div class="mdl-layout__header-row header-links">
<div class="mdl-layout-spacer"></div>
<nav class="mdl-navigation">
</nav>
</div>
</header><header class="mdl-layout__drawer">
<div class="globaltoc">
<span class="mdl-layout-title toc">Table Of Contents</span>
<nav class="mdl-navigation">
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="../../../../index.html">Python Tutorials</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../../../../getting-started/index.html">Getting Started</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../getting-started/crash-course/index.html">Crash Course</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/1-ndarray.html">Manipulate data with <code class="docutils literal notranslate"><span class="pre">ndarray</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/2-nn.html">Create a neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/3-autograd.html">Automatic differentiation with <code class="docutils literal notranslate"><span class="pre">autograd</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/4-train.html">Train the neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/5-predict.html">Predict with a pre-trained model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/6-use_gpus.html">Use GPUs</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../getting-started/to-mxnet/index.html">Moving to MXNet from Other Frameworks</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/to-mxnet/pytorch.html">PyTorch vs Apache MXNet</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../getting-started/gluon_from_experiment_to_deployment.html">Gluon: from experiment to deployment</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../getting-started/logistic_regression_explained.html">Logistic regression explained</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/image/mnist.html">MNIST</a></li>
</ul>
</li>
<li class="toctree-l2 current"><a class="reference internal" href="../../../index.html">Packages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../../../autograd/index.html">Automatic Differentiation</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="../../index.html">Gluon</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="../../blocks/index.html">Blocks</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/custom-layer.html">Custom Layers</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/custom_layer_beginners.html">Customer Layers (Beginners)</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/hybridize.html">Hybridize</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/init.html">Initialization</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/naming.html">Parameter and Block Naming</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/nn.html">Layers and Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/parameters.html">Parameter Management</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/save_load_params.html">Saving and Loading Gluon Models</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/activations/activations.html">Activation Blocks</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../data/index.html">Data Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../data/data_augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/data_augmentation.html#Spatial-Augmentation">Spatial Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/data_augmentation.html#Color-Augmentation">Color Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/data_augmentation.html#Composed-Augmentations">Composed Augmentations</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/datasets.html">Gluon <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s and <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/datasets.html#Using-own-data-with-included-Datasets">Using own data with included <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/datasets.html#Using-own-data-with-custom-Datasets">Using own data with custom <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/datasets.html#Appendix:-Upgrading-from-Module-DataIter-to-Gluon-DataLoader">Appendix: Upgrading from Module <code class="docutils literal notranslate"><span class="pre">DataIter</span></code> to Gluon <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../image/index.html">Image Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../image/image-augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../image/info_gan.html">Image similarity search with InfoGAN</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../image/mnist.html">Handwritten Digit Recognition</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../image/pretrained_models.html">Using pre-trained models in MXNet</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../loss/index.html">Losses</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../loss/custom-loss.html">Custom Loss Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../loss/kl_divergence.html">Kullback-Leibler (KL) Divergence</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../loss/loss.html">Loss functions</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../text/index.html">Text Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../text/gnmt.html">Google Neural Machine Translation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../text/transformer.html">Machine Translation with Transformer</a></li>
</ul>
</li>
<li class="toctree-l4 current"><a class="reference internal" href="../index.html">Training</a><ul class="current">
<li class="toctree-l5"><a class="reference internal" href="../fit_api_tutorial.html">MXNet Gluon Fit API</a></li>
<li class="toctree-l5"><a class="reference internal" href="../trainer.html">Trainer</a></li>
<li class="toctree-l5 current"><a class="reference internal" href="index.html">Learning Rates</a><ul class="current">
<li class="toctree-l6"><a class="reference internal" href="learning_rate_finder.html">Learning Rate Finder</a></li>
<li class="toctree-l6 current"><a class="current reference internal" href="#">Learning Rate Schedules</a></li>
<li class="toctree-l6"><a class="reference internal" href="learning_rate_schedules_advanced.html">Advanced Learning Rate Schedules</a></li>
</ul>
</li>
<li class="toctree-l5"><a class="reference internal" href="../normalization/index.html">Normalization Blocks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../kvstore/index.html">KVStore</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../kvstore/kvstore.html">Distributed Key-Value Store</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../ndarray/index.html">NDArray</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/01-ndarray-intro.html">An Intro: Manipulate Data the MXNet Way with NDArray</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/02-ndarray-operations.html">NDArray Operations</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/03-ndarray-contexts.html">NDArray Contexts</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/gotchas_numpy_in_mxnet.html">Gotchas using NumPy in Apache MXNet</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/sparse/index.html">Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../ndarray/sparse/csr.html">CSRNDArray - NDArray in Compressed Sparse Row Storage Format</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../ndarray/sparse/row_sparse.html">RowSparseNDArray - NDArray for Sparse Gradient Updates</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../ndarray/sparse/train.html">Train a Linear Regression Model with Sparse Symbols</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../ndarray/sparse/train_gluon.html">Sparse NDArrays with Gluon</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../onnx/index.html">ONNX</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../onnx/fine_tuning_gluon.html">Fine-tuning an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../onnx/inference_on_onnx_model.html">Running inference on MXNet/Gluon from an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../onnx/super_resolution.html">Importing an ONNX model into MXNet</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/deploy/export/onnx.html">Export ONNX Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../optimizer/index.html">Optimizers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../viz/index.html">Visualization</a><ul>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/visualize_graph">Visualize networks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../performance/index.html">Performance</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../performance/compression/index.html">Compression</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/compression/int8.html">Deploy with int-8</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/float16">Float16</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/gradient_compression">Gradient Compression</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/int8_inference.html">GluonCV with Quantized Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../performance/backend/index.html">Accelerated Backend Tools</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/mkldnn/index.html">Intel MKL-DNN</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../../performance/backend/mkldnn/mkldnn_quantization.html">Quantize with MKL-DNN backend</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../../performance/backend/mkldnn/mkldnn_quantization.html#Improving-accuracy-with-Intel®-Neural-Compressor">Improving accuracy with Intel® Neural Compressor</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../../performance/backend/mkldnn/mkldnn_readme.html">Install MXNet with MKL-DNN</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/tensorrt/index.html">TensorRT</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../../performance/backend/tensorrt/tensorrt.html">Optimizing Deep Learning Computation Graphs with TensorRT</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/tvm.html">Use TVM</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/profiler.html">Profiling MXNet Models</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/amp.html">Using AMP: Automatic Mixed Precision</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../deploy/index.html">Deployment</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../deploy/export/index.html">Export</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/export/onnx.html">Exporting to ONNX format</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/export_network.html">Export Gluon CV Models</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/save_load_params.html">Save / Load Parameters</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../deploy/inference/index.html">Inference</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/inference/cpp.html">Deploy into C++</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/inference/image_classification_jetson.html">Image Classication using pretrained ResNet-50 model on Jetson module</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/inference/scala.html">Deploy into a Java or Scala Environment</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/inference/wine_detector.html">Real-time Object Detection with MXNet On The Raspberry Pi</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../deploy/run-on-aws/index.html">Run on AWS</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/run-on-aws/use_ec2.html">Run on an EC2 Instance</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/run-on-aws/use_sagemaker.html">Run on Amazon SageMaker</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/run-on-aws/cloud.html">MXNet on the Cloud</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../extend/index.html">Extend</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../extend/custom_layer.html">Custom Layers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../extend/customop.html">Custom Numpy Operators</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/new_op">New Operator Creation</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/add_op_in_backend">New Operator in MXNet Backend</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../../api/index.html">Python API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/ndarray/index.html">mxnet.ndarray</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/ndarray.html">ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/contrib/index.html">ndarray.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/image/index.html">ndarray.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/linalg/index.html">ndarray.linalg</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/op/index.html">ndarray.op</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/random/index.html">ndarray.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/register/index.html">ndarray.register</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/sparse/index.html">ndarray.sparse</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/utils/index.html">ndarray.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/gluon/index.html">mxnet.gluon</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/block.html">gluon.Block</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/hybrid_block.html">gluon.HybridBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/symbol_block.html">gluon.SymbolBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/constant.html">gluon.Constant</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/parameter.html">gluon.Parameter</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/parameter_dict.html">gluon.ParameterDict</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/trainer.html">gluon.Trainer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/contrib/index.html">gluon.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/data/index.html">gluon.data</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../../api/gluon/data/vision/index.html">data.vision</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../../../api/gluon/data/vision/datasets/index.html">vision.datasets</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../../../api/gluon/data/vision/transforms/index.html">vision.transforms</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/loss/index.html">gluon.loss</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/model_zoo/index.html">gluon.model_zoo.vision</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/nn/index.html">gluon.nn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/rnn/index.html">gluon.rnn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/utils/index.html">gluon.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/autograd/index.html">mxnet.autograd</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/initializer/index.html">mxnet.initializer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/optimizer/index.html">mxnet.optimizer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/lr_scheduler/index.html">mxnet.lr_scheduler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/metric/index.html">mxnet.metric</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/kvstore/index.html">mxnet.kvstore</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/symbol/index.html">mxnet.symbol</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/symbol.html">symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/contrib/index.html">symbol.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/image/index.html">symbol.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/linalg/index.html">symbol.linalg</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/op/index.html">symbol.op</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/random/index.html">symbol.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/register/index.html">symbol.register</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/sparse/index.html">symbol.sparse</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/module/index.html">mxnet.module</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/contrib/index.html">mxnet.contrib</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/autograd/index.html">contrib.autograd</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/io/index.html">contrib.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/ndarray/index.html">contrib.ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/onnx/index.html">contrib.onnx</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/quantization/index.html">contrib.quantization</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/symbol/index.html">contrib.symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/tensorboard/index.html">contrib.tensorboard</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/tensorrt/index.html">contrib.tensorrt</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/text/index.html">contrib.text</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/mxnet/index.html">mxnet</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/attribute/index.html">mxnet.attribute</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/base/index.html">mxnet.base</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/callback/index.html">mxnet.callback</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/context/index.html">mxnet.context</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/engine/index.html">mxnet.engine</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/executor/index.html">mxnet.executor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/executor_manager/index.html">mxnet.executor_manager</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/image/index.html">mxnet.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/io/index.html">mxnet.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/kvstore_server/index.html">mxnet.kvstore_server</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/libinfo/index.html">mxnet.libinfo</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/log/index.html">mxnet.log</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/model/index.html">mxnet.model</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/monitor/index.html">mxnet.monitor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/name/index.html">mxnet.name</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/notebook/index.html">mxnet.notebook</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/operator/index.html">mxnet.operator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/profiler/index.html">mxnet.profiler</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/random/index.html">mxnet.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/recordio/index.html">mxnet.recordio</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/registry/index.html">mxnet.registry</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/rtc/index.html">mxnet.rtc</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/runtime/index.html">mxnet.runtime</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/test_utils/index.html">mxnet.test_utils</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/torch/index.html">mxnet.torch</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/util/index.html">mxnet.util</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/visualization/index.html">mxnet.visualization</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</div>
</header>
<main class="mdl-layout__content" tabIndex="0">
<script type="text/javascript" src="../../../../../_static/sphinx_materialdesign_theme.js "></script>
<script type="text/javascript" src="../../../../../_static/feedback.js"></script>
<header class="mdl-layout__drawer">
<div class="globaltoc">
<span class="mdl-layout-title toc">Table Of Contents</span>
<nav class="mdl-navigation">
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="../../../../index.html">Python Tutorials</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../../../../getting-started/index.html">Getting Started</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../getting-started/crash-course/index.html">Crash Course</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/1-ndarray.html">Manipulate data with <code class="docutils literal notranslate"><span class="pre">ndarray</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/2-nn.html">Create a neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/3-autograd.html">Automatic differentiation with <code class="docutils literal notranslate"><span class="pre">autograd</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/4-train.html">Train the neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/5-predict.html">Predict with a pre-trained model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/crash-course/6-use_gpus.html">Use GPUs</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../getting-started/to-mxnet/index.html">Moving to MXNet from Other Frameworks</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../getting-started/to-mxnet/pytorch.html">PyTorch vs Apache MXNet</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../getting-started/gluon_from_experiment_to_deployment.html">Gluon: from experiment to deployment</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../getting-started/logistic_regression_explained.html">Logistic regression explained</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/image/mnist.html">MNIST</a></li>
</ul>
</li>
<li class="toctree-l2 current"><a class="reference internal" href="../../../index.html">Packages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../../../autograd/index.html">Automatic Differentiation</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="../../index.html">Gluon</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="../../blocks/index.html">Blocks</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/custom-layer.html">Custom Layers</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/custom_layer_beginners.html">Customer Layers (Beginners)</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/hybridize.html">Hybridize</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/init.html">Initialization</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/naming.html">Parameter and Block Naming</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/nn.html">Layers and Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/parameters.html">Parameter Management</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/save_load_params.html">Saving and Loading Gluon Models</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../blocks/activations/activations.html">Activation Blocks</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../data/index.html">Data Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../data/data_augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/data_augmentation.html#Spatial-Augmentation">Spatial Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/data_augmentation.html#Color-Augmentation">Color Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/data_augmentation.html#Composed-Augmentations">Composed Augmentations</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/datasets.html">Gluon <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s and <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/datasets.html#Using-own-data-with-included-Datasets">Using own data with included <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/datasets.html#Using-own-data-with-custom-Datasets">Using own data with custom <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../data/datasets.html#Appendix:-Upgrading-from-Module-DataIter-to-Gluon-DataLoader">Appendix: Upgrading from Module <code class="docutils literal notranslate"><span class="pre">DataIter</span></code> to Gluon <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../image/index.html">Image Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../image/image-augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../image/info_gan.html">Image similarity search with InfoGAN</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../image/mnist.html">Handwritten Digit Recognition</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../image/pretrained_models.html">Using pre-trained models in MXNet</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../loss/index.html">Losses</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../loss/custom-loss.html">Custom Loss Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../loss/kl_divergence.html">Kullback-Leibler (KL) Divergence</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../loss/loss.html">Loss functions</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../text/index.html">Text Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../text/gnmt.html">Google Neural Machine Translation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../text/transformer.html">Machine Translation with Transformer</a></li>
</ul>
</li>
<li class="toctree-l4 current"><a class="reference internal" href="../index.html">Training</a><ul class="current">
<li class="toctree-l5"><a class="reference internal" href="../fit_api_tutorial.html">MXNet Gluon Fit API</a></li>
<li class="toctree-l5"><a class="reference internal" href="../trainer.html">Trainer</a></li>
<li class="toctree-l5 current"><a class="reference internal" href="index.html">Learning Rates</a><ul class="current">
<li class="toctree-l6"><a class="reference internal" href="learning_rate_finder.html">Learning Rate Finder</a></li>
<li class="toctree-l6 current"><a class="current reference internal" href="#">Learning Rate Schedules</a></li>
<li class="toctree-l6"><a class="reference internal" href="learning_rate_schedules_advanced.html">Advanced Learning Rate Schedules</a></li>
</ul>
</li>
<li class="toctree-l5"><a class="reference internal" href="../normalization/index.html">Normalization Blocks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../kvstore/index.html">KVStore</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../kvstore/kvstore.html">Distributed Key-Value Store</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../ndarray/index.html">NDArray</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/01-ndarray-intro.html">An Intro: Manipulate Data the MXNet Way with NDArray</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/02-ndarray-operations.html">NDArray Operations</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/03-ndarray-contexts.html">NDArray Contexts</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/gotchas_numpy_in_mxnet.html">Gotchas using NumPy in Apache MXNet</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../ndarray/sparse/index.html">Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../ndarray/sparse/csr.html">CSRNDArray - NDArray in Compressed Sparse Row Storage Format</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../ndarray/sparse/row_sparse.html">RowSparseNDArray - NDArray for Sparse Gradient Updates</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../ndarray/sparse/train.html">Train a Linear Regression Model with Sparse Symbols</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../ndarray/sparse/train_gluon.html">Sparse NDArrays with Gluon</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../onnx/index.html">ONNX</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../onnx/fine_tuning_gluon.html">Fine-tuning an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../onnx/inference_on_onnx_model.html">Running inference on MXNet/Gluon from an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../onnx/super_resolution.html">Importing an ONNX model into MXNet</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/deploy/export/onnx.html">Export ONNX Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../optimizer/index.html">Optimizers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../viz/index.html">Visualization</a><ul>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/visualize_graph">Visualize networks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../performance/index.html">Performance</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../performance/compression/index.html">Compression</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/compression/int8.html">Deploy with int-8</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/float16">Float16</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/gradient_compression">Gradient Compression</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/int8_inference.html">GluonCV with Quantized Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../performance/backend/index.html">Accelerated Backend Tools</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/mkldnn/index.html">Intel MKL-DNN</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../../performance/backend/mkldnn/mkldnn_quantization.html">Quantize with MKL-DNN backend</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../../performance/backend/mkldnn/mkldnn_quantization.html#Improving-accuracy-with-Intel®-Neural-Compressor">Improving accuracy with Intel® Neural Compressor</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../../performance/backend/mkldnn/mkldnn_readme.html">Install MXNet with MKL-DNN</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/tensorrt/index.html">TensorRT</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../../performance/backend/tensorrt/tensorrt.html">Optimizing Deep Learning Computation Graphs with TensorRT</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/tvm.html">Use TVM</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/profiler.html">Profiling MXNet Models</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../performance/backend/amp.html">Using AMP: Automatic Mixed Precision</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../deploy/index.html">Deployment</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../deploy/export/index.html">Export</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/export/onnx.html">Exporting to ONNX format</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/export_network.html">Export Gluon CV Models</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/save_load_params.html">Save / Load Parameters</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../deploy/inference/index.html">Inference</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/inference/cpp.html">Deploy into C++</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/inference/image_classification_jetson.html">Image Classication using pretrained ResNet-50 model on Jetson module</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/inference/scala.html">Deploy into a Java or Scala Environment</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/inference/wine_detector.html">Real-time Object Detection with MXNet On The Raspberry Pi</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../deploy/run-on-aws/index.html">Run on AWS</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/run-on-aws/use_ec2.html">Run on an EC2 Instance</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/run-on-aws/use_sagemaker.html">Run on Amazon SageMaker</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../../deploy/run-on-aws/cloud.html">MXNet on the Cloud</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../extend/index.html">Extend</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../extend/custom_layer.html">Custom Layers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../extend/customop.html">Custom Numpy Operators</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/new_op">New Operator Creation</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/add_op_in_backend">New Operator in MXNet Backend</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../../api/index.html">Python API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/ndarray/index.html">mxnet.ndarray</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/ndarray.html">ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/contrib/index.html">ndarray.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/image/index.html">ndarray.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/linalg/index.html">ndarray.linalg</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/op/index.html">ndarray.op</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/random/index.html">ndarray.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/register/index.html">ndarray.register</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/sparse/index.html">ndarray.sparse</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/ndarray/utils/index.html">ndarray.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/gluon/index.html">mxnet.gluon</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/block.html">gluon.Block</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/hybrid_block.html">gluon.HybridBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/symbol_block.html">gluon.SymbolBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/constant.html">gluon.Constant</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/parameter.html">gluon.Parameter</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/parameter_dict.html">gluon.ParameterDict</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/trainer.html">gluon.Trainer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/contrib/index.html">gluon.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/data/index.html">gluon.data</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../../api/gluon/data/vision/index.html">data.vision</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../../../api/gluon/data/vision/datasets/index.html">vision.datasets</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../../../api/gluon/data/vision/transforms/index.html">vision.transforms</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/loss/index.html">gluon.loss</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/model_zoo/index.html">gluon.model_zoo.vision</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/nn/index.html">gluon.nn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/rnn/index.html">gluon.rnn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/gluon/utils/index.html">gluon.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/autograd/index.html">mxnet.autograd</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/initializer/index.html">mxnet.initializer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/optimizer/index.html">mxnet.optimizer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/lr_scheduler/index.html">mxnet.lr_scheduler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/metric/index.html">mxnet.metric</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/kvstore/index.html">mxnet.kvstore</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/symbol/index.html">mxnet.symbol</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/symbol.html">symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/contrib/index.html">symbol.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/image/index.html">symbol.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/linalg/index.html">symbol.linalg</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/op/index.html">symbol.op</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/random/index.html">symbol.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/register/index.html">symbol.register</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/symbol/sparse/index.html">symbol.sparse</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/module/index.html">mxnet.module</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/contrib/index.html">mxnet.contrib</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/autograd/index.html">contrib.autograd</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/io/index.html">contrib.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/ndarray/index.html">contrib.ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/onnx/index.html">contrib.onnx</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/quantization/index.html">contrib.quantization</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/symbol/index.html">contrib.symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/tensorboard/index.html">contrib.tensorboard</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/tensorrt/index.html">contrib.tensorrt</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/contrib/text/index.html">contrib.text</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../../api/mxnet/index.html">mxnet</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/attribute/index.html">mxnet.attribute</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/base/index.html">mxnet.base</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/callback/index.html">mxnet.callback</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/context/index.html">mxnet.context</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/engine/index.html">mxnet.engine</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/executor/index.html">mxnet.executor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/executor_manager/index.html">mxnet.executor_manager</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/image/index.html">mxnet.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/io/index.html">mxnet.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/kvstore_server/index.html">mxnet.kvstore_server</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/libinfo/index.html">mxnet.libinfo</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/log/index.html">mxnet.log</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/model/index.html">mxnet.model</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/monitor/index.html">mxnet.monitor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/name/index.html">mxnet.name</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/notebook/index.html">mxnet.notebook</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/operator/index.html">mxnet.operator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/profiler/index.html">mxnet.profiler</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/random/index.html">mxnet.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/recordio/index.html">mxnet.recordio</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/registry/index.html">mxnet.registry</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/rtc/index.html">mxnet.rtc</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/runtime/index.html">mxnet.runtime</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/test_utils/index.html">mxnet.test_utils</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/torch/index.html">mxnet.torch</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/util/index.html">mxnet.util</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../../api/mxnet/visualization/index.html">mxnet.visualization</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</div>
</header>
<div class="document">
<div class="page-content" role="main">
<!--- Licensed to the Apache Software Foundation (ASF) under one --><!--- or more contributor license agreements. See the NOTICE file --><!--- distributed with this work for additional information --><!--- regarding copyright ownership. The ASF licenses this file --><!--- to you under the Apache License, Version 2.0 (the --><!--- "License"); you may not use this file except in compliance --><!--- with the License. You may obtain a copy of the License at --><!--- http://www.apache.org/licenses/LICENSE-2.0 --><!--- Unless required by applicable law or agreed to in writing, --><!--- software distributed under the License is distributed on an --><!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --><!--- KIND, either express or implied. See the License for the --><!--- specific language governing permissions and limitations --><!--- under the License. --><div class="section" id="Learning-Rate-Schedules">
<h1>Learning Rate Schedules<a class="headerlink" href="#Learning-Rate-Schedules" title="Permalink to this headline"></a></h1>
<p>Setting the learning rate for stochastic gradient descent (SGD) is crucially important when training neural networks because it controls both the speed of convergence and the ultimate performance of the network. One of the simplest learning rate strategies is to use a fixed learning rate throughout training. Choosing a small learning rate allows the optimizer to find good solutions, but this comes at the expense of limiting the initial speed of convergence. Changing the learning rate over time can overcome this tradeoff.</p>
<p>Schedules define how the learning rate changes over time and are typically specified for each epoch or iteration (i.e. batch) of training. Schedules differ from adaptive methods (such as AdaDelta and Adam) because they:</p>
<ul class="simple">
<li><p>change the global learning rate for the optimizer, rather than parameter-wise learning rates</p></li>
<li><p>don’t take feedback from the training process and are specified beforehand</p></li>
</ul>
<p>In this tutorial, we visualize the schedules defined in <code class="docutils literal notranslate"><span class="pre">mx.lr_scheduler</span></code>, show how to implement custom schedules, and walk through an example of using a schedule while training a model. Since schedules are passed to <code class="docutils literal notranslate"><span class="pre">mx.optimizer.Optimizer</span></code> classes, they work with both the Module and Gluon APIs.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">print_function</span>
<span class="kn">import</span> <span class="nn">math</span>
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
<span class="kn">import</span> <span class="nn">mxnet</span> <span class="k">as</span> <span class="nn">mx</span>
<span class="kn">from</span> <span class="nn">mxnet.gluon</span> <span class="kn">import</span> <span class="n">nn</span>
<span class="kn">from</span> <span class="nn">mxnet.gluon.data.vision</span> <span class="kn">import</span> <span class="n">transforms</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="o">%</span><span class="n">matplotlib</span> <span class="n">inline</span>
</pre></div>
</div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">plot_schedule</span><span class="p">(</span><span class="n">schedule_fn</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1500</span><span class="p">):</span>
<span class="c1"># Iteration count starting at 1</span>
<span class="n">iterations</span> <span class="o">=</span> <span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">)]</span>
<span class="n">lrs</span> <span class="o">=</span> <span class="p">[</span><span class="n">schedule_fn</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">iterations</span><span class="p">]</span>
<span class="n">plt</span><span class="o">.</span><span class="n">scatter</span><span class="p">(</span><span class="n">iterations</span><span class="p">,</span> <span class="n">lrs</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s2">&quot;Iteration&quot;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">ylabel</span><span class="p">(</span><span class="s2">&quot;Learning Rate&quot;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
</pre></div>
</div>
<div class="section" id="Schedules">
<h2>Schedules<a class="headerlink" href="#Schedules" title="Permalink to this headline"></a></h2>
<p>In this section, we take a look at the schedules in <code class="docutils literal notranslate"><span class="pre">mx.lr_scheduler</span></code>. All of these schedules define the learning rate for a given iteration, and it is expected that iterations start at 1 rather than 0. So to find the learning rate for the 100th iteration, you can call <code class="docutils literal notranslate"><span class="pre">schedule(100)</span></code>.</p>
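<p>For example, the snippet below (a minimal sketch; <code class="docutils literal notranslate"><span class="pre">FactorScheduler</span></code> is introduced in the next section) evaluates a schedule at the 100th iteration:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># a minimal sketch: query a schedule directly
import mxnet as mx

schedule = mx.lr_scheduler.FactorScheduler(step=250, factor=0.5)
schedule.base_lr = 1.0
# no step boundary has been crossed yet, so we get base_lr back
print(schedule(100))  # 1.0
</pre></div>
</div>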
<div class="section" id="Stepwise-Decay-Schedule">
<h3>Stepwise Decay Schedule<a class="headerlink" href="#Stepwise-Decay-Schedule" title="Permalink to this headline"></a></h3>
<p>One of the most commonly used learning rate schedules is called stepwise decay, where the learning rate is reduced by a factor at certain intervals. MXNet implements a <code class="docutils literal notranslate"><span class="pre">FactorScheduler</span></code> for equally spaced intervals, and a <code class="docutils literal notranslate"><span class="pre">MultiFactorScheduler</span></code> for greater control. We start with an example of halving the learning rate every 250 iterations. More precisely, the learning rate will be multiplied by <code class="docutils literal notranslate"><span class="pre">factor</span></code> <em>after</em> the <code class="docutils literal notranslate"><span class="pre">step</span></code> index and at every multiple thereafter. So in the example below the learning rate of the 250th iteration will be 1, and that of the 251st iteration will be 0.5.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">schedule</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">lr_scheduler</span><span class="o">.</span><span class="n">FactorScheduler</span><span class="p">(</span><span class="n">step</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">factor</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
<span class="n">schedule</span><span class="o">.</span><span class="n">base_lr</span> <span class="o">=</span> <span class="mi">1</span>
<span class="n">plot_schedule</span><span class="p">(</span><span class="n">schedule</span><span class="p">)</span>
</pre></div>
</div>
<p><img alt="png" src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/lr_schedules/cosine.png" /></p>
<p>Note: the <code class="docutils literal notranslate"><span class="pre">base_lr</span></code> is used to determine the initial learning rate. It takes a default value of 0.01 since we inherit from <code class="docutils literal notranslate"><span class="pre">mx.lr_scheduler.LRScheduler</span></code>, but it can be set as a property of the schedule. We will see later in this tutorial that <code class="docutils literal notranslate"><span class="pre">base_lr</span></code> is set automatically when the <code class="docutils literal notranslate"><span class="pre">lr_schedule</span></code> is provided to the <code class="docutils literal notranslate"><span class="pre">Optimizer</span></code>. Also be aware that the schedules in <code class="docutils literal notranslate"><span class="pre">mx.lr_scheduler</span></code> have state (e.g. counters), so calling a schedule out of order may give unexpected results.</p>
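<p>A quick check of the boundary behaviour described above (a small sketch; we call the schedule in increasing iteration order because of its internal state):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>schedule = mx.lr_scheduler.FactorScheduler(step=250, factor=0.5)
schedule.base_lr = 1.0
print(schedule(250))  # 1.0: the drop happens *after* the step index
print(schedule(251))  # 0.5
</pre></div>
</div>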
<p>We can define non-uniform intervals with <code class="docutils literal notranslate"><span class="pre">MultiFactorScheduler</span></code>, and in the example below we halve the learning rate <em>after</em> the 250th, 750th (i.e. a step length of 500 iterations) and 900th (a step length of 150 iterations) iterations. As before, the learning rate of the 250th iteration will be 1 and that of the 251st iteration will be 0.5.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">schedule</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">lr_scheduler</span><span class="o">.</span><span class="n">MultiFactorScheduler</span><span class="p">(</span><span class="n">step</span><span class="o">=</span><span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">750</span><span class="p">,</span> <span class="mi">900</span><span class="p">],</span> <span class="n">factor</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
<span class="n">schedule</span><span class="o">.</span><span class="n">base_lr</span> <span class="o">=</span> <span class="mi">1</span>
<span class="n">plot_schedule</span><span class="p">(</span><span class="n">schedule</span><span class="p">)</span>
</pre></div>
</div>
<p><img alt="png" src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/lr_schedules/cosine.png" /></p>
</div>
<div class="section" id="Polynomial-Schedule">
<h3>Polynomial Schedule<a class="headerlink" href="#Polynomial-Schedule" title="Permalink to this headline"></a></h3>
<p>Stepwise schedules, and the discontinuities they introduce, may sometimes lead to instability in the optimization, so in some cases smoother schedules are preferred. <code class="docutils literal notranslate"><span class="pre">PolyScheduler</span></code> gives a smooth decay using a polynomial function and reaches a learning rate of 0 after <code class="docutils literal notranslate"><span class="pre">max_update</span></code> iterations. In the example below, we have a quadratic function (<code class="docutils literal notranslate"><span class="pre">pwr=2</span></code>) that falls from 0.998 at iteration 1 to 0 at iteration 1000. The learning rate then stays at 0, so nothing will be learnt from <code class="docutils literal notranslate"><span class="pre">max_update</span></code> iterations onwards.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">schedule</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">lr_scheduler</span><span class="o">.</span><span class="n">PolyScheduler</span><span class="p">(</span><span class="n">max_update</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">base_lr</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">pwr</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="n">plot_schedule</span><span class="p">(</span><span class="n">schedule</span><span class="p">)</span>
</pre></div>
</div>
<p><img alt="png" src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/lr_schedules/cosine.png" /></p>
<p>Note: unlike <code class="docutils literal notranslate"><span class="pre">FactorScheduler</span></code>, the <code class="docutils literal notranslate"><span class="pre">base_lr</span></code> is set as an argument when instantiating the schedule. And we don’t evaluate at <code class="docutils literal notranslate"><span class="pre">iteration=0</span></code> (which would return <code class="docutils literal notranslate"><span class="pre">base_lr</span></code>) since we are working with schedules starting at <code class="docutils literal notranslate"><span class="pre">iteration=1</span></code>.</p>
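<p>Evaluating the endpoints is a quick way to check the shape; the values follow from the formula above:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>schedule = mx.lr_scheduler.PolyScheduler(max_update=1000, base_lr=1, pwr=2)
print(schedule(1))     # ~0.998
print(schedule(500))   # 0.25
print(schedule(1000))  # 0.0
</pre></div>
</div>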
</div>
<div class="section" id="Custom-Schedules">
<h3>Custom Schedules<a class="headerlink" href="#Custom-Schedules" title="Permalink to this headline"></a></h3>
<p>You can implement your own custom schedule with a function or callable class that takes an integer denoting the iteration index (starting at 1) and returns a float representing the learning rate to be used for that iteration. In the example below we implement the Cosine Annealing Schedule as a callable class (see the <code class="docutils literal notranslate"><span class="pre">__call__</span></code> method).</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">CosineAnnealingSchedule</span><span class="p">():</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">min_lr</span><span class="p">,</span> <span class="n">max_lr</span><span class="p">,</span> <span class="n">cycle_length</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">min_lr</span> <span class="o">=</span> <span class="n">min_lr</span>
<span class="bp">self</span><span class="o">.</span><span class="n">max_lr</span> <span class="o">=</span> <span class="n">max_lr</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cycle_length</span> <span class="o">=</span> <span class="n">cycle_length</span>
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">iteration</span><span class="p">):</span>
<span class="k">if</span> <span class="n">iteration</span> <span class="o">&lt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cycle_length</span><span class="p">:</span>
<span class="n">unit_cycle</span> <span class="o">=</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="n">math</span><span class="o">.</span><span class="n">cos</span><span class="p">(</span><span class="n">iteration</span> <span class="o">*</span> <span class="n">math</span><span class="o">.</span><span class="n">pi</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">cycle_length</span><span class="p">))</span> <span class="o">/</span> <span class="mi">2</span>
<span class="n">adjusted_cycle</span> <span class="o">=</span> <span class="p">(</span><span class="n">unit_cycle</span> <span class="o">*</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">max_lr</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">min_lr</span><span class="p">))</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">min_lr</span>
<span class="k">return</span> <span class="n">adjusted_cycle</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">min_lr</span>
<span class="n">schedule</span> <span class="o">=</span> <span class="n">CosineAnnealingSchedule</span><span class="p">(</span><span class="n">min_lr</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">max_lr</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">cycle_length</span><span class="o">=</span><span class="mi">1000</span><span class="p">)</span>
<span class="n">plot_schedule</span><span class="p">(</span><span class="n">schedule</span><span class="p">)</span>
</pre></div>
</div>
<p><img alt="png" src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/lr_schedules/cosine.png" /></p>
</div>
</div>
<div class="section" id="Using-Schedules">
<h2>Using Schedules<a class="headerlink" href="#Using-Schedules" title="Permalink to this headline"></a></h2>
<p>We now take a look at how to use a learning rate schedule while training a simple handwritten digit classifier on the MNIST dataset. Our demonstration model is a basic convolutional neural network. We start by preparing our <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code> and defining the network.</p>
<p>As discussed above, the schedule should return a learning rate given a (1-based) iteration index.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Use GPU if one exists, else use CPU</span>
<span class="n">ctx</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">()</span> <span class="k">if</span> <span class="n">mx</span><span class="o">.</span><span class="n">context</span><span class="o">.</span><span class="n">num_gpus</span><span class="p">()</span> <span class="k">else</span> <span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span>
<span class="c1"># MNIST images are 28x28. Total pixels in input layer is 28x28 = 784</span>
<span class="n">num_inputs</span> <span class="o">=</span> <span class="mi">784</span>
<span class="c1"># Clasify the images into one of the 10 digits</span>
<span class="n">num_outputs</span> <span class="o">=</span> <span class="mi">10</span>
<span class="c1"># 64 images in a batch</span>
<span class="n">batch_size</span> <span class="o">=</span> <span class="mi">64</span>
<span class="c1"># Load the training data</span>
<span class="n">train_dataset</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">vision</span><span class="o">.</span><span class="n">MNIST</span><span class="p">(</span><span class="n">train</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">transform_first</span><span class="p">(</span><span class="n">transforms</span><span class="o">.</span><span class="n">ToTensor</span><span class="p">())</span>
<span class="n">train_dataloader</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span><span class="n">train_dataset</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">num_workers</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
<span class="c1"># Build a simple convolutional network</span>
<span class="k">def</span> <span class="nf">build_cnn</span><span class="p">():</span>
<span class="n">net</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">HybridSequential</span><span class="p">()</span>
<span class="k">with</span> <span class="n">net</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span>
<span class="c1"># First convolution</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2D</span><span class="p">(</span><span class="n">channels</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">&#39;relu&#39;</span><span class="p">))</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">MaxPool2D</span><span class="p">(</span><span class="n">pool_size</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">strides</span><span class="o">=</span><span class="mi">2</span><span class="p">))</span>
<span class="c1"># Second convolution</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2D</span><span class="p">(</span><span class="n">channels</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">&#39;relu&#39;</span><span class="p">))</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">MaxPool2D</span><span class="p">(</span><span class="n">pool_size</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">strides</span><span class="o">=</span><span class="mi">2</span><span class="p">))</span>
<span class="c1"># Flatten the output before the fully connected layers</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Flatten</span><span class="p">())</span>
<span class="c1"># First fully connected layers with 512 neurons</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s2">&quot;relu&quot;</span><span class="p">))</span>
<span class="c1"># Second fully connected layer with as many neurons as the number of classes</span>
<span class="n">net</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="n">num_outputs</span><span class="p">))</span>
<span class="k">return</span> <span class="n">net</span>
<span class="n">net</span> <span class="o">=</span> <span class="n">build_cnn</span><span class="p">()</span>
</pre></div>
</div>
<p>We then initialize our network (initialization is technically deferred until the first batch is passed) and define the loss.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Initialize the parameters with Xavier initializer</span>
<span class="n">net</span><span class="o">.</span><span class="n">collect_params</span><span class="p">()</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Xavier</span><span class="p">(),</span> <span class="n">ctx</span><span class="o">=</span><span class="n">ctx</span><span class="p">)</span>
<span class="c1"># Use cross entropy loss</span>
<span class="n">softmax_cross_entropy</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">loss</span><span class="o">.</span><span class="n">SoftmaxCrossEntropyLoss</span><span class="p">()</span>
</pre></div>
</div>
<p>We’re now ready to create our schedule, and in this example we opt for a stepwise decay schedule using <code class="docutils literal notranslate"><span class="pre">MultiFactorScheduler</span></code>. Since we’re only training a demonstration model for a limited number of epochs (10 in total), we will exaggerate the schedule and drop the learning rate by 90% after the 4th, 7th and 9th epochs. We call these the steps, and the drop occurs <em>after</em> each step index. Schedules are defined in terms of iterations (i.e. training batches), so we must express our steps in iterations too.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">steps_epochs</span> <span class="o">=</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">9</span><span class="p">]</span>
<span class="c1"># assuming we keep partial batches, see `last_batch` parameter of DataLoader</span>
<span class="n">iterations_per_epoch</span> <span class="o">=</span> <span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">train_dataset</span><span class="p">)</span> <span class="o">/</span> <span class="n">batch_size</span><span class="p">)</span>
<span class="c1"># iterations just before starts of epochs (iterations are 1-indexed)</span>
<span class="n">steps_iterations</span> <span class="o">=</span> <span class="p">[</span><span class="n">s</span><span class="o">*</span><span class="n">iterations_per_epoch</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">steps_epochs</span><span class="p">]</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Learning rate drops after iterations: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">steps_iterations</span><span class="p">))</span>
</pre></div>
</div>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Learning</span> <span class="n">rate</span> <span class="n">drops</span> <span class="n">after</span> <span class="n">iterations</span><span class="p">:</span> <span class="p">[</span><span class="mi">3752</span><span class="p">,</span> <span class="mi">6566</span><span class="p">,</span> <span class="mi">8442</span><span class="p">]</span>
</pre></div>
</div>
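<p>As a sanity check, mapping these iteration indices back to epochs recovers the step epochs we chose (a small sketch using the variables defined above):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>print([math.ceil(i / iterations_per_epoch) for i in steps_iterations])  # [4, 7, 9]
</pre></div>
</div>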
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">schedule</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">lr_scheduler</span><span class="o">.</span><span class="n">MultiFactorScheduler</span><span class="p">(</span><span class="n">step</span><span class="o">=</span><span class="n">steps_iterations</span><span class="p">,</span> <span class="n">factor</span><span class="o">=</span><span class="mf">0.1</span><span class="p">)</span>
</pre></div>
</div>
<p><strong>We create our <code class="docutils literal notranslate"><span class="pre">Optimizer</span></code> and pass the schedule via the <code class="docutils literal notranslate"><span class="pre">lr_scheduler</span></code> parameter.</strong> In this example we’re using Stochastic Gradient Descent.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">sgd_optimizer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">SGD</span><span class="p">(</span><span class="n">learning_rate</span><span class="o">=</span><span class="mf">0.03</span><span class="p">,</span> <span class="n">lr_scheduler</span><span class="o">=</span><span class="n">schedule</span><span class="p">)</span>
</pre></div>
</div>
<p>We use this optimizer (with schedule) in our <code class="docutils literal notranslate"><span class="pre">Trainer</span></code> and train for 10 epochs. Alternatively, we could have set the <code class="docutils literal notranslate"><span class="pre">optimizer</span></code> to the string <code class="docutils literal notranslate"><span class="pre">sgd</span></code> and passed a dictionary of the optimizer parameters directly to the trainer using <code class="docutils literal notranslate"><span class="pre">optimizer_params</span></code>; see the sketch after the next snippet.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">trainer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">Trainer</span><span class="p">(</span><span class="n">params</span><span class="o">=</span><span class="n">net</span><span class="o">.</span><span class="n">collect_params</span><span class="p">(),</span> <span class="n">optimizer</span><span class="o">=</span><span class="n">sgd_optimizer</span><span class="p">)</span>
</pre></div>
</div>
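<p>For reference, the string-based alternative mentioned above would look like the following sketch, with the same hyperparameters:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>trainer = mx.gluon.Trainer(params=net.collect_params(),
                           optimizer='sgd',
                           optimizer_params={'learning_rate': 0.03,
                                             'lr_scheduler': schedule})
</pre></div>
</div>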
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">num_epochs</span> <span class="o">=</span> <span class="mi">10</span>
<span class="c1"># epoch and batch counts starting at 1</span>
<span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">num_epochs</span><span class="o">+</span><span class="mi">1</span><span class="p">):</span>
<span class="c1"># Iterate through the images and labels in the training data</span>
<span class="k">for</span> <span class="n">batch_num</span><span class="p">,</span> <span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">train_dataloader</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="c1"># get the images and labels</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">as_in_context</span><span class="p">(</span><span class="n">ctx</span><span class="p">)</span>
<span class="n">label</span> <span class="o">=</span> <span class="n">label</span><span class="o">.</span><span class="n">as_in_context</span><span class="p">(</span><span class="n">ctx</span><span class="p">)</span>
<span class="c1"># Ask autograd to record the forward pass</span>
<span class="k">with</span> <span class="n">mx</span><span class="o">.</span><span class="n">autograd</span><span class="o">.</span><span class="n">record</span><span class="p">():</span>
<span class="c1"># Run the forward pass</span>
<span class="n">output</span> <span class="o">=</span> <span class="n">net</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="c1"># Compute the loss</span>
<span class="n">loss</span> <span class="o">=</span> <span class="n">softmax_cross_entropy</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span>
<span class="c1"># Compute gradients</span>
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="c1"># Update parameters</span>
<span class="n">trainer</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="c1"># Show loss and learning rate after first iteration of epoch</span>
<span class="k">if</span> <span class="n">batch_num</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">curr_loss</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">loss</span><span class="p">)</span><span class="o">.</span><span class="n">asscalar</span><span class="p">()</span>
<span class="n">curr_lr</span> <span class="o">=</span> <span class="n">trainer</span><span class="o">.</span><span class="n">learning_rate</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Epoch: </span><span class="si">%d</span><span class="s2">; Batch </span><span class="si">%d</span><span class="s2">; Loss </span><span class="si">%f</span><span class="s2">; LR </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">epoch</span><span class="p">,</span> <span class="n">batch_num</span><span class="p">,</span> <span class="n">curr_loss</span><span class="p">,</span> <span class="n">curr_lr</span><span class="p">))</span>
</pre></div>
</div>
<p>Epoch: 1; Batch 1; Loss 2.304071; LR 0.030000</p>
<p>Epoch: 2; Batch 1; Loss 0.059640; LR 0.030000</p>
<p>Epoch: 3; Batch 1; Loss 0.072601; LR 0.030000</p>
<p>Epoch: 4; Batch 1; Loss 0.042228; LR 0.030000</p>
<p>Epoch: 5; Batch 1; Loss 0.025745; LR 0.003000</p>
<p>Epoch: 6; Batch 1; Loss 0.027391; LR 0.003000</p>
<p>Epoch: 7; Batch 1; Loss 0.048237; LR 0.003000</p>
<p>Epoch: 8; Batch 1; Loss 0.024213; LR 0.000300</p>
<p>Epoch: 9; Batch 1; Loss 0.008892; LR 0.000300</p>
<p>Epoch: 10; Batch 1; Loss 0.006875; LR 0.000030</p>
<p>We see that the learning rate starts at 0.03, and falls to 0.00003 by the end of training as per the schedule we defined.</p>
<div class="section" id="Manually-setting-the-learning-rate:-Gluon-API-only">
<h3>Manually setting the learning rate: Gluon API only<a class="headerlink" href="#Manually-setting-the-learning-rate:-Gluon-API-only" title="Permalink to this headline"></a></h3>
<p>When using the method above you don’t need to keep track of the iteration count or set the learning rate manually, so it is the recommended approach for most cases. Sometimes, though, you might want finer-grained control over the learning rate, and for this Gluon’s <code class="docutils literal notranslate"><span class="pre">Trainer</span></code> provides the <code class="docutils literal notranslate"><span class="pre">set_learning_rate</span></code> method.</p>
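<p>Note that <code class="docutils literal notranslate"><span class="pre">set_learning_rate</span></code> cannot be used when the optimizer already has an <code class="docutils literal notranslate"><span class="pre">lr_scheduler</span></code> attached (it raises an error in that case), which is why the full example below creates its optimizer without one. In isolation the method is just a setter; a minimal sketch with an arbitrary value:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># assumes a trainer whose optimizer has no lr_scheduler attached
trainer = mx.gluon.Trainer(params=net.collect_params(), optimizer=mx.optimizer.SGD())
trainer.set_learning_rate(0.005)
print(trainer.learning_rate)  # 0.005
</pre></div>
</div>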
<p>We replicate the example above, but now keep track of the <code class="docutils literal notranslate"><span class="pre">iteration_idx</span></code>, call the schedule and set the learning rate appropriately using <code class="docutils literal notranslate"><span class="pre">set_learning_rate</span></code>. We also use <code class="docutils literal notranslate"><span class="pre">schedule.base_lr</span></code> to set the initial learning rate for the schedule since we are calling the schedule directly and not using it as part of the <code class="docutils literal notranslate"><span class="pre">Optimizer</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">net</span> <span class="o">=</span> <span class="n">build_cnn</span><span class="p">()</span>
<span class="n">net</span><span class="o">.</span><span class="n">collect_params</span><span class="p">()</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Xavier</span><span class="p">(),</span> <span class="n">ctx</span><span class="o">=</span><span class="n">ctx</span><span class="p">)</span>
<span class="n">schedule</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">lr_scheduler</span><span class="o">.</span><span class="n">MultiFactorScheduler</span><span class="p">(</span><span class="n">step</span><span class="o">=</span><span class="n">steps_iterations</span><span class="p">,</span> <span class="n">factor</span><span class="o">=</span><span class="mf">0.1</span><span class="p">)</span>
<span class="n">schedule</span><span class="o">.</span><span class="n">base_lr</span> <span class="o">=</span> <span class="mf">0.03</span>
<span class="n">sgd_optimizer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">SGD</span><span class="p">()</span>
<span class="n">trainer</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">Trainer</span><span class="p">(</span><span class="n">params</span><span class="o">=</span><span class="n">net</span><span class="o">.</span><span class="n">collect_params</span><span class="p">(),</span> <span class="n">optimizer</span><span class="o">=</span><span class="n">sgd_optimizer</span><span class="p">)</span>
<span class="n">iteration_idx</span> <span class="o">=</span> <span class="mi">1</span>
<span class="n">num_epochs</span> <span class="o">=</span> <span class="mi">10</span>
<span class="c1"># epoch and batch counts starting at 1</span>
<span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">num_epochs</span> <span class="o">+</span> <span class="mi">1</span><span class="p">):</span>
<span class="c1"># Iterate through the images and labels in the training data</span>
<span class="k">for</span> <span class="n">batch_num</span><span class="p">,</span> <span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">train_dataloader</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="c1"># get the images and labels</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">as_in_context</span><span class="p">(</span><span class="n">ctx</span><span class="p">)</span>
<span class="n">label</span> <span class="o">=</span> <span class="n">label</span><span class="o">.</span><span class="n">as_in_context</span><span class="p">(</span><span class="n">ctx</span><span class="p">)</span>
<span class="c1"># Ask autograd to record the forward pass</span>
<span class="k">with</span> <span class="n">mx</span><span class="o">.</span><span class="n">autograd</span><span class="o">.</span><span class="n">record</span><span class="p">():</span>
<span class="c1"># Run the forward pass</span>
<span class="n">output</span> <span class="o">=</span> <span class="n">net</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="c1"># Compute the loss</span>
<span class="n">loss</span> <span class="o">=</span> <span class="n">softmax_cross_entropy</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span>
<span class="c1"># Compute gradients</span>
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="c1"># Update the learning rate</span>
<span class="n">lr</span> <span class="o">=</span> <span class="n">schedule</span><span class="p">(</span><span class="n">iteration_idx</span><span class="p">)</span>
<span class="n">trainer</span><span class="o">.</span><span class="n">set_learning_rate</span><span class="p">(</span><span class="n">lr</span><span class="p">)</span>
<span class="c1"># Update parameters</span>
<span class="n">trainer</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="c1"># Show loss and learning rate after first iteration of epoch</span>
<span class="k">if</span> <span class="n">batch_num</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">curr_loss</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">loss</span><span class="p">)</span><span class="o">.</span><span class="n">asscalar</span><span class="p">()</span>
<span class="n">curr_lr</span> <span class="o">=</span> <span class="n">trainer</span><span class="o">.</span><span class="n">learning_rate</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Epoch: </span><span class="si">%d</span><span class="s2">; Batch </span><span class="si">%d</span><span class="s2">; Loss </span><span class="si">%f</span><span class="s2">; LR </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">epoch</span><span class="p">,</span> <span class="n">batch_num</span><span class="p">,</span> <span class="n">curr_loss</span><span class="p">,</span> <span class="n">curr_lr</span><span class="p">))</span>
<span class="n">iteration_idx</span> <span class="o">+=</span> <span class="mi">1</span>
</pre></div>
</div>
<p>Epoch: 1; Batch 1; Loss 2.334119; LR 0.030000</p>
<p>Epoch: 2; Batch 1; Loss 0.178930; LR 0.030000</p>
<p>Epoch: 3; Batch 1; Loss 0.142640; LR 0.030000</p>
<p>Epoch: 4; Batch 1; Loss 0.041116; LR 0.030000</p>
<p>Epoch: 5; Batch 1; Loss 0.051049; LR 0.003000</p>
<p>Epoch: 6; Batch 1; Loss 0.027170; LR 0.003000</p>
<p>Epoch: 7; Batch 1; Loss 0.083776; LR 0.003000</p>
<p>Epoch: 8; Batch 1; Loss 0.082553; LR 0.000300</p>
<p>Epoch: 9; Batch 1; Loss 0.027984; LR 0.000300</p>
<p>Epoch: 10; Batch 1; Loss 0.030896; LR 0.000030</p>
<p>Once again, we see the learning rate start at 0.03, and fall to 0.00003 by the end of training as per the schedule we defined.</p>
</div>
</div>
<div class="section" id="Advanced-Schedules">
<h2>Advanced Schedules<a class="headerlink" href="#Advanced-Schedules" title="Permalink to this headline"></a></h2>
<p>We have a related tutorial on <a class="reference internal" href="learning_rate_schedules_advanced.html">Advanced Learning Rate Schedules</a> that shows reference implementations of schedules that give state-of-the-art results. It covers cyclical schedules applied to a variety of cycle shapes, as well as other techniques such as warm-up and cool-down.</p>
<!-- INSERT SOURCE DOWNLOAD BUTTONS --></div>
</div>
<hr class="feedback-hr-top" />
<div class="feedback-container">
<div class="feedback-question">Did this page help you?</div>
<div class="feedback-answer-container">
<div class="feedback-answer yes-link" data-response="yes">Yes</div>
<div class="feedback-answer no-link" data-response="no">No</div>
</div>
<div class="feedback-thank-you">Thanks for your feedback!</div>
</div>
<hr class="feedback-hr-bottom" />
</div>
<div class="side-doc-outline">
<div class="side-doc-outline--content">
<div class="localtoc">
<p class="caption">
<span class="caption-text">Table Of Contents</span>
</p>
<ul>
<li><a class="reference internal" href="#">Learning Rate Schedules</a><ul>
<li><a class="reference internal" href="#Schedules">Schedules</a><ul>
<li><a class="reference internal" href="#Stepwise-Decay-Schedule">Stepwise Decay Schedule</a></li>
<li><a class="reference internal" href="#Polynomial-Schedule">Polynomial Schedule</a></li>
<li><a class="reference internal" href="#Custom-Schedules">Custom Schedules</a></li>
</ul>
</li>
<li><a class="reference internal" href="#Using-Schedules">Using Schedules</a><ul>
<li><a class="reference internal" href="#Manually-setting-the-learning-rate:-Gluon-API-only">Manually setting the learning rate: Gluon API only</a></li>
</ul>
</li>
<li><a class="reference internal" href="#Advanced-Schedules">Advanced Schedules</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div>
<div class="clearer"></div>
</div><div class="pagenation">
<a id="button-prev" href="learning_rate_finder.html" class="mdl-button mdl-js-button mdl-js-ripple-effect mdl-button--colored" role="botton" accesskey="P">
<i class="pagenation-arrow-L fas fa-arrow-left fa-lg"></i>
<div class="pagenation-text">
<span class="pagenation-direction">Previous</span>
<div>Learning Rate Finder</div>
</div>
</a>
<a id="button-next" href="learning_rate_schedules_advanced.html" class="mdl-button mdl-js-button mdl-js-ripple-effect mdl-button--colored" role="botton" accesskey="N">
<i class="pagenation-arrow-R fas fa-arrow-right fa-lg"></i>
<div class="pagenation-text">
<span class="pagenation-direction">Next</span>
<div>Advanced Learning Rate Schedules</div>
</div>
</a>
</div>
<footer class="site-footer h-card">
<div class="wrapper">
<div class="row">
<div class="col-4">
<h4 class="footer-category-title">Resources</h4>
<ul class="contact-list">
<li><a class="u-email" href="mailto:dev@mxnet.apache.org">Dev list</a></li>
<li><a class="u-email" href="mailto:user@mxnet.apache.org">User mailing list</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home">Developer Wiki</a></li>
<li><a href="https://issues.apache.org/jira/projects/MXNET/issues">Jira Tracker</a></li>
<li><a href="https://github.com/apache/mxnet/labels/Roadmap">Github Roadmap</a></li>
<li><a href="https://medium.com/apache-mxnet">Blog</a></li>
<li><a href="https://discuss.mxnet.io">Forum</a></li>
<li><a href="/community/contribute">Contribute</a></li>
</ul>
</div>
<div class="col-4"><ul class="social-media-list"><li><a href="https://github.com/apache/mxnet"><svg class="svg-icon"><use xlink:href="../../../../../_static/minima-social-icons.svg#github"></use></svg> <span class="username">apache/mxnet</span></a></li><li><a href="https://www.twitter.com/apachemxnet"><svg class="svg-icon"><use xlink:href="../../../../../_static/minima-social-icons.svg#twitter"></use></svg> <span class="username">apachemxnet</span></a></li><li><a href="https://youtube.com/apachemxnet"><svg class="svg-icon"><use xlink:href="../../../../../_static/minima-social-icons.svg#youtube"></use></svg> <span class="username">apachemxnet</span></a></li></ul>
</div>
<div class="col-4 footer-text">
<p>A flexible and efficient library for deep learning.</p>
</div>
</div>
</div>
</footer>
<footer class="site-footer2">
<div class="wrapper">
<div class="row">
<div class="col-3">
<img src="../../../../../_static/apache_incubator_logo.png" class="footer-logo col-2">
</div>
<div class="footer-bottom-warning col-9">
<p>Apache MXNet is an effort undergoing incubation at <a href="http://www.apache.org/">The Apache Software Foundation</a> (ASF), <span style="font-weight:bold">sponsored by the <i>Apache Incubator</i></span>. Incubation is required
of all newly accepted projects until a further review indicates that the infrastructure,
communications, and decision making process have stabilized in a manner consistent with other
successful ASF projects. While incubation status is not necessarily a reflection of the completeness
or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
</p><p>"Copyright © 2017-2018, The Apache Software Foundation Apache MXNet, MXNet, Apache, the Apache
feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the
Apache Software Foundation."</p>
</div>
</div>
</div>
</footer>
</body>
</html>