<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta http-equiv="x-ua-compatible" content="ie=edge">
<meta name="lang:clipboard.copy" content="Copy to clipboard">
<meta name="lang:clipboard.copied" content="Copied to clipboard">
<meta name="lang:search.language" content="en">
<meta name="lang:search.pipeline.stopwords" content="True">
<meta name="lang:search.pipeline.trimmer" content="True">
<meta name="lang:search.result.none" content="No matching documents">
<meta name="lang:search.result.one" content="1 matching document">
<meta name="lang:search.result.other" content="# matching documents">
<meta name="lang:search.tokenizer" content="[\s\-]+">
<link rel="shortcut icon" href="../../assets/images/favicon.png">
<meta name="generator" content="mkdocs-1.0.4, mkdocs-material-4.6.0">
<title>Optimizers - MXNet.jl</title>
<link rel="stylesheet" href="../../assets/stylesheets/application.1b62728e.css">
<script src="../../assets/javascripts/modernizr.268332fc.js"></script>
<link href="https://fonts.gstatic.com" rel="preconnect" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,400,400i,700%7CRoboto+Mono&display=fallback">
<style>body,input{font-family:"Roboto","Helvetica Neue",Helvetica,Arial,sans-serif}code,kbd,pre{font-family:"Roboto Mono","Courier New",Courier,monospace}</style>
<link rel="stylesheet" href="../../assets/fonts/material-icons.css">
<link rel="stylesheet" href="../../assets/Documenter.css">
</head>
<body dir="ltr">
<svg class="md-svg">
<defs>
<svg xmlns="http://www.w3.org/2000/svg" width="416" height="448" viewBox="0 0 416 448" id="__github"><path fill="currentColor" d="M160 304q0 10-3.125 20.5t-10.75 19T128 352t-18.125-8.5-10.75-19T96 304t3.125-20.5 10.75-19T128 256t18.125 8.5 10.75 19T160 304zm160 0q0 10-3.125 20.5t-10.75 19T288 352t-18.125-8.5-10.75-19T256 304t3.125-20.5 10.75-19T288 256t18.125 8.5 10.75 19T320 304zm40 0q0-30-17.25-51T296 232q-10.25 0-48.75 5.25Q229.5 240 208 240t-39.25-2.75Q130.75 232 120 232q-29.5 0-46.75 21T56 304q0 22 8 38.375t20.25 25.75 30.5 15 35 7.375 37.25 1.75h42q20.5 0 37.25-1.75t35-7.375 30.5-15 20.25-25.75T360 304zm56-44q0 51.75-15.25 82.75-9.5 19.25-26.375 33.25t-35.25 21.5-42.5 11.875-42.875 5.5T212 416q-19.5 0-35.5-.75t-36.875-3.125-38.125-7.5-34.25-12.875T37 371.5t-21.5-28.75Q0 312 0 260q0-59.25 34-99-6.75-20.5-6.75-42.5 0-29 12.75-54.5 27 0 47.5 9.875t47.25 30.875Q171.5 96 212 96q37 0 70 8 26.25-20.5 46.75-30.25T376 64q12.75 25.5 12.75 54.5 0 21.75-6.75 42 34 40 34 99.5z"/></svg>
</defs>
</svg>
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" data-md-component="overlay" for="__drawer"></label>
<a href="#optimizers" tabindex="1" class="md-skip">
Skip to content
</a>
<header class="md-header" data-md-component="header">
<nav class="md-header-nav md-grid">
<div class="md-flex">
<div class="md-flex__cell md-flex__cell--shrink">
<a href="../.." title="MXNet.jl" class="md-header-nav__button md-logo">
<i class="md-icon"></i>
</a>
</div>
<div class="md-flex__cell md-flex__cell--shrink">
<label class="md-icon md-icon--menu md-header-nav__button" for="__drawer"></label>
</div>
<div class="md-flex__cell md-flex__cell--stretch">
<div class="md-flex__ellipsis md-header-nav__title" data-md-component="title">
<span class="md-header-nav__topic">
MXNet.jl
</span>
<span class="md-header-nav__topic">
Optimizers
</span>
</div>
</div>
<div class="md-flex__cell md-flex__cell--shrink">
<label class="md-icon md-icon--search md-header-nav__button" for="__search"></label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="query" data-md-state="active">
<label class="md-icon md-search__icon" for="__search"></label>
<button type="reset" class="md-icon md-search__icon" data-md-component="reset" tabindex="-1">
&#xE5CD;
</button>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" data-md-scrollfix>
<div class="md-search-result" data-md-component="result">
<div class="md-search-result__meta">
Type to start searching
</div>
<ol class="md-search-result__list"></ol>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="md-flex__cell md-flex__cell--shrink">
<div class="md-header-nav__source">
<a href="https://github.com/apache/mxnet/tree/master/julia#mxnet/" title="Go to repository" class="md-source" data-md-source="github">
<div class="md-source__icon">
<svg viewBox="0 0 24 24" width="24" height="24">
<use xlink:href="#__github" width="24" height="24"></use>
</svg>
</div>
<div class="md-source__repository">
GitHub
</div>
</a>
</div>
</div>
</div>
</nav>
</header>
<div class="md-container">
<main class="md-main" role="main">
<div class="md-main__inner md-grid" data-md-component="container">
<div class="md-sidebar md-sidebar--primary" data-md-component="navigation">
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" data-md-level="0">
<label class="md-nav__title md-nav__title--site" for="__drawer">
<a href="../.." title="MXNet.jl" class="md-nav__button md-logo">
<i class="md-icon"></i>
</a>
MXNet.jl
</label>
<div class="md-nav__source">
<a href="https://github.com/apache/mxnet/tree/master/julia#mxnet/" title="Go to repository" class="md-source" data-md-source="github">
<div class="md-source__icon">
<svg viewBox="0 0 24 24" width="24" height="24">
<use xlink:href="#__github" width="24" height="24"></use>
</svg>
</div>
<div class="md-source__repository">
GitHub
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../.." title="Home" class="md-nav__link">
Home
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-toggle md-nav__toggle" data-md-toggle="nav-2" type="checkbox" id="nav-2">
<label class="md-nav__link" for="nav-2">
Tutorial
</label>
<nav class="md-nav" data-md-component="collapsible" data-md-level="1">
<label class="md-nav__title" for="nav-2">
Tutorial
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../tutorial/mnist/" title="Digit Recognition on MNIST" class="md-nav__link">
Digit Recognition on MNIST
</a>
</li>
<li class="md-nav__item">
<a href="../../tutorial/char-lstm/" title="Generating Random Sentence with LSTM RNN" class="md-nav__link">
Generating Random Sentence with LSTM RNN
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-toggle md-nav__toggle" data-md-toggle="nav-3" type="checkbox" id="nav-3">
<label class="md-nav__link" for="nav-3">
User Guide
</label>
<nav class="md-nav" data-md-component="collapsible" data-md-level="1">
<label class="md-nav__title" for="nav-3">
User Guide
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../user-guide/install/" title="Installation Guide" class="md-nav__link">
Installation Guide
</a>
</li>
<li class="md-nav__item">
<a href="../../user-guide/overview/" title="Overview" class="md-nav__link">
Overview
</a>
</li>
<li class="md-nav__item">
<a href="../../user-guide/faq/" title="FAQ" class="md-nav__link">
FAQ
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-toggle md-nav__toggle" data-md-toggle="nav-4" type="checkbox" id="nav-4" checked>
<label class="md-nav__link" for="nav-4">
API Documentation
</label>
<nav class="md-nav" data-md-component="collapsible" data-md-level="1">
<label class="md-nav__title" for="nav-4">
API Documentation
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../context/" title="Context" class="md-nav__link">
Context
</a>
</li>
<li class="md-nav__item">
<a href="../model/" title="Models" class="md-nav__link">
Models
</a>
</li>
<li class="md-nav__item">
<a href="../initializer/" title="Initializers" class="md-nav__link">
Initializers
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-toggle md-nav__toggle" data-md-toggle="toc" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
Optimizers
</label>
<a href="./" title="Optimizers" class="md-nav__link md-nav__link--active">
Optimizers
</a>
<nav class="md-nav md-nav--secondary">
<label class="md-nav__title" for="__toc">Table of contents</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="#built-in-optimizers" class="md-nav__link">
Built-in optimizers
</a>
<nav class="md-nav">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#stochastic-gradient-descent" class="md-nav__link">
Stochastic Gradient Descent
</a>
</li>
<li class="md-nav__item">
<a href="#adam" class="md-nav__link">
ADAM
</a>
</li>
<li class="md-nav__item">
<a href="#adagrad" class="md-nav__link">
AdaGrad
</a>
</li>
<li class="md-nav__item">
<a href="#adadelta" class="md-nav__link">
AdaDelta
</a>
</li>
<li class="md-nav__item">
<a href="#adamax" class="md-nav__link">
AdaMax
</a>
</li>
<li class="md-nav__item">
<a href="#rmsprop" class="md-nav__link">
RMSProp
</a>
</li>
<li class="md-nav__item">
<a href="#nadam" class="md-nav__link">
Nadam
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../callback/" title="Callbacks in training" class="md-nav__link">
Callbacks in training
</a>
</li>
<li class="md-nav__item">
<a href="../metric/" title="Evaluation Metrics" class="md-nav__link">
Evaluation Metrics
</a>
</li>
<li class="md-nav__item">
<a href="../io/" title="Data Providers" class="md-nav__link">
Data Providers
</a>
</li>
<li class="md-nav__item">
<a href="../ndarray/" title="NDArray API" class="md-nav__link">
NDArray API
</a>
</li>
<li class="md-nav__item">
<a href="../symbolic-node/" title="Symbolic API" class="md-nav__link">
Symbolic API
</a>
</li>
<li class="md-nav__item">
<a href="../nn-factory/" title="Neural Networks Factory" class="md-nav__link">
Neural Networks Factory
</a>
</li>
<li class="md-nav__item">
<a href="../executor/" title="Executor" class="md-nav__link">
Executor
</a>
</li>
<li class="md-nav__item">
<a href="../kvstore/" title="Key-Value Store" class="md-nav__link">
Key-Value Store
</a>
</li>
<li class="md-nav__item">
<a href="../visualize/" title="Network Visualization" class="md-nav__link">
Network Visualization
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="toc">
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary">
<label class="md-nav__title" for="__toc">Table of contents</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="#built-in-optimizers" class="md-nav__link">
Built-in optimizers
</a>
<nav class="md-nav">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#stochastic-gradient-descent" class="md-nav__link">
Stochastic Gradient Descent
</a>
</li>
<li class="md-nav__item">
<a href="#adam" class="md-nav__link">
ADAM
</a>
</li>
<li class="md-nav__item">
<a href="#adagrad" class="md-nav__link">
AdaGrad
</a>
</li>
<li class="md-nav__item">
<a href="#adadelta" class="md-nav__link">
AdaDelta
</a>
</li>
<li class="md-nav__item">
<a href="#adamax" class="md-nav__link">
AdaMax
</a>
</li>
<li class="md-nav__item">
<a href="#rmsprop" class="md-nav__link">
RMSProp
</a>
</li>
<li class="md-nav__item">
<a href="#nadam" class="md-nav__link">
Nadam
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content">
<article class="md-content__inner md-typeset">
<a href="https://github.com/apache/mxnet/tree/master/edit/master/docs/api/optimizer.md" title="Edit this page" class="md-icon md-content__icon">&#xE3C9;</a>
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.  You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0  Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the specific language governing permissions and limitations under the License. -->
<p><a id='Optimizers-1'></a></p>
<h1 id="optimizers">Optimizers</h1>
<p>Say you have a parameter <code>W</code> initialized for your model and its gradient stored in <code>∇</code> (perhaps from AutoGrad APIs). Here is a minimal snippet that updates <code>W</code> with <code>SGD</code>.</p>
<pre><code class="julia-repl">julia&gt; using MXNet
julia&gt; opt = SGD(η = 10)
SGD(10, 0.0, 0, 0, 0.0001, MXNet.mx.LearningRate.Fixed(10.0), MXNet.mx.Momentum.Null())
julia&gt; descend! = getupdater(opt)
(::getfield(MXNet.mx, Symbol(&quot;#updater#9272&quot;)){SGD,Dict{Int64,Any}}) (generic function with 1 method)
julia&gt; W = NDArray(Float32[1, 2, 3, 4]);
julia&gt; ∇ = NDArray(Float32[.1, .2, .3, .4]);
julia&gt; descend!(1, ∇, W)
4-element NDArray{Float32,1} @ cpu0:
-0.0010000467f0
-0.0020000935f0
-0.003000021f0
-0.004000187f0
</code></pre>
<p><a id='MXNet.mx.AbstractOptimizer' href='#MXNet.mx.AbstractOptimizer'>#</a>
<strong><code>MXNet.mx.AbstractOptimizer</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">AbstractOptimizer
</code></pre>
<p>Base type for all optimizers.</p>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L22-L26' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.getupdater-Tuple{AbstractOptimizer}' href='#MXNet.mx.getupdater-Tuple{AbstractOptimizer}'>#</a>
<strong><code>MXNet.mx.getupdater</code></strong> &mdash; <em>Method</em>.</p>
<pre><code class="julia">getupdater(optimizer)
</code></pre>
<p>A utility function to create an updater function for <code>KVStore</code>. It uses a closure to store all the states needed for each weight.</p>
<p>The returned function has the following signature:</p>
<pre><code class="julia">decend!(index::Int, ∇::NDArray, x::NDArray)
</code></pre>
<p>If the optimizer is stateful and needs to access or store state during updating, <code>index</code> will be the key used to retrieve and store that state.</p>
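<p>For illustration, a small sketch of driving two different weights through one updater; the optimizer choice and the names <code>update!</code>, <code>W₁</code>, <code>W₂</code> below are illustrative only:</p>
<pre><code class="julia">using MXNet

opt     = ADAM()              # any stateful optimizer works here
update! = getupdater(opt)

W₁ = NDArray(Float32[1, 2]);  ∇₁ = NDArray(Float32[0.1, 0.2])
W₂ = NDArray(Float32[3, 4]);  ∇₂ = NDArray(Float32[0.3, 0.4])

update!(1, ∇₁, W₁)   # optimizer state for W₁ is kept under key 1
update!(2, ∇₂, W₂)   # optimizer state for W₂ is kept under key 2
</code></pre>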
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L252-L266' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.normgrad!-Tuple{AbstractOptimizer,NDArray,NDArray}' href='#MXNet.mx.normgrad!-Tuple{AbstractOptimizer,NDArray,NDArray}'>#</a>
<strong><code>MXNet.mx.normgrad!</code></strong> &mdash; <em>Method</em>.</p>
<pre><code class="julia">normgrad(optimizer, W, ∇)
</code></pre>
<p>Get the properly normalized gradient (re-scaled and clipped if necessary).</p>
<ul>
<li><code>optimizer</code>: the optimizer, should contain the fields <code>scale</code>, <code>clip</code>, and <code>λ</code>.</li>
<li><code>W::NDArray</code>: the trainable weights.</li>
<li><code>∇::NDArray</code>: the original gradient of the weights.</li>
</ul>
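<p>As a rough, plain-Julia sketch of what this normalization amounts to, assuming scalar <code>scale</code>, <code>clip</code>, <code>λ</code> and plain arrays (an illustration, not the library implementation):</p>
<pre><code class="julia"># Illustrative sketch only; this hypothetical helper mirrors the description above.
function normalized_gradient(scale, clip, λ, W, ∇)
    g = scale != 0 ? scale .* ∇ : copy(∇)   # optional rescaling
    if clip &gt; 0
        g = clamp.(g, -clip, clip)          # optional clipping to [-clip, clip]
    end
    g .+ λ .* W                             # weight decay as a global L2 term
end
</code></pre>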
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L278-L287' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.AbstractLearningRateScheduler' href='#MXNet.mx.AbstractLearningRateScheduler'>#</a>
<strong><code>MXNet.mx.AbstractLearningRateScheduler</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">AbstractLearningRateScheduler
</code></pre>
<p>Base type for all learning rate schedulers.</p>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L29-L33' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.AbstractMomentumScheduler' href='#MXNet.mx.AbstractMomentumScheduler'>#</a>
<strong><code>MXNet.mx.AbstractMomentumScheduler</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">AbstractMomentumScheduler
</code></pre>
<p>Base type for all momentum schedulers.</p>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L36-L40' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.OptimizationState' href='#MXNet.mx.OptimizationState'>#</a>
<strong><code>MXNet.mx.OptimizationState</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">OptimizationState
</code></pre>
<p><strong>Attributes</strong></p>
<ul>
<li><code>batch_size</code>: The size of the mini-batch used in stochastic training.</li>
<li><code>curr_epoch</code>: The current epoch count. Epoch 0 means no training yet; during the first pass through the data the epoch count will be 1, during the second pass it will be 2, and so on.</li>
<li><code>curr_batch</code>: The current mini-batch count. The batch count is reset during every epoch. The batch count 0 means the beginning of each epoch, with no mini-batch seen yet. During the first mini-batch, the mini-batch count will be 1.</li>
<li><code>curr_iter</code>: The current iteration count. One iteration corresponds to one mini-batch, but unlike the mini-batch count, the iteration count does <strong>not</strong> reset in each epoch. So it tracks the <em>total</em> number of mini-batches seen so far.</li>
</ul>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L43-L60' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.LearningRate.Exp' href='#MXNet.mx.LearningRate.Exp'>#</a>
<strong><code>MXNet.mx.LearningRate.Exp</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">LearningRate.Exp(η₀; γ = 0.9)
</code></pre>
<p>
<script type="math/tex; mode=display">
\eta_t = \eta_0\gamma^t
</script>
</p>
<p>Where <code>t</code> is the epoch count, or the iteration count.</p>
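<p>For instance, evaluating the schedule directly in plain Julia (illustrative values only):</p>
<pre><code class="julia">η₀, γ = 0.1, 0.9
η(t) = η₀ * γ^t          # the exponential schedule above
η.(0:3)                  # ≈ [0.1, 0.09, 0.081, 0.0729]
</code></pre>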
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L105' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.LearningRate.Fixed' href='#MXNet.mx.LearningRate.Fixed'>#</a>
<strong><code>MXNet.mx.LearningRate.Fixed</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">LearningRate.Fixed(η)
</code></pre>
<p>The fixed learning rate scheduler always returns the same learning rate.</p>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L94-L98' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.LearningRate.Inv' href='#MXNet.mx.LearningRate.Inv'>#</a>
<strong><code>MXNet.mx.LearningRate.Inv</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">LearningRate.Inv(η₀; γ = 0.9, p = 0.5)
</code></pre>
<p>
<script type="math/tex; mode=display">
\eta_t = \eta_0 (1 + \gamma t)^{-p}
</script>
</p>
<p>Where <code>t</code> is the epoch count, or the iteration count.</p>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L127' class='documenter-source'>source</a><br></p>
<p><a id='Base.get-Tuple{MXNet.mx.AbstractLearningRateScheduler}' href='#Base.get-Tuple{MXNet.mx.AbstractLearningRateScheduler}'>#</a>
<strong><code>Base.get</code></strong> &mdash; <em>Method</em>.</p>
<pre><code class="julia">get(sched::AbstractLearningRateScheduler)
</code></pre>
<p>Returns the current learning rate.</p>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L87-L91' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.Momentum.Fixed' href='#MXNet.mx.Momentum.Fixed'>#</a>
<strong><code>MXNet.mx.Momentum.Fixed</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">Momentum.Fixed
</code></pre>
<p>The fixed momentum scheduler always returns the same value.</p>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L190-L194' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.Momentum.NadamScheduler' href='#MXNet.mx.Momentum.NadamScheduler'>#</a>
<strong><code>MXNet.mx.Momentum.NadamScheduler</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">NadamScheduler(; μ = 0.99, δ = 0.004, γ = 0.5, α = 0.96)
</code></pre>
<p>Nesterov-accelerated adaptive momentum scheduler.</p>
<p>Description in <a href="http://cs229.stanford.edu/proj2015/054_report.pdf">Incorporating Nesterov Momentum into Adam</a>.</p>
<p>
<script type="math/tex; mode=display">
\mu_t = \mu_0 * (1 - \gamma * \alpha^{t * \delta})
</script>
</p>
<p>Where</p>
<ul>
<li><code>t</code>: iteration count</li>
<li><code>μ</code>: default <code>0.99</code>, the initial momentum μ₀.</li>
<li><code>δ</code>: default <code>0.004</code>, the scheduler decay.</li>
<li><code>γ</code>: default <code>0.5</code></li>
<li><code>α</code>: default <code>0.96</code></li>
</ul>
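<p>With the default values the momentum slowly anneals towards μ₀ as <code>t</code> grows; a plain-Julia check of the formula (illustrative only):</p>
<pre><code class="julia">μ₀, δ, γ, α = 0.99, 0.004, 0.5, 0.96
μ(t) = μ₀ * (1 - γ * α^(t * δ))
μ(1)      # ≈ 0.495
μ(1000)   # ≈ 0.570
</code></pre>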
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L201' class='documenter-source'>source</a><br></p>
<p><a id='MXNet.mx.Momentum.Null' href='#MXNet.mx.Momentum.Null'>#</a>
<strong><code>MXNet.mx.Momentum.Null</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">Momentum.Null
</code></pre>
<p>The null momentum scheduler always returns 0 for momentum. It is also used to explicitly indicate momentum should not be used.</p>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L179-L184' class='documenter-source'>source</a><br></p>
<p><a id='Base.get-Tuple{MXNet.mx.Momentum.NadamScheduler,Any}' href='#Base.get-Tuple{MXNet.mx.Momentum.NadamScheduler,Any}'>#</a>
<strong><code>Base.get</code></strong> &mdash; <em>Method</em>.</p>
<pre><code class="julia">get(n::NadamScheduler, t)
</code></pre>
<p>Where <code>t</code> is the iteration count.</p>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizer.jl#L235-L239' class='documenter-source'>source</a><br></p>
<p><a id='Built-in-optimizers-1'></a></p>
<h2 id="built-in-optimizers">Built-in optimizers</h2>
<p><a id='Stochastic-Gradient-Descent-1'></a></p>
<h3 id="stochastic-gradient-descent">Stochastic Gradient Descent</h3>
<p><a id='MXNet.mx.SGD' href='#MXNet.mx.SGD'>#</a>
<strong><code>MXNet.mx.SGD</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">SGD(; kwargs...)
</code></pre>
<p>Stochastic gradient descent optimizer.</p>
<p>Vanilla SGD:</p>
<p>
<script type="math/tex; mode=display">
\theta \leftarrow \theta - \eta \nabla
</script>
</p>
<p>SGD with momentum:</p>
<p>
<script type="math/tex; mode=display">
\begin{align*}
\nu & \leftarrow \mu \nu_{t-1} - \eta \nabla \\
\theta & \leftarrow \theta + \nu_t
\end{align*}
</script>
</p>
<p><strong>Arguments</strong></p>
<ul>
<li><code>η</code>: default <code>0.01</code>, learning rate.</li>
<li><code>μ</code>: default <code>0</code>, the momentum, usually set to <code>0.9</code> in this implementation.</li>
<li><code>λ</code>: default <code>0.0001</code>, weight decay is equivalent to adding a global l2 regularizer to the parameters.</li>
<li><code>clip</code>: default <code>0</code>, gradient clipping. If positive, will clip the gradient into the bounded range <code>[-clip, clip]</code>.</li>
<li><code>scale</code>: default <code>0</code>, gradient rescaling. If != 0, multiply the gradient with <code>scale</code> before updating. Often chosen to be <code>1.0 / batch_size</code>. If left at the default, high-level APIs like <code>fit!</code> will set it to <code>1.0 / batch_size</code>, since <code>fit!</code> knows the <code>batch_size</code>.</li>
<li><code>μ_sched::AbstractMomentumScheduler</code>: default <code>Momentum.Null()</code>, a dynamic momentum scheduler. If set, will overwrite the <code>momentum</code> parameter.</li>
<li><code>η_sched::AbstractLearningRateScheduler</code>: default <code>LearningRate.Fixed(η)</code>, a dynamic learning rate scheduler. If set, will overwrite the <code>η</code> parameter.</li>
</ul>
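<p>As a usage sketch, mirroring the snippet at the top of this page (variable names are illustrative):</p>
<pre><code class="julia">using MXNet

opt      = SGD(η = 0.01, μ = 0.9, clip = 5)   # momentum SGD with gradient clipping
descend! = getupdater(opt)

W = NDArray(Float32[1, 2, 3, 4])
∇ = NDArray(Float32[0.1, 0.2, 0.3, 0.4])
descend!(1, ∇, W)                             # one update step on W
</code></pre>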
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizers/sgd.jl#L18' class='documenter-source'>source</a><br></p>
<p><a id='ADAM-1'></a></p>
<h3 id="adam">ADAM</h3>
<p><a id='MXNet.mx.ADAM' href='#MXNet.mx.ADAM'>#</a>
<strong><code>MXNet.mx.ADAM</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia"> ADAM
</code></pre>
<p>The solver described in Diederik Kingma, Jimmy Ba: <em>Adam: A Method for Stochastic Optimization</em>. arXiv:1412.6980 [cs.LG].</p>
<pre><code>ADAM(; kwargs...)
</code></pre>
<p><strong>Arguments</strong></p>
<ul>
<li><code>η</code>: default <code>0.001</code>, learning rate.</li>
<li><code>β1</code>: default <code>0.9</code>.</li>
<li><code>β2</code>: default <code>0.999</code>.</li>
<li><code>ϵ</code>: default <code>1e-8</code>.</li>
<li><code>clip</code>: default <code>0</code>, gradient clipping. If positive, will clip the gradient into the range <code>[-clip, clip]</code>.</li>
<li><code>scale</code>: default <code>0</code>, gradient rescaling. If != 0, multiply the gradient with <code>scale</code> before updating. Often chosen to be <code>1.0 / batch_size</code>. If left at the default, high-level APIs like <code>fit!</code> will set it to <code>1.0 / batch_size</code>, since <code>fit!</code> knows the <code>batch_size</code>.</li>
<li><code>λ</code>: default <code>0.00001</code>, weight decay is equivalent to adding a global l2 regularizer for all the parameters.</li>
<li><code>η_sched::AbstractLearningRateScheduler</code>: default <code>LearningRate.Fixed(η)</code>, a dynamic learning rate scheduler. If set, will overwrite the <code>η</code> parameter.</li>
</ul>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizers/adam.jl#L18-L42' class='documenter-source'>source</a><br></p>
<p><a id='AdaGrad-1'></a></p>
<h3 id="adagrad">AdaGrad</h3>
<p><a id='MXNet.mx.AdaGrad' href='#MXNet.mx.AdaGrad'>#</a>
<strong><code>MXNet.mx.AdaGrad</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">AdaGrad(; kwargs...)
</code></pre>
<p>Scale learning rates by dividing with the square root of accumulated squared gradients. See [1] for further description.</p>
<p><strong>Arguments</strong></p>
<ul>
<li><code>η</code>: default <code>0.1</code>, learning rate.</li>
<li><code>ϵ</code>: default <code>1e-6</code>, small value added for numerical stability.</li>
<li><code>clip</code>: default <code>0</code>, gradient clipping. If positive, will clip the gradient into the range <code>[-clip, clip]</code>.</li>
<li><code>scale</code>: default <code>0</code>, gradient rescaling. If != 0, multiply the gradient with <code>scale</code> before updating. Often chosen to be <code>1.0 / batch_size</code>. If left at the default, high-level APIs like <code>fit!</code> will set it to <code>1.0 / batch_size</code>, since <code>fit!</code> knows the <code>batch_size</code>.</li>
<li><code>λ</code>: default <code>0.00001</code>, weight decay is equivalent to adding a global l2 regularizer for all the parameters.</li>
</ul>
<p><strong>Notes</strong></p>
<p>Using the step size <code>η</code>, AdaGrad calculates the learning rate for feature <code>i</code> at time step <code>t</code> as:</p>
<p>
<script type="math/tex; mode=display">
η_{t,i} = \frac{η}{\sqrt{\sum_{t^\prime = 1}^{t} g^2_{t^\prime,i} + ϵ}} g_{t,i}
</script>
</p>
<p>As such, the learning rate is monotonically decreasing. Epsilon is not included in the typical formula; see [2].</p>
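<p>The accumulation can be sketched element-wise in plain Julia (an illustration of the formula above, not the internal code):</p>
<pre><code class="julia">η, ϵ = 0.1, 1e-6
W = Float32[1, 2, 3];  g = Float32[0.1, 0.2, 0.3]
acc = zero(W)                        # running sum of squared gradients

acc .+= g .^ 2                       # accumulate g²
W   .-= η .* g ./ sqrt.(acc .+ ϵ)    # per-feature, monotonically shrinking step
</code></pre>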
<p><strong>References</strong></p>
<ol>
<li>Duchi, J., Hazan, E., &amp; Singer, Y. (2011): Adaptive subgradient methods for online learning and stochastic optimization. JMLR, 12:2121-2159.</li>
<li>Chris Dyer: Notes on AdaGrad. <a href="http://www.ark.cs.cmu.edu/cdyer/adagrad.pdf">http://www.ark.cs.cmu.edu/cdyer/adagrad.pdf</a></li>
</ol>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizers/adagrad.jl#L18' class='documenter-source'>source</a><br></p>
<p><a id='AdaDelta-1'></a></p>
<h3 id="adadelta">AdaDelta</h3>
<p><a id='MXNet.mx.AdaDelta' href='#MXNet.mx.AdaDelta'>#</a>
<strong><code>MXNet.mx.AdaDelta</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">AdaDelta(; kwargs...)
</code></pre>
<p>Scale learning rates by the ratio of accumulated gradients to accumulated updates, see [1] and notes for further description.</p>
<p><strong>Attributes</strong></p>
<ul>
<li><code>η</code>: default <code>1.0</code>, learning rate.</li>
<li><code>ρ</code>: default <code>0.95</code>, squared gradient moving average decay factor.</li>
<li><code>ϵ</code>: default <code>1e-6</code>, small value added for numerical stability.</li>
<li><code>clip</code>: default <code>0</code>, gradient clipping. If positive, will clip the gradient into the range <code>[-clip, clip]</code>.</li>
<li><code>scale</code>: default <code>0</code>, gradient rescaling. If != 0, multiply the gradient with <code>scale</code> before updating. Often chosen to be <code>1.0 / batch_size</code>. If left at the default, high-level APIs like <code>fit!</code> will set it to <code>1.0 / batch_size</code>, since <code>fit!</code> knows the <code>batch_size</code>.</li>
<li><code>λ</code>: default <code>0.00001</code>, weight decay is equivalent to adding a global l2 regularizer for all the parameters.</li>
</ul>
<p><strong>Notes</strong></p>
<p><code>ρ</code> should be between 0 and 1. A value of <code>ρ</code> close to 1 will decay the moving average slowly and a value close to 0 will decay the moving average fast.</p>
<p><code>ρ = 0.95</code> and <code>ϵ = 1e-6</code> are suggested in the paper and reported to work for multiple datasets (MNIST, speech). In the paper, no learning rate is considered (so <code>η = 1.0</code>). Probably best to keep it at this value.</p>
<p><code>ϵ</code> is important for the very first update (so the numerator does not become 0).</p>
<p>Using the step size <code>η</code> and a decay factor <code>ρ</code>, the learning rate is calculated as:</p>
<p>
<script type="math/tex; mode=display">
\begin{align*}
r_t &= ρ r_{t-1} + (1 - ρ) g^2 \\
η_t &= η \frac{\sqrt{s_{t-1} + ϵ}} {\sqrt{r_t + ϵ}} \\
s_t &= ρ s_{t-1} + (1 - ρ) _t \times g)^2
\end{align*}
</script>
</p>
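<p>Spelled out element-wise in plain Julia (an illustrative sketch of the three running quantities above; the final weight update <code>W .-= η_t .* g</code> is the usual AdaDelta step and is assumed here, not part of the formula shown):</p>
<pre><code class="julia">ρ, η, ϵ = 0.95, 1.0, 1e-6
W = Float32[1, 2, 3];  g = Float32[0.1, 0.2, 0.3]
r = zero(W);  s = zero(W)            # accumulated g² and accumulated updates

r  .= ρ .* r .+ (1 - ρ) .* g .^ 2
η_t = η .* sqrt.(s .+ ϵ) ./ sqrt.(r .+ ϵ)
s  .= ρ .* s .+ (1 - ρ) .* (η_t .* g) .^ 2
W .-= η_t .* g                       # assumed weight update (standard AdaDelta)
</code></pre>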
<p><strong>References</strong></p>
<ol>
<li>Zeiler, M. D. (2012): ADADELTA: An Adaptive Learning Rate Method. arXiv Preprint arXiv:1212.5701.</li>
</ol>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizers/adadelta.jl#L18' class='documenter-source'>source</a><br></p>
<p><a id='AdaMax-1'></a></p>
<h3 id="adamax">AdaMax</h3>
<p><a id='MXNet.mx.AdaMax' href='#MXNet.mx.AdaMax'>#</a>
<strong><code>MXNet.mx.AdaMax</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">AdaMax(; kwargs...)
</code></pre>
<p>This is a variant of the Adam algorithm based on the infinity norm. See [1] for further description.</p>
<p><strong>Arguments</strong></p>
<ul>
<li><code>η</code>: default <code>0.002</code>, learning rate.</li>
<li><code>β1</code>: default <code>0.9</code>, exponential decay rate for the first moment estimates.</li>
<li><code>β2</code>: default <code>0.999</code>, exponential decay rate for the weighted infinity norm estimates.</li>
<li><code>ϵ</code>: default <code>1e-8</code>, small value added for numerical stability.</li>
<li><code>clip</code>: default <code>0</code>, gradient clipping. If positive, will clip the gradient into the range <code>[-clip, clip]</code>.</li>
<li><code>scale</code>: default <code>0</code>, gradient rescaling. If != 0, multiply the gradient with <code>scale</code> before updating. Often chosen to be <code>1.0 / batch_size</code>. If left at the default, high-level APIs like <code>fit!</code> will set it to <code>1.0 / batch_size</code>, since <code>fit!</code> knows the <code>batch_size</code>.</li>
<li><code>λ</code>: default <code>0.00001</code>, weight decay is equivalent to adding a global l2 regularizer for all the parameters.</li>
</ul>
<p><strong>References</strong></p>
<ol>
<li>Kingma, Diederik, and Jimmy Ba (2014): Adam: A Method for Stochastic Optimization. Section 7. <a href="http://arxiv.org/abs/1412.6980">http://arxiv.org/abs/1412.6980</a>.</li>
</ol>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizers/adamax.jl#L18-L45' class='documenter-source'>source</a><br></p>
<p><a id='RMSProp-1'></a></p>
<h3 id="rmsprop">RMSProp</h3>
<p><a id='MXNet.mx.RMSProp' href='#MXNet.mx.RMSProp'>#</a>
<strong><code>MXNet.mx.RMSProp</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">RMSProp(; kwargs...)
</code></pre>
<p>Scale learning rates by dividing with the moving average of the root mean squared (RMS) gradients. See [1] for further description.</p>
<p><strong>Arguments</strong></p>
<ul>
<li><code>η</code>: default <code>0.1</code>, learning rate.</li>
<li><code>ρ</code>: default <code>0.9</code>, gradient moving average decay factor.</li>
<li><code>ϵ</code>: default <code>1e-8</code>, small value added for numerical stability.</li>
<li><code>clip</code>: default <code>0</code>, gradient clipping. If positive, will clip the gradient into the range <code>[-clip, clip]</code>.</li>
<li><code>scale</code>: default <code>0</code>, gradient rescaling. If != 0, multiply the gradient with <code>scale</code> before updating. Often chosen to be <code>1.0 / batch_size</code>. If left at the default, high-level APIs like <code>fit!</code> will set it to <code>1.0 / batch_size</code>, since <code>fit!</code> knows the <code>batch_size</code>.</li>
<li><code>λ</code>: default <code>0.00001</code>, weight decay is equivalent to adding a global l2 regularizer for all the parameters.</li>
</ul>
<p><strong>Notes</strong></p>
<p><code>ρ</code> should be between 0 and 1. A value of <code>ρ</code> close to 1 will decay the moving average slowly and a value close to 0 will decay the moving average fast.</p>
<p>Using the step size <code>η</code> and a decay factor <code>ρ</code>, the learning rate <code>ηₜ</code> is calculated as:</p>
<p>
<script type="math/tex; mode=display">
\begin{align*}
r_t &= ρ r_{t-1} + (1 - ρ)g^2 \\
η_t &= \frac{η}{\sqrt{r_t + ϵ}}
\end{align*}
</script>
</p>
<p><strong>References</strong></p>
<ol>
<li>Tieleman, T. and Hinton, G. (2012): Neural Networks for Machine Learning, Lecture 6.5 - rmsprop. Coursera. <a href="http://www.youtube.com/watch?v=O3sxAc4hxZU">http://www.youtube.com/watch?v=O3sxAc4hxZU</a> (formula @5:20)</li>
</ol>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizers/rmsprop.jl#L18' class='documenter-source'>source</a><br></p>
<p><a id='Nadam-1'></a></p>
<h3 id="nadam">Nadam</h3>
<p><a id='MXNet.mx.Nadam' href='#MXNet.mx.Nadam'>#</a>
<strong><code>MXNet.mx.Nadam</code></strong> &mdash; <em>Type</em>.</p>
<pre><code class="julia">Nadam(; kwargs...)
</code></pre>
<p>Nesterov Adam optimizer: Adam with Nesterov momentum; see [1] and the notes for further description.</p>
<p><strong>Arguments</strong></p>
<ul>
<li><code>η</code>: default <code>0.001</code>, learning rate.</li>
<li><code>β1</code>: default <code>0.99</code>.</li>
<li><code>β2</code>: default <code>0.999</code>.</li>
<li><code>ϵ</code>: default <code>1e-8</code>, small value added for numerical stability.</li>
<li><code>clip</code>: default <code>0</code>, gradient clipping. If positive, will clip the gradient into the range <code>[-clip, clip]</code>.</li>
<li><code>scale</code>: default <code>0</code>, gradient rescaling. If != 0, multiply the gradient with <code>scale</code> before updating. Often chosen to be <code>1.0 / batch_size</code>. If left at the default, high-level APIs like <code>fit!</code> will set it to <code>1.0 / batch_size</code>, since <code>fit!</code> knows the <code>batch_size</code>.</li>
<li><code>λ</code>: default <code>0.00001</code>, weight decay is equivalent to adding a global l2 regularizer for all the parameters.</li>
<li><code>η_sched::AbstractLearningRateScheduler</code>: default <code>nothing</code>, a dynamic learning rate scheduler. If set, will overwrite the <code>η</code> parameter.</li>
<li>
<p><code>μ_sched::NadamScheduler</code>: default <code>NadamScheduler()</code>, of the form:</p>
<p>
<script type="math/tex; mode=display">
\mu_t = β_1 (1 - 0.5 \times 0.96^{t \times 0.004})
</script>
</p>
</li>
</ul>
<p><strong>Notes</strong></p>
<p>Default parameters follow those provided in the paper. It is recommended to leave the parameters of this optimizer at their default values.</p>
<p><strong>References</strong></p>
<ol>
<li><a href="http://cs229.stanford.edu/proj2015/054_report.pdf">Incorporating Nesterov Momentum into Adam</a>.</li>
<li><a href="http://www.cs.toronto.edu/~fritz/absps/momentum.pdf">On the importance of initialization and momentum in deep learning</a>.</li>
</ol>
<p><a target='_blank' href='https://github.com/apache/mxnet/blob/26a5ad1f39784a60d1564f6f740e5c7bd971cd65/julia/src/optimizers/nadam.jl#L18' class='documenter-source'>source</a><br></p>
</article>
</div>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-nav">
<nav class="md-footer-nav__inner md-grid">
<a href="../initializer/" title="Initializers" class="md-flex md-footer-nav__link md-footer-nav__link--prev" rel="prev">
<div class="md-flex__cell md-flex__cell--shrink">
<i class="md-icon md-icon--arrow-back md-footer-nav__button"></i>
</div>
<div class="md-flex__cell md-flex__cell--stretch md-footer-nav__title">
<span class="md-flex__ellipsis">
<span class="md-footer-nav__direction">
Previous
</span>
Initializers
</span>
</div>
</a>
<a href="../callback/" title="Callbacks in training" class="md-flex md-footer-nav__link md-footer-nav__link--next" rel="next">
<div class="md-flex__cell md-flex__cell--stretch md-footer-nav__title">
<span class="md-flex__ellipsis">
<span class="md-footer-nav__direction">
Next
</span>
Callbacks in training
</span>
</div>
<div class="md-flex__cell md-flex__cell--shrink">
<i class="md-icon md-icon--arrow-forward md-footer-nav__button"></i>
</div>
</a>
</nav>
</div>
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-footer-copyright">
powered by
<a href="https://www.mkdocs.org">MkDocs</a>
and
<a href="https://squidfunk.github.io/mkdocs-material/">
Material for MkDocs</a>
</div>
</div>
</div>
</footer>
</div>
<script src="../../assets/javascripts/application.808e90bb.js"></script>
<script>app.initialize({version:"1.0.4",url:{base:"../.."}})</script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"></script>
<script src="../../assets/mathjaxhelper.js"></script>
</body>
</html>