blob: 72d9895fef961b6c270780a44bcfeb8d7acf57fa [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<meta content="mxnet.gluon.trainer" property="og:title">
<meta content="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/og-logo.png" property="og:image">
<meta content="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/og-logo.png" property="og:image:secure_url">
<meta content="mxnet.gluon.trainer" property="og:description"/>
<title>mxnet.gluon.trainer — mxnet documentation</title>
<link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" rel="stylesheet"/>
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet"/>
<link href="../../../_static/basic.css" rel="stylesheet" type="text/css">
<link href="../../../_static/pygments.css" rel="stylesheet" type="text/css">
<link href="../../../_static/mxnet.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript">
// Sphinx runtime configuration consumed by doctools.js and searchtools_custom.js
// (relative root for link resolution, page suffix, and search-source settings).
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../../../',
VERSION: '',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: '.txt'
};
</script>
<script src="https://code.jquery.com/jquery-1.11.1.min.js" type="text/javascript"></script>
<script src="../../../_static/underscore.js" type="text/javascript"></script>
<script src="../../../_static/searchtools_custom.js" type="text/javascript"></script>
<script src="../../../_static/doctools.js" type="text/javascript"></script>
<script src="../../../_static/selectlang.js" type="text/javascript"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>
<script type="text/javascript"> jQuery(function() { Search.loadIndex("/searchindex.js"); Search.init();}); </script>
<script>
// Standard Google Analytics (analytics.js) bootstrap snippet: queues ga() calls,
// injects the analytics.js loader, then records a pageview for property UA-96378503-1.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new
Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-96378503-1', 'auto');
ga('send', 'pageview');
</script>
<!-- -->
<!-- <script type="text/javascript" src="../../../_static/jquery.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../../_static/underscore.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="../../../_static/doctools.js"></script> -->
<!-- -->
<!-- <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> -->
<!-- -->
<link href="../../../genindex.html" rel="index" title="Index">
<link href="../../../search.html" rel="search" title="Search"/>
<link href="../../index.html" rel="up" title="Module code"/>
<link href="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-icon.png" rel="icon" type="image/png"/>
<!-- link/meta are void elements and take no end tags; the stray serializer-added
     closing tags were invalid HTML, so the head simply closes here. -->
</head>
<body background="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-background-compressed.jpeg" role="document">
<div class="content-block"><div class="navbar navbar-fixed-top">
<div class="container" id="navContainer">
<div class="innder" id="header-inner">
<h1 id="logo-wrap">
<a href="../../../" id="logo"><img src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet_logo.png"/></a>
</h1>
<nav class="nav-bar" id="main-nav">
<a class="main-nav-link" href="../../../install/index.html">Install</a>
<span id="dropdown-menu-position-anchor">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Gluon <span class="caret"></span></a>
<ul class="dropdown-menu navbar-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="../../../gluon/index.html">About</a></li>
<li><a class="main-nav-link" href="http://gluon.mxnet.io">Tutorials</a></li>
</ul>
</span>
<span id="dropdown-menu-position-anchor">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">API <span class="caret"></span></a>
<ul class="dropdown-menu navbar-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="../../../api/python/index.html">Python</a></li>
<li><a class="main-nav-link" href="../../../api/c++/index.html">C++</a></li>
<li><a class="main-nav-link" href="../../../api/clojure/index.html">Clojure</a></li>
<li><a class="main-nav-link" href="../../../api/julia/index.html">Julia</a></li>
<li><a class="main-nav-link" href="../../../api/perl/index.html">Perl</a></li>
<li><a class="main-nav-link" href="../../../api/r/index.html">R</a></li>
<li><a class="main-nav-link" href="../../../api/scala/index.html">Scala</a></li>
</ul>
</span>
<span id="dropdown-menu-position-anchor-docs">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Docs <span class="caret"></span></a>
<!-- Docs dropdown: each entry is a fully closed <li>; the original left the
     "Tutorials" item unclosed, mis-nesting the following items inside it. -->
<ul class="dropdown-menu navbar-menu" id="package-dropdown-menu-docs">
<li><a class="main-nav-link" href="../../../faq/index.html">FAQ</a></li>
<li><a class="main-nav-link" href="../../../tutorials/index.html">Tutorials</a></li>
<li><a class="main-nav-link" href="https://github.com/apache/incubator-mxnet/tree/1.2.1/example">Examples</a></li>
<li><a class="main-nav-link" href="../../../architecture/index.html">Architecture</a></li>
<li><a class="main-nav-link" href="../../../api/python/gluon/model_zoo.html">Model Zoo</a></li>
<li><a class="main-nav-link" href="../../../api/python/contrib/onnx.html">ONNX</a></li>
</ul>
</span>
<span id="dropdown-menu-position-anchor-community">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Community <span class="caret"></span></a>
<ul class="dropdown-menu navbar-menu" id="package-dropdown-menu-community">
<li><a class="main-nav-link" href="http://discuss.mxnet.io">Forum</a></li>
<li><a class="main-nav-link" href="https://github.com/apache/incubator-mxnet">Github</a></li>
<li><a class="main-nav-link" href="../../../community/contribute.html">Contribute</a></li>
<li><a class="main-nav-link" href="../../../community/ecosystem.html">Ecosystem</a></li>
<li><a class="main-nav-link" href="../../../community/powered_by.html">Powered By</a></li>
</ul>
</span>
<!-- Version switcher (desktop nav): href values were unquoted; all attribute
     values are now quoted. The trailing </nav> closes the main nav bar. -->
<span id="dropdown-menu-position-anchor-version" style="position: relative">
<a class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button" aria-haspopup="true" aria-expanded="true">Versions(1.2.1)<span class="caret"></span></a>
<ul class="dropdown-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/">master</a></li>
<li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/versions/1.2.1/index.html">1.2.1</a></li>
<li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/versions/1.1.0/index.html">1.1.0</a></li>
<li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/versions/1.0.0/index.html">1.0.0</a></li>
<li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/versions/0.12.1/index.html">0.12.1</a></li>
<li><a class="main-nav-link" href="http://mxnet.incubator.apache.org/versions/0.11.0/index.html">0.11.0</a></li>
</ul>
</span></nav>
<script> function getRootPath(){ return "../../../" } </script>
<div class="burgerIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"></a>
<ul class="dropdown-menu" id="burgerMenu">
<li><a href="../../../install/index.html">Install</a></li>
<li><a class="main-nav-link" href="../../../tutorials/index.html">Tutorials</a></li>
<li class="dropdown-submenu dropdown">
<a aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" role="button" tabindex="-1">Community</a>
<ul class="dropdown-menu">
<li><a href="http://discuss.mxnet.io" tabindex="-1">Forum</a></li>
<li><a href="https://github.com/apache/incubator-mxnet" tabindex="-1">Github</a></li>
<li><a href="../../../community/contribute.html" tabindex="-1">Contribute</a></li>
<li><a href="../../../community/ecosystem.html" tabindex="-1">Ecosystem</a></li>
<li><a href="../../../community/powered_by.html" tabindex="-1">Powered By</a></li>
</ul>
</li>
<li class="dropdown-submenu">
<a aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" role="button" tabindex="-1">API</a>
<ul class="dropdown-menu">
<li><a href="../../../api/python/index.html" tabindex="-1">Python</a>
</li>
<li><a href="../../../api/c++/index.html" tabindex="-1">C++</a>
</li>
<li><a href="../../../api/clojure/index.html" tabindex="-1">Clojure</a>
</li>
<li><a href="../../../api/julia/index.html" tabindex="-1">Julia</a>
</li>
<li><a href="../../../api/perl/index.html" tabindex="-1">Perl</a>
</li>
<li><a href="../../../api/r/index.html" tabindex="-1">R</a>
</li>
<li><a href="../../../api/scala/index.html" tabindex="-1">Scala</a>
</li>
</ul>
</li>
<li class="dropdown-submenu">
<a aria-expanded="true" aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" tabindex="-1">Docs</a>
<ul class="dropdown-menu">
<li><a href="../../../tutorials/index.html" tabindex="-1">Tutorials</a></li>
<li><a href="../../../faq/index.html" tabindex="-1">FAQ</a></li>
<li><a href="../../../architecture/index.html" tabindex="-1">Architecture</a></li>
<li><a href="https://github.com/apache/incubator-mxnet/tree/1.2.1/example" tabindex="-1">Examples</a></li>
<li><a href="../../../api/python/gluon/model_zoo.html" tabindex="-1">Gluon Model Zoo</a></li>
</ul>
</li>
<li><a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a></li>
<!-- Version switcher (mobile burger menu): href values were unquoted; all
     attribute values are now quoted. The final </ul> closes #burgerMenu. -->
<li class="dropdown-submenu" id="dropdown-menu-position-anchor-version-mobile" style="position: relative">
<a href="#" tabindex="-1">Versions(1.2.1)</a>
<ul class="dropdown-menu">
<li><a tabindex="-1" href="http://mxnet.incubator.apache.org/">master</a></li>
<li><a tabindex="-1" href="http://mxnet.incubator.apache.org/versions/1.2.1/index.html">1.2.1</a></li>
<li><a tabindex="-1" href="http://mxnet.incubator.apache.org/versions/1.1.0/index.html">1.1.0</a></li>
<li><a tabindex="-1" href="http://mxnet.incubator.apache.org/versions/1.0.0/index.html">1.0.0</a></li>
<li><a tabindex="-1" href="http://mxnet.incubator.apache.org/versions/0.12.1/index.html">0.12.1</a></li>
<li><a tabindex="-1" href="http://mxnet.incubator.apache.org/versions/0.11.0/index.html">0.11.0</a></li>
</ul>
</li></ul>
</div>
<div class="plusIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"><span aria-hidden="true" class="glyphicon glyphicon-plus"></span></a>
<ul class="dropdown-menu dropdown-menu-right" id="plusMenu"></ul>
</div>
<div id="search-input-wrap">
<!-- Site search: GET-submits the "q" query plus hidden defaults to the Sphinx
     search page. input is a void element, so the invalid </input> end tag and
     the empty class attribute were dropped. -->
<form action="../../../search.html" autocomplete="off" method="get" role="search">
<div class="form-group inner-addon left-addon">
<i class="glyphicon glyphicon-search"></i>
<input class="form-control" name="q" placeholder="Search" type="text">
</div>
<input name="check_keywords" type="hidden" value="yes">
<input name="area" type="hidden" value="default">
</form>
<div id="search-preview"></div>
</div>
<div id="searchIcon">
<span aria-hidden="true" class="glyphicon glyphicon-search"></span>
</div>
<!-- <div id="lang-select-wrap"> -->
<!-- <label id="lang-select-label"> -->
<!-- <\!-- <i class="fa fa-globe"></i> -\-> -->
<!-- <span></span> -->
<!-- </label> -->
<!-- <select id="lang-select"> -->
<!-- <option value="en">Eng</option> -->
<!-- <option value="zh">中文</option> -->
<!-- </select> -->
<!-- </div> -->
<!-- <a id="mobile-nav-toggle">
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
</a> -->
</div>
</div>
</div>
<script type="text/javascript">
// Override the body's background-image attribute (set on <body>) with a plain
// white background for the content area of this page.
$('body').css('background', 'white');
</script>
<div class="container">
<div class="row">
<div aria-label="main navigation" class="sphinxsidebar leftsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../api/python/index.html">Python Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../api/r/index.html">R Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../api/julia/index.html">Julia Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../api/c++/index.html">C++ Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../api/scala/index.html">Scala Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../api/perl/index.html">Perl Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../faq/index.html">HowTo Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../architecture/index.html">System Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../tutorials/index.html">Tutorials</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../community/contribute.html">Community</a></li>
</ul>
</div>
</div>
<div class="content">
<div class="page-tracker"></div>
<h1>Source code for mxnet.gluon.trainer</h1><div class="highlight"><pre>
<span></span><span class="c1"># Licensed to the Apache Software Foundation (ASF) under one</span>
<span class="c1"># or more contributor license agreements. See the NOTICE file</span>
<span class="c1"># distributed with this work for additional information</span>
<span class="c1"># regarding copyright ownership. The ASF licenses this file</span>
<span class="c1"># to you under the Apache License, Version 2.0 (the</span>
<span class="c1"># "License"); you may not use this file except in compliance</span>
<span class="c1"># with the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing,</span>
<span class="c1"># software distributed under the License is distributed on an</span>
<span class="c1"># "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span>
<span class="c1"># KIND, either express or implied. See the License for the</span>
<span class="c1"># specific language governing permissions and limitations</span>
<span class="c1"># under the License.</span>
<span class="c1"># coding: utf-8</span>
<span class="c1"># pylint: disable=line-too-long</span>
<span class="sd">"""Parameter optimizer."""</span>
<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'Trainer'</span><span class="p">]</span>
<span class="kn">from</span> <span class="nn">..</span> <span class="k">import</span> <span class="n">optimizer</span> <span class="k">as</span> <span class="n">opt</span>
<span class="kn">from</span> <span class="nn">..model</span> <span class="k">import</span> <span class="n">_create_kvstore</span><span class="p">,</span> <span class="n">_create_sparse_kvstore</span>
<span class="kn">from</span> <span class="nn">.parameter</span> <span class="k">import</span> <span class="n">ParameterDict</span><span class="p">,</span> <span class="n">Parameter</span>
<div class="viewcode-block" id="Trainer"><a class="viewcode-back" href="../../../api/python/gluon/gluon.html#mxnet.gluon.Trainer">[docs]</a><span class="k">class</span> <span class="nc">Trainer</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="sd">"""Applies an `Optimizer` on a set of Parameters. Trainer should</span>
<span class="sd"> be used together with `autograd`.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> params : ParameterDict</span>
<span class="sd"> The set of parameters to optimize.</span>
<span class="sd"> optimizer : str or Optimizer</span>
<span class="sd"> The optimizer to use. See</span>
<span class="sd"> `help <https://mxnet.incubator.apache.org/versions/1.2.1/api/python/optimization/optimization.html#the-mxnet-optimizer-package>`_</span>
<span class="sd"> on Optimizer for a list of available optimizers.</span>
<span class="sd"> optimizer_params : dict</span>
<span class="sd"> Key-word arguments to be passed to optimizer constructor. For example,</span>
<span class="sd"> `{'learning_rate': 0.1}`. All optimizers accept learning_rate, wd (weight decay),</span>
<span class="sd"> clip_gradient, and lr_scheduler. See each optimizer's</span>
<span class="sd"> constructor for a list of additional supported arguments.</span>
<span class="sd"> kvstore : str or KVStore</span>
<span class="sd"> kvstore type for multi-gpu and distributed training. See help on</span>
<span class="sd"> :any:`mxnet.kvstore.create` for more information.</span>
<span class="sd"> compression_params : dict</span>
<span class="sd"> Specifies type of gradient compression and additional arguments depending</span>
<span class="sd"> on the type of compression being used. For example, 2bit compression requires a threshold.</span>
<span class="sd"> Arguments would then be {'type':'2bit', 'threshold':0.5}</span>
<span class="sd"> See mxnet.KVStore.set_gradient_compression method for more details on gradient compression.</span>
<span class="sd"> update_on_kvstore : bool, default None</span>
<span class="sd"> Whether to perform parameter updates on kvstore. If None, then trainer will choose the more</span>
<span class="sd"> suitable option depending on the type of kvstore.</span>
<span class="sd"> Properties</span>
<span class="sd"> ----------</span>
<span class="sd"> learning_rate : float</span>
<span class="sd"> The current learning rate of the optimizer. Given an Optimizer object</span>
<span class="sd"> optimizer, its learning rate can be accessed as optimizer.learning_rate.</span>
<span class="sd"> """</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">optimizer</span><span class="p">,</span> <span class="n">optimizer_params</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">kvstore</span><span class="o">=</span><span class="s1">'device'</span><span class="p">,</span>
<span class="n">compression_params</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">update_on_kvstore</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="p">(</span><span class="nb">dict</span><span class="p">,</span> <span class="n">ParameterDict</span><span class="p">)):</span>
<span class="n">params</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">"First argument must be a list or dict of Parameters, "</span> \
<span class="s2">"got </span><span class="si">%s</span><span class="s2">."</span><span class="o">%</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">params</span><span class="p">)))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_params</span> <span class="o">=</span> <span class="p">[]</span>
<span class="c1"># parameters to initialize on the kvstore</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_contains_sparse_weight</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_contains_sparse_grad</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_param2idx</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">param</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">params</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">Parameter</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">"First argument must be a list or dict of Parameters, "</span> \
<span class="s2">"got list of </span><span class="si">%s</span><span class="s2">."</span><span class="o">%</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">param</span><span class="p">)))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_param2idx</span><span class="p">[</span><span class="n">param</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">i</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_params</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">param</span><span class="p">)</span>
<span class="n">param</span><span class="o">.</span><span class="n">_set_trainer</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">if</span> <span class="n">param</span><span class="o">.</span><span class="n">_stype</span> <span class="o">!=</span> <span class="s1">'default'</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_contains_sparse_weight</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">if</span> <span class="n">param</span><span class="o">.</span><span class="n">_grad_stype</span> <span class="o">!=</span> <span class="s1">'default'</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_contains_sparse_grad</span> <span class="o">=</span> <span class="kc">True</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_compression_params</span> <span class="o">=</span> <span class="n">compression_params</span>
<span class="n">optimizer_params</span> <span class="o">=</span> <span class="n">optimizer_params</span> <span class="k">if</span> <span class="n">optimizer_params</span> <span class="k">else</span> <span class="p">{}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_scale</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">optimizer_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'rescale_grad'</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_contexts</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_check_contexts</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_optimizer</span><span class="p">(</span><span class="n">optimizer</span><span class="p">,</span> <span class="n">optimizer_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore_params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'kvstore'</span><span class="p">:</span> <span class="n">kvstore</span><span class="p">,</span> <span class="s1">'update_on_kvstore'</span><span class="p">:</span> <span class="n">update_on_kvstore</span><span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_distributed</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span> <span class="o">=</span> <span class="p">[]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_reset_kvstore</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">_check_contexts</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">contexts</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">for</span> <span class="n">param</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params</span><span class="p">:</span>
<span class="n">ctx</span> <span class="o">=</span> <span class="n">param</span><span class="o">.</span><span class="n">list_ctx</span><span class="p">()</span>
<span class="k">assert</span> <span class="n">contexts</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">contexts</span> <span class="o">==</span> <span class="n">ctx</span><span class="p">,</span> \
<span class="s2">"All Parameters must be initialized on the same set of contexts, "</span> \
<span class="s2">"but Parameter </span><span class="si">%s</span><span class="s2"> is initialized on </span><span class="si">%s</span><span class="s2"> while previous Parameters "</span> \
<span class="s2">"are initialized on </span><span class="si">%s</span><span class="s2">."</span><span class="o">%</span><span class="p">(</span><span class="n">param</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">ctx</span><span class="p">),</span> <span class="nb">str</span><span class="p">(</span><span class="n">contexts</span><span class="p">))</span>
<span class="n">contexts</span> <span class="o">=</span> <span class="n">ctx</span>
<span class="k">return</span> <span class="n">contexts</span>
<span class="k">def</span> <span class="nf">_init_optimizer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">optimizer</span><span class="p">,</span> <span class="n">optimizer_params</span><span class="p">):</span>
<span class="n">param_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">i</span><span class="p">:</span> <span class="n">param</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">param</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_params</span><span class="p">)}</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">optimizer</span><span class="p">,</span> <span class="n">opt</span><span class="o">.</span><span class="n">Optimizer</span><span class="p">):</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">optimizer_params</span><span class="p">,</span> \
<span class="s2">"optimizer_params must be None if optimizer is an instance of "</span> \
<span class="s2">"Optimizer instead of str"</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span> <span class="o">=</span> <span class="n">optimizer</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="o">.</span><span class="n">param_dict</span> <span class="o">=</span> <span class="n">param_dict</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span> <span class="o">=</span> <span class="n">opt</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">optimizer</span><span class="p">,</span> <span class="n">param_dict</span><span class="o">=</span><span class="n">param_dict</span><span class="p">,</span>
<span class="o">**</span><span class="n">optimizer_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_updaters</span> <span class="o">=</span> <span class="p">[</span><span class="n">opt</span><span class="o">.</span><span class="n">get_updater</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="p">)</span> \
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_contexts</span><span class="p">]</span>
<span class="k">def</span> <span class="nf">_init_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">"""Initialize parameters in the KVStore.</span>
<span class="sd"> Parameters with incomplete initialization are ignored.</span>
<span class="sd"> """</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span><span class="p">,</span> <span class="s2">"Cannot initialize parameters in KVStore "</span> \
<span class="s2">"when KVStore is not initialized."</span>
<span class="n">params_to_init</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="p">:</span>
<span class="k">for</span> <span class="n">param</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span><span class="p">:</span>
<span class="k">if</span> <span class="n">param</span><span class="o">.</span><span class="n">_deferred_init</span><span class="p">:</span>
<span class="n">params_to_init</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">param</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">param_arrays</span> <span class="o">=</span> <span class="n">param</span><span class="o">.</span><span class="n">_check_and_get</span><span class="p">(</span><span class="n">param</span><span class="o">.</span><span class="n">_data</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span>
<span class="n">idx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_param2idx</span><span class="p">[</span><span class="n">param</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">init</span><span class="p">(</span><span class="n">idx</span><span class="p">,</span> <span class="n">param_arrays</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">if</span> <span class="n">param</span><span class="o">.</span><span class="n">_stype</span> <span class="o">==</span> <span class="s1">'default'</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">pull</span><span class="p">(</span><span class="n">idx</span><span class="p">,</span> <span class="n">param_arrays</span><span class="p">,</span> <span class="n">priority</span><span class="o">=-</span><span class="n">idx</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span> <span class="o">=</span> <span class="n">params_to_init</span>
<span class="k">def</span> <span class="nf">_reset_kvstore</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">"""Reset kvstore."""</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span> <span class="ow">and</span> <span class="s1">'dist'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">type</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Cannot reset distributed KVStore."</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_distributed</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span> <span class="o">=</span> <span class="p">[</span><span class="n">param</span> <span class="k">for</span> <span class="n">param</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params</span><span class="p">]</span>
<span class="k">def</span> <span class="nf">_init_kvstore</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">"""Create kvstore."""</span>
<span class="n">config</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kvstore_params</span>
<span class="c1"># if weight is sparse, the weight must be updated on KVStore.</span>
<span class="c1"># training loop contains:</span>
<span class="c1"># - row_sparse_pull(sparse_weight)</span>
<span class="c1"># - forward()</span>
<span class="c1"># - backward()</span>
<span class="c1"># - push(sparse_grad), push(dense_grad)</span>
<span class="c1"># - pull(dense_weight)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_contains_sparse_weight</span><span class="p">:</span>
<span class="n">kvstore</span><span class="p">,</span> <span class="n">update_on_kvstore</span> <span class="o">=</span> <span class="n">_create_sparse_kvstore</span><span class="p">(</span><span class="n">config</span><span class="p">[</span><span class="s1">'kvstore'</span><span class="p">])</span>
<span class="c1"># raise Error if update_on_kvstore is set to False by the user</span>
<span class="k">if</span> <span class="n">config</span><span class="p">[</span><span class="s1">'update_on_kvstore'</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Cannot set update_on_kvstore to False when sparse weights "</span>
<span class="s2">"are present."</span><span class="p">)</span>
<span class="c1"># if weight is dense and grad is sparse, the weight better not be updated on KVStore.</span>
<span class="c1"># training loop contains:</span>
<span class="c1"># - forward()</span>
<span class="c1"># - backward()</span>
<span class="c1"># - push(grad)</span>
<span class="c1"># - pull(grad)</span>
<span class="c1"># - update(grad, weight)</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">_contains_sparse_grad</span><span class="p">:</span>
<span class="n">arg_arrays</span> <span class="o">=</span> <span class="p">{</span><span class="n">param</span><span class="o">.</span><span class="n">name</span><span class="p">:</span> <span class="n">param</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_contexts</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="k">for</span> <span class="n">param</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params</span><span class="p">}</span>
<span class="n">kvstore</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">_create_kvstore</span><span class="p">(</span><span class="n">config</span><span class="p">[</span><span class="s1">'kvstore'</span><span class="p">],</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_contexts</span><span class="p">),</span> <span class="n">arg_arrays</span><span class="p">)</span>
<span class="n">update_on_kvstore</span> <span class="o">=</span> <span class="kc">False</span>
<span class="c1"># normal case</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">arg_arrays</span> <span class="o">=</span> <span class="p">{</span><span class="n">param</span><span class="o">.</span><span class="n">name</span><span class="p">:</span> <span class="n">param</span><span class="o">.</span><span class="n">data</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_contexts</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="k">for</span> <span class="n">param</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params</span><span class="p">}</span>
<span class="n">kvstore</span><span class="p">,</span> <span class="n">update_on_kvstore</span> <span class="o">=</span> <span class="n">_create_kvstore</span><span class="p">(</span><span class="n">config</span><span class="p">[</span><span class="s1">'kvstore'</span><span class="p">],</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_contexts</span><span class="p">),</span>
<span class="n">arg_arrays</span><span class="p">)</span>
<span class="k">if</span> <span class="n">kvstore</span> <span class="ow">and</span> <span class="s1">'async'</span> <span class="ow">in</span> <span class="n">kvstore</span><span class="o">.</span><span class="n">type</span> <span class="ow">and</span> <span class="n">config</span><span class="p">[</span><span class="s1">'update_on_kvstore'</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>\
<span class="ow">and</span> <span class="ow">not</span> <span class="n">config</span><span class="p">[</span><span class="s1">'update_on_kvstore'</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Please set update_on_kvstore to true "</span>
<span class="s2">"when training in async mode."</span><span class="p">)</span>
<span class="k">if</span> <span class="n">config</span><span class="p">[</span><span class="s1">'update_on_kvstore'</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">update_on_kvstore</span> <span class="o">=</span> <span class="n">config</span><span class="p">[</span><span class="s1">'update_on_kvstore'</span><span class="p">]</span>
<span class="k">if</span> <span class="n">kvstore</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compression_params</span><span class="p">:</span>
<span class="n">kvstore</span><span class="o">.</span><span class="n">set_gradient_compression</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_compression_params</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_distributed</span> <span class="o">=</span> <span class="s1">'dist'</span> <span class="ow">in</span> <span class="n">kvstore</span><span class="o">.</span><span class="n">type</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_distributed</span><span class="p">:</span>
<span class="c1"># kv.pull(row_sparse_grad) is not supported for dist kvstore</span>
<span class="n">update_on_kvstore</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_contains_sparse_weight</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">_contains_sparse_grad</span> \
<span class="ow">or</span> <span class="s1">'async'</span> <span class="ow">in</span> <span class="n">kvstore</span><span class="o">.</span><span class="n">type</span>
<span class="k">if</span> <span class="n">update_on_kvstore</span><span class="p">:</span>
<span class="c1"># optimizer preferably needs to be set before init for multiprecision</span>
<span class="n">kvstore</span><span class="o">.</span><span class="n">set_optimizer</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span> <span class="o">=</span> <span class="n">kvstore</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span> <span class="o">=</span> <span class="n">update_on_kvstore</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span> <span class="o">=</span> <span class="kc">True</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">learning_rate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="p">,</span> <span class="n">opt</span><span class="o">.</span><span class="n">Optimizer</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">UserWarning</span><span class="p">(</span><span class="s2">"Optimizer has to be defined before its learning "</span>
<span class="s2">"rate can be accessed."</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="o">.</span><span class="n">learning_rate</span>
<div class="viewcode-block" id="Trainer.set_learning_rate"><a class="viewcode-back" href="../../../api/python/gluon/gluon.html#mxnet.gluon.Trainer.set_learning_rate">[docs]</a> <span class="k">def</span> <span class="nf">set_learning_rate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">lr</span><span class="p">):</span>
<span class="sd">"""Sets a new learning rate of the optimizer.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> lr : float</span>
<span class="sd"> The new learning rate of the optimizer.</span>
<span class="sd"> """</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="p">,</span> <span class="n">opt</span><span class="o">.</span><span class="n">Optimizer</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">UserWarning</span><span class="p">(</span><span class="s2">"Optimizer has to be defined before its learning "</span>
<span class="s2">"rate is mutated."</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="o">.</span><span class="n">set_learning_rate</span><span class="p">(</span><span class="n">lr</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_row_sparse_pull</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">parameter</span><span class="p">,</span> <span class="n">out</span><span class="p">,</span> <span class="n">row_id</span><span class="p">,</span> <span class="n">full_idx</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="sd">"""Internal method to invoke pull operations on KVStore. If `full_idx` is set to True,</span>
<span class="sd"> `kv.pull` is preferred instead of `kv.row_sparse_pull`.</span>
<span class="sd"> """</span>
<span class="c1"># initialize kv and params if not already</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_kvstore</span><span class="p">()</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_params</span><span class="p">()</span>
<span class="n">idx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_param2idx</span><span class="p">[</span><span class="n">parameter</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="k">if</span> <span class="n">full_idx</span> <span class="ow">and</span> <span class="s1">'dist'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">type</span><span class="p">:</span>
<span class="k">assert</span> <span class="n">row_id</span><span class="o">.</span><span class="n">size</span> <span class="o">==</span> <span class="n">out</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">pull</span><span class="p">(</span><span class="n">idx</span><span class="p">,</span> <span class="n">out</span><span class="o">=</span><span class="n">out</span><span class="p">,</span> <span class="n">priority</span><span class="o">=-</span><span class="n">idx</span><span class="p">,</span> <span class="n">ignore_sparse</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">row_sparse_pull</span><span class="p">(</span><span class="n">idx</span><span class="p">,</span> <span class="n">out</span><span class="o">=</span><span class="n">out</span><span class="p">,</span> <span class="n">row_ids</span><span class="o">=</span><span class="n">row_id</span><span class="p">,</span> <span class="n">priority</span><span class="o">=-</span><span class="n">idx</span><span class="p">)</span>
<div class="viewcode-block" id="Trainer.step"><a class="viewcode-back" href="../../../api/python/gluon/gluon.html#mxnet.gluon.Trainer.step">[docs]</a> <span class="k">def</span> <span class="nf">step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">ignore_stale_grad</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="sd">"""Makes one step of parameter update. Should be called after</span>
<span class="sd"> `autograd.backward()` and outside of `record()` scope.</span>
<span class="sd"> For normal parameter updates, `step()` should be used, which internally calls</span>
<span class="sd"> `allreduce_grads()` and then `update()`. However, if you need to get the reduced</span>
<span class="sd"> gradients to perform certain transformation, such as in gradient clipping, then</span>
<span class="sd"> you may want to manually call `allreduce_grads()` and `update()` separately.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> batch_size : int</span>
<span class="sd"> Batch size of data processed. Gradient will be normalized by `1/batch_size`.</span>
<span class="sd"> Set this to 1 if you normalized loss manually with `loss = mean(loss)`.</span>
<span class="sd"> ignore_stale_grad : bool, optional, default=False</span>
<span class="sd"> If true, ignores Parameters with stale gradient (gradient that has not</span>
<span class="sd"> been updated by `backward` after last step) and skip update.</span>
<span class="sd"> """</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_kvstore</span><span class="p">()</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_params</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="o">.</span><span class="n">rescale_grad</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_scale</span> <span class="o">/</span> <span class="n">batch_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_allreduce_grads</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update</span><span class="p">(</span><span class="n">ignore_stale_grad</span><span class="p">)</span></div>
<div class="viewcode-block" id="Trainer.allreduce_grads"><a class="viewcode-back" href="../../../api/python/gluon/gluon.html#mxnet.gluon.Trainer.allreduce_grads">[docs]</a> <span class="k">def</span> <span class="nf">allreduce_grads</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">"""For each parameter, reduce the gradients from different contexts.</span>
<span class="sd"> Should be called after `autograd.backward()`, outside of `record()` scope,</span>
<span class="sd"> and before `trainer.update()`.</span>
<span class="sd"> For normal parameter updates, `step()` should be used, which internally calls</span>
<span class="sd"> `allreduce_grads()` and then `update()`. However, if you need to get the reduced</span>
<span class="sd"> gradients to perform certain transformation, such as in gradient clipping, then</span>
<span class="sd"> you may want to manually call `allreduce_grads()` and `update()` separately.</span>
<span class="sd"> """</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_kvstore</span><span class="p">()</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_params</span><span class="p">()</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span><span class="p">),</span> \
<span class="s1">'allreduce_grads() when parameters are updated on kvstore '</span> \
<span class="s1">'is not supported. Try setting `update_on_kvstore` '</span> \
<span class="s1">'to False when creating trainer.'</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_allreduce_grads</span><span class="p">()</span></div>
<span class="k">def</span> <span class="nf">_allreduce_grads</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="p">:</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">param</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_params</span><span class="p">):</span>
<span class="k">if</span> <span class="n">param</span><span class="o">.</span><span class="n">grad_req</span> <span class="o">!=</span> <span class="s1">'null'</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">push</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">param</span><span class="o">.</span><span class="n">list_grad</span><span class="p">(),</span> <span class="n">priority</span><span class="o">=-</span><span class="n">i</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">pull</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">param</span><span class="o">.</span><span class="n">list_grad</span><span class="p">(),</span> <span class="n">priority</span><span class="o">=-</span><span class="n">i</span><span class="p">,</span>
<span class="n">ignore_sparse</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_distributed</span><span class="p">)</span>
<div class="viewcode-block" id="Trainer.update"><a class="viewcode-back" href="../../../api/python/gluon/gluon.html#mxnet.gluon.Trainer.update">[docs]</a> <span class="k">def</span> <span class="nf">update</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">ignore_stale_grad</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="sd">"""Makes one step of parameter update.</span>
<span class="sd"> Should be called after `autograd.backward()` and outside of `record()` scope,</span>
<span class="sd"> and after `trainer.update()`.</span>
<span class="sd"> For normal parameter updates, `step()` should be used, which internally calls</span>
<span class="sd"> `allreduce_grads()` and then `update()`. However, if you need to get the reduced</span>
<span class="sd"> gradients to perform certain transformation, such as in gradient clipping, then</span>
<span class="sd"> you may want to manually call `allreduce_grads()` and `update()` separately.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> batch_size : int</span>
<span class="sd"> Batch size of data processed. Gradient will be normalized by `1/batch_size`.</span>
<span class="sd"> Set this to 1 if you normalized loss manually with `loss = mean(loss)`.</span>
<span class="sd"> ignore_stale_grad : bool, optional, default=False</span>
<span class="sd"> If true, ignores Parameters with stale gradient (gradient that has not</span>
<span class="sd"> been updated by `backward` after last step) and skip update.</span>
<span class="sd"> """</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_kvstore</span><span class="p">()</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_params</span><span class="p">()</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span><span class="p">),</span> \
<span class="s1">'update() when parameters are updated on kvstore '</span> \
<span class="s1">'is not supported. Try setting `update_on_kvstore` '</span> \
<span class="s1">'to False when creating trainer.'</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="o">.</span><span class="n">rescale_grad</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_scale</span> <span class="o">/</span> <span class="n">batch_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update</span><span class="p">(</span><span class="n">ignore_stale_grad</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_update</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ignore_stale_grad</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">param</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_params</span><span class="p">):</span>
<span class="k">if</span> <span class="n">param</span><span class="o">.</span><span class="n">grad_req</span> <span class="o">==</span> <span class="s1">'null'</span><span class="p">:</span>
<span class="k">continue</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">ignore_stale_grad</span><span class="p">:</span>
<span class="k">for</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">param</span><span class="o">.</span><span class="n">_check_and_get</span><span class="p">(</span><span class="n">param</span><span class="o">.</span><span class="n">_data</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">data</span><span class="o">.</span><span class="n">_fresh_grad</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">UserWarning</span><span class="p">(</span>
<span class="s2">"Gradient of Parameter `</span><span class="si">%s</span><span class="s2">` on context </span><span class="si">%s</span><span class="s2"> has not been updated "</span>
<span class="s2">"by backward since last `step`. This could mean a bug in your "</span>
<span class="s2">"model that made it only use a subset of the Parameters (Blocks) "</span>
<span class="s2">"for this iteration. If you are intentionally only using a subset, "</span>
<span class="s2">"call step with ignore_stale_grad=True to suppress this "</span>
<span class="s2">"warning and skip updating of Parameters with stale gradient"</span> \
<span class="o">%</span><span class="p">(</span><span class="n">param</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">context</span><span class="p">)))</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span><span class="p">:</span>
<span class="k">if</span> <span class="n">param</span><span class="o">.</span><span class="n">_stype</span> <span class="o">==</span> <span class="s1">'default'</span><span class="p">:</span>
<span class="c1"># 'row_sparse' parameters are not pulled immediately - they're pulled</span>
<span class="c1"># in `Block.forward`</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">pull</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">param</span><span class="o">.</span><span class="n">list_data</span><span class="p">(),</span> <span class="n">priority</span><span class="o">=-</span><span class="n">i</span><span class="p">)</span>
<span class="k">continue</span>
<span class="k">for</span> <span class="n">upd</span><span class="p">,</span> <span class="n">arr</span><span class="p">,</span> <span class="n">grad</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_updaters</span><span class="p">,</span> <span class="n">param</span><span class="o">.</span><span class="n">list_data</span><span class="p">(),</span> <span class="n">param</span><span class="o">.</span><span class="n">list_grad</span><span class="p">()):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">ignore_stale_grad</span> <span class="ow">or</span> <span class="n">arr</span><span class="o">.</span><span class="n">_fresh_grad</span><span class="p">:</span>
<span class="n">upd</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">grad</span><span class="p">,</span> <span class="n">arr</span><span class="p">)</span>
<span class="n">arr</span><span class="o">.</span><span class="n">_fresh_grad</span> <span class="o">=</span> <span class="kc">False</span>
<!-- Viewcode listing for Trainer.save_states(fname): asserts an optimizer exists, lazily initializes the kvstore and any pending params, then saves optimizer state either via the kvstore (update_on_kvstore) or by dumping the first updater's states to the file. Generated by Sphinx/Pygments; comment is inline so no newline is added inside the enclosing <pre>. --><div class="viewcode-block" id="Trainer.save_states"><a class="viewcode-back" href="../../../api/python/gluon/gluon.html#mxnet.gluon.Trainer.save_states">[docs]</a> <span class="k">def</span> <span class="nf">save_states</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fname</span><span class="p">):</span>
<span class="sd">"""Saves trainer states (e.g. optimizer, momentum) to a file.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> fname : str</span>
<span class="sd"> Path to output states file.</span>
<span class="sd"> """</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_kvstore</span><span class="p">()</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_params</span><span class="p">()</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span><span class="p">:</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span><span class="p">,</span> <span class="s2">"Cannot save trainer states when some "</span> \
<span class="s2">"parameters are not yet initialized in kvstore."</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">save_optimizer_states</span><span class="p">(</span><span class="n">fname</span><span class="p">,</span> <span class="n">dump_optimizer</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">fname</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">)</span> <span class="k">as</span> <span class="n">fout</span><span class="p">:</span>
<span class="n">fout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_updaters</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">get_states</span><span class="p">(</span><span class="n">dump_optimizer</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span></div>
<!-- Viewcode listing for Trainer.load_states(fname): lazily initializes kvstore/params, then either loads optimizer state through the kvstore (rebinding self._optimizer and its param_dict) or reads the file and applies it to every local updater, sharing one optimizer instance. Generated by Sphinx/Pygments; comment is inline so no newline is added inside the enclosing <pre>. --><div class="viewcode-block" id="Trainer.load_states"><a class="viewcode-back" href="../../../api/python/gluon/gluon.html#mxnet.gluon.Trainer.load_states">[docs]</a> <span class="k">def</span> <span class="nf">load_states</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fname</span><span class="p">):</span>
<span class="sd">"""Loads trainer states (e.g. optimizer, momentum) from a file.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> fname : str</span>
<span class="sd"> Path to input states file.</span>
<span class="sd"> """</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kv_initialized</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_kvstore</span><span class="p">()</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_params_to_init</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_init_params</span><span class="p">()</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_update_on_kvstore</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">load_optimizer_states</span><span class="p">(</span><span class="n">fname</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_kvstore</span><span class="o">.</span><span class="n">_updater</span><span class="o">.</span><span class="n">optimizer</span>
<span class="n">param_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">i</span><span class="p">:</span> <span class="n">param</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">param</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_params</span><span class="p">)}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span><span class="o">.</span><span class="n">param_dict</span> <span class="o">=</span> <span class="n">param_dict</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">fname</span><span class="p">,</span> <span class="s1">'rb'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">states</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
<span class="k">for</span> <span class="n">updater</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_updaters</span><span class="p">:</span>
<span class="n">updater</span><span class="o">.</span><span class="n">set_states</span><span class="p">(</span><span class="n">states</span><span class="p">)</span>
<span class="n">updater</span><span class="o">.</span><span class="n">optimizer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_updaters</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">optimizer</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_optimizer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_updaters</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">optimizer</span></div></div>
</pre></div>
</div>
</div>
<div aria-label="main navigation" class="sphinxsidebar rightsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
</div>
</div>
</div><div class="footer">
<div class="section-disclaimer">
<div class="container">
<div>
<!-- Fix: image previously had no alt attribute (WCAG 1.1.1); describes the logo for assistive technology. -->
<img alt="Apache Incubator logo" height="60" src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/apache_incubator_logo.png"/>
<p>
Apache MXNet is an effort undergoing incubation at The Apache Software Foundation (ASF), <strong>sponsored by the <i>Apache Incubator</i></strong>. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
</p>
<p>
"Copyright © 2017-2018, The Apache Software Foundation
Apache MXNet, MXNet, Apache, the Apache feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the Apache Software Foundation."
</p>
</div>
</div>
</div>
</div> <!-- pagename != index -->
</div>
<script crossorigin="anonymous" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
<script src="../../../_static/js/sidebar.js" type="text/javascript"></script>
<script src="../../../_static/js/search.js" type="text/javascript"></script>
<script src="../../../_static/js/navbar.js" type="text/javascript"></script>
<script src="../../../_static/js/clipboard.min.js" type="text/javascript"></script>
<script src="../../../_static/js/copycode.js" type="text/javascript"></script>
<script src="../../../_static/js/page.js" type="text/javascript"></script>
<!-- Reveal the page once the DOM is ready; the body starts hidden (via CSS) to avoid a flash of unstyled content. -->
<script type="text/javascript">
// Fix: `$('body').ready(...)` is unsupported/deprecated — .ready() only applies
// to the document. `$(function(){...})` is the equivalent, documented idiom and
// fires at the same point (DOMContentLoaded).
$(function () {
$('body').css('visibility', 'visible');
});
</script>
</body>
</html>