versions/1.4.1/faq/gradient_compression.html - mxnet-site - Git at Google

 <!DOCTYPE html>

 <html lang="en">
 <head>
 <meta charset="utf-8"/>
 <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
 <meta content="width=device-width, initial-scale=1" name="viewport"/>
 <meta content="Gradient Compression" property="og:title">
 <meta content="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/og-logo.png" property="og:image">
 <meta content="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/og-logo.png" property="og:image:secure_url">
 <meta content="Gradient Compression" property="og:description"/>
 <title>Gradient Compression — mxnet  documentation</title>
 <link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" rel="stylesheet"/>
 <link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet"/>
 <link href="../_static/basic.css" rel="stylesheet" type="text/css">
 <link href="../_static/pygments.css" rel="stylesheet" type="text/css">
 <link href="../_static/mxnet.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript">
       // Global options object for the documentation scripts loaded further down
       // (presumably consumed by ../_static/doctools.js and the search tools —
       // TODO confirm against those files; they are not visible from this page).
       var DOCUMENTATION_OPTIONS = {
         // Same relative prefix this page uses for ../_static and ../search.html.
         URL_ROOT:    '../',
         // Left empty on this page.
         VERSION:     '',
         COLLAPSE_INDEX: false,
         FILE_SUFFIX: '.html',
         HAS_SOURCE:  true,
         SOURCELINK_SUFFIX: '.txt'
       };
     </script>
 <script src="https://code.jquery.com/jquery-1.11.1.min.js" type="text/javascript"></script>
 <script src="../_static/underscore.js" type="text/javascript"></script>
 <script src="../_static/searchtools_custom.js" type="text/javascript"></script>
 <script src="../_static/doctools.js" type="text/javascript"></script>
 <script src="../_static/selectlang.js" type="text/javascript"></script>
 <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>
 <script type="text/javascript">
   // When the DOM is ready, load the search index for this docs version and
   // then call Search.init(). `Search` comes from a script included above.
   jQuery(function () {
     Search.loadIndex("/versions/1.4.1/searchindex.js");
     Search.init();
   });
 </script>
 <script>
       // Google Analytics (analytics.js) loader snippet.
       // - Records the name 'ga' in window.GoogleAnalyticsObject.
       // - Defines window.ga (unless it already exists) as a stub that pushes its
       //   arguments onto ga.q, and stores the current timestamp in ga.l.
       // - Creates a <script> element with async set, points it at analytics.js and
       //   inserts it before the first <script> element in the document.
       (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
       (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new
       Date();a=s.createElement(o),
       m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
       })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

       // Issue the 'create' command for property UA-96378503-1, then a 'pageview' hit.
       // With the stub above these calls are queued in ga.q (presumably replayed once
       // analytics.js has loaded — see the analytics.js documentation).
       ga('create', 'UA-96378503-1', 'auto');
       ga('send', 'pageview');

     </script>
 <!-- -->
 <!-- <script type="text/javascript" src="../_static/jquery.js"></script> -->
 <!-- -->
 <!-- <script type="text/javascript" src="../_static/underscore.js"></script> -->
 <!-- -->
 <!-- <script type="text/javascript" src="../_static/doctools.js"></script> -->
 <!-- -->
 <!-- <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> -->
 <!-- -->
 <link href="../genindex.html" rel="index" title="Index">
 <link href="../search.html" rel="search" title="Search"/>
 <link href="index.html" rel="up" title="MXNet FAQ"/>
 <link href="model_parallel_lstm.html" rel="next" title="Training with Multiple GPUs Using Model Parallelism"/>
 <link href="float16.html" rel="prev" title="Mixed precision training using float16"/>
 <link href="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-icon.png" rel="icon" type="image/png"/>
 </head>
 <body background="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-background-compressed.jpeg" role="document">
 <div class="content-block"><div class="navbar navbar-fixed-top">
 <div class="container" id="navContainer">
 <div class="innder" id="header-inner">
 <h1 id="logo-wrap">
 <a href="../" id="logo"><img alt="MXNet home" src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet_logo.png"/></a>
 </h1>
 <nav class="nav-bar" id="main-nav">
 <a class="main-nav-link" href="/versions/1.4.1/install/index.html">Install</a>
 <span id="dropdown-menu-position-anchor">
 <a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Gluon <span class="caret"></span></a>
 <ul class="dropdown-menu navbar-menu" id="package-dropdown-menu">
 <li><a class="main-nav-link" href="/versions/1.4.1/tutorials/gluon/gluon.html">About</a></li>
 <li><a class="main-nav-link" href="https://www.d2l.ai/">Dive into Deep Learning</a></li>
 <li><a class="main-nav-link" href="https://gluon-cv.mxnet.io">GluonCV Toolkit</a></li>
 <li><a class="main-nav-link" href="https://gluon-nlp.mxnet.io/">GluonNLP Toolkit</a></li>
 </ul>
 </span>
 <span id="dropdown-menu-position-anchor">
 <a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">API <span class="caret"></span></a>
 <ul class="dropdown-menu navbar-menu" id="package-dropdown-menu">
 <li><a class="main-nav-link" href="/versions/1.4.1/api/python/index.html">Python</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/c++/index.html">C++</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/clojure/index.html">Clojure</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/java/index.html">Java</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/julia/index.html">Julia</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/perl/index.html">Perl</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/r/index.html">R</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/scala/index.html">Scala</a></li>
 </ul>
 </span>
 <span id="dropdown-menu-position-anchor-docs">
 <a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Docs <span class="caret"></span></a>
 <ul class="dropdown-menu navbar-menu" id="package-dropdown-menu-docs">
 <li><a class="main-nav-link" href="/versions/1.4.1/faq/index.html">FAQ</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/tutorials/index.html">Tutorials</a></li>
 <li><a class="main-nav-link" href="https://github.com/apache/incubator-mxnet/tree/1.4.1/example">Examples</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/architecture/index.html">Architecture</a></li>
 <li><a class="main-nav-link" href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home">Developer Wiki</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/model_zoo/index.html">Model Zoo</a></li>
 <li><a class="main-nav-link" href="https://github.com/onnx/onnx-mxnet">ONNX</a></li>
 </ul>
 </span>
 <span id="dropdown-menu-position-anchor-community">
 <a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">Community <span class="caret"></span></a>
 <ul class="dropdown-menu navbar-menu" id="package-dropdown-menu-community">
 <li><a class="main-nav-link" href="http://discuss.mxnet.io">Forum</a></li>
 <li><a class="main-nav-link" href="https://github.com/apache/incubator-mxnet/tree/1.4.1">Github</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/community/contribute.html">Contribute</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/community/ecosystem.html">Ecosystem</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/community/powered_by.html">Powered By</a></li>
 </ul>
 </span>
 <span id="dropdown-menu-position-anchor-version" style="position: relative"><a href="#" class="main-nav-link dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">1.4.1<span class="caret"></span></a><ul id="package-dropdown-menu" class="dropdown-menu"><li><a href="/">master</a></li><li><a href="/versions/1.7.0/">1.7.0</a></li><li><a href="/versions/1.6.0/">1.6.0</a></li><li><a href="/versions/1.5.0/">1.5.0</a></li><li><a href="/versions/1.4.1/">1.4.1</a></li><li><a href="/versions/1.3.1/">1.3.1</a></li><li><a href="/versions/1.2.1/">1.2.1</a></li><li><a href="/versions/1.1.0/">1.1.0</a></li><li><a href="/versions/1.0.0/">1.0.0</a></li><li><a href="/versions/0.12.1/">0.12.1</a></li><li><a href="/versions/0.11.0/">0.11.0</a></li></ul></span></nav>
 <script>
   // Returns the relative prefix "../" — the same prefix this page uses for
   // ../_static and ../search.html. Callers are not visible from this page.
   function getRootPath() {
     return "../";
   }
 </script>
 <div class="burgerIcon dropdown">
 <a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button">☰</a>
 <ul class="dropdown-menu" id="burgerMenu">
 <li><a href="/versions/1.4.1/install/index.html">Install</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/tutorials/index.html">Tutorials</a></li>
 <li class="dropdown-submenu dropdown">
 <a aria-expanded="true" aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" tabindex="-1">Gluon</a>
 <ul class="dropdown-menu navbar-menu" id="package-dropdown-menu">
 <li><a class="main-nav-link" href="/versions/1.4.1/tutorials/gluon/gluon.html">About</a></li>
 <li><a class="main-nav-link" href="http://gluon.mxnet.io">The Straight Dope (Tutorials)</a></li>
 <li><a class="main-nav-link" href="https://gluon-cv.mxnet.io">GluonCV Toolkit</a></li>
 <li><a class="main-nav-link" href="https://gluon-nlp.mxnet.io/">GluonNLP Toolkit</a></li>
 </ul>
 </li>
 <li class="dropdown-submenu">
 <a aria-expanded="true" aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" tabindex="-1">API</a>
 <ul class="dropdown-menu">
 <li><a class="main-nav-link" href="/versions/1.4.1/api/python/index.html">Python</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/c++/index.html">C++</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/clojure/index.html">Clojure</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/java/index.html">Java</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/julia/index.html">Julia</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/perl/index.html">Perl</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/r/index.html">R</a></li>
 <li><a class="main-nav-link" href="/versions/1.4.1/api/scala/index.html">Scala</a></li>
 </ul>
 </li>
 <li class="dropdown-submenu">
 <a aria-expanded="true" aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" tabindex="-1">Docs</a>
 <ul class="dropdown-menu">
 <li><a href="/versions/1.4.1/faq/index.html" tabindex="-1">FAQ</a></li>
 <li><a href="/versions/1.4.1/tutorials/index.html" tabindex="-1">Tutorials</a></li>
 <li><a href="https://github.com/apache/incubator-mxnet/tree/1.4.1/example" tabindex="-1">Examples</a></li>
 <li><a href="/versions/1.4.1/architecture/index.html" tabindex="-1">Architecture</a></li>
 <li><a href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home" tabindex="-1">Developer Wiki</a></li>
 <li><a href="/versions/1.4.1/model_zoo/index.html" tabindex="-1">Gluon Model Zoo</a></li>
 <li><a href="https://github.com/onnx/onnx-mxnet" tabindex="-1">ONNX</a></li>
 </ul>
 </li>
 <li class="dropdown-submenu dropdown">
 <a aria-haspopup="true" class="dropdown-toggle burger-link" data-toggle="dropdown" href="#" role="button" tabindex="-1">Community</a>
 <ul class="dropdown-menu">
 <li><a href="http://discuss.mxnet.io" tabindex="-1">Forum</a></li>
 <li><a href="https://github.com/apache/incubator-mxnet/tree/1.4.1" tabindex="-1">Github</a></li>
 <li><a href="/versions/1.4.1/community/contribute.html" tabindex="-1">Contribute</a></li>
 <li><a href="/versions/1.4.1/community/ecosystem.html" tabindex="-1">Ecosystem</a></li>
 <li><a href="/versions/1.4.1/community/powered_by.html" tabindex="-1">Powered By</a></li>
 </ul>
 </li>
 <li id="dropdown-menu-position-anchor-version-mobile" class="dropdown-submenu" style="position: relative"><a href="#" tabindex="-1">1.4.1</a><ul class="dropdown-menu"><li><a tabindex="-1" href="/">master</a></li><li><a tabindex="-1" href="/versions/1.7.0/">1.7.0</a></li><li><a tabindex="-1" href="/versions/1.6.0/">1.6.0</a></li><li><a tabindex="-1" href="/versions/1.5.0/">1.5.0</a></li><li><a tabindex="-1" href="/versions/1.4.1/">1.4.1</a></li><li><a tabindex="-1" href="/versions/1.3.1/">1.3.1</a></li><li><a tabindex="-1" href="/versions/1.2.1/">1.2.1</a></li><li><a tabindex="-1" href="/versions/1.1.0/">1.1.0</a></li><li><a tabindex="-1" href="/versions/1.0.0/">1.0.0</a></li><li><a tabindex="-1" href="/versions/0.12.1/">0.12.1</a></li><li><a tabindex="-1" href="/versions/0.11.0/">0.11.0</a></li></ul></li></ul>
 </div>
 <div class="plusIcon dropdown">
 <a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"><span aria-hidden="true" class="glyphicon glyphicon-plus"></span></a>
 <ul class="dropdown-menu dropdown-menu-right" id="plusMenu"></ul>
 </div>
 <div id="search-input-wrap">
 <form action="../search.html" autocomplete="off" class="" method="get" role="search">
 <div class="form-group inner-addon left-addon">
 <i class="glyphicon glyphicon-search"></i>
 <input class="form-control" name="q" placeholder="Search" type="text"/>
 </div>
 <input name="check_keywords" type="hidden" value="yes"/>
 <input name="area" type="hidden" value="default"/>
 </form>
 <div id="search-preview"></div>
 </div>
 <div id="searchIcon">
 <span aria-hidden="true" class="glyphicon glyphicon-search"></span>
 </div>
 <!-- <div id="lang-select-wrap"> -->
 <!--   <label id="lang-select-label"> -->
 <!--     <\!-- <i class="fa fa-globe"></i> -\-> -->
 <!--     <span></span> -->
 <!--   </label> -->
 <!--   <select id="lang-select"> -->
 <!--     <option value="en">Eng</option> -->
 <!--     <option value="zh">中文</option> -->
 <!--   </select> -->
 <!-- </div> -->
 <!--     <a id="mobile-nav-toggle">
         <span class="mobile-nav-toggle-bar"></span>
         <span class="mobile-nav-toggle-bar"></span>
         <span class="mobile-nav-toggle-bar"></span>
       </a> -->
 </div>
 </div>
 </div>
 <script type="text/javascript">
         // Set an inline white background on <body>; the <body> tag itself
         // declares a background image through its `background` attribute.
         $('body').css({ background: 'white' });
     </script>
 <div class="container">
 <div class="row">
 <div aria-label="main navigation" class="sphinxsidebar leftsidebar" role="navigation">
 <div class="sphinxsidebarwrapper">
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="../api/index.html">MXNet APIs</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../architecture/index.html">MXNet Architecture</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../community/index.html">MXNet Community</a></li>
 <li class="toctree-l1"><a class="reference internal" href="index.html">MXNet FAQ</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../gluon/index.html">About Gluon</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../install/index.html">Installing MXNet</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../install/index.html#nvidia-jetson-tx-family">Nvidia Jetson TX family</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../install/index.html#source-download">Source Download</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../model_zoo/index.html">MXNet Model Zoo</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/index.html">Tutorials</a></li>
 </ul>
 </div>
 </div>
 <div class="content">
 <div class="page-tracker"></div>
 <!--- Licensed to the Apache Software Foundation (ASF) under one -->
 <!--- or more contributor license agreements.  See the NOTICE file -->
 <!--- distributed with this work for additional information -->
 <!--- regarding copyright ownership.  The ASF licenses this file -->
 <!--- to you under the Apache License, Version 2.0 (the -->
 <!--- "License"); you may not use this file except in compliance -->
 <!--- with the License.  You may obtain a copy of the License at --><!---   http://www.apache.org/licenses/LICENSE-2.0 --><!--- Unless required by applicable law or agreed to in writing, -->
 <!--- software distributed under the License is distributed on an -->
 <!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
 <!--- KIND, either express or implied.  See the License for the -->
 <!--- specific language governing permissions and limitations -->
 <!--- under the License. --><div class="section" id="gradient-compression">
 <span id="gradient-compression"></span><h1>Gradient Compression<a class="headerlink" href="#gradient-compression" title="Permalink to this headline">¶</a></h1>
 <p>Gradient Compression reduces communication bandwidth, and in some scenarios, it can make training more scalable and efficient without significant loss in convergence rate or accuracy. Example implementations with GPUs, CPUs, and distributed training are provided in this document.</p>
 <div class="section" id="benefits">
 <span id="benefits"></span><h2>Benefits<a class="headerlink" href="#benefits" title="Permalink to this headline">¶</a></h2>
 <p><strong>Increased Speed</strong></p>
 <p>For architectures with fully connected layers, the gradient compression capability is observed to speed up training by about 2x, depending on the size of the model and the network bandwidth of the instance. Bigger models see a larger speedup with gradient compression.</p>
 <p><strong>Minimal Accuracy Loss</strong></p>
 <p>Gradient compression uses the approach of delaying the synchronization of weight updates that are small. Although small weight updates might not be sent for that batch, this information is not discarded. Once the weight updates for this location accumulate to become a larger value, they will be propagated. Since there is no information loss, but only delayed updates, it does not lead to a significant loss in accuracy or convergence rate. In distributed training experiments [1], the accuracy loss observed due to gradient compression was as low as 1%.</p>
 </div>
 <div class="section" id="when-to-use-gradient-compression">
 <span id="when-to-use-gradient-compression"></span><h2>When to Use Gradient Compression<a class="headerlink" href="#when-to-use-gradient-compression" title="Permalink to this headline">¶</a></h2>
 <p>When training models whose architectures include large fully connected components, it can be helpful to use gradient compression. For larger models, as well as recurrent neural networks, the communication cost becomes a major factor. Such models stand to benefit greatly with gradient compression.</p>
 <div class="section" id="gpu-versus-cpu">
 <span id="gpu-versus-cpu"></span><h3>GPU versus CPU<a class="headerlink" href="#gpu-versus-cpu" title="Permalink to this headline">¶</a></h3>
 <p>The greatest benefits from gradient compression are realized when using multi-node (single or multi-GPU) distributed training. Training on CPU would provide a lower compute density per compute node as compared to the massive compute density per compute node on a GPU. Due to this, the required communication bandwidth for CPU-based nodes during training is not as high as for GPU-based nodes. Hence, the benefits of gradient compression are lower for CPU-based nodes as compared to GPU-based nodes.</p>
 </div>
 <div class="section" id="network-latency">
 <span id="network-latency"></span><h3>Network Latency<a class="headerlink" href="#network-latency" title="Permalink to this headline">¶</a></h3>
 <p>Gradient compression is beneficial for distributed training across network-connected nodes. Depending on the network latency between nodes and the model’s size, communication can slow training down enough that gradient compression provides speed improvements.</p>
 <p>You may not want to use gradient compression if you have low-latency network communication.</p>
 </div>
 <div class="section" id="model-size">
 <span id="model-size"></span><h3>Model Size<a class="headerlink" href="#model-size" title="Permalink to this headline">¶</a></h3>
 <p>Distributed training involves synchronization of weights after each batch. Larger models have much higher communication costs during training, hence such models stand to benefit much more from gradient compression.
 When running distributed training with gradient compression, the quantize and dequantize operations happen on CPU parallelized with OpenMP. For smaller models, when training on GPUs, it helps to set <code class="docutils literal"><span class="pre">OMP_NUM_THREADS=1</span></code> on each node, so that the overhead of launching OMP threads doesn’t cause the compression and decompression to be slow.</p>
 </div>
 <div class="section" id="model-architecture">
 <span id="model-architecture"></span><h3>Model Architecture<a class="headerlink" href="#model-architecture" title="Permalink to this headline">¶</a></h3>
 <p>The communication bandwidth requirements during training vary across various neural network architectures and hence the benefits of gradient compression vary accordingly.</p>
 <p>In networks which have significant fully connected components, since such layers have low compute cost on GPUs, communication becomes a bottleneck limiting the speed of distributed training. Gradient compression can help reduce the communication cost, and thus speed up training in such cases. We have observed a speedup of about 2x on large fully connected neural networks. Models like AlexNet and VGG have large fully connected components as part of the network, and hence stand to benefit from gradient compression. As with these models, Long Short-Term Memory architectures require more communication bandwidth, so they also exhibit speed improvements with gradient compression.</p>
 <p>Architectures like Convolutional Neural Networks on the other hand have a higher compute cost, in which case some communication can be parallelized with computation. Since communication is not the bottleneck in such networks, gradient compression doesn’t help much.</p>
 </div>
 <div class="section" id="single-node-gradient-compression">
 <span id="single-node-gradient-compression"></span><h3>Single Node Gradient Compression<a class="headerlink" href="#single-node-gradient-compression" title="Permalink to this headline">¶</a></h3>
 <p>When the training is configured to use device to device communication on a single node with multiple GPUs, gradient compression can be used to reduce the cost of communication. This can provide about 20% speedup for large models using older generation architectures. However, speed benefits may be negligible on a machine with a newer generation architecture where GPUs can communicate at low latency.</p>
 </div>
 </div>
 <div class="section" id="approach">
 <span id="approach"></span><h2>Approach<a class="headerlink" href="#approach" title="Permalink to this headline">¶</a></h2>
 <p>The idea behind gradient compression comes from two observations:</p>
 <p>First, when training large neural networks, the gradients of weights computed for a small mini-batch of training data are typically sparse. Only a small fraction of the weights have significant updates after each mini-batch. The synchronization of updates that are near zero can be safely delayed longer than the typical mini-batch size. This essentially means that the rate of weight-update can vary depending on the value of an individual weight.</p>
 <p>Secondly, gradients can be compressed significantly by considering only those gradient elements whose absolute values exceed a threshold, and then quantizing them to use lower bits per gradient value. By compressing the gradients, we can reduce communication bandwidth. The delayed gradient values, in the form of quantization error and values that don’t meet the threshold, are aggregated into a gradient residual which is communicated when it reaches the threshold.</p>
 </div>
 <div class="section" id="technical-implementation">
 <span id="technical-implementation"></span><h2>Technical Implementation<a class="headerlink" href="#technical-implementation" title="Permalink to this headline">¶</a></h2>
 <div class="section" id="two-bit-quantization">
 <span id="two-bit-quantization"></span><h3>Two Bit Quantization<a class="headerlink" href="#two-bit-quantization" title="Permalink to this headline">¶</a></h3>
 <p>Currently the supported type of quantization uses two bits for each gradient value. Any positive value greater than or equal to the threshold sets two bits as <code class="docutils literal"><span class="pre">11</span></code>, any negative value whose absolute value is greater than or equal to the threshold sets two bits as <code class="docutils literal"><span class="pre">10</span></code>, and others are set to <code class="docutils literal"><span class="pre">00</span></code>. This enables us to store 16 quantized gradients as one 32-bit float. The error in quantization, which is <code class="docutils literal"><span class="pre">original_value</span> <span class="pre">-</span> <span class="pre">quantized_value</span></code>, is stored in the form of a gradient residual.</p>
 </div>
 <div class="section" id="types-of-kvstore">
 <span id="types-of-kvstore"></span><h3>Types of Kvstore<a class="headerlink" href="#types-of-kvstore" title="Permalink to this headline">¶</a></h3>
 <p>Supported types of <code class="docutils literal"><span class="pre">kvstore</span></code> are <code class="docutils literal"><span class="pre">device</span></code> and all distributed kvstores such as <code class="docutils literal"><span class="pre">dist_sync</span></code>, <code class="docutils literal"><span class="pre">dist_async</span></code>, and <code class="docutils literal"><span class="pre">dist_sync_device</span></code>. When <code class="docutils literal"><span class="pre">kvstore</span></code> is <code class="docutils literal"><span class="pre">device</span></code>, the communication between GPUs is compressed. Please note that this increases the memory usage of GPUs because of the additional residual stored. When using a distributed kvstore, worker-to-server communication is compressed. In this case, compression and decompression happen on the CPU, and gradient residuals will be stored on the CPU. Server-to-worker communication and device-to-device communication are not compressed to avoid multiple levels of compression.</p>
 </div>
 </div>
 <div class="section" id="enabling-the-gradient-compression-in-mxnet">
 <span id="enabling-the-gradient-compression-in-mxnet"></span><h2>Enabling the Gradient Compression in MXNet<a class="headerlink" href="#enabling-the-gradient-compression-in-mxnet" title="Permalink to this headline">¶</a></h2>
 <p>Gradient compression is a run-time configuration parameter to be enabled during training. Here are the MXNet APIs to enable gradient compression:</p>
 <p><strong>Gluon API</strong>:</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">trainer</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">Trainer</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="n">compression_params</span><span class="o">=</span><span class="p">{</span><span class="s1">'type'</span><span class="p">:</span><span class="s1">'2bit'</span><span class="p">,</span> <span class="s1">'threshold'</span><span class="p">:</span><span class="mf">0.5</span><span class="p">})</span>
 </pre></div>
 </div>
 <p>A reference <code class="docutils literal"><span class="pre">gluon</span></code> implementation with a gradient compression option can be found in the <a class="reference external" href="https://github.com/apache/incubator-mxnet/blob/master/example/gluon/word_language_model/train.py">train.py script from a word-level language modeling RNN example</a>.</p>
 <p><strong>Module API</strong>:</p>
 <div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">mod</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">mod</span><span class="o">.</span><span class="n">Module</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="n">compression_params</span><span class="o">=</span><span class="p">{</span><span class="s1">'type'</span><span class="p">:</span><span class="s1">'2bit'</span><span class="p">,</span> <span class="s1">'threshold'</span><span class="p">:</span><span class="mf">0.5</span><span class="p">})</span>
 </pre></div>
 </div>
 <p>A <code class="docutils literal"><span class="pre">module</span></code> example is provided with <a class="reference external" href="/versions/master/faq/multi_devices.html#distributed-training-with-multiple-machines">this guide for setting up MXNet with distributed training</a>. It comes with the option of turning on gradient compression as an argument to the <a class="reference external" href="https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/train_mnist.py">train_mnist.py script</a>.</p>
 <div class="section" id="configuration-details">
 <span id="configuration-details"></span><h3>Configuration Details<a class="headerlink" href="#configuration-details" title="Permalink to this headline">¶</a></h3>
 <p><strong>Threshold</strong></p>
 <p>A default <code class="docutils literal"><span class="pre">threshold</span></code> value of <code class="docutils literal"><span class="pre">0.5</span></code> is good for most use cases, but to get the most benefit from gradient compression for a particular scenario, it can be beneficial to experiment. If the threshold is set to a very large value, say <code class="docutils literal"><span class="pre">10.0</span></code>, then the updates become too infrequent and the training will converge more slowly. Setting the threshold automatically is expected in a future release.</p>
 <p><strong>Quantization</strong></p>
 <p>This release supports 2-bit quantization for encoding of gradients to reduce the communication bandwidth during training. Future releases will support 1-bit quantization and other approaches for encoding of gradients based on experimental evidence of benefits and user demand.</p>
 <p><strong>Sparse Format</strong></p>
 <p>We believe that the density of data will need to be really low (i.e., more than about 90% zeros) to reap the benefits of the sparse format. However, this is an area of experimentation that will be explored in a future release.</p>
 </div>
 </div>
 <div class="section" id="references">
 <span id="references"></span><h2>References<a class="headerlink" href="#references" title="Permalink to this headline">¶</a></h2>
 <div class="toctree-wrapper compound">
 <ul>
 <li class="toctree-l1"><a class="reference external" href="https://s3-us-west-2.amazonaws.com/amazon.jobs-public-documents/strom_interspeech2015.pdf">Nikko Strom, Amazon.com, Scalable Distributed DNN Training Using Commodity GPU Cloud Computing.</a></li>
 </ul>
 </div>
 </div>
 </div>
 </div>
 </div>
 <div aria-label="main navigation" class="sphinxsidebar rightsidebar" role="navigation">
 <div class="sphinxsidebarwrapper">
 <h3><a href="../index.html">Table Of Contents</a></h3>
 <ul>
 <li><a class="reference internal" href="#">Gradient Compression</a><ul>
 <li><a class="reference internal" href="#benefits">Benefits</a></li>
 <li><a class="reference internal" href="#when-to-use-gradient-compression">When to Use Gradient Compression</a><ul>
 <li><a class="reference internal" href="#gpu-versus-cpu">GPU versus CPU</a></li>
 <li><a class="reference internal" href="#network-latency">Network Latency</a></li>
 <li><a class="reference internal" href="#model-size">Model Size</a></li>
 <li><a class="reference internal" href="#model-architecture">Model Architecture</a></li>
 <li><a class="reference internal" href="#single-node-gradient-compression">Single Node Gradient Compression</a></li>
 </ul>
 </li>
 <li><a class="reference internal" href="#approach">Approach</a></li>
 <li><a class="reference internal" href="#technical-implementation">Technical Implementation</a><ul>
 <li><a class="reference internal" href="#two-bit-quantization">Two Bit Quantization</a></li>
 <li><a class="reference internal" href="#types-of-kvstore">Types of Kvstore</a></li>
 </ul>
 </li>
 <li><a class="reference internal" href="#enabling-the-gradient-compression-in-mxnet">Enabling the Gradient Compression in MXNet</a><ul>
 <li><a class="reference internal" href="#configuration-details">Configuration Details</a></li>
 </ul>
 </li>
 <li><a class="reference internal" href="#references">References</a></li>
 </ul>
 </li>
 </ul>
 </div>
 </div>
 </div><div class="footer">
 <div class="section-disclaimer">
 <div class="container">
 <div>
 <img alt="Apache Incubator logo" height="60" src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/apache_incubator_logo.png"/>
 <p>
             Apache MXNet is an effort undergoing incubation at The Apache Software Foundation (ASF), <strong>sponsored by the <i>Apache Incubator</i></strong>. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
         </p>
 <p>
             "Copyright © 2017-2018, The Apache Software Foundation
             Apache MXNet, MXNet, Apache, the Apache feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the Apache Software Foundation."
         </p>
 </div>
 </div>
 </div>
 </div> <!-- pagename != index -->
 </div>
 <script crossorigin="anonymous" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
 <script src="../_static/js/sidebar.js" type="text/javascript"></script>
 <script src="../_static/js/search.js" type="text/javascript"></script>
 <script src="../_static/js/navbar.js" type="text/javascript"></script>
 <script src="../_static/js/clipboard.min.js" type="text/javascript"></script>
 <script src="../_static/js/copycode.js" type="text/javascript"></script>
 <script src="../_static/js/page.js" type="text/javascript"></script>
 <script src="../_static/js/docversion.js" type="text/javascript"></script>
 <script type="text/javascript">
         // Once the ready callback fires, set an inline `visibility: visible` on <body>.
         $('body').ready(function () {
             $('body').css({ visibility: 'visible' });
         });
     </script>
 </body>
 </html>