| <!DOCTYPE html> |
| |
| <!--- |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| http://www.apache.org/licenses/LICENSE-2.0 |
| Unless required by applicable law or agreed to in writing, |
| software distributed under the License is distributed on an |
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| KIND, either express or implied. See the License for the |
| specific language governing permissions and limitations |
| under the License. |
| --> |
| |
| <html lang="en"><head> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <link href="/versions/master/assets/img/mxnet-icon.png" rel="icon" type="image/png"><!-- Begin Jekyll SEO tag v2.6.1 --> |
| <title>Some Tips for Improving MXNet Performance | Apache MXNet</title> |
| <meta name="generator" content="Jekyll v4.0.0" /> |
| <meta property="og:title" content="Some Tips for Improving MXNet Performance" /> |
| <meta property="og:locale" content="en_US" /> |
| <meta name="description" content="A flexible and efficient library for deep learning." /> |
| <meta property="og:description" content="A flexible and efficient library for deep learning." /> |
| <link rel="canonical" href="https://mxnet.apache.org/versions/master/api/faq/perf" /> |
| <meta property="og:url" content="https://mxnet.apache.org/versions/master/api/faq/perf" /> |
| <meta property="og:site_name" content="Apache MXNet" /> |
| <script type="application/ld+json"> |
| {"url":"https://mxnet.apache.org/versions/master/api/faq/perf","headline":"Some Tips for Improving MXNet Performance","description":"A flexible and efficient library for deep learning.","@type":"WebPage","@context":"https://schema.org"}</script> |
| <!-- End Jekyll SEO tag --> |
| <link rel="stylesheet" href="/versions/master/assets/docsearch.min.css" /><link rel="stylesheet" href="/versions/master/assets/main.css"><link type="application/atom+xml" rel="alternate" href="https://mxnet.apache.org/versions/master/feed.xml" title="Apache MXNet" /><!-- Matomo --> |
| <script> |
| var _paq = window._paq = window._paq || []; /* reuse an existing window._paq command queue if one is already defined, otherwise start with an empty array */ |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| /* We explicitly disable cookie tracking to avoid privacy issues */ |
| _paq.push(['disableCookies']); |
| _paq.push(['trackPageView']); /* queue a page-view command for the current page */ |
| _paq.push(['enableLinkTracking']); |
| (function() { /* immediately-invoked function so that u, d, g and s do not become globals */ |
| var u="https://analytics.apache.org/"; /* base URL shared by the tracker endpoint and the tracker script below */ |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '23']); |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); /* load matomo.js asynchronously by inserting it before the first script element in the document */ |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| <script src="/versions/master/assets/js/jquery-3.3.1.min.js"></script> |
| <script src="/versions/master/assets/js/docsearch.min.js"></script><script src="/versions/master/assets/js/globalSearch.js" defer></script> |
| <script src="/versions/master/assets/js/clipboard.js" defer></script> |
| <script src="/versions/master/assets/js/copycode.js" defer></script></head> |
| <body><header class="site-header" role="banner"> |
| |
| <script> |
| $(document).ready(function () { |
| |
| // HEADER OPACITY LOGIC |
| |
| function opacity_header() { |
| var value = "rgba(4,140,204," + ($(window).scrollTop() / 300 + 0.4) + ")" |
| $('.site-header').css("background-color", value) |
| } |
| |
| $(window).scroll(function () { |
| opacity_header() |
| }) |
| opacity_header(); |
| |
| // MENU SELECTOR LOGIC |
| $('.page-link').each( function () { |
| if (window.location.href.includes(this.href)) { |
| $(this).addClass("page-current"); |
| } |
| }); |
| }) |
| </script> |
| <div class="wrapper"> |
| <a class="site-title" rel="author" href="/versions/master/"><img |
| src="/versions/master/assets/img/mxnet_logo.png" class="site-header-logo" alt="Apache MXNet"></a> |
| <nav class="site-nav"> |
| <input type="checkbox" id="nav-trigger" class="nav-trigger"/> |
| <label for="nav-trigger"> |
| <span class="menu-icon"> |
| <svg viewBox="0 0 18 15" width="18px" height="15px"> |
| <path d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.032C17.335,0,18,0.665,18,1.484L18,1.484z M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.032C17.335,6.031,18,6.696,18,7.516L18,7.516z M18,13.516C18,14.335,17.335,15,16.516,15H1.484 C0.665,15,0,14.335,0,13.516l0,0c0-0.82,0.665-1.483,1.484-1.483h15.032C17.335,12.031,18,12.695,18,13.516L18,13.516z"/> |
| </svg> |
| </span> |
| </label> |
| <div class="gs-search-border"> |
| <div id="gs-search-icon"></div> |
| <form id="global-search-form"> |
| <input id="global-search" type="text" title="Search" placeholder="Search" /> |
| <div id="global-search-dropdown-container"> |
| <button class="gs-current-version btn" type="button" data-toggle="dropdown"> |
| <span id="gs-current-version-label">master</span> |
| <svg class="gs-dropdown-caret" viewBox="0 0 32 32" aria-hidden="true"> |
| <path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path> |
| </svg> |
| </button> |
| <ul class="gs-opt-group gs-version-dropdown"> |
| |
| |
| <li class="gs-opt gs-versions active">master</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.9.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.8.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.7.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.6.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.5.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.4.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.3.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.2.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.1.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.0.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">0.12.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">0.11.0</li> |
| |
| |
| </ul> |
| </div> |
| <span id="global-search-close">x</span> |
| </form> |
| </div> |
| <div class="trigger"> |
| <div id="global-search-mobile-border"> |
| <div id="gs-search-icon-mobile"></div> |
| <input id="global-search-mobile" placeholder="Search..." type="text" aria-label="Search"/> |
| <div id="global-search-dropdown-container-mobile"> |
| <button class="gs-current-version-mobile btn" type="button" data-toggle="dropdown"> |
| <svg class="gs-dropdown-caret" viewBox="0 0 32 32" aria-hidden="true"> |
| <path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path> |
| </svg> |
| </button> |
| <ul class="gs-opt-group gs-version-dropdown-mobile"> |
| |
| |
| <li class="gs-opt gs-versions active">master</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.9.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.8.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.7.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.6.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.5.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.4.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.3.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.2.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.1.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">1.0.0</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">0.12.1</li> |
| |
| |
| |
| <li class="gs-opt gs-versions">0.11.0</li> |
| |
| |
| </ul> |
| </div> |
| </div> |
| <a class="page-link" href="/versions/master/get_started">Get Started</a> |
| <a class="page-link" href="/versions/master/features">Features</a> |
| <a class="page-link" href="/versions/master/ecosystem">Ecosystem</a> |
| <a class="page-link" href="/versions/master/api">Docs &amp; Tutorials</a> |
| <a class="page-link" href="/versions/master/trusted_by">Trusted By</a> |
| <a class="page-link" href="https://github.com/apache/mxnet">GitHub</a> |
| <div class="dropdown" style="min-width:100px"> |
| <span class="dropdown-header">Apache |
| <svg class="dropdown-caret" viewBox="0 0 32 32" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg> |
| </span> |
| <div class="dropdown-content" style="min-width:250px"> |
| <a href="https://www.apache.org/foundation/">Apache Software Foundation</a> |
| <a href="https://www.apache.org/licenses/">License</a> |
| <a href="/versions/master/api/faq/security.html">Security</a> |
| <a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy</a> |
| <a href="https://www.apache.org/events/current-event">Events</a> |
| <a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a> |
| <a href="https://www.apache.org/foundation/thanks.html">Thanks</a> |
| </div> |
| </div> |
| <div class="dropdown"> |
| <span class="dropdown-header">master |
| <svg class="dropdown-caret" viewBox="0 0 32 32" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg> |
| </span> |
| <div class="dropdown-content"> |
| |
| |
| <a class="dropdown-option-active" href="/">master</a> |
| |
| |
| |
| <a href="/versions/1.9.1/">1.9.1</a> |
| |
| |
| |
| <a href="/versions/1.8.0/">1.8.0</a> |
| |
| |
| |
| <a href="/versions/1.7.0/">1.7.0</a> |
| |
| |
| |
| <a href="/versions/1.6.0/">1.6.0</a> |
| |
| |
| |
| <a href="/versions/1.5.0/">1.5.0</a> |
| |
| |
| |
| <a href="/versions/1.4.1/">1.4.1</a> |
| |
| |
| |
| <a href="/versions/1.3.1/">1.3.1</a> |
| |
| |
| |
| <a href="/versions/1.2.1/">1.2.1</a> |
| |
| |
| |
| <a href="/versions/1.1.0/">1.1.0</a> |
| |
| |
| |
| <a href="/versions/1.0.0/">1.0.0</a> |
| |
| |
| |
| <a href="/versions/0.12.1/">0.12.1</a> |
| |
| |
| |
| <a href="/versions/0.11.0/">0.11.0</a> |
| |
| |
| </div> |
| </div> |
| </div> |
| </nav> |
| </div> |
| </header> |
| <main class="page-content" aria-label="Content"> |
| <script> |
| |
| </script> |
| <article class="post"> |
| |
| <header class="post-header wrapper"> |
| <h1 class="post-title">Some Tips for Improving MXNet Performance</h1> |
| <h3></h3></header> |
| |
| <div class="post-content"> |
| <div class="wrapper"> |
| <div class="row"> |
| <div class="col-3 docs-side-bar"> |
| <h3 style="text-transform: capitalize; padding-left:10px">faq</h3> |
| <ul> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/add_op_in_backend">A Beginner's Guide to Implementing Operators in MXNet Backend</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/cloud">MXNet on the Cloud</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/distributed_training">Distributed Training in MXNet</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/env_var">Environment Variables</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/float16">Float16</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/large_tensor_support">Using MXNet with Large Tensor Support</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/model_parallel_lstm">Model Parallel</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/new_op">Create New Operators</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/perf">Some Tips for Improving MXNet Performance</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/recordio">Create a Dataset Using RecordIO</a></li> |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/s3_integration">Use data from S3 for training</a></li> |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/security">MXNet Security Best Practices</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/tensor_inspector_tutorial">Use TensorInspector to Help Debug Operators</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/using_rtc">Using runtime compilation (RTC) to write CUDA kernels in MXNet</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| |
| <li><a href="/versions/master/api/faq/why_mxnet">Why MXNet came to be?</a></li> |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| |
| <!-- page-category --> |
| <!-- resource-p --> |
| </ul> |
| </div> |
| <div class="col-9"> |
| <!--- Licensed to the Apache Software Foundation (ASF) under one --> |
| <!--- or more contributor license agreements. See the NOTICE file --> |
| <!--- distributed with this work for additional information --> |
| <!--- regarding copyright ownership. The ASF licenses this file --> |
| <!--- to you under the Apache License, Version 2.0 (the --> |
| <!--- "License"); you may not use this file except in compliance --> |
| <!--- with the License. You may obtain a copy of the License at --> |
| |
| <!--- http://www.apache.org/licenses/LICENSE-2.0 --> |
| |
| <!--- Unless required by applicable law or agreed to in writing, --> |
| <!--- software distributed under the License is distributed on an --> |
| <!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --> |
| <!--- KIND, either express or implied. See the License for the --> |
| <!--- specific language governing permissions and limitations --> |
| <!--- under the License. --> |
| |
| <h1 id="some-tips-for-improving-mxnet-performance">Some Tips for Improving MXNet Performance</h1> |
| <p>Even after fixing the training or deployment environment and parallelization scheme, |
| a number of configuration settings and data-handling choices can impact <em>MXNet</em> performance. |
| In this document, we share some tips for improving <em>MXNet</em> performance.</p> |
| |
| <p>Performance is mainly affected by the following 4 factors:</p> |
| |
| <ol> |
| <li>Implementation of operators (Convolution, Pooling, ...) |
| <ul> |
| <li><a href="#intel-cpu">Intel CPU</a></li> |
| <li><a href="#nvidia-gpu">Nvidia GPU</a></li> |
| </ul> |
| </li> |
| <li>Input data loading and augmentation |
| <ul> |
| <li><a href="#input-data">Input Data</a></li> |
| </ul> |
| </li> |
| <li>Workloads (computation graph) optimization and scheduling |
| <ul> |
| <li><a href="#profiler">Profiler</a></li> |
| </ul> |
| </li> |
| <li>Communication for multi-device training |
| <ul> |
| <li><a href="#multiple-devices">Multiple Devices</a></li> |
| </ul> |
| </li> |
| </ol> |
| |
| <h2 id="intel-cpu">Intel CPU</h2> |
| |
| <p>When using Intel Xeon CPUs for training and inference, the <code class="highlighter-rouge">mxnet-mkl</code> package is recommended. Adding <code class="highlighter-rouge">--pre</code> installs a nightly build from master. Without it you will install the latest patched release of MXNet:</p> |
| |
| <div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>$ pip install mxnet-mkl [--pre] |
| </code></pre></div></div> |
| |
| <p>Alternatively, build MXNet from source code with <code class="highlighter-rouge">USE_ONEDNN=1</code>. For Linux users, <code class="highlighter-rouge">USE_ONEDNN=1</code> is turned on by default.</p> |
| |
| <p>We also find that setting the following environment variables can help:</p> |
| |
| <table> |
| <thead> |
| <tr> |
| <th style="text-align: left">Variable</th> |
| <th style="text-align: left">Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td style="text-align: left"><code class="highlighter-rouge">OMP_NUM_THREADS</code></td> |
| <td style="text-align: left">Suggested value: <code class="highlighter-rouge">vCPUs / 2</code> in which <code class="highlighter-rouge">vCPUs</code> is the number of virtual CPUs. For more information, please see the guide for <a href="https://software.intel.com/en-us/mkl-windows-developer-guide-setting-the-number-of-threads-using-an-openmp-environment-variable">setting the number of threads using an OpenMP environment variable</a></td> |
| </tr> |
| <tr> |
| <td style="text-align: left"><code class="highlighter-rouge">KMP_AFFINITY</code></td> |
| <td style="text-align: left">Suggested value: <code class="highlighter-rouge">granularity=fine,compact,1,0</code>. For more information, please see the guide for <a href="https://software.intel.com/en-us/node/522691">Thread Affinity Interface (Linux* and Windows*)</a>.</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <p>Note that <em>MXNet</em> treats all CPUs on a single machine as a single device. |
| So whether you specify <code class="highlighter-rouge">cpu(0)</code> or <code class="highlighter-rouge">cpu()</code>, <em>MXNet</em> will use all CPU cores on the machine.</p> |
| |
| <h3 id="scoring-results">Scoring results</h3> |
| <p>The following tables show the performance of MXNet-1.2.0.rc1, |
| measured as the number of images that can be predicted per second. |
| We used <a href="https://github.com/apache/mxnet/blob/master/example/image-classification/benchmark_score.py">example/image-classification/benchmark_score.py</a> |
| to measure the performance on different AWS EC2 machines.</p> |
| |
| <p>AWS EC2 C5.18xlarge:</p> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet</th> |
| <th>VGG 16</th> |
| <th>Inception-BN</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| <th>Resnet 152</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>390.53</td> |
| <td>81.57</td> |
| <td>124.13</td> |
| <td>62.26</td> |
| <td>76.22</td> |
| <td>32.92</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>596.45</td> |
| <td>100.84</td> |
| <td>206.58</td> |
| <td>93.36</td> |
| <td>119.55</td> |
| <td>46.80</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>710.77</td> |
| <td>119.04</td> |
| <td>275.55</td> |
| <td>127.86</td> |
| <td>148.62</td> |
| <td>59.36</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>921.40</td> |
| <td>120.38</td> |
| <td>380.82</td> |
| <td>157.11</td> |
| <td>167.95</td> |
| <td>70.78</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>1018.43</td> |
| <td>115.30</td> |
| <td>411.67</td> |
| <td>168.71</td> |
| <td>178.54</td> |
| <td>75.13</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>1290.31</td> |
| <td>107.19</td> |
| <td>483.34</td> |
| <td>179.38</td> |
| <td>193.47</td> |
| <td>85.86</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <p>AWS EC2 C5.9xlarge:</p> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet</th> |
| <th>VGG 16</th> |
| <th>Inception-BN</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| <th>Resnet 152</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>257.77</td> |
| <td>50.61</td> |
| <td>130.99</td> |
| <td>66.95</td> |
| <td>75.38</td> |
| <td>32.33</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>410.60</td> |
| <td>63.02</td> |
| <td>195.14</td> |
| <td>87.84</td> |
| <td>102.67</td> |
| <td>41.57</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>462.59</td> |
| <td>62.64</td> |
| <td>263.15</td> |
| <td>109.87</td> |
| <td>127.15</td> |
| <td>50.69</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>573.79</td> |
| <td>63.95</td> |
| <td>309.99</td> |
| <td>121.36</td> |
| <td>140.84</td> |
| <td>59.01</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>709.47</td> |
| <td>67.79</td> |
| <td>350.19</td> |
| <td>128.26</td> |
| <td>147.41</td> |
| <td>64.15</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>831.46</td> |
| <td>69.58</td> |
| <td>354.91</td> |
| <td>129.92</td> |
| <td>149.18</td> |
| <td>64.25</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <p>AWS EC2 C5.4xlarge:</p> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet</th> |
| <th>VGG 16</th> |
| <th>Inception-BN</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| <th>Resnet 152</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>214.15</td> |
| <td>29.32</td> |
| <td>114.97</td> |
| <td>47.96</td> |
| <td>61.01</td> |
| <td>23.92</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>310.04</td> |
| <td>34.81</td> |
| <td>150.09</td> |
| <td>60.89</td> |
| <td>71.16</td> |
| <td>27.92</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>330.69</td> |
| <td>34.56</td> |
| <td>186.63</td> |
| <td>74.15</td> |
| <td>86.86</td> |
| <td>34.37</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>378.88</td> |
| <td>35.46</td> |
| <td>204.89</td> |
| <td>77.05</td> |
| <td>91.10</td> |
| <td>36.93</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>424.00</td> |
| <td>36.49</td> |
| <td>211.55</td> |
| <td>78.39</td> |
| <td>91.23</td> |
| <td>37.34</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>481.95</td> |
| <td>37.23</td> |
| <td>213.71</td> |
| <td>78.23</td> |
| <td>91.68</td> |
| <td>37.26</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <p>AWS EC2 C5.2xlarge:</p> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet</th> |
| <th>VGG 16</th> |
| <th>Inception-BN</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| <th>Resnet 152</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>131.01</td> |
| <td>15.67</td> |
| <td>78.75</td> |
| <td>31.12</td> |
| <td>37.30</td> |
| <td>14.75</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>182.29</td> |
| <td>18.01</td> |
| <td>98.59</td> |
| <td>39.13</td> |
| <td>45.98</td> |
| <td>17.84</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>189.31</td> |
| <td>18.25</td> |
| <td>110.26</td> |
| <td>41.35</td> |
| <td>49.21</td> |
| <td>19.32</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>211.75</td> |
| <td>18.57</td> |
| <td>115.46</td> |
| <td>42.53</td> |
| <td>49.98</td> |
| <td>19.81</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>236.06</td> |
| <td>19.11</td> |
| <td>117.18</td> |
| <td>42.59</td> |
| <td>50.20</td> |
| <td>19.92</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>261.13</td> |
| <td>19.46</td> |
| <td>116.20</td> |
| <td>42.72</td> |
| <td>49.95</td> |
| <td>19.80</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <p>AWS EC2 C5.xlarge:</p> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet</th> |
| <th>VGG 16</th> |
| <th>Inception-BN</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| <th>Resnet 152</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>36.64</td> |
| <td>3.93</td> |
| <td>27.06</td> |
| <td>10.09</td> |
| <td>12.98</td> |
| <td>5.06</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>49.21</td> |
| <td>4.49</td> |
| <td>29.67</td> |
| <td>10.80</td> |
| <td>12.94</td> |
| <td>5.14</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>50.12</td> |
| <td>4.50</td> |
| <td>30.31</td> |
| <td>10.83</td> |
| <td>13.17</td> |
| <td>5.19</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>54.71</td> |
| <td>4.58</td> |
| <td>30.22</td> |
| <td>10.89</td> |
| <td>13.19</td> |
| <td>5.20</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>60.23</td> |
| <td>4.70</td> |
| <td>30.20</td> |
| <td>10.91</td> |
| <td>13.23</td> |
| <td>5.19</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>66.37</td> |
| <td>4.76</td> |
| <td>30.10</td> |
| <td>10.90</td> |
| <td>13.22</td> |
| <td>5.15</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <h2 id="other-cpu">Other CPU</h2> |
| |
| <p>When running on CPUs (not only Intel CPUs, but also ARM CPUs), NNPACK can improve running performance by 2x to 7x. Please check <a href="nnpack">nnpack.md</a> for details.</p> |
| |
| <h2 id="nvidia-gpu">Nvidia GPU</h2> |
| |
| <p><code class="highlighter-rouge">cuDNN</code> typically accelerates <em>MXNet</em> performance on NVIDIA GPUs significantly, |
| especially for convolution layers. |
| We suggest always checking to make sure that a recent cuDNN version is used.</p> |
| |
| <p>Setting the environment variable with <code class="highlighter-rouge">export MXNET_CUDNN_AUTOTUNE_DEFAULT=1</code> sometimes also helps.</p> |
| |
| <p>We show results when using various GPUs including K80 (EC2 p2.2xlarge), M60 (EC2 g3.4xlarge), |
| and V100 (EC2 p3.2xlarge).</p> |
| |
| <h3 id="scoring-results-1">Scoring results</h3> |
| |
| <p>Based on |
| <a href="https://github.com/apache/mxnet/blob/master/example/image-classification/benchmark_score.py">example/image-classification/benchmark_score.py</a> |
| and MXNet-1.2.0.rc1, with cuDNN 7.0.5.</p> |
| |
| <ul> |
| <li>K80 (single GPU)</li> |
| </ul> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet</th> |
| <th>VGG 16</th> |
| <th>Inception-BN</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| <th>Resnet 152</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>243.93</td> |
| <td>43.59</td> |
| <td>68.62</td> |
| <td>35.52</td> |
| <td>67.41</td> |
| <td>23.65</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>338.16</td> |
| <td>49.14</td> |
| <td>113.41</td> |
| <td>56.29</td> |
| <td>93.35</td> |
| <td>33.88</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>478.92</td> |
| <td>53.44</td> |
| <td>159.61</td> |
| <td>74.43</td> |
| <td>119.18</td> |
| <td>45.23</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>683.52</td> |
| <td>70.50</td> |
| <td>190.49</td> |
| <td>86.23</td> |
| <td>131.32</td> |
| <td>50.54</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>1004.66</td> |
| <td>109.01</td> |
| <td>254.20</td> |
| <td>105.70</td> |
| <td>155.40</td> |
| <td>62.55</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>1238.55</td> |
| <td>114.98</td> |
| <td>285.49</td> |
| <td>116.79</td> |
| <td>159.42</td> |
| <td>64.99</td> |
| </tr> |
| <tr> |
| <td>64</td> |
| <td>1346.72</td> |
| <td>123.56</td> |
| <td>308.73</td> |
| <td>122.21</td> |
| <td>167.58</td> |
| <td>70.21</td> |
| </tr> |
| <tr> |
| <td>128</td> |
| <td>1416.91</td> |
| <td>OOM</td> |
| <td>320.98</td> |
| <td>123.11</td> |
| <td>171.55</td> |
| <td>71.85</td> |
| </tr> |
| <tr> |
| <td>256</td> |
| <td>1462.97</td> |
| <td>OOM</td> |
| <td>329.16</td> |
| <td>127.53</td> |
| <td>153.01</td> |
| <td>57.23</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <ul> |
| <li>M60</li> |
| </ul> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet</th> |
| <th>VGG 16</th> |
| <th>Inception-BN</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| <th>Resnet 152</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>243.49</td> |
| <td>59.95</td> |
| <td>101.97</td> |
| <td>48.30</td> |
| <td>95.46</td> |
| <td>39.29</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>491.04</td> |
| <td>69.14</td> |
| <td>170.35</td> |
| <td>80.27</td> |
| <td>142.61</td> |
| <td>60.17</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>711.54</td> |
| <td>78.94</td> |
| <td>257.89</td> |
| <td>123.09</td> |
| <td>182.36</td> |
| <td>76.51</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>1077.73</td> |
| <td>109.34</td> |
| <td>343.42</td> |
| <td>152.82</td> |
| <td>208.74</td> |
| <td>87.27</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>1447.21</td> |
| <td>144.93</td> |
| <td>390.25</td> |
| <td>166.32</td> |
| <td>220.73</td> |
| <td>92.41</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>1797.66</td> |
| <td>151.86</td> |
| <td>416.69</td> |
| <td>176.56</td> |
| <td>230.19</td> |
| <td>97.03</td> |
| </tr> |
| <tr> |
| <td>64</td> |
| <td>1779.38</td> |
| <td>150.18</td> |
| <td>427.51</td> |
| <td>183.47</td> |
| <td>239.12</td> |
| <td>101.59</td> |
| </tr> |
| <tr> |
| <td>128</td> |
| <td>1787.36</td> |
| <td>OOM</td> |
| <td>439.04</td> |
| <td>185.29</td> |
| <td>243.31</td> |
| <td>103.39</td> |
| </tr> |
| <tr> |
| <td>256</td> |
| <td>1899.10</td> |
| <td>OOM</td> |
| <td>450.22</td> |
| <td>183.42</td> |
| <td>242.36</td> |
| <td>100.98</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <ul> |
| <li>V100</li> |
| </ul> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet</th> |
| <th>VGG 16</th> |
| <th>Inception-BN</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| <th>Resnet 152</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>659.51</td> |
| <td>205.16</td> |
| <td>157.37</td> |
| <td>87.71</td> |
| <td>162.15</td> |
| <td>61.38</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>1248.21</td> |
| <td>265.40</td> |
| <td>297.34</td> |
| <td>159.24</td> |
| <td>293.74</td> |
| <td>116.30</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>2122.41</td> |
| <td>333.97</td> |
| <td>520.91</td> |
| <td>279.84</td> |
| <td>479.14</td> |
| <td>195.17</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>3894.30</td> |
| <td>420.26</td> |
| <td>898.09</td> |
| <td>455.03</td> |
| <td>699.39</td> |
| <td>294.19</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>5815.58</td> |
| <td>654.16</td> |
| <td>1430.97</td> |
| <td>672.54</td> |
| <td>947.45</td> |
| <td>398.79</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>7906.09</td> |
| <td>708.43</td> |
| <td>1847.26</td> |
| <td>814.59</td> |
| <td>1076.81</td> |
| <td>451.82</td> |
| </tr> |
| <tr> |
| <td>64</td> |
| <td>9486.26</td> |
| <td>701.59</td> |
| <td>2134.89</td> |
| <td>899.01</td> |
| <td>1168.37</td> |
| <td>480.44</td> |
| </tr> |
| <tr> |
| <td>128</td> |
| <td>10177.84</td> |
| <td>703.30</td> |
| <td>2318.32</td> |
| <td>904.33</td> |
| <td>1233.15</td> |
| <td>511.79</td> |
| </tr> |
| <tr> |
| <td>256</td> |
| <td>10990.46</td> |
| <td>473.62</td> |
| <td>2425.28</td> |
| <td>960.20</td> |
| <td>1155.07</td> |
| <td>449.35</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <p>Below are the performance results on V100 using float16.</p> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>VGG 16</th> |
| <th>Inception-BN</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| <th>Resnet 152</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>276.29</td> |
| <td>155.53</td> |
| <td>150.99</td> |
| <td>270.89</td> |
| <td>96.79</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>476.91</td> |
| <td>296.45</td> |
| <td>282.02</td> |
| <td>493.99</td> |
| <td>176.88</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>711.92</td> |
| <td>525.05</td> |
| <td>492.45</td> |
| <td>851.15</td> |
| <td>321.52</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>1047.11</td> |
| <td>900.26</td> |
| <td>807.94</td> |
| <td>1282.36</td> |
| <td>517.66</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>1299.88</td> |
| <td>1441.41</td> |
| <td>1192.21</td> |
| <td>1722.97</td> |
| <td>724.57</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>1486.63</td> |
| <td>1854.30</td> |
| <td>1512.08</td> |
| <td>2085.51</td> |
| <td>887.34</td> |
| </tr> |
| <tr> |
| <td>64</td> |
| <td>1219.65</td> |
| <td>2138.61</td> |
| <td>1687.35</td> |
| <td>2341.67</td> |
| <td>1002.90</td> |
| </tr> |
| <tr> |
| <td>128</td> |
| <td>1169.81</td> |
| <td>2317.39</td> |
| <td>1818.26</td> |
| <td>2355.04</td> |
| <td>1046.98</td> |
| </tr> |
| <tr> |
| <td>256</td> |
| <td>764.16</td> |
| <td>2425.16</td> |
| <td>1653.74</td> |
| <td>1991.88</td> |
| <td>976.73</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <h3 id="training-results">Training results</h3> |
| |
| <p>Based on |
| <a href="https://github.com/apache/mxnet/blob/master/example/image-classification/train_imagenet.py">example/image-classification/train_imagenet.py</a> |
| and MXNet-1.2.0.rc1, with CUDNN 7.0.5. The benchmark script is |
| <a href="https://github.com/mli/mxnet-benchmark/blob/master/run_vary_batch.sh">run_vary_batch.sh</a>, |
| where the batch size for Alexnet is increased by 16x.</p> |
| |
| <ul> |
| <li>K80 (single GPU)</li> |
| </ul> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet(*16)</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>300.30</td> |
| <td>10.48</td> |
| <td>15.61</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>406.08</td> |
| <td>16.00</td> |
| <td>23.88</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>461.01</td> |
| <td>22.10</td> |
| <td>32.26</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>484.00</td> |
| <td>26.80</td> |
| <td>39.42</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>490.45</td> |
| <td>31.62</td> |
| <td>46.69</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>414.72</td> |
| <td>33.78</td> |
| <td>49.48</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <ul> |
| <li>M60</li> |
| </ul> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet(*16)</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>380.96</td> |
| <td>14.06</td> |
| <td>20.55</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>530.53</td> |
| <td>21.90</td> |
| <td>32.65</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>600.17</td> |
| <td>31.96</td> |
| <td>45.57</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>633.60</td> |
| <td>40.58</td> |
| <td>54.92</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>639.37</td> |
| <td>46.88</td> |
| <td>64.44</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>576.54</td> |
| <td>50.05</td> |
| <td>68.34</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <ul> |
| <li>V100</li> |
| </ul> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Batch</th> |
| <th>Alexnet(*16)</th> |
| <th>Inception-v3</th> |
| <th>Resnet 50</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td>1</td> |
| <td>1629.52</td> |
| <td>21.83</td> |
| <td>34.54</td> |
| </tr> |
| <tr> |
| <td>2</td> |
| <td>2359.73</td> |
| <td>40.11</td> |
| <td>65.01</td> |
| </tr> |
| <tr> |
| <td>4</td> |
| <td>2687.89</td> |
| <td>72.79</td> |
| <td>113.49</td> |
| </tr> |
| <tr> |
| <td>8</td> |
| <td>2919.02</td> |
| <td>118.43</td> |
| <td>174.81</td> |
| </tr> |
| <tr> |
| <td>16</td> |
| <td>2994.32</td> |
| <td>173.15</td> |
| <td>251.22</td> |
| </tr> |
| <tr> |
| <td>32</td> |
| <td>2585.61</td> |
| <td>214.48</td> |
| <td>298.51</td> |
| </tr> |
| <tr> |
| <td>64</td> |
| <td>1984.21</td> |
| <td>247.43</td> |
| <td>343.19</td> |
| </tr> |
| <tr> |
| <td>128</td> |
| <td>OOM</td> |
| <td>253.68</td> |
| <td>363.69</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <h2 id="multiple-devices">Multiple Devices</h2> |
| |
| <p>If more than one GPU or machine is used, MXNet uses <code class="highlighter-rouge">kvstore</code> to communicate data. |
| It’s critical to use the proper type of <code class="highlighter-rouge">kvstore</code> to get the best performance. |
| Refer to <a href="https://mxnet.apache.org/api/faq/distributed_training.html">Distributed Training</a> for more |
| details.</p> |
| |
| <p>Besides, we can use <a href="https://github.com/apache/mxnet/tree/master/tools/bandwidth">tools/bandwidth</a> |
| to find the communication cost per batch. |
| Ideally, the communication cost should be less than the time to compute a batch. |
| To reduce the communication cost, we can consider:</p> |
| |
| <ul> |
| <li>Exploring different <code class="highlighter-rouge">--kv-store</code> options.</li> |
| <li>Increasing the batch size to improve the computation to communication ratio.</li> |
| </ul> |
| |
| <p>Finally, MXNet is integrated with other distributed training frameworks, including <a href="https://github.com/apache/mxnet/tree/master/example/distributed_training-horovod">Horovod</a> and <a href="https://github.com/bytedance/byteps#use-byteps-in-your-code">BytePS</a>.</p> |
| |
| <h2 id="input-data">Input Data</h2> |
| |
| <p>To make sure you’re handling input data in a reasonable way, consider the following:</p> |
| |
| <ul> |
| <li>Data format: If you are using the <code class="highlighter-rouge">rec</code> format, then everything should be fine.</li> |
| <li>Decoding: By default, <em>MXNet</em> uses 4 CPU threads for decoding images. |
| This is often sufficient to decode more than 1K images per second. |
| If you are using a low-end CPU or your GPUs are very powerful, you can increase the number of threads.</li> |
| <li>Storage location: Any local or distributed file system (HDFS, Amazon S3) should be fine. |
| If multiple devices read the data from the shared network file system (NFS) at the same time, problems might occur.</li> |
| <li>Batch size: Use a large batch size. We often choose the largest one that fits into GPU memory. |
| However, a value that’s too large can slow down convergence. |
| For example, the safe batch size for CIFAR 10 is approximately 200, while for ImageNet 1K, the batch size can exceed 1K.</li> |
| </ul> |
| |
| <h2 id="profiler">Profiler</h2> |
| |
| <p><em>MXNet</em> has a built-in profiler |
| that gives detailed information about execution time at the operator level. |
| This feature complements general profiling tools like <em>nvprof</em> and <em>gprof</em> |
| by summarizing at the operator level, instead of a function, kernel, or instruction level.</p> |
| |
| <p>The profiler can be turned on with an <a href="/versions/master/api/faq/env_var#control-the-profiler">environment variable</a> |
| for an entire program run, or programmatically for just part of a run. Note that by default the profiler hides the details of each individual operator, and you can reveal the details by setting environment variables <code class="highlighter-rouge">MXNET_EXEC_BULK_EXEC_INFERENCE</code>, <code class="highlighter-rouge">MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN</code> and <code class="highlighter-rouge">MXNET_EXEC_BULK_EXEC_TRAIN</code> to 0. |
| See <a href="https://github.com/apache/mxnet/tree/master/example/profiler">example/profiler</a> |
| for complete examples of how to use the profiler in code, or <a href="https://mxnet.apache.org/api/python/docs/tutorials/performance/backend/profiler.html">this tutorial</a> on how to profile MXNet performance.</p> |
| |
| <p>Briefly, the Python code looks like:</p> |
| |
| <div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code> <span class="c1"># wait for previous operations to complete |
| </span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">waitall</span><span class="p">()</span> |
| <span class="n">mx</span><span class="o">.</span><span class="n">profiler</span><span class="o">.</span><span class="n">set_config</span><span class="p">(</span><span class="n">profile_all</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">aggregate_stats</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">filename</span><span class="o">=</span><span class="s">'profile_output.json'</span><span class="p">)</span> |
| <span class="n">mx</span><span class="o">.</span><span class="n">profiler</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="s">'run'</span><span class="p">)</span> |
| |
| <span class="c1"># Code to be profiled goes here... |
| </span> |
| <span class="c1"># wait for previous operations to complete |
| </span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">waitall</span><span class="p">()</span> |
| <span class="n">mx</span><span class="o">.</span><span class="n">profiler</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="s">'stop'</span><span class="p">)</span> |
| </code></pre></div></div> |
| |
| <p>After the program finishes, open your browser’s tracing tool (for example, chrome://tracing in a Chrome browser) and load the <code class="highlighter-rouge">profile_output.json</code> file written by the profiler to inspect the results.</p> |
| |
| <p><img src="https://cloud.githubusercontent.com/assets/17693755/18035938/0a43484a-6d93-11e6-80d4-241c6ca552ea.png" alt="MLP Profile" /></p> |
| |
| <p>Note that the output file can grow extremely large, so this approach is not recommended for general use.</p> |
| |
| </div> |
| </div> |
| |
| </div> |
| </div> |
| |
| </article> |
| |
| </main><footer class="site-footer h-card"> |
| <div class="wrapper"> |
| <div class="row"> |
| <div class="col-4"> |
| <h4 class="footer-category-title">Resources</h4> |
| <ul class="contact-list"> |
| <li><a href="/versions/master/community#stay-connected">Mailing lists</a></li> |
| <li><a href="/versions/master/community#github-issues">GitHub Issues</a></li> |
| <li><a href="https://github.com/apache/mxnet/projects">Projects</a></li> |
| <li><a href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home">Developer Wiki</a></li> |
| <li><a href="https://discuss.mxnet.io">Forum</a></li> |
| <li><a href="/versions/master/community">Contribute To MXNet</a></li> |
| </ul> |
| </div> |
| |
| <div class="col-4"><ul class="social-media-list"><li><a href="https://github.com/apache/mxnet"><svg class="svg-icon"><use xlink:href="/versions/master/assets/minima-social-icons.svg#github"></use></svg> <span class="username">apache/mxnet</span></a></li><li><a href="https://www.twitter.com/apachemxnet"><svg class="svg-icon"><use xlink:href="/versions/master/assets/minima-social-icons.svg#twitter"></use></svg> <span class="username">apachemxnet</span></a></li><li><a href="https://youtube.com/apachemxnet"><svg class="svg-icon"><use xlink:href="/versions/master/assets/minima-social-icons.svg#youtube"></use></svg> <span class="username">apachemxnet</span></a></li></ul> |
| </div> |
| |
| <div class="col-4 footer-text"> |
| <p>A flexible and efficient library for deep learning.</p> |
| </div> |
| </div> |
| </div> |
| </footer> |
| <footer class="site-footer2"> |
| <div class="wrapper"> |
| <div class="row"> |
| <div class="col-3"> |
| <img src="/versions/master/assets/img/asf_logo.svg" class="footer-logo col-2" alt="Apache Software Foundation logo"> |
| </div> |
| <div class="footer-bottom-warning col-9"> |
| <p>"Copyright © 2017-2022, The Apache Software Foundation. Licensed under the Apache License, Version 2.0. Apache MXNet, MXNet, Apache, the Apache |
| feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the |
| Apache Software Foundation."</p> |
| </div> |
| </div> |
| </div> |
| </footer> |
| |
| |
| |
| |
| </body> |
| |
| </html> |