blob: d21085c7a57de4b4b7ef4887788ef74d916185ab [file] [log] [blame]
<!DOCTYPE html>
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<html lang="en"><head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="/versions/master/assets/img/mxnet-icon.png" rel="icon" type="image/png"><!-- Begin Jekyll SEO tag v2.6.1 -->
<title>Profiling | Apache MXNet</title>
<meta name="generator" content="Jekyll v4.0.0" />
<meta property="og:title" content="Profiling" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="A flexible and efficient library for deep learning." />
<meta property="og:description" content="A flexible and efficient library for deep learning." />
<link rel="canonical" href="https://mxnet.apache.org/versions/master/api/dev-guide/profiling" />
<meta property="og:url" content="https://mxnet.apache.org/versions/master/api/dev-guide/profiling" />
<meta property="og:site_name" content="Apache MXNet" />
<script type="application/ld+json">
{"url":"https://mxnet.apache.org/versions/master/api/dev-guide/profiling","headline":"Profiling","description":"A flexible and efficient library for deep learning.","@type":"WebPage","@context":"https://schema.org"}</script>
<!-- End Jekyll SEO tag -->
<link rel="stylesheet" href="/versions/master/assets/docsearch.min.css" /><link rel="stylesheet" href="/versions/master/assets/main.css"><link type="application/atom+xml" rel="alternate" href="https://mxnet.apache.org/versions/master/feed.xml" title="Apache MXNet" /><!-- Matomo -->
<script>
// Matomo command queue: reuse window._paq when it already exists, otherwise start an empty array.
var _paq = window._paq = window._paq || [];
// Tracker methods such as "setCustomDimension" should be queued before "trackPageView".
// Cookie tracking is explicitly disabled to avoid privacy issues.
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
  var trackerBase = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '23']);

  // Build an async <script> for matomo.js and insert it ahead of the first
  // script element currently in the document.
  var loader = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  loader.async = true;
  loader.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
<script src="/versions/master/assets/js/jquery-3.3.1.min.js"></script>
<script src="/versions/master/assets/js/docsearch.min.js"></script><script src="/versions/master/assets/js/globalSearch.js" defer></script>
<script src="/versions/master/assets/js/clipboard.js" defer></script>
<script src="/versions/master/assets/js/copycode.js" defer></script></head>
<body><header class="site-header" role="banner">
<script>
$(function () {
  var $viewport = $(window);

  // Header opacity: the background alpha is 0.4 at the top of the page and
  // increases by 1/300 for every pixel scrolled.
  function refreshHeaderOpacity() {
    var alpha = $viewport.scrollTop() / 300 + 0.4;
    $('.site-header').css("background-color", "rgba(4,140,204," + alpha + ")");
  }

  $viewport.on("scroll", function () {
    refreshHeaderOpacity();
  });
  refreshHeaderOpacity();

  // Menu selection: flag every .page-link whose href is contained in the
  // current page URL.
  $('.page-link')
    .filter(function () {
      return window.location.href.includes(this.href);
    })
    .addClass("page-current");
});
</script>
<div class="wrapper">
<a class="site-title" rel="author" href="/versions/master/"><img
src="/versions/master/assets/img/mxnet_logo.png" class="site-header-logo"></a>
<nav class="site-nav">
<input type="checkbox" id="nav-trigger" class="nav-trigger"/>
<label for="nav-trigger">
<span class="menu-icon">
<svg viewBox="0 0 18 15" width="18px" height="15px">
<path d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.032C17.335,0,18,0.665,18,1.484L18,1.484z M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.032C17.335,6.031,18,6.696,18,7.516L18,7.516z M18,13.516C18,14.335,17.335,15,16.516,15H1.484 C0.665,15,0,14.335,0,13.516l0,0c0-0.82,0.665-1.483,1.484-1.483h15.032C17.335,12.031,18,12.695,18,13.516L18,13.516z"/>
</svg>
</span>
</label>
<div class="gs-search-border">
<div id="gs-search-icon"></div>
<form id="global-search-form">
<input id="global-search" type="text" title="Search" placeholder="Search" />
<div id="global-search-dropdown-container">
<button class="gs-current-version btn" type="button" data-toggle="dropdown">
<span id="gs-current-version-label">master</span>
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
</svg>
</button>
<ul class="gs-opt-group gs-version-dropdown">
<li class="gs-opt gs-versions active">master</li>
<li class="gs-opt gs-versions">1.9.1</li>
<li class="gs-opt gs-versions">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
</ul>
</div>
<span id="global-search-close">x</span>
</form>
</div>
<div class="trigger">
<div id="global-search-mobile-border">
<div id="gs-search-icon-mobile"></div>
<input id="global-search-mobile" placeholder="Search..." type="text"/>
<div id="global-search-dropdown-container-mobile">
<button class="gs-current-version-mobile btn" type="button" data-toggle="dropdown">
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
</svg>
</button>
<ul class="gs-opt-group gs-version-dropdown-mobile">
<li class="gs-opt gs-versions active">master</li>
<li class="gs-opt gs-versions">1.9.1</li>
<li class="gs-opt gs-versions">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
</ul>
</div>
</div>
<a class="page-link" href="/versions/master/get_started">Get Started</a>
<a class="page-link" href="/versions/master/features">Features</a>
<a class="page-link" href="/versions/master/ecosystem">Ecosystem</a>
<a class="page-link" href="/versions/master/api">Docs & Tutorials</a>
<a class="page-link" href="/versions/master/trusted_by">Trusted By</a>
<a class="page-link" href="https://github.com/apache/mxnet">GitHub</a>
<div class="dropdown" style="min-width:100px">
<span class="dropdown-header">Apache
<svg class="dropdown-caret" viewBox="0 0 32 32" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content" style="min-width:250px">
<a href="https://www.apache.org/foundation/">Apache Software Foundation</a>
<a href="https://www.apache.org/licenses/">License</a>
<a href="/versions/master/api/faq/security.html">Security</a>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy</a>
<a href="https://www.apache.org/events/current-event">Events</a>
<a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</div>
</div>
<div class="dropdown">
<span class="dropdown-header">master
<svg class="dropdown-caret" viewBox="0 0 32 32" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content">
<a class="dropdown-option-active" href="/">master</a>
<a href="/versions/1.9.1/">1.9.1</a>
<a href="/versions/1.8.0/">1.8.0</a>
<a href="/versions/1.7.0/">1.7.0</a>
<a href="/versions/1.6.0/">1.6.0</a>
<a href="/versions/1.5.0/">1.5.0</a>
<a href="/versions/1.4.1/">1.4.1</a>
<a href="/versions/1.3.1/">1.3.1</a>
<a href="/versions/1.2.1/">1.2.1</a>
<a href="/versions/1.1.0/">1.1.0</a>
<a href="/versions/1.0.0/">1.0.0</a>
<a href="/versions/0.12.1/">0.12.1</a>
<a href="/versions/0.11.0/">0.11.0</a>
</div>
</div>
</div>
</nav>
</div>
</header>
<main class="page-content" aria-label="Content">
<script>
</script>
<article class="post">
<header class="post-header wrapper">
<h1 class="post-title">Profiling</h1>
<h3></h3></header>
<div class="post-content">
<div class="wrapper">
<div class="row">
<div class="col-3 docs-side-bar">
<h3 style="text-transform: capitalize; padding-left:10px">Developer Guide</h3>
<ul>
<li><a href="/versions/master/api/dev-guide/github_contribution_and_PR_verification_tips">GitHub contribution and PR verification tips</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/dev-guide/debugging_and_performance_optimization_tips">Debugging and performance optimization tips</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/dev-guide/examine_forward_results_with_hooks">Examine forward results with hooks</a></li>
<!-- page-category -->
<li><a href="/versions/master/api/dev-guide/exception_handing_and_custom_error_types">Exception handling and custom error types</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/dev-guide/profiling">Profiling</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- resource-p -->
</ul>
</div>
<div class="col-9">
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->
<!--- http://www.apache.org/licenses/LICENSE-2.0 -->
<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->
<h1 id="profiling">Profiling</h1>
<p>Apache MXNet provides a memory <a href="/versions/master/api/python/docs/api/mxnet/profiler/index.html">profiler</a>, which is a way to see what is happening under the hood during runtime. A common scenario is that you want to use the profiler for your hybridized model and visualize the outputs via <code class="highlighter-rouge">chrome://tracing</code>. Here are the steps you need to follow:</p>
<ol>
<li>Configure the profiler</li>
<li><code class="highlighter-rouge">set_state('run')</code> before the model is defined</li>
<li>Add <code class="highlighter-rouge">mx.nd.waitall()</code> to enforce synchronization after you are done with some computation (maybe as part of training)</li>
<li>Then add <code class="highlighter-rouge">set_state('stop')</code></li>
<li>Finally <code class="highlighter-rouge">dump</code> the profiling results</li>
</ol>
<p>Here is a simple example</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">import</span> <span class="n">mxnet</span> <span class="k">as</span> <span class="n">mx</span>
<span class="k">from</span> <span class="n">mxnet</span><span class="p">.</span><span class="n">gluon</span> <span class="n">import</span> <span class="n">nn</span>
<span class="k">from</span> <span class="n">mxnet</span> <span class="n">import</span> <span class="n">profiler</span>
<span class="n">def</span> <span class="n">enable_profiler</span><span class="p">(</span><span class="n">profile_filename</span><span class="p">,</span> <span class="nf">run</span><span class="p">=</span><span class="nb">True</span><span class="p">,</span> <span class="n">continuous_dump</span><span class="p">=</span><span class="nb">False</span><span class="p">,</span> <span class="n">aggregate_stats</span><span class="p">=</span><span class="nb">False</span><span class="p">):</span>
<span class="n">profiler</span><span class="p">.</span><span class="n">set_config</span><span class="p">(</span><span class="n">profile_symbolic</span><span class="p">=</span><span class="nb">True</span><span class="p">,</span>
<span class="n">profile_imperative</span><span class="p">=</span><span class="nb">True</span><span class="p">,</span>
<span class="n">profile_memory</span><span class="p">=</span><span class="nb">True</span><span class="p">,</span>
<span class="n">profile_api</span><span class="p">=</span><span class="nb">True</span><span class="p">,</span>
<span class="n">filename</span><span class="p">=</span><span class="n">profile_filename</span><span class="p">,</span>
<span class="n">continuous_dump</span><span class="p">=</span><span class="n">continuous_dump</span><span class="p">,</span>
<span class="n">aggregate_stats</span><span class="p">=</span><span class="n">aggregate_stats</span><span class="p">)</span>
<span class="k">if</span> <span class="nf">run</span><span class="p">:</span>
<span class="n">profiler</span><span class="p">.</span><span class="n">set_state</span><span class="p">(</span><span class="s1">'run'</span><span class="p">)</span>
<span class="n">enable_profiler</span><span class="p">(</span><span class="n">profile_filename</span><span class="p">=</span><span class="s1">'test_profiler.json'</span><span class="p">,</span> <span class="nf">run</span><span class="p">=</span><span class="nb">True</span><span class="p">,</span> <span class="n">continuous_dump</span><span class="p">=</span><span class="nb">True</span><span class="p">)</span>
<span class="n">profiler</span><span class="p">.</span><span class="n">set_state</span><span class="p">(</span><span class="s1">'run'</span><span class="p">)</span>
<span class="k">model</span> <span class="p">=</span> <span class="n">nn</span><span class="p">.</span><span class="n">HybridSequential</span><span class="p">(</span><span class="n">prefix</span><span class="p">=</span><span class="s1">'net_'</span><span class="p">)</span>
<span class="k">with</span> <span class="k">model</span><span class="p">.</span><span class="n">name_scope</span><span class="p">():</span>
<span class="k">model</span><span class="p">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Dense</span><span class="p">(</span><span class="m">128</span><span class="p">,</span> <span class="n">activation</span><span class="p">=</span><span class="s1">'tanh'</span><span class="p">))</span>
<span class="k">model</span><span class="p">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Dropout</span><span class="p">(</span><span class="m">0.5</span><span class="p">))</span>
<span class="k">model</span><span class="p">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Dense</span><span class="p">(</span><span class="m">64</span><span class="p">,</span> <span class="n">activation</span><span class="p">=</span><span class="s1">'tanh'</span><span class="p">),</span>
<span class="n">nn</span><span class="p">.</span><span class="n">Dense</span><span class="p">(</span><span class="m">32</span><span class="p">,</span> <span class="n">in_units</span><span class="p">=</span><span class="m">64</span><span class="p">))</span>
<span class="k">model</span><span class="p">.</span><span class="n">add</span><span class="p">(</span><span class="n">nn</span><span class="p">.</span><span class="n">Activation</span><span class="p">(</span><span class="s1">'relu'</span><span class="p">))</span>
<span class="k">model</span><span class="p">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">device</span><span class="p">=</span><span class="n">mx</span><span class="p">.</span><span class="n">cpu</span><span class="p">())</span>
<span class="k">model</span><span class="p">.</span><span class="n">hybridize</span><span class="p">()</span>
<span class="n">inputs</span> <span class="p">=</span> <span class="n">mx</span><span class="p">.</span><span class="n">sym</span><span class="p">.</span><span class="n">var</span><span class="p">(</span><span class="s1">'data'</span><span class="p">)</span>
<span class="k">with</span> <span class="n">mx</span><span class="p">.</span><span class="n">autograd</span><span class="p">.</span><span class="k">record</span><span class="p">():</span>
<span class="n">out</span> <span class="p">=</span> <span class="k">model</span><span class="p">(</span><span class="n">mx</span><span class="p">.</span><span class="n">nd</span><span class="p">.</span><span class="n">zeros</span><span class="p">((</span><span class="m">16</span><span class="p">,</span> <span class="m">10</span><span class="p">),</span> <span class="n">device</span><span class="p">=</span><span class="n">mx</span><span class="p">.</span><span class="n">cpu</span><span class="p">()))</span>
<span class="n">out</span><span class="p">.</span><span class="n">backward</span><span class="p">()</span>
<span class="n">mx</span><span class="p">.</span><span class="n">nd</span><span class="p">.</span><span class="n">waitall</span><span class="p">()</span>
<span class="n">profiler</span><span class="p">.</span><span class="n">set_state</span><span class="p">(</span><span class="s1">'stop'</span><span class="p">)</span>
<span class="n">profiler</span><span class="p">.</span><span class="n">dump</span><span class="p">(</span><span class="nb">True</span><span class="p">)</span>
</code></pre></div></div>
<p>Then, in <code class="highlighter-rouge">chrome://tracing</code>, use the <code class="highlighter-rouge">load</code> button and select <code class="highlighter-rouge">test_profiler.json</code>; you will see something like this:
<img src="/assets/img/dev_guide_profilling_1.png" alt="dev_guide_profilling_1" /> To understand what is going on, we need to dive deep into the MXNet runtime.</p>
<h2 id="dive-deep-into-mxnet-runtime-with-the-profiler">Dive deep into MXNet runtime with the profiler</h2>
<p>Let’s start with a simple example and explain as we go on. The following code creates a 3x3 tensor, computes the diagonal and then sums along the diagonal (to compute the “trace”). Using the MXNet profiler, we capture internal MXNet behavior and dump it to a string and print it (<code class="highlighter-rouge">dumps()</code>) and also dump it to a file (<code class="highlighter-rouge">dump()</code>). Then we can import that file in <code class="highlighter-rouge">chrome://tracing</code> and view it graphically.</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>import mxnet as mx
import numpy as np
from mxnet import profiler
#configure the profiler
profiler.set_config(profile_all=True, aggregate_stats=True, filename='trace_profile.json')
#start the profiler collecting data
profiler.set_state('run')
###########################################################
#1. create our data
data = np.linspace(1,9,9).reshape((3,3))
#2. create an MXNet ndarray
a = mx.nd.array(data)
#3. compute on our data and produce results
b = mx.nd.diag(a)
c = mx.nd.sum(b,-1)
#4. wait for computation to finish
mx.nd.waitall()
###########################################################
#stop the profiler
profiler.set_state('stop')
#dump the profiling data as a string
print(profiler.dumps())
#dump the profiling data as a json file that can be viewed graphically
profiler.dump()
</code></pre></div></div>
<p>When running this code, the dumps function dumps the profiling data to a string and returns it (which we promptly print). This statistical info is shown below.</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>Profile Statistics:
Note the difference in units for different entries.
Device Storage
=================
Name Total Count Min Use (kB) Max Use (kB) Avg Use (kB)
---- ----------- ------------- ------------- -------------
Memory: cpu/0 3 96.0600 96.0760 0.0080
MXNET_C_API
=================
Name Total Count Time (ms) Min Time (ms) Max Time (ms) Avg Time (ms)
---- ----------- --------- ------------- ------------- -------------
MXImperativeInvoke 2 0.3360 0.0990 0.2370 0.1680
MXNet C API Calls 17 0.2320 0.2160 0.2320 0.0080
MXNDArraySyncCopyFromCPU 1 0.1750 0.1750 0.1750 0.1750
MXNDArrayCreate 1 0.1050 0.1050 0.1050 0.1050
MXNDArrayGetShape 11 0.0210 0.0000 0.0160 0.0019
MXNDArrayWaitAll 1 0.0200 0.0200 0.0200 0.0200
MXNDArrayGetDType 1 0.0010 0.0010 0.0010 0.0010
MXNet C API Concurrency 34 0.0000 0.0000 0.0010 0.0000
operator
=================
Name Total Count Time (ms) Min Time (ms) Max Time (ms) Avg Time (ms)
---- ----------- --------- ------------- ------------- -------------
sum 1 0.0520 0.0520 0.0520 0.0520
diag 1 0.0410 0.0410 0.0410 0.0410
WaitForVar 1 0.0220 0.0220 0.0220 0.0220
</code></pre></div></div>
<p>The dump function writes out the same data in a format that can be opened in <code class="highlighter-rouge">chrome://tracing</code> and displayed visually. This can be seen in the diagram below.</p>
<p><img src="/assets/img/dev_guide_profilling_2.png" alt="dev_guide_profilling_2.png" />
The profiling data has captured info about interesting functions that have executed while your program was running. Here are some explanations about what each one does.</p>
<h3 id="the-functions-in-the-c_api-are"><strong>The functions in the C_API are:</strong></h3>
<table>
<thead>
<tr>
<th><strong>Function Name</strong></th>
<th><strong>Description</strong></th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>MXImperativeInvoke</strong></td>
<td>invokes an operator to perform the computation</td>
</tr>
<tr>
<td><strong>MXNDArrayCreate</strong></td>
<td>creates an ndarray</td>
</tr>
<tr>
<td><strong>MXNDArrayGetDType</strong></td>
<td>returns the data type of the ndarray</td>
</tr>
<tr>
<td><strong>MXNDArrayGetShape</strong></td>
<td>returns the shape of the ndarray (as a tuple where each element is the size of a dimension)</td>
</tr>
<tr>
<td><strong>MXNDArraySyncCopyFromCPU</strong></td>
<td>called when data is initially residing outside of an MXNet data structure (i.e. numpy.ndarray rather than mxnet.numpy.ndarray). Data is copied into the MXNet data structure</td>
</tr>
<tr>
<td><strong>MXNDArrayWaitAll</strong></td>
<td>wait for all asynchronous operations to finish in MXNet. This function is only used in benchmarking to wait for work to happen. In a real program, there is no waiting and data dependencies are evaluated and computation executed as needed in an As Late As Possible (ALAP) way</td>
</tr>
</tbody>
</table>
<h3 id="the-function-in-the-engine-api-are"><strong>The functions in the Engine API are:</strong></h3>
<table>
<thead>
<tr>
<th><strong>Function Name</strong></th>
<th><strong>Description</strong></th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>WaitForVar</strong></td>
<td>Takes a variable reference as input and waits until that variable has been computed before returning</td>
</tr>
</tbody>
</table>
<h3 id="other-api-functions"><strong>Other API functions:</strong></h3>
<table>
<thead>
<tr>
<th><strong>Function Name</strong></th>
<th><strong>Description</strong></th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>ResourceParallelRandomSetSeed</strong></td>
<td>sets the random number generator seed</td>
</tr>
</tbody>
</table>
<h3 id="operators-we-intended-to-call-in-the-code"><strong>Operators we intended to call in the code:</strong></h3>
<table>
<thead>
<tr>
<th><strong>Operator Name</strong></th>
<th><strong>Description</strong></th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>sum</strong></td>
<td>sum a tensor along a particular axis</td>
</tr>
<tr>
<td><strong>diag</strong></td>
<td>compute the diagonal of the tensor</td>
</tr>
</tbody>
</table>
<h2 id="closer-look">Closer look</h2>
<p>From the code, we can identify the major events in our test application</p>
<ol>
<li>Initialize our input data</li>
<li>Creating a new MXNet ndarray using our existing data values</li>
<li>Compute on our data
<ol>
<li>produce the diagonal of the input data</li>
<li>sum along the diagonal to compute the “trace” of the matrix</li>
</ol>
</li>
<li>Wait for computation to finish (only needed when profiling)</li>
</ol>
<p>In the list above, #1 uses regular numpy functions to initialize data. MXNet is not involved in this process. In #2, we create an MXNet ndarray and quite a few things happen under the hood. The screenshot below shows a zoomed in portion of the timeline.</p>
<p><img src="/assets/img/dev_guide_profilling_3.png" alt="dev_guide_profilling_3.png" />
Here, the four red arrows show the important events in this sequence.</p>
<ol>
<li>First, the <code class="highlighter-rouge">MXNDArrayCreate</code> is called to physically allocate space to store the data and other necessary attributes in the <code class="highlighter-rouge">ndarray</code> class.</li>
<li>Then some support functions are called (<code class="highlighter-rouge">MXNDArrayGetShape</code>, <code class="highlighter-rouge">MXNDArrayGetDType</code>) while initializing the data structure.</li>
<li>Finally the data is copied from the non-MXNet ndarray into the newly prepared MXNet ndarray by the <code class="highlighter-rouge">MXNDArraySyncCopyFromCPU</code> function.</li>
</ol>
<p>Next, #3 (in our code example) begins the computing process to produce our output data. The screenshot below shows this behavior.</p>
<p><img src="/assets/img/dev_guide_profilling_4.png" alt="dev_guide_profilling_4.png" />
Here you can see that the following sequence of events happens:</p>
<ol>
<li><code class="highlighter-rouge">MXImperativeInvoke</code> is called the first time to launch the diagonal operator from #3 (in our code example).</li>
<li>Soon after that the actual <strong><code class="highlighter-rouge">diag</code></strong> operator begins executing in another thread.</li>
<li>While that is happening, our main thread moves on and calls <code class="highlighter-rouge">MXImperativeInvoke</code> again to launch the <strong><code class="highlighter-rouge">sum</code></strong> operator. Just like before, this returns without actually executing the operator and continues.</li>
<li>Lastly, the <code class="highlighter-rouge">MXNDArrayWaitAll</code> is called as the main thread has progressed to #4 in our app. It will wait here while all the computation finishes.</li>
</ol>
<p>Next, let’s look at the part of the timeline zoomed in on the actual operator execution.</p>
<p><img src="/assets/img/dev_guide_profilling_5.png" alt="dev_guide_profilling_5.png" />
Here there are 3 main events happening:</p>
<ol>
<li>The <strong><code class="highlighter-rouge">diag</code></strong> operator is executing first.</li>
<li>Then the <code class="highlighter-rouge">ResourceParallelRandomSetSeed</code> runs.</li>
<li>And finally the <code class="highlighter-rouge">sum</code> operator executes (for a very short time as shown by the big red arrow).</li>
</ol>
<p>The <code class="highlighter-rouge">diag</code> operator running makes sense (although seems to take a little longer than we’d like). At the end, the sum operator runs (very quickly!). But the weird part in the middle is <strong><code class="highlighter-rouge">ResourceParallelRandomSetSeed</code></strong> running. This is part of the MXNet resource manager. The resource manager handles temporary space and random number generators needed by the operators. The <strong><code class="highlighter-rouge">sum</code></strong> operator requests temporary space in order to compute the sum, and therefore launches the resource manager (for the first time) here. As part of its startup sequence, the random number generator is initialized by setting the seed. So this is some initialization overhead. But let’s try and run the app again, running the compute twice, and look at the 2nd run to try and remove this initialization from our profiling.</p>
<p>Here is the modified code:</p>
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>import mxnet as mx
import numpy as np
from mxnet import profiler
profiler.set_config(profile_all=True, aggregate_stats=True, filename='trace_profile.json')
profiler.set_state('run')
################
# first run
sdata = np.linspace(1,9,9).reshape((3,3))
sa = mx.nd.array(sdata)
sb = mx.nd.diag(sa)
sc = mx.nd.sum(sb,-1)
mx.nd.waitall()
################
################
# second run
data = np.linspace(1,9,9).reshape((3,3))
a = mx.nd.array(data)
b = mx.nd.diag(a)
c = mx.nd.sum(b,-1)
mx.nd.waitall()
################
profiler.set_state('stop')
print(profiler.dumps())
profiler.dump()
</code></pre></div></div>
<p>Notice that we renamed the variables and made another copy after the <code class="highlighter-rouge">waitall</code> call. This is so that MXNet doesn’t have to worry about re-using variables, and to segment the 2nd half after the first-time initialization.</p>
<p>Here is an overview of the <em>new</em> timeline:</p>
<p><img src="/assets/img/dev_guide_profilling_6.png" alt="dev_guide_profilling_6.png" />
The first red box is the first run, and the 2nd smaller one is the 2nd run. First off, we can see how much smaller the 2nd one is now without any of the initialization routines. Here is a zoomed-in view of just the 2nd run.</p>
<p><img src="/assets/img/dev_guide_profilling_7.png" alt="dev_guide_profilling_7.png" />
We still have the same sequence of events at the beginning to initialize the MXNet ndarray (<code class="highlighter-rouge">MXNDArrayCreate</code>, <code class="highlighter-rouge">MXNDArrayGetShape</code>, <code class="highlighter-rouge">MXNDArrayGetDType</code>, <code class="highlighter-rouge">MXNDArraySyncCopyFromCPU</code>). Then the <strong><code class="highlighter-rouge">diag</code></strong> operator runs, followed by the <strong><code class="highlighter-rouge">sum</code></strong> operator, and finally the <code class="highlighter-rouge">waitall</code>. When you look at this, be careful about the assumptions that you make. In this version of the timeline, it appears that the operator executes after the <code class="highlighter-rouge">MXImperativeInvoke</code> runs, and seems to imply an inherent ordering. But realize that there is no dependency between the <strong><code class="highlighter-rouge">diag</code></strong> operator finishing and the next <strong><code class="highlighter-rouge">MXImperativeInvoke</code></strong> launching the <strong><code class="highlighter-rouge">sum</code></strong> operator. In this case, it just-so-happens that the <strong><code class="highlighter-rouge">diag</code></strong> operator finishes so quickly that it appears that way. But in reality the main thread is launching the operators and not waiting for them to finish. Lastly, keep in mind that in this case by the time we hit the <strong><code class="highlighter-rouge">MXNDArrayWaitAll</code></strong> everything is already done and we return immediately, but in other circumstances it may sit here waiting for everything to finish (like we saw earlier in the first run).</p>
</div>
</div>
</div>
</div>
</article>
</main><footer class="site-footer h-card">
<div class="wrapper">
<div class="row">
<div class="col-4">
<h4 class="footer-category-title">Resources</h4>
<ul class="contact-list">
<li><a href="/versions/master/community#stay-connected">Mailing lists</a></li>
<li><a href="/versions/master/community#github-issues">GitHub Issues</a></li>
<li><a href="https://github.com/apache/mxnet/projects">Projects</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home">Developer Wiki</a></li>
<li><a href="https://discuss.mxnet.io">Forum</a></li>
<li><a href="/versions/master/community">Contribute To MXNet</a></li>
</ul>
</div>
<div class="col-4"><ul class="social-media-list"><li><a href="https://github.com/apache/mxnet"><svg class="svg-icon"><use xlink:href="/versions/master/assets/minima-social-icons.svg#github"></use></svg> <span class="username">apache/mxnet</span></a></li><li><a href="https://www.twitter.com/apachemxnet"><svg class="svg-icon"><use xlink:href="/versions/master/assets/minima-social-icons.svg#twitter"></use></svg> <span class="username">apachemxnet</span></a></li><li><a href="https://youtube.com/apachemxnet"><svg class="svg-icon"><use xlink:href="/versions/master/assets/minima-social-icons.svg#youtube"></use></svg> <span class="username">apachemxnet</span></a></li></ul>
</div>
<div class="col-4 footer-text">
<p>A flexible and efficient library for deep learning.</p>
</div>
</div>
</div>
</footer>
<footer class="site-footer2">
<div class="wrapper">
<div class="row">
<div class="col-3">
<img src="/versions/master/assets/img/asf_logo.svg" class="footer-logo col-2" alt="Apache Software Foundation logo">
</div>
<div class="footer-bottom-warning col-9">
<p>"Copyright © 2017-2022, The Apache Software Foundation. Licensed under the Apache License, Version 2.0. Apache MXNet, MXNet, Apache, the Apache
feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the
Apache Software Foundation."</p>
</div>
</div>
</div>
</footer>
</body>
</html>