blob: fb7d78cd547ddd710d6ed5859cc0af0267243c9b [file] [log] [blame]
<!DOCTYPE html>
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<html lang=" en"><head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="/versions/master/assets/img/mxnet-icon.png" rel="icon" type="image/png"><!-- Begin Jekyll SEO tag v2.6.1 -->
<title>Why MXNet came to be? | Apache MXNet</title>
<meta name="generator" content="Jekyll v4.0.0" />
<meta property="og:title" content="Why MXNet came to be?" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="A flexible and efficient library for deep learning." />
<meta property="og:description" content="A flexible and efficient library for deep learning." />
<link rel="canonical" href="https://mxnet.apache.org/versions/master/api/faq/why_mxnet" />
<meta property="og:url" content="https://mxnet.apache.org/versions/master/api/faq/why_mxnet" />
<meta property="og:site_name" content="Apache MXNet" />
<script type="application/ld+json">
{"url":"https://mxnet.apache.org/versions/master/api/faq/why_mxnet","headline":"Why MXNet came to be?","description":"A flexible and efficient library for deep learning.","@type":"WebPage","@context":"https://schema.org"}</script>
<!-- End Jekyll SEO tag -->
<link rel="stylesheet" href="/versions/master/assets/docsearch.min.css" /><link rel="stylesheet" href="/versions/master/assets/main.css"><link type="application/atom+xml" rel="alternate" href="https://mxnet.apache.org/versions/master/feed.xml" title="Apache MXNet" /><!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '23']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
<script src="/versions/master/assets/js/jquery-3.3.1.min.js"></script>
<script src="/versions/master/assets/js/docsearch.min.js"></script><script src="/versions/master/assets/js/globalSearch.js" defer></script>
<script src="/versions/master/assets/js/clipboard.js" defer></script>
<script src="/versions/master/assets/js/copycode.js" defer></script></head>
<body><header class="site-header" role="banner">
<script>
$(document).ready(function () {
// HEADER OPACITY LOGIC
function opacity_header() {
var value = "rgba(4,140,204," + ($(window).scrollTop() / 300 + 0.4) + ")"
$('.site-header').css("background-color", value)
}
$(window).scroll(function () {
opacity_header()
})
opacity_header();
// MENU SELECTOR LOGIC
$('.page-link').each( function () {
if (window.location.href.includes(this.href)) {
$(this).addClass("page-current");
}
});
})
</script>
<div class="wrapper">
<a class="site-title" rel="author" href="/versions/master/"><img
src="/versions/master/assets/img/mxnet_logo.png" class="site-header-logo"></a>
<nav class="site-nav">
<input type="checkbox" id="nav-trigger" class="nav-trigger"/>
<label for="nav-trigger">
<span class="menu-icon">
<svg viewBox="0 0 18 15" width="18px" height="15px">
<path d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.032C17.335,0,18,0.665,18,1.484L18,1.484z M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.032C17.335,6.031,18,6.696,18,7.516L18,7.516z M18,13.516C18,14.335,17.335,15,16.516,15H1.484 C0.665,15,0,14.335,0,13.516l0,0c0-0.82,0.665-1.483,1.484-1.483h15.032C17.335,12.031,18,12.695,18,13.516L18,13.516z"/>
</svg>
</span>
</label>
<div class="gs-search-border">
<div id="gs-search-icon"></div>
<form id="global-search-form">
<input id="global-search" type="text" title="Search" placeholder="Search" />
<div id="global-search-dropdown-container">
<button class="gs-current-version btn" type="button" data-toggle="dropdown">
<span id="gs-current-version-label">master</span>
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
</svg>
</button>
<ul class="gs-opt-group gs-version-dropdown">
<li class="gs-opt gs-versions active">master</li>
<li class="gs-opt gs-versions">1.9.1</li>
<li class="gs-opt gs-versions">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
</ul>
</div>
<span id="global-search-close">x</span>
</form>
</div>
<div class="trigger">
<div id="global-search-mobile-border">
<div id="gs-search-icon-mobile"></div>
<input id="global-search-mobile" placeholder="Search..." type="text"/>
<div id="global-search-dropdown-container-mobile">
<button class="gs-current-version-mobile btn" type="button" data-toggle="dropdown">
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
</svg>
</button>
<ul class="gs-opt-group gs-version-dropdown-mobile">
<li class="gs-opt gs-versions active">master</li>
<li class="gs-opt gs-versions">1.9.1</li>
<li class="gs-opt gs-versions">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
</ul>
</div>
</div>
<a class="page-link" href="/versions/master/get_started">Get Started</a>
<a class="page-link" href="/versions/master/features">Features</a>
<a class="page-link" href="/versions/master/ecosystem">Ecosystem</a>
<a class="page-link" href="/versions/master/api">Docs & Tutorials</a>
<a class="page-link" href="/versions/master/trusted_by">Trusted By</a>
<a class="page-link" href="https://github.com/apache/mxnet">GitHub</a>
<div class="dropdown" style="min-width:100px">
<span class="dropdown-header">Apache
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content" style="min-width:250px">
<a href="https://www.apache.org/foundation/">Apache Software Foundation</a>
<a href="https://www.apache.org/licenses/">License</a>
<a href="/versions/master/api/faq/security.html">Security</a>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy</a>
<a href="https://www.apache.org/events/current-event">Events</a>
<a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</div>
</div>
<div class="dropdown">
<span class="dropdown-header">master
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content">
<a class="dropdown-option-active" href="/">master</a>
<a href="/versions/1.9.1/">1.9.1</a>
<a href="/versions/1.8.0/">1.8.0</a>
<a href="/versions/1.7.0/">1.7.0</a>
<a href="/versions/1.6.0/">1.6.0</a>
<a href="/versions/1.5.0/">1.5.0</a>
<a href="/versions/1.4.1/">1.4.1</a>
<a href="/versions/1.3.1/">1.3.1</a>
<a href="/versions/1.2.1/">1.2.1</a>
<a href="/versions/1.1.0/">1.1.0</a>
<a href="/versions/1.0.0/">1.0.0</a>
<a href="/versions/0.12.1/">0.12.1</a>
<a href="/versions/0.11.0/">0.11.0</a>
</div>
</div>
</div>
</nav>
</div>
</header>
<main class="page-content" aria-label="Content">
<script>
</script>
<article class="post">
<header class="post-header wrapper">
<h1 class="post-title">Why MXNet came to be?</h1>
<h3></h3></header>
<div class="post-content">
<div class="wrapper">
<div class="row">
<div class="col-3 docs-side-bar">
<h3 style="text-transform: capitalize; padding-left:10px">faq</h3>
<ul>
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/add_op_in_backend">A Beginner's Guide to Implementing Operators in MXNet Backend</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/cloud">MXNet on the Cloud</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/distributed_training">Distributed Training in MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/env_var">Environment Variables</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/float16">Float16</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/large_tensor_support">Using MXNet with Large Tensor Support</a></li>
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/model_parallel_lstm">Model Parallel</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/new_op">Create New Operators</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/perf">Some Tips for Improving MXNet Performance</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/recordio">Create a Dataset Using RecordIO</a></li>
<!-- page-category -->
<li><a href="/versions/master/api/faq/s3_integration">Use data from S3 for training</a></li>
<!-- page-category -->
<li><a href="/versions/master/api/faq/security">MXNet Security Best Practices</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/tensor_inspector_tutorial">Use TensorInspector to Help Debug Operators</a></li>
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/using_rtc">Using runtime compilation (RTC) to write CUDA kernels in MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/master/api/faq/why_mxnet">Why MXNet came to be?</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- resource-p -->
</ul>
</div>
<div class="col-9">
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->
<!--- http://www.apache.org/licenses/LICENSE-2.0 -->
<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->
<h1 id="why-was-mxnet-developed-in-the-first-place-">Why was MXNet developed in the first place ?</h1>
<p>Probably, if you’ve stumbled upon this page, you’ve heard of <em>deep learning</em>.
Deep learning denotes the modern incarnation of neural networks,
and it’s the technology behind recent breakthroughs
in self-driving cars, machine translation, speech recognition and more.
While widespread interest in deep learning took off in 2012,
deep learning has become an indispensable tool for countless industries.</p>
<p><img src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/get-started/image-classification.png" alt="alt text" /></p>
<p>It might not come as a surprise that researchers
have investigated neural networks for decades.
Warren McCulloch and Walter Pitts
suggested the forerunner of today’s artificial neurons back in 1943.
Each neuron is connected to other neurons along <em>edges</em>, analogous to the synapses that connect real neurons.
And associated with each edge is a <em>weight</em> that indicates whether the connection is excitatory or inhibitatory and the strength of the connection.</p>
<p><img src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/get-started/artificial-neuron-2.png" alt="alt_text" /></p>
<p>In the 1980s, the modern version of neural networks took shape.
Researchers arranged artificial neurons into <em>layers</em>.
Neurons in any layer get input from the neurons in the layers below them.
And, in turn, their output feeds into the neurons in the layer above.
Typically, the lowest layer represents the <em>input</em> to a neural network.
After computing the values of each layer, the <em>output</em> values are read out from the topmost layer.
The behavior of the network is determined by the setting of the weights.
And the process of <em>learning</em> in neural networks
is precisely the process of searching for good settings of these <em>weights</em>.</p>
<p>All that we need is an algorithm that tells us how to perform this search.
And since David Rumelhart and colleagues
introduced the <em>backpropagation</em> learning algorithm to train neural networks,
nearly all the major ideas have been in place.
Still, for many years neural networks took a backseat
to classical statistical methods like logistic regression and support vector machines (SVMs).
So you might reasonably ask, what’s changed to garner such interest?</p>
<h2 id="scale-and-computation">Scale and Computation</h2>
<p>The two biggest factors driving innovation in deep learning now are data and computation.
With distributed cloud computing and parallelism across GPU cores,
we can train models millions of times faster than researchers could in the 1980s.
The availability of large, high-quality datasets is another factor driving the field forward.
In the 1990s, the best datasets in computer vision had thousands of low-resolution images and ground truth assignments to a small number of classes.
Today, researchers cut their teeth on ImageNet, a massive dataset containing millions of high-resolution images from a thousand distinct classes.
The falling price of storage and high network bandwidth
make it affordable to work with big data at will.</p>
<p>In this new world, with bigger datasets and abundant computation,
neural networks dominate on most pattern recognition problems.
Over the last five years, neural networks have come to dominate on nearly every problem in computer vision,
replacing classical models and hand-engineered features.
Similarly, nearly every production speech recognition system now relies on neural networks,
where replacing the hidden Markov models that previously held sway.</p>
<p><img src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/get-started/nvidia-gpus.jpg" alt="alt text" /></p>
<p>While GPUs and clusters present a huge opportunity for accelerating neural network training,
adapting traditional machine learning code
to take advantage of these resources can be challenging.
The familiar scientific computing stacks (Matlab, R, or NumPy &amp; SciPy)
give no straight-forward way to exploit these distributed resources.</p>
<p>Acceleration libraries like <em>MXNet</em> offer powerful tools
to help developers exploit the full capabilities of GPUs and cloud computing.
While these tools are generally useful and applicable to any mathematical computation, <em>MXNet</em> places a special emphasis on speeding up the development and deployment of large-scale deep neural networks. In particular, we offer the following capabilities:</p>
<ul>
<li><strong>Device Placement:</strong> With <em>MXNet</em>, it’s easy to specify where each data structures should live.</li>
<li><strong>Multi-GPU training</strong>: <em>MXNet</em> makes it easy to scale computation with number of available GPUs.</li>
<li><strong>Automatic differentiation</strong>: <em>MXNet</em> automates the derivative calculations that once bogged down neural network research.</li>
<li><strong>Optimized Predefined Layers</strong>: While you can code up your own layers in <em>MXNet</em>, the predefined layers are optimized for speed, outperforming competing libraries.</li>
</ul>
<h2 id="deep-nets-on-fast-computers">Deep Nets on Fast Computers</h2>
<p>While MXNet can accelerate any numerical computation,
we developed the library with neural networks in mind.
However you plan to use MXNet, neural networks make for a powerful motivating example to display MXNet’s capabilities.</p>
<p>Neural networks are just functions for transforming input arrays <code class="highlighter-rouge">X</code> into output arrays <code class="highlighter-rouge">Y</code>.
In the case of image classification, <code class="highlighter-rouge">X</code> might represent the pixel values of an image, and <code class="highlighter-rouge">Y</code> might represent the corresponding probabilities that the image belongs to each of <code class="highlighter-rouge">10</code> classes.
For language translation, <code class="highlighter-rouge">X</code> and <code class="highlighter-rouge">Y</code> both might denote sequences of words. We’ll revisit the way you might represent sequences in subsequent tutorials - so for now it’s safe to think of <code class="highlighter-rouge">X</code> and <code class="highlighter-rouge">Y</code> as fixed length vectors.</p>
<p>To perform this mapping, neural networks stack <em>layers</em> of computation. Each layer consists of a linear function followed by a nonlinear transformation. In <em>MXNet</em> we might express this as:</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">hidden_linear</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">sym</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">W</span><span class="p">)</span>
<span class="n">hidden_activation</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">sym</span><span class="o">.</span><span class="n">tanh</span><span class="p">(</span><span class="n">hidden_linear</span><span class="p">)</span>
</code></pre></div></div>
<p>The linear transformations consist of multiplication by parameter arrays (<code class="highlighter-rouge">W</code> above).
When we talk about learning we mean finding the right set of values for <code class="highlighter-rouge">W</code>.
With just one layer, we can implement the familiar family of linear models,
including linear and logistic regression, linear support vector machines (SVMs), and the perceptron algorithm.
With more layers and a few clever constraints, we can implement all of today’s state-of-the-art deep learning techniques.</p>
<p>Of course, tens or hundreds of matrix multiplications can be computationally taxing.
Generally, these linear operations are the computational bottleneck.
Fortunately, linear operators can be parallelized trivially across the thousands of cores on a GPU.
But low-level GPU programming requires specialized skills that are not common even among leading researchers in the ML community. Moreover, even for CUDA experts, implementing a new neural network architecture shouldn’t require weeks of programming to implement low-level linear algebra operations. That’s where <em>MXNet</em> comes in.</p>
<ul>
<li><em>MXNet</em> provides optimized numerical computation for GPUs and distributed ecosystems, from the comfort of high-level environments like Python and R</li>
<li><em>MXNet</em> automates common workflows, so standard neural networks can be expressed concisely in just a few lines of code</li>
</ul>
<p>Now let’s take a closer look at the computational demands of neural networks
and give a sense of how <em>MXNet</em> helps us to write better, faster, code.
Say we have a neural network trained to recognize spam from the content of emails.
The emails may be streaming from an online service (at inference time),
or from a large offline dataset <strong>D</strong> (at training time).
In either case, the dataset typically must be managed by the CPU.</p>
<p><img src="https://raw.githubusercontent.com/kevinthesun/web-data/master/mxnet/get-started/architecture.png" alt="alt text" /></p>
<p>To compute the transformation of a neural network quickly, we need both the parameters and data points to make it into GPU memory. For any example <em>X</em>, the parameters <em>W</em> are the same. Moreover the size of the model tends to dwarf the size of an individual example. So we might arrive at the natural insight that parameters should always live on the GPU, even if the dataset itself must live on the CPU or stream in. This prevents IO from becoming the bottleneck during training or inference.</p>
<p>Fortunately, <em>MXNet</em> makes this kind of assignment easy.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kn">import</span> <span class="nn">mxnet.ndarray</span> <span class="k">as</span> <span class="n">nd</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">10000</span><span class="p">,</span> <span class="mi">40000</span><span class="p">),</span> <span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="c1">#Allocate an array to store 1000 datapoints (of 40k dimensions) that lives on the CPU
</span><span class="n">W1</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">40000</span><span class="p">,</span> <span class="mi">1024</span><span class="p">),</span> <span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="c1">#Allocate a 40k x 1024 weight matrix on GPU for the 1st layer of the net
</span><span class="n">W2</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> <span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="c1">#Allocate a 1024 x 1024 weight matrix on GPU for the 2nd layer of the net
</span></code></pre></div></div>
<!-- * __Talk about how mxnet also makes it easy to assign a context (on which device the computation happens__ -->
<p>Similarly, <em>MXNet</em> makes it easy to specify the computing device</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">with</span> <span class="n">mx</span><span class="o">.</span><span class="n">Context</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">()):</span> <span class="c1"># Absent this statement, by default, MXNet will execute on CPU
</span> <span class="n">h</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">tanh</span><span class="p">(</span><span class="n">nd</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">W1</span><span class="p">))</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">sigmoid</span><span class="p">(</span><span class="n">nd</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">h1</span><span class="p">,</span> <span class="n">W2</span><span class="p">))</span>
</code></pre></div></div>
<p>Thus, with only a high-level understanding of how our numerical computation maps onto an execution environment, <em>MXNet</em> allows us to exert fine-grained control when needed.</p>
<h2 id="nuts-and-bolts">Nuts and Bolts</h2>
<p>MXNet supports two styles of programming: <em>imperative programming</em> (supported by the <em>NDArray</em> API) and <em>symbolic programming</em> (supported by the <em>Symbol</em> API). In short, imperative programming is the style that you’re likely to be most familiar with. Here if A and B are variables denoting matrices, then <code class="highlighter-rouge">C = A + B</code> is a piece of code that <em>when executed</em> sums the values referenced by <code class="highlighter-rouge">A</code> and <code class="highlighter-rouge">B</code> and stores their sum <code class="highlighter-rouge">C</code> in a new variable. Symbolic programming, on the other hand, allows functions to be defined abstractly through computation graphs. In the symbolic style, we first express complex functions in terms of placeholder values. Then, we can execute these functions by <em>binding them</em> to real values.</p>
<h3 id="imperative-programming-with-ndarray">Imperative Programming with <em>NDArray</em></h3>
<p>If you’re familiar with NumPy, then the mechanics of <em>NDArray</em> should be old hat. Like the corresponding <code class="highlighter-rouge">numpy.ndarray</code>, <code class="highlighter-rouge">mxnet.ndarray</code> (<code class="highlighter-rouge">mxnet.nd</code> for short) allows us to represent and manipulate multi-dimensional, homogenous arrays of fixed-size components. Converting between the two is effortless:</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># Create a numpy array from an mxnet NDArray
</span><span class="n">A_np</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">],[</span><span class="mi">5</span><span class="p">,</span><span class="mi">6</span><span class="p">,</span><span class="mi">7</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">]])</span>
<span class="n">A_nd</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">A</span><span class="p">)</span>
<span class="c1"># Convert back to a numpy array
</span><span class="n">A2_np</span> <span class="o">=</span> <span class="n">A_nd</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">()</span>
</code></pre></div></div>
<p>Other deep learning libraries tend to rely on NumPy exclusively for imperative programming and the syntax.
So you might reasonably wonder, why do we need to bother with <em>NDArray</em>?
Put simply, other libraries only reap the advantages of GPU computing when executing symbolic functions. By using <em>NDArray</em>, <em>MXNet</em> users can specify device context and run on GPUs. In other words, <em>MXNet</em> gives you access to the high-speed computation for imperative operations that Tensorflow and Theano only give for symbolic operations.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">X</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">],[</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">]])</span>
<span class="n">Y</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">5</span><span class="p">,</span><span class="mi">6</span><span class="p">],[</span><span class="mi">7</span><span class="p">,</span><span class="mi">8</span><span class="p">]])</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">X</span> <span class="o">+</span> <span class="n">Y</span>
</code></pre></div></div>
<h3 id="symbolic-programming-in-mxnet">Symbolic Programming in <em>MXNet</em></h3>
<p>In addition to providing fast math operations through NDArray, <em>MXNet</em> provides an interface for defining operations abstractly via a computation graph.
With <code class="highlighter-rouge">mxnet.symbol</code>, we define operations abstractly in terms of place holders. For example, in the following code <code class="highlighter-rouge">a</code> and <code class="highlighter-rouge">b</code> stand in for real values that will be supplied at run time.
When we call <code class="highlighter-rouge">c = a+b</code>, no numerical computation is performed. This operation simply builds a graph that defines the relationship between <code class="highlighter-rouge">a</code>, <code class="highlighter-rouge">b</code> and <code class="highlighter-rouge">c</code>. In order to perform a real calculation, we need to bind <code class="highlighter-rouge">c</code> to real values.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">a</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">sym</span><span class="o">.</span><span class="n">Variable</span><span class="p">(</span><span class="s">'a'</span><span class="p">)</span>
<span class="n">b</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">sym</span><span class="o">.</span><span class="n">Variable</span><span class="p">(</span><span class="s">'b'</span><span class="p">)</span>
<span class="n">c</span> <span class="o">=</span> <span class="n">a</span> <span class="o">+</span> <span class="n">b</span>
<span class="n">executor</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(),</span> <span class="p">{</span><span class="s">'a'</span><span class="p">:</span> <span class="n">X</span><span class="p">,</span> <span class="s">'b'</span><span class="p">:</span> <span class="n">Y</span><span class="p">})</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">executor</span><span class="o">.</span><span class="n">forward</span><span class="p">()</span>
</code></pre></div></div>
<p>Symbolic computation is useful for several reasons. First, because we define a full computation graph before executing it, <em>MXNet</em> can perform sophisticated optimizations to eliminate unnecessary or repeated work. This tends to give better performance than imperative programming. Second, because we store the relationships between different variables in the computation graph, <em>MXNet</em> can then perform efficient auto-differentiation.</p>
<p><strong>However</strong> Symbolic programming is error-prone and very slow to iterate with, as the graph needs to be computed before it is processed.</p>
<h3 id="gluon-for-briding-the-gap-between-the-two">Gluon for briding the gap between the two</h3>
<p><a href="/versions/master/api/python">MXNet Gluon</a> aims to bridge the gap between the imperative nature of MXNet and its symbolic capabilities and keep the advantages of both through <a href="https://d2l.ai/chapter_computational-performance/hybridize.html">hybridization</a>.</p>
<h2 id="conclusions">Conclusions</h2>
<p>Given its combination of high performance, clean code, access to a high-level API, and low-level control, <em>MXNet</em> stands out as a unique choice among deep learning frameworks.</p>
</div>
</div>
</div>
</div>
</article>
</main><footer class="site-footer h-card">
<div class="wrapper">
<div class="row">
<div class="col-4">
<h4 class="footer-category-title">Resources</h4>
<ul class="contact-list">
<li><a href="/versions/master/community#stay-connected">Mailing lists</a></li>
<li><a href="/versions/master/community#github-issues">Github Issues</a></li>
<li><a href="https://github.com/apache/mxnet/projects">Projects</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home">Developer Wiki</a></li>
<li><a href="https://discuss.mxnet.io">Forum</a></li>
<li><a href="/versions/master/community">Contribute To MXNet</a></li>
</ul>
</div>
<div class="col-4"><ul class="social-media-list"><li><a href="https://github.com/apache/mxnet"><svg class="svg-icon"><use xlink:href="/versions/master/assets/minima-social-icons.svg#github"></use></svg> <span class="username">apache/mxnet</span></a></li><li><a href="https://www.twitter.com/apachemxnet"><svg class="svg-icon"><use xlink:href="/versions/master/assets/minima-social-icons.svg#twitter"></use></svg> <span class="username">apachemxnet</span></a></li><li><a href="https://youtube.com/apachemxnet"><svg class="svg-icon"><use xlink:href="/versions/master/assets/minima-social-icons.svg#youtube"></use></svg> <span class="username">apachemxnet</span></a></li></ul>
</div>
<div class="col-4 footer-text">
<p>A flexible and efficient library for deep learning.</p>
</div>
</div>
</div>
</footer>
<footer class="site-footer2">
<div class="wrapper">
<div class="row">
<div class="col-3">
<img src="/versions/master/assets/img/asf_logo.svg" class="footer-logo col-2">
</div>
<div class="footer-bottom-warning col-9">
</p><p>"Copyright © 2017-2022, The Apache Software Foundation. Licensed under the Apache License, Version 2.0. Apache MXNet, MXNet, Apache, the Apache
feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the
Apache Software Foundation."</p>
</div>
</div>
</div>
</footer>
</body>
</html>