<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>Why MXNet? — mxnet documentation</title>
<link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" rel="stylesheet"/>
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet"/>
<link href="../_static/basic.css" rel="stylesheet" type="text/css">
<link href="../_static/pygments.css" rel="stylesheet" type="text/css">
<link href="../_static/mxnet.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../',
VERSION: '',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: ''
};
</script>
<script src="../_static/jquery-1.11.1.js" type="text/javascript"></script>
<script src="../_static/underscore.js" type="text/javascript"></script>
<script src="../_static/searchtools_custom.js" type="text/javascript"></script>
<script src="../_static/doctools.js" type="text/javascript"></script>
<script src="../_static/selectlang.js" type="text/javascript"></script>
<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>
<script type="text/javascript"> jQuery(function() { Search.loadIndex("/searchindex.js"); Search.init();}); </script>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new
Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-96378503-1', 'auto');
ga('send', 'pageview');
</script>
<link href="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet-icon.png" rel="icon" type="image/png"/>
</head>
<body role="document"><!-- Previous Navbar Layout
<div class="navbar navbar-default navbar-fixed-top">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="../" class="navbar-brand">
<img src="http://data.mxnet.io/theme/mxnet.png">
</a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul id="navbar" class="navbar navbar-left">
<li> <a href="../get_started/index.html">Get Started</a> </li>
<li> <a href="../tutorials/index.html">Tutorials</a> </li>
<li> <a href="../how_to/index.html">How To</a> </li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Packages <span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="../packages/python/index.html">
Python
</a></li>
<li><a href="../packages/r/index.html">
R
</a></li>
<li><a href="../packages/julia/index.html">
Julia
</a></li>
<li><a href="../packages/c++/index.html">
C++
</a></li>
<li><a href="../packages/scala/index.html">
Scala
</a></li>
<li><a href="../packages/perl/index.html">
Perl
</a></li>
</ul>
</li>
<li> <a href="../system/index.html">System</a> </li>
<li>
<form class="" role="search" action="../search.html" method="get" autocomplete="off">
<div class="form-group inner-addon left-addon">
<i class="glyphicon glyphicon-search"></i>
<input type="text" name="q" class="form-control" placeholder="Search">
</div>
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form> </li>
</ul>
<ul id="navbar" class="navbar navbar-right">
<li> <a href="../index.html"><span class="flag-icon flag-icon-us"></span></a> </li>
<li> <a href="..//zh/index.html"><span class="flag-icon flag-icon-cn"></span></a> </li>
</ul>
</div>
</div>
</div>
Previous Navbar Layout End -->
<div class="navbar navbar-fixed-top">
<div class="container" id="navContainer">
<div class="innder" id="header-inner">
<h1 id="logo-wrap">
<a href="../" id="logo"><img src="http://data.mxnet.io/theme/mxnet.png"/></a>
</h1>
<nav class="nav-bar" id="main-nav">
<a class="main-nav-link" href="../get_started/install.html">Install</a>
<a class="main-nav-link" href="../tutorials/index.html">Tutorials</a>
<a class="main-nav-link" href="../how_to/index.html">How To</a>
<span id="dropdown-menu-position-anchor">
<a aria-expanded="true" aria-haspopup="true" class="main-nav-link dropdown-toggle" data-toggle="dropdown" href="#" role="button">API <span class="caret"></span></a>
<ul class="dropdown-menu" id="package-dropdown-menu">
<li><a class="main-nav-link" href="../api/python/index.html">Python</a></li>
<li><a class="main-nav-link" href="../api/scala/index.html">Scala</a></li>
<li><a class="main-nav-link" href="../api/r/index.html">R</a></li>
<li><a class="main-nav-link" href="../api/julia/index.html">Julia</a></li>
<li><a class="main-nav-link" href="../api/c++/index.html">C++</a></li>
<li><a class="main-nav-link" href="../api/perl/index.html">Perl</a></li>
</ul>
</span>
<a class="main-nav-link" href="../architecture/index.html">Architecture</a>
<!-- <a class="main-nav-link" href="../community/index.html">Community</a> -->
<a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a>
<span id="dropdown-menu-position-anchor-version" style="position: relative"><a href="#" class="main-nav-link dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="true">Versions(master)<span class="caret"></span></a><ul id="package-dropdown-menu" class="dropdown-menu"><li><a class="main-nav-link" href=http://mxnet.incubator.apache.org/test/>v0.10.14</a></li><li><a class="main-nav-link" href=http://mxnet.incubator.apache.org/test/versions/0.10/index.html>0.10</a></li><li><a class="main-nav-link" href=http://mxnet.incubator.apache.org/test/versions/master/index.html>master</a></li></ul></span></nav>
<script> function getRootPath(){ return "../" } </script>
<div class="burgerIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"></a>
<ul class="dropdown-menu dropdown-menu-right" id="burgerMenu">
<li><a href="../get_started/install.html">Install</a></li>
<li><a href="../tutorials/index.html">Tutorials</a></li>
<li><a href="../how_to/index.html">How To</a></li>
<li class="dropdown-submenu">
<a href="#" tabindex="-1">API</a>
<ul class="dropdown-menu">
<li><a href="../api/python/index.html" tabindex="-1">Python</a>
</li>
<li><a href="../api/scala/index.html" tabindex="-1">Scala</a>
</li>
<li><a href="../api/r/index.html" tabindex="-1">R</a>
</li>
<li><a href="../api/julia/index.html" tabindex="-1">Julia</a>
</li>
<li><a href="../api/c++/index.html" tabindex="-1">C++</a>
</li>
<li><a href="../api/perl/index.html" tabindex="-1">Perl</a>
</li>
</ul>
</li>
<li><a href="../architecture/index.html">Architecture</a></li>
<li><a class="main-nav-link" href="https://github.com/dmlc/mxnet">Github</a></li>
<li id="dropdown-menu-position-anchor-version-mobile" class="dropdown-submenu" style="position: relative"><a href="#" tabindex="-1">Versions(master)</a><ul class="dropdown-menu"><li><a tabindex="-1" href=http://mxnet.incubator.apache.org/test/>v0.10.14</a></li><li><a tabindex="-1" href=http://mxnet.incubator.apache.org/test/versions/0.10/index.html>0.10</a></li><li><a tabindex="-1" href=http://mxnet.incubator.apache.org/test/versions/master/index.html>master</a></li></ul></li></ul>
</div>
<div class="plusIcon dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button"><span aria-hidden="true" class="glyphicon glyphicon-plus"></span></a>
<ul class="dropdown-menu dropdown-menu-right" id="plusMenu"></ul>
</div>
<div id="search-input-wrap">
<form action="../search.html" autocomplete="off" class="" method="get" role="search">
<div class="form-group inner-addon left-addon">
<i class="glyphicon glyphicon-search"></i>
<input class="form-control" name="q" placeholder="Search" type="text"/>
</div>
<input name="check_keywords" type="hidden" value="yes">
<input name="area" type="hidden" value="default"/>
</form>
<div id="search-preview"></div>
</div>
<div id="searchIcon">
<span aria-hidden="true" class="glyphicon glyphicon-search"></span>
</div>
<!-- <div id="lang-select-wrap"> -->
<!-- <label id="lang-select-label"> -->
<!-- <\!-- <i class="fa fa-globe"></i> -\-> -->
<!-- <span></span> -->
<!-- </label> -->
<!-- <select id="lang-select"> -->
<!-- <option value="en">Eng</option> -->
<!-- <option value="zh">中文</option> -->
<!-- </select> -->
<!-- </div> -->
<!-- <a id="mobile-nav-toggle">
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
<span class="mobile-nav-toggle-bar"></span>
</a> -->
</div>
</div>
</div>
<div class="container">
<div class="row">
<div aria-label="main navigation" class="sphinxsidebar leftsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../api/python/index.html">Python Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/r/index.html">R Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/julia/index.html">Julia Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/c++/index.html">C++ Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/scala/index.html">Scala Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/perl/index.html">Perl Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../how_to/index.html">HowTo Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architecture/index.html">System Documents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/index.html">Tutorials</a></li>
</ul>
</div>
</div>
<div class="content">
<div class="section" id="why-mxnet">
<span id="why-mxnet"></span><h1>Why MXNet?<a class="headerlink" href="#why-mxnet" title="Permalink to this headline"></a></h1>
<p>If you’ve stumbled upon this page, you’ve probably heard of <em>deep learning</em>.
Deep learning denotes the modern incarnation of neural networks,
and it’s the technology behind recent breakthroughs
in self-driving cars, machine translation, speech recognition, and more.
Since widespread interest in deep learning took off in 2012,
it has become an indispensable tool for countless industries.</p>
<p><img alt="alt text" src="https://raw.githubusercontent.com/kevinthesun/web-data/master/mxnet/get-started/image-classification.png"/></p>
<p>It might not come as a surprise that researchers
have investigated neural networks for decades.
Warren McCulloch and Walter Pitts
suggested the forerunner of today’s artificial neurons back in 1943.
Each neuron is connected to other neurons along <em>edges</em>, analogous to the synapses that connect real neurons.
Associated with each edge is a <em>weight</em> that indicates whether the connection is excitatory or inhibitory, and how strong it is.</p>
<p><img alt="alt_text" src="https://raw.githubusercontent.com/kevinthesun/web-data/master/mxnet/get-started/artificial-neuron-2.png"/></p>
<p>In the 1980s, the modern version of neural networks took shape.
Researchers arranged artificial neurons into <em>layers</em>.
Neurons in any layer get input from the neurons in the layer below them.
And, in turn, their output feeds into the neurons in the layer above.
Typically, the lowest layer represents the <em>input</em> to a neural network.
After computing the values of each layer, the <em>output</em> values are read out from the topmost layer.
The behavior of the network is determined by its weights,
and the process of <em>learning</em> in neural networks
is precisely the process of searching for good settings of these <em>weights</em>.</p>
<p>All we need is an algorithm that tells us how to perform this search.
Since David Rumelhart and colleagues
introduced the <em>backpropagation</em> algorithm for training neural networks,
nearly all the major ideas have been in place.
Still, for many years neural networks took a backseat
to classical statistical methods like logistic regression and support vector machines (SVMs).
So you might reasonably ask: what has changed to garner such interest?</p>
<div class="section" id="scale-and-computation">
<span id="scale-and-computation"></span><h2>Scale and Computation<a class="headerlink" href="#scale-and-computation" title="Permalink to this headline"></a></h2>
<p>The two biggest factors driving innovation in deep learning now are data and computation.
With distributed cloud computing and parallelism across GPU cores,
we can train models millions of times faster than researchers could in the 1980s.
The availability of large, high-quality datasets is another factor driving the field forward.
In the 1990s, the best datasets in computer vision had thousands of low-resolution images and ground truth assignments to a small number of classes.
Today, researchers cut their teeth on ImageNet, a massive dataset containing millions of high-resolution images from a thousand distinct classes.
The falling price of storage and high network bandwidth
make it affordable to work with big data at will.</p>
<p>In this new world, with bigger datasets and abundant computation,
neural networks dominate most pattern recognition problems.
Over the last five years, they have displaced classical models and hand-engineered features
on nearly every problem in computer vision.
Similarly, nearly every production speech recognition system now relies on neural networks,
replacing the hidden Markov models that previously held sway.</p>
<p><img alt="alt text" src="https://raw.githubusercontent.com/kevinthesun/web-data/master/mxnet/get-started/nvidia-gpus.jpg"/></p>
<p>While GPUs and clusters present a huge opportunity for accelerating neural network training,
adapting traditional machine learning code
to take advantage of these resources can be challenging.
The familiar scientific computing stacks (MATLAB, R, or NumPy &amp; SciPy)
offer no straightforward way to exploit these distributed resources.</p>
<p>Acceleration libraries like <em>MXNet</em> offer powerful tools
to help developers exploit the full capabilities of GPUs and cloud computing.
While these tools are generally useful for any mathematical computation, <em>MXNet</em> places a special emphasis on speeding up the development and deployment of large-scale deep neural networks. In particular, it offers the following capabilities:</p>
<ul class="simple">
<li><strong>Device Placement:</strong> With <em>MXNet</em>, it’s easy to specify where each data structure should live.</li>
<li><strong>Multi-GPU training</strong>: <em>MXNet</em> makes it easy to scale computation with the number of available GPUs.</li>
<li><strong>Automatic differentiation</strong>: <em>MXNet</em> automates the derivative calculations that once bogged down neural network research (see the sketch after this list).</li>
<li><strong>Optimized Predefined Layers</strong>: While you can code up your own layers in <em>MXNet</em>, the predefined layers are optimized for speed, outperforming competing libraries.</li>
</ul>
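<p>To make the automatic differentiation point concrete, here is a minimal sketch. It assumes a build of <em>MXNet</em> recent enough to ship the <code class="docutils literal"><span class="pre">mxnet.autograd</span></code> package; the variable names are ours, chosen for illustration.</p>
<div class="highlight-python"><div class="highlight"><pre>import mxnet as mx
from mxnet import autograd, nd

x = nd.array([1.0, 2.0, 3.0])
x.attach_grad()              # ask MXNet to allocate space for the gradient of x
with autograd.record():      # record operations into a computation graph
    y = (2 * x * x).sum()    # a toy "loss": the sum of 2x^2
y.backward()                 # automatic differentiation fills in x.grad
print(x.grad.asnumpy())      # [ 4.  8. 12.], i.e., 4x
</pre></div>
</div>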
</div>
<div class="section" id="deep-nets-on-fast-computers">
<span id="deep-nets-on-fast-computers"></span><h2>Deep Nets on Fast Computers<a class="headerlink" href="#deep-nets-on-fast-computers" title="Permalink to this headline"></a></h2>
<p>While MXNet can accelerate any numerical computation,
we developed the library with neural networks in mind.
However you plan to use MXNet, neural networks make a powerful motivating example to showcase its capabilities.</p>
<p>Neural networks are just functions for transforming input arrays <code class="docutils literal"><span class="pre">X</span></code> into output arrays <code class="docutils literal"><span class="pre">Y</span></code>.
In the case of image classification, <code class="docutils literal"><span class="pre">X</span></code> might represent the pixel values of an image, and <code class="docutils literal"><span class="pre">Y</span></code> might represent the corresponding probabilities that the image belongs to each of <code class="docutils literal"><span class="pre">10</span></code> classes.
For language translation, <code class="docutils literal"><span class="pre">X</span></code> and <code class="docutils literal"><span class="pre">Y</span></code> both might denote sequences of words. We’ll revisit the way you might represent sequences in subsequent tutorials; for now, it’s safe to think of <code class="docutils literal"><span class="pre">X</span></code> and <code class="docutils literal"><span class="pre">Y</span></code> as fixed-length vectors.</p>
<p>To perform this mapping, neural networks stack <em>layers</em> of computation. Each layer consists of a linear function followed by a nonlinear transformation. In <em>MXNet</em> we might express this as:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">hidden_linear</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">sym</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">W</span><span class="p">)</span>
<span class="n">hidden_activation</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">sym</span><span class="o">.</span><span class="n">tanh</span><span class="p">(</span><span class="n">hidden_linear</span><span class="p">)</span>
</pre></div>
</div>
<p>The linear transformations consist of multiplication by parameter arrays (<code class="docutils literal"><span class="pre">W</span></code> above).
When we talk about learning, we mean finding the right set of values for <code class="docutils literal"><span class="pre">W</span></code>.
With just one layer, we can implement the familiar family of linear models,
including linear and logistic regression, linear support vector machines (SVMs), and the perceptron algorithm.
With more layers and a few clever constraints, we can implement all of today’s state-of-the-art deep learning techniques.</p>
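<p>As a concrete instance of the one-layer case, here is a sketch of logistic regression expressed with the same symbolic primitives; the names <code class="docutils literal"><span class="pre">x</span></code>, <code class="docutils literal"><span class="pre">w</span></code>, and <code class="docutils literal"><span class="pre">b</span></code> are placeholders of our own choosing.</p>
<div class="highlight-python"><div class="highlight"><pre>import mxnet as mx

# logistic regression as a single linear layer followed by a sigmoid
x = mx.sym.Variable('x')   # input features
w = mx.sym.Variable('w')   # weight vector
b = mx.sym.Variable('b')   # bias
y_hat = mx.sym.Activation(mx.sym.dot(x, w) + b, act_type='sigmoid')  # predicted probability
</pre></div>
</div>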
<p>Of course, tens or hundreds of matrix multiplications can be computationally taxing.
Generally, these linear operations are the computational bottleneck.
Fortunately, linear operators can be parallelized trivially across the thousands of cores on a GPU.
But low-level GPU programming requires specialized skills that are not common even among leading researchers in the ML community. Moreover, even for CUDA experts, implementing a new neural network architecture shouldn’t require weeks of writing low-level linear algebra operations. That’s where <em>MXNet</em> comes in.</p>
<ul class="simple">
<li><em>MXNet</em> provides optimized numerical computation for GPUs and distributed ecosystems, from the comfort of high-level environments like Python and R</li>
<li><em>MXNet</em> automates common workflows, so standard neural networks can be expressed concisely in just a few lines of code</li>
</ul>
<p>Now let’s take a closer look at the computational demands of neural networks
and give a sense of how <em>MXNet</em> helps us to write better, faster code.
Say we have a neural network trained to recognize spam from the content of emails.
The emails may be streaming from an online service (at inference time),
or from a large offline dataset <strong>D</strong> (at training time).
In either case, the dataset typically must be managed by the CPU.</p>
<p><img alt="alt text" src="https://raw.githubusercontent.com/kevinthesun/web-data/master/mxnet/get-started/architecture.png"/></p>
<p>To compute the transformation of a neural network quickly, we need both the parameters and the data points to be in GPU memory. For any example <em>X</em>, the parameters <em>W</em> are the same. Moreover, the size of the model tends to dwarf the size of an individual example. So we might arrive at the natural insight that the parameters should always live on the GPU, even if the dataset itself must live on the CPU or stream in. This prevents I/O from becoming the bottleneck during training or inference.</p>
<p>Fortunately, <em>MXNet</em> makes this kind of assignment easy.</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">mxnet.ndarray</span> <span class="kn">as</span> <span class="nn">nd</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">10000</span><span class="p">,</span> <span class="mi">40000</span><span class="p">),</span> <span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="c1">#Allocate an array to store 1000 datapoints (of 40k dimensions) that lives on the CPU</span>
<span class="n">W1</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">40000</span><span class="p">,</span> <span class="mi">1024</span><span class="p">),</span> <span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="c1">#Allocate a 40k x 1024 weight matrix on GPU for the 1st layer of the net</span>
<span class="n">W2</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> <span class="n">mx</span><span class="o">.</span><span class="n">gpu</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span> <span class="c1">#Allocate a 1024 x 1024 weight matrix on GPU for the 2nd layer of the net</span>
</pre></div>
</div>
<p>Similarly, <em>MXNet</em> makes it easy to specify the computing device:</p>
<div class="highlight-python"><div class="highlight"><pre>with mx.Context(mx.gpu()):   # absent this statement, MXNet executes on the CPU by default
    X_gpu = X.as_in_context(mx.gpu())   # the operands of an operation must share a device, so copy the data over
    h = nd.tanh(nd.dot(X_gpu, W1))
    y = nd.sigmoid(nd.dot(h, W2))
</pre></div>
</div>
<p>Thus, with only a high-level understanding of how our numerical computation maps onto an execution environment, <em>MXNet</em> allows us to exert fine-grained control when needed.</p>
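<p>For example, here is a minimal sketch of that fine-grained placement control, assuming a CUDA-enabled build with at least one GPU:</p>
<div class="highlight-python"><div class="highlight"><pre>a = nd.ones((2, 3), ctx=mx.cpu(0))
b = a.copyto(mx.gpu(0))          # explicitly copy the array onto the first GPU
print(a.context, b.context)      # cpu(0) gpu(0)
</pre></div>
</div>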
</div>
<div class="section" id="nuts-and-bolts">
<span id="nuts-and-bolts"></span><h2>Nuts and Bolts<a class="headerlink" href="#nuts-and-bolts" title="Permalink to this headline"></a></h2>
<p>MXNet supports two styles of programming: <em>imperative programming</em> (supported by the <em>NDArray</em> API) and <em>symbolic programming</em> (supported by the <em>Symbol</em> API). In short, imperative programming is the style you’re likely most familiar with: if <code class="docutils literal"><span class="pre">A</span></code> and <code class="docutils literal"><span class="pre">B</span></code> are variables denoting matrices, then <code class="docutils literal"><span class="pre">C</span> <span class="pre">=</span> <span class="pre">A</span> <span class="pre">+</span> <span class="pre">B</span></code> is a piece of code that, <em>when executed</em>, sums the values referenced by <code class="docutils literal"><span class="pre">A</span></code> and <code class="docutils literal"><span class="pre">B</span></code> and stores the result in a new variable <code class="docutils literal"><span class="pre">C</span></code>. Symbolic programming, on the other hand, allows functions to be defined abstractly through computation graphs. In the symbolic style, we first express complex functions in terms of placeholder values. Then, we can execute these functions by <em>binding them</em> to real values.</p>
<div class="section" id="imperative-programming-with-ndarray">
<span id="imperative-programming-with-ndarray"></span><h3>Imperative Programming with <em>NDArray</em><a class="headerlink" href="#imperative-programming-with-ndarray" title="Permalink to this headline"></a></h3>
<p>If you’re familiar with NumPy, then the mechanics of <em>NDArray</em> should be old hat. Like the corresponding <code class="docutils literal"><span class="pre">numpy.ndarray</span></code>, <code class="docutils literal"><span class="pre">mxnet.ndarray</span></code> (<code class="docutils literal"><span class="pre">mxnet.nd</span></code> for short) allows us to represent and manipulate multi-dimensional, homogeneous arrays of fixed-size components. Converting between the two is effortless:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="c1"># Create a numpy array from an mxnet NDArray</span>
<span class="n">A_np</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">],[</span><span class="mi">5</span><span class="p">,</span><span class="mi">6</span><span class="p">,</span><span class="mi">7</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">]])</span>
<span class="n">A_nd</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">A</span><span class="p">)</span>
<span class="c1"># Convert back to a numpy array</span>
<span class="n">A2_np</span> <span class="o">=</span> <span class="n">A_nd</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">()</span>
</pre></div>
</div>
<p>Other deep learning libraries tend to rely exclusively on NumPy for imperative programming.
So you might reasonably wonder: why bother with <em>NDArray</em>?
Put simply, other libraries only reap the advantages of GPU computing when executing symbolic functions. With <em>NDArray</em>, <em>MXNet</em> users can specify a device context and run on GPUs. In other words, <em>MXNet</em> gives you access to high-speed computation for imperative operations that TensorFlow and Theano only offer for symbolic operations.</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">X</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">],[</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">]])</span>
<span class="n">Y</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">5</span><span class="p">,</span><span class="mi">6</span><span class="p">],[</span><span class="mi">7</span><span class="p">,</span><span class="mi">8</span><span class="p">]])</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">X</span> <span class="o">+</span> <span class="n">Y</span>
</pre></div>
</div>
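<p>Since the point above is precisely that imperative operations can run on the GPU, here is the same sum with an explicit device context; this is a sketch assuming a CUDA-enabled build, and <code class="docutils literal"><span class="pre">Xg</span></code>/<code class="docutils literal"><span class="pre">Yg</span></code> are names of our own choosing.</p>
<div class="highlight-python"><div class="highlight"><pre>Xg = mx.nd.array([[1,2],[3,4]], ctx=mx.gpu(0))
Yg = mx.nd.array([[5,6],[7,8]], ctx=mx.gpu(0))
result_gpu = Xg + Yg             # executed imperatively, on the GPU
print(result_gpu.asnumpy())      # [[ 6.  8.] [10. 12.]]
</pre></div>
</div>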
</div>
<div class="section" id="symbolic-programming-in-mxnet">
<span id="symbolic-programming-in-mxnet"></span><h3>Symbolic Programming in <em>MXNet</em><a class="headerlink" href="#symbolic-programming-in-mxnet" title="Permalink to this headline"></a></h3>
<p>In addition to providing fast math operations through NDArray, <em>MXNet</em> provides an interface for defining operations abstractly via a computation graph.
With <code class="docutils literal"><span class="pre">mxnet.symbol</span></code>, we define operations abstractly in terms of place holders. For example, in the following code <code class="docutils literal"><span class="pre">a</span></code> and <code class="docutils literal"><span class="pre">b</span></code> stand in for real values that will be supplied at run time.
When we call <code class="docutils literal"><span class="pre">c</span> <span class="pre">=</span> <span class="pre">a+b</span></code>, no numerical computation is performed. This operation simply builds a graph that defines the relationship between <code class="docutils literal"><span class="pre">a</span></code>, <code class="docutils literal"><span class="pre">b</span></code> and <code class="docutils literal"><span class="pre">c</span></code>. In order to perform a real calculation, we need to bind <code class="docutils literal"><span class="pre">c</span></code> to real values.</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">a</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">sym</span><span class="o">.</span><span class="n">Variable</span><span class="p">(</span><span class="s1">'a'</span><span class="p">)</span>
<span class="n">b</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">sym</span><span class="o">.</span><span class="n">Variable</span><span class="p">(</span><span class="s1">'b'</span><span class="p">)</span>
<span class="n">c</span> <span class="o">=</span> <span class="n">a</span> <span class="o">+</span> <span class="n">b</span>
<span class="n">executor</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">cpu</span><span class="p">(),</span> <span class="p">{</span><span class="s1">'a'</span><span class="p">:</span> <span class="n">X</span><span class="p">,</span> <span class="s1">'b'</span><span class="p">:</span> <span class="n">Y</span><span class="p">})</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">executor</span><span class="o">.</span><span class="n">forward</span><span class="p">()</span>
</pre></div>
</div>
<p>Symbolic computation is useful for several reasons. First, because we define a full computation graph before executing it, <em>MXNet</em> can perform sophisticated optimizations to eliminate unnecessary or repeated work. This tends to give better performance than imperative programming. Second, because we store the relationships between different variables in the computation graph, <em>MXNet</em> can then perform efficient auto-differentiation.</p>
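<p>To see that auto-differentiation in action, here is a sketch extending the example above: <code class="docutils literal"><span class="pre">simple_bind</span></code> allocates gradient buffers for us, and <code class="docutils literal"><span class="pre">backward</span></code> fills them in.</p>
<div class="highlight-python"><div class="highlight"><pre>executor = c.simple_bind(ctx=mx.cpu(), a=(2, 2), b=(2, 2))
executor.forward(is_train=True, a=X, b=Y)      # reuse the NDArrays defined earlier
executor.backward(out_grads=mx.nd.ones((2, 2)))
print(executor.grad_dict['a'].asnumpy())       # all ones: dc/da = 1
</pre></div>
</div>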
</div>
</div>
<div class="section" id="building-models-with-mxnet-layers">
<span id="building-models-with-mxnet-layers"></span><h2>Building Models with <em>MXNet</em> Layers<a class="headerlink" href="#building-models-with-mxnet-layers" title="Permalink to this headline"></a></h2>
<p>In addition to providing a general-purpose toolkit for optimizing mathematical operations,
<em>MXNet</em> provides predefined neural network layers.
This higher-level interface has several benefits.</p>
<ul class="simple">
<li>Predefined layers allow you to express large models concisely. Repetitive work, like allocating parameters and inferring their dimensions, is eliminated.</li>
<li>Layers are written directly in C++, giving even faster performance than equivalent layers implemented manually in Python.</li>
</ul>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">data</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">Variable</span><span class="p">(</span><span class="s1">'data'</span><span class="p">)</span>
<span class="n">fc1</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">FullyConnected</span><span class="p">(</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'fc1'</span><span class="p">,</span> <span class="n">num_hidden</span><span class="o">=</span><span class="mi">128</span><span class="p">)</span>
<span class="n">act1</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">Activation</span><span class="p">(</span><span class="n">data</span> <span class="o">=</span> <span class="n">fc1</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'relu1'</span><span class="p">,</span> <span class="n">act_type</span><span class="o">=</span><span class="s2">"relu"</span><span class="p">)</span>
<span class="n">fc2</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">FullyConnected</span><span class="p">(</span><span class="n">data</span> <span class="o">=</span> <span class="n">act1</span><span class="p">,</span> <span class="n">name</span> <span class="o">=</span> <span class="s1">'fc2'</span><span class="p">,</span> <span class="n">num_hidden</span> <span class="o">=</span> <span class="mi">64</span><span class="p">)</span>
<span class="n">act2</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">Activation</span><span class="p">(</span><span class="n">data</span> <span class="o">=</span> <span class="n">fc2</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'relu2'</span><span class="p">,</span> <span class="n">act_type</span><span class="o">=</span><span class="s2">"relu"</span><span class="p">)</span>
<span class="n">fc3</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">FullyConnected</span><span class="p">(</span><span class="n">data</span> <span class="o">=</span> <span class="n">act2</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'fc3'</span><span class="p">,</span> <span class="n">num_hidden</span><span class="o">=</span><span class="n">num_classes</span><span class="p">)</span>
<span class="n">mlp</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">symbol</span><span class="o">.</span><span class="n">SoftmaxOutput</span><span class="p">(</span><span class="n">data</span> <span class="o">=</span> <span class="n">fc3</span><span class="p">,</span> <span class="n">name</span> <span class="o">=</span> <span class="s1">'softmax'</span><span class="p">)</span>
</pre></div>
</div>
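<p>Once defined, a symbol like <code class="docutils literal"><span class="pre">mlp</span></code> can be trained in a few more lines. The following sketch uses the <code class="docutils literal"><span class="pre">Module</span></code> API; <code class="docutils literal"><span class="pre">train_iter</span></code> stands in for a hypothetical <code class="docutils literal"><span class="pre">DataIter</span></code> yielding batches of <code class="docutils literal"><span class="pre">data</span></code> and <code class="docutils literal"><span class="pre">softmax_label</span></code>.</p>
<div class="highlight-python"><div class="highlight"><pre># train_iter is a hypothetical DataIter over (data, softmax_label) batches
mod = mx.mod.Module(symbol=mlp, context=mx.cpu())
mod.fit(train_iter,
        optimizer='sgd',
        optimizer_params={'learning_rate': 0.1},
        num_epoch=10)
</pre></div>
</div>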
</div>
<div class="section" id="conclusions">
<span id="conclusions"></span><h2>Conclusions<a class="headerlink" href="#conclusions" title="Permalink to this headline"></a></h2>
<p>Given its combination of high performance, clean code, access to a high-level API, and low-level control, <em>MXNet</em> stands out as a unique choice among deep learning frameworks.</p>
</div>
</div>
<div class="container">
<div class="footer">
<p> © 2015-2017 DMLC. All rights reserved. </p>
</div>
</div>
</div>
<div aria-label="main navigation" class="sphinxsidebar rightsidebar" role="navigation">
<div class="sphinxsidebarwrapper">
<h3><a href="../index.html">Table Of Contents</a></h3>
<ul>
<li><a class="reference internal" href="#">Why MXNet?</a><ul>
<li><a class="reference internal" href="#scale-and-computation">Scale and Computation</a></li>
<li><a class="reference internal" href="#deep-nets-on-fast-computers">Deep Nets on Fast Computers</a></li>
<li><a class="reference internal" href="#nuts-and-bolts">Nuts and Bolts</a><ul>
<li><a class="reference internal" href="#imperative-programming-with-ndarray">Imperative Programming with <em>NDArray</em></a></li>
<li><a class="reference internal" href="#symbolic-programming-in-mxnet">Symbolic Programming in <em>MXNet</em></a></li>
</ul>
</li>
<li><a class="reference internal" href="#building-models-with-mxnet-layers">Building Models with <em>MXNet</em> Layers</a></li>
<li><a class="reference internal" href="#conclusions">Conclusions</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div> <!-- pagename != index -->
<script crossorigin="anonymous" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"></script>
<script src="../_static/js/sidebar.js" type="text/javascript"></script>
<script src="../_static/js/search.js" type="text/javascript"></script>
<script src="../_static/js/navbar.js" type="text/javascript"></script>
<script src="../_static/js/clipboard.min.js" type="text/javascript"></script>
<script src="../_static/js/copycode.js" type="text/javascript"></script>
<script type="text/javascript">
$('body').ready(function () {
$('body').css('visibility', 'visible');
});
</script>
</div></body>
</html>