blob: 6020eb9d13a0f8e576248088b8b419e19cb4dc52 [file] [log] [blame]
<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>Autograd in SINGA · Apache SINGA</title><meta name="viewport" content="width=device-width"/><meta name="generator" content="Docusaurus"/><meta name="description" content="&lt;!--- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the &quot;License&quot;); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --&gt;"/><meta name="docsearch:version" content="2.0.0"/><meta name="docsearch:language" content="en"/><meta property="og:title" content="Autograd in SINGA · Apache SINGA"/><meta property="og:type" content="website"/><meta property="og:url" content="https://feynmandna.github.io/"/><meta property="og:description" content="&lt;!--- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the &quot;License&quot;); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --&gt;"/><meta property="og:image" content="https://feynmandna.github.io/img/singa_twitter_banner.jpeg"/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://feynmandna.github.io/img/singa_twitter_banner.jpeg"/><link rel="shortcut icon" href="/img/favicon.ico"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/atom-one-dark.min.css"/><link rel="alternate" type="application/atom+xml" href="https://feynmandna.github.io/blog/atom.xml" title="Apache SINGA Blog ATOM Feed"/><link rel="alternate" type="application/rss+xml" href="https://feynmandna.github.io/blog/feed.xml" title="Apache SINGA Blog RSS Feed"/><script type="text/javascript" src="https://buttons.github.io/buttons.js"></script><script src="https://unpkg.com/vanilla-back-to-top@7.1.14/dist/vanilla-back-to-top.min.js"></script><script>
document.addEventListener('DOMContentLoaded', function() {
addBackToTop(
{"zIndex":100}
)
});
</script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/singa.png" alt="Apache SINGA"/></a><a href="/versions"><h3>2.0.0</h3></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class="siteNavGroupActive"><a href="/docs/2.0.0/installation" target="_self">Docs</a></li><li class=""><a href="/docs/2.0.0/source-repository" target="_self">Community</a></li><li class=""><a href="/blog/" target="_self">News</a></li><li class=""><a href="https://apache-singa.readthedocs.io/en/latest/" target="_self">API</a></li><li class=""><a target="_self"></a></li><li class=""><a href="https://github.com/apache/singa-doc" target="_self">GitHub</a></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i></i><span>Guides</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle">Getting Started</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/2.0.0/installation">Installation</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/software-stack">Software Stack</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/benchmark-train">Benchmark for Distributed Training</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Guides</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/2.0.0/device">Device</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/tensor">Tensor</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/docs/2.0.0/autograd">Autograd in SINGA</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Model Zoo</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/2.0.0/model-zoo-cnn-cifar10">Train CNN over Cifar-10</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/model-zoo-char-rnn">Train Char-RNN over plain text</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/model-zoo-rbm-mnist">Train a RBM model against MNIST dataset</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/model-zoo-imagenet-alexnet">Train AlexNet over ImageNet</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/model-zoo-imagenet-densenet">Image Classification using DenseNet</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/model-zoo-imagenet-googlenet">Image Classification using GoogleNet</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/model-zoo-imagenet-inception">Image Classification using Inception V4</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/model-zoo-imagenet-resnet">Image Classification using Residual Networks</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/model-zoo-imagenet-vgg">Image Classification using VGG</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Development</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/2.0.0/download-singa">Download SINGA</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/build">Build SINGA from Source</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/contribute-code">How to Contribute Code</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/contribute-docs">How to Contribute to Documentation</a></li><li class="navListItem"><a class="navItem" href="/docs/2.0.0/how-to-release">How to Prepare a Release</a></li></ul></div></div></section></div><script>
var coll = document.getElementsByClassName('collapsible');
var checkActiveCategory = true;
for (var i = 0; i < coll.length; i++) {
var links = coll[i].nextElementSibling.getElementsByTagName('*');
if (checkActiveCategory){
for (var j = 0; j < links.length; j++) {
if (links[j].classList.contains('navListItemActive')){
coll[i].nextElementSibling.classList.toggle('hide');
coll[i].childNodes[1].classList.toggle('rotate');
checkActiveCategory = false;
break;
}
}
}
coll[i].addEventListener('click', function() {
var arrow = this.childNodes[1];
arrow.classList.toggle('rotate');
var content = this.nextElementSibling;
content.classList.toggle('hide');
});
}
document.addEventListener('DOMContentLoaded', function() {
createToggler('#navToggler', '#docsNav', 'docsSliderActive');
createToggler('#tocToggler', 'body', 'tocActive');
var headings = document.querySelector('.toc-headings');
headings && headings.addEventListener('click', function(event) {
var el = event.target;
while(el !== headings){
if (el.tagName === 'A') {
document.body.classList.remove('tocActive');
break;
} else{
el = el.parentNode;
}
}
}, false);
function createToggler(togglerSelector, targetSelector, className) {
var toggler = document.querySelector(togglerSelector);
var target = document.querySelector(targetSelector);
if (!toggler) {
return;
}
toggler.onclick = function(event) {
event.preventDefault();
target.classList.toggle(className);
};
}
});
</script></nav></div><div class="container mainContainer docsContainer"><div class="wrapper"><div class="post"><header class="postHeader"><a class="edit-page-link button" href="https://github.com/apache/singa-doc/blob/master/docs/autograd.md" target="_blank" rel="noreferrer noopener">Edit</a><h1 id="__docusaurus" class="postHeaderTitle">Autograd in SINGA</h1></header><article><div><span><!--- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -->
<p>There are two typical ways to implement autograd, via symbolic differentiation like <a href="http://deeplearning.net/software/theano/index.html">Theano</a> or reverse differentiation like <a href="https://pytorch.org/docs/stable/notes/autograd.html">Pytorch</a>. Singa follows Pytorch way, which records the computation graph and apply the backward propagation automatically after forward propagation. The autograd algorithm is explained in details <a href="https://pytorch.org/docs/stable/notes/autograd.html">here</a>. We explain the relevant modules in Singa and give an example to illustrate the usage.</p>
<h2><a class="anchor" aria-hidden="true" id="relevant-modules"></a><a href="#relevant-modules" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Relevant Modules</h2>
<p>There are three classes involved in autograd, namely <code>singa.tensor.Tensor</code>, <code>singa.autograd.Operation</code>, and <code>singa.autograd.Layer</code>. In the rest of this article, we use tensor, operation and layer to refer to an instance of the respective class.</p>
<h3><a class="anchor" aria-hidden="true" id="tensor"></a><a href="#tensor" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Tensor</h3>
<p>Three attributes of Tensor are used by autograd,</p>
<ul>
<li><code>.creator</code> is an <code>Operation</code> instance. It records the operation that generates the Tensor instance.</li>
<li><code>.requires_grad</code> is a boolean variable. It is used to indicate that the autograd algorithm needs to compute the gradient of the tensor (i.e., the owner). For example, during backpropagation, the gradients of the tensors for the weight matrix of a linear layer and the feature maps of a convolution layer (not the bottom layer) should be computed.</li>
<li><code>.stores_grad</code> is a boolean variable. It is used to indicate that the gradient of the owner tensor should be stored and output by the backward function. For example, the gradient of the feature maps is computed during backpropagation, but is not included in the output of the backward function.</li>
</ul>
<p>Programmers can change <code>requires_grad</code> and <code>stores_grad</code> of a Tensor instance. For example, if later is set to True, the corresponding gradient is included in the output of the backward function. It should be noted that if <code>stores_grad</code> is True, then <code>requires_grad</code> must be true, not vice versa.</p>
<h3><a class="anchor" aria-hidden="true" id="operation"></a><a href="#operation" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Operation</h3>
<p>It takes one or more <code>Tensor</code> instances as input, and then outputs one or more <code>Tensor</code> instances. For example, ReLU can be implemented as a specific Operation subclass. When an <code>Operation</code> instance is called (after instantiation), the following two steps are executed:</p>
<ol>
<li>record the source operations, i.e., the <code>creator</code>s of the input tensors.</li>
<li>do calculation by calling member function <code>.forward()</code></li>
</ol>
<p>There are two member functions for forwarding and backwarding, i.e., <code>.forward()</code> and <code>.backward()</code>. They take <code>Tensor.data</code> as inputs (the type is <code>CTensor</code>), and output <code>Ctensor</code>s. To add a specific operation, subclass <code>operation</code> should implement their own <code>.forward()</code> and <code>.backward()</code>. The <code>backward()</code> function is called by the <code>backward()</code> function of autograd automatically during backward propogation to compute the gradients of inputs (according to the <code>require_grad</code> field).</p>
<h3><a class="anchor" aria-hidden="true" id="layer"></a><a href="#layer" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Layer</h3>
<p>For those operations that require parameters, we package them into a new class, <code>Layer</code>. For example, convolution operation is wrapped into a convolution layer. <code>Layer</code> manages (stores) the parameters and calls the corresponding <code>Operation</code>s to implement the transformation.</p>
<h2><a class="anchor" aria-hidden="true" id="examples"></a><a href="#examples" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Examples</h2>
<p>Multiple examples are provided in the <a href="https://github.com/apache/singa/tree/master/examples/autograd">example folder</a>. We explain two representative examples here.</p>
<h3><a class="anchor" aria-hidden="true" id="operation-only"></a><a href="#operation-only" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Operation only</h3>
<p>The following codes implement a MLP model using only Operation instances (no Layer instances).</p>
<h4><a class="anchor" aria-hidden="true" id="import-packages"></a><a href="#import-packages" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Import packages</h4>
<pre><code class="hljs css language-python"><span class="hljs-keyword">from</span> singa.tensor <span class="hljs-keyword">import</span> Tensor
<span class="hljs-keyword">from</span> singa <span class="hljs-keyword">import</span> autograd
<span class="hljs-keyword">from</span> singa <span class="hljs-keyword">import</span> opt
</code></pre>
<h4><a class="anchor" aria-hidden="true" id="create-weight-matrix-and-bias-vector"></a><a href="#create-weight-matrix-and-bias-vector" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Create weight matrix and bias vector</h4>
<p>The parameter tensors are created with both <code>requires_grad</code> and <code>stores_grad</code> set to <code>True</code>.</p>
<pre><code class="hljs css language-python">w0 = Tensor(shape=(<span class="hljs-number">2</span>, <span class="hljs-number">3</span>), requires_grad=<span class="hljs-literal">True</span>, stores_grad=<span class="hljs-literal">True</span>)
w0.gaussian(<span class="hljs-number">0.0</span>, <span class="hljs-number">0.1</span>)
b0 = Tensor(shape=(<span class="hljs-number">1</span>, <span class="hljs-number">3</span>), requires_grad=<span class="hljs-literal">True</span>, stores_grad=<span class="hljs-literal">True</span>)
b0.set_value(<span class="hljs-number">0.0</span>)
w1 = Tensor(shape=(<span class="hljs-number">3</span>, <span class="hljs-number">2</span>), requires_grad=<span class="hljs-literal">True</span>, stores_grad=<span class="hljs-literal">True</span>)
w1.gaussian(<span class="hljs-number">0.0</span>, <span class="hljs-number">0.1</span>)
b1 = Tensor(shape=(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>), requires_grad=<span class="hljs-literal">True</span>, stores_grad=<span class="hljs-literal">True</span>)
b1.set_value(<span class="hljs-number">0.0</span>)
</code></pre>
<h4><a class="anchor" aria-hidden="true" id="training"></a><a href="#training" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Training</h4>
<pre><code class="hljs css language-python">inputs = Tensor(data=data) <span class="hljs-comment"># data matrix</span>
target = Tensor(data=label) <span class="hljs-comment"># label vector</span>
autograd.training = <span class="hljs-literal">True</span> <span class="hljs-comment"># for training</span>
sgd = opt.SGD(<span class="hljs-number">0.05</span>) <span class="hljs-comment"># optimizer</span>
<span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> range(<span class="hljs-number">10</span>):
x = autograd.matmul(inputs, w0) <span class="hljs-comment"># matrix multiplication</span>
x = autograd.add_bias(x, b0) <span class="hljs-comment"># add the bias vector</span>
x = autograd.relu(x) <span class="hljs-comment"># ReLU activation operation</span>
x = autograd.matmul(x, w1)
x = autograd.add_bias(x, b1)
loss = autograd.softmax_cross_entropy(x, target)
<span class="hljs-keyword">for</span> p, g <span class="hljs-keyword">in</span> autograd.backward(loss):
sgd.update(p, g)
</code></pre>
<h3><a class="anchor" aria-hidden="true" id="operation--layer"></a><a href="#operation--layer" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Operation + Layer</h3>
<p>The following <a href="https://github.com/apache/singa/blob/master/examples/autograd/mnist_cnn.py">example</a> implements a CNN model using layers provided by the autograd module.</p>
<h4><a class="anchor" aria-hidden="true" id="create-the-layers"></a><a href="#create-the-layers" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Create the layers</h4>
<pre><code class="hljs css language-python">conv1 = autograd.Conv2d(<span class="hljs-number">1</span>, <span class="hljs-number">32</span>, <span class="hljs-number">3</span>, padding=<span class="hljs-number">1</span>, bias=<span class="hljs-literal">False</span>)
bn1 = autograd.BatchNorm2d(<span class="hljs-number">32</span>)
pooling1 = autograd.MaxPool2d(<span class="hljs-number">3</span>, <span class="hljs-number">1</span>, padding=<span class="hljs-number">1</span>)
conv21 = autograd.Conv2d(<span class="hljs-number">32</span>, <span class="hljs-number">16</span>, <span class="hljs-number">3</span>, padding=<span class="hljs-number">1</span>)
conv22 = autograd.Conv2d(<span class="hljs-number">32</span>, <span class="hljs-number">16</span>, <span class="hljs-number">3</span>, padding=<span class="hljs-number">1</span>)
bn2 = autograd.BatchNorm2d(<span class="hljs-number">32</span>)
linear = autograd.Linear(<span class="hljs-number">32</span> * <span class="hljs-number">28</span> * <span class="hljs-number">28</span>, <span class="hljs-number">10</span>)
pooling2 = autograd.AvgPool2d(<span class="hljs-number">3</span>, <span class="hljs-number">1</span>, padding=<span class="hljs-number">1</span>)
</code></pre>
<h4><a class="anchor" aria-hidden="true" id="define-the-forward-function"></a><a href="#define-the-forward-function" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Define the forward function</h4>
<p>The operations in the forward pass will be recorded automatically for backward propagation.</p>
<pre><code class="hljs css language-python"><span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">forward</span><span class="hljs-params">(x, t)</span>:</span>
<span class="hljs-comment"># x is the input data (a batch of images)</span>
<span class="hljs-comment"># t the the label vector (a batch of integers)</span>
y = conv1(x) <span class="hljs-comment"># Conv layer</span>
y = autograd.relu(y) <span class="hljs-comment"># ReLU operation</span>
y = bn1(y) <span class="hljs-comment"># BN layer</span>
y = pooling1(y) <span class="hljs-comment"># Pooling Layer</span>
<span class="hljs-comment"># two parallel convolution layers</span>
y1 = conv21(y)
y2 = conv22(y)
y = autograd.cat((y1, y2), <span class="hljs-number">1</span>) <span class="hljs-comment"># cat operation</span>
y = autograd.relu(y) <span class="hljs-comment"># ReLU operation</span>
y = bn2(y)
y = pooling2(y)
y = autograd.flatten(y) <span class="hljs-comment"># flatten operation</span>
y = linear(y) <span class="hljs-comment"># Linear layer</span>
loss = autograd.softmax_cross_entropy(y, t) <span class="hljs-comment"># operation</span>
<span class="hljs-keyword">return</span> loss, y
</code></pre>
<h4><a class="anchor" aria-hidden="true" id="training-1"></a><a href="#training-1" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Training</h4>
<pre><code class="hljs css language-python">autograd.training = <span class="hljs-literal">True</span>
<span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> range(epochs):
<span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> range(batch_number):
inputs = tensor.Tensor(device=dev, data=x_train[
i * batch_sz:(<span class="hljs-number">1</span> + i) * batch_sz], stores_grad=<span class="hljs-literal">False</span>)
targets = tensor.Tensor(device=dev, data=y_train[
i * batch_sz:(<span class="hljs-number">1</span> + i) * batch_sz], requires_grad=<span class="hljs-literal">False</span>, stores_grad=<span class="hljs-literal">False</span>)
loss, y = forward(inputs, targets) <span class="hljs-comment"># forward the net</span>
<span class="hljs-keyword">for</span> p, gp <span class="hljs-keyword">in</span> autograd.backward(loss): <span class="hljs-comment"># auto backward</span>
sgd.update(p, gp)
</code></pre>
</span></div></article></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/2.0.0/tensor"><span class="arrow-prev"></span><span>Tensor</span></a><a class="docs-next button" href="/docs/2.0.0/model-zoo-cnn-cifar10"><span>Next</span><span class="arrow-next"></span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#relevant-modules">Relevant Modules</a><ul class="toc-headings"><li><a href="#tensor">Tensor</a></li><li><a href="#operation">Operation</a></li><li><a href="#layer">Layer</a></li></ul></li><li><a href="#examples">Examples</a><ul class="toc-headings"><li><a href="#operation-only">Operation only</a></li><li><a href="#operation--layer">Operation + Layer</a></li></ul></li></ul></nav></div><footer class="nav-footer" id="footer"><section class="sitemap"><a href="/" class="nav-home"><img src="/img/singa-logo-square.png" alt="Apache SINGA" width="66" height="58"/></a><div><h5>Docs</h5><a href="/docs/installation">Getting Started</a><a href="/docs/device">Guides</a><a href="/en/#">API Reference (coming soon)</a><a href="/docs/model-zoo-cnn-cifar10">Model Zoo</a><a href="/docs/download-singa">Development</a></div><div><h5>Community</h5><a href="/en/users.html">User Showcase</a><a href="/docs/history-singa">SINGA History</a><a href="/docs/team-list">SINGA Team</a><a href="/news">SINGA News</a><a href="https://github.com/apache/singa-doc">GitHub</a><div class="social"><a class="github-button" href="https://github.com/apache/singa-doc" data-count-href="/apache/singa/stargazers" data-show-count="true" data-count-aria-label="# stargazers on GitHub" aria-label="Star this project on GitHub">apache/singa-doc</a></div><div class="social"><a href="https://twitter.com/ApacheSINGA" class="twitter-follow-button">Follow @ApacheSINGA</a></div></div><div><h5>Apache Software Foundation</h5><a href="https://apache.org/" target="_blank" rel="noreferrer noopener">Foundation</a><a href="http://www.apache.org/licenses/" target="_blank" rel="noreferrer noopener">License</a><a href="http://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noreferrer noopener">Sponsorship</a><a href="http://www.apache.org/foundation/thanks.html" target="_blank" rel="noreferrer noopener">Thanks</a><a href="http://www.apache.org/events/current-event" target="_blank" rel="noreferrer noopener">Events</a><a href="http://www.apache.org/security/" target="_blank" rel="noreferrer noopener">Security</a></div></section><div style="width:100%;text-align:center"><a href="https://apache.org/" target="_blank" rel="noreferrer noopener" class="ApacheOpenSource"><img src="/img/asf_logo_wide.svg" alt="Apache Open Source"/></a><section class="copyright" style="max-width:60%;margin:0 auto">Copyright © 2020
The Apache Software Foundation. All rights reserved.
Apache SINGA, Apache, the Apache feather logo, and
the Apache SINGA project logos are trademarks of The
Apache Software Foundation. All other marks mentioned
may be trademarks or registered trademarks of their
respective owners.</section></div></footer></div><script>window.twttr=(function(d,s, id){var js,fjs=d.getElementsByTagName(s)[0],t=window.twttr||{};if(d.getElementById(id))return t;js=d.createElement(s);js.id=id;js.src='https://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js, fjs);t._e = [];t.ready = function(f) {t._e.push(f);};return t;}(document, 'script', 'twitter-wjs'));</script></body></html>