blob: e17db4bc2fe390ff142c6488b000b33eb56bddfe [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!-- Encoding declaration. The media type is text/html: "text/xhtml" is not a registered type. -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.13"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>mxnet: /work/mxnet/3rdparty/mshadow/mshadow/tensor_cpu-inl.h Source File</title>
<!-- Stylesheets and scripts stay in their original relative order so the CSS cascade and script load order are unchanged. -->
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
  <!-- Single-cell table used only for layout; role="presentation" tells
       assistive technology not to announce it as a data table. -->
  <table role="presentation" cellspacing="0" cellpadding="0">
    <tbody>
      <tr style="height: 56px;">
        <td id="projectalign" style="padding-left: 0.5em;">
          <!-- Project name shown in the page banner. -->
          <div id="projectname">mxnet</div>
        </td>
      </tr>
    </tbody>
  </table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.13 -->
<script type="text/javascript">
/* Global search controller. It must stay a global named searchBox because
   the inline event handlers on #MSearchSelectWindow refer to it by name. */
var searchBox = new SearchBox('searchBox', 'search', false, 'Search');
</script>
<!-- Menu scripts, one-time page initialisation, and the navigation container. -->
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
/* $(function) already waits for the DOM to be ready, so the search UI is
   initialised directly after the menu rather than through a second, nested
   ready handler. */
$(function() {
  initMenu('', true, false, 'search.php', 'Search');
  init_search();
});
</script>
<!-- Empty container; presumably populated by the menu script (menu.js is not visible here). -->
<div id="main-nav"></div>
<!-- Window listing the search filter options; mouse and key events are handed to the global searchBox object. -->
<div id="MSearchSelectWindow"
     onkeydown="return searchBox.OnSearchSelectKey(event)"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()">
</div>
<!-- iframe showing the search results (closed by default); the title
     attribute gives the frame an accessible name for screen readers. -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
        name="MSearchResults" id="MSearchResults"
        title="Search results">
</iframe>
</div>
<!-- Breadcrumb trail of the directories containing this file: 3rdparty, mshadow, mshadow. -->
<div id="nav-path" class="navpath">
  <ul>
    <li class="navelem"><a class="el" href="dir_8cab8f464681f7cc51cee77e79a434cd.html">3rdparty</a></li>
    <li class="navelem"><a class="el" href="dir_3e48ced36faa4eaa1b41f6d960bf0edb.html">mshadow</a></li>
    <li class="navelem"><a class="el" href="dir_00b035bb2ad81894e6ad291054ea5f82.html">mshadow</a></li>
  </ul>
</div>
</div><!-- top -->
<!-- Page heading: the name of the file whose source listing follows. -->
<div class="header">
  <div class="headertitle">
    <div class="title">tensor_cpu-inl.h</div>
  </div>
</div><!--header-->
<div class="contents">
<a href="tensor__cpu-inl_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno"> 1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno"> 2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno"> 3</span>&#160;<span class="comment"> * or more contributor license agreements. See the NOTICE file</span></div><div class="line"><a name="l00004"></a><span class="lineno"> 4</span>&#160;<span class="comment"> * distributed with this work for additional information</span></div><div class="line"><a name="l00005"></a><span class="lineno"> 5</span>&#160;<span class="comment"> * regarding copyright ownership. The ASF licenses this file</span></div><div class="line"><a name="l00006"></a><span class="lineno"> 6</span>&#160;<span class="comment"> * to you under the Apache License, Version 2.0 (the</span></div><div class="line"><a name="l00007"></a><span class="lineno"> 7</span>&#160;<span class="comment"> * &quot;License&quot;); you may not use this file except in compliance</span></div><div class="line"><a name="l00008"></a><span class="lineno"> 8</span>&#160;<span class="comment"> * with the License. 
You may obtain a copy of the License at</span></div><div class="line"><a name="l00009"></a><span class="lineno"> 9</span>&#160;<span class="comment"> *</span></div><div class="line"><a name="l00010"></a><span class="lineno"> 10</span>&#160;<span class="comment"> * http://www.apache.org/licenses/LICENSE-2.0</span></div><div class="line"><a name="l00011"></a><span class="lineno"> 11</span>&#160;<span class="comment"> *</span></div><div class="line"><a name="l00012"></a><span class="lineno"> 12</span>&#160;<span class="comment"> * Unless required by applicable law or agreed to in writing,</span></div><div class="line"><a name="l00013"></a><span class="lineno"> 13</span>&#160;<span class="comment"> * software distributed under the License is distributed on an</span></div><div class="line"><a name="l00014"></a><span class="lineno"> 14</span>&#160;<span class="comment"> * &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span></div><div class="line"><a name="l00015"></a><span class="lineno"> 15</span>&#160;<span class="comment"> * KIND, either express or implied. 
See the License for the</span></div><div class="line"><a name="l00016"></a><span class="lineno"> 16</span>&#160;<span class="comment"> * specific language governing permissions and limitations</span></div><div class="line"><a name="l00017"></a><span class="lineno"> 17</span>&#160;<span class="comment"> * under the License.</span></div><div class="line"><a name="l00018"></a><span class="lineno"> 18</span>&#160;<span class="comment"> */</span></div><div class="line"><a name="l00019"></a><span class="lineno"> 19</span>&#160;</div><div class="line"><a name="l00025"></a><span class="lineno"> 25</span>&#160;<span class="preprocessor">#ifndef MSHADOW_TENSOR_CPU_INL_H_</span></div><div class="line"><a name="l00026"></a><span class="lineno"> 26</span>&#160;<span class="preprocessor">#define MSHADOW_TENSOR_CPU_INL_H_</span></div><div class="line"><a name="l00027"></a><span class="lineno"> 27</span>&#160;<span class="preprocessor">#include &lt;cstring&gt;</span></div><div class="line"><a name="l00028"></a><span class="lineno"> 28</span>&#160;<span class="preprocessor">#include &lt;functional&gt;</span></div><div class="line"><a name="l00029"></a><span class="lineno"> 29</span>&#160;<span class="preprocessor">#include &lt;utility&gt;</span></div><div class="line"><a name="l00030"></a><span class="lineno"> 30</span>&#160;<span class="preprocessor">#include &lt;vector&gt;</span></div><div class="line"><a name="l00031"></a><span class="lineno"> 31</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="3rdparty_2mshadow_2mshadow_2base_8h.html">./base.h</a>&quot;</span></div><div class="line"><a name="l00032"></a><span class="lineno"> 32</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="tensor_8h.html">./tensor.h</a>&quot;</span></div><div class="line"><a name="l00033"></a><span class="lineno"> 33</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="packet-inl_8h.html">./packet-inl.h</a>&quot;</span></div><div 
class="line"><a name="l00034"></a><span class="lineno"> 34</span>&#160;<span class="preprocessor">#include &quot;<a class="code" href="dot__engine-inl_8h.html">./dot_engine-inl.h</a>&quot;</span></div><div class="line"><a name="l00035"></a><span class="lineno"> 35</span>&#160;</div><div class="line"><a name="l00036"></a><span class="lineno"> 36</span>&#160;<span class="keyword">namespace </span><a class="code" href="namespacemshadow.html">mshadow</a> {</div><div class="line"><a name="l00037"></a><span class="lineno"> 37</span>&#160;<span class="keyword">template</span>&lt;&gt;</div><div class="line"><a name="l00038"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a8c62e9096998333b078c8b8f744061ad"> 38</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a8c62e9096998333b078c8b8f744061ad">InitTensorEngine&lt;cpu&gt;</a>(<span class="keywordtype">int</span> dev_id) {</div><div class="line"><a name="l00039"></a><span class="lineno"> 39</span>&#160;}</div><div class="line"><a name="l00040"></a><span class="lineno"> 40</span>&#160;<span class="keyword">template</span>&lt;&gt;</div><div class="line"><a name="l00041"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a0884cf8a1d05a9d631cdf82ef3ebc567"> 41</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a0884cf8a1d05a9d631cdf82ef3ebc567">ShutdownTensorEngine&lt;cpu&gt;</a>(void) {</div><div class="line"><a name="l00042"></a><span class="lineno"> 42</span>&#160;}</div><div class="line"><a name="l00043"></a><span class="lineno"> 43</span>&#160;</div><div class="line"><a name="l00044"></a><span class="lineno"> 44</span>&#160;<span class="keyword">template</span>&lt;&gt;</div><div class="line"><a name="l00045"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a2f9d916c81866abceed494cd622cd3e2"> 
45</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a2f9d916c81866abceed494cd622cd3e2">SetDevice&lt;cpu&gt;</a>(<span class="keywordtype">int</span> devid) {</div><div class="line"><a name="l00046"></a><span class="lineno"> 46</span>&#160;}</div><div class="line"><a name="l00047"></a><span class="lineno"> 47</span>&#160;<span class="keyword">template</span>&lt;&gt;</div><div class="line"><a name="l00048"></a><span class="lineno"><a class="line" href="namespacemshadow.html#ad46c65f82d888517bc8e372fd49d8011"> 48</a></span>&#160;<span class="keyword">inline</span> <a class="code" href="structmshadow_1_1Stream.html">Stream&lt;cpu&gt;</a> *<a class="code" href="namespacemshadow.html#ad46c65f82d888517bc8e372fd49d8011">NewStream&lt;cpu&gt;</a>(<span class="keywordtype">bool</span> create_blas_handle,</div><div class="line"><a name="l00049"></a><span class="lineno"> 49</span>&#160; <span class="keywordtype">bool</span> create_dnn_handle,</div><div class="line"><a name="l00050"></a><span class="lineno"> 50</span>&#160; <span class="keywordtype">int</span> dev_id) {</div><div class="line"><a name="l00051"></a><span class="lineno"> 51</span>&#160; <span class="keywordflow">return</span> <span class="keyword">new</span> <a class="code" href="structmshadow_1_1Stream.html">Stream&lt;cpu&gt;</a>();</div><div class="line"><a name="l00052"></a><span class="lineno"> 52</span>&#160;}</div><div class="line"><a name="l00053"></a><span class="lineno"> 53</span>&#160;<span class="keyword">template</span>&lt;&gt;</div><div class="line"><a name="l00054"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a9573b40e55fb2e885db8b594f4168efc"> 54</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a9573b40e55fb2e885db8b594f4168efc">DeleteStream&lt;cpu&gt;</a>(<a class="code" 
href="structmshadow_1_1Stream.html">Stream&lt;cpu&gt;</a> *stream) {</div><div class="line"><a name="l00055"></a><span class="lineno"> 55</span>&#160; <span class="keyword">delete</span> stream;</div><div class="line"><a name="l00056"></a><span class="lineno"> 56</span>&#160;}</div><div class="line"><a name="l00057"></a><span class="lineno"> 57</span>&#160;</div><div class="line"><a name="l00058"></a><span class="lineno"> 58</span>&#160;<span class="keyword">template</span>&lt;<span class="keywordtype">int</span> ndim&gt;</div><div class="line"><a name="l00059"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a10717da9e107e05150e6fb6d79103b34"> 59</a></span>&#160;<span class="keyword">inline</span> std::ostream &amp;operator&lt;&lt;(std::ostream &amp;os, const Shape&lt;ndim&gt; &amp;shape) { <span class="comment">// NOLINT(*)</span></div><div class="line"><a name="l00060"></a><span class="lineno"> 60</span>&#160; os &lt;&lt; <span class="charliteral">&#39;(&#39;</span>;</div><div class="line"><a name="l00061"></a><span class="lineno"> 61</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; ndim; ++i) {</div><div class="line"><a name="l00062"></a><span class="lineno"> 62</span>&#160; <span class="keywordflow">if</span> (i != 0) os &lt;&lt; <span class="charliteral">&#39;,&#39;</span>;</div><div class="line"><a name="l00063"></a><span class="lineno"> 63</span>&#160; os &lt;&lt; shape[i];</div><div class="line"><a name="l00064"></a><span class="lineno"> 64</span>&#160; }</div><div class="line"><a name="l00065"></a><span class="lineno"> 65</span>&#160; <span class="comment">// python style tuple</span></div><div class="line"><a name="l00066"></a><span class="lineno"> 66</span>&#160; <span class="keywordflow">if</span> (ndim == 1) os &lt;&lt; <span class="charliteral">&#39;,&#39;</span>;</div><div class="line"><a name="l00067"></a><span class="lineno"> 67</span>&#160; os &lt;&lt; <span 
class="charliteral">&#39;)&#39;</span>;</div><div class="line"><a name="l00068"></a><span class="lineno"> 68</span>&#160; <span class="keywordflow">return</span> os;</div><div class="line"><a name="l00069"></a><span class="lineno"> 69</span>&#160;}</div><div class="line"><a name="l00070"></a><span class="lineno"> 70</span>&#160;</div><div class="line"><a name="l00071"></a><span class="lineno"> 71</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> xpu&gt;</div><div class="line"><a name="l00072"></a><span class="lineno"> 72</span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> *<a class="code" href="namespacemshadow.html#a1d4e86f18ce66c1b7073537647bd4374">AllocHost_</a>(<span class="keywordtype">size_t</span> size);</div><div class="line"><a name="l00073"></a><span class="lineno"> 73</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> xpu&gt;</div><div class="line"><a name="l00074"></a><span class="lineno"> 74</span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a4c3517f91f3bb35d9867fa7a42665ec3">FreeHost_</a>(<span class="keywordtype">void</span> * dptr);</div><div class="line"><a name="l00075"></a><span class="lineno"> 75</span>&#160;</div><div class="line"><a name="l00076"></a><span class="lineno"> 76</span>&#160;<span class="preprocessor">#ifdef __CUDACC__</span></div><div class="line"><a name="l00077"></a><span class="lineno"> 77</span>&#160;<span class="keyword">template</span>&lt;&gt;</div><div class="line"><a name="l00078"></a><span class="lineno"> 78</span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> *AllocHost_&lt;gpu&gt;(<span class="keywordtype">size_t</span> size) {</div><div class="line"><a name="l00079"></a><span class="lineno"> 79</span>&#160; <span class="keywordtype">void</span> *dptr;</div><div class="line"><a 
name="l00080"></a><span class="lineno"> 80</span>&#160; <a class="code" href="3rdparty_2mshadow_2mshadow_2base_8h.html#a8f433b4dd005a854eec58178ffd3d4bd">MSHADOW_CUDA_CALL</a>(cudaMallocHost(&amp;dptr, size, cudaHostAllocPortable));</div><div class="line"><a name="l00081"></a><span class="lineno"> 81</span>&#160; <span class="keywordflow">return</span> dptr;</div><div class="line"><a name="l00082"></a><span class="lineno"> 82</span>&#160;}</div><div class="line"><a name="l00083"></a><span class="lineno"> 83</span>&#160;<span class="keyword">template</span>&lt;&gt;</div><div class="line"><a name="l00084"></a><span class="lineno"> 84</span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> FreeHost_&lt;gpu&gt;(<span class="keywordtype">void</span> *dptr) {</div><div class="line"><a name="l00085"></a><span class="lineno"> 85</span>&#160; <a class="code" href="3rdparty_2mshadow_2mshadow_2base_8h.html#a8f433b4dd005a854eec58178ffd3d4bd">MSHADOW_CUDA_CALL</a>(cudaFreeHost(dptr));</div><div class="line"><a name="l00086"></a><span class="lineno"> 86</span>&#160;}</div><div class="line"><a name="l00087"></a><span class="lineno"> 87</span>&#160;<span class="preprocessor">#endif</span></div><div class="line"><a name="l00088"></a><span class="lineno"> 88</span>&#160;</div><div class="line"><a name="l00089"></a><span class="lineno"> 89</span>&#160;<span class="keyword">template</span>&lt;&gt;</div><div class="line"><a name="l00090"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a750a8961eda58a98116726e462545e6c"> 90</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> *<a class="code" href="namespacemshadow.html#a750a8961eda58a98116726e462545e6c">AllocHost_&lt;cpu&gt;</a>(<span class="keywordtype">size_t</span> size) {</div><div class="line"><a name="l00091"></a><span class="lineno"> 91</span>&#160; <span class="keywordtype">size_t</span> pitch;</div><div class="line"><a 
name="l00092"></a><span class="lineno"> 92</span>&#160; <span class="keywordflow">return</span> <a class="code" href="namespacemshadow_1_1packet.html#ae87c3ed51219b7ae4a295884bffa5d8b">packet::AlignedMallocPitch</a>(&amp;pitch, size, 1);</div><div class="line"><a name="l00093"></a><span class="lineno"> 93</span>&#160;}</div><div class="line"><a name="l00094"></a><span class="lineno"> 94</span>&#160;<span class="keyword">template</span>&lt;&gt;</div><div class="line"><a name="l00095"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a508edd8f1985757d0562b1e951666340"> 95</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a508edd8f1985757d0562b1e951666340">FreeHost_&lt;cpu&gt;</a>(<span class="keywordtype">void</span> *dptr) {</div><div class="line"><a name="l00096"></a><span class="lineno"> 96</span>&#160; <a class="code" href="namespacemshadow_1_1packet.html#ac5a6a9a133e5c212531dbae80484c1cc">packet::AlignedFree</a>(dptr);</div><div class="line"><a name="l00097"></a><span class="lineno"> 97</span>&#160;}</div><div class="line"><a name="l00098"></a><span class="lineno"> 98</span>&#160;</div><div class="line"><a name="l00099"></a><span class="lineno"> 99</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> xpu, <span class="keywordtype">int</span> dim, <span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00100"></a><span class="lineno"><a class="line" href="namespacemshadow.html#ad0eacae3492e97f53fcd2f210bf9c645"> 100</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#ad0eacae3492e97f53fcd2f210bf9c645">AllocHost</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, dim, DType&gt;</a> *obj) {</div><div class="line"><a name="l00101"></a><span class="lineno"> 101</span>&#160; obj-&gt;<a class="code" 
href="structmshadow_1_1Tensor.html#afee556f188e29bbd0ecc45fe98d3c1c3">stride_</a> = obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(dim - 1);</div><div class="line"><a name="l00102"></a><span class="lineno"> 102</span>&#160; CHECK_EQ(obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">CheckContiguous</a>(), <span class="keyword">true</span>) &lt;&lt; <span class="stringliteral">&quot;AllocHost&quot;</span>;</div><div class="line"><a name="l00103"></a><span class="lineno"> 103</span>&#160; <span class="keywordtype">void</span> *dptr = AllocHost_&lt;xpu&gt;(obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#a01a586757f1d2abba12d0f4fdf2fdeb1">MSize</a>() * <span class="keyword">sizeof</span>(DType));</div><div class="line"><a name="l00104"></a><span class="lineno"> 104</span>&#160; obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a> = <span class="keyword">reinterpret_cast&lt;</span>DType*<span class="keyword">&gt;</span>(dptr);</div><div class="line"><a name="l00105"></a><span class="lineno"> 105</span>&#160;}</div><div class="line"><a name="l00106"></a><span class="lineno"> 106</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> xpu, <span class="keywordtype">int</span> dim, <span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00107"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a6dc5b2a49c44364a6f3c8cca386aff12"> 107</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a6dc5b2a49c44364a6f3c8cca386aff12">FreeHost</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, dim, DType&gt;</a> *obj) {</div><div class="line"><a name="l00108"></a><span class="lineno"> 108</span>&#160; <span class="keywordflow">if</span> (obj-&gt;<a 
class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a> == NULL) {</div><div class="line"><a name="l00109"></a><span class="lineno"> 109</span>&#160; LOG(FATAL) &lt;&lt; <span class="stringliteral">&quot;FreeHost:: double free&quot;</span>;</div><div class="line"><a name="l00110"></a><span class="lineno"> 110</span>&#160; }</div><div class="line"><a name="l00111"></a><span class="lineno"> 111</span>&#160; FreeHost_&lt;xpu&gt;(obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a>);</div><div class="line"><a name="l00112"></a><span class="lineno"> 112</span>&#160; obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a> = NULL;</div><div class="line"><a name="l00113"></a><span class="lineno"> 113</span>&#160;}</div><div class="line"><a name="l00114"></a><span class="lineno"> 114</span>&#160;</div><div class="line"><a name="l00115"></a><span class="lineno"> 115</span>&#160;<span class="keyword">template</span>&lt;<span class="keywordtype">int</span> dim, <span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00116"></a><span class="lineno"><a class="line" href="namespacemshadow.html#aef49c3cef522198322017315341ac689"> 116</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#aef49c3cef522198322017315341ac689">AllocSpace</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, dim, DType&gt;</a> *obj, <span class="keywordtype">bool</span> <a class="code" href="namespacemshadow_1_1expr.html#a97c45c821ac8275dd75fa585f47b9e00">pad</a>) {</div><div class="line"><a name="l00117"></a><span class="lineno"> 117</span>&#160; <span class="keywordtype">size_t</span> pitch;</div><div class="line"><a name="l00118"></a><span class="lineno"> 118</span>&#160; <span class="keywordtype">void</span> *dptr;</div><div class="line"><a 
name="l00119"></a><span class="lineno"> 119</span>&#160; <span class="keywordflow">if</span> (pad) {</div><div class="line"><a name="l00120"></a><span class="lineno"> 120</span>&#160; dptr = <a class="code" href="namespacemshadow_1_1packet.html#ae87c3ed51219b7ae4a295884bffa5d8b">packet::AlignedMallocPitch</a></div><div class="line"><a name="l00121"></a><span class="lineno"> 121</span>&#160; (&amp;pitch, obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(dim - 1) * <span class="keyword">sizeof</span>(DType), obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>.FlatTo2D()[0]);</div><div class="line"><a name="l00122"></a><span class="lineno"> 122</span>&#160; obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#afee556f188e29bbd0ecc45fe98d3c1c3">stride_</a> = <span class="keyword">static_cast&lt;</span><a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a><span class="keyword">&gt;</span>(pitch / <span class="keyword">sizeof</span>(DType));</div><div class="line"><a name="l00123"></a><span class="lineno"> 123</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00124"></a><span class="lineno"> 124</span>&#160; obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#afee556f188e29bbd0ecc45fe98d3c1c3">stride_</a> = obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(dim - 1);</div><div class="line"><a name="l00125"></a><span class="lineno"> 125</span>&#160; dptr = <a class="code" href="namespacemshadow_1_1packet.html#ae87c3ed51219b7ae4a295884bffa5d8b">packet::AlignedMallocPitch</a></div><div class="line"><a name="l00126"></a><span class="lineno"> 126</span>&#160; (&amp;pitch, obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>.Size() * <span class="keyword">sizeof</span>(DType), 
1);</div><div class="line"><a name="l00127"></a><span class="lineno"> 127</span>&#160; }</div><div class="line"><a name="l00128"></a><span class="lineno"> 128</span>&#160; obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a> = <span class="keyword">reinterpret_cast&lt;</span>DType*<span class="keyword">&gt;</span>(dptr);</div><div class="line"><a name="l00129"></a><span class="lineno"> 129</span>&#160;}</div><div class="line"><a name="l00130"></a><span class="lineno"> 130</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> Device, <span class="keyword">typename</span> DType, <span class="keywordtype">int</span> dim&gt;</div><div class="line"><a name="l00131"></a><span class="lineno"> 131</span>&#160;<span class="keyword">inline</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;Device, dim, DType&gt;</a></div><div class="line"><a name="l00132"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a3f19232d4c6ea393abccc5288675d345"> 132</a></span>&#160;<a class="code" href="namespacemshadow.html#a3f19232d4c6ea393abccc5288675d345">NewTensor</a>(<span class="keyword">const</span> <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;dim&gt;</a> &amp;shape, DType initv, <span class="keywordtype">bool</span> <a class="code" href="namespacemshadow_1_1expr.html#a97c45c821ac8275dd75fa585f47b9e00">pad</a>, <a class="code" href="structmshadow_1_1Stream.html">Stream&lt;Device&gt;</a> *stream_) {</div><div class="line"><a name="l00133"></a><span class="lineno"> 133</span>&#160; <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;Device, dim, DType&gt;</a> obj(shape);</div><div class="line"><a name="l00134"></a><span class="lineno"> 134</span>&#160; obj.<a class="code" href="structmshadow_1_1Tensor.html#a485d1558eaa9eded0b8fd23ed82c6298">stream_</a> = stream_;</div><div class="line"><a name="l00135"></a><span class="lineno"> 135</span>&#160; 
<a class="code" href="namespacemshadow.html#aef49c3cef522198322017315341ac689">AllocSpace</a>(&amp;obj, pad);</div><div class="line"><a name="l00136"></a><span class="lineno"> 136</span>&#160; MapExp&lt;sv::saveto&gt;(&amp;obj, <a class="code" href="structmshadow_1_1expr_1_1ScalarExp.html">expr::ScalarExp&lt;DType&gt;</a>(initv));</div><div class="line"><a name="l00137"></a><span class="lineno"> 137</span>&#160; <span class="keywordflow">return</span> obj;</div><div class="line"><a name="l00138"></a><span class="lineno"> 138</span>&#160;}</div><div class="line"><a name="l00139"></a><span class="lineno"> 139</span>&#160;<span class="keyword">template</span>&lt;<span class="keywordtype">int</span> dim, <span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00140"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a3e8485c882dab873525b4b241e5db7ab"> 140</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a3e8485c882dab873525b4b241e5db7ab">FreeSpace</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, dim, DType&gt;</a> *obj) {</div><div class="line"><a name="l00141"></a><span class="lineno"> 141</span>&#160; <a class="code" href="namespacemshadow_1_1packet.html#ac5a6a9a133e5c212531dbae80484c1cc">packet::AlignedFree</a>(obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a>);</div><div class="line"><a name="l00142"></a><span class="lineno"> 142</span>&#160; obj-&gt;<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a> = NULL;</div><div class="line"><a name="l00143"></a><span class="lineno"> 143</span>&#160;}</div><div class="line"><a name="l00144"></a><span class="lineno"> 144</span>&#160;<span class="keyword">template</span>&lt;<span class="keywordtype">int</span> dim, <span class="keyword">typename</span> DType&gt;</div><div 
class="line"><a name="l00145"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a0435f827863555e4ed19bbc0cc81fa39"> 145</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a0435f827863555e4ed19bbc0cc81fa39">Copy</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, dim, DType&gt;</a> _dst,</div><div class="line"><a name="l00146"></a><span class="lineno"> 146</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, dim, DType&gt;</a> &amp;_src,</div><div class="line"><a name="l00147"></a><span class="lineno"> 147</span>&#160; <a class="code" href="structmshadow_1_1Stream.html">Stream&lt;cpu&gt;</a> *stream) {</div><div class="line"><a name="l00148"></a><span class="lineno"> 148</span>&#160; CHECK_EQ(_dst.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>, _src.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>)</div><div class="line"><a name="l00149"></a><span class="lineno"> 149</span>&#160; &lt;&lt; <span class="stringliteral">&quot;Copy:shape mismatch:&quot;</span> &lt;&lt; _dst.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a> &lt;&lt; <span class="stringliteral">&quot; vs &quot;</span> &lt;&lt; _src.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>;</div><div class="line"><a name="l00150"></a><span class="lineno"> 150</span>&#160; <span class="keywordflow">if</span> (_dst.<a class="code" href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">CheckContiguous</a>() &amp;&amp; _src.<a class="code" href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">CheckContiguous</a>()) {</div><div class="line"><a name="l00151"></a><span class="lineno"> 151</span>&#160; memcpy(_dst.<a 
class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a>, _src.<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a>, <span class="keyword">sizeof</span>(DType) * _dst.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>.Size());</div><div class="line"><a name="l00152"></a><span class="lineno"> 152</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00153"></a><span class="lineno"> 153</span>&#160; <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> dst = _dst.<a class="code" href="structmshadow_1_1Tensor.html#a48a5927e810fbc45e43e92cfe397d9f2">FlatTo2D</a>();</div><div class="line"><a name="l00154"></a><span class="lineno"> 154</span>&#160; <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> src = _src.<a class="code" href="structmshadow_1_1Tensor.html#a48a5927e810fbc45e43e92cfe397d9f2">FlatTo2D</a>();</div><div class="line"><a name="l00155"></a><span class="lineno"> 155</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00156"></a><span class="lineno"> 156</span>&#160; memcpy(dst[y].dptr_, src[y].dptr_, <span class="keyword">sizeof</span>(DType) * dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1));</div><div class="line"><a name="l00157"></a><span class="lineno"> 157</span>&#160; }</div><div class="line"><a name="l00158"></a><span class="lineno"> 158</span>&#160; }</div><div class="line"><a name="l00159"></a><span class="lineno"> 159</span>&#160;}</div><div class="line"><a name="l00160"></a><span class="lineno"> 160</span>&#160;</div><div 
class="line"><a name="l00161"></a><span class="lineno"> 161</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> Saver, <span class="keyword">typename</span> R, <span class="keywordtype">int</span> dim,</div><div class="line"><a name="l00162"></a><span class="lineno"> 162</span>&#160; <span class="keyword">typename</span> DType, <span class="keyword">typename</span> E&gt;</div><div class="line"><a name="l00163"></a><span class="lineno"><a class="line" href="namespacemshadow.html#ad4d36332af8c5202ecc95adf4b00cdf2"> 163</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#ad4d36332af8c5202ecc95adf4b00cdf2">MapPlan</a>(<a class="code" href="structmshadow_1_1TRValue.html">TRValue&lt;R, cpu, dim, DType&gt;</a> *dst,</div><div class="line"><a name="l00164"></a><span class="lineno"> 164</span>&#160; <span class="keyword">const</span> <a class="code" href="classmshadow_1_1expr_1_1Plan.html">expr::Plan&lt;E, DType&gt;</a> &amp;plan) {</div><div class="line"><a name="l00165"></a><span class="lineno"> 165</span>&#160; <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;2&gt;</a> shape = <a class="code" href="structmshadow_1_1expr_1_1ShapeCheck.html#af44e6a3b97cf3b245bc1b7ad4cacb306">expr::ShapeCheck&lt;dim, R&gt;::Check</a>(dst-&gt;<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>()).FlatTo2D();</div><div class="line"><a name="l00166"></a><span class="lineno"> 166</span>&#160; <a class="code" href="classmshadow_1_1expr_1_1Plan.html">expr::Plan&lt;R, DType&gt;</a> dplan = <a class="code" href="namespacemshadow_1_1expr.html#aefd008a0bf012dc8fb99d9bbcca7a078">expr::MakePlan</a>(dst-&gt;<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00167"></a><span class="lineno"> 167</span>&#160;<span class="preprocessor">#ifndef 
__CUDACC__</span></div><div class="line"><a name="l00168"></a><span class="lineno"> 168</span>&#160;<span class="preprocessor"> #pragma omp parallel for</span></div><div class="line"><a name="l00169"></a><span class="lineno"> 169</span>&#160;<span class="preprocessor">#endif</span></div><div class="line"><a name="l00170"></a><span class="lineno"> 170</span>&#160; <span class="comment">// temp remove openmp, as default setting throttles CPU</span></div><div class="line"><a name="l00171"></a><span class="lineno"> 171</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> y = 0; y &lt; shape[0]; ++y) {</div><div class="line"><a name="l00172"></a><span class="lineno"> 172</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 0; x &lt; shape[1]; ++x) {</div><div class="line"><a name="l00173"></a><span class="lineno"> 173</span>&#160; <span class="comment">// trust your compiler! 
-_- they will optimize it</span></div><div class="line"><a name="l00174"></a><span class="lineno"> 174</span>&#160; Saver::template Save&lt;DType&gt;(dplan.REval(y, x), plan.<a class="code" href="classmshadow_1_1expr_1_1Plan.html#ac3328472fd038748346a97cf8e58f3c6">Eval</a>(y, x));</div><div class="line"><a name="l00175"></a><span class="lineno"> 175</span>&#160; }</div><div class="line"><a name="l00176"></a><span class="lineno"> 176</span>&#160; }</div><div class="line"><a name="l00177"></a><span class="lineno"> 177</span>&#160;}</div><div class="line"><a name="l00178"></a><span class="lineno"> 178</span>&#160;<span class="comment">// code to handle SSE optimization</span></div><div class="line"><a name="l00179"></a><span class="lineno"> 179</span>&#160;<span class="keyword">template</span>&lt;<span class="keywordtype">bool</span> pass_check, <span class="keyword">typename</span> Saver,</div><div class="line"><a name="l00180"></a><span class="lineno"> 180</span>&#160; <span class="keyword">typename</span> R, <span class="keywordtype">int</span> dim,</div><div class="line"><a name="l00181"></a><span class="lineno"> 181</span>&#160; <span class="keyword">typename</span> DType, <span class="keyword">typename</span> E, <span class="keywordtype">int</span> etype&gt;</div><div class="line"><a name="l00182"></a><span class="lineno"><a class="line" href="structmshadow_1_1MapExpCPUEngine.html"> 182</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structmshadow_1_1MapExpCPUEngine.html">MapExpCPUEngine</a> {</div><div class="line"><a name="l00183"></a><span class="lineno"><a class="line" href="structmshadow_1_1MapExpCPUEngine.html#a2815df2f68121b3093965d9bf6a7c75f"> 183</a></span>&#160; <span class="keyword">inline</span> <span class="keyword">static</span> <span class="keywordtype">void</span> <a class="code" href="structmshadow_1_1MapExpCPUEngine.html#a2815df2f68121b3093965d9bf6a7c75f">Map</a>(<a class="code" 
href="structmshadow_1_1TRValue.html">TRValue&lt;R, cpu, dim, DType&gt;</a> *dst,</div><div class="line"><a name="l00184"></a><span class="lineno"> 184</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1expr_1_1Exp.html">expr::Exp&lt;E, DType, etype&gt;</a> &amp;exp) {</div><div class="line"><a name="l00185"></a><span class="lineno"> 185</span>&#160; MapPlan&lt;Saver&gt;(dst, <a class="code" href="namespacemshadow_1_1expr.html#aefd008a0bf012dc8fb99d9bbcca7a078">MakePlan</a>(exp.<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>()));</div><div class="line"><a name="l00186"></a><span class="lineno"> 186</span>&#160; }</div><div class="line"><a name="l00187"></a><span class="lineno"> 187</span>&#160;};</div><div class="line"><a name="l00188"></a><span class="lineno"> 188</span>&#160;</div><div class="line"><a name="l00189"></a><span class="lineno"> 189</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> SV, <span class="keywordtype">int</span> dim, <span class="keyword">typename</span> DType, <span class="keyword">typename</span> E, <span class="keywordtype">int</span> etype&gt;</div><div class="line"><a name="l00190"></a><span class="lineno"><a class="line" href="structmshadow_1_1MapExpCPUEngine_3_01true_00_01SV_00_01Tensor_3_01cpu_00_01dim_00_01DType_01_4_01d6b33847e15ac0b561110aa3bff3f62.html"> 190</a></span>&#160;<span class="keyword">struct </span><a class="code" href="structmshadow_1_1MapExpCPUEngine.html">MapExpCPUEngine</a>&lt;true, SV, <a class="code" href="structmshadow_1_1Tensor.html">Tensor</a>&lt;<a class="code" href="structmshadow_1_1cpu.html">cpu</a>, dim, DType&gt;,</div><div class="line"><a name="l00191"></a><span class="lineno"> 191</span>&#160; dim, DType, E, etype&gt; {</div><div class="line"><a name="l00192"></a><span class="lineno"><a class="line" 
href="structmshadow_1_1MapExpCPUEngine_3_01true_00_01SV_00_01Tensor_3_01cpu_00_01dim_00_01DType_01_4_01d6b33847e15ac0b561110aa3bff3f62.html#aa7c638bcd548963fc82f8752317f9d3f"> 192</a></span>&#160; <span class="keyword">inline</span> <span class="keyword">static</span> <span class="keywordtype">void</span> <a class="code" href="structmshadow_1_1MapExpCPUEngine_3_01true_00_01SV_00_01Tensor_3_01cpu_00_01dim_00_01DType_01_4_01d6b33847e15ac0b561110aa3bff3f62.html#aa7c638bcd548963fc82f8752317f9d3f">Map</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, dim, DType&gt;</a> *dst,</div><div class="line"><a name="l00193"></a><span class="lineno"> 193</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1expr_1_1Exp.html">expr::Exp&lt;E, DType, etype&gt;</a> &amp;exp) {</div><div class="line"><a name="l00194"></a><span class="lineno"> 194</span>&#160; <span class="keywordflow">if</span> (<a class="code" href="structmshadow_1_1expr_1_1PacketAlignCheck.html">expr::PacketAlignCheck&lt;dim, E, MSHADOW_DEFAULT_PACKET&gt;::Check</a>(exp.<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>()) &amp;&amp;</div><div class="line"><a name="l00195"></a><span class="lineno"> 195</span>&#160; <a class="code" href="structmshadow_1_1expr_1_1PacketAlignCheck.html">expr::PacketAlignCheck</a>&lt;dim, <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, dim, DType&gt;</a>, <a class="code" href="packet-inl_8h.html#a958737730f9e52e5d5b9cbab79ad357e">MSHADOW_DEFAULT_PACKET</a>&gt;::Check(*dst)) {</div><div class="line"><a name="l00196"></a><span class="lineno"> 196</span>&#160; expr::MapPacketPlan&lt;SV&gt;(dst-&gt;<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>(),</div><div class="line"><a name="l00197"></a><span class="lineno"> 197</span>&#160; expr::MakePacketPlan&lt;MSHADOW_DEFAULT_PACKET&gt;(exp.<a class="code" 
href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>()));</div><div class="line"><a name="l00198"></a><span class="lineno"> 198</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00199"></a><span class="lineno"> 199</span>&#160; MapPlan&lt;SV&gt;(dst, <a class="code" href="namespacemshadow_1_1expr.html#aefd008a0bf012dc8fb99d9bbcca7a078">MakePlan</a>(exp.<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>()));</div><div class="line"><a name="l00200"></a><span class="lineno"> 200</span>&#160; }</div><div class="line"><a name="l00201"></a><span class="lineno"> 201</span>&#160; }</div><div class="line"><a name="l00202"></a><span class="lineno"> 202</span>&#160;};</div><div class="line"><a name="l00203"></a><span class="lineno"> 203</span>&#160;</div><div class="line"><a name="l00204"></a><span class="lineno"> 204</span>&#160;</div><div class="line"><a name="l00205"></a><span class="lineno"> 205</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> Saver, <span class="keyword">typename</span> R, <span class="keywordtype">int</span> dim,</div><div class="line"><a name="l00206"></a><span class="lineno"> 206</span>&#160; <span class="keyword">typename</span> DType, <span class="keyword">typename</span> E, <span class="keywordtype">int</span> etype&gt;</div><div class="line"><a name="l00207"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a4bdc2c62fd5dcee696cadd2351bf85e2"> 207</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a4bdc2c62fd5dcee696cadd2351bf85e2">MapExp</a>(<a class="code" href="structmshadow_1_1TRValue.html">TRValue&lt;R, cpu, dim, DType&gt;</a> *dst,</div><div class="line"><a name="l00208"></a><span class="lineno"> 208</span>&#160; <span class="keyword">const</span> <a class="code" 
href="structmshadow_1_1expr_1_1Exp.html">expr::Exp&lt;E, DType, etype&gt;</a> &amp;exp) {</div><div class="line"><a name="l00209"></a><span class="lineno"> 209</span>&#160; <a class="code" href="structmshadow_1_1expr_1_1TypeCheckPass.html">expr::TypeCheckPass&lt;expr::TypeCheck&lt;cpu, dim, DType, E&gt;::kMapPass</a>&gt;</div><div class="line"><a name="l00210"></a><span class="lineno"> 210</span>&#160; ::Error_All_Tensor_in_Exp_Must_Have_Same_Type();</div><div class="line"><a name="l00211"></a><span class="lineno"> 211</span>&#160; <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;dim&gt;</a> eshape = <a class="code" href="structmshadow_1_1expr_1_1ShapeCheck.html#af44e6a3b97cf3b245bc1b7ad4cacb306">expr::ShapeCheck&lt;dim, E&gt;::Check</a>(exp.<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00212"></a><span class="lineno"> 212</span>&#160; <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;dim&gt;</a> dshape = <a class="code" href="structmshadow_1_1expr_1_1ShapeCheck.html#af44e6a3b97cf3b245bc1b7ad4cacb306">expr::ShapeCheck&lt;dim, R&gt;::Check</a>(dst-&gt;<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00213"></a><span class="lineno"> 213</span>&#160; CHECK(eshape[0] == 0 || eshape == dshape)</div><div class="line"><a name="l00214"></a><span class="lineno"> 214</span>&#160; &lt;&lt; <span class="stringliteral">&quot;Assignment: Shape of Tensors are not consistent with target, &quot;</span></div><div class="line"><a name="l00215"></a><span class="lineno"> 215</span>&#160; &lt;&lt; <span class="stringliteral">&quot;eshape: &quot;</span> &lt;&lt; eshape &lt;&lt; <span class="stringliteral">&quot; dshape:&quot;</span> &lt;&lt; dshape;</div><div class="line"><a name="l00216"></a><span class="lineno"> 216</span>&#160; <a class="code" 
href="structmshadow_1_1MapExpCPUEngine.html">MapExpCPUEngine&lt;expr::PacketCheck&lt;E, MSHADOW_DEFAULT_PACKET&gt;::kPass</a>,</div><div class="line"><a name="l00217"></a><span class="lineno"> 217</span>&#160; Saver, R, dim, DType, E, etype&gt;</div><div class="line"><a name="l00218"></a><span class="lineno"> 218</span>&#160; <a class="code" href="structmshadow_1_1MapExpCPUEngine.html#a2815df2f68121b3093965d9bf6a7c75f">::Map</a>(dst-&gt;<a class="code" href="structmshadow_1_1expr_1_1Exp.html#ad6f23b53ba2fc3c2b594cf2ddfba5b22">ptrself</a>(), exp);</div><div class="line"><a name="l00219"></a><span class="lineno"> 219</span>&#160;}</div><div class="line"><a name="l00220"></a><span class="lineno"> 220</span>&#160;</div><div class="line"><a name="l00221"></a><span class="lineno"> 221</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> Saver, <span class="keyword">typename</span> Reducer,</div><div class="line"><a name="l00222"></a><span class="lineno"> 222</span>&#160; <span class="keyword">typename</span> R, <span class="keyword">typename</span> DType, <span class="keyword">typename</span> E, <span class="keywordtype">int</span> etype&gt;</div><div class="line"><a name="l00223"></a><span class="lineno"><a class="line" href="namespacemshadow.html#ae1734eb7939fe9627de46d62494fe9dc"> 223</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#ae1734eb7939fe9627de46d62494fe9dc">MapReduceKeepLowest</a>(<a class="code" href="structmshadow_1_1TRValue.html">TRValue&lt;R, cpu, 1, DType&gt;</a> *dst,</div><div class="line"><a name="l00224"></a><span class="lineno"> 224</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1expr_1_1Exp.html">expr::Exp&lt;E, DType, etype&gt;</a> &amp;exp,</div><div class="line"><a name="l00225"></a><span class="lineno"> 225</span>&#160; DType scale) {</div><div class="line"><a name="l00226"></a><span 
class="lineno"> 226</span>&#160; <a class="code" href="structmshadow_1_1expr_1_1TypeCheckPass.html">expr::TypeCheckPass&lt;expr::TypeCheck&lt;cpu, 1, DType, E&gt;::kRedPass</a>&gt;</div><div class="line"><a name="l00227"></a><span class="lineno"> 227</span>&#160; ::Error_TypeCheck_Not_Pass_For_Reduce_Exp();</div><div class="line"><a name="l00228"></a><span class="lineno"> 228</span>&#160; <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;2&gt;</a> eshape = <a class="code" href="structmshadow_1_1expr_1_1ShapeCheck.html">expr::ShapeCheck&lt;expr::ExpInfo&lt;E&gt;::kDim</a>, E&gt;</div><div class="line"><a name="l00229"></a><span class="lineno"> 229</span>&#160; ::Check(exp.<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>()).FlatTo2D();</div><div class="line"><a name="l00230"></a><span class="lineno"> 230</span>&#160; <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;1&gt;</a> dshape = <a class="code" href="structmshadow_1_1expr_1_1ShapeCheck.html#af44e6a3b97cf3b245bc1b7ad4cacb306">expr::ShapeCheck&lt;1, R&gt;::Check</a>(dst-&gt;<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00231"></a><span class="lineno"> 231</span>&#160; CHECK_EQ(eshape[1], dshape[0]) &lt;&lt; <span class="stringliteral">&quot;MapReduceKeepLowest::reduction dimension do not match&quot;</span>;</div><div class="line"><a name="l00232"></a><span class="lineno"> 232</span>&#160; CHECK_NE(eshape[0], 0U) &lt;&lt; <span class="stringliteral">&quot;can not reduce over empty tensor&quot;</span>;</div><div class="line"><a name="l00233"></a><span class="lineno"> 233</span>&#160; <span class="comment">// execution</span></div><div class="line"><a name="l00234"></a><span class="lineno"> 234</span>&#160; <a class="code" href="classmshadow_1_1expr_1_1Plan.html">expr::Plan&lt;R, DType&gt;</a> dplan = <a class="code" 
href="namespacemshadow_1_1expr.html#aefd008a0bf012dc8fb99d9bbcca7a078">MakePlan</a>(dst-&gt;<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00235"></a><span class="lineno"> 235</span>&#160; <a class="code" href="classmshadow_1_1expr_1_1Plan.html">expr::Plan&lt;E, DType&gt;</a> splan = <a class="code" href="namespacemshadow_1_1expr.html#aefd008a0bf012dc8fb99d9bbcca7a078">MakePlan</a>(exp.<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00236"></a><span class="lineno"> 236</span>&#160;<span class="preprocessor">#ifndef __CUDACC__</span></div><div class="line"><a name="l00237"></a><span class="lineno"> 237</span>&#160;<span class="preprocessor"> #pragma omp parallel for</span></div><div class="line"><a name="l00238"></a><span class="lineno"> 238</span>&#160;<span class="preprocessor">#endif</span></div><div class="line"><a name="l00239"></a><span class="lineno"> 239</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> x = 0; x &lt; eshape[1]; ++x) {</div><div class="line"><a name="l00240"></a><span class="lineno"> 240</span>&#160; DType res = splan.Eval(0, x);</div><div class="line"><a name="l00241"></a><span class="lineno"> 241</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 1; y &lt; eshape[0]; ++y) {</div><div class="line"><a name="l00242"></a><span class="lineno"> 242</span>&#160; Reducer::Reduce(res, splan.Eval(y, x));</div><div class="line"><a name="l00243"></a><span class="lineno"> 243</span>&#160; }</div><div class="line"><a name="l00244"></a><span class="lineno"> 244</span>&#160; Saver::template Save&lt;DType&gt;(dplan.REval(0, x), res * scale);</div><div class="line"><a name="l00245"></a><span 
class="lineno"> 245</span>&#160; }</div><div class="line"><a name="l00246"></a><span class="lineno"> 246</span>&#160;}</div><div class="line"><a name="l00247"></a><span class="lineno"> 247</span>&#160;</div><div class="line"><a name="l00248"></a><span class="lineno"> 248</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> Saver, <span class="keyword">typename</span> Reducer, <span class="keywordtype">int</span> dimkeep,</div><div class="line"><a name="l00249"></a><span class="lineno"> 249</span>&#160; <span class="keyword">typename</span> R, <span class="keyword">typename</span> DType, <span class="keyword">typename</span> E, <span class="keywordtype">int</span> etype&gt;</div><div class="line"><a name="l00250"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a5fce5c2df842439cc7d2d7a90e2cf7d4"> 250</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a5fce5c2df842439cc7d2d7a90e2cf7d4">MapReduceKeepHighDim</a>(<a class="code" href="structmshadow_1_1TRValue.html">TRValue&lt;R, cpu, 1, DType&gt;</a> *dst,</div><div class="line"><a name="l00251"></a><span class="lineno"> 251</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1expr_1_1Exp.html">expr::Exp&lt;E, DType, etype&gt;</a> &amp;exp,</div><div class="line"><a name="l00252"></a><span class="lineno"> 252</span>&#160; DType scale) {</div><div class="line"><a name="l00253"></a><span class="lineno"> 253</span>&#160; <a class="code" href="structmshadow_1_1expr_1_1TypeCheckPass.html">expr::TypeCheckPass&lt;expr::TypeCheck&lt;cpu, dimkeep, DType, E&gt;::kRedPass</a>&gt;</div><div class="line"><a name="l00254"></a><span class="lineno"> 254</span>&#160; ::Error_TypeCheck_Not_Pass_For_Reduce_Exp();</div><div class="line"><a name="l00255"></a><span class="lineno"> 255</span>&#160; <span class="keyword">typedef</span> <a class="code" 
href="structmshadow_1_1Shape.html">Shape&lt;expr::ExpInfo&lt;E&gt;::kDim</a>&gt; EShape;</div><div class="line"><a name="l00256"></a><span class="lineno"> 256</span>&#160; EShape eshape = <a class="code" href="structmshadow_1_1expr_1_1ShapeCheck.html">expr::ShapeCheck&lt;expr::ExpInfo&lt;E&gt;::kDim</a>, E&gt;</div><div class="line"><a name="l00257"></a><span class="lineno"> 257</span>&#160; ::Check(exp.<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00258"></a><span class="lineno"> 258</span>&#160; <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;1&gt;</a> dshape = <a class="code" href="structmshadow_1_1expr_1_1ShapeCheck.html#af44e6a3b97cf3b245bc1b7ad4cacb306">expr::ShapeCheck&lt;1, R&gt;::Check</a>(dst-&gt;<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00259"></a><span class="lineno"> 259</span>&#160; CHECK_EQ(eshape[dimkeep], dshape[0])</div><div class="line"><a name="l00260"></a><span class="lineno"> 260</span>&#160; &lt;&lt; <span class="stringliteral">&quot;MapReduceKeepHighDim::reduction dimension do not match&quot;</span>;</div><div class="line"><a name="l00261"></a><span class="lineno"> 261</span>&#160; <span class="comment">// use equvalent form</span></div><div class="line"><a name="l00262"></a><span class="lineno"> 262</span>&#160; <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;4&gt;</a> pshape = <a class="code" href="namespacemshadow.html#a8fc5237744c6eda97f3070ddcb0c715e">Shape4</a>(eshape.ProdShape(0, dimkeep),</div><div class="line"><a name="l00263"></a><span class="lineno"> 263</span>&#160; eshape[dimkeep],</div><div class="line"><a name="l00264"></a><span class="lineno"> 264</span>&#160; eshape.ProdShape(dimkeep + 1, EShape::kSubdim),</div><div class="line"><a name="l00265"></a><span class="lineno"> 265</span>&#160; 
eshape[EShape::kSubdim]);</div><div class="line"><a name="l00266"></a><span class="lineno"> 266</span>&#160; <span class="comment">// execution</span></div><div class="line"><a name="l00267"></a><span class="lineno"> 267</span>&#160; <a class="code" href="classmshadow_1_1expr_1_1Plan.html">expr::Plan&lt;R, DType&gt;</a> dplan = <a class="code" href="namespacemshadow_1_1expr.html#aefd008a0bf012dc8fb99d9bbcca7a078">MakePlan</a>(dst-&gt;<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00268"></a><span class="lineno"> 268</span>&#160; <a class="code" href="classmshadow_1_1expr_1_1Plan.html">expr::Plan&lt;E, DType&gt;</a> splan = <a class="code" href="namespacemshadow_1_1expr.html#aefd008a0bf012dc8fb99d9bbcca7a078">MakePlan</a>(exp.<a class="code" href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">self</a>());</div><div class="line"><a name="l00269"></a><span class="lineno"> 269</span>&#160;<span class="preprocessor">#ifndef __CUDACC__</span></div><div class="line"><a name="l00270"></a><span class="lineno"> 270</span>&#160;<span class="preprocessor"> #pragma omp parallel for</span></div><div class="line"><a name="l00271"></a><span class="lineno"> 271</span>&#160;<span class="preprocessor">#endif</span></div><div class="line"><a name="l00272"></a><span class="lineno"> 272</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> c = 0; c &lt; pshape[1]; ++c) {</div><div class="line"><a name="l00273"></a><span class="lineno"> 273</span>&#160; DType res; Reducer::SetInitValue(res);</div><div class="line"><a name="l00274"></a><span class="lineno"> 274</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> n = 0; n &lt; pshape[0]; ++n) {</div><div class="line"><a name="l00275"></a><span 
class="lineno"> 275</span>&#160; DType tres; Reducer::SetInitValue(tres);</div><div class="line"><a name="l00276"></a><span class="lineno"> 276</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 0; y &lt; pshape[2]; ++y) {</div><div class="line"><a name="l00277"></a><span class="lineno"> 277</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 0; x &lt; pshape[3]; ++x) {</div><div class="line"><a name="l00278"></a><span class="lineno"> 278</span>&#160; Reducer::Reduce(tres,</div><div class="line"><a name="l00279"></a><span class="lineno"> 279</span>&#160; splan.Eval((n * pshape[1] + c) * pshape[2] + y, x));</div><div class="line"><a name="l00280"></a><span class="lineno"> 280</span>&#160; }</div><div class="line"><a name="l00281"></a><span class="lineno"> 281</span>&#160; }</div><div class="line"><a name="l00282"></a><span class="lineno"> 282</span>&#160; Reducer::Reduce(res, tres);</div><div class="line"><a name="l00283"></a><span class="lineno"> 283</span>&#160; }</div><div class="line"><a name="l00284"></a><span class="lineno"> 284</span>&#160; Saver::template Save&lt;DType&gt;(dplan.REval(0, c), DType(res * scale));</div><div class="line"><a name="l00285"></a><span class="lineno"> 285</span>&#160; }</div><div class="line"><a name="l00286"></a><span class="lineno"> 286</span>&#160;}</div><div class="line"><a name="l00287"></a><span class="lineno"> 287</span>&#160;</div><div class="line"><a name="l00288"></a><span class="lineno"> 288</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00289"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a486ba25ad6f51106014450c1e745e767"> 289</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a 
class="code" href="namespacemshadow.html#a75161ef5ed964dcf6518e3a7e59e6fb6">Softmax</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, DType&gt;</a> dst,</div><div class="line"><a name="l00290"></a><span class="lineno"> 290</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, DType&gt;</a> &amp;energy) {</div><div class="line"><a name="l00291"></a><span class="lineno"> 291</span>&#160; DType mmax = energy[0];</div><div class="line"><a name="l00292"></a><span class="lineno"> 292</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 1; x &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++x) {</div><div class="line"><a name="l00293"></a><span class="lineno"> 293</span>&#160; <span class="keywordflow">if</span> (mmax &lt; energy[x]) mmax = energy[x];</div><div class="line"><a name="l00294"></a><span class="lineno"> 294</span>&#160; }</div><div class="line"><a name="l00295"></a><span class="lineno"> 295</span>&#160; DType sum = DType(0.0f);</div><div class="line"><a name="l00296"></a><span class="lineno"> 296</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 0; x &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++x) {</div><div class="line"><a name="l00297"></a><span class="lineno"> 297</span>&#160; dst[x] = std::exp(energy[x] - mmax);</div><div class="line"><a name="l00298"></a><span class="lineno"> 298</span>&#160; sum += dst[x];</div><div class="line"><a name="l00299"></a><span class="lineno"> 299</span>&#160; }</div><div class="line"><a name="l00300"></a><span class="lineno"> 300</span>&#160; <span class="keywordflow">for</span> (<a class="code" 
href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 0; x &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++x) {</div><div class="line"><a name="l00301"></a><span class="lineno"> 301</span>&#160; dst[x] /= sum;</div><div class="line"><a name="l00302"></a><span class="lineno"> 302</span>&#160; }</div><div class="line"><a name="l00303"></a><span class="lineno"> 303</span>&#160;}</div><div class="line"><a name="l00304"></a><span class="lineno"> 304</span>&#160;</div><div class="line"><a name="l00305"></a><span class="lineno"> 305</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00306"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a3409c8a836e7dab83bec25556164261c"> 306</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a3409c8a836e7dab83bec25556164261c">SoftmaxGrad</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> dst,</div><div class="line"><a name="l00307"></a><span class="lineno"> 307</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;src,</div><div class="line"><a name="l00308"></a><span class="lineno"> 308</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, DType&gt;</a> &amp;label) {</div><div class="line"><a name="l00309"></a><span class="lineno"> 309</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00310"></a><span class="lineno"> 310</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> y = 0; y &lt; dst.<a class="code" 
href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00311"></a><span class="lineno"> 311</span>&#160; <span class="keyword">const</span> <a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> k = <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(label[y]);</div><div class="line"><a name="l00312"></a><span class="lineno"> 312</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 0; x &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1); ++x) {</div><div class="line"><a name="l00313"></a><span class="lineno"> 313</span>&#160; <span class="keywordflow">if</span> (x == k) {</div><div class="line"><a name="l00314"></a><span class="lineno"> 314</span>&#160; dst[y][k] = src[y][k] - 1.0f;</div><div class="line"><a name="l00315"></a><span class="lineno"> 315</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00316"></a><span class="lineno"> 316</span>&#160; dst[y][x] = src[y][x];</div><div class="line"><a name="l00317"></a><span class="lineno"> 317</span>&#160; }</div><div class="line"><a name="l00318"></a><span class="lineno"> 318</span>&#160; }</div><div class="line"><a name="l00319"></a><span class="lineno"> 319</span>&#160; }</div><div class="line"><a name="l00320"></a><span class="lineno"> 320</span>&#160;}</div><div class="line"><a name="l00321"></a><span class="lineno"> 321</span>&#160;</div><div class="line"><a name="l00322"></a><span class="lineno"> 322</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00323"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a4fb3739d11e671f9809458cf6ada1e64"> 
323</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a4fb3739d11e671f9809458cf6ada1e64">SmoothSoftmaxGrad</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> dst,</div><div class="line"><a name="l00324"></a><span class="lineno"> 324</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;src,</div><div class="line"><a name="l00325"></a><span class="lineno"> 325</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, DType&gt;</a> &amp;label,</div><div class="line"><a name="l00326"></a><span class="lineno"> 326</span>&#160; <span class="keyword">const</span> <span class="keywordtype">float</span> alpha) {</div><div class="line"><a name="l00327"></a><span class="lineno"> 327</span>&#160; <span class="keyword">const</span> <span class="keywordtype">float</span> smooth_grad = (alpha / (dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1) - 1));</div><div class="line"><a name="l00328"></a><span class="lineno"> 328</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00329"></a><span class="lineno"> 329</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00330"></a><span class="lineno"> 330</span>&#160; <span class="keyword">const</span> <a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> k = <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(label[y]);</div><div 
class="line"><a name="l00331"></a><span class="lineno"> 331</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 0; x &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1); ++x) {</div><div class="line"><a name="l00332"></a><span class="lineno"> 332</span>&#160; <span class="keywordflow">if</span> (x == k) {</div><div class="line"><a name="l00333"></a><span class="lineno"> 333</span>&#160; dst[y][k] = src[y][k] - 1.0f + alpha;</div><div class="line"><a name="l00334"></a><span class="lineno"> 334</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00335"></a><span class="lineno"> 335</span>&#160; dst[y][x] = src[y][x] - smooth_grad;</div><div class="line"><a name="l00336"></a><span class="lineno"> 336</span>&#160; }</div><div class="line"><a name="l00337"></a><span class="lineno"> 337</span>&#160; }</div><div class="line"><a name="l00338"></a><span class="lineno"> 338</span>&#160; }</div><div class="line"><a name="l00339"></a><span class="lineno"> 339</span>&#160;}</div><div class="line"><a name="l00340"></a><span class="lineno"> 340</span>&#160;</div><div class="line"><a name="l00341"></a><span class="lineno"> 341</span>&#160;</div><div class="line"><a name="l00342"></a><span class="lineno"> 342</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00343"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a97d3ff90dd86faf3c79c382591af5705"> 343</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a3409c8a836e7dab83bec25556164261c">SoftmaxGrad</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> dst,</div><div class="line"><a name="l00344"></a><span class="lineno"> 
344</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;src,</div><div class="line"><a name="l00345"></a><span class="lineno"> 345</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, DType&gt;</a> &amp;label,</div><div class="line"><a name="l00346"></a><span class="lineno"> 346</span>&#160; <span class="keyword">const</span> DType &amp;ignore_label) {</div><div class="line"><a name="l00347"></a><span class="lineno"> 347</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00348"></a><span class="lineno"> 348</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00349"></a><span class="lineno"> 349</span>&#160; <span class="keyword">const</span> <span class="keywordtype">int</span> k = <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(label[y]);</div><div class="line"><a name="l00350"></a><span class="lineno"> 350</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> x = 0; x &lt; static_cast&lt;int&gt;(dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1)); ++x) {</div><div class="line"><a name="l00351"></a><span class="lineno"> 351</span>&#160; <span class="keywordflow">if</span> (static_cast&lt;int&gt;(ignore_label) == k) {</div><div class="line"><a name="l00352"></a><span class="lineno"> 352</span>&#160; dst[y][x] = 0.0f;</div><div class="line"><a name="l00353"></a><span class="lineno"> 353</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a 
name="l00354"></a><span class="lineno"> 354</span>&#160; <span class="keywordflow">if</span> (x == k) {</div><div class="line"><a name="l00355"></a><span class="lineno"> 355</span>&#160; dst[y][k] = src[y][k] - 1.0f;</div><div class="line"><a name="l00356"></a><span class="lineno"> 356</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00357"></a><span class="lineno"> 357</span>&#160; dst[y][x] = src[y][x];</div><div class="line"><a name="l00358"></a><span class="lineno"> 358</span>&#160; }</div><div class="line"><a name="l00359"></a><span class="lineno"> 359</span>&#160; }</div><div class="line"><a name="l00360"></a><span class="lineno"> 360</span>&#160; }</div><div class="line"><a name="l00361"></a><span class="lineno"> 361</span>&#160; }</div><div class="line"><a name="l00362"></a><span class="lineno"> 362</span>&#160;}</div><div class="line"><a name="l00363"></a><span class="lineno"> 363</span>&#160;</div><div class="line"><a name="l00364"></a><span class="lineno"> 364</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00365"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a8ef141e66ac481f9da39b79eb282c12b"> 365</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a4fb3739d11e671f9809458cf6ada1e64">SmoothSoftmaxGrad</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> dst,</div><div class="line"><a name="l00366"></a><span class="lineno"> 366</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;src,</div><div class="line"><a name="l00367"></a><span class="lineno"> 367</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, DType&gt;</a> &amp;label,</div><div 
class="line"><a name="l00368"></a><span class="lineno"> 368</span>&#160; <span class="keyword">const</span> DType &amp;ignore_label,</div><div class="line"><a name="l00369"></a><span class="lineno"> 369</span>&#160; <span class="keyword">const</span> <span class="keywordtype">float</span> alpha) {</div><div class="line"><a name="l00370"></a><span class="lineno"> 370</span>&#160; <span class="keyword">const</span> <span class="keywordtype">float</span> smooth_grad = (alpha / (dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1) - 1));</div><div class="line"><a name="l00371"></a><span class="lineno"> 371</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00372"></a><span class="lineno"> 372</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00373"></a><span class="lineno"> 373</span>&#160; <span class="keyword">const</span> <span class="keywordtype">int</span> k = <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(label[y]);</div><div class="line"><a name="l00374"></a><span class="lineno"> 374</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> x = 0; x &lt; static_cast&lt;int&gt;(dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1)); ++x) {</div><div class="line"><a name="l00375"></a><span class="lineno"> 375</span>&#160; <span class="keywordflow">if</span> (static_cast&lt;int&gt;(ignore_label) == k) {</div><div class="line"><a name="l00376"></a><span class="lineno"> 376</span>&#160; dst[y][x] = 0.0f;</div><div class="line"><a name="l00377"></a><span class="lineno"> 
377</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00378"></a><span class="lineno"> 378</span>&#160; <span class="keywordflow">if</span> (x == k) {</div><div class="line"><a name="l00379"></a><span class="lineno"> 379</span>&#160; dst[y][k] = src[y][k] - 1.0f + alpha;</div><div class="line"><a name="l00380"></a><span class="lineno"> 380</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00381"></a><span class="lineno"> 381</span>&#160; dst[y][x] = src[y][x] - smooth_grad;</div><div class="line"><a name="l00382"></a><span class="lineno"> 382</span>&#160; }</div><div class="line"><a name="l00383"></a><span class="lineno"> 383</span>&#160; }</div><div class="line"><a name="l00384"></a><span class="lineno"> 384</span>&#160; }</div><div class="line"><a name="l00385"></a><span class="lineno"> 385</span>&#160; }</div><div class="line"><a name="l00386"></a><span class="lineno"> 386</span>&#160;}</div><div class="line"><a name="l00387"></a><span class="lineno"> 387</span>&#160;</div><div class="line"><a name="l00388"></a><span class="lineno"> 388</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00389"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a4aac2d8b75f24039748627bf7bafb6f5"> 389</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a3409c8a836e7dab83bec25556164261c">SoftmaxGrad</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> dst,</div><div class="line"><a name="l00390"></a><span class="lineno"> 390</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> &amp;src,</div><div class="line"><a name="l00391"></a><span class="lineno"> 391</span>&#160; <span class="keyword">const</span> <a 
class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;label) {</div><div class="line"><a name="l00392"></a><span class="lineno"> 392</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00393"></a><span class="lineno"> 393</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> n = 0; n &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(2); ++n) {</div><div class="line"><a name="l00394"></a><span class="lineno"> 394</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00395"></a><span class="lineno"> 395</span>&#160; <span class="keyword">const</span> <span class="keywordtype">int</span> k = <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(label[y][n]);</div><div class="line"><a name="l00396"></a><span class="lineno"> 396</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> x = 0; x &lt; static_cast&lt;int&gt;(dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1)); ++x) {</div><div class="line"><a name="l00397"></a><span class="lineno"> 397</span>&#160; <span class="keywordflow">if</span> (x == k) {</div><div class="line"><a name="l00398"></a><span class="lineno"> 398</span>&#160; dst[y][k][n] = src[y][k][n] - 1.0f;</div><div class="line"><a name="l00399"></a><span class="lineno"> 399</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00400"></a><span class="lineno"> 400</span>&#160; dst[y][x][n] = 
src[y][x][n];</div><div class="line"><a name="l00401"></a><span class="lineno"> 401</span>&#160; }</div><div class="line"><a name="l00402"></a><span class="lineno"> 402</span>&#160; }</div><div class="line"><a name="l00403"></a><span class="lineno"> 403</span>&#160; }</div><div class="line"><a name="l00404"></a><span class="lineno"> 404</span>&#160; }</div><div class="line"><a name="l00405"></a><span class="lineno"> 405</span>&#160;}</div><div class="line"><a name="l00406"></a><span class="lineno"> 406</span>&#160;</div><div class="line"><a name="l00407"></a><span class="lineno"> 407</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00408"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a0166cbd7c6f70d858cfc1a030e8b5f69"> 408</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a4fb3739d11e671f9809458cf6ada1e64">SmoothSoftmaxGrad</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> dst,</div><div class="line"><a name="l00409"></a><span class="lineno"> 409</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> &amp;src,</div><div class="line"><a name="l00410"></a><span class="lineno"> 410</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;label,</div><div class="line"><a name="l00411"></a><span class="lineno"> 411</span>&#160; <span class="keyword">const</span> <span class="keywordtype">float</span> alpha) {</div><div class="line"><a name="l00412"></a><span class="lineno"> 412</span>&#160; <span class="keyword">const</span> <span class="keywordtype">float</span> smooth_grad = (alpha / (dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1) - 
1));</div><div class="line"><a name="l00413"></a><span class="lineno"> 413</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00414"></a><span class="lineno"> 414</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> n = 0; n &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(2); ++n) {</div><div class="line"><a name="l00415"></a><span class="lineno"> 415</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00416"></a><span class="lineno"> 416</span>&#160; <span class="keyword">const</span> <span class="keywordtype">int</span> k = <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(label[y][n]);</div><div class="line"><a name="l00417"></a><span class="lineno"> 417</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> x = 0; x &lt; static_cast&lt;int&gt;(dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1)); ++x) {</div><div class="line"><a name="l00418"></a><span class="lineno"> 418</span>&#160; <span class="keywordflow">if</span> (x == k) {</div><div class="line"><a name="l00419"></a><span class="lineno"> 419</span>&#160; dst[y][k][n] = src[y][k][n] - 1.0f + alpha;</div><div class="line"><a name="l00420"></a><span class="lineno"> 420</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00421"></a><span class="lineno"> 421</span>&#160; dst[y][x][n] = src[y][x][n] - smooth_grad;</div><div class="line"><a name="l00422"></a><span class="lineno"> 
422</span>&#160; }</div><div class="line"><a name="l00423"></a><span class="lineno"> 423</span>&#160; }</div><div class="line"><a name="l00424"></a><span class="lineno"> 424</span>&#160; }</div><div class="line"><a name="l00425"></a><span class="lineno"> 425</span>&#160; }</div><div class="line"><a name="l00426"></a><span class="lineno"> 426</span>&#160;}</div><div class="line"><a name="l00427"></a><span class="lineno"> 427</span>&#160;</div><div class="line"><a name="l00428"></a><span class="lineno"> 428</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00429"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a8ed2e28641a53c3bb04442be23e52a6a"> 429</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a3409c8a836e7dab83bec25556164261c">SoftmaxGrad</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> dst,</div><div class="line"><a name="l00430"></a><span class="lineno"> 430</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> &amp;src,</div><div class="line"><a name="l00431"></a><span class="lineno"> 431</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;label,</div><div class="line"><a name="l00432"></a><span class="lineno"> 432</span>&#160; <span class="keyword">const</span> DType &amp;ignore_label) {</div><div class="line"><a name="l00433"></a><span class="lineno"> 433</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00434"></a><span class="lineno"> 434</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> n = 0; n &lt; dst.<a class="code" 
href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(2); ++n) {</div><div class="line"><a name="l00435"></a><span class="lineno"> 435</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00436"></a><span class="lineno"> 436</span>&#160; <span class="keyword">const</span> <span class="keywordtype">int</span> k = <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(label[y][n]);</div><div class="line"><a name="l00437"></a><span class="lineno"> 437</span>&#160; <span class="keywordflow">if</span> (k == static_cast&lt;int&gt;(ignore_label)) {</div><div class="line"><a name="l00438"></a><span class="lineno"> 438</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> x = 0; x &lt; static_cast&lt;int&gt;(dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1)); ++x) {</div><div class="line"><a name="l00439"></a><span class="lineno"> 439</span>&#160; dst[y][x][n] = DType(0.0f);</div><div class="line"><a name="l00440"></a><span class="lineno"> 440</span>&#160; }</div><div class="line"><a name="l00441"></a><span class="lineno"> 441</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00442"></a><span class="lineno"> 442</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> x = 0; x &lt; static_cast&lt;int&gt;(dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1)); ++x) {</div><div class="line"><a name="l00443"></a><span class="lineno"> 443</span>&#160; <span class="keywordflow">if</span> (x == k) {</div><div class="line"><a name="l00444"></a><span 
class="lineno"> 444</span>&#160; dst[y][k][n] = src[y][k][n] - 1.0f;</div><div class="line"><a name="l00445"></a><span class="lineno"> 445</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00446"></a><span class="lineno"> 446</span>&#160; dst[y][x][n] = src[y][x][n];</div><div class="line"><a name="l00447"></a><span class="lineno"> 447</span>&#160; }</div><div class="line"><a name="l00448"></a><span class="lineno"> 448</span>&#160; }</div><div class="line"><a name="l00449"></a><span class="lineno"> 449</span>&#160; }</div><div class="line"><a name="l00450"></a><span class="lineno"> 450</span>&#160; }</div><div class="line"><a name="l00451"></a><span class="lineno"> 451</span>&#160; }</div><div class="line"><a name="l00452"></a><span class="lineno"> 452</span>&#160;}</div><div class="line"><a name="l00453"></a><span class="lineno"> 453</span>&#160;</div><div class="line"><a name="l00454"></a><span class="lineno"> 454</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00455"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a2f0ff988f790390404f31fe2a68c8b1b"> 455</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a4fb3739d11e671f9809458cf6ada1e64">SmoothSoftmaxGrad</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> dst,</div><div class="line"><a name="l00456"></a><span class="lineno"> 456</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> &amp;src,</div><div class="line"><a name="l00457"></a><span class="lineno"> 457</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;label,</div><div class="line"><a name="l00458"></a><span class="lineno"> 
458</span>&#160; <span class="keyword">const</span> DType &amp;ignore_label,</div><div class="line"><a name="l00459"></a><span class="lineno"> 459</span>&#160; <span class="keyword">const</span> <span class="keywordtype">float</span> alpha) {</div><div class="line"><a name="l00460"></a><span class="lineno"> 460</span>&#160; <span class="keyword">const</span> <span class="keywordtype">float</span> smooth_grad = (alpha / (dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1) - 1));</div><div class="line"><a name="l00461"></a><span class="lineno"> 461</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00462"></a><span class="lineno"> 462</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> n = 0; n &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(2); ++n) {</div><div class="line"><a name="l00463"></a><span class="lineno"> 463</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00464"></a><span class="lineno"> 464</span>&#160; <span class="keyword">const</span> <span class="keywordtype">int</span> k = <span class="keyword">static_cast&lt;</span><span class="keywordtype">int</span><span class="keyword">&gt;</span>(label[y][n]);</div><div class="line"><a name="l00465"></a><span class="lineno"> 465</span>&#160; <span class="keywordflow">if</span> (k == static_cast&lt;int&gt;(ignore_label)) {</div><div class="line"><a name="l00466"></a><span class="lineno"> 466</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> x = 0; x &lt; 
static_cast&lt;int&gt;(dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1)); ++x) {</div><div class="line"><a name="l00467"></a><span class="lineno"> 467</span>&#160; dst[y][x][n] = DType(0.0f);</div><div class="line"><a name="l00468"></a><span class="lineno"> 468</span>&#160; }</div><div class="line"><a name="l00469"></a><span class="lineno"> 469</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00470"></a><span class="lineno"> 470</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> x = 0; x &lt; static_cast&lt;int&gt;(dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1)); ++x) {</div><div class="line"><a name="l00471"></a><span class="lineno"> 471</span>&#160; <span class="keywordflow">if</span> (x == k) {</div><div class="line"><a name="l00472"></a><span class="lineno"> 472</span>&#160; dst[y][k][n] = src[y][k][n] - 1.0f + alpha;</div><div class="line"><a name="l00473"></a><span class="lineno"> 473</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00474"></a><span class="lineno"> 474</span>&#160; dst[y][x][n] = src[y][x][n] - smooth_grad;</div><div class="line"><a name="l00475"></a><span class="lineno"> 475</span>&#160; }</div><div class="line"><a name="l00476"></a><span class="lineno"> 476</span>&#160; }</div><div class="line"><a name="l00477"></a><span class="lineno"> 477</span>&#160; }</div><div class="line"><a name="l00478"></a><span class="lineno"> 478</span>&#160; }</div><div class="line"><a name="l00479"></a><span class="lineno"> 479</span>&#160; }</div><div class="line"><a name="l00480"></a><span class="lineno"> 480</span>&#160;}</div><div class="line"><a name="l00481"></a><span class="lineno"> 481</span>&#160;</div><div class="line"><a name="l00482"></a><span class="lineno"> 482</span>&#160;<span class="keyword">template</span>&lt;<span 
class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00483"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a75161ef5ed964dcf6518e3a7e59e6fb6"> 483</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a75161ef5ed964dcf6518e3a7e59e6fb6">Softmax</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> dst,</div><div class="line"><a name="l00484"></a><span class="lineno"> 484</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;energy) {</div><div class="line"><a name="l00485"></a><span class="lineno"> 485</span>&#160; CHECK_EQ(dst.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>, energy.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>) &lt;&lt; <span class="stringliteral">&quot;Softmax: shape mismatch&quot;</span>;</div><div class="line"><a name="l00486"></a><span class="lineno"> 486</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00487"></a><span class="lineno"> 487</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00488"></a><span class="lineno"> 488</span>&#160; <a class="code" href="namespacemshadow.html#a75161ef5ed964dcf6518e3a7e59e6fb6">Softmax</a>(dst[y], energy[y]);</div><div class="line"><a name="l00489"></a><span class="lineno"> 489</span>&#160; }</div><div class="line"><a name="l00490"></a><span class="lineno"> 490</span>&#160;}</div><div class="line"><a name="l00491"></a><span class="lineno"> 491</span>&#160;</div><div 
class="line"><a name="l00492"></a><span class="lineno"> 492</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00493"></a><span class="lineno"><a class="line" href="namespacemshadow.html#ae1994dc1386e8430061c65b15e574430"> 493</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a75161ef5ed964dcf6518e3a7e59e6fb6">Softmax</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> dst,</div><div class="line"><a name="l00494"></a><span class="lineno"> 494</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 3, DType&gt;</a> &amp;energy) {</div><div class="line"><a name="l00495"></a><span class="lineno"> 495</span>&#160; CHECK_EQ(dst.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>, energy.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>) &lt;&lt; <span class="stringliteral">&quot;Softmax: shape mismatch&quot;</span>;</div><div class="line"><a name="l00496"></a><span class="lineno"> 496</span>&#160;<span class="preprocessor">#pragma omp parallel for</span></div><div class="line"><a name="l00497"></a><span class="lineno"> 497</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">openmp_index_t</a> y = 0; y &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00498"></a><span class="lineno"> 498</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> n = 0; n &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(2); ++n) 
{</div><div class="line"><a name="l00499"></a><span class="lineno"> 499</span>&#160; DType mmax = energy[y][0][n];</div><div class="line"><a name="l00500"></a><span class="lineno"> 500</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 1; x &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1); ++x) {</div><div class="line"><a name="l00501"></a><span class="lineno"> 501</span>&#160; <span class="keywordflow">if</span> (mmax &lt; energy[y][x][n]) mmax = energy[y][x][n];</div><div class="line"><a name="l00502"></a><span class="lineno"> 502</span>&#160; }</div><div class="line"><a name="l00503"></a><span class="lineno"> 503</span>&#160; DType sum = DType(0.0f);</div><div class="line"><a name="l00504"></a><span class="lineno"> 504</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 0; x &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1); ++x) {</div><div class="line"><a name="l00505"></a><span class="lineno"> 505</span>&#160; dst[y][x][n] = std::exp(energy[y][x][n] - mmax);</div><div class="line"><a name="l00506"></a><span class="lineno"> 506</span>&#160; sum += dst[y][x][n];</div><div class="line"><a name="l00507"></a><span class="lineno"> 507</span>&#160; }</div><div class="line"><a name="l00508"></a><span class="lineno"> 508</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> x = 0; x &lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1); ++x) {</div><div class="line"><a name="l00509"></a><span class="lineno"> 509</span>&#160; dst[y][x][n] /= sum;</div><div class="line"><a name="l00510"></a><span class="lineno"> 510</span>&#160; 
}</div><div class="line"><a name="l00511"></a><span class="lineno"> 511</span>&#160; }</div><div class="line"><a name="l00512"></a><span class="lineno"> 512</span>&#160; }</div><div class="line"><a name="l00513"></a><span class="lineno"> 513</span>&#160;}</div><div class="line"><a name="l00514"></a><span class="lineno"> 514</span>&#160;</div><div class="line"><a name="l00515"></a><span class="lineno"> 515</span>&#160;<span class="keyword">template</span>&lt;<span class="keywordtype">bool</span> clip, <span class="keyword">typename</span> IndexType, <span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00516"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a802d44c652195053e87b63881c7d4d1c"> 516</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a802d44c652195053e87b63881c7d4d1c">AddTakeGrad</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> dst,</div><div class="line"><a name="l00517"></a><span class="lineno"> 517</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, IndexType&gt;</a>&amp; index,</div><div class="line"><a name="l00518"></a><span class="lineno"> 518</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;src) {</div><div class="line"><a name="l00519"></a><span class="lineno"> 519</span>&#160; <span class="keyword">const</span> <a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> K = dst.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>[0];</div><div class="line"><a name="l00520"></a><span class="lineno"> 520</span>&#160; <span class="keyword">const</span> <a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> C = dst.<a 
class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>[1];</div><div class="line"><a name="l00521"></a><span class="lineno"> 521</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 0; y &lt; index.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00522"></a><span class="lineno"> 522</span>&#160; <a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> j = index[y];</div><div class="line"><a name="l00523"></a><span class="lineno"> 523</span>&#160; <span class="keywordflow">if</span> (clip) {</div><div class="line"><a name="l00524"></a><span class="lineno"> 524</span>&#160; <span class="keywordflow">if</span> (j &lt;= 0) j = 0;</div><div class="line"><a name="l00525"></a><span class="lineno"> 525</span>&#160; <span class="keywordflow">else</span> <span class="keywordflow">if</span> (j &gt;= K) j = K - 1;</div><div class="line"><a name="l00526"></a><span class="lineno"> 526</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00527"></a><span class="lineno"> 527</span>&#160; j %= K;</div><div class="line"><a name="l00528"></a><span class="lineno"> 528</span>&#160; <span class="keywordflow">if</span> (j &lt; 0) j += K;</div><div class="line"><a name="l00529"></a><span class="lineno"> 529</span>&#160; }</div><div class="line"><a name="l00530"></a><span class="lineno"> 530</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> i = 0; i &lt; C; ++i) {</div><div class="line"><a name="l00531"></a><span class="lineno"> 531</span>&#160; dst[j][i] += src[y][i];</div><div class="line"><a name="l00532"></a><span class="lineno"> 532</span>&#160; }</div><div class="line"><a name="l00533"></a><span 
class="lineno"> 533</span>&#160; }</div><div class="line"><a name="l00534"></a><span class="lineno"> 534</span>&#160;}</div><div class="line"><a name="l00535"></a><span class="lineno"> 535</span>&#160;</div><div class="line"><a name="l00536"></a><span class="lineno"> 536</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> IndexType, <span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00537"></a><span class="lineno"><a class="line" href="namespacemshadow.html#ad2320213151030353ae90f92a0da808d"> 537</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#ad2320213151030353ae90f92a0da808d">AddTakeGradLargeBatch</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> dst,</div><div class="line"><a name="l00538"></a><span class="lineno"> 538</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, IndexType&gt;</a>&amp; sorted,</div><div class="line"><a name="l00539"></a><span class="lineno"> 539</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, IndexType&gt;</a>&amp; index,</div><div class="line"><a name="l00540"></a><span class="lineno"> 540</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;src) {</div><div class="line"><a name="l00541"></a><span class="lineno"> 541</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 0; y &lt; sorted.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00542"></a><span class="lineno"> 542</span>&#160; dst[sorted[y]] += src[index[y]];</div><div class="line"><a 
name="l00543"></a><span class="lineno"> 543</span>&#160; }</div><div class="line"><a name="l00544"></a><span class="lineno"> 544</span>&#160;}</div><div class="line"><a name="l00545"></a><span class="lineno"> 545</span>&#160;</div><div class="line"><a name="l00546"></a><span class="lineno"> 546</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> IndexType, <span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00547"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a77348ebaaac4581773b4fe78448c8fa6"> 547</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a77348ebaaac4581773b4fe78448c8fa6">IndexFill</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> dst,</div><div class="line"><a name="l00548"></a><span class="lineno"> 548</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, IndexType&gt;</a>&amp; index,</div><div class="line"><a name="l00549"></a><span class="lineno"> 549</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 2, DType&gt;</a> &amp;src) {</div><div class="line"><a name="l00550"></a><span class="lineno"> 550</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> y = 0; y &lt; index.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); ++y) {</div><div class="line"><a name="l00551"></a><span class="lineno"> 551</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> j = 0; j &lt; src.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1); j++) {</div><div class="line"><a 
name="l00552"></a><span class="lineno"> 552</span>&#160; dst[index[y]][j] = src[y][j];</div><div class="line"><a name="l00553"></a><span class="lineno"> 553</span>&#160; }</div><div class="line"><a name="l00554"></a><span class="lineno"> 554</span>&#160; }</div><div class="line"><a name="l00555"></a><span class="lineno"> 555</span>&#160;}</div><div class="line"><a name="l00556"></a><span class="lineno"> 556</span>&#160;</div><div class="line"><a name="l00557"></a><span class="lineno"> 557</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> KDType, <span class="keyword">typename</span> VDType&gt;</div><div class="line"><a name="l00558"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a3476fdf7d39add6f57beddafbc1b9625"> 558</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a3476fdf7d39add6f57beddafbc1b9625">SortByKey</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, KDType&gt;</a> keys, <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;cpu, 1, VDType&gt;</a> values,</div><div class="line"><a name="l00559"></a><span class="lineno"> 559</span>&#160; <span class="keywordtype">bool</span> is_ascend) {</div><div class="line"><a name="l00560"></a><span class="lineno"> 560</span>&#160; CHECK_EQ(keys.<a class="code" href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">CheckContiguous</a>(), <span class="keyword">true</span>);</div><div class="line"><a name="l00561"></a><span class="lineno"> 561</span>&#160; CHECK_EQ(values.<a class="code" href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">CheckContiguous</a>(), <span class="keyword">true</span>);</div><div class="line"><a name="l00562"></a><span class="lineno"> 562</span>&#160; CHECK_EQ(keys.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0), values.<a class="code" 
href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0))</div><div class="line"><a name="l00563"></a><span class="lineno"> 563</span>&#160; &lt;&lt; <span class="stringliteral">&quot;The sizes of key/value are not equal! keys_size: &quot;</span> &lt;&lt; keys.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0)</div><div class="line"><a name="l00564"></a><span class="lineno"> 564</span>&#160; &lt;&lt; <span class="stringliteral">&quot;values_size: &quot;</span> &lt;&lt; values.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0);</div><div class="line"><a name="l00565"></a><span class="lineno"> 565</span>&#160; std::vector&lt;size_t&gt; idx(keys.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0));</div><div class="line"><a name="l00566"></a><span class="lineno"> 566</span>&#160; std::vector&lt;KDType&gt; keys_vec(keys.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0));</div><div class="line"><a name="l00567"></a><span class="lineno"> 567</span>&#160; std::vector&lt;VDType&gt; values_vec(values.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0));</div><div class="line"><a name="l00568"></a><span class="lineno"> 568</span>&#160; <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i = 0; i &lt; keys.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); i++) {</div><div class="line"><a name="l00569"></a><span class="lineno"> 569</span>&#160; idx[i] = i;</div><div class="line"><a name="l00570"></a><span class="lineno"> 570</span>&#160; keys_vec[i] = keys[i];</div><div class="line"><a name="l00571"></a><span class="lineno"> 571</span>&#160; values_vec[i] = values[i];</div><div class="line"><a name="l00572"></a><span class="lineno"> 572</span>&#160; 
}</div><div class="line"><a name="l00573"></a><span class="lineno"> 573</span>&#160; <span class="keywordflow">if</span> (is_ascend) {</div><div class="line"><a name="l00574"></a><span class="lineno"> 574</span>&#160; std::stable_sort(idx.begin(), idx.end(),</div><div class="line"><a name="l00575"></a><span class="lineno"> 575</span>&#160; [&amp;keys_vec](<span class="keywordtype">size_t</span> i1, <span class="keywordtype">size_t</span> i2)</div><div class="line"><a name="l00576"></a><span class="lineno"> 576</span>&#160; {<span class="keywordflow">return</span> keys_vec[i1] &lt; keys_vec[i2]; });</div><div class="line"><a name="l00577"></a><span class="lineno"> 577</span>&#160; } <span class="keywordflow">else</span> {</div><div class="line"><a name="l00578"></a><span class="lineno"> 578</span>&#160; std::stable_sort(idx.begin(), idx.end(),</div><div class="line"><a name="l00579"></a><span class="lineno"> 579</span>&#160; [&amp;keys_vec](<span class="keywordtype">size_t</span> i1, <span class="keywordtype">size_t</span> i2)</div><div class="line"><a name="l00580"></a><span class="lineno"> 580</span>&#160; {<span class="keywordflow">return</span> keys_vec[i1] &gt; keys_vec[i2]; });</div><div class="line"><a name="l00581"></a><span class="lineno"> 581</span>&#160; }</div><div class="line"><a name="l00582"></a><span class="lineno"> 582</span>&#160; <span class="keywordflow">for</span> (<a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> i = 0; i &lt; values.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0); i++) {</div><div class="line"><a name="l00583"></a><span class="lineno"> 583</span>&#160; keys[i] = keys_vec[idx[i]];</div><div class="line"><a name="l00584"></a><span class="lineno"> 584</span>&#160; values[i] = values_vec[idx[i]];</div><div class="line"><a name="l00585"></a><span class="lineno"> 585</span>&#160; }</div><div class="line"><a name="l00586"></a><span 
class="lineno"> 586</span>&#160;}</div><div class="line"><a name="l00587"></a><span class="lineno"> 587</span>&#160;</div><div class="line"><a name="l00588"></a><span class="lineno"> 588</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> Device, <span class="keyword">typename</span> VDType, <span class="keyword">typename</span> SDType&gt;</div><div class="line"><a name="l00589"></a><span class="lineno"><a class="line" href="namespacemshadow.html#ab3f080439f035e05501cc7f202e58c0e"> 589</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#ab3f080439f035e05501cc7f202e58c0e">VectorizedSort</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;Device, 1, VDType&gt;</a> values, <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;Device, 1, SDType&gt;</a> segments) {</div><div class="line"><a name="l00590"></a><span class="lineno"> 590</span>&#160; <span class="comment">// We can sort each segments using two stable sorts</span></div><div class="line"><a name="l00591"></a><span class="lineno"> 591</span>&#160; <a class="code" href="namespacemshadow.html#a3476fdf7d39add6f57beddafbc1b9625">SortByKey</a>(values, segments, <span class="keyword">true</span>);</div><div class="line"><a name="l00592"></a><span class="lineno"> 592</span>&#160; <a class="code" href="namespacemshadow.html#a3476fdf7d39add6f57beddafbc1b9625">SortByKey</a>(segments, values, <span class="keyword">true</span>);</div><div class="line"><a name="l00593"></a><span class="lineno"> 593</span>&#160;}</div><div class="line"><a name="l00594"></a><span class="lineno"> 594</span>&#160;</div><div class="line"><a name="l00595"></a><span class="lineno"> 595</span>&#160;<span class="comment">// blas related</span></div><div class="line"><a name="l00596"></a><span class="lineno"> 596</span>&#160;<span class="keyword">template</span>&lt;<span class="keyword">typename</span> 
Device, <span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00597"></a><span class="lineno"><a class="line" href="namespacemshadow.html#ad4c4674a33dfb486802427058375a055"> 597</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#ad4c4674a33dfb486802427058375a055">VectorDot</a>(<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html">Tensor&lt;Device, 1, DType&gt;</a> dst,</div><div class="line"><a name="l00598"></a><span class="lineno"> 598</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html">Tensor&lt;Device, 1, DType&gt;</a> &amp;lhs,</div><div class="line"><a name="l00599"></a><span class="lineno"> 599</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html">Tensor&lt;Device, 1, DType&gt;</a> &amp;rhs) {</div><div class="line"><a name="l00600"></a><span class="lineno"> 600</span>&#160; CHECK_EQ(lhs.<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#ad58f6139a1d680c2dab2919cd448a9cf">size</a>(0), rhs.<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#ad58f6139a1d680c2dab2919cd448a9cf">size</a>(0))</div><div class="line"><a name="l00601"></a><span class="lineno"> 601</span>&#160; &lt;&lt; <span class="stringliteral">&quot;VectorDot: Shape mismatch&quot;</span>;</div><div class="line"><a name="l00602"></a><span class="lineno"> 602</span>&#160; CHECK_EQ(dst.<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#ad58f6139a1d680c2dab2919cd448a9cf">size</a>(0), 1U)</div><div class="line"><a name="l00603"></a><span class="lineno"> 603</span>&#160; &lt;&lt; <span class="stringliteral">&quot;VectorDot: expect dst to be scalar&quot;</span>;</div><div class="line"><a 
name="l00604"></a><span class="lineno"> 604</span>&#160; <a class="code" href="structmshadow_1_1expr_1_1BLASEngine.html#ac0b67fda3ecdf2087b816602ad43eda3">expr::BLASEngine&lt;Device, DType&gt;::SetStream</a>(lhs.<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#a9790363151a34829fcb18aa51b25a95f">stream_</a>);</div><div class="line"><a name="l00605"></a><span class="lineno"> 605</span>&#160; <a class="code" href="structmshadow_1_1expr_1_1BLASEngine.html#af21b0de1c3d687671e36cffb464d5988">mshadow::expr::BLASEngine&lt;Device, DType&gt;::dot</a>(</div><div class="line"><a name="l00606"></a><span class="lineno"> 606</span>&#160; lhs.<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#a9790363151a34829fcb18aa51b25a95f">stream_</a>, lhs.<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#ad58f6139a1d680c2dab2919cd448a9cf">size</a>(0), lhs.<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#a98c523db2418f25f868842fb32c16a35">dptr_</a>, 1, rhs.<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#a98c523db2418f25f868842fb32c16a35">dptr_</a>, 1, dst.<a class="code" href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#a98c523db2418f25f868842fb32c16a35">dptr_</a>);</div><div class="line"><a name="l00607"></a><span class="lineno"> 607</span>&#160;}</div><div class="line"><a name="l00608"></a><span class="lineno"> 608</span>&#160;</div><div class="line"><a name="l00609"></a><span class="lineno"> 609</span>&#160;<span class="keyword">template</span>&lt;<span class="keywordtype">bool</span> transpose_left, <span class="keywordtype">bool</span> transpose_right, <span class="keyword">typename</span> Device, <span class="keyword">typename</span> DType&gt;</div><div class="line"><a name="l00610"></a><span class="lineno"><a class="line" href="namespacemshadow.html#a04a223d0f4e8af8c3f504bb5d324ad43"> 
610</a></span>&#160;<span class="keyword">inline</span> <span class="keywordtype">void</span> <a class="code" href="namespacemshadow.html#a04a223d0f4e8af8c3f504bb5d324ad43">BatchGEMM</a>(<a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;Device, 3, DType&gt;</a> dst,</div><div class="line"><a name="l00611"></a><span class="lineno"> 611</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;Device, 3, DType&gt;</a> &amp;lhs,</div><div class="line"><a name="l00612"></a><span class="lineno"> 612</span>&#160; <span class="keyword">const</span> <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;Device, 3, DType&gt;</a> &amp;rhs,</div><div class="line"><a name="l00613"></a><span class="lineno"> 613</span>&#160; DType alpha,</div><div class="line"><a name="l00614"></a><span class="lineno"> 614</span>&#160; DType beta,</div><div class="line"><a name="l00615"></a><span class="lineno"> 615</span>&#160; <a class="code" href="structmshadow_1_1Tensor.html">Tensor&lt;Device, 1, DType*&gt;</a> workspace) {</div><div class="line"><a name="l00616"></a><span class="lineno"> 616</span>&#160; <a class="code" href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">index_t</a> batch_size = dst.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>[0];</div><div class="line"><a name="l00617"></a><span class="lineno"> 617</span>&#160; <a class="code" href="structmshadow_1_1expr_1_1BLASEngine.html#ac0b67fda3ecdf2087b816602ad43eda3">expr::BLASEngine&lt;Device, DType&gt;::SetStream</a>(dst.<a class="code" href="structmshadow_1_1Tensor.html#a485d1558eaa9eded0b8fd23ed82c6298">stream_</a>);</div><div class="line"><a name="l00618"></a><span class="lineno"> 618</span>&#160; <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;3&gt;</a> sleft = transpose_left ? 
<a class="code" href="namespacemshadow.html#a2be3eab8d3469aa48c40d38881558bb9">Shape3</a>(lhs.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>[0], lhs.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>[2], lhs.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>[1])</div><div class="line"><a name="l00619"></a><span class="lineno"> 619</span>&#160; : lhs.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>;</div><div class="line"><a name="l00620"></a><span class="lineno"> 620</span>&#160; <a class="code" href="structmshadow_1_1Shape.html">Shape&lt;3&gt;</a> sright = transpose_right ? <a class="code" href="namespacemshadow.html#a2be3eab8d3469aa48c40d38881558bb9">Shape3</a>(rhs.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>[0], rhs.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>[2], rhs.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>[1])</div><div class="line"><a name="l00621"></a><span class="lineno"> 621</span>&#160; : rhs.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a>;</div><div class="line"><a name="l00622"></a><span class="lineno"> 622</span>&#160; CHECK_EQ(dst.<a class="code" href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">CheckContiguous</a>(), <span class="keyword">true</span>);</div><div class="line"><a name="l00623"></a><span class="lineno"> 623</span>&#160; CHECK_EQ(lhs.<a class="code" href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">CheckContiguous</a>(), <span class="keyword">true</span>);</div><div class="line"><a name="l00624"></a><span class="lineno"> 624</span>&#160; CHECK_EQ(rhs.<a class="code" 
href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">CheckContiguous</a>(), <span class="keyword">true</span>);</div><div class="line"><a name="l00625"></a><span class="lineno"> 625</span>&#160; CHECK(sleft[0] == batch_size &amp;&amp; sright[0] == batch_size)</div><div class="line"><a name="l00626"></a><span class="lineno"> 626</span>&#160; &lt;&lt; <span class="stringliteral">&quot;BatchGEMM: batchsize must be equal.&quot;</span></div><div class="line"><a name="l00627"></a><span class="lineno"> 627</span>&#160; &lt;&lt; <span class="stringliteral">&quot;dst: &quot;</span> &lt;&lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a> &lt;&lt; <span class="stringliteral">&quot;\n&quot;</span></div><div class="line"><a name="l00628"></a><span class="lineno"> 628</span>&#160; &lt;&lt; <span class="stringliteral">&quot;lhs: &quot;</span> &lt;&lt; sleft &lt;&lt; <span class="stringliteral">&quot;\n&quot;</span></div><div class="line"><a name="l00629"></a><span class="lineno"> 629</span>&#160; &lt;&lt; <span class="stringliteral">&quot;rhs: &quot;</span> &lt;&lt; sright &lt;&lt; <span class="stringliteral">&quot;\n&quot;</span>;</div><div class="line"><a name="l00630"></a><span class="lineno"> 630</span>&#160; CHECK(dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1) == sleft[1] &amp;&amp; dst.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(2) == sright[2] &amp;&amp; sleft[2] == sright[1])</div><div class="line"><a name="l00631"></a><span class="lineno"> 631</span>&#160; &lt;&lt; <span class="stringliteral">&quot;BatchGEMM: matrix shape mismatch&quot;</span></div><div class="line"><a name="l00632"></a><span class="lineno"> 632</span>&#160; &lt;&lt; <span class="stringliteral">&quot;dst: &quot;</span> &lt;&lt; dst.<a class="code" href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">shape_</a> 
&lt;&lt; <span class="stringliteral">&quot;\n&quot;</span></div><div class="line"><a name="l00633"></a><span class="lineno"> 633</span>&#160; &lt;&lt; <span class="stringliteral">&quot;lhs: &quot;</span> &lt;&lt; sleft &lt;&lt; <span class="stringliteral">&quot;\n&quot;</span></div><div class="line"><a name="l00634"></a><span class="lineno"> 634</span>&#160; &lt;&lt; <span class="stringliteral">&quot;rhs: &quot;</span> &lt;&lt; sright &lt;&lt; <span class="stringliteral">&quot;\n&quot;</span>;</div><div class="line"><a name="l00635"></a><span class="lineno"> 635</span>&#160; CHECK(workspace.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(0) &gt;= 3 * batch_size)</div><div class="line"><a name="l00636"></a><span class="lineno"> 636</span>&#160; &lt;&lt; <span class="stringliteral">&quot;Workspace Size must be bigger than &quot;</span> &lt;&lt; 3 * batch_size;</div><div class="line"><a name="l00637"></a><span class="lineno"> 637</span>&#160; CHECK_EQ(workspace.<a class="code" href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">CheckContiguous</a>(), <span class="keyword">true</span>);</div><div class="line"><a name="l00638"></a><span class="lineno"> 638</span>&#160; <span class="comment">// use column major argument to compatible with most BLAS</span></div><div class="line"><a name="l00639"></a><span class="lineno"> 639</span>&#160; <a class="code" href="structmshadow_1_1expr_1_1BLASEngine.html#a85dfe7b9fe210cb4dbe06a97fa81ecda">expr::BLASEngine&lt;Device, DType&gt;::batched_gemm</a></div><div class="line"><a name="l00640"></a><span class="lineno"> 640</span>&#160; (dst.<a class="code" href="structmshadow_1_1Tensor.html#a485d1558eaa9eded0b8fd23ed82c6298">stream_</a>,</div><div class="line"><a name="l00641"></a><span class="lineno"> 641</span>&#160; transpose_right, transpose_left,</div><div class="line"><a name="l00642"></a><span class="lineno"> 642</span>&#160; transpose_right ? 
rhs.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1) : rhs.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(2),</div><div class="line"><a name="l00643"></a><span class="lineno"> 643</span>&#160; transpose_left ? lhs.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(2) : lhs.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1),</div><div class="line"><a name="l00644"></a><span class="lineno"> 644</span>&#160; transpose_right ? rhs.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(2) : rhs.<a class="code" href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">size</a>(1),</div><div class="line"><a name="l00645"></a><span class="lineno"> 645</span>&#160; alpha,</div><div class="line"><a name="l00646"></a><span class="lineno"> 646</span>&#160; rhs.<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a>, rhs.<a class="code" href="structmshadow_1_1Tensor.html#afee556f188e29bbd0ecc45fe98d3c1c3">stride_</a>,</div><div class="line"><a name="l00647"></a><span class="lineno"> 647</span>&#160; lhs.<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a>, lhs.<a class="code" href="structmshadow_1_1Tensor.html#afee556f188e29bbd0ecc45fe98d3c1c3">stride_</a>,</div><div class="line"><a name="l00648"></a><span class="lineno"> 648</span>&#160; beta,</div><div class="line"><a name="l00649"></a><span class="lineno"> 649</span>&#160; dst.<a class="code" href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a>, dst.<a class="code" href="structmshadow_1_1Tensor.html#afee556f188e29bbd0ecc45fe98d3c1c3">stride_</a>, batch_size,</div><div class="line"><a name="l00650"></a><span class="lineno"> 650</span>&#160; workspace.<a class="code" 
href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">dptr_</a>);</div><div class="line"><a name="l00651"></a><span class="lineno"> 651</span>&#160;}</div><div class="line"><a name="l00652"></a><span class="lineno"> 652</span>&#160;} <span class="comment">// namespace mshadow</span></div><div class="line"><a name="l00653"></a><span class="lineno"> 653</span>&#160;<span class="preprocessor">#endif // MSHADOW_TENSOR_CPU_INL_H_</span></div><div class="ttc" id="namespacemshadow_html_ad4c4674a33dfb486802427058375a055"><div class="ttname"><a href="namespacemshadow.html#ad4c4674a33dfb486802427058375a055">mshadow::VectorDot</a></div><div class="ttdeci">void VectorDot(Tensor&lt; Device, 1, DType &gt; dst, const Tensor&lt; Device, 1, DType &gt; &amp;lhs, const Tensor&lt; Device, 1, DType &gt; &amp;rhs)</div><div class="ttdoc">CPU/GPU: 1 dimension vector dot. </div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:597</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1BLASEngine_html_a85dfe7b9fe210cb4dbe06a97fa81ecda"><div class="ttname"><a href="structmshadow_1_1expr_1_1BLASEngine.html#a85dfe7b9fe210cb4dbe06a97fa81ecda">mshadow::expr::BLASEngine::batched_gemm</a></div><div class="ttdeci">static void batched_gemm(Stream&lt; Device &gt; *stream, bool transa, bool transb, int m, int n, int k, DType alpha, const DType *A, int lda, const DType *B, int ldb, DType beta, DType *C, int ldc, int batch_count, DType **workspace)</div><div class="ttdef"><b>Definition:</b> dot_engine-inl.h:91</div></div>
<div class="ttc" id="namespacemshadow_html_a3e8485c882dab873525b4b241e5db7ab"><div class="ttname"><a href="namespacemshadow.html#a3e8485c882dab873525b4b241e5db7ab">mshadow::FreeSpace</a></div><div class="ttdeci">void FreeSpace(Tensor&lt; cpu, dim, DType &gt; *obj)</div><div class="ttdoc">CPU/GPU: free the space of tensor, will set obj.dptr to NULL. </div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:140</div></div>
<div class="ttc" id="namespacemshadow_html_a0884cf8a1d05a9d631cdf82ef3ebc567"><div class="ttname"><a href="namespacemshadow.html#a0884cf8a1d05a9d631cdf82ef3ebc567">mshadow::ShutdownTensorEngine&lt; cpu &gt;</a></div><div class="ttdeci">void ShutdownTensorEngine&lt; cpu &gt;(void)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:41</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4_html_a9790363151a34829fcb18aa51b25a95f"><div class="ttname"><a href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#a9790363151a34829fcb18aa51b25a95f">mshadow::Tensor&lt; Device, 1, DType &gt;::stream_</a></div><div class="ttdeci">Stream&lt; Device &gt; * stream_</div><div class="ttdef"><b>Definition:</b> tensor.h:574</div></div>
<div class="ttc" id="namespacemshadow_html_a77348ebaaac4581773b4fe78448c8fa6"><div class="ttname"><a href="namespacemshadow.html#a77348ebaaac4581773b4fe78448c8fa6">mshadow::IndexFill</a></div><div class="ttdeci">void IndexFill(Tensor&lt; cpu, 2, DType &gt; dst, const Tensor&lt; cpu, 1, IndexType &gt; &amp;index, const Tensor&lt; cpu, 2, DType &gt; &amp;src)</div><div class="ttdoc">CPU/GPU: Fill the values of the destination matrix to specific rows in the source matrix...</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:547</div></div>
<div class="ttc" id="namespacemshadow_html_a3409c8a836e7dab83bec25556164261c"><div class="ttname"><a href="namespacemshadow.html#a3409c8a836e7dab83bec25556164261c">mshadow::SoftmaxGrad</a></div><div class="ttdeci">void SoftmaxGrad(Tensor&lt; cpu, 2, DType &gt; dst, const Tensor&lt; cpu, 2, DType &gt; &amp;src, const Tensor&lt; cpu, 1, DType &gt; &amp;label)</div><div class="ttdoc">CPU/GPU: softmax gradient. </div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:306</div></div>
<div class="ttc" id="namespacemshadow_html_a4fb3739d11e671f9809458cf6ada1e64"><div class="ttname"><a href="namespacemshadow.html#a4fb3739d11e671f9809458cf6ada1e64">mshadow::SmoothSoftmaxGrad</a></div><div class="ttdeci">void SmoothSoftmaxGrad(Tensor&lt; cpu, 2, DType &gt; dst, const Tensor&lt; cpu, 2, DType &gt; &amp;src, const Tensor&lt; cpu, 1, DType &gt; &amp;label, const float alpha)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:323</div></div>
<div class="ttc" id="namespacemshadow_1_1expr_html_a97c45c821ac8275dd75fa585f47b9e00"><div class="ttname"><a href="namespacemshadow_1_1expr.html#a97c45c821ac8275dd75fa585f47b9e00">mshadow::expr::pad</a></div><div class="ttdeci">PaddingExp&lt; SrcExp, DType, ExpInfo&lt; SrcExp &gt;::kDim &gt; pad(const Exp&lt; SrcExp, DType, etype &gt; &amp;src, index_t pad)</div><div class="ttdoc">padding expression, pad an image with zeros on boundaries, padding affects shape[0], and shape[1] </div><div class="ttdef"><b>Definition:</b> pad.h:71</div></div>
<div class="ttc" id="namespacemshadow_html_a04a223d0f4e8af8c3f504bb5d324ad43"><div class="ttname"><a href="namespacemshadow.html#a04a223d0f4e8af8c3f504bb5d324ad43">mshadow::BatchGEMM</a></div><div class="ttdeci">void BatchGEMM(Tensor&lt; Device, 3, DType &gt; dst, const Tensor&lt; Device, 3, DType &gt; &amp;lhs, const Tensor&lt; Device, 3, DType &gt; &amp;rhs, DType alpha, DType beta, Tensor&lt; Device, 1, DType *&gt; workspace)</div><div class="ttdoc">CPU/GPU: dst = alpha * op(lhs) op(rhs) + beta * dst. </div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:610</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_html_ad86d6759c585efb5229b3a0659973838"><div class="ttname"><a href="structmshadow_1_1Tensor.html#ad86d6759c585efb5229b3a0659973838">mshadow::Tensor::dptr_</a></div><div class="ttdeci">DType * dptr_</div><div class="ttdoc">pointer to the data </div><div class="ttdef"><b>Definition:</b> tensor.h:434</div></div>
<div class="ttc" id="namespacemshadow_html_a4c3517f91f3bb35d9867fa7a42665ec3"><div class="ttname"><a href="namespacemshadow.html#a4c3517f91f3bb35d9867fa7a42665ec3">mshadow::FreeHost_</a></div><div class="ttdeci">void FreeHost_(void *dptr)</div></div>
<div class="ttc" id="structmshadow_1_1TRValue_html"><div class="ttname"><a href="structmshadow_1_1TRValue.html">mshadow::TRValue</a></div><div class="ttdoc">Tensor RValue, this is the super type of all kinds of possible tensors. </div><div class="ttdef"><b>Definition:</b> tensor.h:409</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1Exp_html_adb7e4afd0baed78d66cff87de0a6621f"><div class="ttname"><a href="structmshadow_1_1expr_1_1Exp.html#adb7e4afd0baed78d66cff87de0a6621f">mshadow::expr::Exp&lt; Container, DType, type::kRValue &gt;::self</a></div><div class="ttdeci">const Container &amp; self(void) const</div><div class="ttdef"><b>Definition:</b> expression.h:82</div></div>
<div class="ttc" id="classmshadow_1_1expr_1_1Plan_html"><div class="ttname"><a href="classmshadow_1_1expr_1_1Plan.html">mshadow::expr::Plan</a></div><div class="ttdef"><b>Definition:</b> expr_engine-inl.h:58</div></div>
<div class="ttc" id="namespacemshadow_html_a2f9d916c81866abceed494cd622cd3e2"><div class="ttname"><a href="namespacemshadow.html#a2f9d916c81866abceed494cd622cd3e2">mshadow::SetDevice&lt; cpu &gt;</a></div><div class="ttdeci">void SetDevice&lt; cpu &gt;(int devid)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:45</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1TypeCheckPass_html"><div class="ttname"><a href="structmshadow_1_1expr_1_1TypeCheckPass.html">mshadow::expr::TypeCheckPass</a></div><div class="ttdoc">used to help static type check </div><div class="ttdef"><b>Definition:</b> expr_engine-inl.h:330</div></div>
<div class="ttc" id="namespacemshadow_1_1packet_html_ac5a6a9a133e5c212531dbae80484c1cc"><div class="ttname"><a href="namespacemshadow_1_1packet.html#ac5a6a9a133e5c212531dbae80484c1cc">mshadow::packet::AlignedFree</a></div><div class="ttdeci">void AlignedFree(void *ptr)</div><div class="ttdoc">free aligned space </div><div class="ttdef"><b>Definition:</b> packet-inl.h:106</div></div>
<div class="ttc" id="namespacemshadow_html_a0435f827863555e4ed19bbc0cc81fa39"><div class="ttname"><a href="namespacemshadow.html#a0435f827863555e4ed19bbc0cc81fa39">mshadow::Copy</a></div><div class="ttdeci">void Copy(Tensor&lt; cpu, dim, DType &gt; dst, const Tensor&lt; cpu, dim, DType &gt; &amp;src, Stream&lt; cpu &gt; *stream=NULL)</div><div class="ttdoc">copy data from one tensor to another, with same shape </div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:145</div></div>
<div class="ttc" id="structmshadow_1_1Shape_html"><div class="ttname"><a href="structmshadow_1_1Shape.html">mshadow::Shape&lt; dim &gt;</a></div></div>
<div class="ttc" id="structmshadow_1_1Tensor_html_a01a586757f1d2abba12d0f4fdf2fdeb1"><div class="ttname"><a href="structmshadow_1_1Tensor.html#a01a586757f1d2abba12d0f4fdf2fdeb1">mshadow::Tensor::MSize</a></div><div class="ttdeci">MSHADOW_XINLINE index_t MSize(void) const</div><div class="ttdef"><b>Definition:</b> tensor.h:497</div></div>
<div class="ttc" id="namespacemshadow_html_a4bdc2c62fd5dcee696cadd2351bf85e2"><div class="ttname"><a href="namespacemshadow.html#a4bdc2c62fd5dcee696cadd2351bf85e2">mshadow::MapExp</a></div><div class="ttdeci">void MapExp(TRValue&lt; R, cpu, dim, DType &gt; *dst, const expr::Exp&lt; E, DType, etype &gt; &amp;exp)</div><div class="ttdoc">CPU/GPU: map an expression to a tensor, this function calls MapPlan. </div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:207</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1Exp_html_ad6f23b53ba2fc3c2b594cf2ddfba5b22"><div class="ttname"><a href="structmshadow_1_1expr_1_1Exp.html#ad6f23b53ba2fc3c2b594cf2ddfba5b22">mshadow::expr::Exp&lt; Container, DType, type::kRValue &gt;::ptrself</a></div><div class="ttdeci">Container * ptrself(void)</div><div class="ttdef"><b>Definition:</b> expression.h:86</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_html_ad10c7414c5948e789e8761df2083c4e5"><div class="ttname"><a href="structmshadow_1_1Tensor.html#ad10c7414c5948e789e8761df2083c4e5">mshadow::Tensor::shape_</a></div><div class="ttdeci">Shape&lt; dimension &gt; shape_</div><div class="ttdoc">shape of the tensor </div><div class="ttdef"><b>Definition:</b> tensor.h:436</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1PacketAlignCheck_html"><div class="ttname"><a href="structmshadow_1_1expr_1_1PacketAlignCheck.html">mshadow::expr::PacketAlignCheck</a></div><div class="ttdef"><b>Definition:</b> packet-inl.h:379</div></div>
<div class="ttc" id="namespacemshadow_html_a8fc5237744c6eda97f3070ddcb0c715e"><div class="ttname"><a href="namespacemshadow.html#a8fc5237744c6eda97f3070ddcb0c715e">mshadow::Shape4</a></div><div class="ttdeci">MSHADOW_XINLINE Shape&lt; 4 &gt; Shape4(index_t s0, index_t s1, index_t s2, index_t s3)</div><div class="ttdoc">construct a four dimension shape, stride will equal s0 </div><div class="ttdef"><b>Definition:</b> tensor.h:240</div></div>
<div class="ttc" id="namespacemshadow_html_a3476fdf7d39add6f57beddafbc1b9625"><div class="ttname"><a href="namespacemshadow.html#a3476fdf7d39add6f57beddafbc1b9625">mshadow::SortByKey</a></div><div class="ttdeci">void SortByKey(Tensor&lt; cpu, 1, KDType &gt; keys, Tensor&lt; cpu, 1, VDType &gt; values, bool is_ascend=true)</div><div class="ttdoc">CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!) ...</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:558</div></div>
<div class="ttc" id="namespacemshadow_html_a75161ef5ed964dcf6518e3a7e59e6fb6"><div class="ttname"><a href="namespacemshadow.html#a75161ef5ed964dcf6518e3a7e59e6fb6">mshadow::Softmax</a></div><div class="ttdeci">void Softmax(Tensor&lt; cpu, 2, DType &gt; dst, const Tensor&lt; cpu, 2, DType &gt; &amp;energy)</div><div class="ttdoc">CPU/GPU: normalize softmax: dst[i][j] = exp(energy[i][j]) /(sum_j exp(energy[i][j])) ...</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:483</div></div>
<div class="ttc" id="namespacemshadow_html_ab3f080439f035e05501cc7f202e58c0e"><div class="ttname"><a href="namespacemshadow.html#ab3f080439f035e05501cc7f202e58c0e">mshadow::VectorizedSort</a></div><div class="ttdeci">void VectorizedSort(Tensor&lt; Device, 1, VDType &gt; values, Tensor&lt; Device, 1, SDType &gt; segments)</div><div class="ttdoc">CPU/GPU: Sort the keys within each segment. (Stable sort is performed!) Segments is defined as an asc...</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:589</div></div>
<div class="ttc" id="namespacemshadow_1_1packet_html_ae87c3ed51219b7ae4a295884bffa5d8b"><div class="ttname"><a href="namespacemshadow_1_1packet.html#ae87c3ed51219b7ae4a295884bffa5d8b">mshadow::packet::AlignedMallocPitch</a></div><div class="ttdeci">void * AlignedMallocPitch(size_t *out_pitch, size_t lspace, size_t num_line)</div><div class="ttdoc">analog to cudaMallocPitch, allocate an aligned space with num_line * lspace cells </div><div class="ttdef"><b>Definition:</b> packet-inl.h:77</div></div>
<div class="ttc" id="3rdparty_2mshadow_2mshadow_2base_8h_html_a8f433b4dd005a854eec58178ffd3d4bd"><div class="ttname"><a href="3rdparty_2mshadow_2mshadow_2base_8h.html#a8f433b4dd005a854eec58178ffd3d4bd">MSHADOW_CUDA_CALL</a></div><div class="ttdeci">#define MSHADOW_CUDA_CALL(func)</div><div class="ttdoc">Protected cuda call in mshadow. </div><div class="ttdef"><b>Definition:</b> base.h:278</div></div>
<div class="ttc" id="namespacemshadow_html_ae1734eb7939fe9627de46d62494fe9dc"><div class="ttname"><a href="namespacemshadow.html#ae1734eb7939fe9627de46d62494fe9dc">mshadow::MapReduceKeepLowest</a></div><div class="ttdeci">void MapReduceKeepLowest(TRValue&lt; R, cpu, 1, DType &gt; *dst, const expr::Exp&lt; E, DType, etype &gt; &amp;exp, DType scale=1)</div><div class="ttdoc">CPU/GPU: map an expression, do reduction to 1D Tensor in lowest dimension (dimension 0) ...</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:223</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1ShapeCheck_html_af44e6a3b97cf3b245bc1b7ad4cacb306"><div class="ttname"><a href="structmshadow_1_1expr_1_1ShapeCheck.html#af44e6a3b97cf3b245bc1b7ad4cacb306">mshadow::expr::ShapeCheck::Check</a></div><div class="ttdeci">static Shape&lt; dim &gt; Check(const E &amp;t)</div></div>
<div class="ttc" id="tensor_8h_html"><div class="ttname"><a href="tensor_8h.html">tensor.h</a></div><div class="ttdoc">header file of tensor data structure and functions This lib requires explicit memory allocation and d...</div></div>
<div class="ttc" id="structmshadow_1_1cpu_html"><div class="ttname"><a href="structmshadow_1_1cpu.html">mshadow::cpu</a></div><div class="ttdoc">device name CPU </div><div class="ttdef"><b>Definition:</b> tensor.h:39</div></div>
<div class="ttc" id="classmshadow_1_1expr_1_1Plan_html_ac3328472fd038748346a97cf8e58f3c6"><div class="ttname"><a href="classmshadow_1_1expr_1_1Plan.html#ac3328472fd038748346a97cf8e58f3c6">mshadow::expr::Plan::Eval</a></div><div class="ttdeci">MSHADOW_XINLINE DType Eval(index_t y, index_t x) const</div><div class="ttdoc">evaluate the expression at index [y][x] to be implemented by SubType, for RValue, the return type wil...</div></div>
<div class="ttc" id="namespacemshadow_html_a1d4e86f18ce66c1b7073537647bd4374"><div class="ttname"><a href="namespacemshadow.html#a1d4e86f18ce66c1b7073537647bd4374">mshadow::AllocHost_</a></div><div class="ttdeci">void * AllocHost_(size_t size)</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4_html_ad58f6139a1d680c2dab2919cd448a9cf"><div class="ttname"><a href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#ad58f6139a1d680c2dab2919cd448a9cf">mshadow::Tensor&lt; Device, 1, DType &gt;::size</a></div><div class="ttdeci">MSHADOW_XINLINE index_t size(index_t i) const</div><div class="ttdef"><b>Definition:</b> tensor.h:606</div></div>
<div class="ttc" id="namespacemshadow_html_a508edd8f1985757d0562b1e951666340"><div class="ttname"><a href="namespacemshadow.html#a508edd8f1985757d0562b1e951666340">mshadow::FreeHost_&lt; cpu &gt;</a></div><div class="ttdeci">void FreeHost_&lt; cpu &gt;(void *dptr)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:95</div></div>
<div class="ttc" id="namespacemshadow_html_adcbc2e1131386fccb1474b0bdf045926"><div class="ttname"><a href="namespacemshadow.html#adcbc2e1131386fccb1474b0bdf045926">mshadow::index_t</a></div><div class="ttdeci">int32_t index_t</div><div class="ttdoc">type that will be used for index </div><div class="ttdef"><b>Definition:</b> base.h:343</div></div>
<div class="ttc" id="namespacemshadow_html_aef49c3cef522198322017315341ac689"><div class="ttname"><a href="namespacemshadow.html#aef49c3cef522198322017315341ac689">mshadow::AllocSpace</a></div><div class="ttdeci">void AllocSpace(Tensor&lt; cpu, dim, DType &gt; *obj, bool pad=MSHADOW_ALLOC_PAD)</div><div class="ttdoc">CPU/GPU: allocate space for CTensor, according to the shape in the obj; this function is responsible t...</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:116</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4_html_a98c523db2418f25f868842fb32c16a35"><div class="ttname"><a href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html#a98c523db2418f25f868842fb32c16a35">mshadow::Tensor&lt; Device, 1, DType &gt;::dptr_</a></div><div class="ttdeci">DType * dptr_</div><div class="ttdef"><b>Definition:</b> tensor.h:571</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_html_a48a5927e810fbc45e43e92cfe397d9f2"><div class="ttname"><a href="structmshadow_1_1Tensor.html#a48a5927e810fbc45e43e92cfe397d9f2">mshadow::Tensor::FlatTo2D</a></div><div class="ttdeci">MSHADOW_XINLINE Tensor&lt; Device, 2, DType &gt; FlatTo2D(void) const</div><div class="ttdoc">flatten the tensor to 2 dimension, collapse the higher dimensions together </div><div class="ttdef"><b>Definition:</b> tensor.h:519</div></div>
<div class="ttc" id="packet-inl_8h_html"><div class="ttname"><a href="packet-inl_8h.html">packet-inl.h</a></div><div class="ttdoc">Generic packet vectorization code. </div></div>
<div class="ttc" id="namespacemshadow_html_a8c62e9096998333b078c8b8f744061ad"><div class="ttname"><a href="namespacemshadow.html#a8c62e9096998333b078c8b8f744061ad">mshadow::InitTensorEngine&lt; cpu &gt;</a></div><div class="ttdeci">void InitTensorEngine&lt; cpu &gt;(int dev_id)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:38</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_html_a88cbcae11653307bfa4c99804320b638"><div class="ttname"><a href="structmshadow_1_1Tensor.html#a88cbcae11653307bfa4c99804320b638">mshadow::Tensor::size</a></div><div class="ttdeci">MSHADOW_XINLINE index_t size(int idx) const</div><div class="ttdoc">return size of i-th dimension, start counting from highest dimension </div><div class="ttdef"><b>Definition:</b> tensor.h:505</div></div>
<div class="ttc" id="namespacemshadow_html_ad2320213151030353ae90f92a0da808d"><div class="ttname"><a href="namespacemshadow.html#ad2320213151030353ae90f92a0da808d">mshadow::AddTakeGradLargeBatch</a></div><div class="ttdeci">void AddTakeGradLargeBatch(Tensor&lt; cpu, 2, DType &gt; dst, const Tensor&lt; cpu, 1, IndexType &gt; &amp;sorted, const Tensor&lt; cpu, 1, IndexType &gt; &amp;index, const Tensor&lt; cpu, 2, DType &gt; &amp;src)</div><div class="ttdoc">CPU/GPU: Gradient accumulate of embedding matrix. dst[sorted[i]] += src[index[i]] Called when the bat...</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:537</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_html_a9cc7d01a1eb0825d7a3fcdabc8e58f07"><div class="ttname"><a href="structmshadow_1_1Tensor.html#a9cc7d01a1eb0825d7a3fcdabc8e58f07">mshadow::Tensor::CheckContiguous</a></div><div class="ttdeci">MSHADOW_XINLINE bool CheckContiguous(void) const</div><div class="ttdef"><b>Definition:</b> tensor.h:491</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1BLASEngine_html_af21b0de1c3d687671e36cffb464d5988"><div class="ttname"><a href="structmshadow_1_1expr_1_1BLASEngine.html#af21b0de1c3d687671e36cffb464d5988">mshadow::expr::BLASEngine::dot</a></div><div class="ttdeci">static void dot(Stream&lt; Device &gt; *stream, int n, const DType *X, int incX, const DType *Y, int incY, DType *ret)</div><div class="ttdef"><b>Definition:</b> dot_engine-inl.h:125</div></div>
<div class="ttc" id="namespacemshadow_html_ad0eacae3492e97f53fcd2f210bf9c645"><div class="ttname"><a href="namespacemshadow.html#ad0eacae3492e97f53fcd2f210bf9c645">mshadow::AllocHost</a></div><div class="ttdeci">void AllocHost(Tensor&lt; cpu, dim, DType &gt; *obj)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:100</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1ShapeCheck_html"><div class="ttname"><a href="structmshadow_1_1expr_1_1ShapeCheck.html">mshadow::expr::ShapeCheck</a></div><div class="ttdoc">runtime shape checking template get the shape of an expression, report error if shape mismatch ...</div><div class="ttdef"><b>Definition:</b> expr_engine-inl.h:364</div></div>
<div class="ttc" id="namespacemshadow_html_ad46c65f82d888517bc8e372fd49d8011"><div class="ttname"><a href="namespacemshadow.html#ad46c65f82d888517bc8e372fd49d8011">mshadow::NewStream&lt; cpu &gt;</a></div><div class="ttdeci">Stream&lt; cpu &gt; * NewStream&lt; cpu &gt;(bool create_blas_handle, bool create_dnn_handle, int dev_id)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:48</div></div>
<div class="ttc" id="namespacemshadow_html_ad4d36332af8c5202ecc95adf4b00cdf2"><div class="ttname"><a href="namespacemshadow.html#ad4d36332af8c5202ecc95adf4b00cdf2">mshadow::MapPlan</a></div><div class="ttdeci">void MapPlan(TRValue&lt; R, cpu, dim, DType &gt; *dst, const expr::Plan&lt; E, DType &gt; &amp;plan)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:163</div></div>
<div class="ttc" id="structmshadow_1_1MapExpCPUEngine_html"><div class="ttname"><a href="structmshadow_1_1MapExpCPUEngine.html">mshadow::MapExpCPUEngine</a></div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:182</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1ScalarExp_html"><div class="ttname"><a href="structmshadow_1_1expr_1_1ScalarExp.html">mshadow::expr::ScalarExp</a></div><div class="ttdoc">scalar expression </div><div class="ttdef"><b>Definition:</b> expression.h:95</div></div>
<div class="ttc" id="namespacemshadow_html_a5fce5c2df842439cc7d2d7a90e2cf7d4"><div class="ttname"><a href="namespacemshadow.html#a5fce5c2df842439cc7d2d7a90e2cf7d4">mshadow::MapReduceKeepHighDim</a></div><div class="ttdeci">void MapReduceKeepHighDim(TRValue&lt; R, cpu, 1, DType &gt; *dst, const expr::Exp&lt; E, DType, etype &gt; &amp;exp, DType scale=1)</div><div class="ttdoc">CPU/GPU: map an expression, do reduction to 1D Tensor in third dimension (dimension 2) ...</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:250</div></div>
<div class="ttc" id="namespacemshadow_html_a750a8961eda58a98116726e462545e6c"><div class="ttname"><a href="namespacemshadow.html#a750a8961eda58a98116726e462545e6c">mshadow::AllocHost_&lt; cpu &gt;</a></div><div class="ttdeci">void * AllocHost_&lt; cpu &gt;(size_t size)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:90</div></div>
<div class="ttc" id="namespacemshadow_html_a3f19232d4c6ea393abccc5288675d345"><div class="ttname"><a href="namespacemshadow.html#a3f19232d4c6ea393abccc5288675d345">mshadow::NewTensor</a></div><div class="ttdeci">Tensor&lt; Device, dim, DType &gt; NewTensor(const Shape&lt; dim &gt; &amp;shape, DType initv, bool pad=MSHADOW_ALLOC_PAD, Stream&lt; Device &gt; *stream=NULL)</div><div class="ttdoc">CPU/GPU: short cut to allocate and initialize a Tensor. </div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:132</div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1Exp_html"><div class="ttname"><a href="structmshadow_1_1expr_1_1Exp.html">mshadow::expr::Exp</a></div><div class="ttdoc">defines how expression exp can be evaluated and stored into dst </div><div class="ttdef"><b>Definition:</b> expression.h:79</div></div>
<div class="ttc" id="namespacemshadow_1_1expr_html_aefd008a0bf012dc8fb99d9bbcca7a078"><div class="ttname"><a href="namespacemshadow_1_1expr.html#aefd008a0bf012dc8fb99d9bbcca7a078">mshadow::expr::MakePlan</a></div><div class="ttdeci">Plan&lt; BinaryMapExp&lt; OP, TA, TB, DType, etype &gt;, DType &gt; MakePlan(const BinaryMapExp&lt; OP, TA, TB, DType, etype &gt; &amp;e)</div><div class="ttdef"><b>Definition:</b> expr_engine-inl.h:239</div></div>
<div class="ttc" id="namespacemshadow_html_a802d44c652195053e87b63881c7d4d1c"><div class="ttname"><a href="namespacemshadow.html#a802d44c652195053e87b63881c7d4d1c">mshadow::AddTakeGrad</a></div><div class="ttdeci">void AddTakeGrad(Tensor&lt; cpu, 2, DType &gt; dst, const Tensor&lt; cpu, 1, IndexType &gt; &amp;index, const Tensor&lt; cpu, 2, DType &gt; &amp;src)</div><div class="ttdoc">CPU/GPU: Gradient accumulate of embedding matrix. dst[index[i]] += src[i] Called when the featuredim ...</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:516</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4_html"><div class="ttname"><a href="structmshadow_1_1Tensor_3_01Device_00_011_00_01DType_01_4.html">mshadow::Tensor&lt; Device, 1, DType &gt;</a></div><div class="ttdef"><b>Definition:</b> tensor.h:568</div></div>
<div class="ttc" id="namespacemshadow_html_a2be3eab8d3469aa48c40d38881558bb9"><div class="ttname"><a href="namespacemshadow.html#a2be3eab8d3469aa48c40d38881558bb9">mshadow::Shape3</a></div><div class="ttdeci">MSHADOW_XINLINE Shape&lt; 3 &gt; Shape3(index_t s0, index_t s1, index_t s2)</div><div class="ttdoc">construct a three dimension shape, stride will equal s0 </div><div class="ttdef"><b>Definition:</b> tensor.h:227</div></div>
<div class="ttc" id="namespacemshadow_html"><div class="ttname"><a href="namespacemshadow.html">mshadow</a></div><div class="ttdoc">overloaded + operator between half_t and bf16_t </div><div class="ttdef"><b>Definition:</b> base.h:334</div></div>
<div class="ttc" id="namespacemshadow_html_a6dc5b2a49c44364a6f3c8cca386aff12"><div class="ttname"><a href="namespacemshadow.html#a6dc5b2a49c44364a6f3c8cca386aff12">mshadow::FreeHost</a></div><div class="ttdeci">void FreeHost(Tensor&lt; cpu, dim, DType &gt; *obj)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:107</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_html_afee556f188e29bbd0ecc45fe98d3c1c3"><div class="ttname"><a href="structmshadow_1_1Tensor.html#afee556f188e29bbd0ecc45fe98d3c1c3">mshadow::Tensor::stride_</a></div><div class="ttdeci">index_t stride_</div><div class="ttdoc">storing the stride information in x dimension this is used to deal with pitch allocation in gpu or ss...</div><div class="ttdef"><b>Definition:</b> tensor.h:441</div></div>
<div class="ttc" id="packet-inl_8h_html_a958737730f9e52e5d5b9cbab79ad357e"><div class="ttname"><a href="packet-inl_8h.html#a958737730f9e52e5d5b9cbab79ad357e">MSHADOW_DEFAULT_PACKET</a></div><div class="ttdeci">#define MSHADOW_DEFAULT_PACKET</div><div class="ttdef"><b>Definition:</b> packet-inl.h:47</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_html"><div class="ttname"><a href="structmshadow_1_1Tensor.html">mshadow::Tensor</a></div><div class="ttdoc">general tensor </div><div class="ttdef"><b>Definition:</b> tensor.h:420</div></div>
<div class="ttc" id="3rdparty_2mshadow_2mshadow_2base_8h_html"><div class="ttname"><a href="3rdparty_2mshadow_2mshadow_2base_8h.html">base.h</a></div></div>
<div class="ttc" id="structmshadow_1_1expr_1_1BLASEngine_html_ac0b67fda3ecdf2087b816602ad43eda3"><div class="ttname"><a href="structmshadow_1_1expr_1_1BLASEngine.html#ac0b67fda3ecdf2087b816602ad43eda3">mshadow::expr::BLASEngine::SetStream</a></div><div class="ttdeci">static void SetStream(Stream&lt; Device &gt; *stream)</div><div class="ttdef"><b>Definition:</b> dot_engine-inl.h:82</div></div>
<div class="ttc" id="namespacemshadow_html_a9573b40e55fb2e885db8b594f4168efc"><div class="ttname"><a href="namespacemshadow.html#a9573b40e55fb2e885db8b594f4168efc">mshadow::DeleteStream&lt; cpu &gt;</a></div><div class="ttdeci">void DeleteStream&lt; cpu &gt;(Stream&lt; cpu &gt; *stream)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:54</div></div>
<div class="ttc" id="structmshadow_1_1MapExpCPUEngine_3_01true_00_01SV_00_01Tensor_3_01cpu_00_01dim_00_01DType_01_4_01d6b33847e15ac0b561110aa3bff3f62_html_aa7c638bcd548963fc82f8752317f9d3f"><div class="ttname"><a href="structmshadow_1_1MapExpCPUEngine_3_01true_00_01SV_00_01Tensor_3_01cpu_00_01dim_00_01DType_01_4_01d6b33847e15ac0b561110aa3bff3f62.html#aa7c638bcd548963fc82f8752317f9d3f">mshadow::MapExpCPUEngine&lt; true, SV, Tensor&lt; cpu, dim, DType &gt;, dim, DType, E, etype &gt;::Map</a></div><div class="ttdeci">static void Map(Tensor&lt; cpu, dim, DType &gt; *dst, const expr::Exp&lt; E, DType, etype &gt; &amp;exp)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:192</div></div>
<div class="ttc" id="namespacemshadow_html_aa94c153a2f8bcd10be82a4f5598ae5b9"><div class="ttname"><a href="namespacemshadow.html#aa94c153a2f8bcd10be82a4f5598ae5b9">mshadow::openmp_index_t</a></div><div class="ttdeci">index_t openmp_index_t</div><div class="ttdoc">openmp index for linux </div><div class="ttdef"><b>Definition:</b> base.h:351</div></div>
<div class="ttc" id="structmshadow_1_1Tensor_html_a485d1558eaa9eded0b8fd23ed82c6298"><div class="ttname"><a href="structmshadow_1_1Tensor.html#a485d1558eaa9eded0b8fd23ed82c6298">mshadow::Tensor::stream_</a></div><div class="ttdeci">Stream&lt; Device &gt; * stream_</div><div class="ttdoc">stream where the computation lies stream is a device dependency concept where each computation ...</div><div class="ttdef"><b>Definition:</b> tensor.h:446</div></div>
<div class="ttc" id="dot__engine-inl_8h_html"><div class="ttname"><a href="dot__engine-inl_8h.html">dot_engine-inl.h</a></div><div class="ttdoc">definitions of how Matrix Multiplications can be evaluated </div></div>
<div class="ttc" id="structmshadow_1_1MapExpCPUEngine_html_a2815df2f68121b3093965d9bf6a7c75f"><div class="ttname"><a href="structmshadow_1_1MapExpCPUEngine.html#a2815df2f68121b3093965d9bf6a7c75f">mshadow::MapExpCPUEngine::Map</a></div><div class="ttdeci">static void Map(TRValue&lt; R, cpu, dim, DType &gt; *dst, const expr::Exp&lt; E, DType, etype &gt; &amp;exp)</div><div class="ttdef"><b>Definition:</b> tensor_cpu-inl.h:183</div></div>
<div class="ttc" id="structmshadow_1_1Stream_html"><div class="ttname"><a href="structmshadow_1_1Stream.html">mshadow::Stream</a></div><div class="ttdoc">computation stream structure, used for asynchronous computations </div><div class="ttdef"><b>Definition:</b> tensor.h:383</div></div>
</div><!-- fragment --></div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated on Thu Jan 5 2023 00:58:42 for mxnet by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.13
</small></address>
</body>
</html>