blob: 6fb684b984c11af24cf06b919db49473a0de4481 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.17"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>mxnet: mxnet::common::cuda::rtc Namespace Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">mxnet
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.17 -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
var searchBox = new SearchBox("searchBox", "search",false,'Search');
/* @license-end */
</script>
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
$(function() {
initMenu('',true,false,'search.php','Search');
$(document).ready(function() { init_search(); });
});
/* @license-end */</script>
<div id="main-nav"></div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="namespacemxnet.html">mxnet</a></li><li class="navelem"><a class="el" href="namespacemxnet_1_1common.html">common</a></li><li class="navelem"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html">cuda</a></li><li class="navelem"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html">rtc</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#namespaces">Namespaces</a> &#124;
<a href="#func-members">Functions</a> &#124;
<a href="#var-members">Variables</a> </div>
<div class="headertitle">
<div class="title">mxnet::common::cuda::rtc Namespace Reference</div> </div>
</div><!--header-->
<div class="contents">
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
Namespaces</h2></td></tr>
<tr class="memitem:namespacemxnet_1_1common_1_1cuda_1_1rtc_1_1util"><td class="memItemLeft" align="right" valign="top"> &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc_1_1util.html">util</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
Functions</h2></td></tr>
<tr class="memitem:af55f617654efc870d5cd468dd4cf98fd"><td class="memTemplParams" colspan="2">template&lt;typename Params &gt; </td></tr>
<tr class="memitem:af55f617654efc870d5cd468dd4cf98fd"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#af55f617654efc870d5cd468dd4cf98fd">VectorizedKernelRTCLauncher</a> (const std::string &amp;parameters, const std::string &amp;kernel_name, const std::string &amp;code, int nvec, const <a class="el" href="namespacemxnet.html#abadedc1b42ee3e5b8852e63783b982ef">index_t</a> lead_dim, const <a class="el" href="namespacemxnet.html#abadedc1b42ee3e5b8852e63783b982ef">index_t</a> other_dim, <a class="el" href="structmshadow_1_1Stream.html">mshadow::Stream</a>&lt; <a class="el" href="namespacemxnet.html#a31ef33571a4f6ba8847eeee1f4907141">gpu</a> &gt; *s, const Params params, const std::vector&lt; <a class="el" href="classmxnet_1_1TBlob.html">TBlob</a> &gt; &amp;inputs, const std::vector&lt; <a class="el" href="classmxnet_1_1TBlob.html">TBlob</a> &gt; &amp;outputs, const int dev_id, const int lead_input_num=0, const <a class="el" href="namespacemxnet.html#abadedc1b42ee3e5b8852e63783b982ef">index_t</a> blocks=0)</td></tr>
<tr class="memdesc:af55f617654efc870d5cd468dd4cf98fd"><td class="mdescLeft">&#160;</td><td class="mdescRight">Launcher helper for the kernels using vectorization. <a href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#af55f617654efc870d5cd468dd4cf98fd">More...</a><br /></td></tr>
<tr class="separator:af55f617654efc870d5cd468dd4cf98fd"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a6e2afffb9b744b04aa64c8ceafd51aae"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a6e2afffb9b744b04aa64c8ceafd51aae">GetMaxSupportedArch</a> ()</td></tr>
<tr class="separator:a6e2afffb9b744b04aa64c8ceafd51aae"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aa376584cbd919b0c3cf23f22bbe948fd"><td class="memItemLeft" align="right" valign="top">CUfunction&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#aa376584cbd919b0c3cf23f22bbe948fd">get_function</a> (const std::string &amp;parameters, const std::string &amp;kernel_name, const std::string &amp;code, int dev_id)</td></tr>
<tr class="memdesc:aa376584cbd919b0c3cf23f22bbe948fd"><td class="mdescLeft">&#160;</td><td class="mdescRight">Compile and get the GPU kernel. Uses a cache in order to eliminate the overhead of compilation. <a href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#aa376584cbd919b0c3cf23f22bbe948fd">More...</a><br /></td></tr>
<tr class="separator:aa376584cbd919b0c3cf23f22bbe948fd"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a9732a1d386bd52d6e11f19ab02be34b1"><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a9732a1d386bd52d6e11f19ab02be34b1">launch</a> (CUfunction function, const dim3 grid_dim, const dim3 block_dim, unsigned int shared_mem_bytes, <a class="el" href="structmshadow_1_1Stream.html">mshadow::Stream</a>&lt; <a class="el" href="namespacemxnet.html#a31ef33571a4f6ba8847eeee1f4907141">gpu</a> &gt; *stream, std::vector&lt; const void * &gt; *args)</td></tr>
<tr class="memdesc:a9732a1d386bd52d6e11f19ab02be34b1"><td class="mdescLeft">&#160;</td><td class="mdescRight">Launch a GPU kernel. <a href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a9732a1d386bd52d6e11f19ab02be34b1">More...</a><br /></td></tr>
<tr class="separator:a9732a1d386bd52d6e11f19ab02be34b1"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="var-members"></a>
Variables</h2></td></tr>
<tr class="memitem:af9ebdc3671fe5eb11c4a5aec4cfecdbe"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#af9ebdc3671fe5eb11c4a5aec4cfecdbe">backward_function_definitions</a> []</td></tr>
<tr class="separator:af9ebdc3671fe5eb11c4a5aec4cfecdbe"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a05143a27c092d6956c91fd2220136eed"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a05143a27c092d6956c91fd2220136eed">grad_function_definitions</a> []</td></tr>
<tr class="separator:a05143a27c092d6956c91fd2220136eed"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a20082213a948bb3c665f08cab01520bc"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a20082213a948bb3c665f08cab01520bc">function_definitions_util</a> []</td></tr>
<tr class="separator:a20082213a948bb3c665f08cab01520bc"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aba31fb45bdad7f3027d4343aa01c787a"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#aba31fb45bdad7f3027d4343aa01c787a">function_definitions_binary</a> []</td></tr>
<tr class="separator:aba31fb45bdad7f3027d4343aa01c787a"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a91b1a52fc8fe560cf5f56220d2ca218f"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a91b1a52fc8fe560cf5f56220d2ca218f">function_definitions_unary</a> []</td></tr>
<tr class="separator:a91b1a52fc8fe560cf5f56220d2ca218f"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ad3ebfc4af0e3ca45f58ed000f71f2591"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#ad3ebfc4af0e3ca45f58ed000f71f2591">fp16_support_string</a> []</td></tr>
<tr class="separator:ad3ebfc4af0e3ca45f58ed000f71f2591"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac5a777fbfe06df6918e4fca2f66c30e6"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#ac5a777fbfe06df6918e4fca2f66c30e6">reducer</a> []</td></tr>
<tr class="separator:ac5a777fbfe06df6918e4fca2f66c30e6"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac71ebdfa69467fc32f2e18e05410af21"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#ac71ebdfa69467fc32f2e18e05410af21">logic_reducer</a> []</td></tr>
<tr class="separator:ac71ebdfa69467fc32f2e18e05410af21"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a65dbf14987fef106699cfead3c209512"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a65dbf14987fef106699cfead3c209512">special_functions_definitions</a> []</td></tr>
<tr class="separator:a65dbf14987fef106699cfead3c209512"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ae927f16659df551ae48be9507647ec2d"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#ae927f16659df551ae48be9507647ec2d">type_support_string</a> []</td></tr>
<tr class="separator:ae927f16659df551ae48be9507647ec2d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a8a744161be1ad33c7ffb12d9cdfa316f"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a8a744161be1ad33c7ffb12d9cdfa316f">util_string</a> []</td></tr>
<tr class="separator:a8a744161be1ad33c7ffb12d9cdfa316f"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a09828bc99a0f05955a61b9e4feabb9b8"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a09828bc99a0f05955a61b9e4feabb9b8">limits</a> []</td></tr>
<tr class="separator:a09828bc99a0f05955a61b9e4feabb9b8"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a85172c90f8071324eb159bc97c257264"><td class="memItemLeft" align="right" valign="top">const char&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#a85172c90f8071324eb159bc97c257264">vectorization_support_string</a> []</td></tr>
<tr class="separator:a85172c90f8071324eb159bc97c257264"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aef7cd83e7f0216b689f5f9d4191d9f8b"><td class="memItemLeft" align="right" valign="top">std::mutex&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda_1_1rtc.html#aef7cd83e7f0216b689f5f9d4191d9f8b">lock</a></td></tr>
<tr class="separator:aef7cd83e7f0216b689f5f9d4191d9f8b"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
<h2 class="groupheader">Function Documentation</h2>
<a id="aa376584cbd919b0c3cf23f22bbe948fd"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aa376584cbd919b0c3cf23f22bbe948fd">&#9670;&nbsp;</a></span>get_function()</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">CUfunction mxnet::common::cuda::rtc::get_function </td>
<td>(</td>
<td class="paramtype">const std::string &amp;&#160;</td>
<td class="paramname"><em>parameters</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const std::string &amp;&#160;</td>
<td class="paramname"><em>kernel_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const std::string &amp;&#160;</td>
<td class="paramname"><em>code</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>dev_id</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Compile and get the GPU kernel. Uses a cache in order to eliminate the overhead of compilation. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">parameters</td><td>of the kernel (e.g. values of the template arguments, types used) </td></tr>
<tr><td class="paramname">kernel_name</td><td>name of the kernel </td></tr>
<tr><td class="paramname">code</td><td>used for compilation of the kernel if not found in cache </td></tr>
<tr><td class="paramname">dev_id</td><td>id of the device which the kernel will be launched on </td></tr>
</table>
</dd>
</dl>
</div>
</div>
<a id="a6e2afffb9b744b04aa64c8ceafd51aae"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a6e2afffb9b744b04aa64c8ceafd51aae">&#9670;&nbsp;</a></span>GetMaxSupportedArch()</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">int mxnet::common::cuda::rtc::GetMaxSupportedArch </td>
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a9732a1d386bd52d6e11f19ab02be34b1"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a9732a1d386bd52d6e11f19ab02be34b1">&#9670;&nbsp;</a></span>launch()</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">void mxnet::common::cuda::rtc::launch </td>
<td>(</td>
<td class="paramtype">CUfunction&#160;</td>
<td class="paramname"><em>function</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const dim3&#160;</td>
<td class="paramname"><em>grid_dim</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const dim3&#160;</td>
<td class="paramname"><em>block_dim</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">unsigned int&#160;</td>
<td class="paramname"><em>shared_mem_bytes</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype"><a class="el" href="structmshadow_1_1Stream.html">mshadow::Stream</a>&lt; <a class="el" href="namespacemxnet.html#a31ef33571a4f6ba8847eeee1f4907141">gpu</a> &gt; *&#160;</td>
<td class="paramname"><em>stream</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">std::vector&lt; const void * &gt; *&#160;</td>
<td class="paramname"><em>args</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Launch a GPU kernel. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">function</td><td>to launch </td></tr>
<tr><td class="paramname">grid_dim</td><td>grid dimensions </td></tr>
<tr><td class="paramname">block_dim</td><td>block dimensions </td></tr>
<tr><td class="paramname">shared_mem_bytes</td><td>amount of dynamic shared memory needed by the kernel </td></tr>
<tr><td class="paramname">stream</td><td>used for launching the kernel </td></tr>
<tr><td class="paramname">args</td><td>arguments of the kernel </td></tr>
</table>
</dd>
</dl>
</div>
</div>
<a id="af55f617654efc870d5cd468dd4cf98fd"></a>
<h2 class="memtitle"><span class="permalink"><a href="#af55f617654efc870d5cd468dd4cf98fd">&#9670;&nbsp;</a></span>VectorizedKernelRTCLauncher()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename Params &gt; </div>
<table class="memname">
<tr>
<td class="memname">void mxnet::common::cuda::rtc::VectorizedKernelRTCLauncher </td>
<td>(</td>
<td class="paramtype">const std::string &amp;&#160;</td>
<td class="paramname"><em>parameters</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const std::string &amp;&#160;</td>
<td class="paramname"><em>kernel_name</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const std::string &amp;&#160;</td>
<td class="paramname"><em>code</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>nvec</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const <a class="el" href="namespacemxnet.html#abadedc1b42ee3e5b8852e63783b982ef">index_t</a>&#160;</td>
<td class="paramname"><em>lead_dim</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const <a class="el" href="namespacemxnet.html#abadedc1b42ee3e5b8852e63783b982ef">index_t</a>&#160;</td>
<td class="paramname"><em>other_dim</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype"><a class="el" href="structmshadow_1_1Stream.html">mshadow::Stream</a>&lt; <a class="el" href="namespacemxnet.html#a31ef33571a4f6ba8847eeee1f4907141">gpu</a> &gt; *&#160;</td>
<td class="paramname"><em>s</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const Params&#160;</td>
<td class="paramname"><em>params</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const std::vector&lt; <a class="el" href="classmxnet_1_1TBlob.html">TBlob</a> &gt; &amp;&#160;</td>
<td class="paramname"><em>inputs</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const std::vector&lt; <a class="el" href="classmxnet_1_1TBlob.html">TBlob</a> &gt; &amp;&#160;</td>
<td class="paramname"><em>outputs</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const int&#160;</td>
<td class="paramname"><em>dev_id</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const int&#160;</td>
<td class="paramname"><em>lead_input_num</em> = <code>0</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const <a class="el" href="namespacemxnet.html#abadedc1b42ee3e5b8852e63783b982ef">index_t</a>&#160;</td>
<td class="paramname"><em>blocks</em> = <code>0</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Launcher helper for the kernels using vectorization. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">parameters</td><td>of the kernel (e.g. values of the template arguments) </td></tr>
<tr><td class="paramname">kernel_name</td><td>name of the kernel </td></tr>
<tr><td class="paramname">code</td><td>used for compilation of the kernel if not found in cache </td></tr>
<tr><td class="paramname">nvec</td><td>length of the vector used for loading/storing data </td></tr>
<tr><td class="paramname">lead_dim</td><td>size of leading dimension of the tensors </td></tr>
<tr><td class="paramname">other_dim</td><td>maximum of the total size of all the other dimensions of the tensors </td></tr>
<tr><td class="paramname">s</td><td>stream used to launch the kernel </td></tr>
<tr><td class="paramname">inputs</td><td>to the kernel </td></tr>
<tr><td class="paramname">outputs</td><td>of the kernel </td></tr>
<tr><td class="paramname">dev_id</td><td>id of the device which the kernel will be launched on </td></tr>
<tr><td class="paramname">lead_input_num</td><td>number of the input to use for checking alignment (in case only a subset of the inputs is vectorized). Default is 0. </td></tr>
<tr><td class="paramname">blocks</td><td>if provided and not 0, will launch the specified number of thread blocks. Default is 0. </td></tr>
</table>
</dd>
</dl>
</div>
</div>
<h2 class="groupheader">Variable Documentation</h2>
<a id="af9ebdc3671fe5eb11c4a5aec4cfecdbe"></a>
<h2 class="memtitle"><span class="permalink"><a href="#af9ebdc3671fe5eb11c4a5aec4cfecdbe">&#9670;&nbsp;</a></span>backward_function_definitions</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::backward_function_definitions[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ad3ebfc4af0e3ca45f58ed000f71f2591"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ad3ebfc4af0e3ca45f58ed000f71f2591">&#9670;&nbsp;</a></span>fp16_support_string</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::fp16_support_string[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="aba31fb45bdad7f3027d4343aa01c787a"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aba31fb45bdad7f3027d4343aa01c787a">&#9670;&nbsp;</a></span>function_definitions_binary</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::function_definitions_binary[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a91b1a52fc8fe560cf5f56220d2ca218f"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a91b1a52fc8fe560cf5f56220d2ca218f">&#9670;&nbsp;</a></span>function_definitions_unary</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::function_definitions_unary[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a20082213a948bb3c665f08cab01520bc"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a20082213a948bb3c665f08cab01520bc">&#9670;&nbsp;</a></span>function_definitions_util</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::function_definitions_util[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a05143a27c092d6956c91fd2220136eed"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a05143a27c092d6956c91fd2220136eed">&#9670;&nbsp;</a></span>grad_function_definitions</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::grad_function_definitions[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a09828bc99a0f05955a61b9e4feabb9b8"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a09828bc99a0f05955a61b9e4feabb9b8">&#9670;&nbsp;</a></span>limits</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::limits[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="aef7cd83e7f0216b689f5f9d4191d9f8b"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aef7cd83e7f0216b689f5f9d4191d9f8b">&#9670;&nbsp;</a></span>lock</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">std::mutex mxnet::common::cuda::rtc::lock</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ac71ebdfa69467fc32f2e18e05410af21"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ac71ebdfa69467fc32f2e18e05410af21">&#9670;&nbsp;</a></span>logic_reducer</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::logic_reducer[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ac5a777fbfe06df6918e4fca2f66c30e6"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ac5a777fbfe06df6918e4fca2f66c30e6">&#9670;&nbsp;</a></span>reducer</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::reducer[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a65dbf14987fef106699cfead3c209512"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a65dbf14987fef106699cfead3c209512">&#9670;&nbsp;</a></span>special_functions_definitions</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::special_functions_definitions[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ae927f16659df551ae48be9507647ec2d"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ae927f16659df551ae48be9507647ec2d">&#9670;&nbsp;</a></span>type_support_string</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::type_support_string[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a8a744161be1ad33c7ffb12d9cdfa316f"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a8a744161be1ad33c7ffb12d9cdfa316f">&#9670;&nbsp;</a></span>util_string</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::util_string[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a85172c90f8071324eb159bc97c257264"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a85172c90f8071324eb159bc97c257264">&#9670;&nbsp;</a></span>vectorization_support_string</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">const char mxnet::common::cuda::rtc::vectorization_support_string[]</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated on Thu Jan 5 2023 03:47:41 for mxnet by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.17
</small></address>
</body>
</html>