blob: a58ca0214958da4b8e062fc1600c2b1285d0c972 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.13"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>mxnet: /work/mxnet/src/common/cuda_utils.h File Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">mxnet
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.13 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
$(function() {
initMenu('',true,false,'search.php','Search');
$(document).ready(function() { init_search(); });
});
</script>
<div id="main-nav"></div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="dir_68267d1309a1af8e8297ef4c3efbcdba.html">src</a></li><li class="navelem"><a class="el" href="dir_fdedb0aba14d44ce9d99bc100e026e6a.html">common</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#nested-classes">Classes</a> &#124;
<a href="#namespaces">Namespaces</a> &#124;
<a href="#define-members">Macros</a> &#124;
<a href="#func-members">Functions</a> &#124;
<a href="#var-members">Variables</a> </div>
<div class="headertitle">
<div class="title">cuda_utils.h File Reference</div> </div>
</div><!--header-->
<div class="contents">
<p>Common CUDA utilities.
<a href="#details">More...</a></p>
<div class="textblock"><code>#include &lt;dmlc/logging.h&gt;</code><br />
<code>#include &lt;<a class="el" href="parameter_8h_source.html">dmlc/parameter.h</a>&gt;</code><br />
<code>#include &lt;<a class="el" href="optional_8h_source.html">dmlc/optional.h</a>&gt;</code><br />
<code>#include &lt;<a class="el" href="3rdparty_2mshadow_2mshadow_2base_8h_source.html">mshadow/base.h</a>&gt;</code><br />
<code>#include &lt;<a class="el" href="libinfo_8h_source.html">mxnet/libinfo.h</a>&gt;</code><br />
<code>#include &lt;cuda_runtime.h&gt;</code><br />
<code>#include &lt;cublas_v2.h&gt;</code><br />
<code>#include &lt;curand.h&gt;</code><br />
<code>#include &lt;vector&gt;</code><br />
</div><div class="textblock"><div class="dynheader">
Include dependency graph for cuda_utils.h:</div>
<div class="dyncontent">
<div class="center"><img src="cuda__utils_8h__incl.png" border="0" usemap="#_2work_2mxnet_2src_2common_2cuda__utils_8h" alt=""/></div>
<map name="_2work_2mxnet_2src_2common_2cuda__utils_8h" id="_2work_2mxnet_2src_2common_2cuda__utils_8h">
<area shape="rect" id="node3" href="parameter_8h.html" title="Provide lightweight util to do parameter setup and checking. " alt="" coords="1027,95,1164,121"/>
<area shape="rect" id="node26" href="optional_8h.html" title="Container to hold optional data. " alt="" coords="1528,169,1623,196"/>
<area shape="rect" id="node29" href="3rdparty_2mshadow_2mshadow_2base_8h.html" title="mshadow/base.h" alt="" coords="2648,169,2780,196"/>
<area shape="rect" id="node46" href="libinfo_8h.html" title="get features of the MXNet library at runtime " alt="" coords="2279,95,2400,121"/>
<area shape="rect" id="node20" href="3rdparty_2dmlc-core_2include_2dmlc_2base_8h.html" title="./base.h" alt="" coords="909,319,983,345"/>
<area shape="rect" id="node23" href="json_8h.html" title="Lightweight JSON Reader/Writer that read save into C++ data structs. This includes STL composites and..." alt="" coords="1061,169,1130,196"/>
<area shape="rect" id="node25" href="type__traits_8h.html" title="type traits information header " alt="" coords="1258,244,1370,271"/>
<area shape="rect" id="node28" href="strtonum_8h.html" title="A faster implementation of strtof and strtod. " alt="" coords="775,244,877,271"/>
<area shape="rect" id="node27" href="common_8h.html" title="defines some common utility function. " alt="" coords="1895,244,1994,271"/>
<area shape="rect" id="node43" href="half_8h.html" title="definition of half (float16) type. " alt="" coords="2322,244,2389,271"/>
<area shape="rect" id="node44" href="half2_8h.html" title="definition of vector float16, half2 type. " alt="" coords="2413,244,2487,271"/>
<area shape="rect" id="node45" href="bfloat_8h.html" title="definition of bfloat type. " alt="" coords="2512,244,2591,271"/>
<area shape="rect" id="node49" href="include_2mxnet_2c__api_8h.html" title="c_api.h" alt="" coords="2403,169,2471,196"/>
</map>
</div>
</div>
<p><a href="cuda__utils_8h_source.html">Go to the source code of this file.</a></p>
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
Classes</h2></td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType.html">mxnet::common::cuda::CublasType&lt; DType &gt;</a></td></tr>
<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Converts between C++ datatypes and enums/constants needed by cuBLAS. <a href="structmxnet_1_1common_1_1cuda_1_1CublasType.html#details">More...</a><br /></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01float_01_4.html">mxnet::common::cuda::CublasType&lt; float &gt;</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01double_01_4.html">mxnet::common::cuda::CublasType&lt; double &gt;</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01mshadow_1_1half_1_1half__t_01_4.html">mxnet::common::cuda::CublasType&lt; mshadow::half::half_t &gt;</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01uint8__t_01_4.html">mxnet::common::cuda::CublasType&lt; uint8_t &gt;</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01int32__t_01_4.html">mxnet::common::cuda::CublasType&lt; int32_t &gt;</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classmxnet_1_1common_1_1cuda_1_1DeviceStore.html">mxnet::common::cuda::DeviceStore</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
Namespaces</h2></td></tr>
<tr class="memitem:namespacemxnet"><td class="memItemLeft" align="right" valign="top"> &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet.html">mxnet</a></td></tr>
<tr class="memdesc:namespacemxnet"><td class="mdescLeft">&#160;</td><td class="mdescRight">namespace of mxnet <br /></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:namespacemxnet_1_1common"><td class="memItemLeft" align="right" valign="top"> &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common.html">mxnet::common</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:namespacemxnet_1_1common_1_1cuda"><td class="memItemLeft" align="right" valign="top"> &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html">mxnet::common::cuda</a></td></tr>
<tr class="memdesc:namespacemxnet_1_1common_1_1cuda"><td class="mdescLeft">&#160;</td><td class="mdescRight">common utils for cuda <br /></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="define-members"></a>
Macros</h2></td></tr>
<tr class="memitem:a2117b58e19182dff91ad3558e650541d"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a2117b58e19182dff91ad3558e650541d">QUOTE</a>(x)&#160;&#160;&#160;#x</td></tr>
<tr class="memdesc:a2117b58e19182dff91ad3558e650541d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Macros/inlines to assist CLion to parse Cuda files (*.cu, *.cuh) <a href="#a2117b58e19182dff91ad3558e650541d">More...</a><br /></td></tr>
<tr class="separator:a2117b58e19182dff91ad3558e650541d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a257a331aabc15f6c701df3cff96f1b10"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a257a331aabc15f6c701df3cff96f1b10">QUOTEVALUE</a>(x)&#160;&#160;&#160;<a class="el" href="cuda__utils_8h.html#a2117b58e19182dff91ad3558e650541d">QUOTE</a>(x)</td></tr>
<tr class="separator:a257a331aabc15f6c701df3cff96f1b10"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac2d16cdf196c75879d4acda60406e0ef"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#ac2d16cdf196c75879d4acda60406e0ef">STATIC_ASSERT_CUDA_VERSION_GE</a>(min_version)</td></tr>
<tr class="separator:ac2d16cdf196c75879d4acda60406e0ef"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:afc69a418242c5b851993bc2307b1c897"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#afc69a418242c5b851993bc2307b1c897">CHECK_CUDA_ERROR</a>(msg)</td></tr>
<tr class="memdesc:afc69a418242c5b851993bc2307b1c897"><td class="mdescLeft">&#160;</td><td class="mdescRight">When compiling a <code>__device__</code> function, check that the architecture is &gt;= Kepler (3.0). Note that <code>__CUDA_ARCH__</code> is not defined outside of a <code>__device__</code> function. <a href="#afc69a418242c5b851993bc2307b1c897">More...</a><br /></td></tr>
<tr class="separator:afc69a418242c5b851993bc2307b1c897"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a06cc7d24ca66505e69f5ad40009f5e8d"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a06cc7d24ca66505e69f5ad40009f5e8d">CUDA_CALL</a>(func)</td></tr>
<tr class="memdesc:a06cc7d24ca66505e69f5ad40009f5e8d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Protected CUDA call. <a href="#a06cc7d24ca66505e69f5ad40009f5e8d">More...</a><br /></td></tr>
<tr class="separator:a06cc7d24ca66505e69f5ad40009f5e8d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a685d7ca3c9370ff471665abcacdeb381"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a685d7ca3c9370ff471665abcacdeb381">CUBLAS_CALL</a>(func)</td></tr>
<tr class="memdesc:a685d7ca3c9370ff471665abcacdeb381"><td class="mdescLeft">&#160;</td><td class="mdescRight">Protected cuBLAS call. <a href="#a685d7ca3c9370ff471665abcacdeb381">More...</a><br /></td></tr>
<tr class="separator:a685d7ca3c9370ff471665abcacdeb381"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ab38940ff6950f84102baa4573675b670"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#ab38940ff6950f84102baa4573675b670">CUSOLVER_CALL</a>(func)</td></tr>
<tr class="memdesc:ab38940ff6950f84102baa4573675b670"><td class="mdescLeft">&#160;</td><td class="mdescRight">Protected cuSolver call. <a href="#ab38940ff6950f84102baa4573675b670">More...</a><br /></td></tr>
<tr class="separator:ab38940ff6950f84102baa4573675b670"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a82d7233550780a8c186e79c24aed8406"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a82d7233550780a8c186e79c24aed8406">CURAND_CALL</a>(func)</td></tr>
<tr class="memdesc:a82d7233550780a8c186e79c24aed8406"><td class="mdescLeft">&#160;</td><td class="mdescRight">Protected cuRAND call. <a href="#a82d7233550780a8c186e79c24aed8406">More...</a><br /></td></tr>
<tr class="separator:a82d7233550780a8c186e79c24aed8406"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a63b6d263b94df9e33474894ad02b792d"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a63b6d263b94df9e33474894ad02b792d">NVRTC_CALL</a>(x)</td></tr>
<tr class="memdesc:a63b6d263b94df9e33474894ad02b792d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Protected NVRTC call. <a href="#a63b6d263b94df9e33474894ad02b792d">More...</a><br /></td></tr>
<tr class="separator:a63b6d263b94df9e33474894ad02b792d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a0d9b08b9ef45122c54bf5a121aeab5c3"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a0d9b08b9ef45122c54bf5a121aeab5c3">CUDA_DRIVER_CALL</a>(func)</td></tr>
<tr class="memdesc:a0d9b08b9ef45122c54bf5a121aeab5c3"><td class="mdescLeft">&#160;</td><td class="mdescRight">Protected CUDA driver call. <a href="#a0d9b08b9ef45122c54bf5a121aeab5c3">More...</a><br /></td></tr>
<tr class="separator:a0d9b08b9ef45122c54bf5a121aeab5c3"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a685e3713856baaafb1d4edea43725c83"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a685e3713856baaafb1d4edea43725c83">CUDA_UNROLL</a>&#160;&#160;&#160;_Pragma(&quot;unroll&quot;)</td></tr>
<tr class="separator:a685e3713856baaafb1d4edea43725c83"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:addb314f15d765a2ba72ae37dab23c03b"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#addb314f15d765a2ba72ae37dab23c03b">CUDA_NOUNROLL</a>&#160;&#160;&#160;_Pragma(&quot;nounroll&quot;)</td></tr>
<tr class="separator:addb314f15d765a2ba72ae37dab23c03b"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aa7ba00b841d6b7ba443b0e58dac9ab88"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#aa7ba00b841d6b7ba443b0e58dac9ab88">MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT</a>&#160;&#160;&#160;true</td></tr>
<tr class="separator:aa7ba00b841d6b7ba443b0e58dac9ab88"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aa16d34c218441b0d4074baa8c66a5521"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#aa16d34c218441b0d4074baa8c66a5521">MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT</a>&#160;&#160;&#160;false</td></tr>
<tr class="separator:aa16d34c218441b0d4074baa8c66a5521"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
Functions</h2></td></tr>
<tr class="memitem:a9feee613a4f16a954dd68e55345a72ac"><td class="memItemLeft" align="right" valign="top">const char *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">mxnet::common::cuda::CublasGetErrorString</a> (cublasStatus_t error)</td></tr>
<tr class="memdesc:a9feee613a4f16a954dd68e55345a72ac"><td class="mdescLeft">&#160;</td><td class="mdescRight">Get string representation of cuBLAS errors. <a href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">More...</a><br /></td></tr>
<tr class="separator:a9feee613a4f16a954dd68e55345a72ac"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:abf9bcb4cb696e9ae61b818510dac39c8"><td class="memItemLeft" align="right" valign="top">const char *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">mxnet::common::cuda::CusolverGetErrorString</a> (cusolverStatus_t error)</td></tr>
<tr class="memdesc:abf9bcb4cb696e9ae61b818510dac39c8"><td class="mdescLeft">&#160;</td><td class="mdescRight">Get string representation of cuSOLVER errors. <a href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">More...</a><br /></td></tr>
<tr class="separator:abf9bcb4cb696e9ae61b818510dac39c8"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a97c06b2f4d26445a7386b0f54fae1feb"><td class="memItemLeft" align="right" valign="top">const char *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">mxnet::common::cuda::CurandGetErrorString</a> (curandStatus_t status)</td></tr>
<tr class="memdesc:a97c06b2f4d26445a7386b0f54fae1feb"><td class="mdescLeft">&#160;</td><td class="mdescRight">Get string representation of cuRAND errors. <a href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">More...</a><br /></td></tr>
<tr class="separator:a97c06b2f4d26445a7386b0f54fae1feb"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a6f3ee04eb382c57e10916108db3efd80"><td class="memTemplParams" colspan="2">template&lt;typename DType &gt; </td></tr>
<tr class="memitem:a6f3ee04eb382c57e10916108db3efd80"><td class="memTemplItemLeft" align="right" valign="top">DType __device__&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a6f3ee04eb382c57e10916108db3efd80">mxnet::common::cuda::CudaMax</a> (DType a, DType b)</td></tr>
<tr class="separator:a6f3ee04eb382c57e10916108db3efd80"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a03888f252f813f6d052ae84bf8801498"><td class="memTemplParams" colspan="2">template&lt;typename DType &gt; </td></tr>
<tr class="memitem:a03888f252f813f6d052ae84bf8801498"><td class="memTemplItemLeft" align="right" valign="top">DType __device__&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a03888f252f813f6d052ae84bf8801498">mxnet::common::cuda::CudaMin</a> (DType a, DType b)</td></tr>
<tr class="separator:a03888f252f813f6d052ae84bf8801498"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aa7e0a8f7264c65d8000560d84d7fc54d"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#aa7e0a8f7264c65d8000560d84d7fc54d">mxnet::common::cuda::get_load_type</a> (size_t N)</td></tr>
<tr class="memdesc:aa7e0a8f7264c65d8000560d84d7fc54d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Get the largest datatype suitable to read requested number of bytes. <a href="namespacemxnet_1_1common_1_1cuda.html#aa7e0a8f7264c65d8000560d84d7fc54d">More...</a><br /></td></tr>
<tr class="separator:aa7e0a8f7264c65d8000560d84d7fc54d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a7608f1c1700694e453f37cfadfe9e30e"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a7608f1c1700694e453f37cfadfe9e30e">mxnet::common::cuda::get_rows_per_block</a> (size_t row_size, int num_threads_per_block)</td></tr>
<tr class="memdesc:a7608f1c1700694e453f37cfadfe9e30e"><td class="mdescLeft">&#160;</td><td class="mdescRight">Determine how many rows of a 2D matrix a block of threads should handle, based on the row size and the number of threads in a block. <a href="namespacemxnet_1_1common_1_1cuda.html#a7608f1c1700694e453f37cfadfe9e30e">More...</a><br /></td></tr>
<tr class="separator:a7608f1c1700694e453f37cfadfe9e30e"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a31f4237a3ff5be2d420461a9baaffd1e"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a31f4237a3ff5be2d420461a9baaffd1e">cudaAttributeLookup</a> (int device_id, std::vector&lt; int32_t &gt; *cached_values, cudaDeviceAttr attr, const char *attr_name)</td></tr>
<tr class="memdesc:a31f4237a3ff5be2d420461a9baaffd1e"><td class="mdescLeft">&#160;</td><td class="mdescRight">Return an attribute of GPU <code>device_id</code>. <a href="#a31f4237a3ff5be2d420461a9baaffd1e">More...</a><br /></td></tr>
<tr class="separator:a31f4237a3ff5be2d420461a9baaffd1e"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aa79f548df23452162de37663f171e99d"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#aa79f548df23452162de37663f171e99d">ComputeCapabilityMajor</a> (int device_id)</td></tr>
<tr class="memdesc:aa79f548df23452162de37663f171e99d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Determine major version number of the gpu's cuda compute architecture. <a href="#aa79f548df23452162de37663f171e99d">More...</a><br /></td></tr>
<tr class="separator:aa79f548df23452162de37663f171e99d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a7c16e8770e4f399cabed1fc231ffd9b6"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a7c16e8770e4f399cabed1fc231ffd9b6">ComputeCapabilityMinor</a> (int device_id)</td></tr>
<tr class="memdesc:a7c16e8770e4f399cabed1fc231ffd9b6"><td class="mdescLeft">&#160;</td><td class="mdescRight">Determine minor version number of the gpu's cuda compute architecture. <a href="#a7c16e8770e4f399cabed1fc231ffd9b6">More...</a><br /></td></tr>
<tr class="separator:a7c16e8770e4f399cabed1fc231ffd9b6"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a9779e3ad0efd0faec7fbe431c0db896d"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a9779e3ad0efd0faec7fbe431c0db896d">SMArch</a> (int device_id)</td></tr>
<tr class="memdesc:a9779e3ad0efd0faec7fbe431c0db896d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Return the integer SM architecture (e.g. Volta = 70). <a href="#a9779e3ad0efd0faec7fbe431c0db896d">More...</a><br /></td></tr>
<tr class="separator:a9779e3ad0efd0faec7fbe431c0db896d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac51c1cdc60e05dd857bfabca52355f2f"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#ac51c1cdc60e05dd857bfabca52355f2f">MultiprocessorCount</a> (int device_id)</td></tr>
<tr class="memdesc:ac51c1cdc60e05dd857bfabca52355f2f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Return the number of streaming multiprocessors of GPU <code>device_id</code>. <a href="#ac51c1cdc60e05dd857bfabca52355f2f">More...</a><br /></td></tr>
<tr class="separator:ac51c1cdc60e05dd857bfabca52355f2f"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:af5b41c04e3d281500957c305532cd478"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#af5b41c04e3d281500957c305532cd478">MaxSharedMemoryPerMultiprocessor</a> (int device_id)</td></tr>
<tr class="memdesc:af5b41c04e3d281500957c305532cd478"><td class="mdescLeft">&#160;</td><td class="mdescRight">Return the shared memory size in bytes of each of the GPU's streaming multiprocessors. <a href="#af5b41c04e3d281500957c305532cd478">More...</a><br /></td></tr>
<tr class="separator:af5b41c04e3d281500957c305532cd478"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a82a24f3db4d0c91374cb3fe7d413f603"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a82a24f3db4d0c91374cb3fe7d413f603">SupportsCooperativeLaunch</a> (int device_id)</td></tr>
<tr class="memdesc:a82a24f3db4d0c91374cb3fe7d413f603"><td class="mdescLeft">&#160;</td><td class="mdescRight">Return whether the GPU <code>device_id</code> supports cooperative-group kernel launching. <a href="#a82a24f3db4d0c91374cb3fe7d413f603">More...</a><br /></td></tr>
<tr class="separator:a82a24f3db4d0c91374cb3fe7d413f603"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:afb4268417c1d8886a39142c85c8f188f"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#afb4268417c1d8886a39142c85c8f188f">SupportsFloat16Compute</a> (int device_id)</td></tr>
<tr class="memdesc:afb4268417c1d8886a39142c85c8f188f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative. <a href="#afb4268417c1d8886a39142c85c8f188f">More...</a><br /></td></tr>
<tr class="separator:afb4268417c1d8886a39142c85c8f188f"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:af7e22ce6d80d61e8ca37df23880ff1a9"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#af7e22ce6d80d61e8ca37df23880ff1a9">SupportsTensorCore</a> (int device_id)</td></tr>
<tr class="memdesc:af7e22ce6d80d61e8ca37df23880ff1a9"><td class="mdescLeft">&#160;</td><td class="mdescRight">Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative. <a href="#af7e22ce6d80d61e8ca37df23880ff1a9">More...</a><br /></td></tr>
<tr class="separator:af7e22ce6d80d61e8ca37df23880ff1a9"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a464dee13053e3b0b1006c6307069196c"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a464dee13053e3b0b1006c6307069196c">GetEnvAllowTensorCore</a> ()</td></tr>
<tr class="memdesc:a464dee13053e3b0b1006c6307069196c"><td class="mdescLeft">&#160;</td><td class="mdescRight">Returns global policy for TensorCore algo use. <a href="#a464dee13053e3b0b1006c6307069196c">More...</a><br /></td></tr>
<tr class="separator:a464dee13053e3b0b1006c6307069196c"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ad77e70546b7f35ecba0098caa2d07523"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#ad77e70546b7f35ecba0098caa2d07523">GetEnvAllowTensorCoreConversion</a> ()</td></tr>
<tr class="memdesc:ad77e70546b7f35ecba0098caa2d07523"><td class="mdescLeft">&#160;</td><td class="mdescRight">Returns global policy for TensorCore implicit type casting. <a href="#ad77e70546b7f35ecba0098caa2d07523">More...</a><br /></td></tr>
<tr class="separator:ad77e70546b7f35ecba0098caa2d07523"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="var-members"></a>
Variables</h2></td></tr>
<tr class="memitem:a7d0d1e932a096c498381cec82a650cfa"><td class="memItemLeft" align="right" valign="top">constexpr size_t&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a7d0d1e932a096c498381cec82a650cfa">kMaxNumGpus</a> = 64</td></tr>
<tr class="memdesc:a7d0d1e932a096c498381cec82a650cfa"><td class="mdescLeft">&#160;</td><td class="mdescRight">Maximum number of GPUs. <a href="#a7d0d1e932a096c498381cec82a650cfa">More...</a><br /></td></tr>
<tr class="separator:a7d0d1e932a096c498381cec82a650cfa"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><p>Common CUDA utilities. </p>
</div><h2 class="groupheader">Macro Definition Documentation</h2>
<a id="afc69a418242c5b851993bc2307b1c897"></a>
<h2 class="memtitle"><span class="permalink"><a href="#afc69a418242c5b851993bc2307b1c897">&#9670;&nbsp;</a></span>CHECK_CUDA_ERROR</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CHECK_CUDA_ERROR</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">msg</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> cudaError_t e = cudaGetLastError(); \</div><div class="line"> CHECK_EQ(e, cudaSuccess) &lt;&lt; (msg) &lt;&lt; <span class="stringliteral">&quot; CUDA: &quot;</span> &lt;&lt; cudaGetErrorString(e); \</div><div class="line"> }</div></div><!-- fragment -->
<p>When compiling a <code>__device__</code> function, check that the architecture is &gt;= Kepler (3.0). Note that <code>__CUDA_ARCH__</code> is not defined outside of a <code>__device__</code> function. </p>
<p>Check CUDA error. </p><dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">msg</td><td>Message to print if an error occurred. </td></tr>
</table>
</dd>
</dl>
</div>
</div>
<a id="a685d7ca3c9370ff471665abcacdeb381"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a685d7ca3c9370ff471665abcacdeb381">&#9670;&nbsp;</a></span>CUBLAS_CALL</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUBLAS_CALL</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">func</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> cublasStatus_t e = (func); \</div><div class="line"> CHECK_EQ(e, CUBLAS_STATUS_SUCCESS) \</div><div class="line"> &lt;&lt; <span class="stringliteral">&quot;cuBLAS: &quot;</span> &lt;&lt; <a class="code" href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">mxnet::common::cuda::CublasGetErrorString</a>(e); \</div><div class="line"> }</div><div class="ttc" id="namespacemxnet_1_1common_1_1cuda_html_a9feee613a4f16a954dd68e55345a72ac"><div class="ttname"><a href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">mxnet::common::cuda::CublasGetErrorString</a></div><div class="ttdeci">const char * CublasGetErrorString(cublasStatus_t error)</div><div class="ttdoc">Get string representation of cuBLAS errors. </div><div class="ttdef"><b>Definition:</b> cuda_utils.h:257</div></div>
</div><!-- fragment -->
<p>Protected cuBLAS call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">func</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for cuBLAS errors after invocation of the expression. </p>
</div>
</div>
<a id="a06cc7d24ca66505e69f5ad40009f5e8d"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a06cc7d24ca66505e69f5ad40009f5e8d">&#9670;&nbsp;</a></span>CUDA_CALL</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUDA_CALL</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">func</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> cudaError_t e = (func); \</div><div class="line"> CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading) \</div><div class="line"> &lt;&lt; <span class="stringliteral">&quot;CUDA: &quot;</span> &lt;&lt; cudaGetErrorString(e); \</div><div class="line"> }</div></div><!-- fragment -->
<p>Protected CUDA call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">func</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for CUDA errors after invocation of the expression. </p>
</div>
</div>
<a id="a0d9b08b9ef45122c54bf5a121aeab5c3"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a0d9b08b9ef45122c54bf5a121aeab5c3">&#9670;&nbsp;</a></span>CUDA_DRIVER_CALL</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUDA_DRIVER_CALL</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">func</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> CUresult e = (func); \</div><div class="line"> if (e != CUDA_SUCCESS) { \</div><div class="line"> char <span class="keyword">const</span> * err_msg = <span class="keyword">nullptr</span>; \</div><div class="line"> if (cuGetErrorString(e, &amp;err_msg) == CUDA_ERROR_INVALID_VALUE) { \</div><div class="line"> LOG(FATAL) &lt;&lt; <span class="stringliteral">&quot;CUDA Driver: Unknown error &quot;</span> &lt;&lt; e; \</div><div class="line"> } <span class="keywordflow">else</span> { \</div><div class="line"> LOG(FATAL) &lt;&lt; <span class="stringliteral">&quot;CUDA Driver: &quot;</span> &lt;&lt; err_msg; \</div><div class="line"> } \</div><div class="line"> } \</div><div class="line"> }</div></div><!-- fragment -->
<p>Protected CUDA driver call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">func</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for CUDA driver errors after invocation of the expression. </p>
</div>
</div>
<a id="addb314f15d765a2ba72ae37dab23c03b"></a>
<h2 class="memtitle"><span class="permalink"><a href="#addb314f15d765a2ba72ae37dab23c03b">&#9670;&nbsp;</a></span>CUDA_NOUNROLL</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUDA_NOUNROLL&#160;&#160;&#160;_Pragma(&quot;nounroll&quot;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a685e3713856baaafb1d4edea43725c83"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a685e3713856baaafb1d4edea43725c83">&#9670;&nbsp;</a></span>CUDA_UNROLL</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUDA_UNROLL&#160;&#160;&#160;_Pragma(&quot;unroll&quot;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a82d7233550780a8c186e79c24aed8406"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a82d7233550780a8c186e79c24aed8406">&#9670;&nbsp;</a></span>CURAND_CALL</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CURAND_CALL</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">func</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> curandStatus_t e = (func); \</div><div class="line"> CHECK_EQ(e, CURAND_STATUS_SUCCESS) \</div><div class="line"> &lt;&lt; <span class="stringliteral">&quot;cuRAND: &quot;</span> &lt;&lt; <a class="code" href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">mxnet::common::cuda::CurandGetErrorString</a>(e); \</div><div class="line"> }</div><div class="ttc" id="namespacemxnet_1_1common_1_1cuda_html_a97c06b2f4d26445a7386b0f54fae1feb"><div class="ttname"><a href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">mxnet::common::cuda::CurandGetErrorString</a></div><div class="ttdeci">const char * CurandGetErrorString(curandStatus_t status)</div><div class="ttdoc">Get string representation of cuRAND errors. </div><div class="ttdef"><b>Definition:</b> cuda_utils.h:328</div></div>
</div><!-- fragment -->
<p>Protected cuRAND call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">func</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for cuRAND errors after invocation of the expression. </p>
</div>
</div>
<a id="ab38940ff6950f84102baa4573675b670"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ab38940ff6950f84102baa4573675b670">&#9670;&nbsp;</a></span>CUSOLVER_CALL</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUSOLVER_CALL</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">func</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> cusolverStatus_t e = (func); \</div><div class="line"> CHECK_EQ(e, CUSOLVER_STATUS_SUCCESS) \</div><div class="line"> &lt;&lt; <span class="stringliteral">&quot;cuSolver: &quot;</span> &lt;&lt; <a class="code" href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">mxnet::common::cuda::CusolverGetErrorString</a>(e); \</div><div class="line"> }</div><div class="ttc" id="namespacemxnet_1_1common_1_1cuda_html_abf9bcb4cb696e9ae61b818510dac39c8"><div class="ttname"><a href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">mxnet::common::cuda::CusolverGetErrorString</a></div><div class="ttdeci">const char * CusolverGetErrorString(cusolverStatus_t error)</div><div class="ttdoc">Get string representation of cuSOLVER errors. </div><div class="ttdef"><b>Definition:</b> cuda_utils.h:299</div></div>
</div><!-- fragment -->
<p>Protected cuSolver call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">func</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for cuSolver errors after invocation of the expression. </p>
</div>
</div>
<a id="aa7ba00b841d6b7ba443b0e58dac9ab88"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aa7ba00b841d6b7ba443b0e58dac9ab88">&#9670;&nbsp;</a></span>MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT&#160;&#160;&#160;true</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="aa16d34c218441b0d4074baa8c66a5521"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aa16d34c218441b0d4074baa8c66a5521">&#9670;&nbsp;</a></span>MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT&#160;&#160;&#160;false</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a63b6d263b94df9e33474894ad02b792d"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a63b6d263b94df9e33474894ad02b792d">&#9670;&nbsp;</a></span>NVRTC_CALL</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define NVRTC_CALL</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">x</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> nvrtcResult result = x; \</div><div class="line"> CHECK_EQ(result, NVRTC_SUCCESS) \</div><div class="line"> &lt;&lt; #x <span class="stringliteral">&quot; failed with error &quot;</span> \</div><div class="line"> &lt;&lt; nvrtcGetErrorString(result); \</div><div class="line"> }</div></div><!-- fragment -->
<p>Protected NVRTC call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">x</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for NVRTC errors after invocation of the expression. </p>
</div>
</div>
<a id="a2117b58e19182dff91ad3558e650541d"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a2117b58e19182dff91ad3558e650541d">&#9670;&nbsp;</a></span>QUOTE</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define QUOTE</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">x</td><td>)</td>
<td>&#160;&#160;&#160;#x</td>
</tr>
</table>
</div><div class="memdoc">
<p>Macros/inlines to assist CLion in parsing CUDA files (*.cu, *.cuh). </p>
</div>
</div>
<a id="a257a331aabc15f6c701df3cff96f1b10"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a257a331aabc15f6c701df3cff96f1b10">&#9670;&nbsp;</a></span>QUOTEVALUE</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define QUOTEVALUE</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">x</td><td>)</td>
<td>&#160;&#160;&#160;<a class="el" href="cuda__utils_8h.html#a2117b58e19182dff91ad3558e650541d">QUOTE</a>(x)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ac2d16cdf196c75879d4acda60406e0ef"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ac2d16cdf196c75879d4acda60406e0ef">&#9670;&nbsp;</a></span>STATIC_ASSERT_CUDA_VERSION_GE</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define STATIC_ASSERT_CUDA_VERSION_GE</td>
<td>(</td>
<td class="paramtype">&#160;</td>
<td class="paramname">min_version</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">static_assert(CUDA_VERSION &gt;= min_version, <span class="stringliteral">&quot;Compiled-against CUDA version &quot;</span> \</div><div class="line"> <a class="code" href="cuda__utils_8h.html#a257a331aabc15f6c701df3cff96f1b10">QUOTEVALUE</a>(CUDA_VERSION) <span class="stringliteral">&quot; is too old, please upgrade system to version &quot;</span> \</div><div class="line"> <a class="code" href="cuda__utils_8h.html#a257a331aabc15f6c701df3cff96f1b10">QUOTEVALUE</a>(min_version) <span class="stringliteral">&quot; or later.&quot;</span>)</div><div class="ttc" id="cuda__utils_8h_html_a257a331aabc15f6c701df3cff96f1b10"><div class="ttname"><a href="cuda__utils_8h.html#a257a331aabc15f6c701df3cff96f1b10">QUOTEVALUE</a></div><div class="ttdeci">#define QUOTEVALUE(x)</div><div class="ttdef"><b>Definition:</b> cuda_utils.h:51</div></div>
</div><!-- fragment -->
</div>
</div>
<h2 class="groupheader">Function Documentation</h2>
<a id="aa79f548df23452162de37663f171e99d"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aa79f548df23452162de37663f171e99d">&#9670;&nbsp;</a></span>ComputeCapabilityMajor()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int ComputeCapabilityMajor </td>
<td>(</td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Determine major version number of the gpu's cuda compute architecture. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the major version number of the gpu's cuda compute architecture. </dd></dl>
</div>
</div>
<a id="a7c16e8770e4f399cabed1fc231ffd9b6"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a7c16e8770e4f399cabed1fc231ffd9b6">&#9670;&nbsp;</a></span>ComputeCapabilityMinor()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int ComputeCapabilityMinor </td>
<td>(</td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Determine minor version number of the gpu's cuda compute architecture. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the minor version number of the gpu's cuda compute architecture. </dd></dl>
</div>
</div>
<a id="a31f4237a3ff5be2d420461a9baaffd1e"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a31f4237a3ff5be2d420461a9baaffd1e">&#9670;&nbsp;</a></span>cudaAttributeLookup()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int cudaAttributeLookup </td>
<td>(</td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>device_id</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">std::vector&lt; int32_t &gt; *&#160;</td>
<td class="paramname"><em>cached_values</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">cudaDeviceAttr&#160;</td>
<td class="paramname"><em>attr</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const char *&#160;</td>
<td class="paramname"><em>attr_name</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Return an attribute of GPU <code>device_id</code>. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
<tr><td class="paramname">cached_values</td><td>An array of attributes for already-looked-up GPUs. </td></tr>
<tr><td class="paramname">attr</td><td>The attribute, by number. </td></tr>
<tr><td class="paramname">attr_name</td><td>A string representation of the attribute, for error messages. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the gpu's attribute value. </dd></dl>
</div>
</div>
<a id="a464dee13053e3b0b1006c6307069196c"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a464dee13053e3b0b1006c6307069196c">&#9670;&nbsp;</a></span>GetEnvAllowTensorCore()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">bool GetEnvAllowTensorCore </td>
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Returns global policy for TensorCore algo use. </p>
<dl class="section return"><dt>Returns</dt><dd>whether to allow TensorCore algo (if not specified by the Operator locally). </dd></dl>
</div>
</div>
<a id="ad77e70546b7f35ecba0098caa2d07523"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ad77e70546b7f35ecba0098caa2d07523">&#9670;&nbsp;</a></span>GetEnvAllowTensorCoreConversion()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">bool GetEnvAllowTensorCoreConversion </td>
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Returns global policy for TensorCore implicit type casting. </p>
</div>
</div>
<a id="af5b41c04e3d281500957c305532cd478"></a>
<h2 class="memtitle"><span class="permalink"><a href="#af5b41c04e3d281500957c305532cd478">&#9670;&nbsp;</a></span>MaxSharedMemoryPerMultiprocessor()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int MaxSharedMemoryPerMultiprocessor </td>
<td>(</td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Return the shared memory size in bytes of each of the GPU's streaming multiprocessors. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the shared memory size per streaming multiprocessor. </dd></dl>
</div>
</div>
<a id="ac51c1cdc60e05dd857bfabca52355f2f"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ac51c1cdc60e05dd857bfabca52355f2f">&#9670;&nbsp;</a></span>MultiprocessorCount()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int MultiprocessorCount </td>
<td>(</td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Return the number of streaming multiprocessors of GPU <code>device_id</code>. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the gpu's count of streaming multiprocessors. </dd></dl>
</div>
</div>
<a id="a9779e3ad0efd0faec7fbe431c0db896d"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a9779e3ad0efd0faec7fbe431c0db896d">&#9670;&nbsp;</a></span>SMArch()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int SMArch </td>
<td>(</td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Return the integer SM architecture (e.g. Volta = 70). </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the gpu's cuda compute architecture as an int. </dd></dl>
</div>
</div>
<a id="a82a24f3db4d0c91374cb3fe7d413f603"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a82a24f3db4d0c91374cb3fe7d413f603">&#9670;&nbsp;</a></span>SupportsCooperativeLaunch()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">bool SupportsCooperativeLaunch </td>
<td>(</td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Return whether the GPU <code>device_id</code> supports cooperative-group kernel launching. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the gpu's ability to run cooperative-group kernels. </dd></dl>
</div>
</div>
<a id="afb4268417c1d8886a39142c85c8f188f"></a>
<h2 class="memtitle"><span class="permalink"><a href="#afb4268417c1d8886a39142c85c8f188f">&#9670;&nbsp;</a></span>SupportsFloat16Compute()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">bool SupportsFloat16Compute </td>
<td>(</td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>whether the gpu's architecture supports float16 math. </dd></dl>
</div>
</div>
<a id="af7e22ce6d80d61e8ca37df23880ff1a9"></a>
<h2 class="memtitle"><span class="permalink"><a href="#af7e22ce6d80d61e8ca37df23880ff1a9">&#9670;&nbsp;</a></span>SupportsTensorCore()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">bool SupportsTensorCore </td>
<td>(</td>
<td class="paramtype">int&#160;</td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>whether the gpu's architecture supports Tensor Core math. </dd></dl>
</div>
</div>
<h2 class="groupheader">Variable Documentation</h2>
<a id="a7d0d1e932a096c498381cec82a650cfa"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a7d0d1e932a096c498381cec82a650cfa">&#9670;&nbsp;</a></span>kMaxNumGpus</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">constexpr size_t kMaxNumGpus = 64</td>
</tr>
</table>
</div><div class="memdoc">
<p>Maximum number of GPUs. </p>
</div>
</div>
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated on Thu Jan 5 2023 00:58:43 for mxnet by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.13
</small></address>
</body>
</html>