<!-- blob: 4dc05a8ed7d41617316b89e34fec8ee25c9cda13 [file] [log] [blame] -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html;charset=utf-8" http-equiv="Content-Type"/>
<meta content="IE=9" http-equiv="X-UA-Compatible"/>
<meta content="Doxygen 1.8.11" name="generator"/>
<title>mxnet: src/common/cuda_utils.h File Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script src="jquery.js" type="text/javascript"></script>
<script src="dynsections.js" type="text/javascript"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script src="search/searchdata.js" type="text/javascript"></script>
<script src="search/search.js" type="text/javascript"></script>
<script type="text/javascript">
$(document).ready(function() { init_search(); });
</script>
<link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellpadding="0" cellspacing="0">
<tbody>
<tr style="height: 56px;">
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">mxnet
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.11 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<div class="tabs" id="navrow1">
<ul class="tablist">
<li><a href="index.html"><span>Main Page</span></a></li>
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
<li><a href="annotated.html"><span>Classes</span></a></li>
<li class="current"><a href="files.html"><span>Files</span></a></li>
<li>
<div class="MSearchBoxInactive" id="MSearchBox">
<span class="left">
<img alt="" id="MSearchSelect" onmouseout="return searchBox.OnSearchSelectHide()" onmouseover="return searchBox.OnSearchSelectShow()" src="search/mag_sel.png"/>
<input accesskey="S" id="MSearchField" onblur="searchBox.OnSearchFieldFocus(false)" onfocus="searchBox.OnSearchFieldFocus(true)" onkeyup="searchBox.OnSearchFieldChange(event)" type="text" value="Search"/>
</span><span class="right">
<a href="javascript:searchBox.CloseResultsWindow()" id="MSearchClose"><img alt="" border="0" id="MSearchCloseImg" src="search/close.png"/></a>
</span>
</div>
</li>
</ul>
</div>
<div class="tabs2" id="navrow2">
<ul class="tablist">
<li><a href="files.html"><span>File List</span></a></li>
<li><a href="globals.html"><span>File Members</span></a></li>
</ul>
</div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow" onkeydown="return searchBox.OnSearchSelectKey(event)" onmouseout="return searchBox.OnSearchSelectHide()" onmouseover="return searchBox.OnSearchSelectShow()">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe frameborder="0" id="MSearchResults" name="MSearchResults" src="javascript:void(0)" title="Search results">
</iframe>
</div>
<div class="navpath" id="nav-path">
<ul>
<li class="navelem"><a class="el" href="dir_68267d1309a1af8e8297ef4c3efbcdba.html">src</a></li><li class="navelem"><a class="el" href="dir_fdedb0aba14d44ce9d99bc100e026e6a.html">common</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#namespaces">Namespaces</a> |
<a href="#define-members">Macros</a> |
<a href="#func-members">Functions</a> </div>
<div class="headertitle">
<div class="title">cuda_utils.h File Reference</div> </div>
</div><!--header-->
<div class="contents">
<p>CUDA debugging utilities.
<a href="#details">More...</a></p>
<div class="textblock"><code>#include &lt;dmlc/logging.h&gt;</code><br/>
<code>#include &lt;dmlc/parameter.h&gt;</code><br/>
<code>#include &lt;dmlc/optional.h&gt;</code><br/>
<code>#include &lt;mshadow/base.h&gt;</code><br/>
<code>#include &lt;cuda_runtime.h&gt;</code><br/>
<code>#include &lt;cublas_v2.h&gt;</code><br/>
<code>#include &lt;curand.h&gt;</code><br/>
</div><div class="textblock"><div class="dynheader">
Include dependency graph for cuda_utils.h:</div>
<div class="dyncontent">
<div class="center"><img alt="" border="0" src="cuda__utils_8h__incl.png" usemap="#src_2common_2cuda__utils_8h"/></div>
<map id="src_2common_2cuda__utils_8h" name="src_2common_2cuda__utils_8h">
</map>
</div>
</div>
<p><a href="cuda__utils_8h_source.html">Go to the source code of this file.</a></p>
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a>
Namespaces</h2></td></tr>
<tr class="memitem:namespacemxnet"><td align="right" class="memItemLeft" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet.html">mxnet</a></td></tr>
<tr class="memdesc:namespacemxnet"><td class="mdescLeft"> </td><td class="mdescRight">namespace of mxnet <br/></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:namespacemxnet_1_1common"><td align="right" class="memItemLeft" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common.html">mxnet::common</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:namespacemxnet_1_1common_1_1cuda"><td align="right" class="memItemLeft" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html">mxnet::common::cuda</a></td></tr>
<tr class="memdesc:namespacemxnet_1_1common_1_1cuda"><td class="mdescLeft"> </td><td class="mdescRight">common utils for cuda <br/></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="define-members"></a>
Macros</h2></td></tr>
<tr class="memitem:afc69a418242c5b851993bc2307b1c897"><td align="right" class="memItemLeft" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#afc69a418242c5b851993bc2307b1c897">CHECK_CUDA_ERROR</a>(msg)</td></tr>
<tr class="memdesc:afc69a418242c5b851993bc2307b1c897"><td class="mdescLeft"> </td><td class="mdescRight">Check CUDA error. <a href="#afc69a418242c5b851993bc2307b1c897">More...</a><br/></td></tr>
<tr class="separator:afc69a418242c5b851993bc2307b1c897"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a06cc7d24ca66505e69f5ad40009f5e8d"><td align="right" class="memItemLeft" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a06cc7d24ca66505e69f5ad40009f5e8d">CUDA_CALL</a>(func)</td></tr>
<tr class="memdesc:a06cc7d24ca66505e69f5ad40009f5e8d"><td class="mdescLeft"> </td><td class="mdescRight">Protected CUDA call. <a href="#a06cc7d24ca66505e69f5ad40009f5e8d">More...</a><br/></td></tr>
<tr class="separator:a06cc7d24ca66505e69f5ad40009f5e8d"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a685d7ca3c9370ff471665abcacdeb381"><td align="right" class="memItemLeft" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a685d7ca3c9370ff471665abcacdeb381">CUBLAS_CALL</a>(func)</td></tr>
<tr class="memdesc:a685d7ca3c9370ff471665abcacdeb381"><td class="mdescLeft"> </td><td class="mdescRight">Protected cuBLAS call. <a href="#a685d7ca3c9370ff471665abcacdeb381">More...</a><br/></td></tr>
<tr class="separator:a685d7ca3c9370ff471665abcacdeb381"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:ab38940ff6950f84102baa4573675b670"><td align="right" class="memItemLeft" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#ab38940ff6950f84102baa4573675b670">CUSOLVER_CALL</a>(func)</td></tr>
<tr class="memdesc:ab38940ff6950f84102baa4573675b670"><td class="mdescLeft"> </td><td class="mdescRight">Protected cuSolver call. <a href="#ab38940ff6950f84102baa4573675b670">More...</a><br/></td></tr>
<tr class="separator:ab38940ff6950f84102baa4573675b670"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a82d7233550780a8c186e79c24aed8406"><td align="right" class="memItemLeft" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a82d7233550780a8c186e79c24aed8406">CURAND_CALL</a>(func)</td></tr>
<tr class="memdesc:a82d7233550780a8c186e79c24aed8406"><td class="mdescLeft"> </td><td class="mdescRight">Protected cuRAND call. <a href="#a82d7233550780a8c186e79c24aed8406">More...</a><br/></td></tr>
<tr class="separator:a82d7233550780a8c186e79c24aed8406"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a685e3713856baaafb1d4edea43725c83"><td align="right" class="memItemLeft" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a685e3713856baaafb1d4edea43725c83">CUDA_UNROLL</a>   _Pragma("unroll")</td></tr>
<tr class="separator:a685e3713856baaafb1d4edea43725c83"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:addb314f15d765a2ba72ae37dab23c03b"><td align="right" class="memItemLeft" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#addb314f15d765a2ba72ae37dab23c03b">CUDA_NOUNROLL</a>   _Pragma("nounroll")</td></tr>
<tr class="separator:addb314f15d765a2ba72ae37dab23c03b"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:aa7ba00b841d6b7ba443b0e58dac9ab88"><td align="right" class="memItemLeft" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#aa7ba00b841d6b7ba443b0e58dac9ab88">MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT</a>   true</td></tr>
<tr class="separator:aa7ba00b841d6b7ba443b0e58dac9ab88"><td class="memSeparator" colspan="2"> </td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
Functions</h2></td></tr>
<tr class="memitem:a9feee613a4f16a954dd68e55345a72ac"><td align="right" class="memItemLeft" valign="top">const char * </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">mxnet::common::cuda::CublasGetErrorString</a> (cublasStatus_t error)</td></tr>
<tr class="memdesc:a9feee613a4f16a954dd68e55345a72ac"><td class="mdescLeft"> </td><td class="mdescRight">Get string representation of cuBLAS errors. <a href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">More...</a><br/></td></tr>
<tr class="separator:a9feee613a4f16a954dd68e55345a72ac"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:abf9bcb4cb696e9ae61b818510dac39c8"><td align="right" class="memItemLeft" valign="top">const char * </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">mxnet::common::cuda::CusolverGetErrorString</a> (cusolverStatus_t error)</td></tr>
<tr class="memdesc:abf9bcb4cb696e9ae61b818510dac39c8"><td class="mdescLeft"> </td><td class="mdescRight">Get string representation of cuSOLVER errors. <a href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">More...</a><br/></td></tr>
<tr class="separator:abf9bcb4cb696e9ae61b818510dac39c8"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a97c06b2f4d26445a7386b0f54fae1feb"><td align="right" class="memItemLeft" valign="top">const char * </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">mxnet::common::cuda::CurandGetErrorString</a> (curandStatus_t status)</td></tr>
<tr class="memdesc:a97c06b2f4d26445a7386b0f54fae1feb"><td class="mdescLeft"> </td><td class="mdescRight">Get string representation of cuRAND errors. <a href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">More...</a><br/></td></tr>
<tr class="separator:a97c06b2f4d26445a7386b0f54fae1feb"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a6f3ee04eb382c57e10916108db3efd80"><td class="memTemplParams" colspan="2">template&lt;typename DType &gt; </td></tr>
<tr class="memitem:a6f3ee04eb382c57e10916108db3efd80"><td align="right" class="memTemplItemLeft" valign="top">DType __device__ </td><td class="memTemplItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a6f3ee04eb382c57e10916108db3efd80">mxnet::common::cuda::CudaMax</a> (DType a, DType b)</td></tr>
<tr class="separator:a6f3ee04eb382c57e10916108db3efd80"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a03888f252f813f6d052ae84bf8801498"><td class="memTemplParams" colspan="2">template&lt;typename DType &gt; </td></tr>
<tr class="memitem:a03888f252f813f6d052ae84bf8801498"><td align="right" class="memTemplItemLeft" valign="top">DType __device__ </td><td class="memTemplItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a03888f252f813f6d052ae84bf8801498">mxnet::common::cuda::CudaMin</a> (DType a, DType b)</td></tr>
<tr class="separator:a03888f252f813f6d052ae84bf8801498"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:aa79f548df23452162de37663f171e99d"><td align="right" class="memItemLeft" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#aa79f548df23452162de37663f171e99d">ComputeCapabilityMajor</a> (int device_id)</td></tr>
<tr class="memdesc:aa79f548df23452162de37663f171e99d"><td class="mdescLeft"> </td><td class="mdescRight">Determine major version number of the gpu's cuda compute architecture. <a href="#aa79f548df23452162de37663f171e99d">More...</a><br/></td></tr>
<tr class="separator:aa79f548df23452162de37663f171e99d"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a7c16e8770e4f399cabed1fc231ffd9b6"><td align="right" class="memItemLeft" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a7c16e8770e4f399cabed1fc231ffd9b6">ComputeCapabilityMinor</a> (int device_id)</td></tr>
<tr class="memdesc:a7c16e8770e4f399cabed1fc231ffd9b6"><td class="mdescLeft"> </td><td class="mdescRight">Determine minor version number of the gpu's cuda compute architecture. <a href="#a7c16e8770e4f399cabed1fc231ffd9b6">More...</a><br/></td></tr>
<tr class="separator:a7c16e8770e4f399cabed1fc231ffd9b6"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a9779e3ad0efd0faec7fbe431c0db896d"><td align="right" class="memItemLeft" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a9779e3ad0efd0faec7fbe431c0db896d">SMArch</a> (int device_id)</td></tr>
<tr class="memdesc:a9779e3ad0efd0faec7fbe431c0db896d"><td class="mdescLeft"> </td><td class="mdescRight">Return the integer SM architecture (e.g. Volta = 70). <a href="#a9779e3ad0efd0faec7fbe431c0db896d">More...</a><br/></td></tr>
<tr class="separator:a9779e3ad0efd0faec7fbe431c0db896d"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:afb4268417c1d8886a39142c85c8f188f"><td align="right" class="memItemLeft" valign="top">bool </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#afb4268417c1d8886a39142c85c8f188f">SupportsFloat16Compute</a> (int device_id)</td></tr>
<tr class="memdesc:afb4268417c1d8886a39142c85c8f188f"><td class="mdescLeft"> </td><td class="mdescRight">Determine whether a cuda-capable gpu's architecture supports float16 math. <a href="#afb4268417c1d8886a39142c85c8f188f">More...</a><br/></td></tr>
<tr class="separator:afb4268417c1d8886a39142c85c8f188f"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:af7e22ce6d80d61e8ca37df23880ff1a9"><td align="right" class="memItemLeft" valign="top">bool </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#af7e22ce6d80d61e8ca37df23880ff1a9">SupportsTensorCore</a> (int device_id)</td></tr>
<tr class="memdesc:af7e22ce6d80d61e8ca37df23880ff1a9"><td class="mdescLeft"> </td><td class="mdescRight">Determine whether a cuda-capable gpu's architecture supports Tensor Core math. <a href="#af7e22ce6d80d61e8ca37df23880ff1a9">More...</a><br/></td></tr>
<tr class="separator:af7e22ce6d80d61e8ca37df23880ff1a9"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a464dee13053e3b0b1006c6307069196c"><td align="right" class="memItemLeft" valign="top">bool </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda__utils_8h.html#a464dee13053e3b0b1006c6307069196c">GetEnvAllowTensorCore</a> ()</td></tr>
<tr class="memdesc:a464dee13053e3b0b1006c6307069196c"><td class="mdescLeft"> </td><td class="mdescRight">Returns global policy for TensorCore algo use. <a href="#a464dee13053e3b0b1006c6307069196c">More...</a><br/></td></tr>
<tr class="separator:a464dee13053e3b0b1006c6307069196c"><td class="memSeparator" colspan="2"> </td></tr>
</table>
<a id="details" name="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><p>CUDA debugging utilities. </p>
</div><h2 class="groupheader">Macro Definition Documentation</h2>
<a class="anchor" id="afc69a418242c5b851993bc2307b1c897"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CHECK_CUDA_ERROR</td>
<td>(</td>
<td class="paramtype"> </td>
<td class="paramname">msg</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> cudaError_t e = cudaGetLastError(); \</div><div class="line"> CHECK_EQ(e, cudaSuccess) &lt;&lt; (msg) &lt;&lt; <span class="stringliteral">" CUDA: "</span> &lt;&lt; cudaGetErrorString(e); \</div><div class="line"> }</div></div><!-- fragment -->
<p>Check CUDA error. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">msg</td><td>Message to print if an error occurred. </td></tr>
</table>
</dd>
</dl>
</div>
</div>
<a class="anchor" id="a685d7ca3c9370ff471665abcacdeb381"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUBLAS_CALL</td>
<td>(</td>
<td class="paramtype"> </td>
<td class="paramname">func</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> cublasStatus_t e = (func); \</div><div class="line"> CHECK_EQ(e, CUBLAS_STATUS_SUCCESS) \</div><div class="line"> &lt;&lt; <span class="stringliteral">"cuBLAS: "</span> &lt;&lt; <a class="code" href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">common::cuda::CublasGetErrorString</a>(e); \</div><div class="line"> }</div><div class="ttc" id="namespacemxnet_1_1common_1_1cuda_html_a9feee613a4f16a954dd68e55345a72ac"><div class="ttname"><a href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">mxnet::common::cuda::CublasGetErrorString</a></div><div class="ttdeci">const char * CublasGetErrorString(cublasStatus_t error)</div><div class="ttdoc">Get string representation of cuBLAS errors. </div><div class="ttdef"><b>Definition:</b> cuda_utils.h:64</div></div>
</div><!-- fragment -->
<p>Protected cuBLAS call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">func</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for cuBLAS errors after invocation of the expression. </p>
</div>
</div>
<a class="anchor" id="a06cc7d24ca66505e69f5ad40009f5e8d"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUDA_CALL</td>
<td>(</td>
<td class="paramtype"> </td>
<td class="paramname">func</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> cudaError_t e = (func); \</div><div class="line"> CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading) \</div><div class="line"> &lt;&lt; <span class="stringliteral">"CUDA: "</span> &lt;&lt; cudaGetErrorString(e); \</div><div class="line"> }</div></div><!-- fragment -->
<p>Protected CUDA call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">func</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for CUDA errors after invocation of the expression. </p>
</div>
</div>
<a class="anchor" id="addb314f15d765a2ba72ae37dab23c03b"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUDA_NOUNROLL   _Pragma("nounroll")</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a class="anchor" id="a685e3713856baaafb1d4edea43725c83"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUDA_UNROLL   _Pragma("unroll")</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a class="anchor" id="a82d7233550780a8c186e79c24aed8406"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CURAND_CALL</td>
<td>(</td>
<td class="paramtype"> </td>
<td class="paramname">func</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> curandStatus_t e = (func); \</div><div class="line"> CHECK_EQ(e, CURAND_STATUS_SUCCESS) \</div><div class="line"> &lt;&lt; <span class="stringliteral">"cuRAND: "</span> &lt;&lt; <a class="code" href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">common::cuda::CurandGetErrorString</a>(e); \</div><div class="line"> }</div><div class="ttc" id="namespacemxnet_1_1common_1_1cuda_html_a97c06b2f4d26445a7386b0f54fae1feb"><div class="ttname"><a href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">mxnet::common::cuda::CurandGetErrorString</a></div><div class="ttdeci">const char * CurandGetErrorString(curandStatus_t status)</div><div class="ttdoc">Get string representation of cuRAND errors. </div><div class="ttdef"><b>Definition:</b> cuda_utils.h:124</div></div>
</div><!-- fragment -->
<p>Protected cuRAND call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">func</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for cuRAND errors after invocation of the expression. </p>
</div>
</div>
<a class="anchor" id="ab38940ff6950f84102baa4573675b670"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define CUSOLVER_CALL</td>
<td>(</td>
<td class="paramtype"> </td>
<td class="paramname">func</td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<b>Value:</b><div class="fragment"><div class="line">{ \</div><div class="line"> cusolverStatus_t e = (func); \</div><div class="line"> CHECK_EQ(e, CUSOLVER_STATUS_SUCCESS) \</div><div class="line"> &lt;&lt; <span class="stringliteral">"cuSolver: "</span> &lt;&lt; <a class="code" href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">common::cuda::CusolverGetErrorString</a>(e); \</div><div class="line"> }</div><div class="ttc" id="namespacemxnet_1_1common_1_1cuda_html_abf9bcb4cb696e9ae61b818510dac39c8"><div class="ttname"><a href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">mxnet::common::cuda::CusolverGetErrorString</a></div><div class="ttdeci">const char * CusolverGetErrorString(cusolverStatus_t error)</div><div class="ttdoc">Get string representation of cuSOLVER errors. </div><div class="ttdef"><b>Definition:</b> cuda_utils.h:95</div></div>
</div><!-- fragment -->
<p>Protected cuSolver call. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">func</td><td>Expression to call.</td></tr>
</table>
</dd>
</dl>
<p>It checks for cuSolver errors after invocation of the expression. </p>
</div>
</div>
<a class="anchor" id="aa7ba00b841d6b7ba443b0e58dac9ab88"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT   true</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<h2 class="groupheader">Function Documentation</h2>
<a class="anchor" id="aa79f548df23452162de37663f171e99d"></a>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int ComputeCapabilityMajor </td>
<td>(</td>
<td class="paramtype">int </td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Determine major version number of the gpu's cuda compute architecture. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the major version number of the gpu's cuda compute architecture. </dd></dl>
</div>
</div>
<a class="anchor" id="a7c16e8770e4f399cabed1fc231ffd9b6"></a>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int ComputeCapabilityMinor </td>
<td>(</td>
<td class="paramtype">int </td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Determine minor version number of the gpu's cuda compute architecture. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the minor version number of the gpu's cuda compute architecture. </dd></dl>
</div>
</div>
<a class="anchor" id="a464dee13053e3b0b1006c6307069196c"></a>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">bool GetEnvAllowTensorCore </td>
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Returns global policy for TensorCore algo use. </p>
<dl class="section return"><dt>Returns</dt><dd>whether to allow TensorCore algo (if not specified by the Operator locally). </dd></dl>
</div>
</div>
<a class="anchor" id="a9779e3ad0efd0faec7fbe431c0db896d"></a>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int SMArch </td>
<td>(</td>
<td class="paramtype">int </td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Return the integer SM architecture (e.g. Volta = 70). </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the gpu's cuda compute architecture as an int. </dd></dl>
</div>
</div>
<a class="anchor" id="afb4268417c1d8886a39142c85c8f188f"></a>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">bool SupportsFloat16Compute </td>
<td>(</td>
<td class="paramtype">int </td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Determine whether a cuda-capable gpu's architecture supports float16 math. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>whether the gpu's architecture supports float16 math. </dd></dl>
</div>
</div>
<a class="anchor" id="af7e22ce6d80d61e8ca37df23880ff1a9"></a>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">bool SupportsTensorCore </td>
<td>(</td>
<td class="paramtype">int </td>
<td class="paramname"><em>device_id</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Determine whether a cuda-capable gpu's architecture supports Tensor Core math. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>whether the gpu's architecture supports Tensor Core math. </dd></dl>
</div>
</div>
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated on Thu Sep 19 2019 13:11:20 for mxnet by  <a href="http://www.doxygen.org/index.html">
<img alt="doxygen" class="footer" src="doxygen.png"/>
</a> 1.8.11
</small></address>
</body>
</html>