| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> |
| <meta http-equiv="X-UA-Compatible" content="IE=9"/> |
| <meta name="generator" content="Doxygen 1.8.17"/> |
| <meta name="viewport" content="width=device-width, initial-scale=1"/> |
| <title>mxnet: /work/mxnet/src/common/cuda/utils.h File Reference</title> |
| <link href="tabs.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="jquery.js"></script> |
| <script type="text/javascript" src="dynsections.js"></script> |
| <link href="search/search.css" rel="stylesheet" type="text/css"/> |
| <script type="text/javascript" src="search/searchdata.js"></script> |
| <script type="text/javascript" src="search/search.js"></script> |
| <link href="doxygen.css" rel="stylesheet" type="text/css" /> |
| </head> |
| <body> |
| <div id="top"><!-- do not remove this div, it is closed by doxygen! --> |
| <div id="titlearea"> |
| <table cellspacing="0" cellpadding="0"> |
| <tbody> |
| <tr style="height: 56px;"> |
| <td id="projectalign" style="padding-left: 0.5em;"> |
| <div id="projectname">mxnet |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <!-- end header part --> |
| <!-- Generated by Doxygen 1.8.17 --> |
| <script type="text/javascript"> |
| /* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&dn=gpl-2.0.txt GPL-v2 */ |
| var searchBox = new SearchBox("searchBox", "search",false,'Search'); |
| /* @license-end */ |
| </script> |
| <script type="text/javascript" src="menudata.js"></script> |
| <script type="text/javascript" src="menu.js"></script> |
| <script type="text/javascript"> |
| /* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&dn=gpl-2.0.txt GPL-v2 */ |
| $(function() { |
| initMenu('',true,false,'search.php','Search'); |
| $(document).ready(function() { init_search(); }); |
| }); |
| /* @license-end */</script> |
| <div id="main-nav"></div> |
| <!-- window showing the filter options --> |
| <div id="MSearchSelectWindow" |
| onmouseover="return searchBox.OnSearchSelectShow()" |
| onmouseout="return searchBox.OnSearchSelectHide()" |
| onkeydown="return searchBox.OnSearchSelectKey(event)"> |
| </div> |
| |
| <!-- iframe showing the search results (closed by default) --> |
| <div id="MSearchResultsWindow"> |
| <iframe src="javascript:void(0)" frameborder="0" |
| name="MSearchResults" id="MSearchResults"> |
| </iframe> |
| </div> |
| |
| <div id="nav-path" class="navpath"> |
| <ul> |
| <li class="navelem"><a class="el" href="dir_68267d1309a1af8e8297ef4c3efbcdba.html">src</a></li><li class="navelem"><a class="el" href="dir_fdedb0aba14d44ce9d99bc100e026e6a.html">common</a></li><li class="navelem"><a class="el" href="dir_5d530576593496167de63f3f304bdbc7.html">cuda</a></li> </ul> |
| </div> |
| </div><!-- top --> |
| <div class="header"> |
| <div class="summary"> |
| <a href="#nested-classes">Classes</a> | |
| <a href="#namespaces">Namespaces</a> | |
| <a href="#define-members">Macros</a> | |
| <a href="#func-members">Functions</a> | |
| <a href="#var-members">Variables</a> </div> |
| <div class="headertitle"> |
| <div class="title">utils.h File Reference</div> </div> |
| </div><!--header--> |
| <div class="contents"> |
| |
| <p>Common CUDA utilities. |
| <a href="#details">More...</a></p> |
| <div class="textblock"><code>#include &lt;dmlc/logging.h&gt;</code><br /> |
| <code>#include &lt;<a class="el" href="parameter_8h_source.html">dmlc/parameter.h</a>&gt;</code><br /> |
| <code>#include &lt;<a class="el" href="optional_8h_source.html">dmlc/optional.h</a>&gt;</code><br /> |
| <code>#include &lt;<a class="el" href="3rdparty_2mshadow_2mshadow_2base_8h_source.html">mshadow/base.h</a>&gt;</code><br /> |
| <code>#include &lt;<a class="el" href="libinfo_8h_source.html">mxnet/libinfo.h</a>&gt;</code><br /> |
| <code>#include &lt;cuda_runtime.h&gt;</code><br /> |
| <code>#include &lt;cublas_v2.h&gt;</code><br /> |
| <code>#include &lt;curand.h&gt;</code><br /> |
| <code>#include &lt;vector&gt;</code><br /> |
| </div><div class="textblock"><div class="dynheader"> |
| Include dependency graph for utils.h:</div> |
| <div class="dyncontent"> |
| <div class="center"><img src="cuda_2utils_8h__incl.png" border="0" usemap="#_2work_2mxnet_2src_2common_2cuda_2utils_8h" alt=""/></div> |
| <!-- MAP 0 --> |
| </div> |
| </div> |
| <p><a href="cuda_2utils_8h_source.html">Go to the source code of this file.</a></p> |
| <table class="memberdecls"> |
| <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a> |
| Classes</h2></td></tr> |
| <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType.html">mxnet::common::cuda::CublasType< DType ></a></td></tr> |
| <tr class="memdesc:"><td class="mdescLeft"> </td><td class="mdescRight">Converts between C++ datatypes and enums/constants needed by cuBLAS. <a href="structmxnet_1_1common_1_1cuda_1_1CublasType.html#details">More...</a><br /></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01float_01_4.html">mxnet::common::cuda::CublasType< float ></a></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01double_01_4.html">mxnet::common::cuda::CublasType< double ></a></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01mshadow_1_1half_1_1half__t_01_4.html">mxnet::common::cuda::CublasType< mshadow::half::half_t ></a></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01uint8__t_01_4.html">mxnet::common::cuda::CublasType< uint8_t ></a></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct  </td><td class="memItemRight" valign="bottom"><a class="el" href="structmxnet_1_1common_1_1cuda_1_1CublasType_3_01int32__t_01_4.html">mxnet::common::cuda::CublasType< int32_t ></a></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class  </td><td class="memItemRight" valign="bottom"><a class="el" href="classmxnet_1_1common_1_1cuda_1_1DeviceStore.html">mxnet::common::cuda::DeviceStore</a></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| </table><table class="memberdecls"> |
| <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="namespaces"></a> |
| Namespaces</h2></td></tr> |
| <tr class="memitem:namespacemxnet"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet.html">mxnet</a></td></tr> |
| <tr class="memdesc:namespacemxnet"><td class="mdescLeft"> </td><td class="mdescRight">namespace of mxnet <br /></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:namespacemxnet_1_1common"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common.html">mxnet::common</a></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:namespacemxnet_1_1common_1_1cuda"><td class="memItemLeft" align="right" valign="top">  </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html">mxnet::common::cuda</a></td></tr> |
| <tr class="memdesc:namespacemxnet_1_1common_1_1cuda"><td class="mdescLeft"> </td><td class="mdescRight">common utils for cuda <br /></td></tr> |
| <tr class="separator:"><td class="memSeparator" colspan="2"> </td></tr> |
| </table><table class="memberdecls"> |
| <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="define-members"></a> |
| Macros</h2></td></tr> |
| <tr class="memitem:a2117b58e19182dff91ad3558e650541d"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a2117b58e19182dff91ad3558e650541d">QUOTE</a>(x)   #x</td></tr> |
| <tr class="memdesc:a2117b58e19182dff91ad3558e650541d"><td class="mdescLeft"> </td><td class="mdescRight">Macros/inlines to assist CLion to parse Cuda files (*.cu, *.cuh) <a href="cuda_2utils_8h.html#a2117b58e19182dff91ad3558e650541d">More...</a><br /></td></tr> |
| <tr class="separator:a2117b58e19182dff91ad3558e650541d"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a257a331aabc15f6c701df3cff96f1b10"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a257a331aabc15f6c701df3cff96f1b10">QUOTEVALUE</a>(x)   <a class="el" href="cuda_2utils_8h.html#a2117b58e19182dff91ad3558e650541d">QUOTE</a>(x)</td></tr> |
| <tr class="separator:a257a331aabc15f6c701df3cff96f1b10"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:ac2d16cdf196c75879d4acda60406e0ef"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#ac2d16cdf196c75879d4acda60406e0ef">STATIC_ASSERT_CUDA_VERSION_GE</a>(min_version)</td></tr> |
| <tr class="separator:ac2d16cdf196c75879d4acda60406e0ef"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:afc69a418242c5b851993bc2307b1c897"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#afc69a418242c5b851993bc2307b1c897">CHECK_CUDA_ERROR</a>(msg)</td></tr> |
| <tr class="memdesc:afc69a418242c5b851993bc2307b1c897"><td class="mdescLeft">&#160;</td><td class="mdescRight">When compiling a <code>__device__</code> function, check that the architecture is &gt;= Kepler (3.0). Note that <code>__CUDA_ARCH__</code> is not defined outside of a <code>__device__</code> function. <a href="cuda_2utils_8h.html#afc69a418242c5b851993bc2307b1c897">More...</a><br /></td></tr> |
| <tr class="separator:afc69a418242c5b851993bc2307b1c897"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a06cc7d24ca66505e69f5ad40009f5e8d"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a06cc7d24ca66505e69f5ad40009f5e8d">CUDA_CALL</a>(func)</td></tr> |
| <tr class="memdesc:a06cc7d24ca66505e69f5ad40009f5e8d"><td class="mdescLeft"> </td><td class="mdescRight">Protected CUDA call. <a href="cuda_2utils_8h.html#a06cc7d24ca66505e69f5ad40009f5e8d">More...</a><br /></td></tr> |
| <tr class="separator:a06cc7d24ca66505e69f5ad40009f5e8d"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a685d7ca3c9370ff471665abcacdeb381"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a685d7ca3c9370ff471665abcacdeb381">CUBLAS_CALL</a>(func)</td></tr> |
| <tr class="memdesc:a685d7ca3c9370ff471665abcacdeb381"><td class="mdescLeft"> </td><td class="mdescRight">Protected cuBLAS call. <a href="cuda_2utils_8h.html#a685d7ca3c9370ff471665abcacdeb381">More...</a><br /></td></tr> |
| <tr class="separator:a685d7ca3c9370ff471665abcacdeb381"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:ab38940ff6950f84102baa4573675b670"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#ab38940ff6950f84102baa4573675b670">CUSOLVER_CALL</a>(func)</td></tr> |
| <tr class="memdesc:ab38940ff6950f84102baa4573675b670"><td class="mdescLeft"> </td><td class="mdescRight">Protected cuSolver call. <a href="cuda_2utils_8h.html#ab38940ff6950f84102baa4573675b670">More...</a><br /></td></tr> |
| <tr class="separator:ab38940ff6950f84102baa4573675b670"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a82d7233550780a8c186e79c24aed8406"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a82d7233550780a8c186e79c24aed8406">CURAND_CALL</a>(func)</td></tr> |
| <tr class="memdesc:a82d7233550780a8c186e79c24aed8406"><td class="mdescLeft"> </td><td class="mdescRight">Protected cuRAND call. <a href="cuda_2utils_8h.html#a82d7233550780a8c186e79c24aed8406">More...</a><br /></td></tr> |
| <tr class="separator:a82d7233550780a8c186e79c24aed8406"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a63b6d263b94df9e33474894ad02b792d"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a63b6d263b94df9e33474894ad02b792d">NVRTC_CALL</a>(x)</td></tr> |
| <tr class="memdesc:a63b6d263b94df9e33474894ad02b792d"><td class="mdescLeft"> </td><td class="mdescRight">Protected NVRTC call. <a href="cuda_2utils_8h.html#a63b6d263b94df9e33474894ad02b792d">More...</a><br /></td></tr> |
| <tr class="separator:a63b6d263b94df9e33474894ad02b792d"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a0d9b08b9ef45122c54bf5a121aeab5c3"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a0d9b08b9ef45122c54bf5a121aeab5c3">CUDA_DRIVER_CALL</a>(func)</td></tr> |
| <tr class="memdesc:a0d9b08b9ef45122c54bf5a121aeab5c3"><td class="mdescLeft"> </td><td class="mdescRight">Protected CUDA driver call. <a href="cuda_2utils_8h.html#a0d9b08b9ef45122c54bf5a121aeab5c3">More...</a><br /></td></tr> |
| <tr class="separator:a0d9b08b9ef45122c54bf5a121aeab5c3"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a685e3713856baaafb1d4edea43725c83"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a685e3713856baaafb1d4edea43725c83">CUDA_UNROLL</a>   _Pragma("unroll")</td></tr> |
| <tr class="separator:a685e3713856baaafb1d4edea43725c83"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:addb314f15d765a2ba72ae37dab23c03b"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#addb314f15d765a2ba72ae37dab23c03b">CUDA_NOUNROLL</a>   _Pragma("nounroll")</td></tr> |
| <tr class="separator:addb314f15d765a2ba72ae37dab23c03b"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:aa7ba00b841d6b7ba443b0e58dac9ab88"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#aa7ba00b841d6b7ba443b0e58dac9ab88">MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT</a>   true</td></tr> |
| <tr class="separator:aa7ba00b841d6b7ba443b0e58dac9ab88"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:aa16d34c218441b0d4074baa8c66a5521"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#aa16d34c218441b0d4074baa8c66a5521">MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT</a>   false</td></tr> |
| <tr class="separator:aa16d34c218441b0d4074baa8c66a5521"><td class="memSeparator" colspan="2"> </td></tr> |
| </table><table class="memberdecls"> |
| <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a> |
| Functions</h2></td></tr> |
| <tr class="memitem:a9feee613a4f16a954dd68e55345a72ac"><td class="memItemLeft" align="right" valign="top">const char * </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">mxnet::common::cuda::CublasGetErrorString</a> (cublasStatus_t error)</td></tr> |
| <tr class="memdesc:a9feee613a4f16a954dd68e55345a72ac"><td class="mdescLeft"> </td><td class="mdescRight">Get string representation of cuBLAS errors. <a href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">More...</a><br /></td></tr> |
| <tr class="separator:a9feee613a4f16a954dd68e55345a72ac"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:abf9bcb4cb696e9ae61b818510dac39c8"><td class="memItemLeft" align="right" valign="top">const char * </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">mxnet::common::cuda::CusolverGetErrorString</a> (cusolverStatus_t error)</td></tr> |
| <tr class="memdesc:abf9bcb4cb696e9ae61b818510dac39c8"><td class="mdescLeft"> </td><td class="mdescRight">Get string representation of cuSOLVER errors. <a href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">More...</a><br /></td></tr> |
| <tr class="separator:abf9bcb4cb696e9ae61b818510dac39c8"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a97c06b2f4d26445a7386b0f54fae1feb"><td class="memItemLeft" align="right" valign="top">const char * </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">mxnet::common::cuda::CurandGetErrorString</a> (curandStatus_t status)</td></tr> |
| <tr class="memdesc:a97c06b2f4d26445a7386b0f54fae1feb"><td class="mdescLeft"> </td><td class="mdescRight">Get string representation of cuRAND errors. <a href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">More...</a><br /></td></tr> |
| <tr class="separator:a97c06b2f4d26445a7386b0f54fae1feb"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a6f3ee04eb382c57e10916108db3efd80"><td class="memTemplParams" colspan="2">template&lt;typename DType &gt; </td></tr> |
| <tr class="memitem:a6f3ee04eb382c57e10916108db3efd80"><td class="memTemplItemLeft" align="right" valign="top">DType __device__ </td><td class="memTemplItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a6f3ee04eb382c57e10916108db3efd80">mxnet::common::cuda::CudaMax</a> (DType a, DType b)</td></tr> |
| <tr class="separator:a6f3ee04eb382c57e10916108db3efd80"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a03888f252f813f6d052ae84bf8801498"><td class="memTemplParams" colspan="2">template&lt;typename DType &gt; </td></tr> |
| <tr class="memitem:a03888f252f813f6d052ae84bf8801498"><td class="memTemplItemLeft" align="right" valign="top">DType __device__ </td><td class="memTemplItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a03888f252f813f6d052ae84bf8801498">mxnet::common::cuda::CudaMin</a> (DType a, DType b)</td></tr> |
| <tr class="separator:a03888f252f813f6d052ae84bf8801498"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:aa7e0a8f7264c65d8000560d84d7fc54d"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#aa7e0a8f7264c65d8000560d84d7fc54d">mxnet::common::cuda::get_load_type</a> (size_t N)</td></tr> |
| <tr class="memdesc:aa7e0a8f7264c65d8000560d84d7fc54d"><td class="mdescLeft"> </td><td class="mdescRight">Get the largest datatype suitable to read requested number of bytes. <a href="namespacemxnet_1_1common_1_1cuda.html#aa7e0a8f7264c65d8000560d84d7fc54d">More...</a><br /></td></tr> |
| <tr class="separator:aa7e0a8f7264c65d8000560d84d7fc54d"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a7608f1c1700694e453f37cfadfe9e30e"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="namespacemxnet_1_1common_1_1cuda.html#a7608f1c1700694e453f37cfadfe9e30e">mxnet::common::cuda::get_rows_per_block</a> (size_t row_size, int num_threads_per_block)</td></tr> |
| <tr class="memdesc:a7608f1c1700694e453f37cfadfe9e30e"><td class="mdescLeft"> </td><td class="mdescRight">Determine how many rows in a 2D matrix should a block of threads handle based on the row size and the number of threads in a block. <a href="namespacemxnet_1_1common_1_1cuda.html#a7608f1c1700694e453f37cfadfe9e30e">More...</a><br /></td></tr> |
| <tr class="separator:a7608f1c1700694e453f37cfadfe9e30e"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a31f4237a3ff5be2d420461a9baaffd1e"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a31f4237a3ff5be2d420461a9baaffd1e">cudaAttributeLookup</a> (int device_id, std::vector< int32_t > *cached_values, cudaDeviceAttr attr, const char *attr_name)</td></tr> |
| <tr class="memdesc:a31f4237a3ff5be2d420461a9baaffd1e"><td class="mdescLeft">&#160;</td><td class="mdescRight">Return an attribute of GPU <code>device_id</code>. <a href="cuda_2utils_8h.html#a31f4237a3ff5be2d420461a9baaffd1e">More...</a><br /></td></tr> |
| <tr class="separator:a31f4237a3ff5be2d420461a9baaffd1e"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:aa79f548df23452162de37663f171e99d"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#aa79f548df23452162de37663f171e99d">ComputeCapabilityMajor</a> (int device_id)</td></tr> |
| <tr class="memdesc:aa79f548df23452162de37663f171e99d"><td class="mdescLeft"> </td><td class="mdescRight">Determine major version number of the gpu's cuda compute architecture. <a href="cuda_2utils_8h.html#aa79f548df23452162de37663f171e99d">More...</a><br /></td></tr> |
| <tr class="separator:aa79f548df23452162de37663f171e99d"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a7c16e8770e4f399cabed1fc231ffd9b6"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a7c16e8770e4f399cabed1fc231ffd9b6">ComputeCapabilityMinor</a> (int device_id)</td></tr> |
| <tr class="memdesc:a7c16e8770e4f399cabed1fc231ffd9b6"><td class="mdescLeft"> </td><td class="mdescRight">Determine minor version number of the gpu's cuda compute architecture. <a href="cuda_2utils_8h.html#a7c16e8770e4f399cabed1fc231ffd9b6">More...</a><br /></td></tr> |
| <tr class="separator:a7c16e8770e4f399cabed1fc231ffd9b6"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a9779e3ad0efd0faec7fbe431c0db896d"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a9779e3ad0efd0faec7fbe431c0db896d">SMArch</a> (int device_id)</td></tr> |
| <tr class="memdesc:a9779e3ad0efd0faec7fbe431c0db896d"><td class="mdescLeft"> </td><td class="mdescRight">Return the integer SM architecture (e.g. Volta = 70). <a href="cuda_2utils_8h.html#a9779e3ad0efd0faec7fbe431c0db896d">More...</a><br /></td></tr> |
| <tr class="separator:a9779e3ad0efd0faec7fbe431c0db896d"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:ac51c1cdc60e05dd857bfabca52355f2f"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#ac51c1cdc60e05dd857bfabca52355f2f">MultiprocessorCount</a> (int device_id)</td></tr> |
| <tr class="memdesc:ac51c1cdc60e05dd857bfabca52355f2f"><td class="mdescLeft"> </td><td class="mdescRight">Return the number of streaming multiprocessors of GPU <code>device_id</code>. <a href="cuda_2utils_8h.html#ac51c1cdc60e05dd857bfabca52355f2f">More...</a><br /></td></tr> |
| <tr class="separator:ac51c1cdc60e05dd857bfabca52355f2f"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:af5b41c04e3d281500957c305532cd478"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#af5b41c04e3d281500957c305532cd478">MaxSharedMemoryPerMultiprocessor</a> (int device_id)</td></tr> |
| <tr class="memdesc:af5b41c04e3d281500957c305532cd478"><td class="mdescLeft"> </td><td class="mdescRight">Return the shared memory size in bytes of each of the GPU's streaming multiprocessors. <a href="cuda_2utils_8h.html#af5b41c04e3d281500957c305532cd478">More...</a><br /></td></tr> |
| <tr class="separator:af5b41c04e3d281500957c305532cd478"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a82a24f3db4d0c91374cb3fe7d413f603"><td class="memItemLeft" align="right" valign="top">bool </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a82a24f3db4d0c91374cb3fe7d413f603">SupportsCooperativeLaunch</a> (int device_id)</td></tr> |
| <tr class="memdesc:a82a24f3db4d0c91374cb3fe7d413f603"><td class="mdescLeft"> </td><td class="mdescRight">Return whether the GPU <code>device_id</code> supports cooperative-group kernel launching. <a href="cuda_2utils_8h.html#a82a24f3db4d0c91374cb3fe7d413f603">More...</a><br /></td></tr> |
| <tr class="separator:a82a24f3db4d0c91374cb3fe7d413f603"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:afb4268417c1d8886a39142c85c8f188f"><td class="memItemLeft" align="right" valign="top">bool </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#afb4268417c1d8886a39142c85c8f188f">SupportsFloat16Compute</a> (int device_id)</td></tr> |
| <tr class="memdesc:afb4268417c1d8886a39142c85c8f188f"><td class="mdescLeft"> </td><td class="mdescRight">Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative. <a href="cuda_2utils_8h.html#afb4268417c1d8886a39142c85c8f188f">More...</a><br /></td></tr> |
| <tr class="separator:afb4268417c1d8886a39142c85c8f188f"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:af7e22ce6d80d61e8ca37df23880ff1a9"><td class="memItemLeft" align="right" valign="top">bool </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#af7e22ce6d80d61e8ca37df23880ff1a9">SupportsTensorCore</a> (int device_id)</td></tr> |
| <tr class="memdesc:af7e22ce6d80d61e8ca37df23880ff1a9"><td class="mdescLeft"> </td><td class="mdescRight">Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative. <a href="cuda_2utils_8h.html#af7e22ce6d80d61e8ca37df23880ff1a9">More...</a><br /></td></tr> |
| <tr class="separator:af7e22ce6d80d61e8ca37df23880ff1a9"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:a464dee13053e3b0b1006c6307069196c"><td class="memItemLeft" align="right" valign="top">bool </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a464dee13053e3b0b1006c6307069196c">GetEnvAllowTensorCore</a> ()</td></tr> |
| <tr class="memdesc:a464dee13053e3b0b1006c6307069196c"><td class="mdescLeft"> </td><td class="mdescRight">Returns global policy for TensorCore algo use. <a href="cuda_2utils_8h.html#a464dee13053e3b0b1006c6307069196c">More...</a><br /></td></tr> |
| <tr class="separator:a464dee13053e3b0b1006c6307069196c"><td class="memSeparator" colspan="2"> </td></tr> |
| <tr class="memitem:ad77e70546b7f35ecba0098caa2d07523"><td class="memItemLeft" align="right" valign="top">bool </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#ad77e70546b7f35ecba0098caa2d07523">GetEnvAllowTensorCoreConversion</a> ()</td></tr> |
| <tr class="memdesc:ad77e70546b7f35ecba0098caa2d07523"><td class="mdescLeft"> </td><td class="mdescRight">Returns global policy for TensorCore implicit type casting. <a href="cuda_2utils_8h.html#ad77e70546b7f35ecba0098caa2d07523">More...</a><br /></td></tr> |
| <tr class="separator:ad77e70546b7f35ecba0098caa2d07523"><td class="memSeparator" colspan="2"> </td></tr> |
| </table><table class="memberdecls"> |
| <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="var-members"></a> |
| Variables</h2></td></tr> |
| <tr class="memitem:a7d0d1e932a096c498381cec82a650cfa"><td class="memItemLeft" align="right" valign="top">constexpr size_t </td><td class="memItemRight" valign="bottom"><a class="el" href="cuda_2utils_8h.html#a7d0d1e932a096c498381cec82a650cfa">kMaxNumGpus</a> = 64</td></tr> |
| <tr class="memdesc:a7d0d1e932a096c498381cec82a650cfa"><td class="mdescLeft"> </td><td class="mdescRight">Maximum number of GPUs. <a href="cuda_2utils_8h.html#a7d0d1e932a096c498381cec82a650cfa">More...</a><br /></td></tr> |
| <tr class="separator:a7d0d1e932a096c498381cec82a650cfa"><td class="memSeparator" colspan="2"> </td></tr> |
| </table> |
| <a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2> |
| <div class="textblock"><p>Common CUDA utilities. </p> |
| </div><h2 class="groupheader">Macro Definition Documentation</h2> |
| <a id="afc69a418242c5b851993bc2307b1c897"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#afc69a418242c5b851993bc2307b1c897">◆ </a></span>CHECK_CUDA_ERROR</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define CHECK_CUDA_ERROR</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">msg</td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <b>Value:</b><div class="fragment"><div class="line"> { \</div> |
| <div class="line"> cudaError_t e = cudaGetLastError(); \</div> |
| <div class="line"> CHECK_EQ(e, cudaSuccess) << (msg) << <span class="stringliteral">" CUDA: "</span> << cudaGetErrorString(e); \</div> |
| <div class="line"> }</div> |
| </div><!-- fragment --> |
| <p>When compiling a <code>__device__</code> function, check that the architecture is &gt;= Kepler (3.0). Note that <code>__CUDA_ARCH__</code> is not defined outside of a <code>__device__</code> function. </p> |
| <p>Check CUDA error. </p><dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">msg</td><td>Message to print if an error occurred. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| |
| </div> |
| </div> |
| <a id="a685d7ca3c9370ff471665abcacdeb381"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a685d7ca3c9370ff471665abcacdeb381">◆ </a></span>CUBLAS_CALL</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define CUBLAS_CALL</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">func</td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <b>Value:</b><div class="fragment"><div class="line"> { \</div> |
| <div class="line"> cublasStatus_t e = (func); \</div> |
| <div class="line"> CHECK_EQ(e, CUBLAS_STATUS_SUCCESS) \</div> |
| <div class="line"> << <span class="stringliteral">"cuBLAS: "</span> << <a class="code" href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">mxnet::common::cuda::CublasGetErrorString</a>(e); \</div> |
| <div class="line"> }</div> |
| </div><!-- fragment --> |
| <p>Protected cuBLAS call. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">func</td><td>Expression to call.</td></tr> |
| </table> |
| </dd> |
| </dl> |
| <p>It checks for cuBLAS errors after invocation of the expression. </p> |
| |
| </div> |
| </div> |
| <a id="a06cc7d24ca66505e69f5ad40009f5e8d"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a06cc7d24ca66505e69f5ad40009f5e8d">◆ </a></span>CUDA_CALL</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define CUDA_CALL</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">func</td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <b>Value:</b><div class="fragment"><div class="line"> { \</div> |
| <div class="line"> cudaError_t e = (func); \</div> |
| <div class="line"> CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading) &lt;&lt; <span class="stringliteral">"CUDA: "</span> &lt;&lt; cudaGetErrorString(e); \</div> |
| <div class="line"> }</div> |
| </div><!-- fragment --> |
| <p>Protected CUDA call. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">func</td><td>Expression to call.</td></tr> |
| </table> |
| </dd> |
| </dl> |
| <p>It checks for CUDA errors after invocation of the expression. </p> |
| |
| </div> |
| </div> |
| <a id="a0d9b08b9ef45122c54bf5a121aeab5c3"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a0d9b08b9ef45122c54bf5a121aeab5c3">◆ </a></span>CUDA_DRIVER_CALL</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define CUDA_DRIVER_CALL</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">func</td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <b>Value:</b><div class="fragment"><div class="line"> { \</div> |
| <div class="line"> CUresult e = (func); \</div> |
| <div class="line"> if (e != CUDA_SUCCESS) { \</div> |
| <div class="line"> char <span class="keyword">const</span>* err_msg = <span class="keyword">nullptr</span>; \</div> |
| <div class="line"> if (cuGetErrorString(e, &amp;err_msg) == CUDA_ERROR_INVALID_VALUE) { \</div> |
| <div class="line"> LOG(FATAL) &lt;&lt; <span class="stringliteral">"CUDA Driver: Unknown error "</span> &lt;&lt; e; \</div> |
| <div class="line"> } <span class="keywordflow">else</span> { \</div> |
| <div class="line"> LOG(FATAL) &lt;&lt; <span class="stringliteral">"CUDA Driver: "</span> &lt;&lt; e &lt;&lt; <span class="stringliteral">" "</span> &lt;&lt; err_msg; \</div> |
| <div class="line"> } \</div> |
| <div class="line"> } \</div> |
| <div class="line"> }</div> |
| </div><!-- fragment --> |
| <p>Protected CUDA driver call. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">func</td><td>Expression to call.</td></tr> |
| </table> |
| </dd> |
| </dl> |
| <p>It checks for CUDA driver errors after invocation of the expression. </p> |
| |
| </div> |
| </div> |
| <a id="addb314f15d765a2ba72ae37dab23c03b"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#addb314f15d765a2ba72ae37dab23c03b">◆ </a></span>CUDA_NOUNROLL</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define CUDA_NOUNROLL   _Pragma("nounroll")</td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| </div> |
| </div> |
| <a id="a685e3713856baaafb1d4edea43725c83"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a685e3713856baaafb1d4edea43725c83">◆ </a></span>CUDA_UNROLL</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define CUDA_UNROLL   _Pragma("unroll")</td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| </div> |
| </div> |
| <a id="a82d7233550780a8c186e79c24aed8406"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a82d7233550780a8c186e79c24aed8406">◆ </a></span>CURAND_CALL</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define CURAND_CALL</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">func</td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <b>Value:</b><div class="fragment"><div class="line"> { \</div> |
| <div class="line"> curandStatus_t e = (func); \</div> |
| <div class="line"> CHECK_EQ(e, CURAND_STATUS_SUCCESS) \</div> |
| <div class="line"> &lt;&lt; <span class="stringliteral">"cuRAND: "</span> &lt;&lt; <a class="code" href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">mxnet::common::cuda::CurandGetErrorString</a>(e); \</div> |
| <div class="line"> }</div> |
| </div><!-- fragment --> |
| <p>Protected cuRAND call. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">func</td><td>Expression to call.</td></tr> |
| </table> |
| </dd> |
| </dl> |
| <p>It checks for cuRAND errors after invocation of the expression. </p> |
| |
| </div> |
| </div> |
| <a id="ab38940ff6950f84102baa4573675b670"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#ab38940ff6950f84102baa4573675b670">◆ </a></span>CUSOLVER_CALL</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define CUSOLVER_CALL</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">func</td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <b>Value:</b><div class="fragment"><div class="line"> { \</div> |
| <div class="line"> cusolverStatus_t e = (func); \</div> |
| <div class="line"> CHECK_EQ(e, CUSOLVER_STATUS_SUCCESS) \</div> |
| <div class="line"> &lt;&lt; <span class="stringliteral">"cuSolver: "</span> &lt;&lt; <a class="code" href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">mxnet::common::cuda::CusolverGetErrorString</a>(e); \</div> |
| <div class="line"> }</div> |
| </div><!-- fragment --> |
| <p>Protected cuSolver call. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">func</td><td>Expression to call.</td></tr> |
| </table> |
| </dd> |
| </dl> |
| <p>It checks for cuSolver errors after invocation of the expression. </p> |
| |
| </div> |
| </div> |
| <a id="aa7ba00b841d6b7ba443b0e58dac9ab88"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#aa7ba00b841d6b7ba443b0e58dac9ab88">◆ </a></span>MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT   true</td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| </div> |
| </div> |
| <a id="aa16d34c218441b0d4074baa8c66a5521"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#aa16d34c218441b0d4074baa8c66a5521">◆ </a></span>MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT   false</td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| </div> |
| </div> |
| <a id="a63b6d263b94df9e33474894ad02b792d"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a63b6d263b94df9e33474894ad02b792d">◆ </a></span>NVRTC_CALL</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define NVRTC_CALL</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">x</td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <b>Value:</b><div class="fragment"><div class="line"> { \</div> |
| <div class="line"> nvrtcResult result = x; \</div> |
| <div class="line"> CHECK_EQ(result, NVRTC_SUCCESS) &lt;&lt; #x <span class="stringliteral">" failed with error "</span> &lt;&lt; nvrtcGetErrorString(result); \</div> |
| <div class="line"> }</div> |
| </div><!-- fragment --> |
| <p>Protected NVRTC call. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">x</td><td>Expression to call.</td></tr> |
| </table> |
| </dd> |
| </dl> |
| <p>It checks for NVRTC errors after invocation of the expression. </p> |
| |
| </div> |
| </div> |
| <a id="a2117b58e19182dff91ad3558e650541d"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a2117b58e19182dff91ad3558e650541d">◆ </a></span>QUOTE</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define QUOTE</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">x</td><td>)</td> |
| <td>   #x</td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Macros/inlines to assist CLion to parse Cuda files (*.cu, *.cuh) </p> |
| |
| </div> |
| </div> |
| <a id="a257a331aabc15f6c701df3cff96f1b10"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a257a331aabc15f6c701df3cff96f1b10">◆ </a></span>QUOTEVALUE</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define QUOTEVALUE</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">x</td><td>)</td> |
| <td>   <a class="el" href="cuda_2utils_8h.html#a2117b58e19182dff91ad3558e650541d">QUOTE</a>(x)</td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| </div> |
| </div> |
| <a id="ac2d16cdf196c75879d4acda60406e0ef"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#ac2d16cdf196c75879d4acda60406e0ef">◆ </a></span>STATIC_ASSERT_CUDA_VERSION_GE</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">#define STATIC_ASSERT_CUDA_VERSION_GE</td> |
| <td>(</td> |
| <td class="paramtype"> </td> |
| <td class="paramname">min_version</td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| <b>Value:</b><div class="fragment"><div class="line"> static_assert(CUDA_VERSION >= min_version, <span class="stringliteral">"Compiled-against CUDA version "</span> \</div> |
| <div class="line"> <a class="code" href="cuda_2utils_8h.html#a257a331aabc15f6c701df3cff96f1b10">QUOTEVALUE</a>(CUDA_VERSION) <span class="stringliteral">" is too old, please upgrade system to version "</span> \</div> |
| <div class="line"> <a class="code" href="cuda_2utils_8h.html#a257a331aabc15f6c701df3cff96f1b10">QUOTEVALUE</a>(min_version) <span class="stringliteral">" or later."</span>)</div> |
| </div><!-- fragment --> |
| </div> |
| </div> |
| <h2 class="groupheader">Function Documentation</h2> |
| <a id="aa79f548df23452162de37663f171e99d"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#aa79f548df23452162de37663f171e99d">◆ </a></span>ComputeCapabilityMajor()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int ComputeCapabilityMajor </td> |
| <td>(</td> |
| <td class="paramtype">int </td> |
| <td class="paramname"><em>device_id</em></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Determine major version number of the gpu's cuda compute architecture. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>the major version number of the gpu's cuda compute architecture. </dd></dl> |
| |
| </div> |
| </div> |
| <a id="a7c16e8770e4f399cabed1fc231ffd9b6"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a7c16e8770e4f399cabed1fc231ffd9b6">◆ </a></span>ComputeCapabilityMinor()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int ComputeCapabilityMinor </td> |
| <td>(</td> |
| <td class="paramtype">int </td> |
| <td class="paramname"><em>device_id</em></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Determine minor version number of the gpu's cuda compute architecture. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>the minor version number of the gpu's cuda compute architecture. </dd></dl> |
| |
| </div> |
| </div> |
| <a id="a31f4237a3ff5be2d420461a9baaffd1e"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a31f4237a3ff5be2d420461a9baaffd1e">◆ </a></span>cudaAttributeLookup()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int cudaAttributeLookup </td> |
| <td>(</td> |
| <td class="paramtype">int </td> |
| <td class="paramname"><em>device_id</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">std::vector&lt; int32_t &gt; * </td> |
| <td class="paramname"><em>cached_values</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">cudaDeviceAttr </td> |
| <td class="paramname"><em>attr</em>, </td> |
| </tr> |
| <tr> |
| <td class="paramkey"></td> |
| <td></td> |
| <td class="paramtype">const char * </td> |
| <td class="paramname"><em>attr_name</em> </td> |
| </tr> |
| <tr> |
| <td></td> |
| <td>)</td> |
| <td></td><td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Return an attribute of GPU <code>device_id</code>. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr> |
| <tr><td class="paramname">cached_values</td><td>An array of attributes for already-looked-up GPUs. </td></tr> |
| <tr><td class="paramname">attr</td><td>The attribute, by number. </td></tr> |
| <tr><td class="paramname">attr_name</td><td>A string representation of the attribute, for error messages. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>the gpu's attribute value. </dd></dl> |
| |
| </div> |
| </div> |
| <a id="a464dee13053e3b0b1006c6307069196c"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a464dee13053e3b0b1006c6307069196c">◆ </a></span>GetEnvAllowTensorCore()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">bool GetEnvAllowTensorCore </td> |
| <td>(</td> |
| <td class="paramname"></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Returns global policy for TensorCore algo use. </p> |
| <dl class="section return"><dt>Returns</dt><dd>whether to allow TensorCore algo (if not specified by the Operator locally). </dd></dl> |
| |
| </div> |
| </div> |
| <a id="ad77e70546b7f35ecba0098caa2d07523"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#ad77e70546b7f35ecba0098caa2d07523">◆ </a></span>GetEnvAllowTensorCoreConversion()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">bool GetEnvAllowTensorCoreConversion </td> |
| <td>(</td> |
| <td class="paramname"></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Returns global policy for TensorCore implicit type casting. </p> |
| |
| </div> |
| </div> |
| <a id="af5b41c04e3d281500957c305532cd478"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#af5b41c04e3d281500957c305532cd478">◆ </a></span>MaxSharedMemoryPerMultiprocessor()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int MaxSharedMemoryPerMultiprocessor </td> |
| <td>(</td> |
| <td class="paramtype">int </td> |
| <td class="paramname"><em>device_id</em></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Return the shared memory size in bytes of each of the GPU's streaming multiprocessors. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>the shared memory size per streaming multiprocessor. </dd></dl> |
| |
| </div> |
| </div> |
| <a id="ac51c1cdc60e05dd857bfabca52355f2f"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#ac51c1cdc60e05dd857bfabca52355f2f">◆ </a></span>MultiprocessorCount()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int MultiprocessorCount </td> |
| <td>(</td> |
| <td class="paramtype">int </td> |
| <td class="paramname"><em>device_id</em></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Return the number of streaming multiprocessors of GPU <code>device_id</code>. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>the gpu's count of streaming multiprocessors. </dd></dl> |
| |
| </div> |
| </div> |
| <a id="a9779e3ad0efd0faec7fbe431c0db896d"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a9779e3ad0efd0faec7fbe431c0db896d">◆ </a></span>SMArch()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">int SMArch </td> |
| <td>(</td> |
| <td class="paramtype">int </td> |
| <td class="paramname"><em>device_id</em></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Return the integer SM architecture (e.g. Volta = 70). </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>the gpu's cuda compute architecture as an int. </dd></dl> |
| |
| </div> |
| </div> |
| <a id="a82a24f3db4d0c91374cb3fe7d413f603"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a82a24f3db4d0c91374cb3fe7d413f603">◆ </a></span>SupportsCooperativeLaunch()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">bool SupportsCooperativeLaunch </td> |
| <td>(</td> |
| <td class="paramtype">int </td> |
| <td class="paramname"><em>device_id</em></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Return whether the GPU <code>device_id</code> supports cooperative-group kernel launching. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>the gpu's ability to run cooperative-group kernels. </dd></dl> |
| |
| </div> |
| </div> |
| <a id="afb4268417c1d8886a39142c85c8f188f"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#afb4268417c1d8886a39142c85c8f188f">◆ </a></span>SupportsFloat16Compute()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">bool SupportsFloat16Compute </td> |
| <td>(</td> |
| <td class="paramtype">int </td> |
| <td class="paramname"><em>device_id</em></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>whether the gpu's architecture supports float16 math. </dd></dl> |
| |
| </div> |
| </div> |
| <a id="af7e22ce6d80d61e8ca37df23880ff1a9"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#af7e22ce6d80d61e8ca37df23880ff1a9">◆ </a></span>SupportsTensorCore()</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">bool SupportsTensorCore </td> |
| <td>(</td> |
| <td class="paramtype">int </td> |
| <td class="paramname"><em>device_id</em></td><td>)</td> |
| <td></td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">inline</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative. </p> |
| <dl class="params"><dt>Parameters</dt><dd> |
| <table class="params"> |
| <tr><td class="paramname">device_id</td><td>The device index of the cuda-capable gpu of interest. </td></tr> |
| </table> |
| </dd> |
| </dl> |
| <dl class="section return"><dt>Returns</dt><dd>whether the gpu's architecture supports Tensor Core math. </dd></dl> |
| |
| </div> |
| </div> |
| <h2 class="groupheader">Variable Documentation</h2> |
| <a id="a7d0d1e932a096c498381cec82a650cfa"></a> |
| <h2 class="memtitle"><span class="permalink"><a href="#a7d0d1e932a096c498381cec82a650cfa">◆ </a></span>kMaxNumGpus</h2> |
| |
| <div class="memitem"> |
| <div class="memproto"> |
| <table class="mlabels"> |
| <tr> |
| <td class="mlabels-left"> |
| <table class="memname"> |
| <tr> |
| <td class="memname">constexpr size_t kMaxNumGpus = 64</td> |
| </tr> |
| </table> |
| </td> |
| <td class="mlabels-right"> |
| <span class="mlabels"><span class="mlabel">constexpr</span></span> </td> |
| </tr> |
| </table> |
| </div><div class="memdoc"> |
| |
| <p>Maximum number of GPUs. </p> |
| |
| </div> |
| </div> |
| </div><!-- contents --> |
| <div class="ttc" id="anamespacemxnet_1_1common_1_1cuda_html_abf9bcb4cb696e9ae61b818510dac39c8"><div class="ttname"><a href="namespacemxnet_1_1common_1_1cuda.html#abf9bcb4cb696e9ae61b818510dac39c8">mxnet::common::cuda::CusolverGetErrorString</a></div><div class="ttdeci">const char * CusolverGetErrorString(cusolverStatus_t error)</div><div class="ttdoc">Get string representation of cuSOLVER errors.</div><div class="ttdef"><b>Definition:</b> utils.h:319</div></div> |
| <div class="ttc" id="acuda_2utils_8h_html_a257a331aabc15f6c701df3cff96f1b10"><div class="ttname"><a href="cuda_2utils_8h.html#a257a331aabc15f6c701df3cff96f1b10">QUOTEVALUE</a></div><div class="ttdeci">#define QUOTEVALUE(x)</div><div class="ttdef"><b>Definition:</b> utils.h:58</div></div> |
| <div class="ttc" id="anamespacemxnet_1_1common_1_1cuda_html_a9feee613a4f16a954dd68e55345a72ac"><div class="ttname"><a href="namespacemxnet_1_1common_1_1cuda.html#a9feee613a4f16a954dd68e55345a72ac">mxnet::common::cuda::CublasGetErrorString</a></div><div class="ttdeci">const char * CublasGetErrorString(cublasStatus_t error)</div><div class="ttdoc">Get string representation of cuBLAS errors.</div><div class="ttdef"><b>Definition:</b> utils.h:277</div></div> |
| <div class="ttc" id="anamespacemxnet_1_1common_1_1cuda_html_a97c06b2f4d26445a7386b0f54fae1feb"><div class="ttname"><a href="namespacemxnet_1_1common_1_1cuda.html#a97c06b2f4d26445a7386b0f54fae1feb">mxnet::common::cuda::CurandGetErrorString</a></div><div class="ttdeci">const char * CurandGetErrorString(curandStatus_t status)</div><div class="ttdoc">Get string representation of cuRAND errors.</div><div class="ttdef"><b>Definition:</b> utils.h:348</div></div> |
| <!-- start footer part --> |
| <hr class="footer"/><address class="footer"><small> |
| Generated on Thu Jan 5 2023 03:47:40 for mxnet by  <a href="http://www.doxygen.org/index.html"> |
| <img class="footer" src="doxygen.png" alt="doxygen"/> |
| </a> 1.8.17 |
| </small></address> |
| </body> |
| </html> |