blob: f22f65b0e8312e1da923d06697c374f2ad27f0df [file] [log] [blame]
<!-- HTML header for doxygen 1.8.4-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.13"/>
<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
<title>MADlib: Show GPU Configuration</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtreedata.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { init_search(); });
</script>
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
jax: ["input/TeX","output/HTML-CSS"],
});
</script><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js"></script>
<!-- hack in the navigation tree -->
<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
<!-- google analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-45382226-1', 'madlib.apache.org');
ga('send', 'pageview');
</script>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
<td style="padding-left: 0.5em;">
<div id="projectname">
<span id="projectnumber">1.18.0</span>
</div>
<div id="projectbrief">User Documentation for Apache MADlib</div>
</td>
<td> <div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.13 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<div id="nav-tree-contents">
<div id="nav-sync" class="sync"></div>
</div>
</div>
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<script type="text/javascript">
$(document).ready(function(){initNavTree('group__grp__gpu__configuration.html','');});
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div class="header">
<div class="headertitle">
<div class="title">Show GPU Configuration<div class="ingroups"><a class="el" href="group__grp__dl.html">Deep Learning</a> &raquo; <a class="el" href="group__grp__dl__utilities.html">Utilities for Deep Learning</a></div></div> </div>
</div><!--header-->
<div class="contents">
<div class="toc"><b>Contents</b><ul>
<li class="level1">
<a href="#get_gpu_config">GPU Configuration</a> </li>
<li class="level1">
<a href="#example">Examples</a> </li>
<li class="level1">
<a href="#references">References</a> </li>
<li class="level1">
<a href="#related">Related Topics</a> </li>
</ul>
</div><p>This utility function reports the number and type of GPUs attached to hosts on the database cluster.</p>
<p>This can be useful when determining which segments to use for training deep neural nets. For example, for economic reasons you may wish to set up a heterogeneous cluster with GPUs only on some of the hosts, not all of them. This utility can help you identify where the GPUs are and direct the compute to those locations only for model training.</p>
<p><a class="anchor" id="get_gpu_config"></a></p><dl class="section user"><dt>GPU Configuration</dt><dd></dd></dl>
<pre class="syntax">
gpu_configuration( output_table,
source
)
</pre><p> <b>Arguments</b> </p><dl class="arglist">
<dt>output_table </dt>
<dd><p class="startdd">TEXT. Name of the output table to write out the GPU information. </p>
<p class="enddd"></p>
</dd>
<dt>source (optional) </dt>
<dd><p class="startdd">TEXT, default: 'tensorflow'. Source for determining GPU configuration. Using 'tensorflow' returns a description based on what TensorFlow reports. Using 'nvidia' returns a description based on what the NVIDIA System Management Interface (nvidia-smi) reports [1]. Note that MADlib and Keras will use the TensorFlow information; the lower-level nvidia-smi info is provided for convenience.</p>
<dl class="section note"><dt>Note</dt><dd>If the command 'nvidia-smi -L' returns an error, we do not pass through the error message, but instead will show no GPUs for that host in the output table. You may want to run nvidia-smi from the command line to see error and informational messages.</dd></dl>
</dd>
</dl>
<p><b>Output</b> <br />
The output table contains the following: </p><table class="output">
<tr>
<th>hostname </th><td>TEXT. Name of the host machine in the cluster. Does not include master or mirrors. For PostgreSQL this will always return 'localhost'. </td></tr>
<tr>
<th>gpu_descr </th><td>TEXT. String reported by TensorFlow or nvidia-smi. The formats are different and shown in the examples below. </td></tr>
</table>
<p><a class="anchor" id="example"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
<ol type="1">
<li>Get GPU configuration as per TensorFlow: <pre class="example">
DROP TABLE IF EXISTS host_gpu_mapping_tf;
SELECT * FROM madlib.gpu_configuration('host_gpu_mapping_tf');
SELECT * FROM host_gpu_mapping_tf ORDER BY hostname, gpu_descr;
</pre> <pre class="result">
hostname | gpu_descr
----------+------------------------------------------------------------------------------------------
phoenix0 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
phoenix0 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0
phoenix0 | device: 2, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:06.0, compute capability: 6.0
phoenix0 | device: 3, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:07.0, compute capability: 6.0
phoenix1 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
phoenix1 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0
phoenix3 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
phoenix3 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0
phoenix3 | device: 2, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:06.0, compute capability: 6.0
phoenix3 | device: 3, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:07.0, compute capability: 6.0
phoenix4 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
phoenix4 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0
phoenix4 | device: 2, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:06.0, compute capability: 6.0
phoenix4 | device: 3, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:07.0, compute capability: 6.0
(14 rows)
</pre> In this heterogeneous cluster there are 4 GPUs attached to hosts 0, 3 and 4. There are 2 GPUs attached to host 1 and no GPUs attached to host 2.</li>
<li>Get GPU configuration as per nvidia-smi: <pre class="example">
DROP TABLE IF EXISTS host_gpu_mapping_nvidia;
SELECT * FROM madlib.gpu_configuration('host_gpu_mapping_nvidia', -- output table
'nvidia' -- source for GPU info
);
SELECT * FROM host_gpu_mapping_nvidia ORDER BY hostname, gpu_descr;
</pre> <pre class="result">
hostname | gpu_descr
----------+------------------------------------------------------------------------------
phoenix0 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-f2ccc77e-2501-f6ee-4754-069dda256fb2)
phoenix0 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-b1fc40ca-c7c6-bc86-f20f-6e9a62cda3f8)
phoenix0 | GPU 2: Tesla P100-PCIE-16GB (UUID: GPU-d93bb21b-96f9-7c1d-3bab-cdd92b7bbc9d)
phoenix0 | GPU 3: Tesla P100-PCIE-16GB (UUID: GPU-2d79c4a8-479e-2f33-39f8-3ba80b63f830)
phoenix1 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-0af6bb1e-5b5b-4988-ad3a-a917e9584702)
phoenix1 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-d824c976-a8aa-ef26-a13c-9a9a7fe86bfd)
phoenix3 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-3681d0b6-1ec6-0453-fd81-29d88e549cd9)
phoenix3 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-d4b1f2e7-b238-ac9a-bbfe-918adeb69472)
phoenix3 | GPU 2: Tesla P100-PCIE-16GB (UUID: GPU-42a32ef1-a60c-e599-c8cf-0e669111ab6f)
phoenix3 | GPU 3: Tesla P100-PCIE-16GB (UUID: GPU-1cce09c4-6856-8031-be0b-8e8bbf9a10f3)
phoenix4 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-a71bdc18-fdd5-ba25-617e-19b23cc8e827)
phoenix4 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-f9d13688-7fe6-a029-24d1-985a5659f18f)
phoenix4 | GPU 2: Tesla P100-PCIE-16GB (UUID: GPU-06a7f54b-c07a-e87a-20d6-09bd99b19531)
phoenix4 | GPU 3: Tesla P100-PCIE-16GB (UUID: GPU-af3b32f3-8bd9-cb75-a8fb-25253b9da926)
(14 rows)
</pre></li>
<li>To get a fuller picture at the segment level, combine with the Greenplum catalog table 'gp_segment_configuration' which contains information about segment instance configuration [2]. Here is an example of this table filtering out master and mirrors: <pre class="example">
SELECT * FROM gp_segment_configuration WHERE role='p' AND content&gt;=0 ORDER BY hostname, dbid;
</pre> <pre class="result">
dbid | content | role | preferred_role | mode | status | port | hostname | address | replication_port
------+---------+------+----------------+------+--------+-------+----------+----------+------------------
2 | 0 | p | p | c | u | 40000 | phoenix0 | phoenix0 | 70000
3 | 1 | p | p | c | u | 40001 | phoenix0 | phoenix0 | 70001
4 | 2 | p | p | c | u | 40002 | phoenix0 | phoenix0 | 70002
5 | 3 | p | p | c | u | 40003 | phoenix0 | phoenix0 | 70003
6 | 4 | p | p | c | u | 40000 | phoenix1 | phoenix1 | 70000
7 | 5 | p | p | c | u | 40001 | phoenix1 | phoenix1 | 70001
8 | 6 | p | p | c | u | 40002 | phoenix1 | phoenix1 | 70002
9 | 7 | p | p | c | u | 40003 | phoenix1 | phoenix1 | 70003
10 | 8 | p | p | c | u | 40000 | phoenix2 | phoenix2 | 70000
11 | 9 | p | p | c | u | 40001 | phoenix2 | phoenix2 | 70001
12 | 10 | p | p | c | u | 40002 | phoenix2 | phoenix2 | 70002
13 | 11 | p | p | c | u | 40003 | phoenix2 | phoenix2 | 70003
14 | 12 | p | p | c | u | 40000 | phoenix3 | phoenix3 | 70000
15 | 13 | p | p | c | u | 40001 | phoenix3 | phoenix3 | 70001
16 | 14 | p | p | c | u | 40002 | phoenix3 | phoenix3 | 70002
17 | 15 | p | p | c | u | 40003 | phoenix3 | phoenix3 | 70003
18 | 16 | p | p | c | u | 40000 | phoenix4 | phoenix4 | 70000
19 | 17 | p | p | c | u | 40001 | phoenix4 | phoenix4 | 70001
20 | 18 | p | p | c | u | 40002 | phoenix4 | phoenix4 | 70002
21 | 19 | p | p | c | u | 40003 | phoenix4 | phoenix4 | 70003
(20 rows)
</pre> Now join this table with the GPU resources table to create a table containing a list of all segments on hosts with GPUs attached: <pre class="example">
DROP TABLE IF EXISTS segments_to_use;
CREATE TABLE segments_to_use AS
SELECT DISTINCT dbid, hostname FROM gp_segment_configuration JOIN host_gpu_mapping_tf USING (hostname)
WHERE role='p' AND content&gt;=0;
SELECT * FROM segments_to_use ORDER BY hostname, dbid;
</pre> <pre class="result">
dbid | hostname
------+----------
2 | phoenix0
3 | phoenix0
4 | phoenix0
5 | phoenix0
6 | phoenix1
7 | phoenix1
8 | phoenix1
9 | phoenix1
14 | phoenix3
15 | phoenix3
16 | phoenix3
17 | phoenix3
18 | phoenix4
19 | phoenix4
20 | phoenix4
21 | phoenix4
(16 rows)
</pre></li>
</ol>
<p><a class="anchor" id="references"></a></p><dl class="section user"><dt>References</dt><dd></dd></dl>
<p>[1] NVIDIA System Management Interface (nvidia-smi) <a href="https://developer.nvidia.com/nvidia-system-management-interface">https://developer.nvidia.com/nvidia-system-management-interface</a></p>
<p>[2] Greenplum 'gp_segment_configuration' table <a href="https://gpdb.docs.pivotal.io/latest/ref_guide/system_catalogs/gp_segment_configuration.html">https://gpdb.docs.pivotal.io/latest/ref_guide/system_catalogs/gp_segment_configuration.html</a></p>
<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd></dd></dl>
<p>See <a class="el" href="madlib__keras__gpu__info_8sql__in.html" title="Utility function to report number and type of GPUs in the database cluster. ">madlib_keras_gpu_info.sql_in</a> </p>
</div><!-- contents -->
</div><!-- doc-content -->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
<li class="footer">Generated on Wed Mar 31 2021 20:45:48 for MADlib by
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
</ul>
</div>
</body>
</html>