| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| * |
| * @file madlib_keras_gpu_info.sql_in |
| * |
| * @brief SQL functions for GPU configuration. |
| * @date Nov 2019 |
| * |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| /** |
| @addtogroup grp_gpu_configuration |
| |
| @brief Utility function to report number and type of GPUs in the database cluster. |
| |
| \warning <em> This MADlib method is still in early stage development. |
| Interface and implementation are subject to change. </em> |
| |
| <div class="toc"><b>Contents</b><ul> |
| <li class="level1"><a href="#get_gpu_config">GPU Configuration</a></li> |
| <li class="level1"><a href="#example">Examples</a></li> |
| <li class="level1"><a href="#references">References</a></li> |
| <li class="level1"><a href="#related">Related Topics</a></li> |
| </ul></div> |
| |
| This utility function reports the number and type of GPUs |
| attached to hosts on the database cluster. |
| This can be useful when determining which segments to use for |
| training deep neural nets. For example, for economic reasons |
you may wish to set up a heterogeneous cluster with GPUs only
on some of the hosts, not all of them. This utility
can help you identify where the GPUs are and direct the compute
| to those locations only in subsequent training and evaluation steps. |
| |
| @anchor get_gpu_config |
@par GPU Configuration
| |
| <pre class="syntax"> |
| gpu_configuration( output_table, |
| source |
| ) |
| </pre> |
| \b Arguments |
| <dl class="arglist"> |
| <dt>output_table</dt> |
| <dd>TEXT. Name of the output table to write out the GPU information. |
| </dd> |
| |
| <dt>source (optional)</dt> |
| <dd>TEXT, default: 'tensorflow'. Source for determining GPU configuration. |
| Using 'tensorflow' returns a description based on what TensorFlow reports. |
| Using 'nvidia' returns a description based on what the Nvidia Systems |
| Management Interface (nvidia-smi) reports [1]. Note that MADlib and Keras will use the |
| TensorFlow information; the lower level nvidia-smi info is provided for convenience. |
| |
| @note |
| If the command 'nvidia-smi -L' returns an error, we do not pass through |
| the error message, but instead will show no GPUs for that host in the output table. |
| You may want to run nvidia-smi from the command line to see error |
| and informational messages. |
| |
| </dd> |
| </dl> |
| |
| <b>Output</b> |
| <br> |
| The output table contains the following: |
| <table class="output"> |
| <tr> |
| <th>hostname</th> |
| <td>TEXT. Name of the host machine in the cluster. |
| Does not include master or mirrors. For PostgreSQL |
| this will always return 'localhost'. |
| </td> |
| </tr> |
| <tr> |
| <th>gpu_descr</th> |
| <td>TEXT. String reported by TensorFlow or nvidia-smi. |
| The formats are different and shown in the examples below. |
| </td> |
| </tr> |
| </table> |
<br>
| |
| @anchor example |
| @par Examples |
| |
| -# Get GPU configuration as per TensorFlow: |
| <pre class="example"> |
| DROP TABLE IF EXISTS host_gpu_mapping_tf; |
| SELECT * FROM madlib.gpu_configuration('host_gpu_mapping_tf'); |
| SELECT * FROM host_gpu_mapping_tf ORDER BY hostname, gpu_descr; |
| </pre> |
| <pre class="result"> |
| hostname | gpu_descr |
| ----------+------------------------------------------------------------------------------------------ |
| phoenix0 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0 |
| phoenix0 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0 |
| phoenix0 | device: 2, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:06.0, compute capability: 6.0 |
| phoenix0 | device: 3, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:07.0, compute capability: 6.0 |
| phoenix1 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0 |
| phoenix1 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0 |
| phoenix3 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0 |
| phoenix3 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0 |
| phoenix3 | device: 2, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:06.0, compute capability: 6.0 |
| phoenix3 | device: 3, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:07.0, compute capability: 6.0 |
| phoenix4 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0 |
| phoenix4 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0 |
| phoenix4 | device: 2, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:06.0, compute capability: 6.0 |
| phoenix4 | device: 3, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:07.0, compute capability: 6.0 |
| (14 rows) |
| </pre> |
| In this heterogeneous cluster there are 4 GPUs attached to hosts 0, 3 and 4. There are 2 GPUs |
| attached to host 1 and no GPUs attached to host 2. |
| |
| -# Get GPU configuration as per nvidia-smi: |
| <pre class="example"> |
| DROP TABLE IF EXISTS host_gpu_mapping_nvidia; |
| SELECT * FROM madlib.gpu_configuration('host_gpu_mapping_nvidia', -- output table |
| 'nvidia' -- source for GPU info |
| ); |
| SELECT * FROM host_gpu_mapping_nvidia ORDER BY hostname, gpu_descr; |
| </pre> |
| <pre class="result"> |
| hostname | gpu_descr |
| ----------+------------------------------------------------------------------------------ |
| phoenix0 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-f2ccc77e-2501-f6ee-4754-069dda256fb2) |
| phoenix0 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-b1fc40ca-c7c6-bc86-f20f-6e9a62cda3f8) |
| phoenix0 | GPU 2: Tesla P100-PCIE-16GB (UUID: GPU-d93bb21b-96f9-7c1d-3bab-cdd92b7bbc9d) |
| phoenix0 | GPU 3: Tesla P100-PCIE-16GB (UUID: GPU-2d79c4a8-479e-2f33-39f8-3ba80b63f830) |
| phoenix1 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-0af6bb1e-5b5b-4988-ad3a-a917e9584702) |
| phoenix1 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-d824c976-a8aa-ef26-a13c-9a9a7fe86bfd) |
| phoenix3 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-3681d0b6-1ec6-0453-fd81-29d88e549cd9) |
| phoenix3 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-d4b1f2e7-b238-ac9a-bbfe-918adeb69472) |
| phoenix3 | GPU 2: Tesla P100-PCIE-16GB (UUID: GPU-42a32ef1-a60c-e599-c8cf-0e669111ab6f) |
| phoenix3 | GPU 3: Tesla P100-PCIE-16GB (UUID: GPU-1cce09c4-6856-8031-be0b-8e8bbf9a10f3) |
| phoenix4 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-a71bdc18-fdd5-ba25-617e-19b23cc8e827) |
| phoenix4 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-f9d13688-7fe6-a029-24d1-985a5659f18f) |
| phoenix4 | GPU 2: Tesla P100-PCIE-16GB (UUID: GPU-06a7f54b-c07a-e87a-20d6-09bd99b19531) |
| phoenix4 | GPU 3: Tesla P100-PCIE-16GB (UUID: GPU-af3b32f3-8bd9-cb75-a8fb-25253b9da926) |
| (14 rows) |
| </pre> |
| |
| -# To get a fuller picture at the segment level, combine with the Greenplum catalog |
| table 'gp_segment_configuration' which contains information about |
| segment instance configuration [2]. Here is an example of this table |
| filtering out master and mirrors: |
| <pre class="example"> |
| SELECT * FROM gp_segment_configuration WHERE role='p' AND content>=0 ORDER BY hostname, dbid; |
| </pre> |
| <pre class="result"> |
| dbid | content | role | preferred_role | mode | status | port | hostname | address | replication_port |
| ------+---------+------+----------------+------+--------+-------+----------+----------+------------------ |
| 2 | 0 | p | p | c | u | 40000 | phoenix0 | phoenix0 | 70000 |
| 3 | 1 | p | p | c | u | 40001 | phoenix0 | phoenix0 | 70001 |
| 4 | 2 | p | p | c | u | 40002 | phoenix0 | phoenix0 | 70002 |
| 5 | 3 | p | p | c | u | 40003 | phoenix0 | phoenix0 | 70003 |
| 6 | 4 | p | p | c | u | 40000 | phoenix1 | phoenix1 | 70000 |
| 7 | 5 | p | p | c | u | 40001 | phoenix1 | phoenix1 | 70001 |
| 8 | 6 | p | p | c | u | 40002 | phoenix1 | phoenix1 | 70002 |
| 9 | 7 | p | p | c | u | 40003 | phoenix1 | phoenix1 | 70003 |
| 10 | 8 | p | p | c | u | 40000 | phoenix2 | phoenix2 | 70000 |
| 11 | 9 | p | p | c | u | 40001 | phoenix2 | phoenix2 | 70001 |
| 12 | 10 | p | p | c | u | 40002 | phoenix2 | phoenix2 | 70002 |
| 13 | 11 | p | p | c | u | 40003 | phoenix2 | phoenix2 | 70003 |
| 14 | 12 | p | p | c | u | 40000 | phoenix3 | phoenix3 | 70000 |
| 15 | 13 | p | p | c | u | 40001 | phoenix3 | phoenix3 | 70001 |
| 16 | 14 | p | p | c | u | 40002 | phoenix3 | phoenix3 | 70002 |
| 17 | 15 | p | p | c | u | 40003 | phoenix3 | phoenix3 | 70003 |
| 18 | 16 | p | p | c | u | 40000 | phoenix4 | phoenix4 | 70000 |
| 19 | 17 | p | p | c | u | 40001 | phoenix4 | phoenix4 | 70001 |
| 20 | 18 | p | p | c | u | 40002 | phoenix4 | phoenix4 | 70002 |
| 21 | 19 | p | p | c | u | 40003 | phoenix4 | phoenix4 | 70003 |
| (20 rows) |
| </pre> |
| Now join this table with the GPU resources table to create a table containing a |
| list of all segments on hosts with GPUs attached: |
| <pre class="example"> |
| DROP TABLE IF EXISTS segments_to_use; |
| CREATE TABLE segments_to_use AS |
| SELECT DISTINCT dbid, hostname FROM gp_segment_configuration JOIN host_gpu_mapping_tf USING (hostname) |
| WHERE role='p' AND content>=0; |
| SELECT * FROM segments_to_use ORDER BY hostname, dbid; |
| </pre> |
| <pre class="result"> |
| dbid | hostname |
| ------+---------- |
| 2 | phoenix0 |
| 3 | phoenix0 |
| 4 | phoenix0 |
| 5 | phoenix0 |
| 6 | phoenix1 |
| 7 | phoenix1 |
| 8 | phoenix1 |
| 9 | phoenix1 |
| 14 | phoenix3 |
| 15 | phoenix3 |
| 16 | phoenix3 |
| 17 | phoenix3 |
| 18 | phoenix4 |
| 19 | phoenix4 |
| 20 | phoenix4 |
| 21 | phoenix4 |
| (16 rows) |
| </pre> |
| |
| @anchor references |
| @par References |
| |
| [1] Nvidia Systems Management Interface (nvidia-smi) https://developer.nvidia.com/nvidia-system-management-interface |
| |
| [2] Greenplum 'gp_segment_configuration' table https://gpdb.docs.pivotal.io/latest/ref_guide/system_catalogs/gp_segment_configuration.html |
| |
| @anchor related |
| @par Related Topics |
| |
| See madlib_keras_gpu_info.sql_in |
| |
| */ |
| |
-- Returns one TEXT entry per GPU device visible to TensorFlow on the host
-- where this function executes (empty array when no GPUs are detected).
-- Runs on each segment when dispatched by gpu_configuration().
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_info_tensorflow() RETURNS TEXT[] as
$$
PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras_gpu_info')
return madlib_keras_gpu_info.GPUInfoFunctions.get_gpu_info_from_tensorflow()
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
| |
-- Returns one TEXT entry per GPU reported by the nvidia-smi -L command on the
-- host where this function executes. If nvidia-smi fails, the error is not
-- propagated; an empty array is returned instead (see module docs above).
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_info_nvidia() RETURNS TEXT[] as
$$
PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras_gpu_info')
return madlib_keras_gpu_info.GPUInfoFunctions.get_gpu_info_from_nvidia()
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
| |
-- Main entry point: writes (hostname, gpu_descr) rows describing the GPUs in
-- the cluster into output_table.
--   output_table : name of the table to create with the GPU information
--   source       : info source, tensorflow (default when NULL) or nvidia
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_configuration(output_table text, source text)
RETURNS VOID AS
$$
    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_gpu_info')
    from utilities.control import MinWarning
    # Use both context managers. The previous form
    #   with AOControl(False) and MinWarning("error"):
    # evaluated the "and" expression to MinWarning alone, so AOControl
    # was constructed but never entered and AO-table control was skipped.
    with AOControl(False), MinWarning("error"):
        madlib_keras_gpu_info.gpu_configuration(schema_madlib, output_table, source)
$$
LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
| |
-- Single-argument overload: NULL source falls back to the default
-- (tensorflow) inside the two-argument implementation above.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_configuration(output_table text)
RETURNS VOID AS
$$
SELECT MADLIB_SCHEMA.gpu_configuration($1, NULL);
$$
LANGUAGE sql;
| |
| |
-- Zero-argument overload: returns the usage/help message for this module
-- rather than performing any GPU discovery.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_configuration()
RETURNS VARCHAR AS $$
PythonFunctionBodyOnly(`deep_learning', `madlib_keras_gpu_info')
return madlib_keras_gpu_info.gpu_configuration_help(schema_madlib)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');