DL: Add new helper function for gpu_configuration (#455)

* DL: Add new helper function for gpu_configuration

JIRA: MADLIB-1390

This commit adds a new helper function to query the GPU configuration of
the cluster. The purpose of this function is to list the state of the
cluster w.r.t. GPUs, however it is configured. This will be useful for
supporting asymmetric clusters.

It also takes a source param, which can be one of 'tensorflow' or
'nvidia', and defaults to 'tensorflow'.
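
The two sources are selected like so (the output table name
'host_gpu_info' is illustrative):

SELECT madlib.gpu_configuration('host_gpu_info');            -- source defaults to 'tensorflow'
SELECT madlib.gpu_configuration('host_gpu_info', 'nvidia');  -- use nvidia-smi instead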

Note: Returns [] for any host where `nvidia-smi -L` fails, regardless of the type of failure.

Example for gpdb

List the state of the cluster, however it is configured:

SELECT madlib.gpu_configuration('host_gpu_info');
SELECT * FROM host_gpu_info;

 hostname |     gpu_descr
----------+-------------------
 host1    | NVIDIA Tesla P100
 host1    | NVIDIA Tesla P100
 host2    | Super Duper GPU
 host2    | Super Duper GPU
(4 rows)

Example for postgres

SELECT madlib.gpu_configuration('host_gpu_info');
SELECT * FROM host_gpu_info;

 hostname  |     gpu_descr
-----------+-------------------
 localhost | NVIDIA Tesla P100
(1 row)

Co-authored-by: Ekta Khanna <ekhanna@pivotal.io>


diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index b31dedb..38b61c5 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -290,6 +290,7 @@
     @brief A collection of modules for deep learning.
     @details A collection of modules for deep learning.
     @{
+        @defgroup grp_gpu_configuration GPU Configuration
         @defgroup grp_keras Keras
         @defgroup grp_keras_model_arch Load Model
         @defgroup grp_input_preprocessor_dl Preprocessor for Images
diff --git a/src/ports/postgres/modules/deep_learning/gpu_info_from_tf.py_in b/src/ports/postgres/modules/deep_learning/gpu_info_from_tf.py_in
new file mode 100644
index 0000000..b1e59b1
--- /dev/null
+++ b/src/ports/postgres/modules/deep_learning/gpu_info_from_tf.py_in
@@ -0,0 +1,39 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+@file gpu_info_from_tf.py_in
+
+@brief Prints the GPU descriptions available on a host using the TensorFlow
+API. This file is intended to be invoked via subprocess. See
+madlib_keras_gpu_info.py_in for more details.
+"""
+
+from tensorflow.python.client import device_lib
+from keras import backend as K
+
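+# A throwaway TF session probe: allow_growth=True keeps TensorFlow from
+# pre-allocating all GPU memory on the host while we list local devices.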
+config = K.tf.ConfigProto()
+config.gpu_options.allow_growth = True
+sess = K.tf.Session(config=config)
+local_device_protos = device_lib.list_local_devices()
+K.clear_session()
+sess.close()
+if local_device_protos:
+    for x in local_device_protos:
+        if x.device_type == 'GPU':
+            print x.physical_device_desc
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in
new file mode 100644
index 0000000..4eec8a7
--- /dev/null
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in
@@ -0,0 +1,256 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+@file madlib_keras_gpu_info.py_in
+
+@brief GPU configuration helper function
+
+@namespace madlib_keras_gpu_info
+"""
+
+import os
+import subprocess
+
+import plpy
+from utilities.utilities import is_platform_pg
+from utilities.utilities import unique_string
+from utilities.validate_args import output_tbl_valid
+
+class OutputInfoSchema:
+    TEMP_INFO_TABLE = unique_string(desp='gpu_info')
+    SEG_ID_COL = 'gp_seg_id'
+    GPU_DESCR_COL = 'gpu_descr'
+
+
+class Source:
+    NVIDIA = 'nvidia'
+    TENSORFLOW = 'tensorflow'
+
+
+class GPUInfoFunctions:
+    @staticmethod
+    def get_gpu_info_from_nvidia():
+        """
+        This function will run only on segment(s). Make sure not to run any
+        non-SELECT plpy.execute here.
+        :return: list of gpu descriptions as returned by nvidia-smi -L.
+        """
+        try:
+            return subprocess.check_output(["nvidia-smi", "-L"]).splitlines()
+        except Exception:  # Handle case when nvidia-smi -L fails
+            return []
+
+    @staticmethod
+    def get_gpu_info_from_tensorflow():
+        """
+        This function will run only on segment(s). Make sure not to run any
+        non-SELECT plpy.execute here.
+        :return: list of gpu descriptions as returned by tensorflow
+        """
+        current_working_dir = os.path.dirname(os.path.realpath(__file__))
+        gpus = subprocess.check_output(["python", "gpu_info_from_tf.py"],
+                                       cwd=current_working_dir).splitlines()
+        return gpus
+
+
+def gpu_configuration(schema_madlib, output_table, source):
+    """
+    :return: List of gpus along with their hostname in the format
+        GPDB
+        hostname |     gpu_descr
+        ---------+-------------------
+        host1    | NVIDIA Tesla P100
+        host1    | NVIDIA Tesla P100
+        host2    | Super Duper GPU
+        host2    | Super Duper GPU
+        1. We use gp_dist_random to run either the nvidia-smi UDF or the
+            tensorflow UDF on all the hosts (see gpu_info_nvidia/gpu_info_tensorflow).
+        2. Also we do not need to run the tf/nvidia UDF on all the segments,
+            just one segment per host. That's why we group the output of
+            gp_segment_configuration by hostname and get the min segment from
+            each host.
+        3. To get the hostname along with the gpu description, we have to join
+            the output of nvidia/tf UDF with gp_segment_configuration and filter
+            out the following
+            * master
+            * mirror segments
+            * empty/null gpu description
+        Workflow for gpdb
+        1. Run query to get min seg ids on each host. This is so that we can run
+        the gpu UDF on just one segment per host.
+        2. Create a table by running the tf/nvidia UDF on the segment ids returned
+        from the previous step. Note that this table will only contain the output
+        of the UDF and the segment id itself. This table does not contain hostnames.
+        3. To get the hostname associated with the segment id, we need to join the
+        table created in step 2 with gp_segment_configuration.
+        It's important to note that we could merge all 3 queries into one, but
+        then a redistribution would happen before the UDF runs, which means the
+        UDF would not run on the segments that we pass in to the query.
+        To avoid this, we broke the query down into 3 parts so that the UDF is
+        always run on the intended segments.
+
+        POSTGRES
+        hostname  |     gpu_descr
+        ----------+-------------------
+        localhost | NVIDIA Tesla P100
+    """
+    module_name = 'madlib_keras_gpu_info'
+    output_tbl_valid(output_table, module_name)
+
+    if not source:
+        source = Source.TENSORFLOW
+    source = source.lower()
+    if source not in (Source.TENSORFLOW, Source.NVIDIA):
+        plpy.error("DL: source has to be one of {0} or {1}".format(
+            Source.TENSORFLOW, Source.NVIDIA))
+
+    gpu_fn_name = 'gpu_info_{0}'.format(source)
+    if is_platform_pg():
+        gpu_for_postgres(schema_madlib, output_table, gpu_fn_name)
+    else:
+        gpu_for_gpdb(schema_madlib, output_table, gpu_fn_name)
+
+
+def gpu_for_postgres(schema_madlib, output_table, gpu_fn_name):
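+    # Flatten the TEXT[] of GPU descriptions into one row per GPU and drop
+    # empty/null entries; on postgres the hostname is always 'localhost'.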
+    gpu_info_query = """
+    CREATE TABLE {0} AS
+    SELECT 'localhost' AS hostname, {1} FROM (SELECT unnest({2}.{3}()) AS {1}) s1
+    WHERE {1} IS NOT NULL AND {1} != ''
+        """.format(output_table, OutputInfoSchema.GPU_DESCR_COL,
+                   schema_madlib, gpu_fn_name)
+    plpy.execute(gpu_info_query)
+
+
+def gpu_for_gpdb(schema_madlib, output_table, gpu_fn_name):
+    min_seg_on_each_host = get_min_seg_ids_on_each_host()
+
+    create_gpu_info_table_without_hostname(schema_madlib, gpu_fn_name,
+                                           min_seg_on_each_host)
+
+    create_gpu_info_table_with_hostname(output_table)
+
+    plpy.execute("DROP TABLE IF EXISTS {0}".format(OutputInfoSchema.TEMP_INFO_TABLE))
+
+
+def get_min_seg_ids_on_each_host():
+    """
+    Run query to get min seg ids on each host. This is so that we can run
+    the gpu UDF on just one segment per host.
+    :return: List of min seg id per host
+    """
+    min_seg_id_alias = 'min_seg_id'
+    min_seg_query = """
+    SELECT {min_seg_id_alias} FROM
+    (SELECT hostname, min(content) AS {min_seg_id_alias}
+    FROM gp_segment_configuration WHERE content != -1 AND role='p'
+    GROUP BY hostname) min_seg_id_subquery
+    """.format(**locals())
+    min_seg_on_each_host = plpy.execute(min_seg_query)
+
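+    # Collapse the result rows into a comma-separated string for use in an
+    # IN clause, e.g. [{'min_seg_id': 0}, {'min_seg_id': 4}] -> '0,4'
+    # (illustrative values).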
+    min_seg_on_each_host = ','.join([str(seg[min_seg_id_alias])
+                                     for seg in min_seg_on_each_host])
+    return min_seg_on_each_host
+
+
+def create_gpu_info_table_without_hostname(schema_madlib, gpu_fn_name,
+                                           min_seg_on_each_host):
+    """
+    Create a table by running the tf/nvidia UDF on the segment ids returned
+    from the previous step. Note that this table will only contain the output
+    of the UDF and the segment id itself. This table does not contain hostnames.
+    :param schema_madlib:
+    :param gpu_fn_name:
+    :param min_seg_on_each_host:
+    """
+    gpu_info_per_host_query = """
+    CREATE TABLE {0} AS SELECT gp_segment_id AS {1}, {2}.{3}()
+    AS {4} FROM gp_dist_random('gp_id') WHERE gp_segment_id IN ({5})
+    """.format(OutputInfoSchema.TEMP_INFO_TABLE,
+               OutputInfoSchema.SEG_ID_COL,
+               schema_madlib, gpu_fn_name,
+               OutputInfoSchema.GPU_DESCR_COL,
+               min_seg_on_each_host)
+    plpy.execute(gpu_info_per_host_query)
+
+
+def create_gpu_info_table_with_hostname(output_table):
+    """
+    Create the final output table that contains the hostname and the gpu description.
+    To create this table, we need to join the table created in
+    create_gpu_info_table_without_hostname with gp_segment_configuration.
+    """
+    final_join_query = """
+    CREATE TABLE {0} AS
+    SELECT hostname, {1} FROM
+    (
+    SELECT hostname, unnest({1}) AS {1} FROM {2}
+    JOIN
+    gp_segment_configuration ON {3}=content WHERE content != -1 AND role='p'
+    ) s1
+    WHERE {1} != '' AND {1} IS NOT NULL ORDER BY 1,2;
+    """.format(output_table, OutputInfoSchema.GPU_DESCR_COL,
+               OutputInfoSchema.TEMP_INFO_TABLE,
+               OutputInfoSchema.SEG_ID_COL)
+    plpy.execute(final_join_query)
+
+
+def gpu_configuration_help(schema_madlib):
+    """
+        Help function for gpu configuration
+
+        Args:
+            @param schema_madlib
+
+        Returns:
+            String. Help/usage information
+        """
+
+    help_string = """
+    Utility function to report number and type of GPUs on the database cluster.
+    -----------------------------------------------------------------------
+                                USAGE
+    -----------------------------------------------------------------------
+     SELECT {schema_madlib}.gpu_configuration(
+        output_table,            -- Name of the output table to write out the 
+                                    GPU information.  
+        source                   -- Default: 'tensorflow'. Source for determining
+                                    GPU configuration.
+                                    Using 'tensorflow' returns a description based
+                                    on what TensorFlow reports.
+                                    Using 'nvidia' returns a description based 
+                                    on what the Nvidia Systems Management Interface
+                                    (nvidia-smi) reports [1].  
+                                    Note that MADlib and Keras will use the TensorFlow
+                                    information; the lower level nvidia-smi info 
+                                    is provided for convenience.
+    );
+
+    -----------------------------------------------------------------------
+                                OUTPUT
+    -----------------------------------------------------------------------
+    The output table ('output_table' above) contains the following columns:
+
+    hostname:   Name of the host machine in the cluster.
+                Does not include master or mirrors.  For PostgreSQL this will
+                always return 'localhost'.
+    gpu_descr:  String reported by TensorFlow or nvidia-smi.
+    """
+
+    return help_string.format(schema_madlib=schema_madlib)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.sql_in
new file mode 100644
index 0000000..94bb121
--- /dev/null
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.sql_in
@@ -0,0 +1,279 @@
+/* ----------------------------------------------------------------------- *//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *
+ * @file madlib_keras_gpu_info.sql_in
+ *
+ * @brief SQL functions for GPU configuration.
+ * @date Nov 2019
+ *
+ *
+ *//* ----------------------------------------------------------------------- */
+
+m4_include(`SQLCommon.m4')
+
+/**
+@addtogroup grp_gpu_configuration
+
+@brief Utility function to report number and type of GPUs on the database cluster.
+
+\warning <em> This MADlib method is still in early stage development.
+Interface and implementation are subject to change. </em>
+
+<div class="toc"><b>Contents</b><ul>
+<li class="level1"><a href="#get_gpu_config">GPU Configuration</a></li>
+<li class="level1"><a href="#example">Examples</a></li>
+<li class="level1"><a href="#references">References</a></li>
+<li class="level1"><a href="#related">Related Topics</a></li>
+</ul></div>
+
+This utility function reports the number and type of GPUs
+attached to hosts on the database cluster.
+This can be useful when determining which segments to use for
+training deep neural nets.  For example, for economic reasons
+you may wish to set up a heterogeneous cluster with GPUs on only
+some of the hosts, not all of them. This utility
+can help you identify where the GPUs are and direct the compute
+to those locations only in subsequent training and evaluation steps.
+
+@anchor get_gpu_config
+@par GPU Configuration
+
+<pre class="syntax">
+gpu_configuration( output_table,
+                   source
+                   )
+</pre>
+\b Arguments
+<dl class="arglist">
+  <dt>output_table</dt>
+  <dd>TEXT. Name of the output table to write out the GPU information.
+  </dd>
+
+  <dt>source (optional)</dt>
+  <dd>TEXT, default: 'tensorflow'. Source for determining GPU configuration.
+  Using 'tensorflow' returns a description based on what TensorFlow reports.
+  Using 'nvidia' returns a description based on what the Nvidia Systems
+  Management Interface (nvidia-smi) reports [1].  Note that MADlib and Keras will use the
+  TensorFlow information; the lower level nvidia-smi info is provided for convenience.
+
+  @note
+  If the command 'nvidia-smi -L' returns an error, we do not pass through
+  the error message, but instead will show no GPUs for that host in the output table.
+  You may want to run nvidia-smi from the command line to see error
+  and informational messages.
+
+  </dd>
+</dl>
+
+<b>Output</b>
+<br>
+    The output table contains the following:
+    <table class="output">
+      <tr>
+        <th>hostname</th>
+        <td>TEXT. Name of the host machine in the cluster.
+        Does not include master or mirrors.  For PostgreSQL
+        this will always return 'localhost'.
+        </td>
+      </tr>
+      <tr>
+        <th>gpu_descr</th>
+        <td>TEXT. String reported by TensorFlow or nvidia-smi.
+        The formats are different and shown in the examples below.
+        </td>
+      </tr>
+    </table>
+</br>
+
+@anchor example
+@par Examples
+
+-# Get GPU configuration as per TensorFlow:
+<pre class="example">
+DROP TABLE IF EXISTS host_gpu_mapping_tf;
+SELECT * FROM madlib.gpu_configuration('host_gpu_mapping_tf');
+SELECT * FROM host_gpu_mapping_tf ORDER BY hostname, gpu_descr;
+</pre>
+<pre class="result">
+ hostname |                                        gpu_descr
+----------+------------------------------------------------------------------------------------------
+ phoenix0 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
+ phoenix0 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0
+ phoenix0 | device: 2, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:06.0, compute capability: 6.0
+ phoenix0 | device: 3, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:07.0, compute capability: 6.0
+ phoenix1 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
+ phoenix1 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0
+ phoenix3 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
+ phoenix3 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0
+ phoenix3 | device: 2, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:06.0, compute capability: 6.0
+ phoenix3 | device: 3, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:07.0, compute capability: 6.0
+ phoenix4 | device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
+ phoenix4 | device: 1, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:05.0, compute capability: 6.0
+ phoenix4 | device: 2, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:06.0, compute capability: 6.0
+ phoenix4 | device: 3, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:07.0, compute capability: 6.0
+(14 rows)
+</pre>
+In this heterogeneous cluster there are 4 GPUs attached to hosts 0, 3 and 4.  There are 2 GPUs
+attached to host 1 and no GPUs attached to host 2.
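+To see the per-host GPU counts directly, here is an illustrative aggregate
+over the output table:
+<pre class="example">
+SELECT hostname, count(*) AS gpu_count
+FROM host_gpu_mapping_tf
+GROUP BY hostname
+ORDER BY hostname;
+</pre>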
+
+-# Get GPU configuration as per nvidia-smi:
+<pre class="example">
+DROP TABLE IF EXISTS host_gpu_mapping_nvidia;
+SELECT * FROM madlib.gpu_configuration('host_gpu_mapping_nvidia', -- output table
+                                       'nvidia'                   -- source for GPU info
+                                       );
+SELECT * FROM host_gpu_mapping_nvidia ORDER BY hostname, gpu_descr;
+</pre>
+<pre class="result">
+ hostname |                                  gpu_descr
+----------+------------------------------------------------------------------------------
+ phoenix0 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-f2ccc77e-2501-f6ee-4754-069dda256fb2)
+ phoenix0 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-b1fc40ca-c7c6-bc86-f20f-6e9a62cda3f8)
+ phoenix0 | GPU 2: Tesla P100-PCIE-16GB (UUID: GPU-d93bb21b-96f9-7c1d-3bab-cdd92b7bbc9d)
+ phoenix0 | GPU 3: Tesla P100-PCIE-16GB (UUID: GPU-2d79c4a8-479e-2f33-39f8-3ba80b63f830)
+ phoenix1 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-0af6bb1e-5b5b-4988-ad3a-a917e9584702)
+ phoenix1 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-d824c976-a8aa-ef26-a13c-9a9a7fe86bfd)
+ phoenix3 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-3681d0b6-1ec6-0453-fd81-29d88e549cd9)
+ phoenix3 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-d4b1f2e7-b238-ac9a-bbfe-918adeb69472)
+ phoenix3 | GPU 2: Tesla P100-PCIE-16GB (UUID: GPU-42a32ef1-a60c-e599-c8cf-0e669111ab6f)
+ phoenix3 | GPU 3: Tesla P100-PCIE-16GB (UUID: GPU-1cce09c4-6856-8031-be0b-8e8bbf9a10f3)
+ phoenix4 | GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-a71bdc18-fdd5-ba25-617e-19b23cc8e827)
+ phoenix4 | GPU 1: Tesla P100-PCIE-16GB (UUID: GPU-f9d13688-7fe6-a029-24d1-985a5659f18f)
+ phoenix4 | GPU 2: Tesla P100-PCIE-16GB (UUID: GPU-06a7f54b-c07a-e87a-20d6-09bd99b19531)
+ phoenix4 | GPU 3: Tesla P100-PCIE-16GB (UUID: GPU-af3b32f3-8bd9-cb75-a8fb-25253b9da926)
+(14 rows)
+</pre>
+
+-# To get a fuller picture at the segment level, combine with the Greenplum catalog
+table 'gp_segment_configuration' which contains information about
+segment instance configuration [2].  Here is an example of this table
+filtering out master and mirrors:
+<pre class="example">
+SELECT * FROM gp_segment_configuration WHERE role='p' AND content>=0 ORDER BY hostname, dbid;
+</pre>
+<pre class="result">
+ dbid | content | role | preferred_role | mode | status | port  | hostname | address  | replication_port
+------+---------+------+----------------+------+--------+-------+----------+----------+------------------
+    2 |       0 | p    | p              | c    | u      | 40000 | phoenix0 | phoenix0 |            70000
+    3 |       1 | p    | p              | c    | u      | 40001 | phoenix0 | phoenix0 |            70001
+    4 |       2 | p    | p              | c    | u      | 40002 | phoenix0 | phoenix0 |            70002
+    5 |       3 | p    | p              | c    | u      | 40003 | phoenix0 | phoenix0 |            70003
+    6 |       4 | p    | p              | c    | u      | 40000 | phoenix1 | phoenix1 |            70000
+    7 |       5 | p    | p              | c    | u      | 40001 | phoenix1 | phoenix1 |            70001
+    8 |       6 | p    | p              | c    | u      | 40002 | phoenix1 | phoenix1 |            70002
+    9 |       7 | p    | p              | c    | u      | 40003 | phoenix1 | phoenix1 |            70003
+   10 |       8 | p    | p              | c    | u      | 40000 | phoenix2 | phoenix2 |            70000
+   11 |       9 | p    | p              | c    | u      | 40001 | phoenix2 | phoenix2 |            70001
+   12 |      10 | p    | p              | c    | u      | 40002 | phoenix2 | phoenix2 |            70002
+   13 |      11 | p    | p              | c    | u      | 40003 | phoenix2 | phoenix2 |            70003
+   14 |      12 | p    | p              | c    | u      | 40000 | phoenix3 | phoenix3 |            70000
+   15 |      13 | p    | p              | c    | u      | 40001 | phoenix3 | phoenix3 |            70001
+   16 |      14 | p    | p              | c    | u      | 40002 | phoenix3 | phoenix3 |            70002
+   17 |      15 | p    | p              | c    | u      | 40003 | phoenix3 | phoenix3 |            70003
+   18 |      16 | p    | p              | c    | u      | 40000 | phoenix4 | phoenix4 |            70000
+   19 |      17 | p    | p              | c    | u      | 40001 | phoenix4 | phoenix4 |            70001
+   20 |      18 | p    | p              | c    | u      | 40002 | phoenix4 | phoenix4 |            70002
+   21 |      19 | p    | p              | c    | u      | 40003 | phoenix4 | phoenix4 |            70003
+(20 rows)
+</pre>
+Now join this table with the GPU resources table to create a table containing a
+list of all segments on hosts with GPUs attached:
+<pre class="example">
+DROP TABLE IF EXISTS segments_to_use;
+CREATE TABLE segments_to_use AS
+  SELECT DISTINCT dbid, hostname FROM gp_segment_configuration JOIN host_gpu_mapping_tf USING (hostname)
+  WHERE role='p' AND content>=0;
+SELECT * FROM segments_to_use ORDER BY hostname, dbid;
+</pre>
+<pre class="result">
+ dbid | hostname
+------+----------
+    2 | phoenix0
+    3 | phoenix0
+    4 | phoenix0
+    5 | phoenix0
+    6 | phoenix1
+    7 | phoenix1
+    8 | phoenix1
+    9 | phoenix1
+   14 | phoenix3
+   15 | phoenix3
+   16 | phoenix3
+   17 | phoenix3
+   18 | phoenix4
+   19 | phoenix4
+   20 | phoenix4
+   21 | phoenix4
+(16 rows)
+</pre>
+
+@anchor references
+@par References
+
+[1] Nvidia Systems Management Interface (nvidia-smi) https://developer.nvidia.com/nvidia-system-management-interface
+
+[2] Greenplum 'gp_segment_configuration' table https://gpdb.docs.pivotal.io/latest/ref_guide/system_catalogs/gp_segment_configuration.html
+
+@anchor related
+@par Related Topics
+
+See madlib_keras_gpu_info.sql_in
+
+*/
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_info_tensorflow() RETURNS TEXT[] as
+$$
+    PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras_gpu_info')
+    return madlib_keras_gpu_info.GPUInfoFunctions.get_gpu_info_from_tensorflow()
+$$ LANGUAGE plpythonu
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_info_nvidia() RETURNS TEXT[] as
+$$
+    PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras_gpu_info')
+    return madlib_keras_gpu_info.GPUInfoFunctions.get_gpu_info_from_nvidia()
+$$ LANGUAGE plpythonu
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_configuration(output_table text, source text)
+RETURNS VOID AS
+$$
+    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_gpu_info')
+    from utilities.control import MinWarning
+    with AOControl(False), MinWarning("error"):
+        madlib_keras_gpu_info.gpu_configuration(schema_madlib, output_table, source)
+$$
+LANGUAGE plpythonu
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_configuration(output_table text)
+RETURNS VOID AS
+$$
+    SELECT MADLIB_SCHEMA.gpu_configuration($1, NULL);
+$$
+LANGUAGE sql;
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gpu_configuration()
+    RETURNS VARCHAR AS $$
+    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_gpu_info')
+    return madlib_keras_gpu_info.gpu_configuration_help(schema_madlib)
+$$ LANGUAGE plpythonu IMMUTABLE
+    m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_gpu_info.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_gpu_info.sql_in
new file mode 100644
index 0000000..a8376db
--- /dev/null
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_gpu_info.sql_in
@@ -0,0 +1,42 @@
+/* ---------------------------------------------------------------------*//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *//* ---------------------------------------------------------------------*/
+
+SELECT assert(trap_error($$SELECT * from gpu_configuration('output_table', 'invalid_source')$$) = 1, 'Invalid source');
+
+DROP TABLE IF EXISTS output_table_tf;
+SELECT gpu_configuration('output_table_tf');
+SELECT * FROM output_table_tf;
+
+DROP TABLE IF EXISTS output_table_tf;
+SELECT gpu_configuration('output_table_tf', NULL);
+SELECT * FROM output_table_tf;
+
+DROP TABLE IF EXISTS output_table_nvidia;
+SELECT gpu_configuration('output_table_nvidia','NVIDIA');
+SELECT * FROM output_table_nvidia;
+
+DROP TABLE IF EXISTS output_table_tf;
+SELECT gpu_configuration('output_table_tf', 'tensorflow');
+SELECT * FROM output_table_tf;
+
+DROP TABLE IF EXISTS output_table_tf;
+SELECT * FROM gpu_configuration('output_table_tf');
+SELECT * FROM output_table_tf;
\ No newline at end of file
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_gpu_info.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_gpu_info.py_in
new file mode 100644
index 0000000..cc885f9
--- /dev/null
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_gpu_info.py_in
@@ -0,0 +1,88 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+m4_changequote(`<!', `!>')
+
+import sys
+import numpy as np
+import os
+from os import path
+# Add dl module to the pythonpath.
+sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))))
+sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))
+
+import unittest
+from mock import *
+import plpy_mock as plpy
+
+
+
+class GPUInfoTestCase(unittest.TestCase):
+    def setUp(self):
+        self.plpy_mock = Mock(spec='error')
+        patches = {
+            'plpy': plpy
+        }
+
+        self.plpy_mock_execute = MagicMock()
+        plpy.execute = self.plpy_mock_execute
+
+        self.module_patcher = patch.dict('sys.modules', patches)
+        self.module_patcher.start()
+        import madlib_keras_gpu_info
+        self.module = madlib_keras_gpu_info
+        self.module.output_tbl_valid = Mock()
+        self.table_name = 'does_not_matter'
+
+    def tearDown(self):
+        self.module_patcher.stop()
+
+    def test_gpu_configuration_invalid_source(self):
+        with self.assertRaises(plpy.PLPYException):
+            self.module.gpu_configuration('schema', self.table_name, 'invalid_source')
+
+    def test_gpu_configuration_none_source(self):
+        self.module.gpu_configuration('schema', self.table_name, None)
+        self.assertIn('tensorflow', str(self.plpy_mock_execute.call_args_list))
+        self.assertNotIn('nvidia', str(self.plpy_mock_execute.call_args_list))
+
+    def test_gpu_configuration_tensorflow(self):
+        self.module.gpu_configuration('schema', self.table_name, 'tensorflow')
+        self.assertIn('tensorflow', str(self.plpy_mock_execute.call_args_list))
+        self.assertNotIn('nvidia', str(self.plpy_mock_execute.call_args_list))
+
+    def test_gpu_configuration_nvidia(self):
+        self.module.gpu_configuration('schema', self.table_name, 'nvidia')
+        self.assertIn('nvidia', str(self.plpy_mock_execute.call_args_list))
+        self.assertNotIn('tensorflow', str(self.plpy_mock_execute.call_args_list))
+
+    def test_gpu_configuration_tensorflow_uppercase(self):
+        self.module.gpu_configuration('schema', self.table_name, 'TENSORFLOW')
+        self.assertIn('tensorflow', str(self.plpy_mock_execute.call_args_list))
+        self.assertNotIn('nvidia', str(self.plpy_mock_execute.call_args_list))
+
+    def test_gpu_configuration_nvidia_mixedcase(self):
+        self.module.gpu_configuration('schema', self.table_name, 'NVidIa')
+        self.assertIn('nvidia', str(self.plpy_mock_execute.call_args_list))
+        self.assertNotIn('tensorflow', str(self.plpy_mock_execute.call_args_list))
+
+
+if __name__ == '__main__':
+    unittest.main()
+# ---------------------------------------------------------------------