utilities: Add debug.py_in Starting a module of debugging functions we can use, to easily turn on or off certain kinds of debug messages. debug.print_timings(force=False) : Used in madlib_keras_fit_multiple.py_in to print timing information for fit_multiple() Breaks down run_training() into _hop_time_, _uda_time_, _truncate_time_, _copy_time_, and _delete_time_ Enable all by uncommenting: debug.timings_enabled = True at top of madlib_keras_fit_multiple.py_in Enable a single call to debug.print_timings(), by passing force=True. Will print even if debug.timings_enabled is not set. debug.print_mst_keys(force=False) : print a map between mst_keys and seg_id's as the mst_keys hop around from segment to segment... to verify correct MOP behavior. debug.plpy.execute(query) prints query, EXPLAIN ANALYZE, and timing of a query while executing it PR #521

commit: 666a5f11966a1c389e1bda594cfa1f361422f18b [log] [tgz]
author: Domino Valdano <dvaldano@pivotal.io> Thu May 07 23:48:34 2020 +0000
committer: Domino Valdano <dvaldano@vmware.com> Mon Jan 04 12:58:22 2021 -0800
tree: 6f99a810e467f6485644d87d773bccf57dea663e
parent: cf06f17e612401ab08ed81fa9bc4bb6e2920ab4d [diff]
diff --git a/src/ports/postgres/modules/utilities/debug.py_in b/src/ports/postgres/modules/utilities/debug.py_in
new file mode 100644
index 0000000..5e288be
--- /dev/null
+++ b/src/ports/postgres/modules/utilities/debug.py_in

@@ -0,0 +1,206 @@
+import plpy as plpy_orig
+import time
+from deep_learning.madlib_keras_model_selection import ModelSelectionSchema
+from deep_learning.madlib_keras_helper import DISTRIBUTION_KEY_COLNAME
+
+mst_key_col = ModelSelectionSchema.MST_KEY
+dist_key_col = DISTRIBUTION_KEY_COLNAME
+
+start_times = dict()
+timings_enabled = False
+
+def start_timing(msg, force=False):
+    if timings_enabled or force:
+        start_times[msg] = time.time()
+        plpy_orig.info("|_{}_time_HDR|Elapsed (s)|Current|Current (s)|Start|Start (s)|".format(msg))
+
+def print_timing(msg, force=False):
+    if timings_enabled or force:
+        try:
+            start_time = start_times[msg]
+        except:
+            raise Exception(
+                "print_timing({msg}) called with no start_timing({msg})!".format(msg=msg)
+            )
+        current_time = time.time() 
+        plpy_orig.info(
+            '|_{0}_time|{1}|{2}|{3}|{4}|{5}'.format(
+                msg,
+                current_time - start_time,
+                time.ctime(current_time),
+                current_time,
+                time.ctime(start_time),
+                start_time
+            )
+        )
+
+mst_keys_enabled = False
+def print_mst_keys(table, label, force=False):
+    if not (mst_keys_enabled or force):
+        return
+
+    res = plpy_orig.execute("""
+        SELECT gp_segment_id AS seg_id,
+               {mst_key_col},
+               {dist_key_col}
+        FROM {table} ORDER BY {dist_key_col}
+    """.format(dist_key_col=dist_key_col,
+               table=table,
+               mst_key_col=mst_key_col))
+
+    plpy_orig.info("|_MST_KEYS_{label}_HDR|mst_key|seg_id|dist_key|table".format(**locals()))
+    if not res:
+        plpy_orig.error("{table} is empty!  Aborting".format(table=table))
+
+    for r in res:
+        seg_id = r['seg_id']
+        mst_key = r['mst_key']
+        dist_key = r[dist_key_col]
+        plpy_orig.info("|_MST_KEYS_{label}|{mst_key}|{seg_id}|{dist_key}|{table}".format(**locals()))
+
+class prep_entry:
+    def __init__(self, sql, args, kwargs):
+        self.sql = sql
+        self.args = args
+        self.kwargs = kwargs
+
+def plpy_prepare(*args, **kwargs):
+    """ debug.plpy.prepare(sql, ..., force=False)
+
+        If you want debug.plpy.execute() to be able
+        to display the query and/or plan for a
+        prepared query, you must call this function
+        (as debug.plpy.prepare() ) in place of
+        regular plpy.prepare().  Otherwise the execute
+        wrapper will not have access to the query string,
+        so you will only get timing info (no plan).
+    """ 
+    force = False
+    if 'force' in kwargs:
+        force = kwargs['force']
+        del kwargs['force']
+
+    plpy = plpy_orig # override global plpy,
+                     # to avoid infinite recursion
+
+    if not (plpy_execute_enabled or force):
+        return plpy.prepare(*args, **kwargs)
+
+    if len(args) < 1:
+        raise TypeError('debug.plpy.execute() takes at least 1 parameter, 0 passed')
+    elif type(sql) != str:
+        raise TypeError('debug.plpy.prepare() takes a str as its 1st parameter')
+
+    sql = args[0]
+    plpy.info(sql)
+
+    plan = plpy_orig.prepare(*args, **kwargs)
+    prep = prep_entry(sql, args[1:], kwargs)
+    plpy_wrapper.prepared_queries[plan] = prep
+    return plan
+
+plpy_execute_enabled = False
+def plpy_execute(*args, **kwargs):
+    """ debug.plpy.execute(q, ..., force=False)
+
+        Replace plpy.execute(q, ...) with
+        debug.plpy.execute(q, ...) to debug
+        a query.  Shows the query itself, the
+        EXPLAIN of it, and how long the query
+        takes to execute.
+    """
+
+    force = False
+    if 'force' in kwargs:
+        force = kwargs['force']
+        del kwargs['force']
+
+    plpy = plpy_orig # override global plpy,
+                     # to avoid infinite recursion
+
+    if not (plpy_execute_enabled or force):
+        return plpy.execute(*args, **kwargs)
+
+    if len(args) > 0:
+        q = args[0]
+    else:
+        raise TypeError('debug.plpy.execute() takes at least 1 parameter, 0 passed')
+
+    prep = None
+    if type(q) == str:
+        plpy.info(q)
+        sql = q
+    elif repr(type(q)) == "<type 'PLyPlan'>":
+        if q in plpy_wrapper.prepared_queries:
+            prep = plpy_wrapper.prepared_queries[q]
+            sql = prep.sql
+        else:
+            sql = q
+    else:
+        raise TypeError(
+            "First arg of debug.plpy.execute() must be str or <type 'PLyPlan'>, got {}".format(type(q))
+        )
+
+    # Print EXPLAIN of sql command
+    explain_query = "EXPLAIN" + sql
+    if prep:
+        explain_query = plpy.prepare(explain_query, *prep.args, **prep.kwargs)
+    res = plpy.execute(explain_query, *args[1:], **kwargs)
+    for r in res:
+        plpy.info(r['QUERY PLAN'])
+
+    # Run actual sql command, with timing
+    start = time.time()
+    res = plpy.execute(*args, **kwargs)
+
+    # Print how long execution of query took
+    plpy.info("Query took {0}s".format(time.time() - start))
+    if res:
+        plpy.info("Query returned {} row(s)".format(len(res)))
+    else:
+        plpy.info("Query returned 0 rows")
+    return res
+
+plpy_info_enabled = False
+def plpy_info(*args, **kwargs):
+    """ plpy_info(..., force=False)
+
+      plpy.info() if enabled, otherwise do nothing   
+    """
+
+    force = False
+    if 'force' in kwargs:
+        force = kwargs['force']
+        del kwargs['force']
+
+    if plpy_info_enabled or force:
+        plpy_orig.info(*args, **kwargs)
+
+plpy_debug_enabled = False
+def plpy_debug(*args, **kwargs):
+    """ debug.plpy.debug(..., force=False)
+
+        Behaves like plpy.debug() if disabled (printing only
+        if DEBUG level is set high enough), but becomes a
+        plpy.info() if enabled.
+    """
+
+    force = False
+    if 'force' in kwargs:
+        force = kwargs['force']
+        del kwargs['force']
+
+    if plpy_debug_enabled or force:
+        plpy_orig.info(*args, **kwargs)
+    else:
+        plpy_orig.debug(*args, **kwargs)
+
+class plpy_wrapper:
+    prepare = staticmethod(plpy_prepare)
+    execute = staticmethod(plpy_execute)
+    info = staticmethod(plpy_info)
+    debug = staticmethod(plpy_debug)
+
+    prepared_queries = dict()
+
+plpy = plpy_wrapper

diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index eb507d9..3cb219a 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in

@@ -21,29 +21,8 @@
 from validate_args import drop_tables
 import plpy
 
-
 m4_changequote(`<!', `!>')
 
-def plpy_execute_debug(sql, *args, **kwargs):
-    """ Replace plpy.execute(sql, ...) with
-        plpy_execute_debug(sql, ...) to debug
-        a query.  Shows the query itself, the
-        EXPLAIN of it, and how long the query
-        takes to execute.
-    """
-    plpy.info(sql)  # Print sql command
-
-    # Print EXPLAIN of sql command
-    res = plpy.execute("EXPLAIN " + sql, *args)
-    for r in res:
-        plpy.info(r['QUERY PLAN'])
-
-    # Run actual sql command, with timing
-    start = time.time()
-    plpy.execute(sql, *args)
-
-    # Print how long execution of query took
-    plpy.info("Query took {0}s".format(time.time() - start))
 
 def has_function_properties():
     """ __HAS_FUNCTION_PROPERTIES__ variable defined during configure """
commit	666a5f11966a1c389e1bda594cfa1f361422f18b	[log] [tgz]
author	Domino Valdano <dvaldano@pivotal.io>	Thu May 07 23:48:34 2020 +0000
committer	Domino Valdano <dvaldano@vmware.com>	Mon Jan 04 12:58:22 2021 -0800
tree	6f99a810e467f6485644d87d773bccf57dea663e
parent	cf06f17e612401ab08ed81fa9bc4bb6e2920ab4d [diff]