DL: Remove weight passing for multi model evaluate
Instead of reading the weights from a table and passing them to the
evaluate function, we decided to pass the table directly and
let the evaluate function read the weights within the same query.
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index ee27554..091fce2 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -33,6 +33,8 @@
from madlib_keras_wrapper import *
from model_arch_info import *
+from madlib_keras_model_selection import ModelSelectionSchema
+
from utilities.utilities import _assert
from utilities.utilities import add_postfix
from utilities.utilities import is_platform_pg
@@ -396,7 +398,7 @@
def compute_loss_and_metrics(schema_madlib, table, compile_params, model_arch,
serialized_weights, use_gpus, accessible_gpus_for_seg,
dist_key_mapping, images_per_seg_val, metrics_list, loss_list,
- curr_iter, is_final_iteration):
+ curr_iter, is_final_iteration, model_table=None, mst_key=None):
"""
Compute the loss and metric using a given model (serialized_weights) on the
given dataset (table.)
@@ -411,7 +413,9 @@
accessible_gpus_for_seg,
dist_key_mapping,
images_per_seg_val,
- is_final_iteration)
+ is_final_iteration,
+ model_table,
+ mst_key)
end_val = time.time()
if len(evaluate_result) not in [1, 2]:
@@ -670,10 +674,10 @@
def get_loss_metric_from_keras_eval(schema_madlib, table, compile_params,
model_arch, serialized_weights, use_gpus,
accessible_gpus_for_seg, dist_key_mapping, images_per_seg,
- is_final_iteration=True):
+ is_final_iteration=True, model_table=None, mst_key=None):
dist_key_col = '0' if is_platform_pg() else DISTRIBUTION_KEY_COLNAME
- gp_segment_id_col = '0' if is_platform_pg() else GP_SEGMENT_ID_COLNAME
+ gp_segment_id_col = '0' if is_platform_pg() else '__table__.{0}'.format(GP_SEGMENT_ID_COLNAME)
segments_per_host = get_segments_per_host()
mb_dep_var_col = MINIBATCH_OUTPUT_DEPENDENT_COLNAME_DL
@@ -688,14 +692,15 @@
and accuracy of each tuple which then gets averaged to get the final result.
"""
use_gpus = use_gpus if use_gpus else False
- evaluate_query = plpy.prepare("""
+
+ eval_sql = """
select ({schema_madlib}.internal_keras_evaluate(
{mb_dep_var_col},
{mb_indep_var_col},
{dep_shape_col},
{ind_shape_col},
$MAD${model_arch}$MAD$,
- $1,
+ {weights},
{compile_params},
{dist_key_col},
ARRAY{dist_key_mapping},
@@ -706,9 +711,20 @@
ARRAY{accessible_gpus_for_seg},
{is_final_iteration}
)) as loss_metric
- from {table}
- """.format(**locals()), ["bytea"])
- res = plpy.execute(evaluate_query, [serialized_weights])
+ from {table} AS __table__ {mult_sql}
+ """
+
+ if mst_key:
+ weights = '__mt__.{0}'.format(MODEL_WEIGHTS_COLNAME)
+ mst_key_col = ModelSelectionSchema.MST_KEY
+ mult_sql = ', {model_table} AS __mt__ WHERE {mst_key_col} = {mst_key}'.format(**locals())
+ res = plpy.execute(eval_sql.format(**locals()))
+ else:
+ weights = '$1'
+ mult_sql = ''
+ evaluate_query = plpy.prepare(eval_sql.format(**locals()), ["bytea"])
+ res = plpy.execute(evaluate_query, [serialized_weights])
+
loss_metric = res[0]['loss_metric']
return loss_metric
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index 9de9774..c122def 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -261,19 +261,19 @@
images_per_seg = self.images_per_seg_valid
self.info_str += "\n\tValidation set after iteration {0}:".format(epoch)
for mst in self.msts:
- weights = query_weights(self.model_output_table, self.model_weights_col,
- self.mst_key_col, mst[self.mst_key_col])
model_arch, _ = get_model_arch_weights(self.model_arch_table, mst[self.model_id_col])
_, metric, loss = compute_loss_and_metrics(
self.schema_madlib, table, "$madlib${0}$madlib$".format(
mst[self.compile_params_col]),
model_arch,
- weights,
+ None,
self.use_gpus,
self.accessible_gpus_for_seg,
seg_ids,
images_per_seg,
- [], [], epoch, True)
+ [], [], epoch, True,
+ self.model_output_table,
+ mst[self.mst_key_col])
mst_metric_eval_time[mst[self.mst_key_col]] \
.append(time.time() - self.metrics_elapsed_start_time)
mst_loss[mst[self.mst_key_col]].append(loss)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
index 5be078b..b2b7397 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
@@ -235,16 +235,6 @@
res = [x[dist_key_col] for x in res]
return res
-def query_weights(model_output_table, model_weights_col, mst_key_col, mst_key):
- mlp_weights_query = """
- SELECT {model_weights_col}, {mst_key_col}
- FROM {model_output_table}
- WHERE {mst_key_col} = {mst_key}
- """.format(**locals())
-
- res = plpy.execute(mlp_weights_query)
- return res[0][model_weights_col]
-
def create_summary_view(module_name, model_table, mst_key):
tmp_view_summary = unique_string('tmp_view_summary')
model_summary_table = add_postfix(model_table, "_summary")