DL: Enforce append_only=False on new model selection tables
Using AOControl caused load_model_selection to hang under certain
circumstances. This commit moves the disabling of append_only
to the table creation to circumvent the issue.
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
index f29d399..f94cb44 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
@@ -32,7 +32,7 @@
from madlib_keras_wrapper import parse_and_validate_compile_params
from utilities.control import MinWarning
from utilities.utilities import add_postfix, _assert, _assert_equal, extract_keyvalue_params
-from utilities.utilities import quote_ident, get_schema
+from utilities.utilities import quote_ident, get_schema, is_platform_pg
from utilities.validate_args import table_exists, drop_tables
from tensorflow.keras import losses as losses
@@ -154,6 +154,8 @@
def create_mst_table(self):
"""Initialize the output mst table.
"""
+ with_query = "" if is_platform_pg() else """
+ with(appendonly=false)"""
create_query = """
CREATE TABLE {self.model_selection_table} (
{mst_key} SERIAL,
@@ -161,12 +163,13 @@
{compile_params} VARCHAR,
{fit_params} VARCHAR,
unique ({model_id}, {compile_params}, {fit_params})
- );
+ ) {with_query};
""".format(self=self,
mst_key=ModelSelectionSchema.MST_KEY,
model_id=ModelSelectionSchema.MODEL_ID,
compile_params=ModelSelectionSchema.COMPILE_PARAMS,
- fit_params=ModelSelectionSchema.FIT_PARAMS)
+ fit_params=ModelSelectionSchema.FIT_PARAMS,
+ with_query=with_query)
with MinWarning('warning'):
plpy.execute(create_query)
@@ -547,7 +550,8 @@
def create_mst_table(self):
"""Initialize the output mst table, if it doesn't exist (for incremental loading).
"""
-
+ with_query = "" if is_platform_pg() else """
+ with(appendonly=false)"""
create_query = """
CREATE TABLE {self.model_selection_table} (
{mst_key} SERIAL,
@@ -555,12 +559,14 @@
{compile_params} VARCHAR,
{fit_params} VARCHAR,
unique ({model_id}, {compile_params}, {fit_params})
- );
+ ) {with_query};
""".format(self=self,
mst_key=ModelSelectionSchema.MST_KEY,
model_id=ModelSelectionSchema.MODEL_ID,
compile_params=ModelSelectionSchema.COMPILE_PARAMS,
- fit_params=ModelSelectionSchema.FIT_PARAMS)
+ fit_params=ModelSelectionSchema.FIT_PARAMS,
+ with_query=with_query)
+
with MinWarning('warning'):
plpy.execute(create_query)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
index 870dd18..325f355 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
@@ -44,14 +44,14 @@
<li class="level1"><a href="#related">Related Topics</a></li>
</ul></div>
-This module generates model configurations
+This module generates model configurations
for training multiple models at the same time
using <a href="group__grp__keras__run__model__selection.html">Run Model Selection</a>.
By model configurations we mean both hyperparameters and
-model architectures. Grid search or random search
-can be used to generate the configurations.
+model architectures. Grid search or random search
+can be used to generate the configurations.
The output table from this module
-defines the combinations of model architectures,
+defines the combinations of model architectures,
compile and fit parameters to be trained in parallel.
<!-- --------------------------------------START-------------------------------------------------------------- -->
@@ -95,9 +95,9 @@
</dd>
<dt>compile_params_grid</dt>
- <dd>VARCHAR. String representation of a Python dictionary
+ <dd>VARCHAR. String representation of a Python dictionary
of compile parameters to be tested. Each entry
- of the dictionary should consist of keys as compile parameter names,
+ of the dictionary should consist of keys as compile parameter names,
and values as a Python list of compile parameter values to be passed to Keras.
Also, optimizer parameters are a nested dictionary to allow different
optimizer types to have different parameters or ranges of parameters.
@@ -105,41 +105,41 @@
<pre class="example">
$$
- {'loss': ['categorical_crossentropy'],
- 'optimizer_params_list': [
- {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
- {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
+ {'loss': ['categorical_crossentropy'],
+ 'optimizer_params_list': [
+ {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
+ {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
'metrics': ['accuracy']
}
$$
</pre>
The following types of sampling are supported: 'linear', 'log' and 'log_near_one'.
- The 'log_near_one' sampling is useful for exponentially weighted average types of parameters like momentum,
- which are very sensitive to changes near 1. It has the effect of producing more values near 1
- than regular log-based sampling.
-
+ The 'log_near_one' sampling is useful for exponentially weighted average types of parameters like momentum,
+ which are very sensitive to changes near 1. It has the effect of producing more values near 1
+ than regular log-based sampling.
+
In the case of grid search, omit the sample type and just put the grid points in the list.
For custom loss functions, custom metrics, and custom top k categorical accuracy,
list the custom function name and provide the name of the
- table where the serialized Python objects reside using the
+ table where the serialized Python objects reside using the
parameter 'object_table' below. See the examples section later on this page.
For more information on custom functions, please
- see <a href="group__grp__custom__function.html">Load Custom Functions</a>.
+ see <a href="group__grp__custom__function.html">Load Custom Functions</a>.
</dd>
<dt>fit_params_grid</dt>
- <dd>VARCHAR. String representation of a Python dictionary
+ <dd>VARCHAR. String representation of a Python dictionary
of fit parameters to be tested. Each entry
- of the dictionary should consist of keys as fit parameter names,
+ of the dictionary should consist of keys as fit parameter names,
and values as a Python list of fit parameter values
to be passed to Keras. Here is an example:
<pre class="example">
- $$
+ $$
{'batch_size': [32, 64, 128, 256],
- 'epochs': [10, 20, 30]
- }
+ 'epochs': [10, 20, 30]
+ }
$$
</pre>
</dd>
@@ -148,11 +148,11 @@
<dd>VARCHAR, 'grid' or 'random'. Search strategy
for generating model configurations. For grid search, will generate
all combinations of model IDs + compile params + fit params. For
- random search, specify the number of configs you want to
- generate using the 'num_configs' parameter below.
+ random search, specify the number of configs you want to
+ generate using the 'num_configs' parameter below.
Note that you can also use short forms
- for the 'grid' or 'random' keywords, e.g.,'rand' or 'r' instead
- of writing out 'random' in full.
+ for the 'grid' or 'random' keywords, e.g.,'rand' or 'r' instead
+ of writing out 'random' in full.
</dd>
<dt>num_configs (optional)</dt>
@@ -161,15 +161,15 @@
</dd>
<dt>random_state (optional)</dt>
- <dd>INTEGER, default: NULL. Pseudo random number generator
- state used for random uniform sampling from lists of possible
+ <dd>INTEGER, default: NULL. Pseudo random number generator
+ state used for random uniform sampling from lists of possible
values. Pass an integer for reproducible output across multiple
function calls. Only applies when search_type='random'.
</dd>
<dt>object_table (optional)</dt>
- <dd>VARCHAR, default: NULL. Name of the table containing
- Python objects in the case that custom loss functions or
+ <dd>VARCHAR, default: NULL. Name of the table containing
+ Python objects in the case that custom loss functions or
custom metrics are specified in the 'compile_params_grid'.
</dd>
@@ -385,7 +385,7 @@
(2 rows)
</pre>
--# Generate model configurations using grid search. The output table for grid search
+-# Generate model configurations using grid search. The output table for grid search
contains the unique combinations of model architectures, compile and fit parameters.
<pre class="example">
DROP TABLE IF EXISTS mst_table, mst_table_summary;
@@ -437,24 +437,24 @@
'mst_table', -- model selection table output
ARRAY[1,2], -- model ids from model architecture table
$$
- {'loss': ['categorical_crossentropy'],
+ {'loss': ['categorical_crossentropy'],
'optimizer_params_list': [
- {'optimizer': ['SGD']},
- {'optimizer': ['SGD'], 'lr': [0.0001, 0.001], 'momentum': [0.95]},
- {'optimizer': ['Adam'], 'lr': [0.01, 0.1], 'decay': [1e-4]}],
+ {'optimizer': ['SGD']},
+ {'optimizer': ['SGD'], 'lr': [0.0001, 0.001], 'momentum': [0.95]},
+ {'optimizer': ['Adam'], 'lr': [0.01, 0.1], 'decay': [1e-4]}],
'metrics': ['accuracy']}
- $$, -- compile_param_grid
- $$
+ $$, -- compile_param_grid
+ $$
{ 'batch_size': [64, 128],
- 'epochs': [10]
- }
- $$, -- fit_param_grid
- 'grid' -- search_type
+ 'epochs': [10]
+ }
+ $$, -- fit_param_grid
+ 'grid' -- search_type
);
SELECT * FROM mst_table ORDER BY mst_key;
</pre>
<pre class="result">
- mst_key | model_id | compile_params | fit_params
+ mst_key | model_id | compile_params | fit_params
---------+----------+-----------------------------------------------------------------------------------------------+--------------------------
1 | 1 | optimizer='SGD()',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
2 | 1 | optimizer='SGD()',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
@@ -479,8 +479,8 @@
(20 rows)
</pre>
--# Generate model configurations using random search. The output table for random search
-contains the specified number of model architectures, compile and fit parameters,
+-# Generate model configurations using random search. The output table for random search
+contains the specified number of model architectures, compile and fit parameters,
sampled from the specified distributions.
<pre class="example">
DROP TABLE IF EXISTS mst_table, mst_table_summary;
@@ -506,7 +506,7 @@
SELECT * FROM mst_table ORDER BY mst_key;
</pre>
<pre class="result">
- mst_key | model_id | compile_params | fit_params
+ mst_key | model_id | compile_params | fit_params
---------+----------+-----------------------------------------------------------------------------------------------------------------------------+--------------------------
1 | 1 | optimizer='SGD(lr=0.000195784477708685,momentum=0.9768159513291526)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
2 | 2 | optimizer='SGD(lr=0.0002499200066875511,momentum=0.9807877269510826)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
@@ -542,16 +542,16 @@
'mst_table', -- model selection table output
ARRAY[1,2], -- model ids from model architecture table
$$
- {'loss': ['categorical_crossentropy'],
- 'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.001, 0.01]} ],
+ {'loss': ['categorical_crossentropy'],
+ 'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.001, 0.01]} ],
'metrics': ['accuracy']}
- $$, -- compile_param_grid
- $$
+ $$, -- compile_param_grid
+ $$
{ 'batch_size': [64, 128],
- 'epochs': [10]
- }
- $$, -- fit_param_grid
- 'grid' -- search_type
+ 'epochs': [10]
+ }
+ $$, -- fit_param_grid
+ 'grid' -- search_type
);
</pre>
Now add to the existing table and note that mst_key continues where it left off:
@@ -561,24 +561,24 @@
'mst_table', -- model selection table output
ARRAY[1,2], -- model ids from model architecture table
$$
- {'loss': ['categorical_crossentropy'],
- 'optimizer_params_list': [
- {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
- {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
+ {'loss': ['categorical_crossentropy'],
+ 'optimizer_params_list': [
+ {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
+ {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
'metrics': ['accuracy']}
- $$, -- compile_param_grid
- $$
+ $$, -- compile_param_grid
+ $$
{ 'batch_size': [64, 128],
- 'epochs': [10]
- }
- $$, -- fit_param_grid
+ 'epochs': [10]
+ }
+ $$, -- fit_param_grid
'random', -- search_type
20
);
SELECT * FROM mst_table ORDER BY mst_key;
</pre>
<pre class="result">
- mst_key | model_id | compile_params | fit_params
+ mst_key | model_id | compile_params | fit_params
---------+----------+-----------------------------------------------------------------------------------------------------------------------------+--------------------------
1 | 1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
2 | 1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
@@ -619,13 +619,13 @@
(36 rows)
</pre>
--# Create model selection table manually.
-If you want more control over the content of the model selection table,
-you could use grid or random search to generate a large number of combinations,
-then SELECT a subset of rows for training. Alternatively, you could manually
-create the model selection table and the associated summary table. Both must be
+-# Create model selection table manually.
+If you want more control over the content of the model selection table,
+you could use grid or random search to generate a large number of combinations,
+then SELECT a subset of rows for training. Alternatively, you could manually
+create the model selection table and the associated summary table. Both must be
created since they are needed by the multiple model fit module.
-For example, let's say we don't want all combinations but only want
+For example, let's say we don't want all combinations but only want
batch_size=4 for model_id=1 and batch_size=8 for model_id=2:
<pre class="example">
DROP TABLE IF EXISTS mst_table_manual;
@@ -770,7 +770,7 @@
16 | 2 | optimizer='SGD(lr=0.01)',metrics=['top_3_accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
(16 rows)
</pre>
--# <b>[Deprecated]</b> Load model selection table. This method is replaced
+-# <b>[Deprecated]</b> Load model selection table. This method is replaced
by the 'generate_model_configs' method described above.
Select the model(s) from the model
architecture table that you want to run, along with the compile and
@@ -841,9 +841,8 @@
object_table VARCHAR
) RETURNS VOID AS $$
PythonFunctionBodyOnly(`deep_learning', `madlib_keras_model_selection')
- with AOControl(False):
- mst_loader = madlib_keras_model_selection.MstLoader(**globals())
- mst_loader.load()
+ mst_loader = madlib_keras_model_selection.MstLoader(**globals())
+ mst_loader.load()
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -873,9 +872,9 @@
object_table VARCHAR DEFAULT NULL
) RETURNS VOID AS $$
PythonFunctionBodyOnly(`deep_learning', `madlib_keras_model_selection')
- with AOControl(False):
- mst_loader = madlib_keras_model_selection.MstSearch(**globals())
- mst_loader.load()
+
+ mst_loader = madlib_keras_model_selection.MstSearch(**globals())
+ mst_loader.load()
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -894,8 +893,7 @@
object_table VARCHAR DEFAULT NULL
) RETURNS VOID AS $$
PythonFunctionBodyOnly(`deep_learning', `madlib_keras_model_selection')
- with AOControl(False):
- mst_loader = madlib_keras_model_selection.MstSearch(**globals())
- mst_loader.load()
+ mst_loader = madlib_keras_model_selection.MstSearch(**globals())
+ mst_loader.load()
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');