DL: Enforce append_only=False on new model selection tables
Using AOControl caused load_model_selection to hang under certain
circumstances. This commit moves the disabling of append_only
to the table creation to circumvent the issue.
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
index f29d399..f94cb44 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
@@ -32,7 +32,7 @@
from madlib_keras_wrapper import parse_and_validate_compile_params
from utilities.control import MinWarning
from utilities.utilities import add_postfix, _assert, _assert_equal, extract_keyvalue_params
-from utilities.utilities import quote_ident, get_schema
+from utilities.utilities import quote_ident, get_schema, is_platform_pg
from utilities.validate_args import table_exists, drop_tables
from tensorflow.keras import losses as losses
@@ -154,6 +154,8 @@
def create_mst_table(self):
"""Initialize the output mst table.
"""
+ with_query = "" if is_platform_pg() else """
+ with(appendonly=false)"""
create_query = """
CREATE TABLE {self.model_selection_table} (
{mst_key} SERIAL,
@@ -161,12 +163,13 @@
{compile_params} VARCHAR,
{fit_params} VARCHAR,
unique ({model_id}, {compile_params}, {fit_params})
- );
+ ) {with_query};
""".format(self=self,
mst_key=ModelSelectionSchema.MST_KEY,
model_id=ModelSelectionSchema.MODEL_ID,
compile_params=ModelSelectionSchema.COMPILE_PARAMS,
- fit_params=ModelSelectionSchema.FIT_PARAMS)
+ fit_params=ModelSelectionSchema.FIT_PARAMS,
+ with_query=with_query)
with MinWarning('warning'):
plpy.execute(create_query)
@@ -547,7 +550,8 @@
def create_mst_table(self):
"""Initialize the output mst table, if it doesn't exist (for incremental loading).
"""
-
+ with_query = "" if is_platform_pg() else """
+ with(appendonly=false)"""
create_query = """
CREATE TABLE {self.model_selection_table} (
{mst_key} SERIAL,
@@ -555,12 +559,14 @@
{compile_params} VARCHAR,
{fit_params} VARCHAR,
unique ({model_id}, {compile_params}, {fit_params})
- );
+ ) {with_query};
""".format(self=self,
mst_key=ModelSelectionSchema.MST_KEY,
model_id=ModelSelectionSchema.MODEL_ID,
compile_params=ModelSelectionSchema.COMPILE_PARAMS,
- fit_params=ModelSelectionSchema.FIT_PARAMS)
+ fit_params=ModelSelectionSchema.FIT_PARAMS,
+ with_query=with_query)
+
with MinWarning('warning'):
plpy.execute(create_query)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
index 870dd18..325f355 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
@@ -44,14 +44,14 @@
<li class="level1"><a href="#related">Related Topics</a></li>
</ul></div>
-This module generates model configurations
+This module generates model configurations
for training multiple models at the same time
using <a href="group__grp__keras__run__model__selection.html">Run Model Selection</a>.
By model configurations we mean both hyperparameters and
-model architectures. Grid search or random search
-can be used to generate the configurations.
+model architectures. Grid search or random search
+can be used to generate the configurations.
The output table from this module
-defines the combinations of model architectures,
+defines the combinations of model architectures,
compile and fit parameters to be trained in parallel.
<!-- --------------------------------------START-------------------------------------------------------------- -->
@@ -95,9 +95,9 @@
</dd>
<dt>compile_params_grid</dt>
- <dd>VARCHAR. String representation of a Python dictionary
+ <dd>VARCHAR. String representation of a Python dictionary
of compile parameters to be tested. Each entry
- of the dictionary should consist of keys as compile parameter names,
+ of the dictionary should consist of keys as compile parameter names,
and values as a Python list of compile parameter values to be passed to Keras.
Also, optimizer parameters are a nested dictionary to allow different
optimizer types to have different parameters or ranges of parameters.
@@ -105,41 +105,41 @@
<pre class="example">
$$
- {'loss': ['categorical_crossentropy'],
- 'optimizer_params_list': [
- {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
- {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
+ {'loss': ['categorical_crossentropy'],
+ 'optimizer_params_list': [
+ {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
+ {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
'metrics': ['accuracy']
}
$$
</pre>
The following types of sampling are supported: 'linear', 'log' and 'log_near_one'.
- The 'log_near_one' sampling is useful for exponentially weighted average types of parameters like momentum,
- which are very sensitive to changes near 1. It has the effect of producing more values near 1
- than regular log-based sampling.
-
+ The 'log_near_one' sampling is useful for exponentially weighted average types of parameters like momentum,
+ which are very sensitive to changes near 1. It has the effect of producing more values near 1
+ than regular log-based sampling.
+
In the case of grid search, omit the sample type and just put the grid points in the list.
For custom loss functions, custom metrics, and custom top k categorical accuracy,
list the custom function name and provide the name of the
- table where the serialized Python objects reside using the
+ table where the serialized Python objects reside using the
parameter 'object_table' below. See the examples section later on this page.
For more information on custom functions, please
- see <a href="group__grp__custom__function.html">Load Custom Functions</a>.
+ see <a href="group__grp__custom__function.html">Load Custom Functions</a>.
</dd>
<dt>fit_params_grid</dt>
- <dd>VARCHAR. String representation of a Python dictionary
+ <dd>VARCHAR. String representation of a Python dictionary
of fit parameters to be tested. Each entry
- of the dictionary should consist of keys as fit parameter names,
+ of the dictionary should consist of keys as fit parameter names,
and values as a Python list of fit parameter values
to be passed to Keras. Here is an example:
<pre class="example">
- $$
+ $$
{'batch_size': [32, 64, 128, 256],
- 'epochs': [10, 20, 30]
- }
+ 'epochs': [10, 20, 30]
+ }
$$
</pre>
</dd>
@@ -148,11 +148,11 @@
<dd>VARCHAR, 'grid' or 'random'. Search strategy
for generating model configurations. For grid search, will generate
all combinations of model IDs + compile params + fit params. For
- random search, specify the number of configs you want to
- generate using the 'num_configs' parameter below.
+ random search, specify the number of configs you want to
+ generate using the 'num_configs' parameter below.
Note that you can also use short forms
- for the 'grid' or 'random' keywords, e.g.,'rand' or 'r' instead
- of writing out 'random' in full.
+ for the 'grid' or 'random' keywords, e.g.,'rand' or 'r' instead
+ of writing out 'random' in full.
</dd>
<dt>num_configs (optional)</dt>
@@ -161,15 +161,15 @@
</dd>
<dt>random_state (optional)</dt>
- <dd>INTEGER, default: NULL. Pseudo random number generator
- state used for random uniform sampling from lists of possible
+ <dd>INTEGER, default: NULL. Pseudo random number generator
+ state used for random uniform sampling from lists of possible
values. Pass an integer for reproducible output across multiple
function calls. Only applies when search_type='random'.
</dd>
<dt>object_table (optional)</dt>
- <dd>VARCHAR, default: NULL. Name of the table containing
- Python objects in the case that custom loss functions or
+ <dd>VARCHAR, default: NULL. Name of the table containing
+ Python objects in the case that custom loss functions or
custom metrics are specified in the 'compile_params_grid'.
</dd>
@@ -385,7 +385,7 @@
(2 rows)
</pre>
--# Generate model configurations using grid search. The output table for grid search
+-# Generate model configurations using grid search. The output table for grid search
contains the unique combinations of model architectures, compile and fit parameters.
<pre class="example">
DROP TABLE IF EXISTS mst_table, mst_table_summary;
@@ -437,24 +437,24 @@
'mst_table', -- model selection table output
ARRAY[1,2], -- model ids from model architecture table
$$
- {'loss': ['categorical_crossentropy'],
+ {'loss': ['categorical_crossentropy'],
'optimizer_params_list': [
- {'optimizer': ['SGD']},
- {'optimizer': ['SGD'], 'lr': [0.0001, 0.001], 'momentum': [0.95]},
- {'optimizer': ['Adam'], 'lr': [0.01, 0.1], 'decay': [1e-4]}],
+ {'optimizer': ['SGD']},
+ {'optimizer': ['SGD'], 'lr': [0.0001, 0.001], 'momentum': [0.95]},
+ {'optimizer': ['Adam'], 'lr': [0.01, 0.1], 'decay': [1e-4]}],
'metrics': ['accuracy']}
- $$, -- compile_param_grid
- $$
+ $$, -- compile_param_grid
+ $$
{ 'batch_size': [64, 128],
- 'epochs': [10]
- }
- $$, -- fit_param_grid
- 'grid' -- search_type
+ 'epochs': [10]
+ }
+ $$, -- fit_param_grid
+ 'grid' -- search_type
);
SELECT * FROM mst_table ORDER BY mst_key;
</pre>
<pre class="result">
- mst_key | model_id | compile_params | fit_params
+ mst_key | model_id | compile_params | fit_params
---------+----------+-----------------------------------------------------------------------------------------------+--------------------------
1 | 1 | optimizer='SGD()',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
2 | 1 | optimizer='SGD()',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
@@ -479,8 +479,8 @@
(20 rows)
</pre>
--# Generate model configurations using random search. The output table for random search
-contains the specified number of model architectures, compile and fit parameters,
+-# Generate model configurations using random search. The output table for random search
+contains the specified number of model architectures, compile and fit parameters,
sampled from the specified distributions.
<pre class="example">
DROP TABLE IF EXISTS mst_table, mst_table_summary;
@@ -506,7 +506,7 @@
SELECT * FROM mst_table ORDER BY mst_key;
</pre>
<pre class="result">
- mst_key | model_id | compile_params | fit_params
+ mst_key | model_id | compile_params | fit_params
---------+----------+-----------------------------------------------------------------------------------------------------------------------------+--------------------------
1 | 1 | optimizer='SGD(lr=0.000195784477708685,momentum=0.9768159513291526)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
2 | 2 | optimizer='SGD(lr=0.0002499200066875511,momentum=0.9807877269510826)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
@@ -542,16 +542,16 @@
'mst_table', -- model selection table output
ARRAY[1,2], -- model ids from model architecture table
$$
- {'loss': ['categorical_crossentropy'],
- 'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.001, 0.01]} ],
+ {'loss': ['categorical_crossentropy'],
+ 'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.001, 0.01]} ],
'metrics': ['accuracy']}
- $$, -- compile_param_grid
- $$
+ $$, -- compile_param_grid
+ $$
{ 'batch_size': [64, 128],
- 'epochs': [10]
- }
- $$, -- fit_param_grid
- 'grid' -- search_type
+ 'epochs': [10]
+ }
+ $$, -- fit_param_grid
+ 'grid' -- search_type
);
</pre>
Now add to the existing table and note that mst_key continues where it left off:
@@ -561,24 +561,24 @@
'mst_table', -- model selection table output
ARRAY[1,2], -- model ids from model architecture table
$$
- {'loss': ['categorical_crossentropy'],
- 'optimizer_params_list': [
- {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
- {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
+ {'loss': ['categorical_crossentropy'],
+ 'optimizer_params_list': [
+ {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
+ {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
'metrics': ['accuracy']}
- $$, -- compile_param_grid
- $$
+ $$, -- compile_param_grid
+ $$
{ 'batch_size': [64, 128],
- 'epochs': [10]
- }
- $$, -- fit_param_grid
+ 'epochs': [10]
+ }
+ $$, -- fit_param_grid
'random', -- search_type
20
);
SELECT * FROM mst_table ORDER BY mst_key;
</pre>
<pre class="result">
- mst_key | model_id | compile_params | fit_params
+ mst_key | model_id | compile_params | fit_params
---------+----------+-----------------------------------------------------------------------------------------------------------------------------+--------------------------
1 | 1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
2 | 1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
@@ -619,13 +619,13 @@
(36 rows)
</pre>
--# Create model selection table manually.
-If you want more control over the content of the model selection table,
-you could use grid or random search to generate a large number of combinations,
-then SELECT a subset of rows for training. Alternatively, you could manually
-create the model selection table and the associated summary table. Both must be
+-# Create model selection table manually.
+If you want more control over the content of the model selection table,
+you could use grid or random search to generate a large number of combinations,
+then SELECT a subset of rows for training. Alternatively, you could manually
+create the model selection table and the associated summary table. Both must be
created since they are needed by the multiple model fit module.
-For example, let's say we don't want all combinations but only want
+For example, let's say we don't want all combinations but only want
batch_size=4 for model_id=1 and batch_size=8 for model_id=2:
<pre class="example">
DROP TABLE IF EXISTS mst_table_manual;
@@ -770,7 +770,7 @@
16 | 2 | optimizer='SGD(lr=0.01)',metrics=['top_3_accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
(16 rows)
</pre>
--# <b>[Deprecated]</b> Load model selection table. This method is replaced
+-# <b>[Deprecated]</b> Load model selection table. This method is replaced
by the 'generate_model_configs' method described above.
Select the model(s) from the model
architecture table that you want to run, along with the compile and
@@ -841,9 +841,8 @@
object_table VARCHAR
) RETURNS VOID AS $$
PythonFunctionBodyOnly(`deep_learning', `madlib_keras_model_selection')
- with AOControl(False):
- mst_loader = madlib_keras_model_selection.MstLoader(**globals())
- mst_loader.load()
+ mst_loader = madlib_keras_model_selection.MstLoader(**globals())
+ mst_loader.load()
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -873,9 +872,9 @@
object_table VARCHAR DEFAULT NULL
) RETURNS VOID AS $$
PythonFunctionBodyOnly(`deep_learning', `madlib_keras_model_selection')
- with AOControl(False):
- mst_loader = madlib_keras_model_selection.MstSearch(**globals())
- mst_loader.load()
+
+ mst_loader = madlib_keras_model_selection.MstSearch(**globals())
+ mst_loader.load()
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -894,8 +893,7 @@
object_table VARCHAR DEFAULT NULL
) RETURNS VOID AS $$
PythonFunctionBodyOnly(`deep_learning', `madlib_keras_model_selection')
- with AOControl(False):
- mst_loader = madlib_keras_model_selection.MstSearch(**globals())
- mst_loader.load()
+ mst_loader = madlib_keras_model_selection.MstSearch(**globals())
+ mst_loader.load()
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');