DL: Add support for grid search and random search

JIRA: MADLIB-1439

The load_model_selection_table function requires the user to manually
enumerate every combination of compile and fit params. We therefore
implement a function called generate_model_configs (in the same module)
that performs grid/random search over the parameter space.

The user declares the compile and fit params grids separately as
strings wrapping Python dictionaries, along with the name of the search
algorithm (and any corresponding arguments). The output format of the
new function is the same as that of load_model_selection_table, so it
integrates with the other MADlib functions related to model training.

This commit includes the implementation, unit tests (in Python and
SQL), and documentation for the newly created function.
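
As an illustration, grid search reduces to a cross product over the
per-parameter value lists (a minimal sketch of the idea in plain
Python, not the module code itself):

    from ast import literal_eval
    from itertools import product

    grid = literal_eval("{'batch_size': [64, 128], 'epochs': [10]}")
    keys, values = zip(*grid.items())
    configs = [dict(zip(keys, v)) for v in product(*values)]
    # -> [{'batch_size': 64, 'epochs': 10}, {'batch_size': 128, 'epochs': 10}]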
diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index 4afb091..f7f6889 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -13,6 +13,7 @@
 <li><a href="https://mail-archives.apache.org/mod_mbox/madlib-user/">User mailing list</a></li>
 <li><a href="https://mail-archives.apache.org/mod_mbox/madlib-dev/">Dev mailing list</a></li>
 <li>User documentation for earlier releases:
+    <a href="../v1.17.0/index.html">v1.17.0</a>,
     <a href="../v1.16/index.html">v1.16</a>,
     <a href="../v1.15.1/index.html">v1.15.1</a>,
     <a href="../v1.15/index.html">v1.15</a>,
@@ -298,8 +299,9 @@
         @brief Train multiple deep learning models at the same time for model architecture search and hyperparameter selection.
         @details Train multiple deep learning models at the same time for model architecture search and hyperparameter selection.
         @{
+            @defgroup grp_automl AutoML
+            @defgroup grp_keras_setup_model_selection Generate Model Configurations
             @defgroup grp_keras_run_model_selection Run Model Selection
-            @defgroup grp_keras_setup_model_selection Setup Model Selection
         @}
         @defgroup grp_input_preprocessor_dl Preprocessor for Images
     @}
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
index 46267f0..67c8713 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
@@ -16,13 +16,24 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 
-import plpy
+from ast import literal_eval
 from collections import OrderedDict
-from madlib_keras_validator import MstLoaderInputValidator
-from utilities.control import MinWarning
-from utilities.utilities import add_postfix
-from madlib_keras_wrapper import convert_string_of_args_to_dict
+from itertools import product as itertools_product
+import keras.losses as losses
+import keras.metrics as metrics
 from keras_model_arch_table import ModelArchSchema
+import numpy as np
+import plpy
+from copy import deepcopy
+
+from madlib_keras_custom_function import CustomFunctionSchema
+from madlib_keras_validator import MstLoaderInputValidator
+from madlib_keras_wrapper import convert_string_of_args_to_dict
+from madlib_keras_wrapper import parse_and_validate_fit_params
+from madlib_keras_wrapper import parse_and_validate_compile_params
+from utilities.control import MinWarning
+from utilities.utilities import add_postfix, extract_keyvalue_params, _assert, _assert_equal
+from utilities.validate_args import table_exists, drop_tables
 
 class ModelSelectionSchema:
     MST_KEY = 'mst_key'
@@ -32,6 +43,9 @@
     COMPILE_PARAMS = 'compile_params'
     FIT_PARAMS = 'fit_params'
     col_types = ('SERIAL', 'INTEGER', 'VARCHAR', 'VARCHAR')
+    GRID_SEARCH = 'grid'
+    RANDOM_SEARCH = 'random'
+    OPTIMIZER_PARAMS_LIST = 'optimizer_params_list'
 
 @MinWarning("warning")
 class MstLoader():
@@ -44,7 +58,7 @@
         compile_params_list (list): The input list of compile params choices.
         fit_params_list (list): The input list of fit params choices.
         model_id_list (list): The input list of model id choices.
-        model_arch_table (str): The name of model architechure table.
+        model_arch_table (str): The name of model architecture table.
         model_selection_table (str): The name of the output mst table.
         msts (list): The list of generated msts.
 
@@ -203,3 +217,426 @@
                                   object_table_name=ModelSelectionSchema.OBJECT_TABLE,
                                   **locals())
         plpy.execute(insert_summary_query)
+
+@MinWarning("warning")
+class MstSearch():
+    """
+    The utility class for generating model selection configs and loading them into an MST table with model parameters.
+
+    Currently takes string representations of Python dictionaries for compile and fit params.
+    Generates configs with the chosen search algorithm.
+
+    Attributes:
+        model_arch_table (str): The name of model architecture table.
+        model_selection_table (str): The name of the output mst table.
+        model_id_list (list): The input list of model id choices.
+        compile_params_grid (string repr of python dict): The input of compile params choices.
+        fit_params_grid (string repr of python dict): The input of fit params choices.
+        search_type (str, default 'grid'): Hyperparameter search strategy, 'grid' or 'random'.
+
+        Only for the 'random' search type (both default to None):
+            num_configs (int): Number of configs to generate.
+            random_state (int): Seed for result reproducibility.
+
+        object_table (str, default None): The name of the object table, for custom loss/metric functions.
+
+    """
+
+    def __init__(self,
+                 model_arch_table,
+                 model_selection_table,
+                 model_id_list,
+                 compile_params_grid,
+                 fit_params_grid,
+                 search_type='grid',
+                 num_configs=None,
+                 random_state=None,
+                 object_table=None,
+                 **kwargs):
+
+        self.model_arch_table = model_arch_table
+        self.model_selection_table = model_selection_table
+        self.model_selection_summary_table = add_postfix(
+            model_selection_table, "_summary")
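+        # dedupe and sort the model ids so the generated configs are deterministic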
+        self.model_id_list = sorted(list(set(model_id_list)))
+
+        MstLoaderInputValidator(
+            model_arch_table=self.model_arch_table,
+            model_selection_table=self.model_selection_table,
+            model_selection_summary_table=self.model_selection_summary_table,
+            model_id_list=self.model_id_list,
+            compile_params_list=compile_params_grid,
+            fit_params_list=fit_params_grid,
+            object_table=object_table,
+            module_name='generate_model_configs'
+        )
+
+        self.search_type = search_type
+        self.num_configs = num_configs
+        self.random_state = random_state
+        self.object_table = object_table
+
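+        # strip newlines and spaces so the grid strings are in a canonical
+        # form for the substring checks in validate_inputs() and literal_eval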
+        compile_params_grid = compile_params_grid.replace('\n', '').replace(' ', '')
+        fit_params_grid = fit_params_grid.replace('\n', '').replace(' ', '')
+        self.accepted_distributions = ['linear', 'log', 'log_near_one']
+
+        # extracting python dict
+        self.compile_params_dict = literal_eval(compile_params_grid)
+        self.fit_params_dict = literal_eval(fit_params_grid)
+        self.validate_inputs(compile_params_grid, fit_params_grid)
+
+        self.msts = []
+
+        if ModelSelectionSchema.GRID_SEARCH.startswith(self.search_type.lower()):
+            self.find_grid_combinations()
+        elif ModelSelectionSchema.RANDOM_SEARCH.startswith(self.search_type.lower()):
+            # validate_inputs() has already rejected any other search_type,
+            # so a plain else would also suffice here.
+            self.find_random_combinations()
+
+        # validate the generated compile/fit param strings against Keras
+        # built-ins and the optional object table
+        compile_params_lst, fit_params_lst = [], []
+        for i in self.msts:
+            compile_params_lst.append(i[ModelSelectionSchema.COMPILE_PARAMS])
+            fit_params_lst.append(i[ModelSelectionSchema.FIT_PARAMS])
+        self._validate_params_and_object_table(compile_params_lst, fit_params_lst)
+
+    def load(self):
+        """The entry point for loading the model selection table.
+        """
+        # All of the side effects happen in this function.
+        if table_exists(self.model_selection_table):
+            if table_exists(self.model_selection_summary_table):
+                res = plpy.execute("SELECT model_arch_table from {0}".format(self.model_selection_summary_table))
+                # exactly one value
+                for r in res:
+                    _assert_equal(r['model_arch_table'], self.model_arch_table,
+                            "DL: Inconsistent model arch table. Use '{0}' if appending rows to '{1}'".format(
+                                r['model_arch_table'], self.model_selection_table
+                            ))
+            else:
+                self.create_mst_summary_table()
+        else:
+            self.create_mst_table()
+            self.create_mst_summary_table()
+        self.insert_into_mst_table()
+
+    def validate_inputs(self, compile_params_grid, fit_params_grid):
+        """
+        Ensures validity of inputs related to grid and random search.
+
+        :param compile_params_grid: The input string repr of compile params choices.
+        :param fit_params_grid: The input string repr of fit params choices.
+        """
+
+        if ModelSelectionSchema.GRID_SEARCH.startswith(self.search_type.lower()):
+            _assert(self.num_configs is None and self.random_state is None,
+                    "DL: 'num_configs' and 'random_state' must be NULL for grid search")
+            for distribution_type in self.accepted_distributions:
+                _assert(distribution_type not in compile_params_grid and distribution_type not in fit_params_grid,
+                        "DL: Cannot search from a distribution with grid search")
+        elif ModelSelectionSchema.RANDOM_SEARCH.startswith(self.search_type.lower()):
+            _assert(self.num_configs is not None, "DL: 'num_configs' cannot be NULL for random search")
+        else:
+            plpy.error("DL: 'search_type' must be either 'grid' or 'random'")
+
+        if ModelSelectionSchema.OPTIMIZER_PARAMS_LIST in self.compile_params_dict:
+            optimizer_params_list = self.compile_params_dict[ModelSelectionSchema.OPTIMIZER_PARAMS_LIST]
+            optimizer_param_keys = set([j for i in optimizer_params_list for j in i])
+            _assert(set(self.compile_params_dict).isdisjoint(optimizer_param_keys),
+                    "DL: 'optimizer_params_list' should only contain 'optimizer' and/or "
+                    "optimizer-related params, and no such params may appear outside it")
+            for k in optimizer_params_list:
+                _assert(len(k) != 0,
+                        "DL: empty dictionaries cannot be specified in the value list of "
+                        "'optimizer_params_list'")
+
+    def _validate_params_and_object_table(self, compile_params_lst, fit_params_lst):
+        if not fit_params_lst:
+            plpy.error("fit_params_list cannot be NULL")
+        for fit_params in fit_params_lst:
+            try:
+                res = parse_and_validate_fit_params(fit_params)
+            except Exception as e:
+                plpy.error(
+                    """Fit param check failed for: {0} \n
+                    {1}
+                    """.format(fit_params, str(e)))
+        if not compile_params_lst:
+            plpy.error("compile_params_list cannot be NULL")
+        custom_fn_name = []
+        ## Initialize builtin loss/metrics functions
+        builtin_losses = dir(losses)
+        builtin_metrics = dir(metrics)
+        # Default metrics, since it is not part of the builtin metrics list
+        builtin_metrics.append('accuracy')
+        if self.object_table is not None:
+            res = plpy.execute("SELECT {0} from {1}".format(CustomFunctionSchema.FN_NAME,
+                                                            self.object_table))
+            for r in res:
+                custom_fn_name.append(r[CustomFunctionSchema.FN_NAME])
+        for compile_params in compile_params_lst:
+            try:
+                _, _, res = parse_and_validate_compile_params(compile_params, [ModelSelectionSchema.OPTIMIZER_PARAMS_LIST])
+                # Validating if loss/metrics function called in compile_params
+                # is either defined in object table or is a built_in keras
+                # loss/metrics function
+                error_suffix = "but input object table missing!"
+                if self.object_table is not None:
+                    error_suffix = "is not defined in object table '{0}'!".format(self.object_table)
+
+                _assert(res['loss'] in custom_fn_name or res['loss'] in builtin_losses,
+                        "custom function '{0}' used in compile params " \
+                        "{1}".format(res['loss'], error_suffix))
+                if 'metrics' in res:
+                    _assert((len(set(res['metrics']).intersection(custom_fn_name)) > 0
+                             or len(set(res['metrics']).intersection(builtin_metrics)) > 0),
+                            "custom function '{0}' used in compile params " \
+                            "{1}".format(res['metrics'], error_suffix))
+
+            except Exception as e:
+                plpy.error(
+                    """Compile param check failed for: {0} \n
+                    {1}
+                    """.format(compile_params, str(e)))
+
+    def find_grid_combinations(self):
+        """
+        Finds combinations using grid search.
+        """
+        # optimizer grids are nested, so expand each optimizer dict first
+        if ModelSelectionSchema.OPTIMIZER_PARAMS_LIST in self.compile_params_dict:
+            for opt_params_dict in self.compile_params_dict[ModelSelectionSchema.OPTIMIZER_PARAMS_LIST]:
+                keys, values = zip(*opt_params_dict.items())
+                opt_configs_params = [dict(zip(keys, v)) for v in itertools_product(*values)]
+                copied_compile_dict = deepcopy(self.compile_params_dict)
+                copied_compile_dict[ModelSelectionSchema.OPTIMIZER_PARAMS_LIST] = opt_configs_params
+                self.grid_combinations_helper(copied_compile_dict, self.fit_params_dict)
+        else:
+            self.grid_combinations_helper(self.compile_params_dict, self.fit_params_dict)
+
+    def grid_combinations_helper(self, compile_dict, fit_dict):
+        combined_dict = dict(compile_dict, **fit_dict)
+        combined_dict[ModelSelectionSchema.MODEL_ID] = self.model_id_list
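+        # the cross product of all value lists yields every unique config, e.g.
+        # {'a': [1, 2], 'b': [3]} expands to [{'a': 1, 'b': 3}, {'a': 2, 'b': 3}]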
+        keys, values = zip(*combined_dict.items())
+        all_configs_params = [dict(zip(keys, v)) for v in itertools_product(*values)]
+
+        # to separate the compile and fit configs
+        for config in all_configs_params:
+            combination = {}
+            compile_configs, fit_configs = {}, {}
+            for k in config:
+                if k == ModelSelectionSchema.MODEL_ID:
+                    combination[ModelSelectionSchema.MODEL_ID] = config[k]
+                elif k in compile_dict:
+                    compile_configs[k] = config[k]
+                elif k in fit_dict:
+                    fit_configs[k] = config[k]
+                else:
+                    plpy.error("DL: {0} is an unidentified key".format(k))
+            combination[ModelSelectionSchema.COMPILE_PARAMS] = self.generate_row_string(compile_configs)
+            combination[ModelSelectionSchema.FIT_PARAMS] = self.generate_row_string(fit_configs)
+            self.msts.append(combination)
+
+    def find_random_combinations(self):
+        """
+        Finds combinations using random search.
+        """
+        seed_changes = 0
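+        # Advance the seed by one before every draw so that, for a fixed
+        # random_state, each draw is deterministic but still differs from
+        # the previous one.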
+
+        for _ in range(self.num_configs):
+            combination = {}
+            if self.random_state:
+                np.random.seed(self.random_state+seed_changes)
+                seed_changes += 1
+            combination[ModelSelectionSchema.MODEL_ID] = np.random.choice(self.model_id_list)
+            compile_d = {}
+            compile_d, seed_changes = self.generate_param_config(self.compile_params_dict, compile_d, seed_changes)
+            combination[ModelSelectionSchema.COMPILE_PARAMS] = self.generate_row_string(compile_d)
+            fit_d = {}
+            fit_d, seed_changes = self.generate_param_config(self.fit_params_dict, fit_d, seed_changes)
+            combination[ModelSelectionSchema.FIT_PARAMS] = self.generate_row_string(fit_d)
+            self.msts.append(combination)
+
+    def generate_param_config(self, params_dict, config_dict, seed_changes):
+        """
+        Generates a parameter configuration for random search.
+        :param params_dict: Dictionary of params choices.
+        :param config_dict: Dictionary to store the generated param config.
+        :param seed_changes: Running count of seed increments, for reproducible sampling.
+        :return: config_dict, seed_changes.
+        """
+        for cp in params_dict:
+            if self.random_state:
+                np.random.seed(self.random_state+seed_changes)
+                seed_changes += 1
+            param_values = params_dict[cp]
+            if cp == ModelSelectionSchema.OPTIMIZER_PARAMS_LIST:
+                opt_dict = np.random.choice(param_values)
+                opt_combination = {}
+                for i in opt_dict:
+                    opt_values = opt_dict[i]
+                    if self.random_state:
+                        np.random.seed(self.random_state+seed_changes)
+                        seed_changes += 1
+                    opt_combination[i] = self.sample_val(cp, opt_values)
+                config_dict[cp] = opt_combination
+            else:
+                config_dict[cp] = self.sample_val(cp, param_values)
+        return config_dict, seed_changes
+
+    def sample_val(self, cp, param_value_list):
+        """
+        Samples a value from a given list of values, either randomly from a list of discrete elements,
+        or from a specified distribution.
+        :param cp: name of the param being sampled (used in error messages)
+        :param param_value_list: list of discrete values, or [lower_bound, upper_bound, distribution_type]
+        :return: sampled value
+        """
+        # check if need to sample from a distribution
+        if param_value_list[-1] in self.accepted_distributions:
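+            # 'linear' draws uniformly on [low, high]; 'log' draws
+            # 10**uniform(log10(low), log10(high)); 'log_near_one' draws
+            # 1 - 10**uniform(log10(1-high), log10(1-low)), clustering near 1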
+            _assert_equal(len(param_value_list), 3,
+                          "DL: {0} should have exactly 3 elements if picking from a distribution".format(cp))
+            _assert(param_value_list[1] > param_value_list[0],
+                    "DL: {0} should be of the format [lower_bound, upper_bound, distribution_type]".format(cp))
+            if param_value_list[-1] == 'linear':
+                return np.random.uniform(param_value_list[0], param_value_list[1])
+            elif param_value_list[-1] == 'log':
+                return np.power(10, np.random.uniform(np.log10(param_value_list[0]),
+                                                      np.log10(param_value_list[1])))
+            elif param_value_list[-1] == 'log_near_one':
+                return 1.0 - np.power(10, np.random.uniform(np.log10(1.0 - param_value_list[1]),
+                                                            np.log10(1.0 - param_value_list[0])))
+            else:
+                plpy.error("DL: Please choose a valid distribution type {0}".format(
+                    tuple(self.accepted_distributions)))
+        else:
+            # random sampling
+            return np.random.choice(param_value_list)
+
+    def generate_row_string(self, configs_dict):
+        """
+        Generate row strings for MST table.
+        :param configs_dict: Dictionary of params config.
+        :return: string to insert as a row in MST table.
+        """
+        result_row_string = ""
+
+        if ModelSelectionSchema.OPTIMIZER_PARAMS_LIST in configs_dict:
+            optimizer_params_dict = configs_dict[ModelSelectionSchema.OPTIMIZER_PARAMS_LIST]
+            if 'optimizer' in optimizer_params_dict:
+                if optimizer_params_dict['optimizer'].lower() == 'sgd':
+                    optimizer_value = "SGD"
+                elif optimizer_params_dict['optimizer'].lower() == 'rmsprop':
+                    optimizer_value = "RMSprop"
+                else:
+                    optimizer_value = optimizer_params_dict['optimizer'].capitalize()
+                opt_string = "optimizer" + "=" + "'" + str(optimizer_value) \
+                             + "()" + "'"
+            else:
+                opt_string = "optimizer='RMSprop()'" # default optimizer
+            opt_param_string = ""
+            for opt_param in optimizer_params_dict:
+                if opt_param == 'optimizer':
+                    continue
+                opt_param_string += opt_param + '=' + str(optimizer_params_dict[opt_param]) + ','
+            if opt_param_string == "":
+                result_row_string += opt_string
+            else:
+                opt_param_string = opt_param_string[:-1] # to exclude the last comma
+                part = opt_string.split('(')
+                result_row_string += part[0] + '(' + opt_param_string + part[1]
+
+        for c in configs_dict:
+            if c == ModelSelectionSchema.OPTIMIZER_PARAMS_LIST:
+                continue
+            elif c == 'metrics':
+                if callable(configs_dict[c]):
+                    result_row_string += "," + str(c) + "=" + "[" + str(configs_dict[c]) + "]"
+                else:
+                    result_row_string += "," + str(c) + "=" + "['" + str(configs_dict[c]) + "']"
+            else:
+                if type(configs_dict[c]) == str or type(configs_dict[c]) == np.string_:
+                    result_row_string += "," + str(c) + "=" + "'" + str(configs_dict[c]) + "'"
+                else:
+                    # ints, floats, none type, booleans
+                    result_row_string += "," + str(c) + "=" + str(configs_dict[c])
+
+        if result_row_string and result_row_string[0] == ',':
+            return result_row_string[1:]
+        return result_row_string
+
+    def create_mst_table(self):
+        """Initialize the output mst table, if it doesn't exist (for incremental loading).
+        """
+
+        create_query = """
+                        CREATE TABLE {self.model_selection_table} (
+                            {mst_key} SERIAL,
+                            {model_id} INTEGER,
+                            {compile_params} VARCHAR,
+                            {fit_params} VARCHAR,
+                            unique ({model_id}, {compile_params}, {fit_params})
+                        );
+                       """.format(self=self,
+                                  mst_key=ModelSelectionSchema.MST_KEY,
+                                  model_id=ModelSelectionSchema.MODEL_ID,
+                                  compile_params=ModelSelectionSchema.COMPILE_PARAMS,
+                                  fit_params=ModelSelectionSchema.FIT_PARAMS)
+        with MinWarning('warning'):
+            plpy.execute(create_query)
+
+    def create_mst_summary_table(self):
+        """Initialize the output mst table.
+        """
+        create_query = """
+                        CREATE TABLE {self.model_selection_summary_table} (
+                            {model_arch_table} VARCHAR,
+                            {object_table} VARCHAR
+                        );
+                       """.format(self=self,
+                                  model_arch_table=ModelSelectionSchema.MODEL_ARCH_TABLE,
+                                  object_table=ModelSelectionSchema.OBJECT_TABLE)
+        with MinWarning('warning'):
+            plpy.execute(create_query)
+
+    def insert_into_mst_table(self):
+        """Insert every thing in self.msts into the mst table.
+        """
+        for mst in self.msts:
+            model_id = mst[ModelSelectionSchema.MODEL_ID]
+            compile_params = mst[ModelSelectionSchema.COMPILE_PARAMS]
+            fit_params = mst[ModelSelectionSchema.FIT_PARAMS]
+            insert_query = """
+                            INSERT INTO
+                                {self.model_selection_table}(
+                                    {model_id_col},
+                                    {compile_params_col},
+                                    {fit_params_col}
+                                )
+                            VALUES (
+                                {model_id},
+                                $${compile_params}$$,
+                                $${fit_params}$$
+                            )
+                           """.format(model_id_col=ModelSelectionSchema.MODEL_ID,
+                                      compile_params_col=ModelSelectionSchema.COMPILE_PARAMS,
+                                      fit_params_col=ModelSelectionSchema.FIT_PARAMS,
+                                      **locals())
+            plpy.execute(insert_query)
+        if self.object_table is None:
+            object_table = 'NULL::VARCHAR'
+        else:
+            object_table = '$${0}$$'.format(self.object_table)
+        insert_summary_query = """
+                        INSERT INTO
+                            {self.model_selection_summary_table}(
+                                {model_arch_table_name},
+                                {object_table_name}
+                        )
+                        VALUES (
+                            $${self.model_arch_table}$$,
+                            {object_table}
+                        )
+                       """.format(model_arch_table_name=ModelSelectionSchema.MODEL_ARCH_TABLE,
+                                  object_table_name=ModelSelectionSchema.OBJECT_TABLE,
+                                  **locals())
+        plpy.execute(insert_summary_query)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
index d6c10e3..01a27e3 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
@@ -20,8 +20,8 @@
  *
  * @file madlib_keras_model_selection.sql_in
  *
- * @brief SQL functions for model hopper distributed training
- * @date August 2019
+ * @brief Generate configurations for model selection (hyperparams, architectures)
+ * @date August 2020
  *
  *
  *//* ----------------------------------------------------------------------- */
@@ -30,36 +30,44 @@
 /**
 @addtogroup grp_keras_setup_model_selection
 
-@brief Utility function to set up a model selection table for model architecture search
+@brief Utility function to generate configurations for model architecture search
 and hyperparameter tuning.
 
 \warning <em> This MADlib method is still in early stage development.
 Interface and implementation are subject to change. </em>
 
 <div class="toc"><b>Contents</b><ul>
-<li class="level1"><a href="#load_mst_table">Load Model Selection Table</a></li>
+<li class="level1"><a href="#gen_mst_configs">Generate Model Configurations</a></li>
+<li class="level1"><a href="#load_mst_table">Load Model Selection Table [Deprecated]</a></li>
 <li class="level1"><a href="#example">Examples</a></li>
 <li class="level1"><a href="#notes">Notes</a></li>
 <li class="level1"><a href="#related">Related Topics</a></li>
 </ul></div>
 
-This utility function sets up a model selection table
-for use by the multiple model Keras fit feature of MADlib.
-By model selection we mean both hyperparameter tuning and
-model architecture search.  The table defines the unique combinations
-of model architectures, compile and fit parameters
-to run on a massively parallel processing database cluster.
+This module generates model configurations 
+for training multiple models at the same time
+using <a href="group__grp__keras__run__model__selection.html">Run Model Selection</a>.
+By model configurations we mean both hyperparameters and
+model architectures. Grid search or random search 
+can be used to generate the configurations. 
+The output table from this module
+defines the combinations of model architectures, 
+compile and fit parameters to be trained in parallel.
 
-@anchor load_mst_table
-@par Load Model Selection Table
+<!-- --------------------------------------START-------------------------------------------------------------- -->
+@anchor gen_mst_configs
+@par Generate Model Configurations
 
 <pre class="syntax">
-load_model_selection_table(
+generate_model_configs(
     model_arch_table,
     model_selection_table,
     model_id_list,
-    compile_params_list,
-    fit_params_list,
+    compile_params_grid,
+    fit_params_grid,
+    search_type,
+    num_configs,
+    random_state,
     object_table
     )
 </pre>
@@ -69,11 +77,11 @@
   <dt>model_arch_table</dt>
   <dd>VARCHAR. Table containing model architectures and weights.
   For more information on this table
-  refer to <a href="group__grp__keras__model__arch.html">Load Model</a>.
+  refer to the module <a href="group__grp__keras__model__arch.html">Load Models</a>.
   </dd>
 
   <dt>model_selection_table</dt>
-  <dd>VARCHAR. Model selection table created by this utility.  A summary table
+  <dd>VARCHAR. Model selection table created by this module.  A summary table
   named <model_selection_table>_summary is also created.  Contents of both output
   tables are described below.
   </dd>
@@ -82,28 +90,85 @@
   <dd>INTEGER[]. Array of model IDs from the 'model_arch_table' to be included
   in the run combinations.  For hyperparameter search, this will typically be
   one model ID.  For model architecture search, this will be the different model IDs
-  that you want to test.
+  that you want to compare.
   </dd>
 
-  <dt>compile_params_list</dt>
-  <dd>VARCHAR[]. Array of compile parameters to be tested.  Each element
-  of the array should consist of a string of compile parameters
-  exactly as it is to be passed to Keras. For custom loss functions or custom metrics,
-  list the custom function name in the usual way, and also provide the name of the
-  table where the serialized objects reside in the parameter 'object_table'
-  below.
+  <dt>compile_params_grid</dt>
+  <dd>VARCHAR. String representation of a Python dictionary 
+  of compile parameters to be tested. Each entry
+  of the dictionary should consist of keys as compile parameter names, 
+  and values as a Python list of compile parameter values to be passed to Keras.
+  Also, optimizer parameters are specified as a nested list of dictionaries
+  ('optimizer_params_list'), which allows different optimizer types to have
+  different parameters or ranges of parameters.
+  Here is an example:
+
+  <pre class="example">
+  $$
+    {'loss': ['categorical_crossentropy'], 
+     'optimizer_params_list': [ 
+        {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']}, 
+        {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}], 
+     'metrics': ['accuracy']
+    }
+  $$
+  </pre>
+
+  The following types of sampling are supported:  'linear', 'log' and 'log_near_one'.
+  The 'log_near_one' sampling is useful for exponentially weighted average types of parameters like momentum, 
+  which are very sensitive to changes near 1.  It has the effect of producing more values near 1 
+  than regular log-based sampling. 
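+
+  For intuition, the three sampling types correspond to the following
+  draws (illustrative Python only, not part of the interface):
+
+  <pre class="example">
+  import numpy as np
+  lo, hi = 0.95, 0.99
+  np.random.uniform(lo, hi)                                        # 'linear'
+  10 ** np.random.uniform(np.log10(lo), np.log10(hi))              # 'log'
+  1 - 10 ** np.random.uniform(np.log10(1 - hi), np.log10(1 - lo))  # 'log_near_one'
+  </pre>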
+  
+  In the case of grid search, omit the sample type and just put the grid points in the list.
+  For custom loss functions or custom metrics,
+  list the custom function name in the usual way, and provide the name of the
+  table where the serialized Python objects reside using the 
+  parameter 'object_table' below. See the examples section later on this page for more examples. 
   </dd>
 
-  <dt>fit_params_list</dt>
-  <dd>VARCHAR[].  Array of fit parameters to be tested.  Each element
-  of the array should consist of a string of fit parameters
-  exactly as it is to be passed to Keras.
+  <dt>fit_params_grid</dt>
+  <dd>VARCHAR.  String representation of a Python dictionary 
+  of fit parameters to be tested. Each entry
+  of the dictionary should consist of keys as fit parameter names, 
+  and values as a Python list of fit parameter values
+  to be passed to Keras. Here is an example:
+
+  <pre class="example">
+  $$ 
+    {'batch_size': [32, 64, 128, 256],
+     'epochs': [10, 20, 30] 
+    } 
+  $$
+  </pre>
+  See the examples section later on this page for more examples.
+  </dd>
+
+  <dt>search_type</dt>
+  <dd>VARCHAR, 'grid' or 'random'. Search strategy
+  for generating model configurations.  Grid search generates
+  all combinations of model IDs, compile params and fit params.  For
+  random search, specify the number of configs you want to 
+  generate using the 'num_configs' parameter below. 
+  Note that you can also use short forms
+  for the 'grid' or 'random' keywords, e.g., 'rand' or 'r' instead 
+  of writing out 'random' in full.  
+  </dd>
+
+  <dt>num_configs (optional)</dt>
+  <dd>INTEGER, default: NULL. Number of model configs to generate.
+  Only applies when search_type='random'.
+  </dd>
+
+  <dt>random_state (optional)</dt>
+  <dd>INTEGER, default: NULL.  Pseudo random number generator 
+  state used for random uniform sampling from lists of possible 
+  values. Pass an integer for reproducible output across multiple
+  function calls.  Only applies when search_type='random'.
   </dd>
 
   <dt>object_table (optional)</dt>
-  <dd>VARCHAR, default: NULL. Name of the table containing Python objects in the case that
-  custom loss functions or custom metrics are specified in the
-  parameter 'compile_params_list'.
+  <dd>VARCHAR, default: NULL. Name of the table containing 
+  Python objects in the case that custom loss functions or 
+  custom metrics are specified in the 'compile_params_grid'.
   </dd>
 
 </dl>
@@ -153,6 +218,68 @@
     </table>
 </br>
 
+<!-- --------------------------------------END-------------------------------------------------------------- -->
+
+@anchor load_mst_table
+@par Load Model Selection Table [Deprecated]
+
+This method is deprecated and replaced by the method 'generate_model_configs()' described above.
+
+<pre class="syntax">
+load_model_selection_table(
+    model_arch_table,
+    model_selection_table,
+    model_id_list,
+    compile_params_list,
+    fit_params_list,
+    object_table
+    )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+  <dt>model_arch_table</dt>
+  <dd>VARCHAR. Table containing model architectures and weights.
+  For more information on this table
+  refer to <a href="group__grp__keras__model__arch.html">Load Model</a>.
+  </dd>
+
+  <dt>model_selection_table</dt>
+  <dd>VARCHAR. Model selection table created by this utility.  A summary table
+  named <model_selection_table>_summary is also created.  Contents of both output
+  tables are the same as described above for the method 'generate_model_configs()'.
+  </dd>
+
+  <dt>model_id_list</dt>
+  <dd>INTEGER[]. Array of model IDs from the 'model_arch_table' to be included
+  in the run combinations.  For hyperparameter search, this will typically be
+  one model ID.  For model architecture search, this will be the different model IDs
+  that you want to test.
+  </dd>
+
+  <dt>compile_params_list</dt>
+  <dd>VARCHAR[]. Array of compile parameters to be tested.  Each element
+  of the array should consist of a string of compile parameters
+  exactly as it is to be passed to Keras. For custom loss functions or custom metrics,
+  list the custom function name in the usual way, and also provide the name of the
+  table where the serialized objects reside in the parameter 'object_table'
+  below.
+  </dd>
+
+  <dt>fit_params_list</dt>
+  <dd>VARCHAR[].  Array of fit parameters to be tested.  Each element
+  of the array should consist of a string of fit parameters
+  exactly as it is to be passed to Keras.
+  </dd>
+
+  <dt>object_table (optional)</dt>
+  <dd>VARCHAR, default: NULL. Name of the table containing Python objects in the case that
+  custom loss functions or custom metrics are specified in the
+  parameter 'compile_params_list'.
+  </dd>
+
+</dl>
+
 @anchor example
 @par Examples
 -# The model selection table works in conjunction with a model architecture table,
@@ -256,7 +383,300 @@
 (2 rows)
 </pre>
 
--# Load model selection table.  Select the model(s) from the model
+-# Generate model configurations using grid search. The output table for grid search 
+contains the unique combinations of model architectures, compile and fit parameters.
+<pre class="example">
+DROP TABLE IF EXISTS mst_table, mst_table_summary;
+SELECT madlib.generate_model_configs(
+                                        'model_arch_library', -- model architecture table
+                                        'mst_table',          -- model selection table output
+                                         ARRAY[1,2],          -- model ids from model architecture table
+                                         $$
+                                            {'loss': ['categorical_crossentropy'], 
+                                             'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.001, 0.01]} ], 
+                                             'metrics': ['accuracy']}
+                                         $$,                  -- compile_param_grid    
+                                         $$ 
+                                         { 'batch_size': [64, 128],
+                                           'epochs': [10] 
+                                         } 
+                                         $$,                  -- fit_param_grid                                          
+                                         'grid'               -- search_type 
+                                         );
+SELECT * FROM mst_table ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key | model_id |                                 compile_params                                  |        fit_params        
+---------+----------+---------------------------------------------------------------------------------+--------------------------
+       1 |        1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
+       2 |        1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+       3 |        1 | optimizer='SGD(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+       4 |        1 | optimizer='SGD(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+       5 |        1 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+       6 |        1 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+       7 |        1 | optimizer='SGD(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=64
+       8 |        1 | optimizer='SGD(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=128
+       9 |        2 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
+      10 |        2 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+      11 |        2 | optimizer='SGD(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      12 |        2 | optimizer='SGD(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      13 |        2 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      14 |        2 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      15 |        2 | optimizer='SGD(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=64
+      16 |        2 | optimizer='SGD(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=128
+(16 rows)
+</pre>
+Note that the example above uses the same learning rates for the two optimizers. If you want to 
+use different learning rates and different parameters for different optimizers (a common case):
+<pre class="example">
+DROP TABLE IF EXISTS mst_table, mst_table_summary;
+SELECT madlib.generate_model_configs(
+                                        'model_arch_library', -- model architecture table
+                                        'mst_table',          -- model selection table output
+                                         ARRAY[1,2],          -- model ids from model architecture table
+                                         $$
+                                            {'loss': ['categorical_crossentropy'], 
+                                             'optimizer_params_list': [
+                                                 {'optimizer': ['SGD']}, 
+                                                 {'optimizer': ['SGD'], 'lr': [0.0001, 0.001], 'momentum': [0.95]}, 
+                                                 {'optimizer': ['Adam'], 'lr': [0.01, 0.1], 'decay': [1e-4]}], 
+                                             'metrics': ['accuracy']}
+                                         $$,                  -- compile_param_grid    
+                                         $$ 
+                                         { 'batch_size': [64, 128],
+                                           'epochs': [10] 
+                                         } 
+                                         $$,                  -- fit_param_grid                                          
+                                         'grid'               -- search_type 
+                                         );
+SELECT * FROM mst_table ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key | model_id |                                        compile_params                                         |        fit_params        
+---------+----------+-----------------------------------------------------------------------------------------------+--------------------------
+       1 |        1 | optimizer='SGD()',metrics=['accuracy'],loss='categorical_crossentropy'                        | epochs=10,batch_size=64
+       2 |        1 | optimizer='SGD()',metrics=['accuracy'],loss='categorical_crossentropy'                        | epochs=10,batch_size=128
+       3 |        2 | optimizer='SGD()',metrics=['accuracy'],loss='categorical_crossentropy'                        | epochs=10,batch_size=64
+       4 |        2 | optimizer='SGD()',metrics=['accuracy'],loss='categorical_crossentropy'                        | epochs=10,batch_size=128
+       5 |        1 | optimizer='SGD(lr=0.0001,momentum=0.95)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
+       6 |        1 | optimizer='SGD(lr=0.0001,momentum=0.95)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+       7 |        1 | optimizer='SGD(lr=0.001,momentum=0.95)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+       8 |        1 | optimizer='SGD(lr=0.001,momentum=0.95)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+       9 |        2 | optimizer='SGD(lr=0.0001,momentum=0.95)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
+      10 |        2 | optimizer='SGD(lr=0.0001,momentum=0.95)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+      11 |        2 | optimizer='SGD(lr=0.001,momentum=0.95)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      12 |        2 | optimizer='SGD(lr=0.001,momentum=0.95)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      13 |        1 | optimizer='Adam(lr=0.01,decay=0.0001)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=64
+      14 |        1 | optimizer='Adam(lr=0.01,decay=0.0001)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=128
+      15 |        1 | optimizer='Adam(lr=0.1,decay=0.0001)',metrics=['accuracy'],loss='categorical_crossentropy'    | epochs=10,batch_size=64
+      16 |        1 | optimizer='Adam(lr=0.1,decay=0.0001)',metrics=['accuracy'],loss='categorical_crossentropy'    | epochs=10,batch_size=128
+      17 |        2 | optimizer='Adam(lr=0.01,decay=0.0001)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=64
+      18 |        2 | optimizer='Adam(lr=0.01,decay=0.0001)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=128
+      19 |        2 | optimizer='Adam(lr=0.1,decay=0.0001)',metrics=['accuracy'],loss='categorical_crossentropy'    | epochs=10,batch_size=64
+      20 |        2 | optimizer='Adam(lr=0.1,decay=0.0001)',metrics=['accuracy'],loss='categorical_crossentropy'    | epochs=10,batch_size=128
+(20 rows)
+</pre>
+
+-# Generate model configurations using random search. The output table for random search 
+contains the specified number of combinations of model architectures, compile and fit parameters, 
+sampled from the specified lists and distributions.
+<pre class="example">
+DROP TABLE IF EXISTS mst_table, mst_table_summary;
+SELECT madlib.generate_model_configs(
+                                        'model_arch_library', -- model architecture table
+                                        'mst_table',          -- model selection table output
+                                         ARRAY[1,2],          -- model ids from model architecture table
+                                         $$
+                                            {'loss': ['categorical_crossentropy'], 
+                                             'optimizer_params_list': [ 
+                                                 {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']}, 
+                                                 {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}], 
+                                             'metrics': ['accuracy']}
+                                         $$,                  -- compile_param_grid    
+                                         $$ 
+                                         { 'batch_size': [64, 128],
+                                           'epochs': [10] 
+                                         } 
+                                         $$,                  -- fit_param_grid                                          
+                                         'random',            -- search_type
+                                         20
+                                         );
+SELECT * FROM mst_table ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key | model_id |                                                       compile_params                                                        |        fit_params        
+---------+----------+-----------------------------------------------------------------------------------------------------------------------------+--------------------------
+       1 |        1 | optimizer='SGD(lr=0.000195784477708685,momentum=0.9768159513291526)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=128
+       2 |        2 | optimizer='SGD(lr=0.0002499200066875511,momentum=0.9807877269510826)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+       3 |        1 | optimizer='SGD(lr=0.0009097798285407916,momentum=0.9706029152411938)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+       4 |        1 | optimizer='SGD(lr=0.0001272842475986666,momentum=0.9858583458057799)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+       5 |        1 | optimizer='SGD(lr=0.0001367874444015989,momentum=0.9772674033475668)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+       6 |        2 | optimizer='SGD(lr=0.0002233708561319785,momentum=0.9743315606145182)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+       7 |        1 | optimizer='SGD(lr=0.0009066689970530365,momentum=0.9835897505288803)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+       8 |        1 | optimizer='SGD(lr=0.0007589416356572876,momentum=0.958751411608181)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=64
+       9 |        2 | optimizer='Adam(lr=0.057814228170084386,decay=1.0641718595377929e-06)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+      10 |        1 | optimizer='Adam(lr=0.01927466297833838,decay=1.039476442716842e-06)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=128
+      11 |        2 | optimizer='Adam(lr=0.014718555287257804,decay=9.947768661882175e-05)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      12 |        1 | optimizer='Adam(lr=0.010397686133595378,decay=2.5730580994358942e-05)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+      13 |        1 | optimizer='SGD(lr=0.0008624562426613621,momentum=0.989134963527059)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=64
+      14 |        2 | optimizer='SGD(lr=0.00010555974470031461,momentum=0.980489419269402)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      15 |        2 | optimizer='Adam(lr=0.05041699703418617,decay=4.685540619995589e-05)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=128
+      16 |        1 | optimizer='Adam(lr=0.034295140601304126,decay=1.6034699865163222e-06)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
+      17 |        1 | optimizer='Adam(lr=0.06888969005355218,decay=1.6318109152382423e-05)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      18 |        2 | optimizer='SGD(lr=0.0008225712651952847,momentum=0.9819748008695103)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      19 |        1 | optimizer='Adam(lr=0.0819110285922332,decay=1.6912312124827899e-06)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=64
+      20 |        1 | optimizer='Adam(lr=0.011688026325555774,decay=2.9315437856404027e-05)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+(20 rows)
+</pre>
+
+-# Incremental loading for more complex combinations.  If it is easier to generate the model configurations 
+incrementally rather than all at once, you can do so by not dropping the model selection table and associated 
+summary table, in which case the new model configurations will be appended to the existing table.  Here we combine two 
+of the previous examples into a single output table:
+<pre class="example">
+DROP TABLE IF EXISTS mst_table, mst_table_summary;
+SELECT madlib.generate_model_configs(
+                                        'model_arch_library', -- model architecture table
+                                        'mst_table',          -- model selection table output
+                                         ARRAY[1,2],          -- model ids from model architecture table
+                                         $$
+                                            {'loss': ['categorical_crossentropy'], 
+                                             'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.001, 0.01]} ], 
+                                             'metrics': ['accuracy']}
+                                         $$,                  -- compile_param_grid    
+                                         $$ 
+                                         { 'batch_size': [64, 128],
+                                           'epochs': [10] 
+                                         } 
+                                         $$,                  -- fit_param_grid                                          
+                                         'grid'               -- search_type 
+                                         );
+</pre>
+Now add to the existing table and note that mst_key continues where it left off:
+<pre class="example">
+SELECT madlib.generate_model_configs(
+                                        'model_arch_library', -- model architecture table
+                                        'mst_table',          -- model selection table output
+                                         ARRAY[1,2],          -- model ids from model architecture table
+                                         $$
+                                            {'loss': ['categorical_crossentropy'], 
+                                             'optimizer_params_list': [ 
+                                                 {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']}, 
+                                                 {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}], 
+                                             'metrics': ['accuracy']}
+                                         $$,                  -- compile_param_grid    
+                                         $$ 
+                                         { 'batch_size': [64, 128],
+                                           'epochs': [10] 
+                                         } 
+                                         $$,                  -- fit_param_grid                                          
+                                         'random',            -- search_type
+                                          20
+                                         );
+SELECT * FROM mst_table ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key | model_id |                                                       compile_params                                                        |        fit_params        
+---------+----------+-----------------------------------------------------------------------------------------------------------------------------+--------------------------
+       1 |        1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'                                             | epochs=10,batch_size=64
+       2 |        1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'                                             | epochs=10,batch_size=128
+       3 |        1 | optimizer='SGD(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'                                              | epochs=10,batch_size=64
+       4 |        1 | optimizer='SGD(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'                                              | epochs=10,batch_size=128
+       5 |        1 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'                                              | epochs=10,batch_size=64
+       6 |        1 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'                                              | epochs=10,batch_size=128
+       7 |        1 | optimizer='SGD(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'                                               | epochs=10,batch_size=64
+       8 |        1 | optimizer='SGD(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'                                               | epochs=10,batch_size=128
+       9 |        2 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'                                             | epochs=10,batch_size=64
+      10 |        2 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'                                             | epochs=10,batch_size=128
+      11 |        2 | optimizer='SGD(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'                                              | epochs=10,batch_size=64
+      12 |        2 | optimizer='SGD(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy'                                              | epochs=10,batch_size=128
+      13 |        2 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'                                              | epochs=10,batch_size=64
+      14 |        2 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'                                              | epochs=10,batch_size=128
+      15 |        2 | optimizer='SGD(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'                                               | epochs=10,batch_size=64
+      16 |        2 | optimizer='SGD(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'                                               | epochs=10,batch_size=128
+      17 |        2 | optimizer='SGD(lr=0.00013996842804647915,momentum=0.9677072493281305)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
+      18 |        1 | optimizer='Adam(lr=0.04252873277972123,decay=9.503983307511243e-05)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=64
+      19 |        2 | optimizer='Adam(lr=0.06666969394323848,decay=1.5626668941131748e-05)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      20 |        2 | optimizer='SGD(lr=0.00016137313867804707,momentum=0.954293112127019)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      21 |        2 | optimizer='Adam(lr=0.019443570245321506,decay=1.2882524497407873e-06)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
+      22 |        2 | optimizer='Adam(lr=0.06302317748060839,decay=6.238009849562074e-05)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=128
+      23 |        2 | optimizer='SGD(lr=0.00010890482493011119,momentum=0.9826239169968034)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+      24 |        1 | optimizer='SGD(lr=0.0009201966766121783,momentum=0.9896730563556151)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      25 |        2 | optimizer='SGD(lr=0.00028961522836420906,momentum=0.9859394149216544)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+      26 |        1 | optimizer='SGD(lr=0.0001503249757866609,momentum=0.9777816636354879)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      27 |        2 | optimizer='SGD(lr=0.0008405326172626768,momentum=0.9538686498263182)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      28 |        1 | optimizer='SGD(lr=0.00011926989091387571,momentum=0.9876746918399469)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
+      29 |        1 | optimizer='Adam(lr=0.018794361633022855,decay=9.387826286694454e-06)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      30 |        2 | optimizer='SGD(lr=0.0009692977025027591,momentum=0.9878758592330659)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      31 |        2 | optimizer='SGD(lr=0.0006671929498585603,momentum=0.9786502962872058)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      32 |        2 | optimizer='Adam(lr=0.03948766165185474,decay=3.056584635386748e-06)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=64
+      33 |        2 | optimizer='Adam(lr=0.020343961099103417,decay=1.183810228780669e-05)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      34 |        1 | optimizer='Adam(lr=0.016854644990148417,decay=3.561117893117444e-06)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=64
+      35 |        2 | optimizer='SGD(lr=0.0004620089560788749,momentum=0.9887310587871919)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+      36 |        1 | optimizer='SGD(lr=0.0002493912675066962,momentum=0.9892077270385708)',metrics=['accuracy'],loss='categorical_crossentropy'  | epochs=10,batch_size=128
+(36 rows)
+</pre>
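+Note how random search interprets the ranges above: a 'log' range such as
+[0.0001, 0.001, 'log'] is sampled uniformly in log10-space, while a
+'log_near_one' range such as [0.95, 0.99, 'log_near_one'] samples
+(1 - value) on a log scale, so draws cluster near the upper bound, which is
+useful for parameters like momentum. The draws happen inside the function;
+the following standalone SQL sketch only illustrates the two distributions:
+<pre class="example">
+SELECT
+    -- 'log': uniform in log10-space over [0.0001, 0.001]
+    power(10, log(0.0001) + random() * (log(0.001) - log(0.0001))) AS lr_sample,
+    -- 'log_near_one': sample (1 - x) log-uniformly over [0.95, 0.99]
+    1 - power(10, log(1 - 0.99) + random() * (log(1 - 0.95) - log(1 - 0.99))) AS momentum_sample;
+</pre>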
+
+-# Create model selection table manually.
+If you want more control over the content of the model selection table,
+you can use grid or random search to generate a large number of combinations
+and then keep only a subset of rows for training (a pruning sketch is shown
+at the end of this step). Alternatively, you can manually create the model
+selection table and the associated summary table. Both must be created,
+since they are needed by the multiple model fit module.
+For example, let's say we don't want all combinations, but only
+batch_size=4 for model_id=1 and batch_size=8 for model_id=2:
+<pre class="example">
+DROP TABLE IF EXISTS mst_table_manual;
+CREATE TABLE mst_table_manual(
+    mst_key serial,
+    model_id integer,
+    compile_params varchar,
+    fit_params varchar
+);
+INSERT INTO mst_table_manual(model_id, compile_params, fit_params) VALUES
+(1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+(1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+(1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+(2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
+(2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
+(2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=8,epochs=1');
+SELECT * FROM mst_table_manual ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key | model_id      |                                 compile_params                                  |      fit_params
+---------+---------------+---------------------------------------------------------------------------------+-----------------------
+       1 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=4,epochs=1
+       2 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=4,epochs=1
+       3 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+       4 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       5 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=8,epochs=1
+       6 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+(6 rows)
+</pre>
+Create the summary table which must be named
+with the model selection output table appended by "_summary":
+<pre class="example">
+DROP TABLE IF EXISTS mst_table_manual_summary;
+CREATE TABLE mst_table_manual_summary (
+    model_arch_table varchar
+);
+INSERT INTO mst_table_manual_summary(model_arch_table) VALUES
+('model_arch_library');
+SELECT * FROM mst_table_manual_summary;
+</pre>
+<pre class="result">
+  model_arch_table
+--------------------+
+ model_arch_library
+(1 row)
+</pre>
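+As mentioned at the start of this step, you can also generate a large set
+of configurations and then prune it, rather than building the table by
+hand. A sketch of that approach using the 'mst_table' generated earlier
+(the predicates are illustrative; adjust them to the rows you want to keep):
+<pre class="example">
+-- keep only batch_size=64 rows for model 1 and batch_size=128 rows for model 2
+DELETE FROM mst_table
+WHERE (model_id = 1 AND fit_params NOT LIKE '%batch_size=64%')
+   OR (model_id = 2 AND fit_params NOT LIKE '%batch_size=128%');
+</pre>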
+
+-# Custom loss functions and custom metrics.
+TBD
+
+-# <b>[Deprecated]</b> Load model selection table.  This method has been
+replaced by the 'generate_model_configs()' method described above.
+Select the model(s) from the model
 architecture table that you want to run, along with the compile and
 fit parameters.  Unique combinations will be created:
 <pre class="example">
@@ -304,132 +724,10 @@
 (1 row)
 </pre>
 
--# Create model selection table manually.  If you would like to
-have more control over the set of model selection parameters
-to run, you can manually create the model selection table and
-the associated summary table. Both must be created since
-they are needed by the multiple model fit module.
-For example, let's say we don't want all combinations
-but only want batch_size=4 for model_id=1 and batch_size=8 for model_id=2:
-<pre class="example">
-DROP TABLE IF EXISTS mst_table_manual;
-CREATE TABLE mst_table_manual(
-    mst_key serial,
-    model_id integer,
-    compile_params varchar,
-    fit_params varchar
-);
-INSERT INTO mst_table_manual(model_id, compile_params, fit_params) VALUES
-(1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
-(1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
-(1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
-(2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
-(2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
-(2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=8,epochs=1');
-SELECT * FROM mst_table_manual ORDER BY mst_key;
-</pre>
-<pre class="result">
- mst_key | model_id      |                                 compile_params                                  |      fit_params
----------+---------------+---------------------------------------------------------------------------------+-----------------------
-       1 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=4,epochs=1
-       2 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=4,epochs=1
-       3 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
-       4 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
-       5 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=8,epochs=1
-       6 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
-(6 rows)
-</pre>
-Create the summary table which must be named
-with the model selection output table appended by "_summary":
-<pre class="example">
-DROP TABLE IF EXISTS mst_table_manual_summary;
-CREATE TABLE mst_table_manual_summary (
-    model_arch_table varchar
-);
-INSERT INTO mst_table_manual_summary(model_arch_table) VALUES
-('model_arch_library');
-SELECT * FROM mst_table_manual_summary;
-</pre>
-<pre class="result">
-  model_arch_table
---------------------+
- model_arch_library
-(1 row)
-</pre>
-
--# Generate hyperparameters automatically.  You can use other
-libraries or methods to generate hyperparameters according
-to the tests that you want to run. For example, let's randomly
-generate batch size from powers of 2 and learning
-rate on a log scale.
-We use psycopg which is a PostgreSQL database adapter
-for the Python programming language.
-<pre class="example">
-import numpy as np
-import psycopg2 as p2
-conn = p2.connect('postgresql://gpadmin@35.239.240.26:5432/madlib')
-#conn = p2.connect('postgresql://fmcquillan@localhost:5432/madlib')
-cur = conn.cursor()
-\#
-%sql DROP TABLE IF EXISTS mst_table_auto, mst_table_auto_summary;
-\#
-\#compile params
-learning_rate = np.random.permutation([0.1,0.01,0.001,0.0001])[:3]
-compile_param1 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[0]) + ")',metrics=['accuracy']"
-compile_param2 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[1]) + ")',metrics=['accuracy']"
-compile_param3 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[2]) + ")',metrics=['accuracy']"
-compile_params = [compile_param1,compile_param2,compile_param3]
-\#
-\#fit params
-batch_size = np.random.permutation([4,8,16,32,64])[:2]
-fit_param1 = "batch_size=" + str(batch_size[0]) + ",epochs=1"
-fit_param2 = "batch_size=" + str(batch_size[1]) + ",epochs=1"
-fit_params = [fit_param1,fit_param2]
-\#
-query = "SELECT madlib.load_model_selection_table('model_arch_library', 'mst_table_auto', ARRAY[1,2], %s, %s);"
-\#
-cur.execute(query,[compile_params, fit_params])
-conn.commit()
-\#
-\# review model selection table
-%sql SELECT * FROM mst_table_auto ORDER BY mst_key;
-</pre>
-<pre class="result">
- mst_key | model_id      |                                  compile_params                                  |      fit_params
----------+---------------+----------------------------------------------------------------------------------+-----------------------
-       1 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.0001)',metrics=['accuracy'] | batch_size=4,epochs=1
-       2 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.0001)',metrics=['accuracy'] | batch_size=8,epochs=1
-       3 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']    | batch_size=4,epochs=1
-       4 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']    | batch_size=8,epochs=1
-       5 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']  | batch_size=4,epochs=1
-       6 |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']  | batch_size=8,epochs=1
-       7 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.0001)',metrics=['accuracy'] | batch_size=4,epochs=1
-       8 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.0001)',metrics=['accuracy'] | batch_size=8,epochs=1
-       9 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']    | batch_size=4,epochs=1
-      10 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']    | batch_size=8,epochs=1
-      11 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']  | batch_size=4,epochs=1
-      12 |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']  | batch_size=8,epochs=1
-(12 rows)
-</pre>
-The name of the model architecture table is stored in the summary table:
-<pre class="example">
-SELECT * FROM mst_table_auto_summary;
-</pre>
-<pre class="result">
-  model_arch_table
---------------------+
- model_arch_library
-(1 row)
-</pre>
-
 @anchor notes
 @par Notes
 
-1. In this method, the same compile and fit parameters are applied to all model architectures
-when generating combinations.  However, you may wish to have different compile and fit parameters
-for each model.  To do so, call 'load_model_selection_table'
-multiple times - once for each model.  Then you can combine the resulting tables using UNION or other means.
-Note that the 'mst_key' must be unique so you will need to regenerate it in your final combined table.
+1. TBD
 
 @anchor related
 @par Related Topics
@@ -463,3 +761,24 @@
   SELECT MADLIB_SCHEMA.load_model_selection_table($1, $2, $3, $4, $5, NULL);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+
+/*
+--------------------------------------------------------------------------
+*/
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.generate_model_configs(
+    model_arch_table        VARCHAR,
+    model_selection_table   VARCHAR,
+    model_id_list           INTEGER[],
+    compile_params_grid     VARCHAR,
+    fit_params_grid         VARCHAR,
+    search_type             VARCHAR DEFAULT 'grid',
+    num_configs             INTEGER DEFAULT NULL,
+    random_state            INTEGER DEFAULT NULL,
+    object_table            VARCHAR DEFAULT NULL
+) RETURNS VOID AS $$
+    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_model_selection')
+    with AOControl(False):
+        mst_loader = madlib_keras_model_selection.MstSearch(**globals())
+        mst_loader.load()
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
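+
+-- Minimal usage sketch (table names and grid values below are illustrative;
+-- see the module documentation above for complete, runnable examples):
+--
+--   SELECT MADLIB_SCHEMA.generate_model_configs(
+--       'model_arch_library',   -- model architecture table
+--       'mst_table',            -- output model selection table
+--       ARRAY[1,2],             -- model ids to include
+--       $${'loss': ['categorical_crossentropy'],
+--          'optimizer_params_list': [{'optimizer': ['Adam'], 'lr': [0.001, 0.01]}],
+--          'metrics': ['accuracy']}$$,
+--       $${'batch_size': [32], 'epochs': [4]}$$
+--   );  -- search_type defaults to 'grid', so this yields the full grid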
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
index bb2e744..8ced08b 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
@@ -453,7 +453,8 @@
                  model_id_list,
                  compile_params_list,
                  fit_params_list,
-                 object_table
+                 object_table,
+                 module_name='load_model_selection_table'
                  ):
         self.model_arch_table = model_arch_table
         self.model_selection_table = model_selection_table
@@ -462,13 +463,14 @@
         self.compile_params_list = compile_params_list
         self.fit_params_list = fit_params_list
         self.object_table = object_table
-        self.module_name = 'load_model_selection_table'
+        self.module_name = module_name
         self._validate_input_args()
 
     def _validate_input_args(self):
         self._validate_input_output_tables()
         self._validate_model_ids()
-        self._validate_compile_and_fit_params()
+        if self.module_name == 'load_model_selection_table':
+            self._validate_compile_and_fit_params()
 
     def _validate_model_ids(self):
         model_id_str = '({0})'\
@@ -540,7 +542,8 @@
         input_tbl_valid(self.model_arch_table, self.module_name)
         if self.object_table is not None:
             input_tbl_valid(self.object_table, self.module_name)
-        output_tbl_valid(self.model_selection_table, self.module_name)
-        output_tbl_valid(self.model_selection_summary_table, self.module_name)
+        if self.module_name == 'load_model_selection_table':
+            output_tbl_valid(self.model_selection_table, self.module_name)
+            output_tbl_valid(self.model_selection_summary_table, self.module_name)
 
 
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index 575be98..a4463e7 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -190,7 +190,7 @@
     return loss
 
 # Parse the compile parameters and the optimizer.
-def parse_and_validate_compile_params(str_of_args):
+def parse_and_validate_compile_params(str_of_args, additional_params=[]):
     """
     Args:
         @param: str_of_args     The string of arguments given by the user
@@ -201,13 +201,15 @@
     """
     literal_eval_compile_params = ['metrics', 'loss_weights',
                                    'weighted_metrics', 'sample_weight_mode']
-    accepted_compile_params = literal_eval_compile_params + ['optimizer', 'loss']
+    accepted_compile_params = literal_eval_compile_params + ['optimizer', 'loss'] + additional_params
 
     compile_dict = convert_string_of_args_to_dict(str_of_args)
     compile_dict = validate_and_literal_eval_keys(compile_dict,
                                                   literal_eval_compile_params,
                                                   accepted_compile_params)
-    _assert('optimizer' in compile_dict, "optimizer is a required parameter for compile")
+    if len(additional_params) == 0:
+        # 'optimizer' is required only when no additional accepted params are
+        # passed; callers that pass additional_params supply the optimizer elsewhere
+        _assert('optimizer' in compile_dict, "optimizer is a required parameter for compile")
     opt_name, opt_args = parse_optimizer(compile_dict)
 
     _assert('loss' in compile_dict, "loss is a required parameter for compile")
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
index 90442e9..69e7da7 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
@@ -31,6 +31,164 @@
              `\1../../modules/deep_learning/test/madlib_keras_custom_function.setup.sql_in'
 )
 
+------------------------------------------------------------------------------------------------------
+-- Generate Model Selection Configs tests
+
+-- Valid inputs should pass and yield 24 msts in the table
+DROP TABLE IF EXISTS mst_table, mst_table_summary;
+SELECT generate_model_configs(
+    'iris_model_arch',
+    'mst_table',
+    ARRAY[1,2],
+    $$
+    {'loss': ['categorical_crossentropy'],
+    'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1, 1]} ],
+    'metrics': ['accuracy']}
+    $$,
+    $$
+    {'batch_size': [8, 32], 'epochs': [4]}
+    $$
+);
+
+SELECT assert(
+    COUNT(*)=24,
+    'The number of rows in mst_table does not match the inputs'
+)
+FROM mst_table;
+
+-- Test summary table output
+SELECT assert(
+    model_arch_table = 'iris_model_arch',
+    'Model selection output Summary Validation failed. Actual:' || __to_char(summary))
+FROM (SELECT * FROM mst_table_summary) summary;
+
+-- Invalid arguments must be errored out
+
+DROP TABLE IF EXISTS mst_table, mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT generate_model_configs(
+        'iris_model_arch',
+        'mst_table',
+        ARRAY[-1, 2],
+        $$
+        {'loss': ['categorical_crossentropy'],
+        'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1, 1]} ],
+        'metrics': ['accuracy']}
+        $$,
+        $$
+        {'batch_size': [8, 32], 'epochs': [4]}
+        $$
+    );
+$TRAP$)=1, 'Should error out if model_id is not in the model arch table');
+
+DROP TABLE IF EXISTS mst_table, mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT generate_model_configs(
+        'iris_model_arch',
+        'mst_table',
+        ARRAY[1],
+        $${foo='bar'}$$,
+        $${batch_size='bar'}$$
+    );
+$TRAP$)=1, 'Should error out if the provided parameters are not valid');
+
+-- Incremental loading: appending to an existing mst table
+DROP TABLE IF EXISTS mst_table, mst_table_summary;
+SELECT generate_model_configs(
+    'iris_model_arch',
+    'mst_table',
+    ARRAY[1],
+    $$
+    {'loss': ['categorical_crossentropy'],
+    'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1, 'log']} ],
+    'metrics': ['accuracy']}
+    $$,
+    $$
+    {'batch_size': [8, 32], 'epochs': [4]}
+    $$,
+    'random',
+    8
+);
+SELECT assert(
+    COUNT(*)=8,
+    'The number of rows in mst_table does not match the inputs'
+)
+FROM mst_table;
+
+-- purposely drop the summary table to verify that incremental loading recreates it
+DROP TABLE IF EXISTS mst_table_summary;
+SELECT generate_model_configs(
+    'iris_model_arch',
+    'mst_table',
+    ARRAY[2],
+    $$
+    {'loss': ['categorical_crossentropy'],
+    'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1, 'log']} ],
+    'metrics': ['accuracy']}
+    $$,
+    $$
+    {'batch_size': [8, 32], 'epochs': [4]}
+    $$,
+    'random',
+    7
+);
+SELECT assert(
+    model_arch_table='iris_model_arch',
+    'Summary table validation failed: unexpected model_arch_table')
+FROM (SELECT model_arch_table FROM mst_table_summary) t;
+
+SELECT assert(
+    COUNT(*)=15,
+    'The number of rows in mst_table does not match the inputs'
+)
+FROM mst_table;
+
+-- keep the existing summary table to verify that appending validates against it
+SELECT generate_model_configs(
+    'iris_model_arch',
+    'mst_table',
+    ARRAY[1,2],
+    $$
+    {'loss': ['categorical_crossentropy'],
+    'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1, 'linear']} ],
+    'metrics': ['accuracy']}
+    $$,
+    $$
+    {'batch_size': [8, 32], 'epochs': [4]}
+    $$,
+    'random',
+    12
+);
+
+SELECT assert(
+    model_arch_table='iris_model_arch',
+    'Summary table validation failed: unexpected model_arch_table')
+FROM (SELECT model_arch_table FROM mst_table_summary) t;
+
+SELECT assert(
+    COUNT(*)=27,
+    'The number of rows in mst_table does not match the inputs'
+)
+FROM mst_table;
+
+SELECT assert(trap_error($TRAP$
+    SELECT generate_model_configs(
+        'invalid_model_arch',
+        'mst_table',
+        ARRAY[1, 2],
+        $$
+        {'loss': ['categorical_crossentropy'],
+        'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1, 'log']} ],
+        'metrics': ['accuracy']}
+        $$,
+        $$
+        {'batch_size': [8, 32], 'epochs': [4]}
+        $$,
+        'random',
+        14
+    );
+$TRAP$)=1, 'Should error out if previous summary table does not have the same model arch table');
+------------------------------------------------------------------------------------------------------
+
 -- MST table generation tests
 -- Valid inputs should pass and yield 6 msts in the table
 DROP TABLE IF EXISTS mst_table, mst_table_summary;
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in
index b911992..71ed047 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in
@@ -30,6 +30,302 @@
 import unittest
 from mock import *
 import plpy_mock as plpy
+import numpy as np
+
+class GenerateModelSelectionConfigsTestCase(unittest.TestCase):
+    def setUp(self):
+        # The side effects of this class(writing to the output table) are not
+        # tested here. They are tested in dev-check.
+        self.plpy_mock = Mock(spec='error')
+        patches = {
+            'plpy': plpy
+        }
+
+        self.plpy_mock_execute = MagicMock()
+        plpy.execute = self.plpy_mock_execute
+
+        self.module_patcher = patch.dict('sys.modules', patches)
+        self.module_patcher.start()
+        import deep_learning.madlib_keras_model_selection
+        self.module = deep_learning.madlib_keras_model_selection
+        self.module.MstLoaderInputValidator = MagicMock()
+
+        self.subject = self.module.MstSearch
+        self.model_selection_table = 'mst_table'
+        self.model_arch_table = 'model_arch_library'
+        self.model_id_list = [1, 2]
+        self.compile_params_grid = """
+            {'loss': ['categorical_crossentropy'], 
+            'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1]} ], 
+            'metrics': ['accuracy']}
+        """
+        self.fit_params_grid = """
+        {'batch_size': [8, 32], 'epochs': [1, 2]}
+        """
+        self.search_type = 'grid'
+        self.num_configs = None
+        self.random_state = None
+        self.object_table = 'custom_function_table'
+
+    def test_mst_table_dimension(self):
+        generate_mst = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid
+        )
+        self.assertEqual(32, len(generate_mst.msts))
+
+        generate_mst = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'random',
+            9,
+            42
+        )
+        self.assertEqual(9, len(generate_mst.msts))
+
+        generate_mst = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'random',
+            9,
+            None
+        )
+        self.assertEqual(9, len(generate_mst.msts))
+
+        self.compile_params_grid = """
+            {'loss': ['categorical_crossentropy'],   
+            'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1]} ],
+            'metrics': ['accuracy']}
+        """
+        generate_mst = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'random',
+            9,
+            None
+        )
+        self.assertEqual(9, len(generate_mst.msts))
+
+    def test_invalid_input_args(self):
+        with self.assertRaises(plpy.PLPYException):
+            generate_mst = self.subject(
+                self.model_selection_table,
+                self.model_arch_table,
+                self.model_id_list,
+                self.compile_params_grid,
+                self.fit_params_grid,
+                self.search_type,
+                8
+            )
+        with self.assertRaises(plpy.PLPYException):
+            generate_mst = self.subject(
+                self.model_selection_table,
+                self.model_arch_table,
+                self.model_id_list,
+                self.compile_params_grid,
+                self.fit_params_grid,
+                self.search_type,
+                8,
+                42,
+                self.object_table
+            )
+        with self.assertRaises(plpy.PLPYException):
+            generate_mst = self.subject(
+                self.model_selection_table,
+                self.model_arch_table,
+                self.model_id_list,
+                self.compile_params_grid,
+                self.fit_params_grid,
+                'random'
+            )
+        with self.assertRaises(plpy.PLPYException):
+            generate_mst = self.subject(
+                self.model_selection_table,
+                self.model_arch_table,
+                self.model_id_list,
+                self.compile_params_grid,
+                self.fit_params_grid,
+                'random',
+                None,
+                19
+            )
+        with self.assertRaises(plpy.PLPYException):
+            generate_mst = self.subject(
+                self.model_selection_table,
+                self.model_arch_table,
+                [-3],
+                self.compile_params_grid,
+                self.fit_params_grid,
+                'random',
+                None,
+                19
+            )
+
+        self.compile_params_grid = """
+            {'losss': ['categorical_crossentropy'],
+            'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1]} ],
+            'metrics': ['accuracy']}
+        """
+        self.fit_params_grid = """
+        {'batch_size': [8, 32], 'epchs': [1, 2]}
+        """
+
+        with self.assertRaises(plpy.PLPYException):
+            generate_mst = self.subject(
+                self.model_selection_table,
+                self.model_arch_table,
+                self.model_id_list,
+                self.compile_params_grid,
+                self.fit_params_grid
+            )
+
+    def test_duplicate_params(self):
+        self.model_id_list = [1, 1, 2]
+        generate_mst = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid
+        )
+        self.assertEqual(32, len(generate_mst.msts))
+        generate_mst = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'random',
+            17
+        )
+        self.assertEqual(17, len(generate_mst.msts))
+
+    def test_array_inference(self):
+        # repr() turns the numpy values into a python list literal, since
+        # literal_eval can only parse native python datatypes
+        lr_lst = repr(list(np.random.uniform(0.001, 0.1, 3)))
+
+        self.compile_params_grid = "{'loss': ['categorical_crossentropy'], " \
+                                   "'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], " \
+                                   "'lr': " + lr_lst + "} ], " \
+                                   "'metrics': ['accuracy']}"
+
+        generate_mst1 = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid
+        )
+        self.assertEqual(48, len(generate_mst1.msts))
+
+    def test_output_types(self):
+        generate_mst1 = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'grid'
+        )
+        for d1 in generate_mst1.msts:
+            self.assertEqual("loss='categorical_crossentropy'" in d1['compile_params'], True)
+
+        generate_mst2 = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'random',
+            6,
+            47
+        )
+        for d2 in generate_mst2.msts:
+            self.assertEqual("loss='categorical_crossentropy'" in d2['compile_params'], True)
+
+    def test_seed_result_reproducibility(self):
+        generate_mst1 = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'random',
+            6,
+            47
+        )
+        generate_mst2 = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'random',
+            6,
+            47
+        )
+        generate_mst3 = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'random',
+            6,
+            47
+        )
+        self.assertEqual(generate_mst1.msts, generate_mst2.msts)
+        self.assertEqual(generate_mst2.msts, generate_mst3.msts)
+
+    def test_multiple_optimizer_configs(self):
+        self.compile_params_grid = """
+            {'loss': ['categorical_crossentropy'],
+            'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'], 'lr': [0.9, 0.95]},
+            {'optimizer': ['Adam', 'SGD']} ],
+            'metrics': ['accuracy']}
+        """
+        generate_mst1 = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'gr'  # abbreviated search_type; expected to resolve to grid search
+        )
+        self.assertEqual(48, len(generate_mst1.msts))
+
+        self.compile_params_grid = """
+            {'loss': ['categorical_crossentropy'],
+            'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'], 'lr': [0.9, 0.95, 'log'],
+            'epsilon': [0.3, 0.5, 'log_near_one']},
+            {'optimizer': ['Adam', 'SGD'], 'lr': [0.6, 0.65, 'log']} ],
+            'metrics': ['accuracy']}
+        """
+        generate_mst2 = self.subject(
+            self.model_selection_table,
+            self.model_arch_table,
+            self.model_id_list,
+            self.compile_params_grid,
+            self.fit_params_grid,
+            'rand',  # abbreviated search_type; expected to resolve to random search
+            6,
+            6
+        )
+        self.assertEqual(6, len(generate_mst2.msts))
+
+    def tearDown(self):
+        self.module_patcher.stop()
 
 class LoadModelSelectionTableTestCase(unittest.TestCase):
     def setUp(self):
diff --git a/src/ports/postgres/modules/kmeans/kmeans.sql_in b/src/ports/postgres/modules/kmeans/kmeans.sql_in
index 9e36a92..3003148 100644
--- a/src/ports/postgres/modules/kmeans/kmeans.sql_in
+++ b/src/ports/postgres/modules/kmeans/kmeans.sql_in
@@ -246,7 +246,7 @@
 <dt>output_table</dt>
 <dd>TEXT. Name of the output table containing results for each k
 value. Details of the output table are shown below.
-A summary table called 'output_table_summary' will also be
+A summary table called <output_table>_summary will also be
 created for the best k value as per the selection algorithm.</dd>
 
 <dt>k</dt>