| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| * |
| * @file madlib_keras_model_selection.sql_in |
| * |
| * @brief Explore network architectures and hyperparameters by training many models at a time.
| * @date August 2019 |
| * |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| /** |
| @addtogroup grp_keras_run_model_selection |
| |
| @brief Explore network architectures and hyperparameters by training many models at a time.
| |
| <div class="toc"><b>Contents</b><ul> |
| <li class="level1"><a href="#keras_fit">Fit</a></li> |
| <li class="level1"><a href="#keras_evaluate">Evaluate</a></li> |
| <li class="level1"><a href="#keras_predict">Predict</a></li> |
| <li class="level1"><a href="#example">Examples</a></li> |
| <li class="level1"><a href="#notes">Notes</a></li> |
| <li class="level1"><a href="#background">Technical Background</a></li> |
| <li class="level1"><a href="#literature">Literature</a></li> |
| <li class="level1"><a href="#related">Related Topics</a></li> |
| </ul></div> |
| |
| This module allows you to explore network architectures and
| hyperparameters by training many models at a time across the
| database cluster.
| |
| The aim is to support efficient empirical comparison of multiple |
| training configurations. This process is called model selection, |
| and the implementation here is based on a parallel execution strategy |
| called model hopper parallelism (MOP) [1,2]. |
| |
| Models are designed in Keras [3], which is a high-level neural
| network API written in Python. It can run
| on top of several backends; the one currently
| supported by MADlib is TensorFlow [4].
| |
| The main use case is classification |
| using sequential models, which are made up of a |
| linear stack of layers. This includes multilayer perceptrons (MLPs) |
| and convolutional neural networks (CNNs). Regression is not |
| currently supported. |
| |
| Before using this model selection method, you will need to
| <a href="group__grp__input__preprocessor__dl.html">Preprocess Data</a>,
| which prepares data for
| use by models that support mini-batch as an optimization option.
| This is a one-time operation and you would only
| need to re-run the preprocessor if your input data has changed.
| The advantage of using mini-batching is that it
| can perform better than stochastic gradient descent
| because it uses more than one training example at a time,
| typically resulting in faster and smoother convergence [5].
| |
| Set up the models and hyperparameters to try with the
| <a href="group__grp__keras__setup__model__selection.html">Define Model
| Configurations</a> function, which defines the unique combinations
| of model architectures, compile parameters and fit parameters.
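|
| For example, a compact grid-search setup looks like this (a sketch that
| mirrors the grid-search example later on this page; table names are taken
| from that example):
| <pre class="example">
| DROP TABLE IF EXISTS mst_table, mst_table_summary;
| SELECT madlib.generate_model_configs(
|     'model_arch_library',   -- model architecture table
|     'mst_table',            -- output: model selection table
|     ARRAY[1,2],             -- model ids to try
|     $${'loss': ['categorical_crossentropy'],
|        'optimizer_params_list': [ {'optimizer': ['Adam'], 'lr': [0.001, 0.01]} ],
|        'metrics': ['accuracy']}$$,               -- compile parameter grid
|     $${'batch_size': [4, 8], 'epochs': [1]}$$,   -- fit parameter grid
|     'grid'                  -- search type
| );
| </pre>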
| |
| @note 1. If 'madlib_keras_fit_multiple_model()' is running on GPDB 5 and some versions
| of GPDB 6, disk usage will
| keep growing (in proportion to model size) and the disk space will only
| be released once the fit multiple query has completed execution.
| This is not the case for GPDB 6.5.0+, where disk space is released during the
| fit multiple query.
| |
| @note 2. CUDA GPU memory cannot be released until the process holding it is terminated.
| When a MADlib deep learning function is called with GPUs, Greenplum internally
| creates a process (called a slice) which calls TensorFlow to do the computation.
| This process holds the GPU memory until one of the following two things happens:
| the query finishes and the user logs out of the Postgres client/session; or
| the query finishes and the user waits for the timeout set by 'gp_vmem_idle_resource_timeout'.
| The default value for this timeout is 18 sec [8]. So the recommendation is:
| log out/reconnect to the session after every GPU query; or
| wait for 'gp_vmem_idle_resource_timeout' before you run another GPU query (you can
| also set it to a lower value, as sketched below).
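|
| For example, a session-level sketch (assuming your GPDB version allows
| setting this parameter at the session level; otherwise set it with gpconfig):
| <pre class="example">
| -- Inspect the current idle-resource timeout
| SHOW gp_vmem_idle_resource_timeout;
| -- Lower it so GPU memory is released sooner after a query finishes
| SET gp_vmem_idle_resource_timeout = 5000;  -- milliseconds
| </pre>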
| |
| @note 3. This method does not currently support multi-input or multi-output neural networks. |
| |
| |
| @anchor keras_fit |
| @par Fit |
| The fit (training) function has the following format: |
| |
| <pre class="syntax"> |
| madlib_keras_fit_multiple_model( |
| source_table, |
| model_output_table, |
| model_selection_table, |
| num_iterations, |
| use_gpus, |
| validation_table, |
| metrics_compute_frequency, |
| warm_start, |
| name, |
| description, |
| use_caching |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>source_table</dt> |
| <dd>TEXT. Name of the table containing the training data. |
| This is the name of the output |
| table from the data preprocessor. Independent |
| and dependent variables are specified in the preprocessor |
| step, which is why you do not need to explicitly state
| them here as part of the fit function.</dd> |
| |
| <dt>model_output_table</dt> |
| <dd>TEXT. Name of the output table containing the |
| multiple models created. |
| @note pg_temp is not allowed as an output table schema for fit multiple. |
| Details of output tables are shown below. |
| </dd> |
| |
| <dt>model_selection_table</dt> |
| <dd>TEXT. Name of the table containing model selection parameters to be tried. |
| Here we mean both hyper-parameter tuning and model architecture search. |
| </dd> |
| |
| <DT>num_iterations</DT> |
| <DD>INTEGER. Number of iterations to train. |
| |
| @note |
| This parameter is different from the number of passes over the dataset,
| which is commonly referred to as the number of epochs. Since MADlib operates
| in a distributed system, the effective number of
| epochs is this parameter 'num_iterations' multiplied by the 'epochs' value
| specified in the Keras fit parameters. For example, 'num_iterations=10' with
| 'epochs=2' in the fit parameters results in 20 passes over the data per model.
| </DD> |
| |
| <DT>use_gpus (optional)</DT> |
| <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines whether GPUs |
| are to be used for training the neural network. Set to TRUE to use GPUs. |
| |
| @note |
| This parameter must not conflict with how the distribution rules are set in |
| the preprocessor function. For example, if you set a distribution rule to use |
| certain segments on hosts that do not have GPUs attached, you will get an error |
| if you set 'use_gpus' to TRUE. Also, we have seen some memory-related issues
| when segments share GPU resources.
| For example, if you have 1 GPU per segment host and your cluster has 4 |
| segments per segment host, it means that all 4 |
| segments will share the same |
| GPU on each host. The current recommended |
| configuration is 1 GPU per segment. |
| </DD> |
| |
| <dt>validation_table (optional)</dt> |
| <dd>TEXT, default: none. Name of the table containing |
| the validation dataset. |
| Note that the validation dataset must be preprocessed |
| in the same way as the training dataset, so this |
| is the name of the output |
| table from running the data preprocessor on the validation dataset. |
| Using a validation dataset can mean a |
| longer training time, depending on its size. |
| This can be controlled using the 'metrics_compute_frequency' |
| parameter described below.</dd> |
| |
| <DT>metrics_compute_frequency (optional)</DT> |
| <DD>INTEGER, default: once at the end of training |
| after 'num_iterations'. Frequency to compute per-iteration |
| metrics for the training dataset and validation dataset |
| (if specified). There can be considerable cost to |
| computing metrics every iteration, especially if the |
| training dataset is large. This parameter is a way of |
| controlling the frequency of those computations. |
| For example, if you specify 5, then metrics will be computed |
| every 5 iterations as well as at the end of training |
| after 'num_iterations'. If you use the default, |
| metrics will be computed only |
| once after 'num_iterations' have completed. |
| </DD> |
| |
| <DT>warm_start (optional)</DT> |
| <DD>BOOLEAN, default: FALSE. |
| Initialize weights with the coefficients
| from the last call to the fit
| function. If set to TRUE, weights will be
| initialized from the model table
| generated by the previous training run.
|
| @note
| The warm start feature works based on the name of the
| model output table from a previous training run.
| When using warm start, do not drop the model output table
| or the model output summary table
| before calling the fit function, since these are needed to obtain the
| weights from the previous run.
| If you are not using warm start, the model output table
| and the model output table summary must be dropped in
| the usual way before calling the training function.
| See the sketch after this argument list for the difference.
| </DD> |
| |
| <DT>name (optional)</DT> |
| <DD>TEXT, default: NULL. |
| Free text string to identify a name, if desired. |
| </DD> |
| |
| <DT>description (optional)</DT> |
| <DD>TEXT, default: NULL. |
| Free text string to provide a description, if desired. |
| </DD> |
| |
| <DT>use_caching (optional)</DT> |
| <DD>BOOLEAN, default: FALSE. Use caching of data in memory on the |
| segment in order to speed up processing. |
| |
| @note |
| When set to TRUE, byte arrays on each segment are maintained |
| in cache (SD). This can speed up training significantly, however the |
| memory usage per segment increases. In effect, it |
| requires enough available memory on a segment so that all training data |
| residing on that segment can be read into memory. |
| </DD>
| </dl>
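|
| As mentioned in the 'warm_start' note above, table management differs between
| cold and warm starts. A minimal sketch, using table names from the examples
| below:
| <pre class="example">
| -- Cold start: drop any previous output tables first
| DROP TABLE IF EXISTS iris_multi_model, iris_multi_model_summary, iris_multi_model_info;
| SELECT madlib.madlib_keras_fit_multiple_model('iris_train_packed', 'iris_multi_model',
|                                               'mst_table', 10, FALSE);
| -- Warm start: keep the output tables and continue from the stored weights
| SELECT madlib.madlib_keras_fit_multiple_model('iris_train_packed', 'iris_multi_model',
|                                               'mst_table', 10, FALSE,
|                                               NULL,   -- validation_table
|                                               NULL,   -- metrics_compute_frequency
|                                               TRUE    -- warm_start
|                                              );
| </pre>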
| |
| <b>Output tables</b> |
| <br> |
| The model output table produced by fit contains the following columns. |
| There is one row per model as per the rows in the 'model_selection_table': |
| <table class="output"> |
| <tr> |
| <th>mst_key</th> |
| <td>INTEGER. ID that defines a unique tuple for model architecture-compile parameters-fit parameters, |
| as defined in the 'model_selection_table'.</td> |
| </tr> |
| <tr> |
| <th>model_weights</th> |
| <td>BYTEA8. Byte array containing the weights of the neural net.</td> |
| </tr> |
| <tr> |
| <th>model_arch</th> |
| <td>TEXT. A JSON representation of the model architecture |
| used in training.</td> |
| </tr> |
| </table> |
| |
| An info table named \<model_output_table\>_info is also created, which has the following columns. |
| There is one row per model as per the rows in the 'model_selection_table': |
| <table class="output"> |
| <tr> |
| <th>mst_key</th> |
| <td>INTEGER. ID that defines a unique tuple for model architecture-compile parameters-fit parameters, |
| as defined in the 'model_selection_table'.</td> |
| </tr> |
| <tr> |
| <th>model_id</th> |
| <td>INTEGER. ID that identifies the model in the 'model_arch_table'.</td>
| </tr> |
| <tr> |
| <th>compile_params</th> |
| <td>Compile parameters passed to Keras.</td> |
| </tr> |
| <tr> |
| <th>fit_params</th> |
| <td>Fit parameters passed to Keras.</td> |
| </tr> |
| <tr> |
| <th>model_type</th> |
| <td>General identifier for the type of model trained.
| Currently this is 'madlib_keras'.</td>
| </tr> |
| <tr> |
| <th>model_size</th> |
| <td>Size of the model in KB. Models are stored in
| the 'bytea' data format, which is used for binary strings
| in PostgreSQL-type databases.</td>
| </tr> |
| <tr> |
| <th>metrics_elapsed_time</th> |
| <td> Array of elapsed time for metric computations as |
| per the 'metrics_compute_frequency' parameter. |
| Useful for drawing a curve showing loss, accuracy or |
| other metrics as a function of time. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of elapsed time for every 5th |
| iteration, plus the last iteration. |
| Note that this field reports the time for training + |
| validation if there is a validation table provided.</td> |
| </tr> |
| <tr> |
| <th>metrics_type</th> |
| <td>Metric specified in the 'compile_params'.</td> |
| </tr> |
| <tr> |
| <th>loss_type</th> |
| <td>Loss specified in the 'compile_params'.</td> |
| </tr> |
| <tr> |
| <th>training_metrics_final</th> |
| <td>Final value of the training |
| metric after all iterations have completed. |
| The metric reported is the one |
| specified in the 'metrics_type' parameter.</td> |
| </tr> |
| <tr> |
| <th>training_loss_final</th> |
| <td>Final value of the training loss after all |
| iterations have completed.</td> |
| </tr> |
| <tr> |
| <th>training_metrics</th> |
| <td>Array of training metrics as |
| per the 'metrics_compute_frequency' parameter. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of metrics for every 5th |
| iteration, plus the last iteration.</td> |
| </tr> |
| <tr> |
| <th>training_loss</th> |
| <td>Array of training losses as |
| per the 'metrics_compute_frequency' parameter. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of losses for every 5th |
| iteration, plus the last iteration.</td> |
| </tr> |
| <tr> |
| <th>validation_metrics_final</th> |
| <td>Final value of the validation |
| metric after all iterations have completed. |
| The metric reported is the one |
| specified in the 'metrics_type' parameter.</td> |
| </tr> |
| <tr> |
| <th>validation_loss_final</th> |
| <td>Final value of the validation loss after all |
| iterations have completed.</td> |
| </tr> |
| <tr> |
| <th>validation_metrics</th> |
| <td>Array of validation metrics as |
| per the 'metrics_compute_frequency' parameter. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of metrics for every 5th |
| iteration, plus the last iteration.</td> |
| </tr> |
| <tr> |
| <th>validation_loss</th> |
| <td>Array of validation losses as |
| per the 'metrics_compute_frequency' parameter. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of losses for every 5th |
| iteration, plus the last iteration.</td> |
| </tr> |
| |
| </table> |
| |
| A summary table named \<model_output_table\>_summary is also created, which has the following columns:
| <table class="output"> |
| <tr> |
| <th>source_table</th> |
| <td>Source table used for training.</td> |
| </tr> |
| <tr> |
| <th>validation_table</th> |
| <td>Name of the table containing |
| the validation dataset (if specified).</td> |
| </tr> |
| <tr> |
| <th>model</th> |
| <td>Name of the output table containing |
| the model for each model selection tuple.</td> |
| </tr> |
| <tr> |
| <th>model_info</th> |
| <td>Name of the output table containing |
| the model performance and other info for |
| each model selection tuple.</td> |
| </tr> |
| <tr> |
| <th>dependent_varname</th> |
| <td>Dependent variable column from the original |
| source table in the data preprocessing step.</td> |
| </tr> |
| <tr> |
| <th>independent_varname</th> |
| <td>Independent variables column from the original |
| source table in the data preprocessing step.</td> |
| </tr> |
| <tr> |
| <th>model_arch_table</th> |
| <td>Name of the table containing |
| the model architecture and (optionally) the |
| initial model weights.</td> |
| </tr> |
| <tr> |
| <th>num_iterations</th> |
| <td>Number of iterations of training completed.</td> |
| </tr> |
| <tr> |
| <th>metrics_compute_frequency</th> |
| <td>Frequency at which per-iteration metrics are computed
| for the training dataset and validation
| dataset.</td>
| </tr> |
| <tr> |
| <th>warm_start</th> |
| <td>Indicates whether warm start was used or not.</td>
| </tr> |
| <tr> |
| <th>name</th> |
| <td>Name of the training run (free text).</td> |
| </tr> |
| <tr> |
| <th>description</th> |
| <td>Description of the training run (free text).</td> |
| </tr> |
| <tr> |
| <th>start_training_time</th> |
| <td>Timestamp for start of training.</td> |
| </tr> |
| <tr> |
| <th>end_training_time</th> |
| <td>Timestamp for end of training.</td> |
| </tr> |
| <tr> |
| <th>madlib_version</th> |
| <td>Version of MADlib used.</td> |
| </tr> |
| <tr> |
| <th>num_classes</th> |
| <td>Count of distinct class values used.</td>
| </tr> |
| <tr> |
| <th>\<dependent_varname\>_class_values</th>
| <td>Array of actual class values used for a particular dependent |
| variable. A column will be generated for each dependent variable.</td> |
| </tr> |
| <tr> |
| <th>dependent_vartype</th> |
| <td>Data type of the dependent variable.</td> |
| </tr> |
| <tr> |
| <th>normalizing_constant</th> |
| <td>Normalizing constant used from the |
| data preprocessing step.</td> |
| </tr> |
| <tr> |
| <th>metrics_iters</th> |
| <td>Array indicating the iterations for which |
| metrics are calculated, as derived from the |
| parameters 'num_iterations' and 'metrics_compute_frequency'. |
| For example, if 'num_iterations=5' |
| and 'metrics_compute_frequency=2', then 'metrics_iters' value |
| would be {2,4,5} indicating that metrics were computed |
| at iterations 2, 4 and 5 (at the end). |
| If 'num_iterations=5' |
| and 'metrics_compute_frequency=1', then 'metrics_iters' value |
| would be {1,2,3,4,5} indicating that metrics were computed |
| at every iteration.</td> |
| </tr> |
| </table> |
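|
| Since model selection is the goal, a typical next step is to rank the trained
| models by their final metrics. A minimal sketch against the info table, using
| table names from the examples below:
| <pre class="example">
| SELECT mst_key, model_id, compile_params, fit_params,
|        training_metrics_final, training_loss_final
| FROM iris_multi_model_info
| ORDER BY training_metrics_final DESC, training_loss_final
| LIMIT 3;
| </pre>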
| |
| @anchor keras_evaluate |
| @par Evaluate |
| The evaluation function has the following format: |
| |
| <pre class="syntax"> |
| madlib_keras_evaluate( |
| model_table, |
| test_table, |
| output_table, |
| use_gpus, |
| mst_key |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| |
| <DT>model_table</DT> |
| <DD>TEXT. Name of the table containing the model
| to use for evaluation.
| </DD> |
| |
| <DT>test_table</DT> |
| <dd>TEXT. Name of the table containing the evaluation dataset. |
| Note that test/validation data must be preprocessed in the same |
| way as the training dataset, so |
| this is the name of the output |
| table from the data preprocessor. Independent
| and dependent variables are specified in the preprocessor
| step, which is why you do not need to explicitly state
| them here as part of the evaluate function.</dd>
| |
| <DT>output_table</DT> |
| <DD>TEXT. Name of the table that evaluation output will be
| written to. The table contains:</DD>
| <table class="output"> |
| <tr> |
| <th>loss</th> |
| <td>Loss value on evaluation dataset, where 'loss_type' |
| below identifies the type of loss.</td> |
| </tr> |
| <tr> |
| <th>metric</th> |
| <td>Metric value on evaluation dataset, where 'metrics_type' |
| below identifies the type of metric.</td> |
| </tr> |
| <tr> |
| <th>metrics_type</th> |
| <td>Type of metric that was used in the training step.
| (This means you cannot use a different metric in evaluate
| than in training.)</td>
| </tr> |
| <tr> |
| <th>loss_type</th> |
| <td>Type of loss function that was used in the training step.
| (This means you cannot use a different loss in evaluate
| than in training.)</td>
| </tr>
| </table>
|
| <DT>use_gpus (optional)</DT> |
| <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines whether GPUs
| are to be used for evaluating the neural network. Set to TRUE to use GPUs.
| |
| @note |
| This parameter must not conflict with how the distribution rules are set in |
| the preprocessor function. For example, if you set a distribution rule to use |
| certain segments on hosts that do not have GPUs attached, you will get an error |
| if you set 'use_gpus' to TRUE. Also, we have seen some memory-related issues
| when segments share GPU resources.
| For example, if you have 1 GPU per segment host and your cluster has 4 |
| segments per segment host, it means that all 4 |
| segments will share the same |
| GPU on each host. The current recommended |
| configuration is 1 GPU per segment. |
| </DD> |
| |
| <DT>mst_key (optional)</DT> |
| <DD>INTEGER, default: NULL. ID that defines a unique tuple for |
| model architecture-compile parameters-fit parameters as defined in the model |
| selection table. |
| </DD> |
| </DL> |
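|
| For example, to evaluate the model that scored best on the final training
| metric, the 'mst_key' can be taken from the info table (a sketch, using table
| names from the examples below):
| <pre class="example">
| DROP TABLE IF EXISTS iris_validate;
| SELECT madlib.madlib_keras_evaluate(
|     'iris_multi_model',    -- model table from fit
|     'iris_test_packed',    -- preprocessed test data
|     'iris_validate',       -- output table
|     FALSE,                 -- use_gpus
|     (SELECT mst_key FROM iris_multi_model_info
|      ORDER BY training_metrics_final DESC, training_loss_final LIMIT 1)
| );
| </pre>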
| |
| @anchor keras_predict |
| @par Predict |
| The prediction function has the following format: |
| <pre class="syntax"> |
| madlib_keras_predict( |
| model_table, |
| test_table, |
| id_col, |
| independent_varname, |
| output_table, |
| pred_type, |
| use_gpus, |
| mst_key |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| |
| <DT>model_table</DT> |
| <DD>TEXT. Name of the table containing the model |
| to use for prediction. |
| </DD> |
| |
| <DT>test_table</DT> |
| <DD>TEXT. Name of the table containing the dataset to |
| predict on. Note that test data is not preprocessed (unlike |
| fit and evaluate) so put one test image per row for prediction. |
| Also see the comment below for the 'independent_varname' parameter |
| regarding normalization. |
| |
| </DD> |
| |
| <DT>id_col</DT> |
| <DD>TEXT. Name of the id column in the test data table. |
| </DD> |
| |
| <DT>independent_varname</DT> |
| <DD>TEXT. Column with independent variables in the test table. |
| If a 'normalizing_const' is specified when preprocessing the |
| training dataset, this same normalization will be applied to |
| the independent variables used in predict. |
| </DD> |
| |
| <DT>output_table</DT> |
| <DD>TEXT. Name of the table that prediction output will be |
| written to. Table contains:</DD> |
| <table class="output"> |
| <tr> |
| <th>id</th> |
| <td>Gives the 'id' for each prediction, corresponding to each row from |
| the test_table.</td> |
| </tr> |
| <tr> |
| <th>class_name</th> |
| <td> |
| Name of the dependent (class) variable being estimated.
| </td> |
| </tr> |
| <tr> |
| <th>class_value</th> |
| <td> |
| The estimated class for classification. |
| </td> |
| </tr> |
| <tr> |
| <th>prob</th> |
| <td> |
| Probability of a given class. |
| </td> |
| </tr> |
| <tr> |
| <th>rank</th> |
| <td> |
| The rank of a given class based on the ordering of probabilities. |
| </td> |
| </tr>
| </table>
|
| <DT>pred_type (optional)</DT> |
| <DD>TEXT, INTEGER or DOUBLE PRECISION, default: 'prob'.
| The type and range of output desired. This parameter allows the following options:
| - 'response': the actual prediction |
| - 'prob': the probability value for each class |
| - 0<value<1: the lower limit for the probability (double precision) |
| - 1<=value: the lower limit for the rank of the prediction (integer) |
| </DD> |
| |
| <DT>use_gpus (optional)</DT> |
| <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines |
| whether GPUs are to be used for prediction/inference. |
| Set to TRUE to use GPUs. |
| |
| @note |
| The prediction function uses the whole cluster. If you are using GPUs, it |
| requires that GPUs are attached to all hosts, and that there are the same number |
| of GPUs on each host (homogeneous cluster). This is different from the fit() |
| and evaluate() functions that support GPUs on only some of the hosts (heterogeneous cluster). |
| Therefore, if you have GPUs only on some of the hosts, or an uneven number of GPUs per host, then
| set this parameter to FALSE to use CPUs. |
| </DD> |
| |
| <DT>mst_key (optional)</DT> |
| <DD>INTEGER, default: NULL. ID that defines a unique tuple for |
| model architecture-compile parameters-fit parameters as defined in the model |
| selection table. |
| </DD> |
| </DL> |
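|
| The 'pred_type' parameter changes what lands in the output table. A sketch of
| the variants, using the iris tables from the examples below (the output table
| names here are hypothetical):
| <pre class="example">
| -- (hypothetical output table names)
| DROP TABLE IF EXISTS iris_predict_response, iris_predict_prob, iris_predict_top2;
| -- Top-ranked class only
| SELECT madlib.madlib_keras_predict('iris_multi_model', 'iris_test', 'id', 'attributes',
|                                    'iris_predict_response', 'response', FALSE, 3);
| -- All classes with probability of at least 0.5
| SELECT madlib.madlib_keras_predict('iris_multi_model', 'iris_test', 'id', 'attributes',
|                                    'iris_predict_prob', 0.5, FALSE, 3);
| -- Top 2 classes by rank
| SELECT madlib.madlib_keras_predict('iris_multi_model', 'iris_test', 'id', 'attributes',
|                                    'iris_predict_top2', 2, FALSE, 3);
| </pre>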
| |
| @anchor example |
| @par Examples |
| |
| @note |
| Deep learning works best on very large datasets, |
| but that is not convenient for a quick introduction |
| to the syntax. So in this example we use an MLP on the well-known
| iris dataset from https://archive.ics.uci.edu/ml/datasets/iris.
| For more realistic examples with images please refer |
| to the deep learning notebooks |
| at https://github.com/apache/madlib-site/tree/asf-site/community-artifacts. |
| |
| <h4>Classification</h4> |
| |
| -# Create an input data set. |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_data; |
| CREATE TABLE iris_data( |
| id serial, |
| attributes numeric[], |
| class_text varchar |
| ); |
| INSERT INTO iris_data(id, attributes, class_text) VALUES |
| (1,ARRAY[5.1,3.5,1.4,0.2],'Iris-setosa'), |
| (2,ARRAY[4.9,3.0,1.4,0.2],'Iris-setosa'), |
| (3,ARRAY[4.7,3.2,1.3,0.2],'Iris-setosa'), |
| (4,ARRAY[4.6,3.1,1.5,0.2],'Iris-setosa'), |
| (5,ARRAY[5.0,3.6,1.4,0.2],'Iris-setosa'), |
| (6,ARRAY[5.4,3.9,1.7,0.4],'Iris-setosa'), |
| (7,ARRAY[4.6,3.4,1.4,0.3],'Iris-setosa'), |
| (8,ARRAY[5.0,3.4,1.5,0.2],'Iris-setosa'), |
| (9,ARRAY[4.4,2.9,1.4,0.2],'Iris-setosa'), |
| (10,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'), |
| (11,ARRAY[5.4,3.7,1.5,0.2],'Iris-setosa'), |
| (12,ARRAY[4.8,3.4,1.6,0.2],'Iris-setosa'), |
| (13,ARRAY[4.8,3.0,1.4,0.1],'Iris-setosa'), |
| (14,ARRAY[4.3,3.0,1.1,0.1],'Iris-setosa'), |
| (15,ARRAY[5.8,4.0,1.2,0.2],'Iris-setosa'), |
| (16,ARRAY[5.7,4.4,1.5,0.4],'Iris-setosa'), |
| (17,ARRAY[5.4,3.9,1.3,0.4],'Iris-setosa'), |
| (18,ARRAY[5.1,3.5,1.4,0.3],'Iris-setosa'), |
| (19,ARRAY[5.7,3.8,1.7,0.3],'Iris-setosa'), |
| (20,ARRAY[5.1,3.8,1.5,0.3],'Iris-setosa'), |
| (21,ARRAY[5.4,3.4,1.7,0.2],'Iris-setosa'), |
| (22,ARRAY[5.1,3.7,1.5,0.4],'Iris-setosa'), |
| (23,ARRAY[4.6,3.6,1.0,0.2],'Iris-setosa'), |
| (24,ARRAY[5.1,3.3,1.7,0.5],'Iris-setosa'), |
| (25,ARRAY[4.8,3.4,1.9,0.2],'Iris-setosa'), |
| (26,ARRAY[5.0,3.0,1.6,0.2],'Iris-setosa'), |
| (27,ARRAY[5.0,3.4,1.6,0.4],'Iris-setosa'), |
| (28,ARRAY[5.2,3.5,1.5,0.2],'Iris-setosa'), |
| (29,ARRAY[5.2,3.4,1.4,0.2],'Iris-setosa'), |
| (30,ARRAY[4.7,3.2,1.6,0.2],'Iris-setosa'), |
| (31,ARRAY[4.8,3.1,1.6,0.2],'Iris-setosa'), |
| (32,ARRAY[5.4,3.4,1.5,0.4],'Iris-setosa'), |
| (33,ARRAY[5.2,4.1,1.5,0.1],'Iris-setosa'), |
| (34,ARRAY[5.5,4.2,1.4,0.2],'Iris-setosa'), |
| (35,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'), |
| (36,ARRAY[5.0,3.2,1.2,0.2],'Iris-setosa'), |
| (37,ARRAY[5.5,3.5,1.3,0.2],'Iris-setosa'), |
| (38,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'), |
| (39,ARRAY[4.4,3.0,1.3,0.2],'Iris-setosa'), |
| (40,ARRAY[5.1,3.4,1.5,0.2],'Iris-setosa'), |
| (41,ARRAY[5.0,3.5,1.3,0.3],'Iris-setosa'), |
| (42,ARRAY[4.5,2.3,1.3,0.3],'Iris-setosa'), |
| (43,ARRAY[4.4,3.2,1.3,0.2],'Iris-setosa'), |
| (44,ARRAY[5.0,3.5,1.6,0.6],'Iris-setosa'), |
| (45,ARRAY[5.1,3.8,1.9,0.4],'Iris-setosa'), |
| (46,ARRAY[4.8,3.0,1.4,0.3],'Iris-setosa'), |
| (47,ARRAY[5.1,3.8,1.6,0.2],'Iris-setosa'), |
| (48,ARRAY[4.6,3.2,1.4,0.2],'Iris-setosa'), |
| (49,ARRAY[5.3,3.7,1.5,0.2],'Iris-setosa'), |
| (50,ARRAY[5.0,3.3,1.4,0.2],'Iris-setosa'), |
| (51,ARRAY[7.0,3.2,4.7,1.4],'Iris-versicolor'), |
| (52,ARRAY[6.4,3.2,4.5,1.5],'Iris-versicolor'), |
| (53,ARRAY[6.9,3.1,4.9,1.5],'Iris-versicolor'), |
| (54,ARRAY[5.5,2.3,4.0,1.3],'Iris-versicolor'), |
| (55,ARRAY[6.5,2.8,4.6,1.5],'Iris-versicolor'), |
| (56,ARRAY[5.7,2.8,4.5,1.3],'Iris-versicolor'), |
| (57,ARRAY[6.3,3.3,4.7,1.6],'Iris-versicolor'), |
| (58,ARRAY[4.9,2.4,3.3,1.0],'Iris-versicolor'), |
| (59,ARRAY[6.6,2.9,4.6,1.3],'Iris-versicolor'), |
| (60,ARRAY[5.2,2.7,3.9,1.4],'Iris-versicolor'), |
| (61,ARRAY[5.0,2.0,3.5,1.0],'Iris-versicolor'), |
| (62,ARRAY[5.9,3.0,4.2,1.5],'Iris-versicolor'), |
| (63,ARRAY[6.0,2.2,4.0,1.0],'Iris-versicolor'), |
| (64,ARRAY[6.1,2.9,4.7,1.4],'Iris-versicolor'), |
| (65,ARRAY[5.6,2.9,3.6,1.3],'Iris-versicolor'), |
| (66,ARRAY[6.7,3.1,4.4,1.4],'Iris-versicolor'), |
| (67,ARRAY[5.6,3.0,4.5,1.5],'Iris-versicolor'), |
| (68,ARRAY[5.8,2.7,4.1,1.0],'Iris-versicolor'), |
| (69,ARRAY[6.2,2.2,4.5,1.5],'Iris-versicolor'), |
| (70,ARRAY[5.6,2.5,3.9,1.1],'Iris-versicolor'), |
| (71,ARRAY[5.9,3.2,4.8,1.8],'Iris-versicolor'), |
| (72,ARRAY[6.1,2.8,4.0,1.3],'Iris-versicolor'), |
| (73,ARRAY[6.3,2.5,4.9,1.5],'Iris-versicolor'), |
| (74,ARRAY[6.1,2.8,4.7,1.2],'Iris-versicolor'), |
| (75,ARRAY[6.4,2.9,4.3,1.3],'Iris-versicolor'), |
| (76,ARRAY[6.6,3.0,4.4,1.4],'Iris-versicolor'), |
| (77,ARRAY[6.8,2.8,4.8,1.4],'Iris-versicolor'), |
| (78,ARRAY[6.7,3.0,5.0,1.7],'Iris-versicolor'), |
| (79,ARRAY[6.0,2.9,4.5,1.5],'Iris-versicolor'), |
| (80,ARRAY[5.7,2.6,3.5,1.0],'Iris-versicolor'), |
| (81,ARRAY[5.5,2.4,3.8,1.1],'Iris-versicolor'), |
| (82,ARRAY[5.5,2.4,3.7,1.0],'Iris-versicolor'), |
| (83,ARRAY[5.8,2.7,3.9,1.2],'Iris-versicolor'), |
| (84,ARRAY[6.0,2.7,5.1,1.6],'Iris-versicolor'), |
| (85,ARRAY[5.4,3.0,4.5,1.5],'Iris-versicolor'), |
| (86,ARRAY[6.0,3.4,4.5,1.6],'Iris-versicolor'), |
| (87,ARRAY[6.7,3.1,4.7,1.5],'Iris-versicolor'), |
| (88,ARRAY[6.3,2.3,4.4,1.3],'Iris-versicolor'), |
| (89,ARRAY[5.6,3.0,4.1,1.3],'Iris-versicolor'), |
| (90,ARRAY[5.5,2.5,4.0,1.3],'Iris-versicolor'), |
| (91,ARRAY[5.5,2.6,4.4,1.2],'Iris-versicolor'), |
| (92,ARRAY[6.1,3.0,4.6,1.4],'Iris-versicolor'), |
| (93,ARRAY[5.8,2.6,4.0,1.2],'Iris-versicolor'), |
| (94,ARRAY[5.0,2.3,3.3,1.0],'Iris-versicolor'), |
| (95,ARRAY[5.6,2.7,4.2,1.3],'Iris-versicolor'), |
| (96,ARRAY[5.7,3.0,4.2,1.2],'Iris-versicolor'), |
| (97,ARRAY[5.7,2.9,4.2,1.3],'Iris-versicolor'), |
| (98,ARRAY[6.2,2.9,4.3,1.3],'Iris-versicolor'), |
| (99,ARRAY[5.1,2.5,3.0,1.1],'Iris-versicolor'), |
| (100,ARRAY[5.7,2.8,4.1,1.3],'Iris-versicolor'), |
| (101,ARRAY[6.3,3.3,6.0,2.5],'Iris-virginica'), |
| (102,ARRAY[5.8,2.7,5.1,1.9],'Iris-virginica'), |
| (103,ARRAY[7.1,3.0,5.9,2.1],'Iris-virginica'), |
| (104,ARRAY[6.3,2.9,5.6,1.8],'Iris-virginica'), |
| (105,ARRAY[6.5,3.0,5.8,2.2],'Iris-virginica'), |
| (106,ARRAY[7.6,3.0,6.6,2.1],'Iris-virginica'), |
| (107,ARRAY[4.9,2.5,4.5,1.7],'Iris-virginica'), |
| (108,ARRAY[7.3,2.9,6.3,1.8],'Iris-virginica'), |
| (109,ARRAY[6.7,2.5,5.8,1.8],'Iris-virginica'), |
| (110,ARRAY[7.2,3.6,6.1,2.5],'Iris-virginica'), |
| (111,ARRAY[6.5,3.2,5.1,2.0],'Iris-virginica'), |
| (112,ARRAY[6.4,2.7,5.3,1.9],'Iris-virginica'), |
| (113,ARRAY[6.8,3.0,5.5,2.1],'Iris-virginica'), |
| (114,ARRAY[5.7,2.5,5.0,2.0],'Iris-virginica'), |
| (115,ARRAY[5.8,2.8,5.1,2.4],'Iris-virginica'), |
| (116,ARRAY[6.4,3.2,5.3,2.3],'Iris-virginica'), |
| (117,ARRAY[6.5,3.0,5.5,1.8],'Iris-virginica'), |
| (118,ARRAY[7.7,3.8,6.7,2.2],'Iris-virginica'), |
| (119,ARRAY[7.7,2.6,6.9,2.3],'Iris-virginica'), |
| (120,ARRAY[6.0,2.2,5.0,1.5],'Iris-virginica'), |
| (121,ARRAY[6.9,3.2,5.7,2.3],'Iris-virginica'), |
| (122,ARRAY[5.6,2.8,4.9,2.0],'Iris-virginica'), |
| (123,ARRAY[7.7,2.8,6.7,2.0],'Iris-virginica'), |
| (124,ARRAY[6.3,2.7,4.9,1.8],'Iris-virginica'), |
| (125,ARRAY[6.7,3.3,5.7,2.1],'Iris-virginica'), |
| (126,ARRAY[7.2,3.2,6.0,1.8],'Iris-virginica'), |
| (127,ARRAY[6.2,2.8,4.8,1.8],'Iris-virginica'), |
| (128,ARRAY[6.1,3.0,4.9,1.8],'Iris-virginica'), |
| (129,ARRAY[6.4,2.8,5.6,2.1],'Iris-virginica'), |
| (130,ARRAY[7.2,3.0,5.8,1.6],'Iris-virginica'), |
| (131,ARRAY[7.4,2.8,6.1,1.9],'Iris-virginica'), |
| (132,ARRAY[7.9,3.8,6.4,2.0],'Iris-virginica'), |
| (133,ARRAY[6.4,2.8,5.6,2.2],'Iris-virginica'), |
| (134,ARRAY[6.3,2.8,5.1,1.5],'Iris-virginica'), |
| (135,ARRAY[6.1,2.6,5.6,1.4],'Iris-virginica'), |
| (136,ARRAY[7.7,3.0,6.1,2.3],'Iris-virginica'), |
| (137,ARRAY[6.3,3.4,5.6,2.4],'Iris-virginica'), |
| (138,ARRAY[6.4,3.1,5.5,1.8],'Iris-virginica'), |
| (139,ARRAY[6.0,3.0,4.8,1.8],'Iris-virginica'), |
| (140,ARRAY[6.9,3.1,5.4,2.1],'Iris-virginica'), |
| (141,ARRAY[6.7,3.1,5.6,2.4],'Iris-virginica'), |
| (142,ARRAY[6.9,3.1,5.1,2.3],'Iris-virginica'), |
| (143,ARRAY[5.8,2.7,5.1,1.9],'Iris-virginica'), |
| (144,ARRAY[6.8,3.2,5.9,2.3],'Iris-virginica'), |
| (145,ARRAY[6.7,3.3,5.7,2.5],'Iris-virginica'), |
| (146,ARRAY[6.7,3.0,5.2,2.3],'Iris-virginica'), |
| (147,ARRAY[6.3,2.5,5.0,1.9],'Iris-virginica'), |
| (148,ARRAY[6.5,3.0,5.2,2.0],'Iris-virginica'), |
| (149,ARRAY[6.2,3.4,5.4,2.3],'Iris-virginica'), |
| (150,ARRAY[5.9,3.0,5.1,1.8],'Iris-virginica'); |
| </pre> |
| Split the data into training and test/validation datasets:
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_train, iris_test; |
| -- Set seed so results are reproducible |
| SELECT setseed(0); |
| SELECT madlib.train_test_split('iris_data', -- Source table |
| 'iris', -- Output table root name |
| 0.8, -- Train proportion |
| NULL, -- Test proportion (0.2) |
| NULL, -- Strata definition |
| NULL, -- Output all columns |
| NULL, -- Sample without replacement |
| TRUE -- Separate output tables |
| ); |
| SELECT COUNT(*) FROM iris_train; |
| </pre> |
| <pre class="result"> |
| count |
| ------+ |
| 120 |
| </pre> |
| |
| -# Call the preprocessor for deep learning. For the training dataset: |
| <pre class="example"> |
| \\x on |
| DROP TABLE IF EXISTS iris_train_packed, iris_train_packed_summary; |
| SELECT madlib.training_preprocessor_dl('iris_train', -- Source table |
| 'iris_train_packed', -- Output table |
| 'class_text', -- Dependent variable |
| 'attributes' -- Independent variable |
| ); |
| SELECT * FROM iris_train_packed_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-------+--------------------------------------------- |
| source_table | iris_train |
| output_table | iris_train_packed |
| dependent_varname | class_text |
| independent_varname | attributes |
| dependent_vartype | character varying |
| class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| buffer_size | 60 |
| normalizing_const | 1.0 |
| num_classes | 3 |
| </pre> |
| For the validation dataset: |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_test_packed, iris_test_packed_summary; |
| SELECT madlib.validation_preprocessor_dl('iris_test', -- Source table |
| 'iris_test_packed', -- Output table |
| 'class_text', -- Dependent variable |
| 'attributes', -- Independent variable |
| 'iris_train_packed' -- From training preprocessor step |
| ); |
| SELECT * FROM iris_test_packed_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-------+--------------------------------------------- |
| source_table | iris_test |
| output_table | iris_test_packed |
| dependent_varname | class_text |
| independent_varname | attributes |
| dependent_vartype | character varying |
| class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| buffer_size | 15 |
| normalizing_const | 1.0 |
| num_classes | 3 |
| </pre> |
| |
| -# Define and load model architecture. Use Keras to define
| the model architecture with 2 hidden layers:
| <pre class="example"> |
| from tensorflow import keras |
| from tensorflow.keras.models import Sequential |
| from tensorflow.keras.layers import Dense |
| model1 = Sequential() |
| model1.add(Dense(10, activation='relu', input_shape=(4,))) |
| model1.add(Dense(10, activation='relu')) |
| model1.add(Dense(3, activation='softmax')) |
| model1.summary() |
| \verbatim |
| |
| _________________________________________________________________ |
| Layer (type) Output Shape Param # |
| ================================================================= |
| dense_1 (Dense) (None, 10) 50 |
| _________________________________________________________________ |
| dense_2 (Dense) (None, 10) 110 |
| _________________________________________________________________ |
| dense_3 (Dense) (None, 3) 33 |
| ================================================================= |
| Total params: 193 |
| Trainable params: 193 |
| Non-trainable params: 0 |
| \endverbatim |
| </pre> |
| Export the model to JSON: |
| <pre class="example"> |
| model1.to_json() |
| </pre> |
| <pre class="result"> |
| '{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}' |
| </pre> |
| Define a second model architecture with 3 hidden layers:
| <pre class="example"> |
| model2 = Sequential() |
| model2.add(Dense(10, activation='relu', input_shape=(4,))) |
| model2.add(Dense(10, activation='relu')) |
| model2.add(Dense(10, activation='relu')) |
| model2.add(Dense(3, activation='softmax')) |
| model2.summary() |
| \verbatim |
| |
| Layer (type) Output Shape Param # |
| ================================================================= |
| dense_4 (Dense) (None, 10) 50 |
| _________________________________________________________________ |
| dense_5 (Dense) (None, 10) 110 |
| _________________________________________________________________ |
| dense_6 (Dense) (None, 10) 110 |
| _________________________________________________________________ |
| dense_7 (Dense) (None, 3) 33 |
| ================================================================= |
| Total params: 303 |
| Trainable params: 303 |
| Non-trainable params: 0 |
| \endverbatim |
| </pre> |
| Export the model to JSON: |
| <pre class="example"> |
| model2.to_json() |
| </pre> |
| <pre class="result"> |
| '{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_5", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_6", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_7", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}' |
| </pre> |
| Load into model architecture table: |
| <pre class="example"> |
| DROP TABLE IF EXISTS model_arch_library; |
| SELECT madlib.load_keras_model('model_arch_library', -- Output table
| $$ |
| {"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"} |
| $$ |
| ::json, -- JSON blob |
| NULL, -- Weights |
| 'Sophie', -- Name |
| 'MLP with 2 hidden layers'      -- Descr
| ); |
| SELECT madlib.load_keras_model('model_arch_library', -- Output table
| $$ |
| {"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_5", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_6", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_7", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"} |
| $$ |
| ::json, -- JSON blob |
| NULL, -- Weights |
| 'Maria', -- Name |
| 'MLP with 3 hidden layers'      -- Descr
| ); |
| </pre> |
| -# Generate model configurations using grid search. The output table for grid |
| search contains the unique combinations of model architectures, compile and |
| fit parameters. |
| <pre class="example"> |
| DROP TABLE IF EXISTS mst_table, mst_table_summary; |
| SELECT madlib.generate_model_configs( |
| 'model_arch_library', -- model architecture table |
| 'mst_table', -- model selection table output |
| ARRAY[1,2], -- model ids from model architecture table |
| $$ |
| {'loss': ['categorical_crossentropy'], |
| 'optimizer_params_list': [ {'optimizer': ['Adam'], 'lr': [0.001, 0.01, 0.1]} ], |
| 'metrics': ['accuracy']} |
| $$, -- compile_param_grid |
| $$ |
| { 'batch_size': [4, 8], |
| 'epochs': [1] |
| } |
| $$, -- fit_param_grid |
| 'grid' -- search_type |
| ); |
| SELECT * FROM mst_table ORDER BY mst_key; |
| </pre> |
| <pre class="result"> |
| mst_key | model_id | compile_params | fit_params |
| ---------+----------+---------------------------------------------------------------------------------+----------------------- |
| 1 | 1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=4 |
| 2 | 1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=8 |
| 3 | 1 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=4 |
| 4 | 1 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=8 |
| 5 | 1 | optimizer='Adam(lr=0.1)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=4 |
| 6 | 1 | optimizer='Adam(lr=0.1)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=8 |
| 7 | 2 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=4 |
| 8 | 2 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=8 |
| 9 | 2 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=4 |
| 10 | 2 | optimizer='Adam(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=8 |
| 11 | 2 | optimizer='Adam(lr=0.1)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=4 |
| 12 | 2 | optimizer='Adam(lr=0.1)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=1,batch_size=8 |
| (12 rows) |
| </pre> |
| The summary table records the name of the model architecture table that corresponds to the model selection table:
| <pre class="example"> |
| SELECT * FROM mst_table_summary; |
| </pre> |
| <pre class="result"> |
| model_arch_table | object_table |
| --------------------+-------------- |
| model_arch_library | |
| </pre> |
| |
| -# Train multiple models. |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_multi_model, iris_multi_model_summary, iris_multi_model_info; |
| SELECT madlib.madlib_keras_fit_multiple_model('iris_train_packed', -- source_table |
| 'iris_multi_model', -- model_output_table |
| 'mst_table', -- model_selection_table |
| 10, -- num_iterations |
| FALSE -- use gpus |
| ); |
| </pre> |
| View the model summary: |
| <pre class="example"> |
| SELECT * FROM iris_multi_model_summary; |
| </pre> |
| <pre class="result"> |
| source_table | iris_train_packed |
| validation_table | |
| model | iris_multi_model |
| model_info | iris_multi_model_info |
| dependent_varname | {class_text} |
| independent_varname | {attributes} |
| model_arch_table | model_arch_library |
| model_selection_table | mst_table |
| object_table | |
| num_iterations | 10 |
| metrics_compute_frequency | 10 |
| warm_start | f |
| name | |
| description | |
| start_training_time | 2021-02-05 00:40:42.695613 |
| end_training_time | 2021-02-05 00:42:20.796712 |
| madlib_version | 1.20.0 |
| num_classes | {1} |
| class_text_class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| dependent_vartype | {"character varying"} |
| normalizing_const | 1 |
| metrics_iters | {10} |
| </pre> |
| View results for each model: |
| <pre class="example"> |
| SELECT * FROM iris_multi_model_info ORDER BY training_metrics_final DESC, training_loss_final; |
| </pre> |
| <pre class="result"> |
| mst_key | model_id | compile_params | fit_params | model_type | model_size | metrics_elapsed_time | metrics_type | loss_type | training_metrics_final | training_loss_final | training_metrics | training_loss | validation_metrics_final | validation_loss_final | validation_metrics | validation_loss |
| ---------+----------+---------------------------------------------------------------------------------+-----------------------+--------------+------------+----------------------+--------------+--------------------------+------------------------+---------------------+---------------------+----------------------+--------------------------+-----------------------+--------------------+----------------- |
| 2 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.75390625 | {96.2744579315186} | {accuracy} | categorical_crossentropy | 0.966666638851166 | 0.0771341994404793 | {0.966666638851166} | {0.0771341994404793} | | | | |
| 3 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.75390625 | {95.0798950195312} | {accuracy} | categorical_crossentropy | 0.958333313465118 | 0.14112713932991 | {0.958333313465118} | {0.14112713932991} | | | | |
| 10 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.18359375 | {95.7743279933929} | {accuracy} | categorical_crossentropy | 0.949999988079071 | 0.126085489988327 | {0.949999988079071} | {0.126085489988327} | | | | |
| 5 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.75390625 | {97.8191220760345} | {accuracy} | categorical_crossentropy | 0.866666674613953 | 0.459462374448776 | {0.866666674613953} | {0.459462374448776} | | | | |
| 4 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.75390625 | {96.7445840835571} | {accuracy} | categorical_crossentropy | 0.858333349227905 | 0.279698997735977 | {0.858333349227905} | {0.279698997735977} | | | | |
| 9 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.18359375 | {95.3640351295471} | {accuracy} | categorical_crossentropy | 0.824999988079071 | 0.325970768928528 | {0.824999988079071} | {0.325970768928528} | | | | |
| 7 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.18359375 | {94.8350050449371} | {accuracy} | categorical_crossentropy | 0.800000011920929 | 0.458843886852264 | {0.800000011920929} | {0.458843886852264} | | | | |
| 12 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.18359375 | {96.0411529541016} | {accuracy} | categorical_crossentropy | 0.783333361148834 | 0.766786217689514 | {0.783333361148834} | {0.766786217689514} | | | | |
| 11 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.18359375 | {98.098680973053} | {accuracy} | categorical_crossentropy | 0.683333337306976 | 0.607033967971802 | {0.683333337306976} | {0.607033967971802} | | | | |
| 6 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.75390625 | {96.5071561336517} | {accuracy} | categorical_crossentropy | 0.683333337306976 | 0.704851150512695 | {0.683333337306976} | {0.704851150512695} | | | | |
| 8 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.18359375 | {97.4749901294708} | {accuracy} | categorical_crossentropy | 0.641666650772095 | 0.473412454128265 | {0.641666650772095} | {0.473412454128265} | | | | |
| 1 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.75390625 | {96.9749960899353} | {accuracy} | categorical_crossentropy | 0.358333319425583 | 1.09744954109192 | {0.358333319425583} | {1.09744954109192} | | | | |
| (12 rows) |
| </pre> |
| |
| -# Evaluate. Now run evaluate using one of the models we built above (mst_key=3):
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_validate; |
| SELECT madlib.madlib_keras_evaluate('iris_multi_model', -- model |
| 'iris_test_packed', -- test table |
| 'iris_validate', -- output table |
| NULL, -- use gpus |
| 3 -- mst_key to use |
| ); |
| SELECT * FROM iris_validate; |
| </pre> |
| <pre class="result"> |
| loss | metric | metrics_type | loss_type |
| --------------------+--------+--------------+-------------------------- |
| 0.0789926201105118 | 1 | {accuracy} | categorical_crossentropy |
| </pre> |
| |
| -# Predict. Now predict using one of the models we built. We will use the validation dataset
| for prediction as well, which is not usual but serves to show the syntax.
| The prediction is in the 'class_value' column:
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_predict; |
| SELECT madlib.madlib_keras_predict('iris_multi_model', -- model |
| 'iris_test', -- test_table |
| 'id', -- id column |
| 'attributes', -- independent var |
| 'iris_predict', -- output table |
| 'response', -- prediction type |
| FALSE, -- use gpus |
| 3 -- mst_key to use |
| ); |
| SELECT * FROM iris_predict ORDER BY id; |
| </pre> |
| <pre class="result"> |
| id | class_name | class_value | prob |
| -----+------------+-----------------+------------ |
| 8 | class_text | Iris-setosa | 0.9979984 |
| 15 | class_text | Iris-setosa | 0.99929357 |
| 20 | class_text | Iris-setosa | 0.9985701 |
| 21 | class_text | Iris-setosa | 0.9984106 |
| 22 | class_text | Iris-setosa | 0.9983991 |
| 27 | class_text | Iris-setosa | 0.99763095 |
| 28 | class_text | Iris-setosa | 0.998376 |
| 33 | class_text | Iris-setosa | 0.99901116 |
| 37 | class_text | Iris-setosa | 0.99871385 |
| 38 | class_text | Iris-setosa | 0.99740535 |
| 46 | class_text | Iris-setosa | 0.9968368 |
| 51 | class_text | Iris-versicolor | 0.93798196 |
| 57 | class_text | Iris-versicolor | 0.73391247 |
| 58 | class_text | Iris-versicolor | 0.9449931 |
| 59 | class_text | Iris-versicolor | 0.8938894 |
| 64 | class_text | Iris-versicolor | 0.6563309 |
| 65 | class_text | Iris-versicolor | 0.95828146 |
| 72 | class_text | Iris-versicolor | 0.94206 |
| 75 | class_text | Iris-versicolor | 0.93305194 |
| 87 | class_text | Iris-versicolor | 0.8458596 |
| 95 | class_text | Iris-versicolor | 0.76850986 |
| 97 | class_text | Iris-versicolor | 0.8467575 |
| 98 | class_text | Iris-versicolor | 0.9081358 |
| 120 | class_text | Iris-virginica | 0.86913925 |
| 121 | class_text | Iris-virginica | 0.96328884 |
| 122 | class_text | Iris-virginica | 0.9474733 |
| 123 | class_text | Iris-virginica | 0.97997576 |
| 140 | class_text | Iris-virginica | 0.8721462 |
| 144 | class_text | Iris-virginica | 0.9745266 |
| 147 | class_text | Iris-virginica | 0.8978669 |
| (30 rows) |
| </pre> |
| Count misclassifications:
| <pre class="example"> |
| SELECT COUNT(*) FROM iris_predict JOIN iris_test USING (id) |
| WHERE iris_predict.class_value != iris_test.class_text; |
| </pre> |
| <pre class="result"> |
| count |
| -------+ |
| 0 |
| </pre> |
| Accuracy: |
| <pre class="example"> |
| SELECT round(count(*)*100/(150*0.2),2) AS test_accuracy_percent
| FROM (SELECT iris_test.class_text AS actual, iris_predict.class_value AS estimated
|       FROM iris_predict INNER JOIN iris_test
|       ON iris_test.id=iris_predict.id) q
| WHERE q.actual=q.estimated;
| </pre> |
| <pre class="result"> |
| test_accuracy_percent |
| -----------------------+ |
| 100.00 |
| </pre> |
| |
| <h4>Classification with Other Parameters</h4> |
| |
| -# Validation dataset. Now use a validation dataset |
| and compute metrics every 3rd iteration using |
| the 'metrics_compute_frequency' parameter. This can |
| help reduce run time if you do not need metrics |
| computed at every iteration. Also turn on caching. |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_multi_model, iris_multi_model_summary, iris_multi_model_info; |
| SELECT madlib.madlib_keras_fit_multiple_model('iris_train_packed', -- source_table |
| 'iris_multi_model', -- model_output_table |
| 'mst_table', -- model_selection_table |
| 10, -- num_iterations |
| FALSE, -- use gpus |
| 'iris_test_packed', -- validation dataset |
| 3, -- metrics compute frequency |
| FALSE, -- warm start |
| 'Sophie L.', -- name |
| 'Model selection for iris dataset', -- description |
| TRUE -- use caching |
| ); |
| </pre> |
| View the model summary: |
| <pre class="example"> |
| SELECT * FROM iris_multi_model_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-------------+--------------------------------------------- |
| source_table | iris_train_packed |
| validation_table | iris_test_packed |
| model | iris_multi_model |
| model_info | iris_multi_model_info |
| dependent_varname | {class_text} |
| independent_varname | {attributes} |
| model_arch_table | model_arch_library |
| model_selection_table | mst_table |
| object_table | |
| num_iterations | 10 |
| metrics_compute_frequency | 3 |
| warm_start | f |
| name | Sophie L. |
| description | Model selection for iris dataset |
| start_training_time | 2021-02-05 01:03:11.337798 |
| end_training_time | 2021-02-05 01:05:14.988912 |
| madlib_version | 1.20.0 |
| num_classes | {1} |
| class_text_class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| dependent_vartype | {"character varying"} |
| normalizing_const | 1 |
| metrics_iters | {3,6,9,10} |
| </pre> |
| View results for each model: |
| <pre class="example"> |
| SELECT * FROM iris_multi_model_info ORDER BY training_metrics_final DESC, training_loss_final; |
| </pre> |
| <pre class="result"> |
| ---------+----------+---------------------------------------------------------------------------------+-----------------------+--------------+------------+-----------------------------------------------------------------------+--------------+--------------------------+------------------------+---------------------+---------------------------------------------------------------------------+---------------------------------------------------------------------------+--------------------------+-----------------------+---------------------------------------------------------------------------+------------------------------------------------------------------------------ |
| 10 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.18359375 | {34.347088098526,69.4049510955811,105.285098075867,121.389348983765} | {accuracy} | categorical_crossentropy | 0.975000023841858 | 0.166684880852699 | {0.949999988079071,0.949999988079071,0.908333361148834,0.975000023841858} | {0.615781545639038,0.269571483135223,0.242876216769218,0.166684880852699} | 1 | 0.107727721333504 | {1,1,0.966666638851166,1} | {0.620104968547821,0.227639734745026,0.155238434672356,0.107727721333504} |
| 3 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.75390625 | {33.7874810695648,68.735356092453,104.679323911667,120.741965055466} | {accuracy} | categorical_crossentropy | 0.958333313465118 | 0.151768147945404 | {0.958333313465118,0.816666662693024,0.941666662693024,0.958333313465118} | {0.232924744486809,0.356248170137405,0.160927340388298,0.151768147945404} | 1 | 0.0933234840631485 | {1,0.866666674613953,0.966666638851166,1} | {0.197644680738449,0.244517579674721,0.0969053283333778,0.0933234840631485} |
| 4 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.75390625 | {35.5827050209045,70.4194140434265,106.230206012726,122.374103069305} | {accuracy} | categorical_crossentropy | 0.941666662693024 | 0.146068558096886 | {0.824999988079071,0.958333313465118,0.941666662693024,0.941666662693024} | {0.339193284511566,0.171411842107773,0.154963329434395,0.146068558096886} | 1 | 0.0618178397417068 | {0.933333337306976,1,1,1} | {0.302341401576996,0.0991373136639595,0.0709080845117569,0.0618178397417068} |
| 5 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.75390625 | {36.3988909721375,71.1546559333801,107.219161987305,123.366652965546} | {accuracy} | categorical_crossentropy | 0.908333361148834 | 0.656857788562775 | {0.358333319425583,0.683333337306976,0.766666650772095,0.908333361148834} | {0.996901094913483,0.852809607982635,0.694450318813324,0.656857788562775} | 0.899999976158142 | 0.630161166191101 | {0.233333334326744,0.600000023841858,0.633333325386047,0.899999976158142} | {1.05581676959991,0.876067101955414,0.700714349746704,0.630161166191101} |
| 2 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.75390625 | {35.0734770298004,69.949450969696,105.773796081543,121.901873111725} | {accuracy} | categorical_crossentropy | 0.866666674613953 | 0.26227542757988 | {0.558333337306976,0.683333337306976,0.708333313465118,0.866666674613953} | {0.755041897296906,0.551798105239868,0.504159986972809,0.26227542757988} | 0.899999976158142 | 0.156955525279045 | {0.633333325386047,0.600000023841858,0.633333325386047,0.899999976158142} | {0.663675665855408,0.674827337265015,0.613502621650696,0.156955525279045} |
| 6 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.75390625 | {35.3355600833893,70.1827409267426,106.000793933868,122.135368108749} | {accuracy} | categorical_crossentropy | 0.858333349227905 | 0.403681367635727 | {0.708333313465118,0.850000023841858,0.783333361148834,0.858333349227905} | {0.609176814556122,0.488206028938293,0.425172030925751,0.403681367635727} | 0.899999976158142 | 0.393621385097504 | {0.600000023841858,0.833333313465118,0.800000011920929,0.899999976158142} | {0.624664425849915,0.48302897810936,0.428876429796219,0.393621385097504} |
| 9 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.18359375 | {34.0762810707092,69.0332140922546,105.02664899826,121.034147024155} | {accuracy} | categorical_crossentropy | 0.816666662693024 | 0.40071240067482 | {0.716666638851166,0.975000023841858,0.941666662693024,0.816666662693024} | {0.530809044837952,0.112755678594112,0.178483173251152,0.40071240067482} | 0.899999976158142 | 0.160617485642433 | {0.666666686534882,1,1,0.899999976158142} | {0.510058879852295,0.0435655005276203,0.0271952152252197,0.160617485642433} |
| 11 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.18359375 | {36.664577960968,71.4262120723724,107.501835107803,123.646811962128} | {accuracy} | categorical_crossentropy | 0.816666662693024 | 0.683901190757751 | {0.358333319425583,0.574999988079071,0.758333325386047,0.816666662693024} | {0.969602465629578,0.828204095363617,0.7138671875,0.683901190757751} | 0.833333313465118 | 0.720155417919159 | {0.233333334326744,0.466666668653488,0.666666686534882,0.833333313465118} | {1.04192113876343,0.878720223903656,0.755707621574402,0.720155417919159} |
| 1 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.75390625 | {35.8240220546722,70.6528959274292,106.680663108826,122.609847068787} | {accuracy} | categorical_crossentropy | 0.683333337306976 | 0.479730814695358 | {0.675000011920929,0.683333337306976,0.683333337306976,0.683333337306976} | {0.507641136646271,0.467351347208023,0.507468938827515,0.479730814695358} | 0.600000023841858 | 0.503782331943512 | {0.600000023841858,0.600000023841858,0.600000023841858,0.600000023841858} | {0.504352450370789,0.448328793048859,0.561446607112885,0.503782331943512} |
| 7 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.18359375 | {33.5537171363831,68.2605030536652,104.443753004074,120.498863935471} | {accuracy} | categorical_crossentropy | 0.683333337306976 | 0.940275907516479 | {0.733333349227905,0.641666650772095,0.933333337306976,0.683333337306976} | {0.419289141893387,0.746466338634491,0.25556743144989,0.940275907516479} | 0.600000023841858 | 1.1002448797226 | {0.633333325386047,0.766666650772095,0.966666638851166,0.600000023841858} | {0.457020550966263,0.510054171085358,0.249405279755592,1.1002448797226} |
| 12 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.18359375 | {34.8103830814362,69.698233127594,105.547440052032,121.659696102142} | {accuracy} | categorical_crossentropy | 0.633333325386047 | 0.711242735385895 | {0.458333343267441,0.5,0.683333337306976,0.633333325386047} | {1.71241450309753,0.98362809419632,0.776128530502319,0.711242735385895} | 0.566666662693024 | 0.721668004989624 | {0.433333337306976,0.5,0.600000023841858,0.566666662693024} | {1.30801546573639,0.950868666172028,0.841940879821777,0.721668004989624} |
| 8 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.18359375 | {36.0954039096832,70.921669960022,106.977710008621,123.112148046494} | {accuracy} | categorical_crossentropy | 0.358333319425583 | 1.10370969772339 | {0.324999988079071,0.358333319425583,0.358333319425583,0.358333319425583} | {1.10047388076782,1.09791541099548,1.09736382961273,1.10370969772339} | 0.233333334326744 | 1.11518168449402 | {0.366666674613953,0.233333334326744,0.233333334326744,0.233333334326744} | {1.09480428695679,1.1150735616684,1.10827457904816,1.11518168449402} |
| (12 rows) |
| </pre> |
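Since the info table now contains validation metrics, you can rank configurations by
held-out performance rather than training performance. For example, to fetch the best
model by final validation accuracy, breaking ties by validation loss:
<pre class="example">
SELECT mst_key, compile_params, fit_params, validation_metrics_final, validation_loss_final
FROM iris_multi_model_info
ORDER BY validation_metrics_final DESC, validation_loss_final
LIMIT 1;
</pre>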
| |
| -# Predict probabilities for each class: |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_predict; |
| SELECT madlib.madlib_keras_predict('iris_multi_model', -- model |
| 'iris_test', -- test_table |
| 'id', -- id column |
| 'attributes', -- independent var |
| 'iris_predict', -- output table |
| 'prob', -- prediction type |
| FALSE, -- use gpus |
| 3 -- mst_key to use |
| ); |
| SELECT * FROM iris_predict ORDER BY id, rank; |
| </pre> |
| <pre class="result"> |
| -----+------------+-----------------+---------------+------ |
| 8 | class_text | Iris-setosa | 0.99961567 | 1 |
| 8 | class_text | Iris-versicolor | 0.00038426166 | 2 |
| 8 | class_text | Iris-virginica | 5.4132368e-08 | 3 |
| 15 | class_text | Iris-setosa | 0.9999677 | 1 |
| 15 | class_text | Iris-versicolor | 3.235117e-05 | 2 |
| 15 | class_text | Iris-virginica | 7.5747697e-10 | 3 |
| 20 | class_text | Iris-setosa | 0.99956757 | 1 |
| 20 | class_text | Iris-versicolor | 0.0004323488 | 2 |
| 20 | class_text | Iris-virginica | 6.8698306e-08 | 3 |
| 21 | class_text | Iris-setosa | 0.99978334 | 1 |
| 21 | class_text | Iris-versicolor | 0.00021666527 | 2 |
| 21 | class_text | Iris-virginica | 1.741537e-08 | 3 |
| 22 | class_text | Iris-setosa | 0.99955314 | 1 |
| 22 | class_text | Iris-versicolor | 0.00044669537 | 2 |
| 22 | class_text | Iris-virginica | 8.287442e-08 | 3 |
| 27 | class_text | Iris-setosa | 0.999393 | 1 |
| 27 | class_text | Iris-versicolor | 0.00060693553 | 2 |
| 27 | class_text | Iris-virginica | 1.4125514e-07 | 3 |
| 28 | class_text | Iris-setosa | 0.9997589 | 1 |
| 28 | class_text | Iris-versicolor | 0.00024109415 | 2 |
| 28 | class_text | Iris-virginica | 2.3418018e-08 | 3 |
| 33 | class_text | Iris-setosa | 0.99966764 | 1 |
| 33 | class_text | Iris-versicolor | 0.00033237415 | 2 |
| 33 | class_text | Iris-virginica | 3.2712443e-08 | 3 |
| 37 | class_text | Iris-setosa | 0.9999347 | 1 |
| 37 | class_text | Iris-versicolor | 6.5290136e-05 | 2 |
| 37 | class_text | Iris-virginica | 2.6893372e-09 | 3 |
| 38 | class_text | Iris-setosa | 0.99963343 | 1 |
| 38 | class_text | Iris-versicolor | 0.0003665546 | 2 |
| 38 | class_text | Iris-virginica | 4.7346873e-08 | 3 |
| 46 | class_text | Iris-setosa | 0.9995722 | 1 |
| 46 | class_text | Iris-versicolor | 0.00042761036 | 2 |
| 46 | class_text | Iris-virginica | 8.4179504e-08 | 3 |
| 51 | class_text | Iris-versicolor | 0.9678325 | 1 |
| 51 | class_text | Iris-virginica | 0.031179406 | 2 |
| 51 | class_text | Iris-setosa | 0.0009880556 | 3 |
| 57 | class_text | Iris-versicolor | 0.7971095 | 1 |
| 57 | class_text | Iris-virginica | 0.20281655 | 2 |
| 57 | class_text | Iris-setosa | 7.402597e-05 | 3 |
| 58 | class_text | Iris-versicolor | 0.93808955 | 1 |
| 58 | class_text | Iris-virginica | 0.045088027 | 2 |
| 58 | class_text | Iris-setosa | 0.016822474 | 3 |
| 59 | class_text | Iris-versicolor | 0.95377713 | 1 |
| 59 | class_text | Iris-virginica | 0.045621604 | 2 |
| 59 | class_text | Iris-setosa | 0.0006012956 | 3 |
| 64 | class_text | Iris-versicolor | 0.8078371 | 1 |
| 64 | class_text | Iris-virginica | 0.19210756 | 2 |
| 64 | class_text | Iris-setosa | 5.5370016e-05 | 3 |
| 65 | class_text | Iris-versicolor | 0.946594 | 1 |
| 65 | class_text | Iris-virginica | 0.04081257 | 2 |
| 65 | class_text | Iris-setosa | 0.012593448 | 3 |
| 72 | class_text | Iris-versicolor | 0.9616088 | 1 |
| 72 | class_text | Iris-virginica | 0.033955 | 2 |
| 72 | class_text | Iris-setosa | 0.004436169 | 3 |
| 75 | class_text | Iris-versicolor | 0.96245867 | 1 |
| 75 | class_text | Iris-virginica | 0.03556654 | 2 |
| 75 | class_text | Iris-setosa | 0.0019747794 | 3 |
| 87 | class_text | Iris-versicolor | 0.9264334 | 1 |
| 87 | class_text | Iris-virginica | 0.07328824 | 2 |
| 87 | class_text | Iris-setosa | 0.00027841402 | 3 |
| 95 | class_text | Iris-versicolor | 0.85156035 | 1 |
| 95 | class_text | Iris-virginica | 0.14813933 | 2 |
| 95 | class_text | Iris-setosa | 0.00030031145 | 3 |
| 97 | class_text | Iris-versicolor | 0.87470025 | 1 |
| 97 | class_text | Iris-virginica | 0.1248041 | 2 |
| 97 | class_text | Iris-setosa | 0.0004957556 | 3 |
| 98 | class_text | Iris-versicolor | 0.9439469 | 1 |
| 98 | class_text | Iris-virginica | 0.05491364 | 2 |
| 98 | class_text | Iris-setosa | 0.001139452 | 3 |
| 120 | class_text | Iris-virginica | 0.58107394 | 1 |
| 120 | class_text | Iris-versicolor | 0.41892523 | 2 |
| 120 | class_text | Iris-setosa | 8.7891783e-07 | 3 |
| 121 | class_text | Iris-virginica | 0.90112364 | 1 |
| 121 | class_text | Iris-versicolor | 0.098876335 | 2 |
| 121 | class_text | Iris-setosa | 5.8106266e-09 | 3 |
| 122 | class_text | Iris-virginica | 0.8664512 | 1 |
| 122 | class_text | Iris-versicolor | 0.13354866 | 2 |
| 122 | class_text | Iris-setosa | 8.255242e-08 | 3 |
| 123 | class_text | Iris-virginica | 0.90162355 | 1 |
| 123 | class_text | Iris-versicolor | 0.09837647 | 2 |
| 123 | class_text | Iris-setosa | 2.745874e-10 | 3 |
| 140 | class_text | Iris-virginica | 0.7306292 | 1 |
| 140 | class_text | Iris-versicolor | 0.2693706 | 2 |
| 140 | class_text | Iris-setosa | 1.9480031e-07 | 3 |
| 144 | class_text | Iris-virginica | 0.92295665 | 1 |
| 144 | class_text | Iris-versicolor | 0.07704339 | 2 |
| 144 | class_text | Iris-setosa | 1.6369142e-09 | 3 |
| 147 | class_text | Iris-virginica | 0.69545543 | 1 |
| 147 | class_text | Iris-versicolor | 0.30454406 | 2 |
| 147 | class_text | Iris-setosa | 4.5714978e-07 | 3 |
| (90 rows) |
| </pre> |
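Because the 'prob' output contains one row per class, you can recover the single most
likely class for each observation by filtering on the rank column:
<pre class="example">
SELECT id, class_value, prob
FROM iris_predict
WHERE rank = 1
ORDER BY id;
</pre>
This returns one row per test observation (30 rows here), each with its top class and probability.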
| |
-# Warm start. Next, use the warm_start parameter
to continue learning, using the model weights from
the run above as the starting point. Note that we don't drop the
model table or model summary table:
| <pre class="example"> |
| SELECT madlib.madlib_keras_fit_multiple_model('iris_train_packed', -- source_table |
| 'iris_multi_model', -- model_output_table |
| 'mst_table', -- model_selection_table |
| 3, -- num_iterations |
| FALSE, -- use gpus |
| 'iris_test_packed', -- validation dataset |
| 1, -- metrics compute frequency |
| TRUE, -- warm start |
| 'Sophie L.', -- name |
| 'Simple MLP for iris dataset', -- description |
| TRUE -- use caching |
| ); |
| SELECT * FROM iris_multi_model_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-------------+--------------------------------------------- |
| source_table | iris_train_packed |
| validation_table | iris_test_packed |
| model | iris_multi_model |
| model_info | iris_multi_model_info |
| dependent_varname | {class_text} |
| independent_varname | {attributes} |
| model_arch_table | model_arch_library |
| model_selection_table | mst_table |
| object_table | |
| num_iterations | 3 |
| metrics_compute_frequency | 1 |
| warm_start | t |
| name | Sophie L. |
| description | Simple MLP for iris dataset |
| start_training_time | 2021-02-05 01:17:19.432839 |
| end_training_time | 2021-02-05 01:18:09.062384 |
| madlib_version | 1.20.0 |
| num_classes | {1} |
| class_text_class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| dependent_vartype | {"character varying"} |
| normalizing_const | 1 |
| metrics_iters | {1,2,3} |
| </pre> |
| View results for each model: |
| <pre class="example"> |
| SELECT * FROM iris_multi_model_info ORDER BY training_metrics_final DESC, training_loss_final; |
| </pre> |
| <pre class="result"> |
| mst_key | model_id | compile_params | fit_params | model_type | model_size | metrics_elapsed_time | metrics_type | loss_type | training_metrics_final | training_loss_final | training_metrics | training_loss | validation_metrics_final | validation_loss_final | validation_metrics | validation_loss |
| ---------+----------+---------------------------------------------------------------------------------+-----------------------+--------------+------------+------------------------------------------------------+--------------+--------------------------+------------------------+---------------------+---------------------------------------------------------+---------------------------------------------------------+--------------------------+-----------------------+---------------------------------------------------------+----------------------------------------------------------- |
| 10 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.18359375 | {15.5192940235138,31.2597029209137,47.3274641036987} | {accuracy} | categorical_crossentropy | 0.966666638851166 | 0.122704714536667 | {0.925000011920929,0.808333337306976,0.966666638851166} | {0.172832280397415,0.354962348937988,0.122704714536667} | 1 | 0.0603121444582939 | {1,0.866666674613953,1} | {0.100369863212109,0.210344776511192,0.0603121444582939} |
| 4 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.75390625 | {16.485249042511,32.207494020462,48.3005399703979} | {accuracy} | categorical_crossentropy | 0.958333313465118 | 0.104984156787395 | {0.949999988079071,0.866666674613953,0.958333313465118} | {0.126872479915619,0.29683381319046,0.104984156787395} | 1 | 0.0291607566177845 | {1,0.933333337306976,1} | {0.0433581434190273,0.127146035432816,0.0291607566177845} |
| 3 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.75390625 | {14.8933939933777,30.7211890220642,46.7588970661163} | {accuracy} | categorical_crossentropy | 0.958333313465118 | 0.107885278761387 | {0.941666662693024,0.966666638851166,0.958333313465118} | {0.155905768275261,0.129546254873276,0.107885278761387} | 1 | 0.0387893654406071 | {1,1,1} | {0.087645135819912,0.0574964620172977,0.0387893654406071} |
| 7 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.18359375 | {14.657429933548,30.4864449501038,46.5082159042358} | {accuracy} | categorical_crossentropy | 0.958333313465118 | 0.215500101447105 | {0.891666650772095,0.649999976158142,0.958333313465118} | {0.2352494597435,0.490339070558548,0.215500101447105} | 1 | 0.194652214646339 | {0.899999976158142,0.766666650772095,1} | {0.235657200217247,0.335934072732925,0.194652214646339} |
| 6 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.75390625 | {16.2535049915314,31.9760749340057,48.0661840438843} | {accuracy} | categorical_crossentropy | 0.941666662693024 | 0.354692339897156 | {0.975000023841858,0.908333361148834,0.941666662693024} | {0.38957467675209,0.367432981729507,0.354692339897156} | 1 | 0.321579396724701 | {1,0.966666638851166,1} | {0.348732680082321,0.345716863870621,0.321579396724701} |
| 2 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.75390625 | {16.0124208927155,31.7479128837585,47.8348250389099} | {accuracy} | categorical_crossentropy | 0.933333337306976 | 0.238343968987465 | {0.975000023841858,0.975000023841858,0.933333337306976} | {0.17331151664257,0.0775784701108932,0.238343968987465} | 0.966666638851166 | 0.182112962007523 | {0.966666638851166,1,0.966666638851166} | {0.114220358431339,0.0159573350101709,0.182112962007523} |
| 9 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.18359375 | {15.1569800376892,30.9868409633636,47.0489699840546} | {accuracy} | categorical_crossentropy | 0.908333361148834 | 0.254226505756378 | {0.858333349227905,0.833333313465118,0.908333361148834} | {0.450037389993668,0.500195503234863,0.254226505756378} | 0.966666638851166 | 0.0769383609294891 | {0.899999976158142,0.899999976158142,0.966666638851166} | {0.163723081350327,0.211927503347397,0.0769383609294891} |
| 11 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.18359375 | {17.7095139026642,33.4267060756683,49.6263670921326} | {accuracy} | categorical_crossentropy | 0.899999976158142 | 0.602623522281647 | {0.824999988079071,0.899999976158142,0.899999976158142} | {0.65915185213089,0.633350729942322,0.602623522281647} | 0.899999976158142 | 0.626452326774597 | {0.833333313465118,0.899999976158142,0.899999976158142} | {0.695301294326782,0.661069989204407,0.626452326774597} |
| 5 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.75390625 | {17.4354989528656,33.1478018760681,49.3430600166321} | {accuracy} | categorical_crossentropy | 0.741666674613953 | 0.532543838024139 | {0.916666686534882,0.875,0.741666674613953} | {0.610130310058594,0.571656167507172,0.532543838024139} | 0.633333325386047 | 0.540634453296661 | {0.966666638851166,0.899999976158142,0.633333325386047} | {0.601030588150024,0.567581355571747,0.540634453296661} |
| 12 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.18359375 | {15.7747659683228,31.5215599536896,47.6013031005859} | {accuracy} | categorical_crossentropy | 0.675000011920929 | 0.581513583660126 | {0.683333337306976,0.683333337306976,0.675000011920929} | {0.660648465156555,0.623969316482544,0.581513583660126} | 0.600000023841858 | 0.594986796379089 | {0.600000023841858,0.600000023841858,0.600000023841858} | {0.694275736808777,0.665920257568359,0.594986796379089} |
| 1 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.75390625 | {16.8983609676361,32.4361009597778,48.5357549190521} | {accuracy} | categorical_crossentropy | 0.608333349227905 | 0.687089145183563 | {0.583333313465118,0.699999988079071,0.608333349227905} | {0.816642761230469,0.455933183431625,0.687089145183563} | 0.533333361148834 | 0.692391216754913 | {0.533333361148834,0.600000023841858,0.533333361148834} | {0.85290253162384,0.465981602668762,0.692391216754913} |
| 8 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.18359375 | {17.1899569034576,32.8988509178162,49.0880739688873} | {accuracy} | categorical_crossentropy | 0.358333319425583 | 1.11154329776764 | {0.358333319425583,0.324999988079071,0.358333319425583} | {1.09858739376068,1.09980893135071,1.11154329776764} | 0.233333334326744 | 1.16109442710876 | {0.233333334326744,0.366666674613953,0.233333334326744} | {1.12281405925751,1.10376691818237,1.16109442710876} |
| (12 rows) |
| </pre> |
| Note that the loss and accuracy values pick up from where the previous run left off. |
| |
| @anchor notes |
| @par Notes |
| |
| 1. Refer to the deep learning section of the Apache MADlib |
| wiki [6] for important information including supported libraries |
| and versions. |
| |
2. Classification is currently supported; regression is not.
| |
3. A reminder on the distinction between warm start and transfer learning. Warm start uses model
state (weights) saved in the model output table by a previous training run;
set the 'warm_start' parameter to TRUE in the fit function.
Transfer learning uses initial model state (weights) stored in the 'model_arch_table';
in this case, set the 'warm_start' parameter to FALSE in the fit function.
| |
4. Here are some more details on how warm start works; they are mostly relevant when implementing
autoML algorithms on top of MADlib's model selection. In short, the 'model_selection_table' dictates
which models get trained and written to the 'model_output_table' and its associated summary and info
tables. When 'warm_start' is TRUE, models are built for each 'mst_key' in the 'model_selection_table'.
If there is a prior run for an 'mst_key', the weights from that run are used; if not, random
initialization is used. For example, say we start with 'mst_keys' of 1, 2, 3, and 4 in the
'model_selection_table'. We run fit once to get model and info tables for 1, 2, 3, and 4. Then we
modify the 'model_selection_table' as part of an autoML scheme, removing the 'mst_key' 1 and adding
a new 'mst_key' 5. Next we run fit with warm start. The result is models for 'mst_keys' 2, 3, 4,
and 5: warm start is used for 2, 3, and 4 (from the prior run) and random initialization for 5
(no prior run). The 'mst_key' of 1 is dropped. A sketch of this workflow appears after these notes.
| |
| 5. The 'num_iterations' parameter and the Keras fit parameter 'epochs' can substantially affect accuracy and run-time. |
| In general, increasing the number of 'epochs' for a fixed 'num_iterations' will speed up training, but may result |
| in lower accuracy. It's best to do some experimentation to find out what works for your models and dataset. |
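To make note 4 concrete, here is a minimal sketch of that workflow. The key values and
parameters below are hypothetical and mirror the narrative in the note; adjust them to
match your own model selection table:
<pre class="example">
-- Hypothetical autoML step: retire configuration 1 and try a new configuration 5.
DELETE FROM mst_table WHERE mst_key = 1;
INSERT INTO mst_table (mst_key, model_id, compile_params, fit_params)
VALUES (5, 2,
        $$loss='categorical_crossentropy',optimizer='Adam(lr=0.005)',metrics=['accuracy']$$,
        $$batch_size=4,epochs=1$$);
-- Fit with warm_start=TRUE: keys 2, 3 and 4 resume from their prior weights,
-- key 5 starts from random initialization, and key 1 is dropped.
SELECT madlib.madlib_keras_fit_multiple_model('iris_train_packed', 'iris_multi_model',
                                              'mst_table', 3, FALSE, 'iris_test_packed',
                                              1, TRUE);
</pre>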
| |
| @anchor background |
| @par Technical Background |
| |
| For an introduction to deep learning foundations, including MLP and CNN, |
| refer to [7]. |
| |
This module trains many models at a time across the database cluster in order
| to explore network architectures and hyperparameters. It uses model hopper |
| parallelism (MOP) and has high convergence efficiency since it does not do |
| model averaging [2]. |
| |
| On the effect of database cluster size: as the database cluster size increases, |
| it will be faster to train a set of models, as long as you have at |
| least as many model selection tuples as segments. This is because model state is "hopped" from |
| segment to segment and training takes place in parallel [1,2]. If you have fewer model |
| selection tuples to train than segments, then some |
segments may not be busy 100% of the time, so speedup will not necessarily increase
| on a larger cluster. Inference (prediction) is an embarrassingly parallel operation so |
| inference runtimes will be proportionally faster as the number of segments increases. |
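On Greenplum, a quick way to compare the number of model selection tuples against the
number of primary segments is to query the segment catalog. A sketch, assuming access
to the gp_segment_configuration catalog and the 'mst_table' from the examples above:
<pre class="example">
-- Number of primary segments available for model hopping.
SELECT count(*) AS num_segments
FROM gp_segment_configuration
WHERE role = 'p' AND content >= 0;
-- Number of model configurations to train.
SELECT count(*) AS num_models FROM mst_table;
</pre>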
| |
| @anchor literature |
| @literature |
| |
| @anchor mlp-lit-1 |
| [1] "Cerebro: Efficient and Reproducible Model Selection on Deep Learning Systems," |
| Supun Nakandala, Yuhao Zhang, and Arun Kumar, ACM SIGMOD 2019 DEEM Workshop, |
| https://adalabucsd.github.io/papers/2019_Cerebro_DEEM.pdf |
| |
| [2] "Cerebro: A Data System for Optimized Deep Learning Model Selection," |
| Supun Nakandala, Yuhao Zhang, and Arun Kumar, Proceedings of the VLDB Endowment (2020), Vol. 13, No. 11 |
| https://adalabucsd.github.io/papers/2020_Cerebro_VLDB.pdf |
| |
| [3] https://keras.io/ |
| |
| [4] https://www.tensorflow.org/ |
| |
| [5] "Neural Networks for Machine Learning", Lectures 6a and 6b on mini-batch gradient descent, |
| Geoffrey Hinton with Nitish Srivastava and Kevin Swersky, |
| http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf |
| |
| [6] Deep learning section of Apache MADlib wiki https://cwiki.apache.org/confluence/display/MADLIB/Deep+Learning |
| |
| [7] Deep Learning, Ian Goodfellow, Yoshua Bengio and Aaron Courville, MIT Press, 2016. |
| |
| [8] Greenplum Database server configuration parameters https://gpdb.docs.pivotal.io/latest/ref_guide/config_params/guc-list.html |
| |
| @anchor related |
| @par Related Topics |
| |
File madlib_keras_fit_multiple_model.sql_in documents the training, evaluate, and predict functions.
| |
| */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit_multiple_model( |
| source_table VARCHAR, |
| model_output_table VARCHAR, |
| model_selection_table VARCHAR, |
| num_iterations INTEGER, |
| use_gpus BOOLEAN, |
| validation_table VARCHAR, |
| metrics_compute_frequency INTEGER, |
| warm_start BOOLEAN, |
| name VARCHAR, |
| description VARCHAR, |
| use_caching BOOLEAN DEFAULT FALSE |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras_fit_multiple_model') |
| from utilities.control import SetGUC |
| with AOControl(False): |
| with SetGUC("plan_cache_mode", "force_generic_plan"): |
| with MinWarning("warning"): |
| fit_obj = madlib_keras_fit_multiple_model.FitMultipleModel(**globals()) |
| fit_obj.fit_multiple_model() |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit_multiple_model( |
| source_table VARCHAR, |
| model_output_table VARCHAR, |
| model_selection_table VARCHAR, |
| num_iterations INTEGER, |
| use_gpus BOOLEAN, |
| validation_table VARCHAR, |
| metrics_compute_frequency INTEGER, |
| warm_start BOOLEAN, |
| name VARCHAR |
| ) RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_fit_multiple_model($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit_multiple_model( |
| source_table VARCHAR, |
| model_output_table VARCHAR, |
| model_selection_table VARCHAR, |
| num_iterations INTEGER, |
| use_gpus BOOLEAN, |
| validation_table VARCHAR, |
| metrics_compute_frequency INTEGER, |
| warm_start BOOLEAN |
| ) RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_fit_multiple_model($1, $2, $3, $4, $5, $6, $7, $8, NULL, NULL); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit_multiple_model( |
| source_table VARCHAR, |
| model_output_table VARCHAR, |
| model_selection_table VARCHAR, |
| num_iterations INTEGER, |
| use_gpus BOOLEAN, |
| validation_table VARCHAR, |
| metrics_compute_frequency INTEGER |
| ) RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_fit_multiple_model($1, $2, $3, $4, $5, $6, $7, FALSE, NULL, NULL); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit_multiple_model( |
| source_table VARCHAR, |
| model_output_table VARCHAR, |
| model_selection_table VARCHAR, |
| num_iterations INTEGER, |
| use_gpus BOOLEAN, |
| validation_table VARCHAR |
| ) RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_fit_multiple_model($1, $2, $3, $4, $5, $6, NULL, FALSE, NULL, NULL); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit_multiple_model( |
| source_table VARCHAR, |
| model_output_table VARCHAR, |
| model_selection_table VARCHAR, |
| num_iterations INTEGER, |
| use_gpus BOOLEAN |
| ) RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_fit_multiple_model($1, $2, $3, $4, $5, NULL, NULL, FALSE, NULL, NULL); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.fit_transition_multiple_model( |
| dependent_var BYTEA[], |
| independent_var BYTEA[], |
| dependent_var_shape INTEGER[], |
| independent_var_shape INTEGER[], |
| model_architecture TEXT, |
| compile_params TEXT, |
| fit_params TEXT, |
| dist_key INTEGER, |
| dist_key_mapping INTEGER[], |
| current_seg_id INTEGER, |
| segments_per_host INTEGER[], |
| images_per_seg INTEGER[], |
| accessible_gpus_for_seg INTEGER[], |
| serialized_weights BYTEA, |
| is_final_training_call BOOLEAN, |
| use_caching BOOLEAN, |
| custom_function_map BYTEA |
| ) RETURNS BYTEA AS $$ |
| PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras') |
| import traceback |
| from sys import exc_info |
| import plpy |
| try: |
| if use_caching: |
| return madlib_keras.fit_multiple_transition_caching(**globals()) |
| else: |
| return madlib_keras.fit_transition(state=None, prev_serialized_weights=serialized_weights, |
| is_multiple_model=True, **globals()) |
| except Exception as e: |
| etype, _, tb = exc_info() |
| detail = ''.join(traceback.format_exception(etype, e, tb)) |
| message = e.message + 'UDFDetail' + detail |
| e.args = (message,) |
| raise e |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |