| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| * |
| * @file madlib_keras.sql_in |
| * |
| * @brief Fit, evaluate and predict for one model. |
| * @date June 2019 |
| * |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| /** |
| @addtogroup grp_keras |
| |
| @brief Fit, evaluate and predict for one model. |
| |
| <div class="toc"><b>Contents</b><ul> |
| <li class="level1"><a href="#keras_fit">Fit</a></li> |
| <li class="level1"><a href="#keras_evaluate">Evaluate</a></li> |
| <li class="level1"><a href="#keras_predict">Predict</a></li> |
| <li class="level1"><a href="#keras_predict_byom">Predict BYOM</a></li> |
| <li class="level1"><a href="#example">Examples</a></li> |
| <li class="level1"><a href="#notes">Notes</a></li> |
| <li class="level1"><a href="#background">Technical Background</a></li> |
| <li class="level1"><a href="#literature">Literature</a></li> |
| <li class="level1"><a href="#related">Related Topics</a></li> |
| </ul></div> |
| |
| This module allows you to use SQL to call deep learning |
| models designed in Keras [1], which is a high-level neural |
| network API written in Python. |
| |
| Keras can run on top of different backends and the one that is currently |
| supported by MADlib is TensorFlow [2]. The implementation |
| in MADlib is designed to train |
| a single model across multiple segments (workers) |
| in Greenplum database. (PostgreSQL is also supported.) |
| Alternatively, to train multiple models at the same time for model |
| architecture search or hyperparameter tuning, you can |
| use the methods in <a href="group__grp__model__selection.html">Train Multiple Models</a>. |
| |
| The main use case supported is classification |
| using sequential models, which are made up of a |
| linear stack of layers. This includes multilayer perceptrons (MLPs) |
| and convolutional neural networks (CNNs). Regression is not |
| currently supported. |
| |
| Before using Keras in MADlib you will need to preprocess |
| your training and evaluation datasets by using the method called |
| <a href="group__grp__input__preprocessor__dl.html">Preprocess |
| Data</a> which is a utility that prepares data for |
| use by models that support mini-batch as an optimization option. |
| This is a one-time operation and you would only |
| need to re-run the preprocessor if your input data has changed. |
| The advantage of using mini-batching is that it |
| can perform better than stochastic gradient descent |
| because it uses more than one training example at a time, |
| typically resulting faster and smoother convergence [3]. |
| |
| You can also do inference on models that have not been trained in MADlib, |
| but rather imported from an external source. This is described in the section |
| called "Predict BYOM" below, where "BYOM" stands for "Bring Your Own Model." |
| |
| Note that the following MADlib functions are targeting a specific TensorFlow |
| kernel version (1.14). Using a newer or older version may or may not work as intended. |
| |
| MADlib's deep learning methods are designed to use the TensorFlow package and its built in Keras |
| functions. To ensure consistency, please use tensorflow.keras objects (models, layers, etc.) |
| instead of importing Keras and using its objects. |
| |
| @note CUDA GPU memory cannot be released until the process holding it is terminated. |
| When a MADlib deep learning function is called with GPUs, Greenplum internally |
| creates a process (called a slice) which calls TensorFlow to do the computation. |
| This process holds the GPU memory until one of the following two things happen: |
| query finishes and user logs out of the Postgres client/session; or, |
| query finishes and user waits for the timeout set by gp_vmem_idle_resource_timeout. |
| The default value for this timeout is 18 sec [8]. So the recommendation is: |
| log out/reconnect to the session after every GPU query; or |
| wait for gp_vmem_idle_resource_timeout before you run another GPU query (you can |
| also set it to a lower value). |
| |
| @anchor keras_fit |
| @par Fit |
| The fit (training) function has the following format: |
| |
| <pre class="syntax"> |
| madlib_keras_fit( |
| source_table, |
| model, |
| model_arch_table, |
| model_id, |
| compile_params, |
| fit_params, |
| num_iterations, |
| use_gpus, |
| validation_table, |
| metrics_compute_frequency, |
| warm_start, |
| name, |
| description, |
| object_table |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>source_table</dt> |
| <dd>TEXT. Name of the table containing the training data. |
| This is the name of the output |
| table from the data preprocessor. Independent |
| and dependent variables are specified in the preprocessor |
| step which is why you do not need to explictly state |
| them here as part of the fit function.</dd> |
| |
| <dt>model</dt> |
| <dd>TEXT. Name of the output table containing the model. |
| Details of the output table are shown below. |
| </dd> |
| |
| <dt>model_arch_table</dt> |
| <dd>TEXT. Name of the table containing the model |
| architecture and (optionally) initial weights to use for |
| training. |
| </dd> |
| |
| <dt>model_id</dt> |
| <dd>INTEGER. This is the id in 'model_arch_table' |
| containing the model architecture and (optionally) |
| initial weights to use for training. |
| </dd> |
| |
| <DT>compile_params</DT> |
| <DD>TEXT. |
| Parameters passed to the compile method of the Keras |
| model class [4]. These parameters will be passed through as is |
| so they must conform to the Keras API definition. |
| As an example, you might use something like: <em>loss='categorical_crossentropy', optimizer='adam', metrics=['acc']</em>. |
| The mandatory parameters that must be specified are 'optimizer' |
| and 'loss'. Others are optional and will use the default |
| values as per Keras if not specified here. Also, when |
| specifying 'loss' and 'metrics' do <em>not</em> include the |
| module and submodule prefixes |
| like <em>loss='losses.categorical_crossentropy'</em> |
| or <em>optimizer='keras.optmizers.adam'</em>. |
| |
| @note |
| - Custom loss functions and custom metrics can be used as defined in |
| <a href="group__grp__custom__function.html">Define Custom Functions.</a> |
| List the custom function name and provide the name of the table where the |
| serialized Python objects reside using the parameter 'object_table' below. |
| - The following loss function is |
| not supported: <em>sparse_categorical_crossentropy</em>. |
| The following metrics are not |
| supported: <em>sparse_categorical_accuracy, sparse_top_k_categorical_accuracy</em>. |
| - The Keras accuracy parameter <em>top_k_categorical_accuracy</em> returns top 5 accuracy by |
| default. If you want a different top k value, use the helper function |
| <a href="group__grp__custom__function.html#top_k_function">Top k Accuracy Function</a> |
| to create a custom |
| Python function to compute the top k accuracy that you want. |
| |
| </DD> |
| |
| <DT>fit_params </DT> |
| <DD>TEXT. Parameters passed to the fit method of the Keras |
| model class [4]. These will be passed through as is |
| so they must conform to the Keras API definition. |
| As an example, you might use something like: |
| <em>batch_size=128, epochs=4</em>. |
| There are no mandatory parameters so |
| if you specify NULL, it will use all default |
| values as per Keras. |
| |
| @note |
| Callbacks are not currently supported except for TensorBoard |
| which you can specify in the usual way, |
| e.g., callbacks=[TensorBoard(log_dir="/tmp/logs/fit")] |
| </DD> |
| |
| <DT>num_iterations</DT> |
| <DD>INTEGER. Number of iterations to train. |
| </DD> |
| |
| <DT>use_gpus (optional)</DT> |
| <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines whether GPUs |
| are to be used for training the neural network. Set to TRUE to use GPUs. |
| |
| @note |
| This parameter must not conflict with how the distribution rules are set in |
| the preprocessor function. For example, if you set a distribution rule to use |
| certain segments on hosts that do not have GPUs attached, you will get an error |
| if you set ‘use_gpus’ to TRUE. Also, we have seen some memory related issues |
| when segments share GPU resources. |
| For example, if you have 1 GPU per segment host and your cluster has 4 |
| segments per segment host, it means that all 4 |
| segments will share the same |
| GPU on each host. The current recommended |
| configuration is 1 GPU per segment. |
| </DD> |
| |
| <dt>validation_table (optional)</dt> |
| <dd>TEXT, default: none. Name of the table containing |
| the validation dataset. |
| Note that the validation dataset must be preprocessed |
| in the same way as the training dataset, so this |
| is the name of the output |
| table from running the data preprocessor on the validation dataset. |
| Using a validation dataset can mean a |
| longer training time, depending on its size. |
| This can be controlled using the 'metrics_compute_frequency' |
| paremeter described below.</dd> |
| |
| <DT>metrics_compute_frequency (optional)</DT> |
| <DD>INTEGER, default: once at the end of training |
| after 'num_iterations'. Frequency to compute per-iteration |
| metrics for the training dataset and validation dataset |
| (if specified). There can be considerable cost to |
| computing metrics every iteration, especially if the |
| training dataset is large. This parameter is a way of |
| controlling the frequency of those computations. |
| For example, if you specify 5, then metrics will be computed |
| every 5 iterations as well as at the end of training |
| after 'num_iterations'. If you use the default, |
| metrics will be computed only |
| once after 'num_iterations' have completed. |
| </DD> |
| |
| <DT>warm_start (optional)</DT> |
| <DD>BOOLEAN, default: FALSE. |
| Initalize weights with the coefficients |
| from the last call of the fit |
| function. If set to TRUE, weights will be |
| initialized from the model table |
| generated by the previous training run. |
| |
| @note |
| The warm start feature works based on the name of the |
| model output table from a previous training run. |
| When using warm start, do not drop the model output table |
| or the model output summary table |
| before calling the fit function, since these are needed to obtain the |
| weights from the previous run. |
| If you are not using warm start, the model output table |
| and the model output table summary must be dropped in |
| the usual way before calling the training function. |
| </DD> |
| |
| <DT>name (optional)</DT> |
| <DD>TEXT, default: NULL. |
| Free text string to identify a name, if desired. |
| </DD> |
| |
| <DT>description (optional)</DT> |
| <DD>TEXT, default: NULL. |
| Free text string to provide a description, if desired. |
| </DD> |
| |
| <DT>object_table (optional)</DT> |
| <DD>TEXT, default: NULL. |
| Name of the table that contains the custom functions. Note that this table |
| must be created using the <a href="group__grp__custom__function.html">Define Custom Functions</a> method. Do not |
| qualify with a schema name, since schema will be automatically pulled from |
| the function definition. |
| </DD> |
| </dl> |
| |
| <b>Output tables</b> |
| <br> |
| The model table produced by fit contains the following columns: |
| <table class="output"> |
| <tr> |
| <th>model_weights</th> |
| <td>BYTEA8. Byte array containing the weights of the neural net.</td> |
| </tr> |
| <tr> |
| <th>model_arch</th> |
| <td>TEXT. A JSON representation of the model architecture |
| used in training.</td> |
| </tr> |
| </table> |
| |
| A summary table named \<model\>_summary is also created, which has the following columns: |
| <table class="output"> |
| <tr> |
| <th>source_table</th> |
| <td>Source table used for training.</td> |
| </tr> |
| <tr> |
| <th>model</th> |
| <td>Model output table produced by training.</td> |
| </tr> |
| <tr> |
| <th>dependent_varname</th> |
| <td>Dependent variable column from the original |
| source table in the data preprocessing step.</td> |
| </tr> |
| <tr> |
| <th>independent_varname</th> |
| <td>Independent variables column from the original |
| source table in the data preprocessing step.</td> |
| </tr> |
| <tr> |
| <th>model_arch_table</th> |
| <td>Name of the table containing |
| the model architecture and (optionally) the |
| initial model weights.</td> |
| </tr> |
| <tr> |
| <th>model_id</th> |
| <td>The id of the model in |
| the model architecture table used for training.</td> |
| </tr> |
| <tr> |
| <th>compile_params</th> |
| <td>Compile parameters passed to Keras.</td> |
| </tr> |
| <tr> |
| <th>fit_params</th> |
| <td>Fit parameters passed to Keras.</td> |
| </tr> |
| <tr> |
| <th>num_iterations</th> |
| <td>Number of iterations of training completed.</td> |
| </tr> |
| <tr> |
| <th>validation_table</th> |
| <td>Name of the table containing |
| the validation dataset (if specified).</td> |
| </tr> |
| <tr> |
| <th>metrics_compute_frequency</th> |
| <td>Frequency that per-iteration metrics are computed |
| for the training dataset and validation |
| dataset.</td> |
| </tr> |
| <tr> |
| <th>name</th> |
| <td>Name of the training run (free text).</td> |
| </tr> |
| <tr> |
| <th>description</th> |
| <td>Description of the training run (free text).</td> |
| </tr> |
| <tr> |
| <th>model_type</th> |
| <td>General identifier for type of model trained. |
| Currently says 'madlib_keras'.</td> |
| </tr> |
| <tr> |
| <th>model_size</th> |
| <td>Size of the model in KB. Models are stored in |
| 'bytea' data format which is used for binary strings |
| in PostgreSQL type databases.</td> |
| </tr> |
| <tr> |
| <th>start_training_time</th> |
| <td>Timestamp for start of training.</td> |
| </tr> |
| <tr> |
| <th>end_training_time</th> |
| <td>Timestamp for end of training.</td> |
| </tr> |
| <tr> |
| <th>metrics_elapsed_time</th> |
| <td> Array of elapsed time for metric computations as |
| per the 'metrics_compute_frequency' parameter. |
| Useful for drawing a curve showing loss, accuracy or |
| other metrics as a function of time. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of elapsed time for every 5th |
| iteration, plus the last iteration. |
| Note that this field reports the time for training + |
| validation, if there is a validation table provided.</td> |
| </tr> |
| <tr> |
| <th>madlib_version</th> |
| <td>Version of MADlib used.</td> |
| </tr> |
| <tr> |
| <th>num_classes</th> |
| <td>Count of distinct classes values used for each dependent variable.</td> |
| </tr> |
| <tr> |
| <th>dependent_vartype</th> |
| <td>Data type for each dependent variable.</td> |
| </tr> |
| <tr> |
| <th>normalizing_constant</th> |
| <td>Normalizing constant used from the |
| data preprocessing step.</td> |
| </tr> |
| <tr> |
| <th>metrics_type</th> |
| <td>Metric specified in the 'compile_params'.</td> |
| </tr> |
| <tr> |
| <th>loss_type</th> |
| <td>Loss specified in the 'compile_params'.</td> |
| </tr> |
| <tr> |
| <th>training_metrics_final</th> |
| <td>Final value of the training |
| metric after all iterations have completed. |
| The metric reported is the one |
| specified in the 'metrics_type' parameter.</td> |
| </tr> |
| <tr> |
| <th>training_loss_final</th> |
| <td>Final value of the training loss after all |
| iterations have completed.</td> |
| </tr> |
| <tr> |
| <th>training_metrics</th> |
| <td>Array of training metrics as |
| per the 'metrics_compute_frequency' parameter. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of metrics for every 5th |
| iteration, plus the last iteration.</td> |
| </tr> |
| <tr> |
| <th>training_loss</th> |
| <td>Array of training losses as |
| per the 'metrics_compute_frequency' parameter. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of losses for every 5th |
| iteration, plus the last iteration.</td> |
| </tr> |
| <tr> |
| <th>validation_metrics_final</th> |
| <td>Final value of the validation |
| metric after all iterations have completed. |
| The metric reported is the one |
| specified in the 'metrics_type' parameter.</td> |
| </tr> |
| <tr> |
| <th>validation_loss_final</th> |
| <td>Final value of the validation loss after all |
| iterations have completed.</td> |
| </tr> |
| <tr> |
| <th>validation_metrics</th> |
| <td>Array of validation metrics as |
| per the 'metrics_compute_frequency' parameter. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of metrics for every 5th |
| iteration, plus the last iteration.</td> |
| </tr> |
| <tr> |
| <th>validation_loss</th> |
| <td>Array of validation losses as |
| per the 'metrics_compute_frequency' parameter. |
| For example, if 'metrics_compute_frequency=5' |
| this would be an array of losses for every 5th |
| iteration, plus the last iteration.</td> |
| </tr> |
| <tr> |
| <th>metrics_iters</th> |
| <td>Array indicating the iterations for which |
| metrics are calculated, as derived from the |
| parameters 'num_iterations' and 'metrics_compute_frequency'. |
| For example, if 'num_iterations=5' |
| and 'metrics_compute_frequency=2', then 'metrics_iters' value |
| would be {2,4,5} indicating that metrics were computed |
| at iterations 2, 4 and 5 (at the end). |
| If 'num_iterations=5' |
| and 'metrics_compute_frequency=1', then 'metrics_iters' value |
| would be {1,2,3,4,5} indicating that metrics were computed |
| at every iteration.</td> |
| </tr> |
| <tr> |
| <th><dependent_varname>_class_values</th> |
| <td>Array of class values used for a particular dependent |
| variable. A column will be generated for each dependent variable.</td> |
| </tr> |
| </table> |
| |
| @anchor keras_evaluate |
| @par Evaluate |
| The evaluation function has the following format: |
| |
| <pre class="syntax"> |
| madlib_keras_evaluate( |
| model_table, |
| test_table, |
| output_table, |
| use_gpus |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| |
| <DT>model_table</DT> |
| <DD>TEXT. Name of the table containing the model |
| to use for validation. |
| </DD> |
| |
| <DT>test_table</DT> |
| <dd>TEXT. Name of the table containing the evaluation dataset. |
| Note that test/validation data must be preprocessed in the same |
| way as the training dataset, so |
| this is the name of the output |
| table from the data preprocessor. Independent |
| and dependent variables are specified in the preprocessor |
| step which is why you do not need to explictly state |
| them here as part of the fit function.</dd> |
| |
| <DT>output_table</DT> |
| <DD>TEXT. Name of table that validation output will be |
| written to. Table contains:</DD> |
| <table class="output"> |
| <tr> |
| <th>loss</th> |
| <td>Loss value on evaluation dataset, where 'loss_type' |
| below identifies the type of loss.</td> |
| </tr> |
| <tr> |
| <th>metric</th> |
| <td>Metric value on evaluation dataset, where 'metrics_type' |
| below identifies the type of metric.</td> |
| </tr> |
| <tr> |
| <th>metrics_type</th> |
| <td>Type of metric function that was used in the training step. |
| (It means you cannot have a different metric in evaluate compared |
| to training.)</td> |
| </tr> |
| <tr> |
| <th>loss_type</th> |
| <td>Type of loss function that was used in the training step. |
| (It means you cannot have a different loss in evaluate compared |
| to training.)</td> |
| </tr> |
| |
| <DT>use_gpus (optional)</DT> |
| <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines whether GPUs |
| are to be used for training the neural network. Set to TRUE to use GPUs. |
| |
| @note |
| This parameter must not conflict with how the distribution rules are set in |
| the preprocessor function. For example, if you set a distribution rule to use |
| certain segments on hosts that do not have GPUs attached, you will get an error |
| if you set ‘use_gpus’ to TRUE. Also, we have seen some memory related issues |
| when segments share GPU resources. |
| For example, if you have 1 GPU per segment host and your cluster has 4 |
| segments per segment host, it means that all 4 |
| segments will share the same |
| GPU on each host. The current recommended |
| configuration is 1 GPU per segment. |
| </DD> |
| |
| </DL> |
| |
| @anchor keras_predict |
| @par Predict |
| The prediction function has the following format: |
| <pre class="syntax"> |
| madlib_keras_predict( |
| model_table, |
| test_table, |
| id_col, |
| independent_varname, |
| output_table, |
| pred_type, |
| use_gpus |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| |
| <DT>model_table</DT> |
| <DD>TEXT. Name of the table containing the model |
| to use for prediction. |
| </DD> |
| |
| <DT>test_table</DT> |
| <DD>TEXT. Name of the table containing the dataset to |
| predict on. Note that test data is not preprocessed (unlike |
| fit and evaluate) so put one test image per row for prediction. |
| Also see the comment below for the 'independent_varname' parameter |
| regarding normalization. |
| |
| </DD> |
| |
| <DT>id_col</DT> |
| <DD>TEXT. Name of the id column in the test data table. |
| </DD> |
| |
| <DT>independent_varname</DT> |
| <DD>TEXT. Column with independent variables in the test table. |
| If a 'normalizing_const' is specified when preprocessing the |
| training dataset, this same normalization will be applied to |
| the independent variables used in predict. In the case that there |
| are multiple independent variables, |
| representing a multi-input neural network, |
| put the columns as a comma |
| separated list, e.g., 'indep_var1, indep_var2, indep_var3' in the same |
| way as was done in the preprocessor step for the training data. |
| </DD> |
| |
| <DT>output_table</DT> |
| <DD>TEXT. Name of the table that prediction output will be |
| written to. Table contains:</DD> |
| <table class="output"> |
| <tr> |
| <th>id</th> |
| <td>Gives the 'id' for each prediction, corresponding to each row from |
| the test_table.</td> |
| </tr> |
| <tr> |
| <th>class_name</th> |
| <td> |
| Name of variable being predicted. |
| </td> |
| </tr> |
| <tr> |
| <th>class_value</th> |
| <td> |
| Estimated class value. |
| </td> |
| </tr> |
| <tr> |
| <th>prob</th> |
| <td> |
| Probability of a given class value. |
| </td> |
| </tr> |
| <tr> |
| <th>rank</th> |
| <td> |
| The rank of a given class based on the ordering of probabilities. |
| </td> |
| </tr> |
| |
| <DT>pred_type (optional)</DT> |
| <DD>TEXT or INTEGER or DOUBLE PRECISION default: 'prob'. |
| The type and range of output desired. This parameter allows the following options. |
| - 'response': the actual prediction |
| - 'prob': the probability value for each class |
| - 0<value<1: the lower limit for the probability (double precision) |
| - 1<=value: the lower limit for the rank of the prediction (integer) |
| </DD> |
| |
| <DT>use_gpus (optional)</DT> |
| <DD>BOOLEAN, default: FALSE (i.e., CPU). |
| Flag to enable GPU support for training neural network. |
| The number of GPUs to use is determined by the parameters |
| passed to the preprocessor. |
| |
| @note |
| We have seen some memory related issues when segments |
| share GPU resources. |
| For example, if you provide 1 GPU and your |
| database cluster is set up to have 4 |
| segments per segment host, it means that all 4 |
| segments on a segment host will share the same |
| GPU. The current recommended |
| configuration is 1 GPU per segment. |
| </DD> |
| </DL> |
| |
| |
| @anchor keras_predict_byom |
| @par Predict BYOM (bring your own model) |
| The predict BYOM function allows you to do inference on models that |
| have not been trained on MADlib, but rather imported from elsewhere. |
| It has the following format: |
| <pre class="syntax"> |
| madlib_keras_predict_byom( |
| model_arch_table, |
| model_id, |
| test_table, |
| id_col, |
| independent_varname, |
| output_table, |
| pred_type, |
| use_gpus, |
| class_values, |
| normalizing_const, |
| dependent_count |
| ) |
| </pre> |
| |
| |
| \b Arguments |
| <dl class="arglist"> |
| |
| <DT>model_arch_table</DT> |
| <DD>TEXT. Name of the architecture table containing the model |
| to use for prediction. The model weights and architecture can be loaded to |
| this table by using the |
| <a href="group__grp__keras__model__arch.html">Define Model Architectures</a> function. |
| </DD> |
| |
| <DT>model_id</DT> |
| <DD>INTEGER. This is the id in 'model_arch_table' containing the model |
| architecture and model weights to use for prediction. |
| </DD> |
| |
| <DT>test_table</DT> |
| <DD>TEXT. Name of the table containing the dataset to |
| predict on. Note that test data is not preprocessed (unlike |
| fit and evaluate) so put one test image per row for prediction. |
| Set the 'normalizing_const' below for the independent variable if necessary. |
| </DD> |
| |
| <DT>id_col</DT> |
| <DD>TEXT. Name of the id column in the test data table. |
| </DD> |
| |
| <DT>independent_varname</DT> |
| <DD>TEXT. Column with independent variables in the test table. |
| Set the 'normalizing_const' below if necessary. |
| </DD> |
| |
| <DT>output_table</DT> |
| <DD>TEXT. Name of the table that prediction output will be |
| written to. Table contains:</DD> |
| <table class="output"> |
| <tr> |
| <th>id</th> |
| <td>Gives the 'id' for each prediction, corresponding to each row from |
| the test_table.</td> |
| </tr> |
| <tr> |
| <th>class_name</th> |
| <td> |
| The estimated variable. |
| </td> |
| </tr> |
| <tr> |
| <th>class_value</th> |
| <td> |
| The estimated class for classification. |
| </td> |
| </tr> |
| <tr> |
| <th>prob</th> |
| <td> |
| Probability of a given class. |
| </td> |
| </tr> |
| <tr> |
| <th>rank</th> |
| <td> |
| The rank of a given class based on the ordering of probabilities. |
| </td> |
| </tr> |
| |
| <DT>pred_type (optional)</DT> |
| <DD>TEXT, default: 'response'. The type of output desired, where 'response' |
| gives the actual prediction and 'prob' gives the probability value for each class. |
| </DD> |
| |
| <DT>use_gpus (optional)</DT> |
| <DD>BOOLEAN, default: FALSE (i.e., CPU). |
| Flag to enable GPU support for training neural network. |
| The number of GPUs to use is determined by the parameters |
| passed to the preprocessor. |
| |
| @note |
| We have seen some memory related issues when segments |
| share GPU resources. |
| For example, if you provide 1 GPU and your |
| database cluster is set up to have 4 |
| segments per segment host, it means that all 4 |
| segments on a segment host will share the same |
| GPU. The current recommended |
| configuration is 1 GPU per segment. |
| </DD> |
| |
| <DT>class_values (optional)</DT> |
| <DD>TEXT[], default: NULL. |
| Two dimensional list of class labels that were used while training the model |
| for each dependent variable. |
| |
| @note |
| If you specify the class values parameter, |
| it must reflect how the dependent variable was 1-hot encoded for training. |
| If you accidently pick another order that does not match the 1-hot encoding, |
| the predictions would be wrong. |
| </DD> |
| |
| <DT>normalizing_const (optional)</DT> |
| <DD>DOUBLE PRECISION, default: 1.0. |
| The normalizing constant to divide each value in the 'independent_varname' |
| array by. For example, you would use 255 for this value if the image data is |
| in the form 0-255. |
| </DD> |
| |
| <DT>dependent_count (optional)</DT> |
| <DD>INTEGER, default: 1. |
| The number of dependent variables in the model. |
| </DD> |
| </DL> |
| |
| |
| @anchor example |
| @par Examples |
| |
| @note |
| Deep learning works best on very large datasets, |
| but that is not convenient for a quick introduction |
| to the syntax. So in this example we use an MLP on the well |
| known iris data set from https://archive.ics.uci.edu/ml/datasets/iris. |
| For more realistic examples with images please refer |
| to the deep learning notebooks |
| at https://github.com/apache/madlib-site/tree/asf-site/community-artifacts. |
| |
| <h4>Classification</h4> |
| |
| -# Create an input data set. |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_data; |
| CREATE TABLE iris_data( |
| id serial, |
| attributes numeric[], |
| class_text varchar |
| ); |
| INSERT INTO iris_data(id, attributes, class_text) VALUES |
| (1,ARRAY[5.1,3.5,1.4,0.2],'Iris-setosa'), |
| (2,ARRAY[4.9,3.0,1.4,0.2],'Iris-setosa'), |
| (3,ARRAY[4.7,3.2,1.3,0.2],'Iris-setosa'), |
| (4,ARRAY[4.6,3.1,1.5,0.2],'Iris-setosa'), |
| (5,ARRAY[5.0,3.6,1.4,0.2],'Iris-setosa'), |
| (6,ARRAY[5.4,3.9,1.7,0.4],'Iris-setosa'), |
| (7,ARRAY[4.6,3.4,1.4,0.3],'Iris-setosa'), |
| (8,ARRAY[5.0,3.4,1.5,0.2],'Iris-setosa'), |
| (9,ARRAY[4.4,2.9,1.4,0.2],'Iris-setosa'), |
| (10,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'), |
| (11,ARRAY[5.4,3.7,1.5,0.2],'Iris-setosa'), |
| (12,ARRAY[4.8,3.4,1.6,0.2],'Iris-setosa'), |
| (13,ARRAY[4.8,3.0,1.4,0.1],'Iris-setosa'), |
| (14,ARRAY[4.3,3.0,1.1,0.1],'Iris-setosa'), |
| (15,ARRAY[5.8,4.0,1.2,0.2],'Iris-setosa'), |
| (16,ARRAY[5.7,4.4,1.5,0.4],'Iris-setosa'), |
| (17,ARRAY[5.4,3.9,1.3,0.4],'Iris-setosa'), |
| (18,ARRAY[5.1,3.5,1.4,0.3],'Iris-setosa'), |
| (19,ARRAY[5.7,3.8,1.7,0.3],'Iris-setosa'), |
| (20,ARRAY[5.1,3.8,1.5,0.3],'Iris-setosa'), |
| (21,ARRAY[5.4,3.4,1.7,0.2],'Iris-setosa'), |
| (22,ARRAY[5.1,3.7,1.5,0.4],'Iris-setosa'), |
| (23,ARRAY[4.6,3.6,1.0,0.2],'Iris-setosa'), |
| (24,ARRAY[5.1,3.3,1.7,0.5],'Iris-setosa'), |
| (25,ARRAY[4.8,3.4,1.9,0.2],'Iris-setosa'), |
| (26,ARRAY[5.0,3.0,1.6,0.2],'Iris-setosa'), |
| (27,ARRAY[5.0,3.4,1.6,0.4],'Iris-setosa'), |
| (28,ARRAY[5.2,3.5,1.5,0.2],'Iris-setosa'), |
| (29,ARRAY[5.2,3.4,1.4,0.2],'Iris-setosa'), |
| (30,ARRAY[4.7,3.2,1.6,0.2],'Iris-setosa'), |
| (31,ARRAY[4.8,3.1,1.6,0.2],'Iris-setosa'), |
| (32,ARRAY[5.4,3.4,1.5,0.4],'Iris-setosa'), |
| (33,ARRAY[5.2,4.1,1.5,0.1],'Iris-setosa'), |
| (34,ARRAY[5.5,4.2,1.4,0.2],'Iris-setosa'), |
| (35,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'), |
| (36,ARRAY[5.0,3.2,1.2,0.2],'Iris-setosa'), |
| (37,ARRAY[5.5,3.5,1.3,0.2],'Iris-setosa'), |
| (38,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'), |
| (39,ARRAY[4.4,3.0,1.3,0.2],'Iris-setosa'), |
| (40,ARRAY[5.1,3.4,1.5,0.2],'Iris-setosa'), |
| (41,ARRAY[5.0,3.5,1.3,0.3],'Iris-setosa'), |
| (42,ARRAY[4.5,2.3,1.3,0.3],'Iris-setosa'), |
| (43,ARRAY[4.4,3.2,1.3,0.2],'Iris-setosa'), |
| (44,ARRAY[5.0,3.5,1.6,0.6],'Iris-setosa'), |
| (45,ARRAY[5.1,3.8,1.9,0.4],'Iris-setosa'), |
| (46,ARRAY[4.8,3.0,1.4,0.3],'Iris-setosa'), |
| (47,ARRAY[5.1,3.8,1.6,0.2],'Iris-setosa'), |
| (48,ARRAY[4.6,3.2,1.4,0.2],'Iris-setosa'), |
| (49,ARRAY[5.3,3.7,1.5,0.2],'Iris-setosa'), |
| (50,ARRAY[5.0,3.3,1.4,0.2],'Iris-setosa'), |
| (51,ARRAY[7.0,3.2,4.7,1.4],'Iris-versicolor'), |
| (52,ARRAY[6.4,3.2,4.5,1.5],'Iris-versicolor'), |
| (53,ARRAY[6.9,3.1,4.9,1.5],'Iris-versicolor'), |
| (54,ARRAY[5.5,2.3,4.0,1.3],'Iris-versicolor'), |
| (55,ARRAY[6.5,2.8,4.6,1.5],'Iris-versicolor'), |
| (56,ARRAY[5.7,2.8,4.5,1.3],'Iris-versicolor'), |
| (57,ARRAY[6.3,3.3,4.7,1.6],'Iris-versicolor'), |
| (58,ARRAY[4.9,2.4,3.3,1.0],'Iris-versicolor'), |
| (59,ARRAY[6.6,2.9,4.6,1.3],'Iris-versicolor'), |
| (60,ARRAY[5.2,2.7,3.9,1.4],'Iris-versicolor'), |
| (61,ARRAY[5.0,2.0,3.5,1.0],'Iris-versicolor'), |
| (62,ARRAY[5.9,3.0,4.2,1.5],'Iris-versicolor'), |
| (63,ARRAY[6.0,2.2,4.0,1.0],'Iris-versicolor'), |
| (64,ARRAY[6.1,2.9,4.7,1.4],'Iris-versicolor'), |
| (65,ARRAY[5.6,2.9,3.6,1.3],'Iris-versicolor'), |
| (66,ARRAY[6.7,3.1,4.4,1.4],'Iris-versicolor'), |
| (67,ARRAY[5.6,3.0,4.5,1.5],'Iris-versicolor'), |
| (68,ARRAY[5.8,2.7,4.1,1.0],'Iris-versicolor'), |
| (69,ARRAY[6.2,2.2,4.5,1.5],'Iris-versicolor'), |
| (70,ARRAY[5.6,2.5,3.9,1.1],'Iris-versicolor'), |
| (71,ARRAY[5.9,3.2,4.8,1.8],'Iris-versicolor'), |
| (72,ARRAY[6.1,2.8,4.0,1.3],'Iris-versicolor'), |
| (73,ARRAY[6.3,2.5,4.9,1.5],'Iris-versicolor'), |
| (74,ARRAY[6.1,2.8,4.7,1.2],'Iris-versicolor'), |
| (75,ARRAY[6.4,2.9,4.3,1.3],'Iris-versicolor'), |
| (76,ARRAY[6.6,3.0,4.4,1.4],'Iris-versicolor'), |
| (77,ARRAY[6.8,2.8,4.8,1.4],'Iris-versicolor'), |
| (78,ARRAY[6.7,3.0,5.0,1.7],'Iris-versicolor'), |
| (79,ARRAY[6.0,2.9,4.5,1.5],'Iris-versicolor'), |
| (80,ARRAY[5.7,2.6,3.5,1.0],'Iris-versicolor'), |
| (81,ARRAY[5.5,2.4,3.8,1.1],'Iris-versicolor'), |
| (82,ARRAY[5.5,2.4,3.7,1.0],'Iris-versicolor'), |
| (83,ARRAY[5.8,2.7,3.9,1.2],'Iris-versicolor'), |
| (84,ARRAY[6.0,2.7,5.1,1.6],'Iris-versicolor'), |
| (85,ARRAY[5.4,3.0,4.5,1.5],'Iris-versicolor'), |
| (86,ARRAY[6.0,3.4,4.5,1.6],'Iris-versicolor'), |
| (87,ARRAY[6.7,3.1,4.7,1.5],'Iris-versicolor'), |
| (88,ARRAY[6.3,2.3,4.4,1.3],'Iris-versicolor'), |
| (89,ARRAY[5.6,3.0,4.1,1.3],'Iris-versicolor'), |
| (90,ARRAY[5.5,2.5,4.0,1.3],'Iris-versicolor'), |
| (91,ARRAY[5.5,2.6,4.4,1.2],'Iris-versicolor'), |
| (92,ARRAY[6.1,3.0,4.6,1.4],'Iris-versicolor'), |
| (93,ARRAY[5.8,2.6,4.0,1.2],'Iris-versicolor'), |
| (94,ARRAY[5.0,2.3,3.3,1.0],'Iris-versicolor'), |
| (95,ARRAY[5.6,2.7,4.2,1.3],'Iris-versicolor'), |
| (96,ARRAY[5.7,3.0,4.2,1.2],'Iris-versicolor'), |
| (97,ARRAY[5.7,2.9,4.2,1.3],'Iris-versicolor'), |
| (98,ARRAY[6.2,2.9,4.3,1.3],'Iris-versicolor'), |
| (99,ARRAY[5.1,2.5,3.0,1.1],'Iris-versicolor'), |
| (100,ARRAY[5.7,2.8,4.1,1.3],'Iris-versicolor'), |
| (101,ARRAY[6.3,3.3,6.0,2.5],'Iris-virginica'), |
| (102,ARRAY[5.8,2.7,5.1,1.9],'Iris-virginica'), |
| (103,ARRAY[7.1,3.0,5.9,2.1],'Iris-virginica'), |
| (104,ARRAY[6.3,2.9,5.6,1.8],'Iris-virginica'), |
| (105,ARRAY[6.5,3.0,5.8,2.2],'Iris-virginica'), |
| (106,ARRAY[7.6,3.0,6.6,2.1],'Iris-virginica'), |
| (107,ARRAY[4.9,2.5,4.5,1.7],'Iris-virginica'), |
| (108,ARRAY[7.3,2.9,6.3,1.8],'Iris-virginica'), |
| (109,ARRAY[6.7,2.5,5.8,1.8],'Iris-virginica'), |
| (110,ARRAY[7.2,3.6,6.1,2.5],'Iris-virginica'), |
| (111,ARRAY[6.5,3.2,5.1,2.0],'Iris-virginica'), |
| (112,ARRAY[6.4,2.7,5.3,1.9],'Iris-virginica'), |
| (113,ARRAY[6.8,3.0,5.5,2.1],'Iris-virginica'), |
| (114,ARRAY[5.7,2.5,5.0,2.0],'Iris-virginica'), |
| (115,ARRAY[5.8,2.8,5.1,2.4],'Iris-virginica'), |
| (116,ARRAY[6.4,3.2,5.3,2.3],'Iris-virginica'), |
| (117,ARRAY[6.5,3.0,5.5,1.8],'Iris-virginica'), |
| (118,ARRAY[7.7,3.8,6.7,2.2],'Iris-virginica'), |
| (119,ARRAY[7.7,2.6,6.9,2.3],'Iris-virginica'), |
| (120,ARRAY[6.0,2.2,5.0,1.5],'Iris-virginica'), |
| (121,ARRAY[6.9,3.2,5.7,2.3],'Iris-virginica'), |
| (122,ARRAY[5.6,2.8,4.9,2.0],'Iris-virginica'), |
| (123,ARRAY[7.7,2.8,6.7,2.0],'Iris-virginica'), |
| (124,ARRAY[6.3,2.7,4.9,1.8],'Iris-virginica'), |
| (125,ARRAY[6.7,3.3,5.7,2.1],'Iris-virginica'), |
| (126,ARRAY[7.2,3.2,6.0,1.8],'Iris-virginica'), |
| (127,ARRAY[6.2,2.8,4.8,1.8],'Iris-virginica'), |
| (128,ARRAY[6.1,3.0,4.9,1.8],'Iris-virginica'), |
| (129,ARRAY[6.4,2.8,5.6,2.1],'Iris-virginica'), |
| (130,ARRAY[7.2,3.0,5.8,1.6],'Iris-virginica'), |
| (131,ARRAY[7.4,2.8,6.1,1.9],'Iris-virginica'), |
| (132,ARRAY[7.9,3.8,6.4,2.0],'Iris-virginica'), |
| (133,ARRAY[6.4,2.8,5.6,2.2],'Iris-virginica'), |
| (134,ARRAY[6.3,2.8,5.1,1.5],'Iris-virginica'), |
| (135,ARRAY[6.1,2.6,5.6,1.4],'Iris-virginica'), |
| (136,ARRAY[7.7,3.0,6.1,2.3],'Iris-virginica'), |
| (137,ARRAY[6.3,3.4,5.6,2.4],'Iris-virginica'), |
| (138,ARRAY[6.4,3.1,5.5,1.8],'Iris-virginica'), |
| (139,ARRAY[6.0,3.0,4.8,1.8],'Iris-virginica'), |
| (140,ARRAY[6.9,3.1,5.4,2.1],'Iris-virginica'), |
| (141,ARRAY[6.7,3.1,5.6,2.4],'Iris-virginica'), |
| (142,ARRAY[6.9,3.1,5.1,2.3],'Iris-virginica'), |
| (143,ARRAY[5.8,2.7,5.1,1.9],'Iris-virginica'), |
| (144,ARRAY[6.8,3.2,5.9,2.3],'Iris-virginica'), |
| (145,ARRAY[6.7,3.3,5.7,2.5],'Iris-virginica'), |
| (146,ARRAY[6.7,3.0,5.2,2.3],'Iris-virginica'), |
| (147,ARRAY[6.3,2.5,5.0,1.9],'Iris-virginica'), |
| (148,ARRAY[6.5,3.0,5.2,2.0],'Iris-virginica'), |
| (149,ARRAY[6.2,3.4,5.4,2.3],'Iris-virginica'), |
| (150,ARRAY[5.9,3.0,5.1,1.8],'Iris-virginica'); |
| </pre> |
| Create a test/validation dataset from the training data: |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_train, iris_test; |
| -- Set seed so results are reproducible |
| SELECT setseed(0); |
| SELECT madlib.train_test_split('iris_data', -- Source table |
| 'iris', -- Output table root name |
| 0.8, -- Train proportion |
| NULL, -- Test proportion (0.2) |
| NULL, -- Strata definition |
| NULL, -- Output all columns |
| NULL, -- Sample without replacement |
| TRUE -- Separate output tables |
| ); |
| SELECT COUNT(*) FROM iris_train; |
| </pre> |
| <pre class="result"> |
| count |
| ------+ |
| 120 |
| </pre> |
| |
| -# Call the preprocessor for deep learning. For the training dataset: |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_train_packed, iris_train_packed_summary; |
| SELECT madlib.training_preprocessor_dl('iris_train', -- Source table |
| 'iris_train_packed', -- Output table |
| 'class_text', -- Dependent variable |
| 'attributes' -- Independent variable |
| ); |
| \\x on |
| SELECT * FROM iris_train_packed_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-----------+--------------------------------------------- |
| source_table | iris_train |
| output_table | iris_train_packed |
| dependent_varname | {class_text} |
| independent_varname | {attributes} |
| dependent_vartype | {"character varying"} |
| class_text_class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| buffer_size | 40 |
| normalizing_const | 1 |
| num_classes | {3} |
| distribution_rules | all_segments |
| __internal_gpu_config__ | all_segments |
| </pre> |
| For the validation dataset: |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_test_packed, iris_test_packed_summary; |
| SELECT madlib.validation_preprocessor_dl('iris_test', -- Source table |
| 'iris_test_packed', -- Output table |
| 'class_text', -- Dependent variable |
| 'attributes', -- Independent variable |
| 'iris_train_packed' -- From training preprocessor step |
| ); |
| SELECT * FROM iris_test_packed_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-----------+--------------------------------------------- |
| source_table | iris_test |
| output_table | iris_test_packed |
| dependent_varname | {class_text} |
| independent_varname | {attributes} |
| dependent_vartype | {"character varying"} |
| class_text_class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| buffer_size | 10 |
| normalizing_const | 1 |
| num_classes | {3} |
| distribution_rules | all_segments |
| __internal_gpu_config__ | all_segments |
| </pre> |
| |
| -# Define and load model architecture. Use Keras to define |
| the model architecture: |
| <pre class="example"> |
| from tensorflow import keras |
| from tensorflow.keras.models import Sequential |
| from tensorflow.keras.layers import Dense |
| model_simple = Sequential() |
| model_simple.add(Dense(10, activation='relu', input_shape=(4,))) |
| model_simple.add(Dense(10, activation='relu')) |
| model_simple.add(Dense(3, activation='softmax')) |
| model_simple.summary() |
| \verbatim |
| |
| _________________________________________________________________ |
| Layer (type) Output Shape Param # |
| ================================================================= |
| dense_1 (Dense) (None, 10) 50 |
| _________________________________________________________________ |
| dense_2 (Dense) (None, 10) 110 |
| _________________________________________________________________ |
| dense_3 (Dense) (None, 3) 33 |
| ================================================================= |
| Total params: 193 |
| Trainable params: 193 |
| Non-trainable params: 0 |
| \endverbatim |
| </pre> |
| Export the model to JSON: |
| <pre class="example"> |
| model_simple.to_json() |
| </pre> |
| <pre class="result"> |
| '{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}' |
| </pre> |
| Load into model architecture table: |
| <pre class="example"> |
| DROP TABLE IF EXISTS model_arch_library; |
| SELECT madlib.load_keras_model('model_arch_library', -- Output table, |
| $$ |
| {"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"} |
| $$ |
| ::json, -- JSON blob |
| NULL, -- Weights |
| 'Sophie', -- Name |
| 'A simple model' -- Descr |
| ); |
| </pre> |
| |
| -# Train model and view summary table: |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_model, iris_model_summary; |
| SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table |
| 'iris_model', -- model output table |
| 'model_arch_library', -- model arch table |
| 1, -- model arch id |
| $$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params |
| $$ batch_size=5, epochs=3 $$, -- fit_params |
| 10 -- num_iterations |
| ); |
| SELECT * FROM iris_model_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-------------+-------------------------------------------------------------------------- |
| source_table | iris_train_packed |
| model | iris_model |
| dependent_varname | {class_text} |
| independent_varname | {attributes} |
| model_arch_table | model_arch_library |
| model_id | 1 |
| compile_params | loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] |
| fit_params | batch_size=5, epochs=3 |
| num_iterations | 10 |
| validation_table | |
| object_table | |
| metrics_compute_frequency | 10 |
| name | |
| description | |
| model_type | madlib_keras |
| model_size | 0.7900390625 |
| start_training_time | 2021-02-01 15:58:43.760568 |
| end_training_time | 2021-02-01 15:58:44.470054 |
| metrics_elapsed_time | {0.709463119506836} |
| madlib_version | 1.20.0 |
| num_classes | {3} |
| dependent_vartype | {"character varying"} |
| normalizing_const | 1 |
| metrics_type | {accuracy} |
| loss_type | categorical_crossentropy |
| training_metrics_final | 0.800000011920929 |
| training_loss_final | 0.519745767116547 |
| training_metrics | {0.800000011920929} |
| training_loss | {0.519745767116547} |
| validation_metrics_final | |
| validation_loss_final | |
| validation_metrics | |
| validation_loss | |
| metrics_iters | {10} |
| class_text_class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| </pre> |
| |
| -# Use the test dataset to evaluate the model we built above: |
| <pre class="example"> |
| \\x off |
| DROP TABLE IF EXISTS iris_validate; |
| SELECT madlib.madlib_keras_evaluate('iris_model', -- model |
| 'iris_test_packed', -- test table |
| 'iris_validate' -- output table |
| ); |
| SELECT * FROM iris_validate; |
| </pre> |
| <pre class="result"> |
| loss | metric | metrics_type | loss_type |
| -------------------+-------------------+--------------+-------------------------- |
| 0.566911578178406 | 0.699999988079071 | {accuracy} | categorical_crossentropy |
| (1 row) |
| </pre> |
| |
| -# Predict. We will use the validation dataset for prediction |
| as well, which is not usual but serves to show the |
| syntax. The prediction is in the 'estimated_class_text' |
| column: |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_predict; |
| SELECT madlib.madlib_keras_predict('iris_model', -- model |
| 'iris_test', -- test_table |
| 'id', -- id column |
| 'attributes', -- independent var |
| 'iris_predict' -- output table |
| ); |
| SELECT * FROM iris_predict ORDER BY id, rank; |
| </pre> |
| <pre class="result"> |
| id | class_name | class_value | prob | rank |
| -----+------------+-----------------+-------------+------ |
| 4 | class_text | Iris-setosa | 0.7689959 | 1 |
| 4 | class_text | Iris-virginica | 0.15600422 | 2 |
| 4 | class_text | Iris-versicolor | 0.07499986 | 3 |
| 9 | class_text | Iris-setosa | 0.7642913 | 1 |
| 9 | class_text | Iris-virginica | 0.15844841 | 2 |
| 9 | class_text | Iris-versicolor | 0.077260315 | 3 |
| 13 | class_text | Iris-setosa | 0.8160971 | 1 |
| 13 | class_text | Iris-virginica | 0.13053566 | 2 |
| 13 | class_text | Iris-versicolor | 0.05336728 | 3 |
| 14 | class_text | Iris-setosa | 0.804419 | 1 |
| 14 | class_text | Iris-virginica | 0.13591427 | 2 |
| 14 | class_text | Iris-versicolor | 0.05966685 | 3 |
| 15 | class_text | Iris-setosa | 0.88610095 | 1 |
| 15 | class_text | Iris-virginica | 0.08893245 | 2 |
| 15 | class_text | Iris-versicolor | 0.024966586 | 3 |
| 25 | class_text | Iris-setosa | 0.68195176 | 1 |
| 25 | class_text | Iris-virginica | 0.20334557 | 2 |
| 25 | class_text | Iris-versicolor | 0.114702694 | 3 |
| 34 | class_text | Iris-setosa | 0.8619849 | 1 |
| 34 | class_text | Iris-virginica | 0.1032386 | 2 |
| 34 | class_text | Iris-versicolor | 0.034776475 | 3 |
| 36 | class_text | Iris-setosa | 0.84423053 | 1 |
| 36 | class_text | Iris-virginica | 0.114072084 | 2 |
| 36 | class_text | Iris-versicolor | 0.04169741 | 3 |
| 39 | class_text | Iris-setosa | 0.79559565 | 1 |
| 39 | class_text | Iris-virginica | 0.13950573 | 2 |
| 39 | class_text | Iris-versicolor | 0.064898595 | 3 |
| 48 | class_text | Iris-setosa | 0.8010248 | 1 |
| 48 | class_text | Iris-virginica | 0.13615999 | 2 |
| 48 | class_text | Iris-versicolor | 0.06281526 | 3 |
| 56 | class_text | Iris-versicolor | 0.47732472 | 1 |
| 56 | class_text | Iris-virginica | 0.46635315 | 2 |
| 56 | class_text | Iris-setosa | 0.056322116 | 3 |
| 63 | class_text | Iris-virginica | 0.5329179 | 1 |
| 63 | class_text | Iris-versicolor | 0.38090497 | 2 |
| 63 | class_text | Iris-setosa | 0.086177126 | 3 |
| 65 | class_text | Iris-virginica | 0.4516514 | 1 |
| 65 | class_text | Iris-versicolor | 0.4330772 | 2 |
| 65 | class_text | Iris-setosa | 0.11527142 | 3 |
| 69 | class_text | Iris-virginica | 0.57348573 | 1 |
| 69 | class_text | Iris-versicolor | 0.36967018 | 2 |
| 69 | class_text | Iris-setosa | 0.05684407 | 3 |
| 72 | class_text | Iris-virginica | 0.4918356 | 1 |
| 72 | class_text | Iris-versicolor | 0.42640963 | 2 |
| 72 | class_text | Iris-setosa | 0.08175478 | 3 |
| 73 | class_text | Iris-virginica | 0.5534297 | 1 |
| 73 | class_text | Iris-versicolor | 0.39819974 | 2 |
| 73 | class_text | Iris-setosa | 0.04837051 | 3 |
| 75 | class_text | Iris-virginica | 0.4986787 | 1 |
| 75 | class_text | Iris-versicolor | 0.43546444 | 2 |
| 75 | class_text | Iris-setosa | 0.06585683 | 3 |
| 82 | class_text | Iris-virginica | 0.47533202 | 1 |
| 82 | class_text | Iris-versicolor | 0.43122545 | 2 |
| 82 | class_text | Iris-setosa | 0.09344252 | 3 |
| 90 | class_text | Iris-virginica | 0.47962278 | 1 |
| 90 | class_text | Iris-versicolor | 0.45068985 | 2 |
| 90 | class_text | Iris-setosa | 0.06968742 | 3 |
| 91 | class_text | Iris-virginica | 0.47005868 | 1 |
| 91 | class_text | Iris-versicolor | 0.4696341 | 2 |
| 91 | class_text | Iris-setosa | 0.060307216 | 3 |
| 97 | class_text | Iris-versicolor | 0.49070656 | 1 |
| 97 | class_text | Iris-virginica | 0.44852367 | 2 |
| 97 | class_text | Iris-setosa | 0.060769808 | 3 |
| 100 | class_text | Iris-versicolor | 0.47884703 | 1 |
| 100 | class_text | Iris-virginica | 0.4577389 | 2 |
| 100 | class_text | Iris-setosa | 0.06341412 | 3 |
| 102 | class_text | Iris-virginica | 0.5396443 | 1 |
| 102 | class_text | Iris-versicolor | 0.40945858 | 2 |
| 102 | class_text | Iris-setosa | 0.050897114 | 3 |
| 109 | class_text | Iris-virginica | 0.61228466 | 1 |
| 109 | class_text | Iris-versicolor | 0.3522025 | 2 |
| 109 | class_text | Iris-setosa | 0.03551281 | 3 |
| 114 | class_text | Iris-virginica | 0.562418 | 1 |
| 114 | class_text | Iris-versicolor | 0.38269255 | 2 |
| 114 | class_text | Iris-setosa | 0.05488944 | 3 |
| 128 | class_text | Iris-virginica | 0.50814027 | 1 |
| 128 | class_text | Iris-versicolor | 0.44240898 | 2 |
| 128 | class_text | Iris-setosa | 0.04945076 | 3 |
| 138 | class_text | Iris-virginica | 0.52319044 | 1 |
| 138 | class_text | Iris-versicolor | 0.43786547 | 2 |
| 138 | class_text | Iris-setosa | 0.03894412 | 3 |
| 140 | class_text | Iris-virginica | 0.5677875 | 1 |
| 140 | class_text | Iris-versicolor | 0.3936515 | 2 |
| 140 | class_text | Iris-setosa | 0.038560882 | 3 |
| 141 | class_text | Iris-virginica | 0.58414406 | 1 |
| 141 | class_text | Iris-versicolor | 0.3770253 | 2 |
| 141 | class_text | Iris-setosa | 0.03883058 | 3 |
| 150 | class_text | Iris-virginica | 0.5025033 | 1 |
| 150 | class_text | Iris-versicolor | 0.4495215 | 2 |
| 150 | class_text | Iris-setosa | 0.047975186 | 3 |
| (90 rows) |
| </pre> |
| Count missclassifications: |
| <pre class="example"> |
| SELECT COUNT(*) FROM iris_predict JOIN iris_test USING (id) |
| WHERE iris_predict.class_value != iris_test.class_text AND iris_predict.rank = 1; |
| </pre> |
| <pre class="result"> |
| count |
| -------+ |
| 9 |
| (1 row) |
| </pre> |
| Accuracy: |
| <pre class="example"> |
| SELECT round(count(*)*100/(150*0.2),2) as test_accuracy_percent from |
| (select iris_test.class_text as actual, iris_predict.class_value as estimated |
| from iris_predict inner join iris_test |
| on iris_test.id=iris_predict.id where iris_predict.rank = 1) q |
| WHERE q.actual=q.estimated; |
| </pre> |
| <pre class="result"> |
| test_accuracy_percent |
| -----------------------+ |
| 70.00 |
| (1 row) |
| </pre> |
| |
| -# Predict BYOM. |
| We will use the validation dataset for prediction |
| as well, which is not usual but serves to show the |
| syntax. See <a href="group__grp__keras__model__arch.html">Define Model Architectures</a> |
| for details on how to load the model architecture and weights. |
| In this example we will use weights we already have: |
| <pre class="example"> |
| UPDATE model_arch_library |
| SET model_weights = iris_model.model_weights |
| FROM iris_model |
| WHERE model_arch_library.model_id = 1; |
| </pre> |
| Now train using a model from the model architecture table directly |
| without referencing the model table from the MADlib training. Note that if you |
| specify the class values parameter as we do below, it must reflect how the dependent |
| variable was 1-hot encoded for training. In this example the 'training_preprocessor_dl()' |
| in Step 2 above encoded in the order {'Iris-setosa', 'Iris-versicolor', 'Iris-virginica'} so |
| this is the order we pass in the parameter. If we accidently pick another order that does |
| not match the 1-hot encoding, the predictions would be wrong. |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_predict_byom; |
| SELECT madlib.madlib_keras_predict_byom('model_arch_library', -- model arch table |
| 1, -- model arch id |
| 'iris_test', -- test_table |
| 'id', -- id column |
| 'attributes', -- independent var |
| 'iris_predict_byom', -- output table |
| 'response', -- prediction type |
| FALSE, -- use GPUs |
| ARRAY[ARRAY['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']], -- class values |
| 1.0 -- normalizing const |
| ); |
| SELECT * FROM iris_predict_byom ORDER BY id; |
| </pre> |
| The prediction is in the 'estimated_dependent_var' column: |
| <pre class="result"> |
| id | class_name | class_value | prob |
| -----+---------------+-----------------+------------ |
| 4 | dependent_var | Iris-setosa | 0.7689959 |
| 9 | dependent_var | Iris-setosa | 0.7642913 |
| 13 | dependent_var | Iris-setosa | 0.8160971 |
| 14 | dependent_var | Iris-setosa | 0.804419 |
| 15 | dependent_var | Iris-setosa | 0.88610095 |
| 25 | dependent_var | Iris-setosa | 0.68195176 |
| 34 | dependent_var | Iris-setosa | 0.8619849 |
| 36 | dependent_var | Iris-setosa | 0.84423053 |
| 39 | dependent_var | Iris-setosa | 0.79559565 |
| 48 | dependent_var | Iris-setosa | 0.8010248 |
| 56 | dependent_var | Iris-versicolor | 0.47732472 |
| 63 | dependent_var | Iris-virginica | 0.5329179 |
| 65 | dependent_var | Iris-virginica | 0.4516514 |
| 69 | dependent_var | Iris-virginica | 0.57348573 |
| 72 | dependent_var | Iris-virginica | 0.4918356 |
| 73 | dependent_var | Iris-virginica | 0.5534297 |
| 75 | dependent_var | Iris-virginica | 0.4986787 |
| 82 | dependent_var | Iris-virginica | 0.47533202 |
| 90 | dependent_var | Iris-virginica | 0.47962278 |
| 91 | dependent_var | Iris-virginica | 0.47005868 |
| 97 | dependent_var | Iris-versicolor | 0.49070656 |
| 100 | dependent_var | Iris-versicolor | 0.47884703 |
| 102 | dependent_var | Iris-virginica | 0.5396443 |
| 109 | dependent_var | Iris-virginica | 0.61228466 |
| 114 | dependent_var | Iris-virginica | 0.562418 |
| 128 | dependent_var | Iris-virginica | 0.50814027 |
| 138 | dependent_var | Iris-virginica | 0.52319044 |
| 140 | dependent_var | Iris-virginica | 0.5677875 |
| 141 | dependent_var | Iris-virginica | 0.58414406 |
| 150 | dependent_var | Iris-virginica | 0.5025033 |
| (30 rows) |
| </pre> |
| Count missclassifications: |
| <pre class="example"> |
| SELECT COUNT(*) FROM iris_predict_byom JOIN iris_test USING (id) |
| WHERE iris_predict_byom.class_value != iris_test.class_text; |
| </pre> |
| <pre class="result"> |
| count |
| -------+ |
| 9 |
| (1 row) |
| </pre> |
| Accuracy: |
| <pre class="example"> |
| SELECT round(count(*)*100/(150*0.2),2) as test_accuracy_percent from |
| (select iris_test.class_text as actual, iris_predict_byom.class_value as estimated |
| from iris_predict_byom inner join iris_test |
| on iris_test.id=iris_predict_byom.id) q |
| WHERE q.actual=q.estimated; |
| </pre> |
| <pre class="result"> |
| test_accuracy_percent |
| -----------------------+ |
| 70.00 |
| (1 row) |
| </pre> |
| |
| |
| <h4>Classification with Other Parameters</h4> |
| |
| -# Validation dataset. Now use a validation dataset |
| and compute metrics every 3rd iteration using |
| the 'metrics_compute_frequency' parameter. This can |
| help reduce run time if you do not need metrics |
| computed at every iteration. |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_model, iris_model_summary; |
| SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table |
| 'iris_model', -- model output table |
| 'model_arch_library', -- model arch table |
| 1, -- model arch id |
| $$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params |
| $$ batch_size=5, epochs=3 $$, -- fit_params |
| 10, -- num_iterations |
| FALSE, -- use GPUs |
| 'iris_test_packed', -- validation dataset |
| 3, -- metrics compute frequency |
| FALSE, -- warm start |
| 'Sophie L.', -- name |
| 'Simple MLP for iris dataset' -- description |
| ); |
| \\x on |
| SELECT * FROM iris_model_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-------------+-------------------------------------------------------------------------- |
| source_table | iris_train_packed |
| model | iris_model |
| dependent_varname | {class_text} |
| independent_varname | {attributes} |
| model_arch_table | model_arch_library |
| model_id | 1 |
| compile_params | loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] |
| fit_params | batch_size=5, epochs=3 |
| num_iterations | 10 |
| validation_table | iris_test_packed |
| object_table | |
| metrics_compute_frequency | 3 |
| name | Sophie L. |
| description | Simple MLP for iris dataset |
| model_type | madlib_keras |
| model_size | 0.7900390625 |
| start_training_time | 2021-01-29 14:41:16.943861 |
| end_training_time | 2021-01-29 14:41:19.478149 |
| metrics_elapsed_time | {2.3377411365509,2.42358803749084,2.49885511398315,2.53427410125732} |
| madlib_version | 1.20.0 |
| num_classes | {3} |
| dependent_vartype | {"character varying"} |
| normalizing_const | 1 |
| metrics_type | {accuracy} |
| loss_type | categorical_crossentropy |
| training_metrics_final | 0.883333325386047 |
| training_loss_final | 0.584357917308807 |
| training_metrics | {0.733333349227905,0.774999976158142,0.883333325386047,0.883333325386047} |
| training_loss | {0.765825688838959,0.664925456047058,0.605871021747589,0.584357917308807} |
| validation_metrics_final | 0.899999976158142 |
| validation_loss_final | 0.590348184108734 |
| validation_metrics | {0.699999988079071,0.866666674613953,0.899999976158142,0.899999976158142} |
| validation_loss | {0.81381630897522,0.691304981708527,0.616305589675903,0.590348184108734} |
| metrics_iters | {3,6,9,10} |
| class_text_class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| </pre> |
| |
| -# Predict probabilities for each class: |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_predict; |
| SELECT madlib.madlib_keras_predict('iris_model', -- model |
| 'iris_test', -- test_table |
| 'id', -- id column |
| 'attributes', -- independent var |
| 'iris_predict', -- output table |
| 'prob' -- response type |
| ); |
| \\x off |
| SELECT * FROM iris_predict ORDER BY id; |
| </pre> |
| <pre class="result"> |
| id | class_name | class_value | prob | rank |
| -----+------------+-----------------+-------------+------ |
| 4 | class_text | Iris-versicolor | 0.34548566 | 2 |
| 4 | class_text | Iris-setosa | 0.57626975 | 1 |
| 4 | class_text | Iris-virginica | 0.07824467 | 3 |
| 7 | class_text | Iris-versicolor | 0.34442508 | 2 |
| 7 | class_text | Iris-setosa | 0.57735515 | 1 |
| 7 | class_text | Iris-virginica | 0.078219764 | 3 |
| 9 | class_text | Iris-versicolor | 0.3453845 | 2 |
| 9 | class_text | Iris-virginica | 0.08293749 | 3 |
| 9 | class_text | Iris-setosa | 0.57167804 | 1 |
| 12 | class_text | Iris-versicolor | 0.34616387 | 2 |
| 12 | class_text | Iris-setosa | 0.5793855 | 1 |
| 12 | class_text | Iris-virginica | 0.074450605 | 3 |
| 18 | class_text | Iris-versicolor | 0.34597218 | 2 |
| 18 | class_text | Iris-virginica | 0.07100027 | 3 |
| 18 | class_text | Iris-setosa | 0.58302754 | 1 |
| 20 | class_text | Iris-versicolor | 0.34480608 | 2 |
| 20 | class_text | Iris-setosa | 0.5856424 | 1 |
| 20 | class_text | Iris-virginica | 0.06955151 | 3 |
| 24 | class_text | Iris-versicolor | 0.38339624 | 2 |
| 24 | class_text | Iris-setosa | 0.5330486 | 1 |
| 24 | class_text | Iris-virginica | 0.083555184 | 3 |
| 30 | class_text | Iris-versicolor | 0.35101113 | 2 |
| 30 | class_text | Iris-setosa | 0.56958234 | 1 |
| 30 | class_text | Iris-virginica | 0.07940655 | 3 |
| 31 | class_text | Iris-versicolor | 0.3503181 | 2 |
| 31 | class_text | Iris-setosa | 0.5733414 | 1 |
| 31 | class_text | Iris-virginica | 0.07634052 | 3 |
| 33 | class_text | Iris-versicolor | 0.34489658 | 2 |
| 33 | class_text | Iris-setosa | 0.5847962 | 1 |
| 33 | class_text | Iris-virginica | 0.07030724 | 3 |
| 35 | class_text | Iris-versicolor | 0.34719768 | 2 |
| 35 | class_text | Iris-setosa | 0.577414 | 1 |
| 35 | class_text | Iris-virginica | 0.07538838 | 3 |
| 40 | class_text | Iris-versicolor | 0.3464746 | 2 |
| 40 | class_text | Iris-setosa | 0.58250487 | 1 |
| 40 | class_text | Iris-virginica | 0.071020484 | 3 |
| 41 | class_text | Iris-versicolor | 0.34581655 | 2 |
| 41 | class_text | Iris-setosa | 0.5805128 | 1 |
| 41 | class_text | Iris-virginica | 0.07367061 | 3 |
| 45 | class_text | Iris-versicolor | 0.38146245 | 2 |
| 45 | class_text | Iris-setosa | 0.52559936 | 1 |
| 45 | class_text | Iris-virginica | 0.09293811 | 3 |
| 51 | class_text | Iris-virginica | 0.41811863 | 2 |
| 51 | class_text | Iris-setosa | 0.07617204 | 3 |
| 51 | class_text | Iris-versicolor | 0.5057093 | 1 |
| 53 | class_text | Iris-virginica | 0.47048044 | 2 |
| 53 | class_text | Iris-versicolor | 0.47150916 | 1 |
| 53 | class_text | Iris-setosa | 0.058010455 | 3 |
| 57 | class_text | Iris-versicolor | 0.4443615 | 2 |
| 57 | class_text | Iris-setosa | 0.055230834 | 3 |
| 57 | class_text | Iris-virginica | 0.5004077 | 1 |
| 58 | class_text | Iris-virginica | 0.35905617 | 2 |
| 58 | class_text | Iris-setosa | 0.15329117 | 3 |
| 58 | class_text | Iris-versicolor | 0.4876526 | 1 |
| 69 | class_text | Iris-versicolor | 0.4485282 | 2 |
| 69 | class_text | Iris-virginica | 0.4913048 | 1 |
| 69 | class_text | Iris-setosa | 0.060167026 | 3 |
| 72 | class_text | Iris-virginica | 0.38764492 | 2 |
| 72 | class_text | Iris-versicolor | 0.5052213 | 1 |
| 72 | class_text | Iris-setosa | 0.10713379 | 3 |
| 74 | class_text | Iris-versicolor | 0.44894043 | 2 |
| 74 | class_text | Iris-setosa | 0.06307102 | 3 |
| 74 | class_text | Iris-virginica | 0.48798853 | 1 |
| 93 | class_text | Iris-virginica | 0.40836224 | 2 |
| 93 | class_text | Iris-setosa | 0.102442265 | 3 |
| 93 | class_text | Iris-versicolor | 0.48919544 | 1 |
| 94 | class_text | Iris-virginica | 0.35238466 | 2 |
| 94 | class_text | Iris-versicolor | 0.49192256 | 1 |
| 94 | class_text | Iris-setosa | 0.15569273 | 3 |
| 110 | class_text | Iris-versicolor | 0.2917817 | 2 |
| 110 | class_text | Iris-virginica | 0.6972358 | 1 |
| 110 | class_text | Iris-setosa | 0.010982483 | 3 |
| 113 | class_text | Iris-versicolor | 0.35037678 | 2 |
| 113 | class_text | Iris-setosa | 0.021288367 | 3 |
| 113 | class_text | Iris-virginica | 0.6283349 | 1 |
| 117 | class_text | Iris-versicolor | 0.35009244 | 2 |
| 117 | class_text | Iris-virginica | 0.6264066 | 1 |
| 117 | class_text | Iris-setosa | 0.023500985 | 3 |
| 123 | class_text | Iris-versicolor | 0.2849912 | 2 |
| 123 | class_text | Iris-virginica | 0.70571697 | 1 |
| 123 | class_text | Iris-setosa | 0.009291774 | 3 |
| 127 | class_text | Iris-versicolor | 0.4041788 | 2 |
| 127 | class_text | Iris-virginica | 0.55537915 | 1 |
| 127 | class_text | Iris-setosa | 0.040441982 | 3 |
| 130 | class_text | Iris-versicolor | 0.38396156 | 2 |
| 130 | class_text | Iris-virginica | 0.59018326 | 1 |
| 130 | class_text | Iris-setosa | 0.025855187 | 3 |
| 143 | class_text | Iris-versicolor | 0.33123586 | 2 |
| 143 | class_text | Iris-virginica | 0.6445185 | 1 |
| 143 | class_text | Iris-setosa | 0.024245638 | 3 |
| (30 rows) |
| </pre> |
| |
| -# Warm start. Next, use the warm_start parameter |
| to continue learning, using the coefficients from |
| the run above. Note that we don't drop the |
| model table or model summary table: |
| <pre class="example"> |
| SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table |
| 'iris_model', -- model output table |
| 'model_arch_library', -- model arch table |
| 1, -- model arch id |
| $$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params |
| $$ batch_size=5, epochs=3 $$, -- fit_params |
| 5, -- num_iterations |
| FALSE, -- use GPUs |
| 'iris_test_packed', -- validation dataset |
| 1, -- metrics compute frequency |
| TRUE, -- warm start |
| 'Sophie L.', -- name |
| 'Simple MLP for iris dataset' -- description |
| ); |
| \\x on |
| SELECT * FROM iris_model_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-------------+-------------------------------------------------------------------------------------------- |
| source_table | iris_train_packed |
| model | iris_model |
| dependent_varname | {class_text} |
| independent_varname | {attributes} |
| model_arch_table | model_arch_library |
| model_id | 1 |
| compile_params | loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] |
| fit_params | batch_size=5, epochs=3 |
| num_iterations | 5 |
| validation_table | iris_test_packed |
| object_table | |
| metrics_compute_frequency | 1 |
| name | Sophie L. |
| description | Simple MLP for iris dataset |
| model_type | madlib_keras |
| model_size | 0.7900390625 |
| start_training_time | 2021-01-29 14:42:28.780276 |
| end_training_time | 2021-01-29 14:42:31.177561 |
| metrics_elapsed_time | {2.24628114700317,2.28473520278931,2.32178020477295,2.35844302177429,2.39726710319519} |
| madlib_version | 1.20.0 |
| num_classes | {3} |
| dependent_vartype | {"character varying"} |
| normalizing_const | 1 |
| metrics_type | {accuracy} |
| loss_type | categorical_crossentropy |
| training_metrics_final | 0.916666686534882 |
| training_loss_final | 0.456518471240997 |
| training_metrics | {0.883333325386047,0.891666650772095,0.908333361148834,0.916666686534882,0.916666686534882} |
| training_loss | {0.559914350509644,0.537041485309601,0.513083755970001,0.47985765337944,0.456518471240997} |
| validation_metrics_final | 0.966666638851166 |
| validation_loss_final | 0.432968735694885 |
| validation_metrics | {0.899999976158142,0.899999976158142,0.933333337306976,0.966666638851166,0.966666638851166} |
| validation_loss | {0.558336615562439,0.529355347156525,0.496939331293106,0.462678134441376,0.432968735694885} |
| metrics_iters | {1,2,3,4,5} |
| class_text_class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| </pre> |
| Note that the loss and accuracy values pick up from where the previous run left off. |
| |
| <h4>Transfer Learning</h4> |
| Here we want |
| to start with initial weights from a pre-trained model |
| rather than training from scratch. We also want to use |
| a model architecture with the earlier feature layer(s) |
| frozen to save on training time. The example below is |
| somewhat contrived but gives you the idea of the steps. |
| |
| -# Define and load a model architecture with |
| the 1st hidden layer frozen: |
| |
| <pre class="example"> |
| model_transfer = Sequential() |
| model_transfer.add(Dense(10, activation='relu', input_shape=(4,), trainable=False)) |
| model_transfer.add(Dense(10, activation='relu')) |
| model_transfer.add(Dense(3, activation='softmax')) |
| model_simple.summary() |
| \verbatim |
| |
| _________________________________________________________________ |
| Layer (type) Output Shape Param # |
| ================================================================= |
| dense_1 (Dense) (None, 10) 50 |
| _________________________________________________________________ |
| dense_2 (Dense) (None, 10) 110 |
| _________________________________________________________________ |
| dense_3 (Dense) (None, 3) 33 |
| ================================================================= |
| Total params: 193 |
| Trainable params: 143 |
| Non-trainable params: 50 |
| \endverbatim |
| </pre> |
| Export the model to JSON: |
| <pre class="example"> |
| model_simple.to_json() |
| </pre> |
| <pre class="result"> |
| '{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": false, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}' |
| </pre> |
| Load into model architecture table: |
| <pre class="example"> |
| SELECT madlib.load_keras_model('model_arch_library', -- Output table, |
| $$ |
| {"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": false, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"} |
| $$ |
| ::json, -- JSON blob |
| NULL, -- Weights |
| 'Maria', -- Name |
| 'A transfer model' -- Descr |
| ); |
| </pre> |
| Fetch the weights from a previous MADlib run. (Normally |
| these would be downloaded from a source that trained |
| the same model architecture on a related dataset.) |
| <pre class="example"> |
| UPDATE model_arch_library |
| SET model_weights = iris_model.model_weights |
| FROM iris_model |
| WHERE model_arch_library.model_id = 2; |
| </pre> |
| Now train the model using the transfer model and the pre-trained weights: |
| <pre class="example"> |
| DROP TABLE IF EXISTS iris_model, iris_model_summary; |
| SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table |
| 'iris_model', -- model output table |
| 'model_arch_library', -- model arch table |
| 2, -- model arch id |
| $$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params |
| $$ batch_size=5, epochs=3 $$, -- fit_params |
| 10 -- num_iterations |
| ); |
| \\x on |
| SELECT * FROM iris_model_summary; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-------------+-------------------------------------------------------------------------- |
| source_table | iris_train_packed |
| model | iris_model |
| dependent_varname | {class_text} |
| independent_varname | {attributes} |
| model_arch_table | model_arch_library |
| model_id | 2 |
| compile_params | loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] |
| fit_params | batch_size=5, epochs=3 |
| num_iterations | 10 |
| validation_table | |
| object_table | |
| metrics_compute_frequency | 10 |
| name | |
| description | |
| model_type | madlib_keras |
| model_size | 0.7900390625 |
| start_training_time | 2021-01-29 14:44:51.176983 |
| end_training_time | 2021-01-29 14:44:53.666457 |
| metrics_elapsed_time | {2.48945999145508} |
| madlib_version | 1.20.0 |
| num_classes | {3} |
| dependent_vartype | {"character varying"} |
| normalizing_const | 1 |
| metrics_type | {accuracy} |
| loss_type | categorical_crossentropy |
| training_metrics_final | 0.949999988079071 |
| training_loss_final | 0.340020209550858 |
| training_metrics | {0.949999988079071} |
| training_loss | {0.340020209550858} |
| validation_metrics_final | |
| validation_loss_final | |
| validation_metrics | |
| validation_loss | |
| metrics_iters | {10} |
| class_text_class_values | {Iris-setosa,Iris-versicolor,Iris-virginica} |
| </pre> |
| |
| @anchor notes |
| @par Notes |
| |
| 1. Refer to the deep learning section of the Apache MADlib |
| wiki [5] for important information including supported libraries |
| and versions. |
| |
| 2. Classification is currently supported, not regression. |
| |
| 3. Reminder about the distinction between warm start and transfer learning. Warm start uses model |
| state (weights) from the model output table from a previous training run - |
| set the 'warm_start' parameter to TRUE in the fit function. |
| Transfer learning uses initial model state (weights) stored in the 'model_arch_table' - in this case set the |
| 'warm_start' parameter to FALSE in the fit function. |
| |
| @anchor background |
| @par Technical Background |
| |
| For an introduction to deep learning foundations, including MLP and CNN, |
| refer to [6]. |
| |
| This module trains a single large model across the database cluster |
| using the bulk synchronous parallel (BSP) approach, with model averaging [7]. |
| |
| On the effect of database cluster size: as the database cluster size increases, the per iteration |
| loss will be higher since the model only sees 1/n of the data, where n is the number of segments. |
| However, each iteration runs faster than single node because it is only traversing 1/n of the data. |
| For highly non-convex solution spaces, convergence behavior may diminish as cluster size increases. |
| Ensure that each segment has sufficient volume of data and examples of each class value. |
| |
| Alternatively, to train multiple models at the same time for model |
| architecture search or hyperparameter tuning, you can |
| use the methods in <a href="group__grp__model__selection.html">Train Multiple Models</a>, |
| which does not do model averaging and hence may have better covergence efficiency. |
| |
| @anchor literature |
| @literature |
| |
| @anchor mlp-lit-1 |
| [1] https://keras.io/ |
| |
| [2] https://www.tensorflow.org/ |
| |
| [3] "Neural Networks for Machine Learning", Lectures 6a and 6b on mini-batch gradient descent, |
| Geoffrey Hinton with Nitish Srivastava and Kevin Swersky, |
| http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf |
| |
| [4] https://keras.io/models/model/ |
| |
| [5] Deep learning section of Apache MADlib wiki, https://cwiki.apache.org/confluence/display/MADLIB/Deep+Learning |
| |
| [6] Deep Learning, Ian Goodfellow, Yoshua Bengio and Aaron Courville, MIT Press, 2016. |
| |
| [7] "Resource-Efficient and Reproducible Model Selection on Deep Learning Systems," Supun Nakandala, |
| Yuhao Zhang, and Arun Kumar, Technical Report, Computer Science and Engineering, University of California, |
| San Diego https://adalabucsd.github.io/papers/TR_2019_Cerebro.pdf. |
| |
| [8] Greenplum Database server configuration parameters https://gpdb.docs.pivotal.io/latest/ref_guide/config_params/guc-list.html |
| |
| @anchor related |
| @par Related Topics |
| |
| File madlib_keras.sql_in documenting the training, evaluate and predict functions. |
| |
| */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit( |
| source_table VARCHAR, |
| model VARCHAR, |
| model_arch_table VARCHAR, |
| model_id INTEGER, |
| compile_params VARCHAR, |
| fit_params VARCHAR, |
| num_iterations INTEGER, |
| use_gpus BOOLEAN DEFAULT FALSE, |
| validation_table VARCHAR DEFAULT NULL, |
| metrics_compute_frequency INTEGER DEFAULT NULL, |
| warm_start BOOLEAN DEFAULT NULL, |
| name VARCHAR DEFAULT NULL, |
| description VARCHAR DEFAULT NULL, |
| object_table VARCHAR DEFAULT NULL |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras') |
| from utilities.control import SetGUC |
| with AOControl(False): |
| with SetGUC("plan_cache_mode", "force_generic_plan"): |
| madlib_keras.fit(**globals()) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.fit_transition( |
| state BYTEA, |
| dependent_var BYTEA[], |
| independent_var BYTEA[], |
| dependent_var_shape INTEGER[], |
| independent_var_shape INTEGER[], |
| model_architecture TEXT, |
| compile_params TEXT, |
| fit_params TEXT, |
| dist_key INTEGER, |
| dist_key_mapping INTEGER[], |
| current_seg_id INTEGER, |
| segments_per_host INTEGER[], |
| images_per_seg INTEGER[], |
| accessible_gpus_for_seg INTEGER[], |
| prev_serialized_weights BYTEA, |
| custom_function_map BYTEA |
| ) RETURNS BYTEA AS $$ |
| PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras') |
| import traceback |
| from sys import exc_info |
| import plpy |
| try: |
| return madlib_keras.fit_transition(**globals()) |
| except Exception as e: |
| etype, _, tb = exc_info() |
| detail = ''.join(traceback.format_exception(etype, e, tb)) |
| message = e.message + 'TransAggDetail' + detail |
| e.args = (message,) |
| raise e |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.fit_transition_wide( |
| state BYTEA, |
| dependent_var1 BYTEA, |
| dependent_var2 BYTEA, |
| dependent_var3 BYTEA, |
| dependent_var4 BYTEA, |
| dependent_var5 BYTEA, |
| independent_var1 BYTEA, |
| independent_var2 BYTEA, |
| independent_var3 BYTEA, |
| independent_var4 BYTEA, |
| independent_var5 BYTEA, |
| dependent_var_shape INTEGER[], |
| independent_var_shape INTEGER[], |
| model_architecture TEXT, |
| compile_params TEXT, |
| fit_params TEXT, |
| dist_key INTEGER, |
| dist_key_mapping INTEGER[], |
| current_seg_id INTEGER, |
| segments_per_host INTEGER[], |
| images_per_seg INTEGER[], |
| accessible_gpus_for_seg INTEGER[], |
| prev_serialized_weights BYTEA, |
| custom_function_map BYTEA |
| ) RETURNS BYTEA AS $$ |
| PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras') |
| import traceback |
| from sys import exc_info |
| import plpy |
| try: |
| return madlib_keras.fit_transition_wide(**globals()) |
| except Exception as e: |
| etype, _, tb = exc_info() |
| detail = ''.join(traceback.format_exception(etype, e, tb)) |
| message = e.message + 'TransAggDetail' + detail |
| e.args = (message,) |
| raise e |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.fit_merge( |
| state1 BYTEA, |
| state2 BYTEA |
| ) RETURNS BYTEA AS $$ |
| PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras') |
| import traceback |
| from sys import exc_info |
| import plpy |
| |
| try: |
| return madlib_keras.fit_merge(**globals()) |
| except Exception as e: |
| etype, _, tb = exc_info() |
| detail = ''.join(traceback.format_exception(etype, e, tb)) |
| message = e.message + 'MergeAggDetail' + detail |
| e.args = (message,) |
| raise e |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.fit_final( |
| state BYTEA |
| ) RETURNS BYTEA AS $$ |
| PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras') |
| import traceback |
| from sys import exc_info |
| import plpy |
| try: |
| return madlib_keras.fit_final(**globals()) |
| except Exception as e: |
| etype, _, tb = exc_info() |
| detail = ''.join(traceback.format_exception(etype, e, tb)) |
| message = e.message + 'FinalAggDetail' + detail |
| e.args = (message,) |
| raise e |
| |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.fit_step( |
| /* dep_var */ BYTEA, |
| /* dep_var */ BYTEA, |
| /* dep_var */ BYTEA, |
| /* dep_var */ BYTEA, |
| /* dep_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* dep_var_shape */ INTEGER[], |
| /* ind_var_shape */ INTEGER[], |
| /* model_architecture */ TEXT, |
| /* compile_params */ TEXT, |
| /* fit_params */ TEXT, |
| /* dist_key */ INTEGER, |
| /* dist_key_mapping */ INTEGER[], |
| /* current_seg_id */ INTEGER, |
| /* segments_per_host */ INTEGER[], |
| /* images_per_seg */ INTEGER[], |
| /* segments_per_host */ INTEGER[], |
| /* prev_serialized_weights */BYTEA, |
| /* custom_loss_cfunction */ BYTEA); |
| CREATE AGGREGATE MADLIB_SCHEMA.fit_step( |
| /* dep_var */ BYTEA, |
| /* dep_var */ BYTEA, |
| /* dep_var */ BYTEA, |
| /* dep_var */ BYTEA, |
| /* dep_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* ind_var */ BYTEA, |
| /* dep_var_shape */ INTEGER[], |
| /* ind_var_shape */ INTEGER[], |
| /* model_architecture */ TEXT, |
| /* compile_params */ TEXT, |
| /* fit_params */ TEXT, |
| /* dist_key */ INTEGER, |
| /* dist_key_mapping */ INTEGER[], |
| /* current_seg_id */ INTEGER, |
| /* segments_per_host */ INTEGER[], |
| /* images_per_seg */ INTEGER[], |
| /* segments_per_host */ INTEGER[], |
| /* prev_serialized_weights */BYTEA, |
| /* custom_loss_cfunction */ BYTEA |
| )( |
| STYPE=BYTEA, |
| SFUNC=MADLIB_SCHEMA.fit_transition_wide, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.fit_merge,') |
| FINALFUNC=MADLIB_SCHEMA.fit_final |
| ); |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.fit_step( |
| BYTEA[], |
| BYTEA[], |
| INTEGER[], |
| INTEGER[], |
| TEXT, |
| TEXT, |
| TEXT, |
| INTEGER, |
| INTEGER[], |
| INTEGER, |
| INTEGER[], |
| INTEGER[], |
| INTEGER[], |
| BYTEA, |
| BYTEA); |
| CREATE AGGREGATE MADLIB_SCHEMA.fit_step( |
| /* dep_var */ BYTEA[], |
| /* ind_var */ BYTEA[], |
| /* dep_var_shape */ INTEGER[], |
| /* ind_var_shape */ INTEGER[], |
| /* model_architecture */ TEXT, |
| /* compile_params */ TEXT, |
| /* fit_params */ TEXT, |
| /* dist_key */ INTEGER, |
| /* dist_key_mapping */ INTEGER[], |
| /* current_seg_id */ INTEGER, |
| /* segments_per_host */ INTEGER[], |
| /* images_per_seg */ INTEGER[], |
| /* accessible_gpus_for_seg */INTEGER[], |
| /* prev_serialized_weights */BYTEA, |
| /* custom_loss_cfunction */ BYTEA |
| )( |
| STYPE=BYTEA, |
| SFUNC=MADLIB_SCHEMA.fit_transition, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.fit_merge,') |
| FINALFUNC=MADLIB_SCHEMA.fit_final |
| ); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict( |
| model_table VARCHAR, |
| test_table VARCHAR, |
| id_col VARCHAR, |
| independent_varname VARCHAR, |
| output_table VARCHAR, |
| pred_type VARCHAR DEFAULT 'prob', |
| use_gpus BOOLEAN DEFAULT FALSE, |
| mst_key INTEGER DEFAULT NULL |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') |
| from utilities.control import SetGUC |
| with AOControl(False): |
| with SetGUC("plan_cache_mode", "force_generic_plan"): |
| madlib_keras_predict.Predict(schema_madlib, |
| model_table, |
| test_table, |
| id_col, |
| independent_varname, |
| output_table, |
| pred_type, |
| use_gpus, |
| mst_key) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict( |
| model_table VARCHAR, |
| test_table VARCHAR, |
| id_col VARCHAR, |
| independent_varname VARCHAR, |
| output_table VARCHAR, |
| pred_type INTEGER, |
| use_gpus BOOLEAN DEFAULT FALSE, |
| mst_key INTEGER DEFAULT NULL |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') |
| from utilities.control import SetGUC |
| with AOControl(False): |
| with SetGUC("plan_cache_mode", "force_generic_plan"): |
| madlib_keras_predict.Predict(schema_madlib, |
| model_table, |
| test_table, |
| id_col, |
| independent_varname, |
| output_table, |
| pred_type, |
| use_gpus, |
| mst_key) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict( |
| model_table VARCHAR, |
| test_table VARCHAR, |
| id_col VARCHAR, |
| independent_varname VARCHAR, |
| output_table VARCHAR, |
| pred_type DOUBLE PRECISION, |
| use_gpus BOOLEAN DEFAULT FALSE, |
| mst_key INTEGER DEFAULT NULL |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') |
| from utilities.control import SetGUC |
| with AOControl(False): |
| with SetGUC("plan_cache_mode", "force_generic_plan"): |
| madlib_keras_predict.Predict(schema_madlib, |
| model_table, |
| test_table, |
| id_col, |
| independent_varname, |
| output_table, |
| pred_type, |
| use_gpus, |
| mst_key) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_predict( |
| independent_var REAL[], |
| independent_var2 REAL[], |
| independent_var3 REAL[], |
| independent_var4 REAL[], |
| independent_var5 REAL[], |
| model_architecture TEXT, |
| model_weights BYTEA, |
| normalizing_const DOUBLE PRECISION, |
| current_seg_id INTEGER, |
| seg_ids INTEGER[], |
| images_per_seg INTEGER[], |
| gpus_per_host INTEGER, |
| segments_per_host INTEGER[] |
| ) RETURNS DOUBLE PRECISION[] AS $$ |
| PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras_predict') |
| return madlib_keras_predict.internal_keras_predict_wide(**globals()) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| ------------------------------------------------------------------------------- |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom( |
| model_arch_table VARCHAR, |
| model_id INTEGER, |
| test_table VARCHAR, |
| id_col VARCHAR, |
| independent_varname VARCHAR, |
| output_table VARCHAR, |
| pred_type VARCHAR DEFAULT 'prob', |
| use_gpus BOOLEAN DEFAULT NULL, |
| class_values TEXT[] DEFAULT NULL, |
| normalizing_const DOUBLE PRECISION DEFAULT NULL, |
| dependent_count INTEGER DEFAULT 1 |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') |
| from utilities.control import SetGUC |
| with AOControl(False): |
| with SetGUC("plan_cache_mode", "force_generic_plan"): |
| madlib_keras_predict.PredictBYOM(**globals()) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom( |
| model_arch_table VARCHAR, |
| model_id INTEGER, |
| test_table VARCHAR, |
| id_col VARCHAR, |
| independent_varname VARCHAR, |
| output_table VARCHAR, |
| pred_type INTEGER, |
| use_gpus BOOLEAN DEFAULT NULL, |
| class_values TEXT[] DEFAULT NULL, |
| normalizing_const DOUBLE PRECISION DEFAULT NULL |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') |
| from utilities.control import SetGUC |
| with AOControl(False): |
| with SetGUC("plan_cache_mode", "force_generic_plan"): |
| madlib_keras_predict.PredictBYOM(**globals()) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom( |
| model_arch_table VARCHAR, |
| model_id INTEGER, |
| test_table VARCHAR, |
| id_col VARCHAR, |
| independent_varname VARCHAR, |
| output_table VARCHAR, |
| pred_type DOUBLE PRECISION, |
| use_gpus BOOLEAN DEFAULT NULL, |
| class_values TEXT[] DEFAULT NULL, |
| normalizing_const DOUBLE PRECISION DEFAULT NULL |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') |
| from utilities.control import SetGUC |
| with AOControl(False): |
| with SetGUC("plan_cache_mode", "force_generic_plan"): |
| madlib_keras_predict.PredictBYOM(**globals()) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------------- |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate( |
| model_table VARCHAR, |
| test_table VARCHAR, |
| output_table VARCHAR, |
| use_gpus BOOLEAN, |
| mst_key INTEGER |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras') |
| from utilities.control import SetGUC |
| with AOControl(False): |
| with SetGUC("plan_cache_mode", "force_generic_plan"): |
| madlib_keras.evaluate(**globals()) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate( |
| model_table VARCHAR, |
| test_table VARCHAR, |
| output_table VARCHAR, |
| use_gpus BOOLEAN |
| ) RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_evaluate($1, $2, $3, $4, NULL); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate( |
| model_table VARCHAR, |
| test_table VARCHAR, |
| output_table VARCHAR |
| ) RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_evaluate($1, $2, $3, NULL, NULL); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_eval_transition( |
| state REAL[], |
| dependent_var BYTEA[], |
| independent_var BYTEA[], |
| dependent_var_shape INTEGER[], |
| independent_var_shape INTEGER[], |
| model_architecture TEXT, |
| serialized_weights BYTEA, |
| compile_params TEXT, |
| dist_key INTEGER, |
| dist_key_mapping INTEGER[], |
| current_seg_id INTEGER, |
| segments_per_host INTEGER[], |
| images_per_seg INTEGER[], |
| accessible_gpus_for_seg INTEGER[], |
| should_clear_session BOOLEAN, |
| custom_function_map BYTEA |
| ) RETURNS REAL[] AS $$ |
| PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras') |
| return madlib_keras.internal_keras_eval_transition(**globals()) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_eval_merge( |
| state1 REAL[], |
| state2 REAL[] |
| ) RETURNS REAL[] AS $$ |
| PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras') |
| return madlib_keras.internal_keras_eval_merge(**globals()) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_eval_final( |
| state REAL[] |
| ) RETURNS REAL[2] AS $$ |
| PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras') |
| return madlib_keras.internal_keras_eval_final(**globals()) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.internal_keras_evaluate( |
| BYTEA[], |
| BYTEA[], |
| INTEGER[], |
| INTEGER[], |
| TEXT, |
| BYTEA, |
| TEXT, |
| INTEGER, |
| INTEGER[], |
| INTEGER, |
| INTEGER[], |
| INTEGER[], |
| INTEGER[], |
| BOOLEAN, |
| BYTEA); |
| |
| CREATE AGGREGATE MADLIB_SCHEMA.internal_keras_evaluate( |
| /* dependent_var */ BYTEA[], |
| /* independent_var */ BYTEA[], |
| /* dependent_var_shape */ INTEGER[], |
| /* independent_var_shape */ INTEGER[], |
| /* model_architecture */ TEXT, |
| /* model_weights */ BYTEA, |
| /* compile_params */ TEXT, |
| /* dist_key */ INTEGER, |
| /* dist_key_mapping */ INTEGER[], |
| /* current_seg_id */ INTEGER, |
| /* segments_per_host */ INTEGER[], |
| /* images_per_seg*/ INTEGER[], |
| /* accessible_gpus_for_seg */ INTEGER[], |
| /* should_clear_session */ BOOLEAN, |
| /* custom_function_map */ BYTEA |
| )( |
| STYPE=REAL[], |
| INITCOND='{0,0,0}', |
| SFUNC=MADLIB_SCHEMA.internal_keras_eval_transition, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.internal_keras_eval_merge,') |
| FINALFUNC=MADLIB_SCHEMA.internal_keras_eval_final |
| ); |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit( |
| message VARCHAR |
| ) RETURNS VARCHAR AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras') |
| with AOControl(False): |
| return madlib_keras.fit_help(**globals()) |
| $$ LANGUAGE plpythonu IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit() |
| RETURNS VARCHAR AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_fit(''); |
| $$ LANGUAGE sql IMMUTABLE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate( |
| message VARCHAR |
| ) RETURNS VARCHAR AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras') |
| with AOControl(False): |
| return madlib_keras.evaluate_help(**globals()) |
| $$ LANGUAGE plpythonu IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate() |
| RETURNS VARCHAR AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_evaluate(''); |
| $$ LANGUAGE sql IMMUTABLE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict( |
| message VARCHAR |
| ) RETURNS VARCHAR AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') |
| with AOControl(False): |
| return madlib_keras_predict.predict_help(**globals()) |
| $$ LANGUAGE plpythonu IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict() |
| RETURNS VARCHAR AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_predict(''); |
| $$ LANGUAGE sql IMMUTABLE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom( |
| message VARCHAR |
| ) RETURNS VARCHAR AS $$ |
| PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict') |
| with AOControl(False): |
| return madlib_keras_predict.predict_byom_help(**globals()) |
| $$ LANGUAGE plpythonu IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom() |
| RETURNS VARCHAR AS $$ |
| SELECT MADLIB_SCHEMA.madlib_keras_predict_byom(''); |
| $$ LANGUAGE sql IMMUTABLE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); |