blob: c5d8d35873a84fd9417a7449507c8ff45bb2d90e [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*
* @file madlib_keras.sql_in
*
* @brief SQL functions for deep learning with Keras
* @date June 2012
*
*
*//* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
/**
@addtogroup grp_keras
<div class="toc"><b>Contents</b><ul>
<li class="level1"><a href="#keras_fit">Fit</a></li>
<li class="level1"><a href="#keras_evaluate">Evaluate</a></li>
<li class="level1"><a href="#keras_predict">Predict</a></li>
<li class="level1"><a href="#example">Examples</a></li>
<li class="level1"><a href="#notes">Notes</a></li>
<li class="level1"><a href="#background">Technical Background</a></li>
<li class="level1"><a href="#literature">Literature</a></li>
<li class="level1"><a href="#related">Related Topics</a></li>
</ul></div>
\warning <em> This MADlib method is still in early stage development.
Interface and implementation are subject to change. </em>
This module allows you to use SQL to call deep learning
models designed in Keras [1], which is a high-level neural
network API written in Python.
Keras was developed for fast experimentation. It can run
on top of different backends and the one that is currently
supported by MADlib is TensorFlow [2]. The implementation
in MADlib is distributed and designed to train
a single large model across multiple segments (workers)
in a Greenplum database. PostgreSQL is also supported.
The main use case is image classification
using sequential models, which are made up of a
linear stack of layers. This includes multilayer perceptrons (MLPs)
and convolutional neural networks (CNNs). Regression is not
currently supported.
Before using Keras in MADlib you will need to mini-batch
your training and evaluation datasets by calling the
<a href="group__grp__input__preprocessor__dl.html">Preprocessor
for Images</a> which is a utility that prepares image data for
use by models that support mini-batch as an optimization option.
This is a one-time operation and you would only
need to re-run the preprocessor if your input data has changed.
The advantage of using mini-batching is that it
can perform better than stochastic gradient descent
because it uses more than one training example at a time,
typically resulting in faster and smoother convergence [3].
@brief Solves image classification problems by calling
the Keras API
@anchor keras_fit
@par Fit
The fit (training) function has the following format:
<pre class="syntax">
madlib_keras_fit(
source_table,
model,
model_arch_table,
model_arch_id,
compile_params,
fit_params,
num_iterations,
gpus_per_host,
validation_table,
metrics_compute_frequency,
warm_start,
name,
description
)
</pre>
\b Arguments
<dl class="arglist">
<dt>source_table</dt>
<dd>TEXT. Name of the table containing the training data.
This is the name of the output
table from the image preprocessor. Independent
and dependent variables are specified in the preprocessor
step which is why you do not need to explicitly state
them here as part of the fit function.</dd>
<dt>model</dt>
<dd>TEXT. Name of the output table containing the model.
Details of the output table are shown below.
</dd>
<dt>model_arch_table</dt>
<dd>TEXT. Name of the table containing the model
architecture and (optionally) initial weights to use for
training.
</dd>
<dt>model_arch_id</dt>
<dd>INTEGER. This is the id in 'model_arch_table'
containing the model architecture and (optionally)
initial weights to use for training.
</dd>
<DT>compile_params</DT>
<DD>TEXT.
Parameters passed to the compile method of the Keras
model class [4]. These parameters will be passed through as is
so they must conform to the Keras API definition.
As an example, you might use something like: <em>loss='categorical_crossentropy', optimizer='adam', metrics=['acc']</em>.
The mandatory parameters that must be specified are 'optimizer'
and 'loss'. Others are optional and will use the default
values as per Keras if not specified here. Also, when
specifying 'loss' and 'metrics' do <em>not</em> include the
module and submodule prefixes
like <em>loss='losses.categorical_crossentropy'</em>
or <em>optimizer='keras.optimizers.adam'</em>.
@note
The following loss function is
not supported: <em>sparse_categorical_crossentropy</em>.
The following metrics are not
supported: <em>sparse_categorical_accuracy, top_k_categorical_accuracy, sparse_top_k_categorical_accuracy</em> and custom metrics.
</DD>
<DT>fit_params </DT>
<DD>TEXT. Parameters passed to the fit method of the Keras
model class [4]. These will be passed through as is
so they must conform to the Keras API definition.
As an example, you might use something like:
<em>batch_size=128, epochs=4</em>.
There are no mandatory parameters so
if you specify NULL, it will use all default
values as per Keras.
</DD>
<DT>num_iterations</DT>
<DD>INTEGER. Number of iterations to train.
</DD>
<DT>gpus_per_host (optional)</DT>
<DD>INTEGER, default: 0 (i.e., CPU).
Number of GPUs per segment host to be used
for training the neural network.
For example, if you specify 4 for this parameter
and your database cluster is set up to have 4
segments per segment host, it means that each
segment will have a dedicated GPU.
A value of 0 means that CPUs, not GPUs, will
be used for training.
@note
We have seen some memory related issues when segments
share GPU resources.
For example, if you specify 1 for this parameter
and your database cluster is set up to have 4
segments per segment host, it means that all 4
segments on a segment host will share the same
GPU. The current recommended
configuration is 1 GPU per segment.
</DD>
<dt>validation_table (optional)</dt>
<dd>TEXT, default: none. Name of the table containing
the validation dataset.
Note that the validation dataset must be preprocessed
in the same way as the training dataset, so this
is the name of the output
table from running the image preprocessor on the validation dataset.
Using a validation dataset can mean a
longer training time, depending on its size.
This can be controlled using the 'metrics_compute_frequency'
parameter described below.</dd>
<DT>metrics_compute_frequency (optional)</DT>
<DD>INTEGER, default: once at the end of training
after 'num_iterations'. Frequency to compute per-iteration
metrics for the training dataset and validation dataset
(if specified). There can be considerable cost to
computing metrics every iteration, especially if the
training dataset is large. This parameter is a way of
controlling the frequency of those computations.
For example, if you specify 5, then metrics will be computed
every 5 iterations as well as at the end of training
after 'num_iterations'. If you use the default,
metrics will be computed only
once after 'num_iterations' have completed.
</DD>
<DT>warm_start (optional)</DT>
<DD>BOOLEAN, default: FALSE.
Initialize weights with the coefficients
from the last call of the fit
function. If set to TRUE, weights will be
initialized from the model table
generated by the previous training run.
@note
The warm start feature works based on the name of the
model output table from a previous training run.
When using warm start, do not drop the model output table
or the model output summary table
before calling the fit function, since these are needed to obtain the
weights from the previous run.
If you are not using warm start, the model output table
and the model output summary table must be dropped in
the usual way before calling the training function.
</DD>
<DT>name (optional)</DT>
<DD>TEXT, default: NULL.
Free text string to identify a name, if desired.
</DD>
<DT>description (optional)</DT>
<DD>TEXT, default: NULL.
Free text string to provide a description, if desired.
</DD>
</dl>
<b>Output tables</b>
<br>
The model table produced by fit contains the following columns:
<table class="output">
<tr>
<th>model_data</th>
<td>BYTEA8. Byte array containing the weights of the neural net.</td>
</tr>
<tr>
<th>model_arch</th>
<td>TEXT. A JSON representation of the model architecture
used in training.</td>
</tr>
</table>
A summary table named \<model\>_summary is also created, which has the following columns:
<table class="output">
<tr>
<th>source_table</th>
<td>Source table used for training.</td>
</tr>
<tr>
<th>model</th>
<td>Model output table produced by training.</td>
</tr>
<tr>
<th>independent_varname</th>
<td>Independent variables column from the original
source table in the image preprocessing step.</td>
</tr>
<tr>
<th>dependent_varname</th>
<td>Dependent variable column from the original
source table in the image preprocessing step.</td>
</tr>
<tr>
<th>model_arch_table</th>
<td>Name of the table containing
the model architecture and (optionally) the
initial model weights.</td>
</tr>
<tr>
<th>model_arch_table_id</th>
<td>The id of the model in
the model architecture table used for training.</td>
</tr>
<tr>
<th>compile_params</th>
<td>Compile parameters passed to Keras.</td>
</tr>
<tr>
<th>fit_params</th>
<td>Fit parameters passed to Keras.</td>
</tr>
<tr>
<th>num_iterations</th>
<td>Number of iterations of training completed.</td>
</tr>
<tr>
<th>validation_table</th>
<td>Name of the table containing
the validation dataset (if specified).</td>
</tr>
<tr>
<th>metrics_compute_frequency</th>
<td>Frequency that per-iteration metrics are computed
for the training dataset and validation
dataset.</td>
</tr>
<tr>
<th>name</th>
<td>Name of the training run (free text).</td>
</tr>
<tr>
<th>description</th>
<td>Description of the training run (free text).</td>
</tr>
<tr>
<th>model_type</th>
<td>General identifier for type of model trained.
Currently says 'madlib_keras'.</td>
</tr>
<tr>
<th>model_size</th>
<td>Size of the model in KB. Models are stored in
'bytea' data format which is used for binary strings
in PostgreSQL type databases.</td>
</tr>
<tr>
<th>start_training_time</th>
<td>Timestamp for start of training.</td>
</tr>
<tr>
<th>end_training_time</th>
<td>Timestamp for end of training.</td>
</tr>
<tr>
<th>metrics_elapsed_time</th>
<td> Array of elapsed time for metric computations as
per the 'metrics_compute_frequency' parameter.
Useful for drawing a curve showing loss, accuracy or
other metrics as a function of time.
For example, if 'metrics_compute_frequency=5'
this would be an array of elapsed time for every 5th
iteration, plus the last iteration.</td>
</tr>
<tr>
<th>madlib_version</th>
<td>Version of MADlib used.</td>
</tr>
<tr>
<th>num_classes</th>
<td>Count of distinct class values used.</td>
</tr>
<tr>
<th>class_values</th>
<td>Array of actual class values used.</td>
</tr>
<tr>
<th>dependent_vartype</th>
<td>Data type of the dependent variable.</td>
</tr>
<tr>
<th>normalizing_constant</th>
<td>Normalizing constant used from the
image preprocessing step.</td>
</tr>
<tr>
<th>metrics_type</th>
<td>Metric specified in the 'compile_params'.</td>
</tr>
<tr>
<th>training_metrics_final</th>
<td>Final value of the training
metric after all iterations have completed.
The metric reported is the one
specified in the 'metrics_type' parameter.</td>
</tr>
<tr>
<th>training_loss_final</th>
<td>Final value of the training loss after all
iterations have completed.</td>
</tr>
<tr>
<th>training_metrics</th>
<td>Array of training metrics as
per the 'metrics_compute_frequency' parameter.
For example, if 'metrics_compute_frequency=5'
this would be an array of metrics for every 5th
iteration, plus the last iteration.</td>
</tr>
<tr>
<th>training_loss</th>
<td>Array of training losses as
per the 'metrics_compute_frequency' parameter.
For example, if 'metrics_compute_frequency=5'
this would be an array of losses for every 5th
iteration, plus the last iteration.</td>
</tr>
<tr>
<th>validation_metrics_final</th>
<td>Final value of the validation
metric after all iterations have completed.
The metric reported is the one
specified in the 'metrics_type' parameter.</td>
</tr>
<tr>
<th>validation_loss_final</th>
<td>Final value of the validation loss after all
iterations have completed.</td>
</tr>
<tr>
<th>validation_metrics</th>
<td>Array of validation metrics as
per the 'metrics_compute_frequency' parameter.
For example, if 'metrics_compute_frequency=5'
this would be an array of metrics for every 5th
iteration, plus the last iteration.</td>
</tr>
<tr>
<th>validation_loss</th>
<td>Array of validation losses as
per the 'metrics_compute_frequency' parameter.
For example, if 'metrics_compute_frequency=5'
this would be an array of losses for every 5th
iteration, plus the last iteration.</td>
</tr>
<tr>
<th>metrics_iters</th>
<td>Array indicating the iterations for which
metrics are calculated, as derived from the
parameters 'num_iterations' and 'metrics_compute_frequency'.
For example, if 'num_iterations=5'
and 'metrics_compute_frequency=2', then 'metrics_iters' value
would be {2,4,5} indicating that metrics were computed
at iterations 2, 4 and 5 (at the end).
If 'num_iterations=5'
and 'metrics_compute_frequency=1', then 'metrics_iters' value
would be {1,2,3,4,5} indicating that metrics were computed
at every iteration.</td>
</tr>
</table>
@anchor keras_evaluate
@par Evaluate
The evaluation function has the following format:
<pre class="syntax">
madlib_keras_evaluate(
model_table,
test_table,
output_table,
gpus_per_host
)
</pre>
\b Arguments
<dl class="arglist">
<DT>model_table</DT>
<DD>TEXT. Name of the table containing the model
to use for validation.
</DD>
<DT>test_table</DT>
<dd>TEXT. Name of the table containing the evaluation dataset.
Note that test/validation data must be preprocessed in the same
way as the training dataset, so
this is the name of the output
table from the image preprocessor. Independent
and dependent variables are specified in the preprocessor
step which is why you do not need to explicitly state
them here as part of the evaluate function.</dd>
<DT>output_table</DT>
<DD>TEXT. Name of table that validation output will be
written to. Table contains:</DD>
<table class="output">
<tr>
<th>loss</th>
<td>Loss value on evaluation dataset.</td>
</tr>
<tr>
<th>metric</th>
<td>Metric value on evaluation dataset, where 'metrics_type'
below identifies the type of metric.</td>
</tr>
<tr>
<th>metrics_type</th>
<td>Type of metric that was used in the training step.</td>
</tr>
</table>
<DT>gpus_per_host (optional)</DT>
<DD>INTEGER, default: 0 (i.e., CPU).
Number of GPUs per segment host to be used
for evaluating the model.
For example, if you specify 4 for this parameter
and your database cluster is set up to have 4
segments per segment host, it means that each
segment will have a dedicated GPU.
A value of 0 means that CPUs, not GPUs, will
be used for evaluation.
@note
We have seen some memory related issues when segments
share GPU resources.
For example, if you specify 1 for this parameter
and your database cluster is set up to have 4
segments per segment host, it means that all 4
segments on a segment host will share the same
GPU. The current recommended
configuration is 1 GPU per segment.
</DD>
</DL>
@anchor keras_predict
@par Predict
The prediction function has the following format:
<pre class="syntax">
madlib_keras_predict(
model_table,
test_table,
id_col,
independent_varname,
output_table,
pred_type,
gpus_per_host
)
</pre>
\b Arguments
<dl class="arglist">
<DT>model_table</DT>
<DD>TEXT. Name of the table containing the model
to use for prediction.
</DD>
<DT>test_table</DT>
<DD>TEXT. Name of the table containing the dataset to
predict on. Note that test data is not preprocessed (unlike
fit and evaluate) so put one test image per row for prediction.
Also see the comment below for the 'independent_varname' parameter
regarding normalization.
</DD>
<DT>id_col</DT>
<DD>TEXT. Name of the id column in the test data table.
</DD>
<DT>independent_varname</DT>
<DD>TEXT. Column with independent variables in the test table.
If a 'normalizing_const' is specified when preprocessing the
training dataset, this same normalization will be applied to
the independent variables used in predict.
</DD>
<DT>output_table</DT>
<DD>TEXT. Name of the table that prediction output will be
written to. Table contains:</DD>
<table class="output">
<tr>
<th>id</th>
<td>Gives the 'id' for each prediction, corresponding to each row from the test_table.</td>
</tr>
<tr>
<th>estimated_COL_NAME</th>
<td>
(For pred_type='response') The estimated class
for classification, where
COL_NAME is the name of the column to be
predicted from test data.
</td>
</tr>
<tr>
<th>prob_CLASS</th>
<td>
(For pred_type='prob' for classification) The
probability of a given class.
There will be one column for each class
in the training data.
</td>
</tr>
</table>
<DT>pred_type (optional)</DT>
<DD>TEXT, default: 'response'. The type of output
desired, where 'response' gives the actual prediction
and 'prob' gives the probability value for each class.
</DD>
<DT>gpus_per_host (optional)</DT>
<DD>INTEGER, default: 0 (i.e., CPU).
Number of GPUs per segment host to be used
for running prediction.
For example, if you specify 4 for this parameter
and your database cluster is set up to have 4
segments per segment host, it means that each
segment will have a dedicated GPU.
A value of 0 means that CPUs, not GPUs, will
be used for prediction.
@note
We have seen some memory related issues when segments
share GPU resources.
For example, if you specify 1 for this parameter
and your database cluster is set up to have 4
segments per segment host, it means that all 4
segments on a segment host will share the same
GPU. The current recommended
configuration is 1 GPU per segment.
</DD>
</DL>
@anchor example
@par Examples
@note
Deep learning works best on very large datasets,
but that is not convenient for a quick introduction
to the syntax. So in this example we use an MLP on the well
known iris data set from https://archive.ics.uci.edu/ml/datasets/iris.
For more realistic examples with images please refer
to the deep learning notebooks
at https://github.com/apache/madlib-site/tree/asf-site/community-artifacts.
<h4>Classification</h4>
-# Create an input data set.
<pre class="example">
DROP TABLE IF EXISTS iris_data;
CREATE TABLE iris_data(
id serial,
attributes numeric[],
class_text varchar
);
INSERT INTO iris_data(id, attributes, class_text) VALUES
(1,ARRAY[5.1,3.5,1.4,0.2],'Iris-setosa'),
(2,ARRAY[4.9,3.0,1.4,0.2],'Iris-setosa'),
(3,ARRAY[4.7,3.2,1.3,0.2],'Iris-setosa'),
(4,ARRAY[4.6,3.1,1.5,0.2],'Iris-setosa'),
(5,ARRAY[5.0,3.6,1.4,0.2],'Iris-setosa'),
(6,ARRAY[5.4,3.9,1.7,0.4],'Iris-setosa'),
(7,ARRAY[4.6,3.4,1.4,0.3],'Iris-setosa'),
(8,ARRAY[5.0,3.4,1.5,0.2],'Iris-setosa'),
(9,ARRAY[4.4,2.9,1.4,0.2],'Iris-setosa'),
(10,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'),
(11,ARRAY[5.4,3.7,1.5,0.2],'Iris-setosa'),
(12,ARRAY[4.8,3.4,1.6,0.2],'Iris-setosa'),
(13,ARRAY[4.8,3.0,1.4,0.1],'Iris-setosa'),
(14,ARRAY[4.3,3.0,1.1,0.1],'Iris-setosa'),
(15,ARRAY[5.8,4.0,1.2,0.2],'Iris-setosa'),
(16,ARRAY[5.7,4.4,1.5,0.4],'Iris-setosa'),
(17,ARRAY[5.4,3.9,1.3,0.4],'Iris-setosa'),
(18,ARRAY[5.1,3.5,1.4,0.3],'Iris-setosa'),
(19,ARRAY[5.7,3.8,1.7,0.3],'Iris-setosa'),
(20,ARRAY[5.1,3.8,1.5,0.3],'Iris-setosa'),
(21,ARRAY[5.4,3.4,1.7,0.2],'Iris-setosa'),
(22,ARRAY[5.1,3.7,1.5,0.4],'Iris-setosa'),
(23,ARRAY[4.6,3.6,1.0,0.2],'Iris-setosa'),
(24,ARRAY[5.1,3.3,1.7,0.5],'Iris-setosa'),
(25,ARRAY[4.8,3.4,1.9,0.2],'Iris-setosa'),
(26,ARRAY[5.0,3.0,1.6,0.2],'Iris-setosa'),
(27,ARRAY[5.0,3.4,1.6,0.4],'Iris-setosa'),
(28,ARRAY[5.2,3.5,1.5,0.2],'Iris-setosa'),
(29,ARRAY[5.2,3.4,1.4,0.2],'Iris-setosa'),
(30,ARRAY[4.7,3.2,1.6,0.2],'Iris-setosa'),
(31,ARRAY[4.8,3.1,1.6,0.2],'Iris-setosa'),
(32,ARRAY[5.4,3.4,1.5,0.4],'Iris-setosa'),
(33,ARRAY[5.2,4.1,1.5,0.1],'Iris-setosa'),
(34,ARRAY[5.5,4.2,1.4,0.2],'Iris-setosa'),
(35,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'),
(36,ARRAY[5.0,3.2,1.2,0.2],'Iris-setosa'),
(37,ARRAY[5.5,3.5,1.3,0.2],'Iris-setosa'),
(38,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'),
(39,ARRAY[4.4,3.0,1.3,0.2],'Iris-setosa'),
(40,ARRAY[5.1,3.4,1.5,0.2],'Iris-setosa'),
(41,ARRAY[5.0,3.5,1.3,0.3],'Iris-setosa'),
(42,ARRAY[4.5,2.3,1.3,0.3],'Iris-setosa'),
(43,ARRAY[4.4,3.2,1.3,0.2],'Iris-setosa'),
(44,ARRAY[5.0,3.5,1.6,0.6],'Iris-setosa'),
(45,ARRAY[5.1,3.8,1.9,0.4],'Iris-setosa'),
(46,ARRAY[4.8,3.0,1.4,0.3],'Iris-setosa'),
(47,ARRAY[5.1,3.8,1.6,0.2],'Iris-setosa'),
(48,ARRAY[4.6,3.2,1.4,0.2],'Iris-setosa'),
(49,ARRAY[5.3,3.7,1.5,0.2],'Iris-setosa'),
(50,ARRAY[5.0,3.3,1.4,0.2],'Iris-setosa'),
(51,ARRAY[7.0,3.2,4.7,1.4],'Iris-versicolor'),
(52,ARRAY[6.4,3.2,4.5,1.5],'Iris-versicolor'),
(53,ARRAY[6.9,3.1,4.9,1.5],'Iris-versicolor'),
(54,ARRAY[5.5,2.3,4.0,1.3],'Iris-versicolor'),
(55,ARRAY[6.5,2.8,4.6,1.5],'Iris-versicolor'),
(56,ARRAY[5.7,2.8,4.5,1.3],'Iris-versicolor'),
(57,ARRAY[6.3,3.3,4.7,1.6],'Iris-versicolor'),
(58,ARRAY[4.9,2.4,3.3,1.0],'Iris-versicolor'),
(59,ARRAY[6.6,2.9,4.6,1.3],'Iris-versicolor'),
(60,ARRAY[5.2,2.7,3.9,1.4],'Iris-versicolor'),
(61,ARRAY[5.0,2.0,3.5,1.0],'Iris-versicolor'),
(62,ARRAY[5.9,3.0,4.2,1.5],'Iris-versicolor'),
(63,ARRAY[6.0,2.2,4.0,1.0],'Iris-versicolor'),
(64,ARRAY[6.1,2.9,4.7,1.4],'Iris-versicolor'),
(65,ARRAY[5.6,2.9,3.6,1.3],'Iris-versicolor'),
(66,ARRAY[6.7,3.1,4.4,1.4],'Iris-versicolor'),
(67,ARRAY[5.6,3.0,4.5,1.5],'Iris-versicolor'),
(68,ARRAY[5.8,2.7,4.1,1.0],'Iris-versicolor'),
(69,ARRAY[6.2,2.2,4.5,1.5],'Iris-versicolor'),
(70,ARRAY[5.6,2.5,3.9,1.1],'Iris-versicolor'),
(71,ARRAY[5.9,3.2,4.8,1.8],'Iris-versicolor'),
(72,ARRAY[6.1,2.8,4.0,1.3],'Iris-versicolor'),
(73,ARRAY[6.3,2.5,4.9,1.5],'Iris-versicolor'),
(74,ARRAY[6.1,2.8,4.7,1.2],'Iris-versicolor'),
(75,ARRAY[6.4,2.9,4.3,1.3],'Iris-versicolor'),
(76,ARRAY[6.6,3.0,4.4,1.4],'Iris-versicolor'),
(77,ARRAY[6.8,2.8,4.8,1.4],'Iris-versicolor'),
(78,ARRAY[6.7,3.0,5.0,1.7],'Iris-versicolor'),
(79,ARRAY[6.0,2.9,4.5,1.5],'Iris-versicolor'),
(80,ARRAY[5.7,2.6,3.5,1.0],'Iris-versicolor'),
(81,ARRAY[5.5,2.4,3.8,1.1],'Iris-versicolor'),
(82,ARRAY[5.5,2.4,3.7,1.0],'Iris-versicolor'),
(83,ARRAY[5.8,2.7,3.9,1.2],'Iris-versicolor'),
(84,ARRAY[6.0,2.7,5.1,1.6],'Iris-versicolor'),
(85,ARRAY[5.4,3.0,4.5,1.5],'Iris-versicolor'),
(86,ARRAY[6.0,3.4,4.5,1.6],'Iris-versicolor'),
(87,ARRAY[6.7,3.1,4.7,1.5],'Iris-versicolor'),
(88,ARRAY[6.3,2.3,4.4,1.3],'Iris-versicolor'),
(89,ARRAY[5.6,3.0,4.1,1.3],'Iris-versicolor'),
(90,ARRAY[5.5,2.5,4.0,1.3],'Iris-versicolor'),
(91,ARRAY[5.5,2.6,4.4,1.2],'Iris-versicolor'),
(92,ARRAY[6.1,3.0,4.6,1.4],'Iris-versicolor'),
(93,ARRAY[5.8,2.6,4.0,1.2],'Iris-versicolor'),
(94,ARRAY[5.0,2.3,3.3,1.0],'Iris-versicolor'),
(95,ARRAY[5.6,2.7,4.2,1.3],'Iris-versicolor'),
(96,ARRAY[5.7,3.0,4.2,1.2],'Iris-versicolor'),
(97,ARRAY[5.7,2.9,4.2,1.3],'Iris-versicolor'),
(98,ARRAY[6.2,2.9,4.3,1.3],'Iris-versicolor'),
(99,ARRAY[5.1,2.5,3.0,1.1],'Iris-versicolor'),
(100,ARRAY[5.7,2.8,4.1,1.3],'Iris-versicolor'),
(101,ARRAY[6.3,3.3,6.0,2.5],'Iris-virginica'),
(102,ARRAY[5.8,2.7,5.1,1.9],'Iris-virginica'),
(103,ARRAY[7.1,3.0,5.9,2.1],'Iris-virginica'),
(104,ARRAY[6.3,2.9,5.6,1.8],'Iris-virginica'),
(105,ARRAY[6.5,3.0,5.8,2.2],'Iris-virginica'),
(106,ARRAY[7.6,3.0,6.6,2.1],'Iris-virginica'),
(107,ARRAY[4.9,2.5,4.5,1.7],'Iris-virginica'),
(108,ARRAY[7.3,2.9,6.3,1.8],'Iris-virginica'),
(109,ARRAY[6.7,2.5,5.8,1.8],'Iris-virginica'),
(110,ARRAY[7.2,3.6,6.1,2.5],'Iris-virginica'),
(111,ARRAY[6.5,3.2,5.1,2.0],'Iris-virginica'),
(112,ARRAY[6.4,2.7,5.3,1.9],'Iris-virginica'),
(113,ARRAY[6.8,3.0,5.5,2.1],'Iris-virginica'),
(114,ARRAY[5.7,2.5,5.0,2.0],'Iris-virginica'),
(115,ARRAY[5.8,2.8,5.1,2.4],'Iris-virginica'),
(116,ARRAY[6.4,3.2,5.3,2.3],'Iris-virginica'),
(117,ARRAY[6.5,3.0,5.5,1.8],'Iris-virginica'),
(118,ARRAY[7.7,3.8,6.7,2.2],'Iris-virginica'),
(119,ARRAY[7.7,2.6,6.9,2.3],'Iris-virginica'),
(120,ARRAY[6.0,2.2,5.0,1.5],'Iris-virginica'),
(121,ARRAY[6.9,3.2,5.7,2.3],'Iris-virginica'),
(122,ARRAY[5.6,2.8,4.9,2.0],'Iris-virginica'),
(123,ARRAY[7.7,2.8,6.7,2.0],'Iris-virginica'),
(124,ARRAY[6.3,2.7,4.9,1.8],'Iris-virginica'),
(125,ARRAY[6.7,3.3,5.7,2.1],'Iris-virginica'),
(126,ARRAY[7.2,3.2,6.0,1.8],'Iris-virginica'),
(127,ARRAY[6.2,2.8,4.8,1.8],'Iris-virginica'),
(128,ARRAY[6.1,3.0,4.9,1.8],'Iris-virginica'),
(129,ARRAY[6.4,2.8,5.6,2.1],'Iris-virginica'),
(130,ARRAY[7.2,3.0,5.8,1.6],'Iris-virginica'),
(131,ARRAY[7.4,2.8,6.1,1.9],'Iris-virginica'),
(132,ARRAY[7.9,3.8,6.4,2.0],'Iris-virginica'),
(133,ARRAY[6.4,2.8,5.6,2.2],'Iris-virginica'),
(134,ARRAY[6.3,2.8,5.1,1.5],'Iris-virginica'),
(135,ARRAY[6.1,2.6,5.6,1.4],'Iris-virginica'),
(136,ARRAY[7.7,3.0,6.1,2.3],'Iris-virginica'),
(137,ARRAY[6.3,3.4,5.6,2.4],'Iris-virginica'),
(138,ARRAY[6.4,3.1,5.5,1.8],'Iris-virginica'),
(139,ARRAY[6.0,3.0,4.8,1.8],'Iris-virginica'),
(140,ARRAY[6.9,3.1,5.4,2.1],'Iris-virginica'),
(141,ARRAY[6.7,3.1,5.6,2.4],'Iris-virginica'),
(142,ARRAY[6.9,3.1,5.1,2.3],'Iris-virginica'),
(143,ARRAY[5.8,2.7,5.1,1.9],'Iris-virginica'),
(144,ARRAY[6.8,3.2,5.9,2.3],'Iris-virginica'),
(145,ARRAY[6.7,3.3,5.7,2.5],'Iris-virginica'),
(146,ARRAY[6.7,3.0,5.2,2.3],'Iris-virginica'),
(147,ARRAY[6.3,2.5,5.0,1.9],'Iris-virginica'),
(148,ARRAY[6.5,3.0,5.2,2.0],'Iris-virginica'),
(149,ARRAY[6.2,3.4,5.4,2.3],'Iris-virginica'),
(150,ARRAY[5.9,3.0,5.1,1.8],'Iris-virginica');
</pre>
Create a test/validation dataset from the training data:
<pre class="example">
DROP TABLE IF EXISTS iris_train, iris_test;
-- Set seed so results are reproducible
SELECT setseed(0);
SELECT madlib.train_test_split('iris_data', -- Source table
'iris', -- Output table root name
0.8, -- Train proportion
NULL, -- Test proportion (0.2)
NULL, -- Strata definition
NULL, -- Output all columns
NULL, -- Sample without replacement
TRUE -- Separate output tables
);
SELECT COUNT(*) FROM iris_train;
</pre>
<pre class="result">
count
------+
120
</pre>
-# Call the preprocessor for deep learning. For the training dataset:
<pre class="example">
DROP TABLE IF EXISTS mlp_prediction;
\\x on
DROP TABLE IF EXISTS iris_train_packed, iris_train_packed_summary;
SELECT madlib.training_preprocessor_dl('iris_train', -- Source table
'iris_train_packed', -- Output table
'class_text', -- Dependent variable
'attributes' -- Independent variable
);
SELECT * FROM iris_train_packed_summary;
</pre>
<pre class="result">
-[ RECORD 1 ]-------+---------------------------------------------
source_table | iris_train
output_table | iris_train_packed
dependent_varname | class_text
independent_varname | attributes
dependent_vartype | character varying
class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
buffer_size | 60
normalizing_const | 1.0
num_classes | 3
</pre>
For the validation dataset:
<pre class="example">
DROP TABLE IF EXISTS iris_test_packed, iris_test_packed_summary;
SELECT madlib.validation_preprocessor_dl('iris_test', -- Source table
'iris_test_packed', -- Output table
'class_text', -- Dependent variable
'attributes', -- Independent variable
'iris_train_packed' -- From training preprocessor step
);
SELECT * FROM iris_test_packed_summary;
</pre>
<pre class="result">
-[ RECORD 1 ]-------+---------------------------------------------
source_table | iris_test
output_table | iris_test_packed
dependent_varname | class_text
independent_varname | attributes
dependent_vartype | character varying
class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
buffer_size | 15
normalizing_const | 1.0
num_classes | 3
</pre>
-# Define and load model architecture. Use Keras to define
the model architecture:
<pre class="example">
import keras
from keras.models import Sequential
from keras.layers import Dense
model_simple = Sequential()
model_simple.add(Dense(10, activation='relu', input_shape=(4,)))
model_simple.add(Dense(10, activation='relu'))
model_simple.add(Dense(3, activation='softmax'))
model_simple.summary()
\verbatim
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 10) 50
_________________________________________________________________
dense_2 (Dense) (None, 10) 110
_________________________________________________________________
dense_3 (Dense) (None, 3) 33
=================================================================
Total params: 193
Trainable params: 193
Non-trainable params: 0
\endverbatim
</pre>
Export the model to JSON:
<pre class="example">
model_simple.to_json()
</pre>
<pre class="result">
'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}'
</pre>
Load into model architecture table:
<pre class="example">
DROP TABLE IF EXISTS model_arch_library;
SELECT madlib.load_keras_model('model_arch_library', -- Output table,
$$
{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}
$$
::json, -- JSON blob
NULL, -- Weights
'Sophie', -- Name
'A simple model' -- Descr
);
</pre>
-# Train model and view summary table:
<pre class="example">
DROP TABLE IF EXISTS iris_model, iris_model_summary;
SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table
'iris_model', -- model output table
'model_arch_library', -- model arch table
1, -- model arch id
$$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params
$$ batch_size=5, epochs=3 $$, -- fit_params
10 -- num_iterations
);
SELECT * FROM iris_model_summary;
</pre>
<pre class="result">
-[ RECORD 1 ]-------------+--------------------------------------------------------------------------
source_table | iris_train_packed
model | iris_model
dependent_varname | class_text
independent_varname | attributes
model_arch_table | model_arch_library
model_arch_id | 1
compile_params | loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']
fit_params | batch_size=5, epochs=3
num_iterations | 10
validation_table |
metrics_compute_frequency | 10
name |
description |
model_type | madlib_keras
model_size | 0.7900390625
start_training_time | 2019-06-05 20:55:15.785034
end_training_time | 2019-06-05 20:55:25.373035
metrics_elapsed_time | {9.58799290657043}
madlib_version | 1.16
num_classes | 3
class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
dependent_vartype | character varying
normalizing_const | 1
metrics_type | {accuracy}
training_metrics_final | 0.766666650772
training_loss_final | 0.721103310585
training_metrics | {0.766666650772095}
training_loss | {0.721103310585022}
validation_metrics_final |
validation_loss_final |
validation_metrics |
validation_loss |
metrics_iters | {10}
</pre>
-# Use the test dataset to evaluate the model we built above:
<pre class="example">
DROP TABLE IF EXISTS iris_validate;
SELECT madlib.madlib_keras_evaluate('iris_model', -- model
'iris_test_packed', -- test table
'iris_validate' -- output table
);
SELECT * FROM iris_validate;
</pre>
<pre class="result">
loss | metric | metrics_type
-------------------+-------------------+--------------
0.719491899013519 | 0.800000011920929 | {accuracy}
(1 row)
</pre>
-# Predict. We will use the validation dataset for prediction
as well, which is not usual but serves to show the
syntax. The prediction is in the 'estimated_class_text'
column:
<pre class="example">
DROP TABLE IF EXISTS iris_predict;
SELECT madlib.madlib_keras_predict('iris_model', -- model
'iris_test', -- test_table
'id', -- id column
'attributes', -- independent var
'iris_predict' -- output table
);
SELECT * FROM iris_predict ORDER BY id;
</pre>
<pre class="result">
id | estimated_class_text
-----+----------------------
4 | Iris-setosa
6 | Iris-setosa
8 | Iris-setosa
12 | Iris-setosa
13 | Iris-setosa
15 | Iris-setosa
24 | Iris-setosa
30 | Iris-setosa
38 | Iris-setosa
49 | Iris-setosa
60 | Iris-virginica
68 | Iris-versicolor
69 | Iris-versicolor
76 | Iris-versicolor
78 | Iris-versicolor
81 | Iris-versicolor
85 | Iris-virginica
90 | Iris-versicolor
91 | Iris-versicolor
94 | Iris-virginica
104 | Iris-virginica
106 | Iris-versicolor
107 | Iris-virginica
110 | Iris-virginica
119 | Iris-versicolor
127 | Iris-virginica
129 | Iris-virginica
134 | Iris-versicolor
139 | Iris-virginica
144 | Iris-virginica
(30 rows)
</pre>
Count misclassifications:
<pre class="example">
SELECT COUNT(*) FROM iris_predict JOIN iris_test USING (id)
WHERE iris_predict.estimated_class_text != iris_test.class_text;
</pre>
<pre class="result">
count
-------+
6
(1 row)
</pre>
Percent correctly classified (i.e., test accuracy):
<pre class="example">
SELECT round(count(*)*100/(150*0.2),2) as test_accuracy_percent from
(select iris_test.class_text as actual, iris_predict.estimated_class_text as estimated
from iris_predict inner join iris_test
on iris_test.id=iris_predict.id) q
WHERE q.actual=q.estimated;
</pre>
<pre class="result">
test_accuracy_percent
-----------------------+
80.00
(1 row)
</pre>
<h4>Classification with Other Parameters</h4>
-# Validation dataset. Now use a validation dataset
and compute metrics every 3rd iteration using
the 'metrics_compute_frequency' parameter. This can
help reduce run time if you do not need metrics
computed at every iteration.
<pre class="example">
DROP TABLE IF EXISTS iris_model, iris_model_summary;
SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table
'iris_model', -- model output table
'model_arch_library', -- model arch table
1, -- model arch id
$$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params
$$ batch_size=5, epochs=3 $$, -- fit_params
10, -- num_iterations
0, -- GPUs per host
'iris_test_packed', -- validation dataset
3, -- metrics compute frequency
FALSE, -- warm start
'Sophie L.', -- name
'Simple MLP for iris dataset' -- description
);
SELECT * FROM iris_model_summary;
</pre>
<pre class="result">
-[ RECORD 1 ]-------------+--------------------------------------------------------------------------
source_table | iris_train_packed
model | iris_model
dependent_varname | class_text
independent_varname | attributes
model_arch_table | model_arch_library
model_arch_id | 1
compile_params | loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']
fit_params | batch_size=5, epochs=3
num_iterations | 10
validation_table | iris_test_packed
metrics_compute_frequency | 3
name | Sophie L.
description | Simple MLP for iris dataset
model_type | madlib_keras
model_size | 0.7900390625
start_training_time | 2019-06-05 20:58:23.224629
end_training_time | 2019-06-05 20:58:35.477499
metrics_elapsed_time | {4.69859290122986,8.2062520980835,10.8104848861694,12.2528700828552}
madlib_version | 1.16
num_classes | 3
class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
dependent_vartype | character varying
normalizing_const | 1
metrics_type | {accuracy}
training_metrics_final | 0.941666662693
training_loss_final | 0.40586027503
training_metrics | {0.699999988079071,0.800000011920929,0.899999976158142,0.941666662693024}
training_loss | {0.825238645076752,0.534248650074005,0.427499741315842,0.405860275030136}
validation_metrics_final | 0.866666674614
validation_loss_final | 0.409001916647
validation_metrics | {0.733333349227905,0.733333349227905,0.866666674613953,0.866666674613953}
validation_loss | {0.827081918716431,0.536275088787079,0.431326270103455,0.409001916646957}
metrics_iters | {3,6,9,10}
</pre>
-# Predict probabilities for each class:
<pre class="example">
DROP TABLE IF EXISTS iris_predict;
SELECT madlib.madlib_keras_predict('iris_model', -- model
'iris_test', -- test_table
'id', -- id column
'attributes', -- independent var
'iris_predict', -- output table
'prob' -- response type
);
SELECT * FROM iris_predict ORDER BY id;
</pre>
<pre class="result">
id | prob_Iris-setosa | prob_Iris-versicolor | prob_Iris-virginica
-----+------------------+----------------------+---------------------
4 | 0.9241953 | 0.059390426 | 0.01641435
6 | 0.9657151 | 0.02809224 | 0.0061926916
8 | 0.9543316 | 0.03670931 | 0.008959154
12 | 0.93851465 | 0.048681837 | 0.012803554
13 | 0.93832576 | 0.04893658 | 0.012737647
15 | 0.98717564 | 0.01091238 | 0.0019119986
24 | 0.9240628 | 0.060805064 | 0.015132156
30 | 0.92063266 | 0.062279057 | 0.017088294
38 | 0.9353765 | 0.051353406 | 0.013270103
49 | 0.9709265 | 0.023811856 | 0.005261566
60 | 0.034395564 | 0.5260507 | 0.43955377
68 | 0.031360663 | 0.53689945 | 0.43173987
69 | 0.0098787155 | 0.46121457 | 0.52890676
76 | 0.031186827 | 0.5644549 | 0.40435827
78 | 0.00982633 | 0.48929632 | 0.5008774
81 | 0.03658528 | 0.53248984 | 0.4309249
85 | 0.015423619 | 0.48452598 | 0.5000504
90 | 0.026857043 | 0.5155698 | 0.45757324
91 | 0.013675574 | 0.47155368 | 0.5147708
94 | 0.073440716 | 0.5418821 | 0.3846772
104 | 0.0021637122 | 0.3680499 | 0.62978643
106 | 0.00052832486 | 0.30891812 | 0.6905536
107 | 0.007315576 | 0.40949163 | 0.5831927
110 | 0.0022259138 | 0.4058138 | 0.59196025
119 | 0.00018505375 | 0.24510723 | 0.7547077
127 | 0.009542585 | 0.46958733 | 0.52087003
129 | 0.0019719477 | 0.36288205 | 0.635146
134 | 0.0056418083 | 0.43401477 | 0.56034344
139 | 0.01067015 | 0.4755573 | 0.51377255
144 | 0.0018909549 | 0.37689638 | 0.6212126
(30 rows)
</pre>
-# Warm start. Next, use the warm_start parameter
to continue learning, using the coefficients from
the run above. Note that we don't drop the
model table or model summary table:
<pre class="example">
SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table
'iris_model', -- model output table
'model_arch_library', -- model arch table
1, -- model arch id
$$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params
$$ batch_size=5, epochs=3 $$, -- fit_params
5, -- num_iterations
0, -- GPUs per host
'iris_test_packed', -- validation dataset
1, -- metrics compute frequency
TRUE, -- warm start
'Sophie L.', -- name
'Simple MLP for iris dataset' -- description
);
SELECT * FROM iris_model_summary;
</pre>
<pre class="result">
-[ RECORD 1 ]-------------+--------------------------------------------------------------------------------------------
source_table | iris_train_packed
model | iris_model
dependent_varname | class_text
independent_varname | attributes
model_arch_table | model_arch_library
model_arch_id | 1
compile_params | loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']
fit_params | batch_size=5, epochs=3
num_iterations | 5
validation_table | iris_test_packed
metrics_compute_frequency | 1
name | Sophie L.
description | Simple MLP for iris dataset
model_type | madlib_keras
model_size | 0.7900390625
start_training_time | 2019-06-05 20:59:43.971792
end_training_time | 2019-06-05 20:59:51.654586
metrics_elapsed_time | {2.89326310157776,4.14273309707642,5.24781513214111,6.34498596191406,7.68279695510864}
madlib_version | 1.16
num_classes | 3
class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
dependent_vartype | character varying
normalizing_const | 1
metrics_type | {accuracy}
training_metrics_final | 0.933333337307
training_loss_final | 0.334455043077
training_metrics | {0.933333337306976,0.933333337306976,0.975000023841858,0.975000023841858,0.933333337306976}
training_loss | {0.386842548847198,0.370587915182114,0.357161343097687,0.344598710536957,0.334455043077469}
validation_metrics_final | 0.866666674614
validation_loss_final | 0.34414178133
validation_metrics | {0.866666674613953,0.866666674613953,0.933333337306976,0.866666674613953,0.866666674613953}
validation_loss | {0.391442179679871,0.376414686441422,0.362262904644012,0.351912915706635,0.344141781330109}
metrics_iters | {1,2,3,4,5}
</pre>
Note that the loss and accuracy values pick up from where the previous run left off.
<h4>Transfer Learning</h4>
Here we want
to start with initial weights from a pre-trained model
rather than training from scratch. We also want to use
a model architecture with the earlier feature layer(s)
frozen to save on training time. The example below is
somewhat contrived but gives you the idea of the steps.
-# Define and load a model architecture with
the 1st hidden layer frozen:
<pre class="example">
model_transfer = Sequential()
model_transfer.add(Dense(10, activation='relu', input_shape=(4,), trainable=False))
model_transfer.add(Dense(10, activation='relu'))
model_transfer.add(Dense(3, activation='softmax'))
model_transfer.summary()
\verbatim
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 10) 50
_________________________________________________________________
dense_2 (Dense) (None, 10) 110
_________________________________________________________________
dense_3 (Dense) (None, 3) 33
=================================================================
Total params: 193
Trainable params: 143
Non-trainable params: 50
\endverbatim
</pre>
Export the model to JSON:
<pre class="example">
model_transfer.to_json()
</pre>
<pre class="result">
'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": false, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}'
</pre>
Load into model architecture table:
<pre class="example">
SELECT madlib.load_keras_model('model_arch_library', -- Output table,
$$
{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": false, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}
$$
::json, -- JSON blob
NULL, -- Weights
'Maria', -- Name
'A transfer model' -- Descr
);
</pre>
Fetch the weights from a previous MADlib run. (Normally
these would be downloaded from a source that trained
the same model architecture on a related dataset.)
<pre class="example">
UPDATE model_arch_library SET model_weights = model_data FROM iris_model WHERE model_id = 2;
</pre>
Now train the model using the transfer model and the pre-trained weights:
<pre class="example">
DROP TABLE IF EXISTS iris_model, iris_model_summary;
SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table
'iris_model', -- model output table
'model_arch_library', -- model arch table
2, -- model arch id
$$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params
$$ batch_size=5, epochs=3 $$, -- fit_params
10 -- num_iterations
);
SELECT * FROM iris_model_summary;
</pre>
<pre class="result">
-[ RECORD 1 ]-------------+--------------------------------------------------------------------------
source_table | iris_train_packed
model | iris_model
dependent_varname | class_text
independent_varname | attributes
model_arch_table | model_arch_library
model_arch_id | 2
compile_params | loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']
fit_params | batch_size=5, epochs=3
num_iterations | 10
validation_table |
metrics_compute_frequency | 10
name |
description |
model_type | madlib_keras
model_size | 0.7900390625
start_training_time | 2019-06-05 21:01:03.998422
end_training_time | 2019-06-05 21:01:13.525838
metrics_elapsed_time | {9.52741599082947}
madlib_version | 1.16
num_classes | 3
class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
dependent_vartype | character varying
normalizing_const | 1
metrics_type | {accuracy}
training_metrics_final | 0.975000023842
training_loss_final | 0.245171800256
training_metrics | {0.975000023841858}
training_loss | {0.245171800255775}
validation_metrics_final |
validation_loss_final |
validation_metrics |
validation_loss |
metrics_iters | {10}
</pre>
@anchor notes
@par Notes
1. Refer to the deep learning section of the Apache MADlib
wiki [5] for important information including supported libraries
and versions.
2. Classification is currently supported, not regression.
3. On the effect of database cluster size: as the database cluster
size increases, the per iteration loss will be higher since the
model only sees 1/n of the data, where n is the number of segments.
However, each iteration runs faster than single node because it is only
traversing 1/n of the data. For large data sets, all else being equal,
a bigger cluster will achieve a given accuracy faster than a single node
although it may take more iterations to achieve that accuracy.
However, for highly non-convex solution spaces, convergence behavior
may diminish as cluster size increases. Ensure that each segment has
sufficient volume of data and examples of each class value.
@anchor background
@par Technical Background
For an introduction to deep learning foundations, including MLP and CNN,
refer to [6].
@anchor literature
@literature
@anchor mlp-lit-1
[1] https://keras.io/
[2] https://www.tensorflow.org/
[3] "Neural Networks for Machine Learning", Lectures 6a and 6b on mini-batch gradient descent,
Geoffrey Hinton with Nitish Srivastava and Kevin Swersky,
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
[4] https://keras.io/models/model/
[5] Deep learning section of Apache MADlib wiki, https://cwiki.apache.org/confluence/display/MADLIB/Deep+Learning
[6] Deep Learning, Ian Goodfellow, Yoshua Bengio and Aaron Courville, MIT Press, 2016.
@anchor related
@par Related Topics
File madlib_keras.sql_in documenting the training, evaluate and predict functions.
*/
-- Training entry point, full 13-argument form. The 7- to 12-argument
-- overloads of the same name in this file all forward to this signature.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
    source_table                VARCHAR,
    model                       VARCHAR,
    model_arch_table            VARCHAR,
    model_arch_id               INTEGER,
    compile_params              VARCHAR,
    fit_params                  VARCHAR,
    num_iterations              INTEGER,
    gpus_per_host               INTEGER,
    validation_table            VARCHAR,
    metrics_compute_frequency   INTEGER,
    warm_start                  BOOLEAN,
    name                        VARCHAR,
    description                 VARCHAR
) RETURNS VOID AS $$
    PythonFunctionBodyOnly(`deep_learning', `madlib_keras')
    # PL/Python exposes the SQL arguments as Python globals, so the whole
    # global namespace is handed to the Python implementation as keywords.
    with AOControl(False):
        madlib_keras.fit(**globals())
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- 12-argument overload: same as the 13-argument form with description
-- left as NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
    source_table                VARCHAR,
    model                       VARCHAR,
    model_arch_table            VARCHAR,
    model_arch_id               INTEGER,
    compile_params              VARCHAR,
    fit_params                  VARCHAR,
    num_iterations              INTEGER,
    gpus_per_host               INTEGER,
    validation_table            VARCHAR,
    metrics_compute_frequency   INTEGER,
    warm_start                  BOOLEAN,
    name                        VARCHAR
) RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_fit(
        $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12,
        NULL    -- description
    );
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- 11-argument overload: name and description are passed on as NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
    source_table                VARCHAR,
    model                       VARCHAR,
    model_arch_table            VARCHAR,
    model_arch_id               INTEGER,
    compile_params              VARCHAR,
    fit_params                  VARCHAR,
    num_iterations              INTEGER,
    gpus_per_host               INTEGER,
    validation_table            VARCHAR,
    metrics_compute_frequency   INTEGER,
    warm_start                  BOOLEAN
) RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_fit(
        $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
        NULL,   -- name
        NULL    -- description
    );
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- 10-argument overload: warm_start, name and description are passed on
-- as NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
    source_table                VARCHAR,
    model                       VARCHAR,
    model_arch_table            VARCHAR,
    model_arch_id               INTEGER,
    compile_params              VARCHAR,
    fit_params                  VARCHAR,
    num_iterations              INTEGER,
    gpus_per_host               INTEGER,
    validation_table            VARCHAR,
    metrics_compute_frequency   INTEGER
) RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_fit(
        $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
        NULL,   -- warm_start
        NULL,   -- name
        NULL    -- description
    );
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- 9-argument overload: everything after validation_table is passed on
-- as NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
    source_table                VARCHAR,
    model                       VARCHAR,
    model_arch_table            VARCHAR,
    model_arch_id               INTEGER,
    compile_params              VARCHAR,
    fit_params                  VARCHAR,
    num_iterations              INTEGER,
    gpus_per_host               INTEGER,
    validation_table            VARCHAR
) RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_fit(
        $1, $2, $3, $4, $5, $6, $7, $8, $9,
        NULL,   -- metrics_compute_frequency
        NULL,   -- warm_start
        NULL,   -- name
        NULL    -- description
    );
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- 8-argument overload: everything after gpus_per_host is passed on as NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
    source_table                VARCHAR,
    model                       VARCHAR,
    model_arch_table            VARCHAR,
    model_arch_id               INTEGER,
    compile_params              VARCHAR,
    fit_params                  VARCHAR,
    num_iterations              INTEGER,
    gpus_per_host               INTEGER
) RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_fit(
        $1, $2, $3, $4, $5, $6, $7, $8,
        NULL,   -- validation_table
        NULL,   -- metrics_compute_frequency
        NULL,   -- warm_start
        NULL,   -- name
        NULL    -- description
    );
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- Shortest training overload (7 arguments): gpus_per_host is fixed at 0
-- and all remaining optional arguments are passed on as NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
    source_table                VARCHAR,
    model                       VARCHAR,
    model_arch_table            VARCHAR,
    model_arch_id               INTEGER,
    compile_params              VARCHAR,
    fit_params                  VARCHAR,
    num_iterations              INTEGER
) RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_fit(
        $1, $2, $3, $4, $5, $6, $7,
        0,      -- gpus_per_host
        NULL,   -- validation_table
        NULL,   -- metrics_compute_frequency
        NULL,   -- warm_start
        NULL,   -- name
        NULL    -- description
    );
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- State transition function of the fit_step aggregate: takes the running
-- BYTEA state plus the eleven aggregate inputs and returns the new state.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.fit_transition(
    state                       BYTEA,
    dependent_var               SMALLINT[],
    independent_var             REAL[],
    model_architecture          TEXT,
    compile_params              TEXT,
    fit_params                  TEXT,
    current_seg_id              INTEGER,
    seg_ids                     INTEGER[],
    images_per_seg              INTEGER[],
    gpus_per_host               INTEGER,
    segments_per_host           INTEGER,
    prev_serialized_weights     BYTEA
) RETURNS BYTEA AS $$
    PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras')
    # SQL arguments reach the Python side as keywords through globals().
    return madlib_keras.fit_transition(**globals())
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-- Merge function of the fit_step aggregate: combines two BYTEA states
-- into one. It is only wired into the aggregate on non-PostgreSQL builds.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.fit_merge(
    state1      BYTEA,
    state2      BYTEA
) RETURNS BYTEA AS $$
    PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras')
    # Both states are forwarded by keyword through globals().
    return madlib_keras.fit_merge(**globals())
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-- Final function of the fit_step aggregate: maps the accumulated BYTEA
-- state to the BYTEA value the aggregate returns.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.fit_final(
    state       BYTEA
) RETURNS BYTEA AS $$
    PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras')
    # The state is forwarded by keyword through globals().
    return madlib_keras.fit_final(**globals())
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-- Recreate the fit_step aggregate. The DROP must list exactly the same
-- argument types as the CREATE below, so keep the two lists in sync.
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.fit_step(
    SMALLINT[],     /* dep_var */
    REAL[],         /* ind_var */
    TEXT,           /* model_architecture */
    TEXT,           /* compile_params */
    TEXT,           /* fit_params */
    INTEGER,        /* current_seg_id */
    INTEGER[],      /* seg_ids */
    INTEGER[],      /* images_per_seg */
    INTEGER,        /* gpus_per_host */
    INTEGER,        /* segments_per_host */
    BYTEA           /* serialized_weights */
);
CREATE AGGREGATE MADLIB_SCHEMA.fit_step(
    SMALLINT[],     /* dep_var */
    REAL[],         /* ind_var */
    TEXT,           /* model_architecture */
    TEXT,           /* compile_params */
    TEXT,           /* fit_params */
    INTEGER,        /* current_seg_id */
    INTEGER[],      /* seg_ids */
    INTEGER[],      /* images_per_seg */
    INTEGER,        /* gpus_per_host */
    INTEGER,        /* segments_per_host */
    BYTEA           /* serialized_weights */
)(
    STYPE = BYTEA,
    SFUNC = MADLIB_SCHEMA.fit_transition,
    -- The merge function is left out when building for PostgreSQL.
    m4_ifdef(`__POSTGRESQL__', `', `PREFUNC = MADLIB_SCHEMA.fit_merge,')
    FINALFUNC = MADLIB_SCHEMA.fit_final
);
-- Prediction entry point, full 7-argument form. The 5- and 6-argument
-- overloads of the same name in this file forward to this signature.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
    model_table             VARCHAR,
    test_table              VARCHAR,
    id_col                  VARCHAR,
    independent_varname     VARCHAR,
    output_table            VARCHAR,
    pred_type               VARCHAR,
    gpus_per_host           INTEGER
) RETURNS VOID AS $$
    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict')
    # Arguments are passed positionally, in SQL declaration order, after
    # the schema name.
    with AOControl(False):
        madlib_keras_predict.predict(
            schema_madlib, model_table, test_table, id_col,
            independent_varname, output_table, pred_type, gpus_per_host)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- 6-argument overload: gpus_per_host is fixed at 0.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
    model_table             VARCHAR,
    test_table              VARCHAR,
    id_col                  VARCHAR,
    independent_varname     VARCHAR,
    output_table            VARCHAR,
    pred_type               VARCHAR
) RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_predict(
        $1, $2, $3, $4, $5, $6,
        0       -- gpus_per_host
    );
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- 5-argument overload: pred_type is passed on as NULL and gpus_per_host
-- is fixed at 0.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
    model_table             VARCHAR,
    test_table              VARCHAR,
    id_col                  VARCHAR,
    independent_varname     VARCHAR,
    output_table            VARCHAR
) RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_predict(
        $1, $2, $3, $4, $5,
        NULL,   -- pred_type
        0       -- gpus_per_host
    );
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- Internal prediction helper returning a DOUBLE PRECISION array.
-- NOTE(review): this helper is declared VOLATILE / MODIFIES SQL DATA while
-- the other internal helpers in this file are declared NO SQL; confirm
-- that the difference is intentional.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_predict(
    independent_var         REAL[],
    model_architecture      TEXT,
    model_data              BYTEA,
    is_response             BOOLEAN,
    normalizing_const       DOUBLE PRECISION,
    current_seg_id          INTEGER,
    seg_ids                 INTEGER[],
    images_per_seg          INTEGER[],
    gpus_per_host           INTEGER,
    segments_per_host       INTEGER
) RETURNS DOUBLE PRECISION[] AS $$
    PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras_predict')
    # SQL arguments reach the Python side as keywords through globals().
    return madlib_keras_predict.internal_keras_predict(**globals())
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- Evaluation entry point, full 4-argument form. The whole function body is
-- generated by the macro call below, which names the evaluate routine of
-- the madlib_keras module in the deep_learning package.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate(
    model_table         VARCHAR,
    test_table          VARCHAR,
    output_table        VARCHAR,
    gpus_per_host       INTEGER
) RETURNS VOID AS $$
    PythonFunction(`deep_learning', `madlib_keras', `evaluate')
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- 3-argument overload: gpus_per_host is passed on as NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate(
    model_table         VARCHAR,
    test_table          VARCHAR,
    output_table        VARCHAR
) RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_evaluate(
        $1, $2, $3,
        NULL    -- gpus_per_host
    );
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- State transition function of the internal_keras_evaluate aggregate:
-- takes the running three-element REAL state plus the ten aggregate
-- inputs and returns the new state.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_eval_transition(
    state                   REAL[3],
    dependent_var           SMALLINT[],
    independent_var         REAL[],
    model_architecture      TEXT,
    serialized_weights      BYTEA,
    compile_params          TEXT,
    current_seg_id          INTEGER,
    seg_ids                 INTEGER[],
    images_per_seg          INTEGER[],
    gpus_per_host           INTEGER,
    segments_per_host       INTEGER
) RETURNS REAL[3] AS $$
    PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras')
    # SQL arguments reach the Python side as keywords through globals().
    return madlib_keras.internal_keras_eval_transition(**globals())
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-- Merge function of the internal_keras_evaluate aggregate: combines two
-- three-element REAL states into one. It is only wired into the aggregate
-- on non-PostgreSQL builds.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_eval_merge(
    state1      REAL[3],
    state2      REAL[3]
) RETURNS REAL[3] AS $$
    PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras')
    # Both states are forwarded by keyword through globals().
    return madlib_keras.internal_keras_eval_merge(**globals())
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-- Final function of the internal_keras_evaluate aggregate: reduces the
-- three-element REAL state to the two-element REAL result.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_eval_final(
    state       REAL[3]
) RETURNS REAL[2] AS $$
    PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras')
    # The state is forwarded by keyword through globals().
    return madlib_keras.internal_keras_eval_final(**globals())
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
/*
 * Drop any existing definition of the evaluation aggregate so that the
 * CREATE AGGREGATE statement for the same name can be run again. The
 * argument types listed here must match that statement one for one.
 */
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.internal_keras_evaluate(
    /* dependent_var */         SMALLINT[],
    /* independent_var */       REAL[],
    /* model_architecture */    TEXT,
    /* model_data */            BYTEA,
    /* compile_params */        TEXT,
    /* current_seg_id */        INTEGER,
    /* seg_ids */               INTEGER[],
    /* images_per_seg */        INTEGER[],
    /* gpus_per_host */         INTEGER,
    /* segments_per_host */     INTEGER
);
/*
 * Evaluation aggregate.
 *
 * State is a three-element REAL array that starts at all zeros. Each
 * input row is folded into the state by the eval transition function and
 * the eval final function turns the finished state into the result. On
 * platforms other than plain PostgreSQL the eval merge function is also
 * registered so that partial states can be combined.
 */
CREATE AGGREGATE MADLIB_SCHEMA.internal_keras_evaluate(
    /* dependent_var */         SMALLINT[],
    /* independent_var */       REAL[],
    /* model_architecture */    TEXT,
    /* model_data */            BYTEA,
    /* compile_params */        TEXT,
    /* current_seg_id */        INTEGER,
    /* seg_ids */               INTEGER[],
    /* images_per_seg */        INTEGER[],
    /* gpus_per_host */         INTEGER,
    /* segments_per_host */     INTEGER
)(
    STYPE = REAL[3],
    INITCOND = '{0,0,0}',
    SFUNC = MADLIB_SCHEMA.internal_keras_eval_transition,
    m4_ifdef(`__POSTGRESQL__', `', `PREFUNC = MADLIB_SCHEMA.internal_keras_eval_merge,')
    FINALFUNC = MADLIB_SCHEMA.internal_keras_eval_final
);
/*
 * One-argument overload of the fit entry point.
 *
 * Takes a single message string and returns the text produced by
 * fit_help in the madlib_keras Python module; every PL/Python global,
 * including the message argument, is passed through as a keyword
 * argument.
 *
 * Declared IMMUTABLE and, where function properties are supported,
 * CONTAINS SQL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
    message     VARCHAR
) RETURNS VARCHAR AS $$
    PythonFunctionBodyOnly(`deep_learning', `madlib_keras')
    # The return statement has to be nested inside the with block: a with
    # statement that has no indented body is a Python syntax error.
    # NOTE(review): AOControl is assumed to be brought into scope by the
    # macro expansion above - confirm.
    with AOControl(False):
        return madlib_keras.fit_help(**globals())
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
/*
 * Zero-argument overload of the fit entry point.
 *
 * Forwards to the one-argument overload with an empty message string and
 * returns whatever that call returns.
 *
 * The function-property guard below is spelled with plain underscores.
 * A backslash-escaped spelling can never match a defined m4 macro, so
 * the CONTAINS SQL clause would silently never be emitted.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit()
RETURNS VARCHAR AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_fit('');
$$ LANGUAGE sql IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
/*
 * One-argument overload of the evaluate entry point.
 *
 * Takes a single message string and returns the text produced by
 * evaluate_help in the madlib_keras Python module; every PL/Python
 * global, including the message argument, is passed through as a keyword
 * argument.
 *
 * Declared IMMUTABLE and, where function properties are supported,
 * CONTAINS SQL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate(
    message     VARCHAR
) RETURNS VARCHAR AS $$
    PythonFunctionBodyOnly(`deep_learning', `madlib_keras')
    # The return statement has to be nested inside the with block: a with
    # statement that has no indented body is a Python syntax error.
    # NOTE(review): AOControl is assumed to be brought into scope by the
    # macro expansion above - confirm.
    with AOControl(False):
        return madlib_keras.evaluate_help(**globals())
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
/*
 * Zero-argument overload of the evaluate entry point.
 *
 * Forwards to the one-argument overload with an empty message string and
 * returns whatever that call returns.
 *
 * The function-property guard below is spelled with plain underscores.
 * A backslash-escaped spelling can never match a defined m4 macro, so
 * the CONTAINS SQL clause would silently never be emitted.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate()
RETURNS VARCHAR AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_evaluate('');
$$ LANGUAGE sql IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
/*
 * One-argument overload of the predict entry point.
 *
 * Takes a single message string and returns the text produced by
 * predict_help in the madlib_keras_predict Python module; every
 * PL/Python global, including the message argument, is passed through as
 * a keyword argument.
 *
 * Declared IMMUTABLE and, where function properties are supported,
 * CONTAINS SQL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
    message     VARCHAR
) RETURNS VARCHAR AS $$
    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict')
    # The return statement has to be nested inside the with block: a with
    # statement that has no indented body is a Python syntax error.
    # NOTE(review): AOControl is assumed to be brought into scope by the
    # macro expansion above - confirm.
    with AOControl(False):
        return madlib_keras_predict.predict_help(**globals())
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
/*
 * Zero-argument overload of the predict entry point.
 *
 * Forwards to the one-argument overload with an empty message string and
 * returns whatever that call returns.
 *
 * The function-property guard below is spelled with plain underscores.
 * A backslash-escaped spelling can never match a defined m4 macro, so
 * the CONTAINS SQL clause would silently never be emitted.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict()
RETURNS VARCHAR AS $$
    SELECT MADLIB_SCHEMA.madlib_keras_predict('');
$$ LANGUAGE sql IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');