<!-- HTML header for doxygen 1.8.4-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.13"/>
<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
<title>MADlib: Define Model Architectures</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtreedata.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { init_search(); });
</script>
<script type="text/x-mathjax-config">
// MathJax v2 configuration: TeX input rendered as HTML-CSS, with the AMS
// math/symbol extensions and tex2jax preprocessing enabled.
MathJax.Hub.Config({
extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
jax: ["input/TeX","output/HTML-CSS"]
});
</script>
<!-- MathJax is loaded over HTTPS from cdnjs: the former cdn.mathjax.org host
     has been retired, and a plain-http script is blocked as mixed content when
     this page is served over HTTPS. Pinned to a 2.7.x release because the
     configuration above uses the MathJax v2 API (MathJax.Hub). -->
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js"></script>
<!-- hack in the navigation tree -->
<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
<!-- google analytics -->
<script>
// Google Analytics (analytics.js) bootstrap.
// Defines a global `ga` function that queues its arguments in `ga.q` until the
// library loads, records a timestamp in `ga.l`, then creates an async script
// element for analytics.js and inserts it before the first script element in
// the document.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
// Explicit https: scheme (instead of a protocol-relative URL) so the loader
// also resolves when this page is opened from file:// or served over http.
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Register the tracker for this property/domain and record the page view.
ga('create', 'UA-45382226-1', 'madlib.apache.org');
ga('send', 'pageview');
</script>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
<td style="padding-left: 0.5em;">
<div id="projectname">
<span id="projectnumber">1.21.0</span>
</div>
<div id="projectbrief">User Documentation for Apache MADlib</div>
</td>
<td> <div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.13 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<div id="nav-tree-contents">
<div id="nav-sync" class="sync"></div>
</div>
</div>
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<script type="text/javascript">
$(document).ready(function(){initNavTree('group__grp__keras__model__arch.html','');});
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div class="header">
<div class="headertitle">
<div class="title">Define Model Architectures<div class="ingroups"><a class="el" href="group__grp__dl.html">Deep Learning</a> &raquo; <a class="el" href="group__grp__model__prep.html">Model Preparation</a></div></div> </div>
</div><!--header-->
<div class="contents">
<div class="toc"><b>Contents</b><ul>
<li class="level1">
<a href="#load_keras_model">Load Model</a> </li>
<li class="level1">
<a href="#delete_keras_model">Delete Model</a> </li>
<li class="level1">
<a href="#example">Examples</a> </li>
<li class="level1">
<a href="#related">Related Topics</a> </li>
</ul>
</div><p>This function loads model architectures and weights into a table for use by deep learning algorithms.</p>
<p>Model architecture is in JSON form and model weights are in the form of PostgreSQL binary data types (bytea). If the output table already exists, a new row is inserted into the table so it can act as a repository for multiple model architectures and weights.</p>
<p>There is also a function to delete a model from the table.</p>
<p>MADlib's deep learning methods are designed to use the TensorFlow package and its built-in Keras functions. To ensure consistency, please use tensorflow.keras objects (models, layers, etc.) instead of importing Keras and using its objects.</p>
<p><a class="anchor" id="load_keras_model"></a></p><dl class="section user"><dt>Load Model</dt><dd></dd></dl>
<pre class="syntax">
load_keras_model(
keras_model_arch_table,
model_arch,
model_weights,
name,
description
)
</pre><p> <b>Arguments</b> </p><dl class="arglist">
<dt>keras_model_arch_table </dt>
<dd><p class="startdd">VARCHAR. Output table to load keras model architecture and weights. </p>
<p class="enddd"></p>
</dd>
<dt>model_arch </dt>
<dd><p class="startdd">JSON. JSON of the model architecture to load. </p><dl class="section note"><dt>Note</dt><dd>Please note that every input layer must have the 'input_shape' stated explicitly in the model architecture. MADlib has this requirement because, in some cases, the JSON representation may not have the input shape by default and it has to be read from the JSON for fit() type functions.</dd></dl>
<p class="enddd"></p>
</dd>
<dt>model_weights (optional) </dt>
<dd><p class="startdd">bytea. Model weights to load as a PostgreSQL binary data type. </p>
<p class="enddd"></p>
</dd>
<dt>name (optional) </dt>
<dd><p class="startdd">TEXT, default: NULL. Free text string to provide a name, if desired. </p>
<p class="enddd"></p>
</dd>
<dt>description (optional) </dt>
<dd><p class="startdd">TEXT, default: NULL. Free text string to provide a description, if desired. </p>
<p class="enddd"></p>
</dd>
</dl>
<p><b>Output table</b> <br />
The output table contains the following columns: </p><table class="output">
<tr>
<th>model_id </th><td>SERIAL PRIMARY KEY. Model ID. </td></tr>
<tr>
<th>model_arch </th><td>JSON. JSON blob of the model architecture. </td></tr>
<tr>
<th>model_weights </th><td>BYTEA. Weights of the model which may be used for warm start or transfer learning. Weights are stored as a PostgreSQL binary data type. </td></tr>
<tr>
<th>name </th><td>TEXT. Name of model (free text). </td></tr>
<tr>
<th>description </th><td>TEXT. Description of model (free text). </td></tr>
<tr>
<th>__internal_madlib_id__ </th><td>TEXT. Unique id for model arch. This is an id used internally by MADlib. </td></tr>
</table>
<p><a class="anchor" id="delete_keras_model"></a></p><dl class="section user"><dt>Delete Model</dt><dd></dd></dl>
<pre class="syntax">
delete_keras_model(
keras_model_arch_table,
model_id
)
</pre><p> <b>Arguments</b> </p><dl class="arglist">
<dt>keras_model_arch_table </dt>
<dd><p class="startdd">VARCHAR. Table containing model architectures and weights. </p>
<p class="enddd"></p>
</dd>
<dt>model_id </dt>
<dd>INTEGER. The id of the model to be deleted. </dd>
</dl>
<p><a class="anchor" id="example"></a></p><dl class="section user"><dt>Examples</dt><dd><ol type="1">
<li>Define model architecture. Use tensorflow.keras to define the model architecture: <pre class="example">
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
model_simple = Sequential()
model_simple.add(Dense(10, activation='relu', input_shape=(4,)))
model_simple.add(Dense(10, activation='relu'))
model_simple.add(Dense(3, activation='softmax'))
model_simple.summary()
<pre class="fragment">_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 10) 50
_________________________________________________________________
dense_2 (Dense) (None, 10) 110
_________________________________________________________________
dense_3 (Dense) (None, 3) 33
=================================================================
Total params: 193
Trainable params: 193
Non-trainable params: 0
</pre>
</pre> Export the model to JSON: <pre class="example">
model_simple.to_json()
</pre> <pre class="result">
'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}'
</pre></li>
<li>Load into model architecture table: <pre class="example">
DROP TABLE IF EXISTS model_arch_library;
SELECT madlib.load_keras_model('model_arch_library', -- Output table,
$$
{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}
$$
::json, -- JSON blob
NULL, -- Weights
'Sophie', -- Name
'A simple model' -- Descr
);
SELECT COUNT(*) FROM model_arch_library;
</pre> <pre class="result">
count
-------+
1
</pre> Load another model architecture: <pre class="example">
SELECT madlib.load_keras_model('model_arch_library', -- Output table,
$$
{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "batch_input_shape": [null, 4], "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_2", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 10, "use_bias": true, "activity_regularizer": null}}, {"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_3", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "units": 3, "use_bias": true, "activity_regularizer": null}}], "backend": "tensorflow"}
$$
::json, -- JSON blob
NULL, -- Weights
'Maria', -- Name
'Also a simple model' -- Descr
);
SELECT COUNT(*) FROM model_arch_library;
</pre> <pre class="result">
count
-------+
2
</pre></li>
<li>Load model weights. To load weights from a previous MADlib run, use UPDATE to load directly into the table. For example, if 'model_weights' are the weights in the output table 'iris_model' from a previous run of '<a class="el" href="madlib__keras_8sql__in.html#a544c395da869ee39bef3555de77c4195">madlib_keras_fit()</a>': <pre class="example">
UPDATE model_arch_library SET model_weights = iris_model.model_weights FROM iris_model WHERE model_arch_library.model_id = 2;
SELECT model_id, name, description, (model_weights IS NOT NULL) AS has_model_weights FROM model_arch_library ORDER BY model_id;
</pre> <pre class="result">
model_id | name | description | has_model_weights
----------+--------+---------------------+-------------------
1 | Sophie | A simple model | f
2 | Maria | Also a simple model | t
</pre></li>
<li>To load weights from Keras using a PL/Python function, we need to flatten then serialize the weights to store as a PostgreSQL binary data type. Byte format is more efficient on space and memory compared to a numeric array. The model weights will be de-serialized when passed to Keras functions. <pre class="example">
CREATE OR REPLACE FUNCTION load_weights() RETURNS VOID AS
$$
from tensorflow.keras.layers import *
from tensorflow.keras import Sequential
import numpy as np
import plpy
#
# create model
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(4,)))
model.add(Dense(10, activation='relu'))
model.add(Dense(3, activation='softmax'))
#
# get weights, flatten and serialize
weights = model.get_weights()
weights_flat = [w.flatten() for w in weights]
weights1d = np.concatenate(weights_flat).ravel()
weights_bytea = weights1d.tostring()
#
# load query
load_query = plpy.prepare("""SELECT madlib.load_keras_model(
'model_arch_library',
$1, $2)
""", ['json','bytea'])
plpy.execute(load_query, [model.to_json(), weights_bytea])
$$ language plpythonu;
-- Call load function
SELECT load_weights();
SELECT model_id, name, description, (model_weights IS NOT NULL) AS has_model_weights FROM model_arch_library ORDER BY model_id;
</pre> <pre class="result">
model_id | name | description | has_model_weights
----------+--------+---------------------+-------------------
1 | Sophie | A simple model | f
2 | Maria | Also a simple model | t
3 | Ella | Model x | t
</pre></li>
<li>Load weights from Keras using psycopg2. (Psycopg is a PostgreSQL database adapter for the Python programming language.) As above we need to flatten then serialize the weights to store as a PostgreSQL binary data type. Note that the psycopg2.Binary function used below will increase the size of the Python object for the weights, so if your model is large it might be better to use a PL/Python function as above. <pre class="example">
import psycopg2
import psycopg2 as p2
conn = p2.connect('postgresql://gpadmin@35.239.240.26:5432/madlib')
cur = conn.cursor()
from tensorflow.keras.layers import *
from tensorflow.keras import Sequential
import numpy as np
#
# create model
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(4,)))
model.add(Dense(10, activation='relu'))
model.add(Dense(3, activation='softmax'))
#
# get weights, flatten and serialize
weights = model.get_weights()
weights_flat = [w.flatten() for w in weights]
weights1d = np.concatenate(weights_flat).ravel()
weights_bytea = psycopg2.Binary(weights1d.tostring())
#
# load query
query = "SELECT madlib.load_keras_model('model_arch_library', %s,%s)"
cur.execute(query,[model.to_json(),weights_bytea])
conn.commit()
SELECT model_id, name, description, (model_weights IS NOT NULL) AS has_model_weights FROM model_arch_library ORDER BY model_id;
</pre> <pre class="result">
model_id | name | description | has_model_weights
----------+--------+---------------------+-------------------
1 | Sophie | A simple model | f
2 | Maria | Also a simple model | t
3 | Ella | Model x | t
4 | Grace | Model y | t
</pre></li>
<li>Delete one of the models: <pre class="example">
SELECT madlib.delete_keras_model('model_arch_library', -- Output table
1 -- Model id
);
SELECT model_id, name, description, (model_weights IS NOT NULL) AS has_model_weights FROM model_arch_library ORDER BY model_id;
</pre> <pre class="result">
model_id | name | description | has_model_weights
----------+-------+---------------------+-------------------
2 | Maria | Also a simple model | t
3 | Ella | Model x | t
4 | Grace | Model y | t
</pre></li>
</ol>
</dd></dl>
<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd></dd></dl>
<p>See <a class="el" href="keras__model__arch__table_8sql__in.html">keras_model_arch_table.sql_in</a> </p>
</div><!-- contents -->
</div><!-- doc-content -->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
<li class="footer">Generated on Thu Feb 23 2023 19:26:39 for MADlib by
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
</ul>
</div>
</body>
</html>