{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Transfer Learning Using Keras and MADlib\n",
"\n",
"This is a transfer learning example based on https://keras.io/examples/mnist_transfer_cnn/ \n",
"\n",
"To load images into tables we use the script called <em>madlib_image_loader.py</em> located at https://github.com/apache/madlib-site/tree/asf-site/community-artifacts/Deep-learning which uses the Python Imaging Library so supports multiple formats http://www.pythonware.com/products/pil/\n",
"\n",
"## Table of contents\n",
"<a href=\"#import_libraries\">1. Import libraries</a>\n",
"\n",
"<a href=\"#load_and_prepare_data\">2. Load and prepare data</a>\n",
"\n",
"<a href=\"#image_preproc\">3. Call image preprocessor</a>\n",
"\n",
"<a href=\"#define_and_load_model\">4. Define and load model architecture</a>\n",
"\n",
"<a href=\"#train\">5. Train</a>\n",
"\n",
"<a href=\"#transfer_learning\">6. Transfer learning</a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%load_ext sql"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Greenplum Database 5.x on GCP - via tunnel\n",
"%sql postgresql://gpadmin@localhost:8000/madlib\n",
" \n",
"# PostgreSQL local\n",
"#%sql postgresql://fmcquillan@localhost:5432/madlib"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>version</th>\n",
" </tr>\n",
" <tr>\n",
" <td>MADlib version: 1.18.0-dev, git revision: rel/v1.17.0-91-g16070e5, cmake configuration time: Mon Mar 8 16:58:24 UTC 2021, build type: release, build system: Linux-3.10.0-1160.11.1.el7.x86_64, C compiler: gcc 4.8.5, C++ compiler: g++ 4.8.5</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'MADlib version: 1.18.0-dev, git revision: rel/v1.17.0-91-g16070e5, cmake configuration time: Mon Mar 8 16:58:24 UTC 2021, build type: release, build system: Linux-3.10.0-1160.11.1.el7.x86_64, C compiler: gcc 4.8.5, C++ compiler: g++ 4.8.5',)]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%sql select madlib.version();\n",
"#%sql select version();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"import_libraries\"></a>\n",
"# 1. Import libraries\n",
"From https://keras.io/examples/mnist_transfer_cnn/ import libraries and define some params"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"\n",
"import datetime\n",
"from tensorflow import keras\n",
"from tensorflow.keras.datasets import mnist\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten\n",
"from tensorflow.keras.layers import Conv2D, MaxPooling2D\n",
"from tensorflow.keras import backend as K\n",
"\n",
"now = datetime.datetime.now\n",
"\n",
"batch_size = 128\n",
"num_classes = 5\n",
"epochs = 5\n",
"\n",
"# input image dimensions\n",
"img_rows, img_cols = 28, 28\n",
"# number of convolutional filters to use\n",
"filters = 32\n",
"# size of pooling area for max pooling\n",
"pool_size = 2\n",
"# convolution kernel size\n",
"kernel_size = 3\n",
"\n",
"if K.image_data_format() == 'channels_first':\n",
" input_shape = (1, img_rows, img_cols)\n",
"else:\n",
" input_shape = (img_rows, img_cols, 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Others needed in this workbook"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"load_and_prepare_data\"></a>\n",
"# 2. Load and prepare data\n",
"\n",
"First load MNIST data from Keras, consisting of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# the data, split between train and test sets\n",
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
"\n",
"# create two datasets one with digits below 5 and one with 5 and above\n",
"x_train_lt5 = x_train[y_train < 5]\n",
"y_train_lt5 = y_train[y_train < 5]\n",
"x_test_lt5 = x_test[y_test < 5]\n",
"y_test_lt5 = y_test[y_test < 5]\n",
"\n",
"x_train_gte5 = x_train[y_train >= 5]\n",
"y_train_gte5 = y_train[y_train >= 5] - 5\n",
"x_test_gte5 = x_test[y_test >= 5]\n",
"y_test_gte5 = y_test[y_test >= 5] - 5\n",
"\n",
"# reshape to match model architecture\n",
"x_train_lt5=x_train_lt5.reshape(len(x_train_lt5), *input_shape)\n",
"x_test_lt5 = x_test_lt5.reshape(len(x_test_lt5), *input_shape)\n",
"x_train_gte5=x_train_gte5.reshape(len(x_train_gte5), *input_shape)\n",
"x_test_gte5 = x_test_gte5.reshape(len(x_test_gte5), *input_shape)\n",
"\n",
"y_train_lt5=y_train_lt5.reshape(len(y_train_lt5), 1)\n",
"y_test_lt5 = y_test_lt5.reshape(len(y_test_lt5), 1)\n",
"y_train_gte5=y_train_gte5.reshape(len(y_train_gte5), 1)\n",
"y_test_gte5 = y_test_gte5.reshape(len(y_test_gte5), 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"check x shape"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(30596, 28, 28, 1)\n",
"(29404, 28, 28, 1)\n",
"(5139, 28, 28, 1)\n",
"(4861, 28, 28, 1)\n"
]
}
],
"source": [
"print(x_train_lt5.shape)\n",
"print(x_train_gte5.shape)\n",
"print(x_test_lt5.shape)\n",
"print(x_test_gte5.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"check y shape"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(30596, 1)\n",
"(29404, 1)\n",
"(5139, 1)\n",
"(4861, 1)\n"
]
}
],
"source": [
"print(y_train_lt5.shape)\n",
"print(y_train_gte5.shape)\n",
"print(y_test_lt5.shape)\n",
"print(y_test_gte5.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load datasets into tables using image loader scripts called <em>madlib_image_loader.py</em> located at https://github.com/apache/madlib-site/tree/asf-site/community-artifacts/Deep-learning"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# MADlib tools directory\n",
"import sys\n",
"import os\n",
"madlib_site_dir = '/Users/fmcquillan/Documents/Product/MADlib/Demos/data'\n",
"sys.path.append(madlib_site_dir)\n",
"\n",
"# Import image loader module\n",
"from madlib_image_loader import ImageLoader, DbCredentials"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# Specify database credentials, for connecting to db\n",
"#db_creds = DbCredentials(user='fmcquillan',\n",
"# host='localhost',\n",
"# port='5432',\n",
"# password='')\n",
"\n",
"# Specify database credentials, for connecting to db\n",
"db_creds = DbCredentials(user='gpadmin', \n",
" db_name='madlib',\n",
" host='localhost',\n",
" port='8000',\n",
" password='')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Initialize ImageLoader (increase num_workers to run faster)\n",
"iloader = ImageLoader(num_workers=5, db_creds=db_creds)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"MainProcess: Connected to madlib db.\n",
"Executing: CREATE TABLE train_lt5 (id SERIAL, x REAL[], y TEXT)\n",
"CREATE TABLE\n",
"Created table train_lt5 in madlib db\n",
"Spawning 5 workers...\n",
"Initializing PoolWorker-1 [pid 45812]\n",
"PoolWorker-1: Created temporary directory /tmp/madlib_cUr8Y3iHQ6\n",
"Initializing PoolWorker-2 [pid 45813]\n",
"PoolWorker-2: Created temporary directory /tmp/madlib_Qg47OkNvGJ\n",
"Initializing PoolWorker-3 [pid 45814]\n",
"PoolWorker-3: Created temporary directory /tmp/madlib_znyrA6s1Nt\n",
"Initializing PoolWorker-4 [pid 45815]\n",
"PoolWorker-5: Created temporary directory /tmp/madlib_DO931mpYQ8\n",
"PoolWorker-4: Created temporary directory /tmp/madlib_yWXBCrh4jL\n",
"Initializing PoolWorker-5 [pid 45816]\n",
"PoolWorker-1: Connected to madlib db.\n",
"PoolWorker-3: Connected to madlib db.\n",
"PoolWorker-5: Connected to madlib db.\n",
"PoolWorker-4: Connected to madlib db.\n",
"PoolWorker-2: Connected to madlib db.\n",
"PoolWorker-1: Wrote 1000 images to /tmp/madlib_cUr8Y3iHQ6/train_lt50000.tmp\n",
"PoolWorker-4: Wrote 1000 images to /tmp/madlib_yWXBCrh4jL/train_lt50000.tmp\n",
"PoolWorker-3: Wrote 1000 images to /tmp/madlib_znyrA6s1Nt/train_lt50000.tmp\n",
"PoolWorker-5: Wrote 1000 images to /tmp/madlib_DO931mpYQ8/train_lt50000.tmp\n",
"PoolWorker-2: Wrote 1000 images to /tmp/madlib_Qg47OkNvGJ/train_lt50000.tmp\n",
"PoolWorker-1: Loaded 1000 images into train_lt5\n",
"PoolWorker-1: Wrote 1000 images to /tmp/madlib_cUr8Y3iHQ6/train_lt50001.tmp\n",
"PoolWorker-5: Loaded 1000 images into train_lt5\n",
"PoolWorker-4: Loaded 1000 images into train_lt5\n",
"PoolWorker-3: Loaded 1000 images into train_lt5\n",
"PoolWorker-4: Wrote 1000 images to /tmp/madlib_yWXBCrh4jL/train_lt50001.tmp\n",
"PoolWorker-3: Wrote 1000 images to /tmp/madlib_znyrA6s1Nt/train_lt50001.tmp\n",
"PoolWorker-5: Wrote 1000 images to /tmp/madlib_DO931mpYQ8/train_lt50001.tmp\n",
"PoolWorker-2: Loaded 1000 images into train_lt5\n",
"PoolWorker-2: Wrote 1000 images to /tmp/madlib_Qg47OkNvGJ/train_lt50001.tmp\n",
"PoolWorker-1: Loaded 1000 images into train_lt5\n",
"PoolWorker-1: Wrote 1000 images to /tmp/madlib_cUr8Y3iHQ6/train_lt50002.tmp\n",
"PoolWorker-5: Loaded 1000 images into train_lt5\n",
"PoolWorker-4: Loaded 1000 images into train_lt5\n",
"PoolWorker-3: Loaded 1000 images into train_lt5\n",
"PoolWorker-3: Wrote 1000 images to /tmp/madlib_znyrA6s1Nt/train_lt50002.tmp\n",
"PoolWorker-4: Wrote 1000 images to /tmp/madlib_yWXBCrh4jL/train_lt50002.tmp\n",
"PoolWorker-2: Loaded 1000 images into train_lt5\n",
"PoolWorker-5: Wrote 1000 images to /tmp/madlib_DO931mpYQ8/train_lt50002.tmp\n",
"PoolWorker-2: Wrote 1000 images to /tmp/madlib_Qg47OkNvGJ/train_lt50002.tmp\n",
"PoolWorker-1: Loaded 1000 images into train_lt5\n",
"PoolWorker-1: Wrote 1000 images to /tmp/madlib_cUr8Y3iHQ6/train_lt50003.tmp\n",
"PoolWorker-4: Loaded 1000 images into train_lt5\n",
"PoolWorker-3: Loaded 1000 images into train_lt5\n",
"PoolWorker-5: Loaded 1000 images into train_lt5\n",
"PoolWorker-3: Wrote 1000 images to /tmp/madlib_znyrA6s1Nt/train_lt50003.tmp\n",
"PoolWorker-5: Wrote 1000 images to /tmp/madlib_DO931mpYQ8/train_lt50003.tmp\n",
"PoolWorker-4: Wrote 1000 images to /tmp/madlib_yWXBCrh4jL/train_lt50003.tmp\n",
"PoolWorker-2: Loaded 1000 images into train_lt5\n",
"PoolWorker-1: Loaded 1000 images into train_lt5\n",
"PoolWorker-2: Wrote 1000 images to /tmp/madlib_Qg47OkNvGJ/train_lt50003.tmp\n",
"PoolWorker-1: Wrote 1000 images to /tmp/madlib_cUr8Y3iHQ6/train_lt50004.tmp\n",
"PoolWorker-3: Loaded 1000 images into train_lt5\n",
"PoolWorker-4: Loaded 1000 images into train_lt5\n",
"PoolWorker-5: Loaded 1000 images into train_lt5\n",
"PoolWorker-4: Wrote 1000 images to /tmp/madlib_yWXBCrh4jL/train_lt50004.tmp\n",
"PoolWorker-5: Wrote 1000 images to /tmp/madlib_DO931mpYQ8/train_lt50004.tmp\n",
"PoolWorker-3: Wrote 1000 images to /tmp/madlib_znyrA6s1Nt/train_lt50004.tmp\n",
"PoolWorker-2: Loaded 1000 images into train_lt5\n",
"PoolWorker-1: Loaded 1000 images into train_lt5\n",
"PoolWorker-1: Wrote 1000 images to /tmp/madlib_cUr8Y3iHQ6/train_lt50005.tmp\n",
"PoolWorker-2: Wrote 1000 images to /tmp/madlib_Qg47OkNvGJ/train_lt50004.tmp\n",
"PoolWorker-5: Loaded 1000 images into train_lt5\n",
"PoolWorker-3: Loaded 1000 images into train_lt5\n",
"PoolWorker-4: Loaded 1000 images into train_lt5\n",
"PoolWorker-5: Wrote 1000 images to /tmp/madlib_DO931mpYQ8/train_lt50005.tmp\n",
"PoolWorker-4: Wrote 1000 images to /tmp/madlib_yWXBCrh4jL/train_lt50005.tmp\n",
"PoolWorker-3: Wrote 1000 images to /tmp/madlib_znyrA6s1Nt/train_lt50005.tmp\n",
"PoolWorker-1: Loaded 1000 images into train_lt5\n",
"PoolWorker-2: Loaded 1000 images into train_lt5\n",
"PoolWorker-1: Wrote 596 images to /tmp/madlib_cUr8Y3iHQ6/train_lt50006.tmp\n",
"PoolWorker-2: Wrote 1000 images to /tmp/madlib_Qg47OkNvGJ/train_lt50005.tmp\n",
"PoolWorker-1: Loaded 596 images into train_lt5\n",
"PoolWorker-4: Loaded 1000 images into train_lt5\n",
"PoolWorker-5: Loaded 1000 images into train_lt5\n",
"PoolWorker-3: Loaded 1000 images into train_lt5\n",
"PoolWorker-2: Loaded 1000 images into train_lt5\n",
"PoolWorker-5: Removed temporary directory /tmp/madlib_DO931mpYQ8\n",
"PoolWorker-2: Removed temporary directory /tmp/madlib_Qg47OkNvGJ\n",
"PoolWorker-3: Removed temporary directory /tmp/madlib_znyrA6s1Nt\n",
"PoolWorker-4: Removed temporary directory /tmp/madlib_yWXBCrh4jL\n",
"PoolWorker-1: Removed temporary directory /tmp/madlib_cUr8Y3iHQ6\n",
"Done! Loaded 30596 images in 87.504554987s\n",
"5 workers terminated.\n",
"MainProcess: Connected to madlib db.\n",
"Executing: CREATE TABLE test_lt5 (id SERIAL, x REAL[], y TEXT)\n",
"CREATE TABLE\n",
"Created table test_lt5 in madlib db\n",
"Spawning 5 workers...\n",
"Initializing PoolWorker-6 [pid 45832]\n",
"PoolWorker-6: Created temporary directory /tmp/madlib_zskbC1CxH3\n",
"Initializing PoolWorker-7 [pid 45833]\n",
"PoolWorker-7: Created temporary directory /tmp/madlib_nq4NtQVTcA\n",
"Initializing PoolWorker-8 [pid 45834]\n",
"PoolWorker-8: Created temporary directory /tmp/madlib_ottKdv45hY\n",
"Initializing PoolWorker-9 [pid 45835]\n",
"PoolWorker-9: Created temporary directory /tmp/madlib_H2i9eCgqnz\n",
"Initializing PoolWorker-10 [pid 45836]\n",
"PoolWorker-10: Created temporary directory /tmp/madlib_iLL7sa8Utg\n",
"PoolWorker-6: Connected to madlib db.\n",
"PoolWorker-7: Connected to madlib db.\n",
"PoolWorker-10: Connected to madlib db.\n",
"PoolWorker-9: Connected to madlib db.\n",
"PoolWorker-8: Connected to madlib db.\n",
"PoolWorker-7: Wrote 1000 images to /tmp/madlib_nq4NtQVTcA/test_lt50000.tmp\n",
"PoolWorker-6: Wrote 1000 images to /tmp/madlib_zskbC1CxH3/test_lt50000.tmp\n",
"PoolWorker-10: Wrote 1000 images to /tmp/madlib_iLL7sa8Utg/test_lt50000.tmp\n",
"PoolWorker-8: Wrote 1000 images to /tmp/madlib_ottKdv45hY/test_lt50000.tmp\n",
"PoolWorker-9: Wrote 1000 images to /tmp/madlib_H2i9eCgqnz/test_lt50000.tmp\n",
"PoolWorker-7: Loaded 1000 images into test_lt5\n",
"PoolWorker-7: Wrote 139 images to /tmp/madlib_nq4NtQVTcA/test_lt50001.tmp\n",
"PoolWorker-8: Loaded 1000 images into test_lt5\n",
"PoolWorker-10: Loaded 1000 images into test_lt5\n",
"PoolWorker-6: Loaded 1000 images into test_lt5\n",
"PoolWorker-9: Loaded 1000 images into test_lt5\n",
"PoolWorker-7: Loaded 139 images into test_lt5\n",
"PoolWorker-8: Removed temporary directory /tmp/madlib_ottKdv45hY\n",
"PoolWorker-6: Removed temporary directory /tmp/madlib_zskbC1CxH3\n",
"PoolWorker-10: Removed temporary directory /tmp/madlib_iLL7sa8Utg\n",
"PoolWorker-9: Removed temporary directory /tmp/madlib_H2i9eCgqnz\n",
"PoolWorker-7: Removed temporary directory /tmp/madlib_nq4NtQVTcA\n",
"Done! Loaded 5139 images in 16.0366249084s\n",
"5 workers terminated.\n",
"MainProcess: Connected to madlib db.\n",
"Executing: CREATE TABLE train_gte5 (id SERIAL, x REAL[], y TEXT)\n",
"CREATE TABLE\n",
"Created table train_gte5 in madlib db\n",
"Spawning 5 workers...\n",
"Initializing PoolWorker-11 [pid 45842]\n",
"PoolWorker-11: Created temporary directory /tmp/madlib_tCjXbcR1tF\n",
"Initializing PoolWorker-12 [pid 45843]\n",
"PoolWorker-12: Created temporary directory /tmp/madlib_aXM7FynwWa\n",
"Initializing PoolWorker-13 [pid 45844]\n",
"PoolWorker-13: Created temporary directory /tmp/madlib_IUgLA1Lmzg\n",
"PoolWorker-15: Created temporary directory /tmp/madlib_WXvDvrTZ3D\n",
"Initializing PoolWorker-14 [pid 45845]\n",
"PoolWorker-14: Created temporary directory /tmp/madlib_qzfjEpqz4M\n",
"Initializing PoolWorker-15 [pid 45846]\n",
"PoolWorker-13: Connected to madlib db.\n",
"PoolWorker-15: Connected to madlib db.\n",
"PoolWorker-12: Connected to madlib db.\n",
"PoolWorker-11: Connected to madlib db.\n",
"PoolWorker-14: Connected to madlib db.\n",
"PoolWorker-13: Wrote 1000 images to /tmp/madlib_IUgLA1Lmzg/train_gte50000.tmp\n",
"PoolWorker-11: Wrote 1000 images to /tmp/madlib_tCjXbcR1tF/train_gte50000.tmp\n",
"PoolWorker-15: Wrote 1000 images to /tmp/madlib_WXvDvrTZ3D/train_gte50000.tmp\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"PoolWorker-12: Wrote 1000 images to /tmp/madlib_aXM7FynwWa/train_gte50000.tmp\n",
"PoolWorker-14: Wrote 1000 images to /tmp/madlib_qzfjEpqz4M/train_gte50000.tmp\n",
"PoolWorker-13: Loaded 1000 images into train_gte5\n",
"PoolWorker-13: Wrote 1000 images to /tmp/madlib_IUgLA1Lmzg/train_gte50001.tmp\n",
"PoolWorker-11: Loaded 1000 images into train_gte5\n",
"PoolWorker-15: Loaded 1000 images into train_gte5\n",
"PoolWorker-11: Wrote 1000 images to /tmp/madlib_tCjXbcR1tF/train_gte50001.tmp\n",
"PoolWorker-15: Wrote 1000 images to /tmp/madlib_WXvDvrTZ3D/train_gte50001.tmp\n",
"PoolWorker-14: Loaded 1000 images into train_gte5\n",
"PoolWorker-12: Loaded 1000 images into train_gte5\n",
"PoolWorker-14: Wrote 1000 images to /tmp/madlib_qzfjEpqz4M/train_gte50001.tmp\n",
"PoolWorker-12: Wrote 1000 images to /tmp/madlib_aXM7FynwWa/train_gte50001.tmp\n",
"PoolWorker-13: Loaded 1000 images into train_gte5\n",
"PoolWorker-13: Wrote 1000 images to /tmp/madlib_IUgLA1Lmzg/train_gte50002.tmp\n",
"PoolWorker-15: Loaded 1000 images into train_gte5\n",
"PoolWorker-11: Loaded 1000 images into train_gte5\n",
"PoolWorker-15: Wrote 1000 images to /tmp/madlib_WXvDvrTZ3D/train_gte50002.tmp\n",
"PoolWorker-11: Wrote 1000 images to /tmp/madlib_tCjXbcR1tF/train_gte50002.tmp\n",
"PoolWorker-12: Loaded 1000 images into train_gte5\n",
"PoolWorker-14: Loaded 1000 images into train_gte5\n",
"PoolWorker-14: Wrote 1000 images to /tmp/madlib_qzfjEpqz4M/train_gte50002.tmp\n",
"PoolWorker-12: Wrote 1000 images to /tmp/madlib_aXM7FynwWa/train_gte50002.tmp\n",
"PoolWorker-13: Loaded 1000 images into train_gte5\n",
"PoolWorker-13: Wrote 1000 images to /tmp/madlib_IUgLA1Lmzg/train_gte50003.tmp\n",
"PoolWorker-15: Loaded 1000 images into train_gte5\n",
"PoolWorker-11: Loaded 1000 images into train_gte5\n",
"PoolWorker-11: Wrote 1000 images to /tmp/madlib_tCjXbcR1tF/train_gte50003.tmp\n",
"PoolWorker-15: Wrote 1000 images to /tmp/madlib_WXvDvrTZ3D/train_gte50003.tmp\n",
"PoolWorker-14: Loaded 1000 images into train_gte5\n",
"PoolWorker-12: Loaded 1000 images into train_gte5\n",
"PoolWorker-14: Wrote 1000 images to /tmp/madlib_qzfjEpqz4M/train_gte50003.tmp\n",
"PoolWorker-12: Wrote 1000 images to /tmp/madlib_aXM7FynwWa/train_gte50003.tmp\n",
"PoolWorker-13: Loaded 1000 images into train_gte5\n",
"PoolWorker-13: Wrote 1000 images to /tmp/madlib_IUgLA1Lmzg/train_gte50004.tmp\n",
"PoolWorker-11: Loaded 1000 images into train_gte5\n",
"PoolWorker-15: Loaded 1000 images into train_gte5\n",
"PoolWorker-11: Wrote 1000 images to /tmp/madlib_tCjXbcR1tF/train_gte50004.tmp\n",
"PoolWorker-15: Wrote 1000 images to /tmp/madlib_WXvDvrTZ3D/train_gte50004.tmp\n",
"PoolWorker-14: Loaded 1000 images into train_gte5\n",
"PoolWorker-12: Loaded 1000 images into train_gte5\n",
"PoolWorker-13: Loaded 1000 images into train_gte5\n",
"PoolWorker-12: Wrote 1000 images to /tmp/madlib_aXM7FynwWa/train_gte50004.tmp\n",
"PoolWorker-14: Wrote 1000 images to /tmp/madlib_qzfjEpqz4M/train_gte50004.tmp\n",
"PoolWorker-13: Wrote 1000 images to /tmp/madlib_IUgLA1Lmzg/train_gte50005.tmp\n",
"PoolWorker-11: Loaded 1000 images into train_gte5\n",
"PoolWorker-11: Wrote 1000 images to /tmp/madlib_tCjXbcR1tF/train_gte50005.tmp\n",
"PoolWorker-15: Loaded 1000 images into train_gte5\n",
"PoolWorker-15: Wrote 1000 images to /tmp/madlib_WXvDvrTZ3D/train_gte50005.tmp\n",
"PoolWorker-12: Loaded 1000 images into train_gte5\n",
"PoolWorker-13: Loaded 1000 images into train_gte5\n",
"PoolWorker-14: Loaded 1000 images into train_gte5\n",
"PoolWorker-14: Wrote 404 images to /tmp/madlib_qzfjEpqz4M/train_gte50005.tmp\n",
"PoolWorker-12: Wrote 1000 images to /tmp/madlib_aXM7FynwWa/train_gte50005.tmp\n",
"PoolWorker-11: Loaded 1000 images into train_gte5\n",
"PoolWorker-14: Loaded 404 images into train_gte5\n",
"PoolWorker-15: Loaded 1000 images into train_gte5\n",
"PoolWorker-12: Loaded 1000 images into train_gte5\n",
"PoolWorker-13: Removed temporary directory /tmp/madlib_IUgLA1Lmzg\n",
"PoolWorker-11: Removed temporary directory /tmp/madlib_tCjXbcR1tF\n",
"PoolWorker-14: Removed temporary directory /tmp/madlib_qzfjEpqz4M\n",
"PoolWorker-15: Removed temporary directory /tmp/madlib_WXvDvrTZ3D\n",
"PoolWorker-12: Removed temporary directory /tmp/madlib_aXM7FynwWa\n",
"Done! Loaded 29404 images in 83.1629951s\n",
"5 workers terminated.\n",
"MainProcess: Connected to madlib db.\n",
"Executing: CREATE TABLE test_gte5 (id SERIAL, x REAL[], y TEXT)\n",
"CREATE TABLE\n",
"Created table test_gte5 in madlib db\n",
"Spawning 5 workers...\n",
"Initializing PoolWorker-16 [pid 45874]\n",
"PoolWorker-16: Created temporary directory /tmp/madlib_UOsDRcoLhX\n",
"Initializing PoolWorker-17 [pid 45875]\n",
"PoolWorker-17: Created temporary directory /tmp/madlib_GhQsnIWE2l\n",
"Initializing PoolWorker-18 [pid 45876]\n",
"PoolWorker-18: Created temporary directory /tmp/madlib_byJ6L1H4Ib\n",
"Initializing PoolWorker-19 [pid 45877]\n",
"PoolWorker-19: Created temporary directory /tmp/madlib_sKmIx32QXS\n",
"PoolWorker-20: Created temporary directory /tmp/madlib_dXB8oQfWZy\n",
"Initializing PoolWorker-20 [pid 45878]\n",
"PoolWorker-18: Connected to madlib db.\n",
"PoolWorker-17: Connected to madlib db.\n",
"PoolWorker-19: Connected to madlib db.\n",
"PoolWorker-16: Connected to madlib db.\n",
"PoolWorker-20: Connected to madlib db.\n",
"PoolWorker-20: Wrote 861 images to /tmp/madlib_dXB8oQfWZy/test_gte50000.tmp\n",
"PoolWorker-17: Wrote 1000 images to /tmp/madlib_GhQsnIWE2l/test_gte50000.tmp\n",
"PoolWorker-18: Wrote 1000 images to /tmp/madlib_byJ6L1H4Ib/test_gte50000.tmp\n",
"PoolWorker-19: Wrote 1000 images to /tmp/madlib_sKmIx32QXS/test_gte50000.tmp\n",
"PoolWorker-16: Wrote 1000 images to /tmp/madlib_UOsDRcoLhX/test_gte50000.tmp\n",
"PoolWorker-20: Loaded 861 images into test_gte5\n",
"PoolWorker-17: Loaded 1000 images into test_gte5\n",
"PoolWorker-18: Loaded 1000 images into test_gte5\n",
"PoolWorker-19: Loaded 1000 images into test_gte5\n",
"PoolWorker-16: Loaded 1000 images into test_gte5\n",
"PoolWorker-19: Removed temporary directory /tmp/madlib_sKmIx32QXS\n",
"PoolWorker-20: Removed temporary directory /tmp/madlib_dXB8oQfWZy\n",
"PoolWorker-16: Removed temporary directory /tmp/madlib_UOsDRcoLhX\n",
"PoolWorker-17: Removed temporary directory /tmp/madlib_GhQsnIWE2l\n",
"PoolWorker-18: Removed temporary directory /tmp/madlib_byJ6L1H4Ib\n",
"Done! Loaded 4861 images in 15.0834438801s\n",
"5 workers terminated.\n"
]
}
],
"source": [
"# Drop tables\n",
"%sql DROP TABLE IF EXISTS train_lt5, test_lt5, train_gte5, test_gte5\n",
"\n",
"# Save images to temporary directories and load into database\n",
"iloader.load_dataset_from_np(x_train_lt5, y_train_lt5, 'train_lt5', append=False)\n",
"iloader.load_dataset_from_np(x_test_lt5, y_test_lt5, 'test_lt5', append=False)\n",
"iloader.load_dataset_from_np(x_train_gte5, y_train_gte5, 'train_gte5', append=False)\n",
"iloader.load_dataset_from_np(x_test_gte5, y_test_gte5, 'test_gte5', append=False)"
]
},
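{
"cell_type": "markdown",
"metadata": {},
"source": [
"As an optional sanity check (an addition to the original example), count the rows loaded into each table; the counts should match the array shapes printed above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"-- Optional sanity check: row counts should match the shapes above\n",
"SELECT 'train_lt5' AS tbl, count(*) AS n FROM train_lt5\n",
"UNION ALL SELECT 'test_lt5', count(*) FROM test_lt5\n",
"UNION ALL SELECT 'train_gte5', count(*) FROM train_gte5\n",
"UNION ALL SELECT 'test_gte5', count(*) FROM test_gte5;"
]
},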
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"image_preproc\"></a>\n",
"# 3. Call image preprocessor\n",
"\n",
"Transforms from one image per row to multiple images per row for batch optimization. Also normalizes and one-hot encodes.\n",
"\n",
"Training dataset < 5"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>output_table</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>dependent_vartype</th>\n",
" <th>y_class_values</th>\n",
" <th>buffer_size</th>\n",
" <th>normalizing_const</th>\n",
" <th>num_classes</th>\n",
" <th>distribution_rules</th>\n",
" <th>__internal_gpu_config__</th>\n",
" </tr>\n",
" <tr>\n",
" <td>train_lt5</td>\n",
" <td>train_lt5_packed</td>\n",
" <td>[u'y']</td>\n",
" <td>[u'x']</td>\n",
" <td>[u'text']</td>\n",
" <td>[u'0', u'1', u'2', u'3', u'4']</td>\n",
" <td>957</td>\n",
" <td>255.0</td>\n",
" <td>[5]</td>\n",
" <td>all_segments</td>\n",
" <td>all_segments</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'train_lt5', u'train_lt5_packed', [u'y'], [u'x'], [u'text'], [u'0', u'1', u'2', u'3', u'4'], 957, 255.0, [5], 'all_segments', 'all_segments')]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS train_lt5_packed, train_lt5_packed_summary;\n",
"\n",
"SELECT madlib.training_preprocessor_dl('train_lt5', -- Source table\n",
" 'train_lt5_packed', -- Output table\n",
" 'y', -- Dependent variable\n",
" 'x', -- Independent variable\n",
" 1000, -- Buffer size\n",
" 255 -- Normalizing constant\n",
" );\n",
"\n",
"SELECT * FROM train_lt5_packed_summary;"
]
},
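{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make the preprocessing concrete, the NumPy sketch below (an addition to the original example) illustrates the two per-image transformations: dividing pixel values by the normalizing constant (255) and one-hot encoding the labels. This is only an illustration of the idea, not MADlib's implementation, which also packs many images per row into buffers."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Conceptual sketch only -- not MADlib's implementation\n",
"# Normalize pixel values from [0, 255] to [0, 1]\n",
"x_example = x_train_lt5[:3].astype('float32') / 255.0\n",
"\n",
"# One-hot encode labels in [0..4] as 5-element vectors\n",
"y_example = np.eye(num_classes)[y_train_lt5[:3].ravel()]\n",
"\n",
"print(x_example.min(), x_example.max())\n",
"print(y_example)"
]
},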
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Test dataset < 5"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>output_table</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>dependent_vartype</th>\n",
" <th>y_class_values</th>\n",
" <th>buffer_size</th>\n",
" <th>normalizing_const</th>\n",
" <th>num_classes</th>\n",
" <th>distribution_rules</th>\n",
" <th>__internal_gpu_config__</th>\n",
" </tr>\n",
" <tr>\n",
" <td>test_lt5</td>\n",
" <td>test_lt5_packed</td>\n",
" <td>[u'y']</td>\n",
" <td>[u'x']</td>\n",
" <td>[u'text']</td>\n",
" <td>[u'0', u'1', u'2', u'3', u'4']</td>\n",
" <td>2570</td>\n",
" <td>255.0</td>\n",
" <td>[5]</td>\n",
" <td>all_segments</td>\n",
" <td>all_segments</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'test_lt5', u'test_lt5_packed', [u'y'], [u'x'], [u'text'], [u'0', u'1', u'2', u'3', u'4'], 2570, 255.0, [5], 'all_segments', 'all_segments')]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS test_lt5_packed, test_lt5_packed_summary;\n",
"\n",
"SELECT madlib.validation_preprocessor_dl('test_lt5', -- Source table\n",
" 'test_lt5_packed', -- Output table\n",
" 'y', -- Dependent variable\n",
" 'x', -- Independent variable\n",
" 'train_lt5_packed' -- Training preproc table\n",
" );\n",
"\n",
"SELECT * FROM test_lt5_packed_summary;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Training dataset >= 5"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>output_table</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>dependent_vartype</th>\n",
" <th>y_class_values</th>\n",
" <th>buffer_size</th>\n",
" <th>normalizing_const</th>\n",
" <th>num_classes</th>\n",
" <th>distribution_rules</th>\n",
" <th>__internal_gpu_config__</th>\n",
" </tr>\n",
" <tr>\n",
" <td>train_gte5</td>\n",
" <td>train_gte5_packed</td>\n",
" <td>[u'y']</td>\n",
" <td>[u'x']</td>\n",
" <td>[u'text']</td>\n",
" <td>[u'0', u'1', u'2', u'3', u'4']</td>\n",
" <td>981</td>\n",
" <td>255.0</td>\n",
" <td>[5]</td>\n",
" <td>all_segments</td>\n",
" <td>all_segments</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'train_gte5', u'train_gte5_packed', [u'y'], [u'x'], [u'text'], [u'0', u'1', u'2', u'3', u'4'], 981, 255.0, [5], 'all_segments', 'all_segments')]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS train_gte5_packed, train_gte5_packed_summary;\n",
"\n",
"SELECT madlib.training_preprocessor_dl('train_gte5', -- Source table\n",
" 'train_gte5_packed', -- Output table\n",
" 'y', -- Dependent variable\n",
" 'x', -- Independent variable\n",
" 1000, -- Buffer size\n",
" 255 -- Normalizing constant\n",
" );\n",
"\n",
"SELECT * FROM train_gte5_packed_summary;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Test dataset >= 5"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>output_table</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>dependent_vartype</th>\n",
" <th>y_class_values</th>\n",
" <th>buffer_size</th>\n",
" <th>normalizing_const</th>\n",
" <th>num_classes</th>\n",
" <th>distribution_rules</th>\n",
" <th>__internal_gpu_config__</th>\n",
" </tr>\n",
" <tr>\n",
" <td>test_gte5</td>\n",
" <td>test_gte5_packed</td>\n",
" <td>[u'y']</td>\n",
" <td>[u'x']</td>\n",
" <td>[u'text']</td>\n",
" <td>[u'0', u'1', u'2', u'3', u'4']</td>\n",
" <td>2431</td>\n",
" <td>255.0</td>\n",
" <td>[5]</td>\n",
" <td>all_segments</td>\n",
" <td>all_segments</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'test_gte5', u'test_gte5_packed', [u'y'], [u'x'], [u'text'], [u'0', u'1', u'2', u'3', u'4'], 2431, 255.0, [5], 'all_segments', 'all_segments')]"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS test_gte5_packed, test_gte5_packed_summary;\n",
"\n",
"SELECT madlib.validation_preprocessor_dl('test_gte5', -- Source table\n",
" 'test_gte5_packed', -- Output table\n",
" 'y', -- Dependent variable\n",
" 'x', -- Independent variable\n",
" 'train_gte5_packed' -- Training preproc table\n",
" );\n",
"\n",
"SELECT * FROM test_gte5_packed_summary;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"define_and_load_model\"></a>\n",
"# 4. Define and load model architecture\n",
"\n",
"Model with feature and classification layers trainable"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/fmcquillan/Library/Python/2.7/lib/python/site-packages/tensorflow/python/ops/init_ops.py:1251: calling __init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Call initializer instance with the dtype argument instead of passing it to the constructor\n",
"Model: \"sequential\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"conv2d (Conv2D) (None, 26, 26, 32) 320 \n",
"_________________________________________________________________\n",
"activation (Activation) (None, 26, 26, 32) 0 \n",
"_________________________________________________________________\n",
"conv2d_1 (Conv2D) (None, 24, 24, 32) 9248 \n",
"_________________________________________________________________\n",
"activation_1 (Activation) (None, 24, 24, 32) 0 \n",
"_________________________________________________________________\n",
"max_pooling2d (MaxPooling2D) (None, 12, 12, 32) 0 \n",
"_________________________________________________________________\n",
"dropout (Dropout) (None, 12, 12, 32) 0 \n",
"_________________________________________________________________\n",
"flatten (Flatten) (None, 4608) 0 \n",
"_________________________________________________________________\n",
"dense (Dense) (None, 128) 589952 \n",
"_________________________________________________________________\n",
"activation_2 (Activation) (None, 128) 0 \n",
"_________________________________________________________________\n",
"dropout_1 (Dropout) (None, 128) 0 \n",
"_________________________________________________________________\n",
"dense_1 (Dense) (None, 5) 645 \n",
"_________________________________________________________________\n",
"activation_3 (Activation) (None, 5) 0 \n",
"=================================================================\n",
"Total params: 600,165\n",
"Trainable params: 600,165\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# define two groups of layers: feature (convolutions) and classification (dense)\n",
"feature_layers = [\n",
" Conv2D(filters, kernel_size,\n",
" padding='valid',\n",
" input_shape=input_shape),\n",
" Activation('relu'),\n",
" Conv2D(filters, kernel_size),\n",
" Activation('relu'),\n",
" MaxPooling2D(pool_size=pool_size),\n",
" Dropout(0.25),\n",
" Flatten(),\n",
"]\n",
"\n",
"classification_layers = [\n",
" Dense(128),\n",
" Activation('relu'),\n",
" Dropout(0.5),\n",
" Dense(num_classes),\n",
" Activation('softmax')\n",
"]\n",
"\n",
"# create complete model\n",
"model = Sequential(feature_layers + classification_layers)\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load into model architecture table using psycopg2"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>model_id</th>\n",
" <th>name</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>feature + classification layers trainable</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, u'feature + classification layers trainable')]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import psycopg2 as p2\n",
"#conn = p2.connect('postgresql://gpadmin@35.239.240.26:5432/madlib')\n",
"conn = p2.connect('postgresql://gpadmin@localhost:8000/madlib')\n",
"cur = conn.cursor()\n",
"\n",
"%sql DROP TABLE IF EXISTS model_arch_library;\n",
"query = \"SELECT madlib.load_keras_model('model_arch_library', %s, NULL, %s)\"\n",
"cur.execute(query,[model.to_json(), \"feature + classification layers trainable\"])\n",
"conn.commit()\n",
"\n",
"# check model loaded OK\n",
"%sql SELECT model_id, name FROM model_arch_library;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Model with feature layers frozen"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"sequential\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"conv2d (Conv2D) (None, 26, 26, 32) 320 \n",
"_________________________________________________________________\n",
"activation (Activation) (None, 26, 26, 32) 0 \n",
"_________________________________________________________________\n",
"conv2d_1 (Conv2D) (None, 24, 24, 32) 9248 \n",
"_________________________________________________________________\n",
"activation_1 (Activation) (None, 24, 24, 32) 0 \n",
"_________________________________________________________________\n",
"max_pooling2d (MaxPooling2D) (None, 12, 12, 32) 0 \n",
"_________________________________________________________________\n",
"dropout (Dropout) (None, 12, 12, 32) 0 \n",
"_________________________________________________________________\n",
"flatten (Flatten) (None, 4608) 0 \n",
"_________________________________________________________________\n",
"dense (Dense) (None, 128) 589952 \n",
"_________________________________________________________________\n",
"activation_2 (Activation) (None, 128) 0 \n",
"_________________________________________________________________\n",
"dropout_1 (Dropout) (None, 128) 0 \n",
"_________________________________________________________________\n",
"dense_1 (Dense) (None, 5) 645 \n",
"_________________________________________________________________\n",
"activation_3 (Activation) (None, 5) 0 \n",
"=================================================================\n",
"Total params: 600,165\n",
"Trainable params: 590,597\n",
"Non-trainable params: 9,568\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# freeze feature layers\n",
"for l in feature_layers:\n",
" l.trainable = False\n",
"\n",
"model.summary()"
]
},
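{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick check (an addition to the original example), the trainable and non-trainable parameter counts can also be computed directly from the Keras model; they should match the summary above (590,597 and 9,568)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Count trainable vs. non-trainable parameters directly\n",
"trainable_count = sum(K.count_params(w) for w in model.trainable_weights)\n",
"non_trainable_count = sum(K.count_params(w) for w in model.non_trainable_weights)\n",
"print('Trainable params:', trainable_count)\n",
"print('Non-trainable params:', non_trainable_count)"
]
},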
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load into transfer model architecture table using psycopg2"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>model_id</th>\n",
" <th>name</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>feature + classification layers trainable</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>only classification layers trainable</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, u'feature + classification layers trainable'),\n",
" (2, u'only classification layers trainable')]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cur.execute(query,[model.to_json(), \"only classification layers trainable\"])\n",
"conn.commit()\n",
"\n",
"# check model loaded OK\n",
"%sql SELECT model_id, name FROM model_arch_library ORDER BY model_id;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"train\"></a>\n",
"# 5. Train\n",
"Train the model for 5-digit classification [0..4] "
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>madlib_keras_fit</th>\n",
" </tr>\n",
" <tr>\n",
" <td></td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[('',)]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS mnist_model, mnist_model_summary;\n",
"\n",
"SELECT madlib.madlib_keras_fit('train_lt5_packed', -- source table\n",
" 'mnist_model', -- model output table\n",
" 'model_arch_library', -- model arch table\n",
" 1, -- model arch id\n",
" $$ loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']$$, -- compile_params\n",
" $$ batch_size=128, epochs=1 $$, -- fit_params\n",
" 5 -- num_iterations\n",
" );"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"View the model summary:"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>model</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>model_arch_table</th>\n",
" <th>model_id</th>\n",
" <th>compile_params</th>\n",
" <th>fit_params</th>\n",
" <th>num_iterations</th>\n",
" <th>validation_table</th>\n",
" <th>object_table</th>\n",
" <th>metrics_compute_frequency</th>\n",
" <th>name</th>\n",
" <th>description</th>\n",
" <th>model_type</th>\n",
" <th>model_size</th>\n",
" <th>start_training_time</th>\n",
" <th>end_training_time</th>\n",
" <th>metrics_elapsed_time</th>\n",
" <th>madlib_version</th>\n",
" <th>num_classes</th>\n",
" <th>dependent_vartype</th>\n",
" <th>normalizing_const</th>\n",
" <th>metrics_type</th>\n",
" <th>loss_type</th>\n",
" <th>training_metrics_final</th>\n",
" <th>training_loss_final</th>\n",
" <th>training_metrics</th>\n",
" <th>training_loss</th>\n",
" <th>validation_metrics_final</th>\n",
" <th>validation_loss_final</th>\n",
" <th>validation_metrics</th>\n",
" <th>validation_loss</th>\n",
" <th>metrics_iters</th>\n",
" <th>y_class_values</th>\n",
" </tr>\n",
" <tr>\n",
" <td>train_lt5_packed</td>\n",
" <td>mnist_model</td>\n",
" <td>[u'y']</td>\n",
" <td>[u'x']</td>\n",
" <td>model_arch_library</td>\n",
" <td>1</td>\n",
" <td> loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']</td>\n",
" <td> batch_size=128, epochs=1 </td>\n",
" <td>5</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>5</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>madlib_keras</td>\n",
" <td>2344.43066406</td>\n",
" <td>2021-03-08 20:52:42.139646</td>\n",
" <td>2021-03-08 20:53:56.573492</td>\n",
" <td>[74.4337520599365]</td>\n",
" <td>1.18.0-dev</td>\n",
" <td>[5]</td>\n",
" <td>[u'text']</td>\n",
" <td>255.0</td>\n",
" <td>[u'accuracy']</td>\n",
" <td>categorical_crossentropy</td>\n",
" <td>0.579748988152</td>\n",
" <td>1.5176358223</td>\n",
" <td>[0.57974898815155]</td>\n",
" <td>[1.51763582229614]</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>[5]</td>\n",
" <td>[u'0', u'1', u'2', u'3', u'4']</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'train_lt5_packed', u'mnist_model', [u'y'], [u'x'], u'model_arch_library', 1, u\" loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']\", u' batch_size=128, epochs=1 ', 5, None, None, 5, None, None, u'madlib_keras', 2344.43066406, datetime.datetime(2021, 3, 8, 20, 52, 42, 139646), datetime.datetime(2021, 3, 8, 20, 53, 56, 573492), [74.4337520599365], u'1.18.0-dev', [5], [u'text'], 255.0, [u'accuracy'], u'categorical_crossentropy', 0.57974898815155, 1.51763582229614, [0.57974898815155], [1.51763582229614], None, None, None, None, [5], [u'0', u'1', u'2', u'3', u'4'])]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT * FROM mnist_model_summary;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Evaluate using test data"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>loss</th>\n",
" <th>metric</th>\n",
" <th>metrics_type</th>\n",
" <th>loss_type</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1.51568281651</td>\n",
" <td>0.603619396687</td>\n",
" <td>[u'accuracy']</td>\n",
" <td>categorical_crossentropy</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1.51568281650543, 0.603619396686554, [u'accuracy'], u'categorical_crossentropy')]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS mnist_validate;\n",
"\n",
"SELECT madlib.madlib_keras_evaluate('mnist_model', -- model\n",
" 'test_lt5_packed', -- test table\n",
" 'mnist_validate' -- output table\n",
" );\n",
"\n",
"SELECT * FROM mnist_validate;"
]
},
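{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally (an addition to the original example), generate per-image predictions with <em>madlib_keras_predict</em>; the call below follows the signature in the MADlib documentation (model table, test table, id column, independent variable, output table, prediction type)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS mnist_predict;\n",
"\n",
"SELECT madlib.madlib_keras_predict('mnist_model',    -- model table\n",
"                                   'test_lt5',       -- test table\n",
"                                   'id',             -- id column\n",
"                                   'x',              -- independent variable\n",
"                                   'mnist_predict',  -- output table\n",
"                                   'response'        -- prediction type\n",
"                                   );\n",
"\n",
"SELECT * FROM mnist_predict ORDER BY id LIMIT 5;"
]
},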
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"transfer_learning\"></a>\n",
"# 6. Transfer learning"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use UPDATE to load trained weights from previous run into the model library table:"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"UPDATE model_arch_library\n",
"SET model_weights = mnist_model.model_weights\n",
"FROM mnist_model\n",
"WHERE model_arch_library.model_id = 2;"
]
},
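{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a sanity check (an addition to the original example, assuming the weights are stored as bytea), confirm that model_id 2 now carries the trained weights:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"-- Sanity check (assumes model_weights is stored as bytea)\n",
"SELECT model_id, name, octet_length(model_weights) AS weights_bytes\n",
"FROM model_arch_library\n",
"ORDER BY model_id;"
]
},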
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Transfer: train dense layers for new classification task [5..9]"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>madlib_keras_fit</th>\n",
" </tr>\n",
" <tr>\n",
" <td></td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[('',)]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS mnist_transfer_model, mnist_transfer_model_summary;\n",
"\n",
"SELECT madlib.madlib_keras_fit('train_gte5_packed', -- source table\n",
" 'mnist_transfer_model',-- model output table\n",
" 'model_arch_library', -- model arch table\n",
" 2, -- model arch id\n",
" $$ loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']$$, -- compile_params\n",
" $$ batch_size=128, epochs=1 $$, -- fit_params\n",
" 5 -- num_iterations\n",
" );"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"View the model summary"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>model</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>model_arch_table</th>\n",
" <th>model_id</th>\n",
" <th>compile_params</th>\n",
" <th>fit_params</th>\n",
" <th>num_iterations</th>\n",
" <th>validation_table</th>\n",
" <th>object_table</th>\n",
" <th>metrics_compute_frequency</th>\n",
" <th>name</th>\n",
" <th>description</th>\n",
" <th>model_type</th>\n",
" <th>model_size</th>\n",
" <th>start_training_time</th>\n",
" <th>end_training_time</th>\n",
" <th>metrics_elapsed_time</th>\n",
" <th>madlib_version</th>\n",
" <th>num_classes</th>\n",
" <th>dependent_vartype</th>\n",
" <th>normalizing_const</th>\n",
" <th>metrics_type</th>\n",
" <th>loss_type</th>\n",
" <th>training_metrics_final</th>\n",
" <th>training_loss_final</th>\n",
" <th>training_metrics</th>\n",
" <th>training_loss</th>\n",
" <th>validation_metrics_final</th>\n",
" <th>validation_loss_final</th>\n",
" <th>validation_metrics</th>\n",
" <th>validation_loss</th>\n",
" <th>metrics_iters</th>\n",
" <th>y_class_values</th>\n",
" </tr>\n",
" <tr>\n",
" <td>train_gte5_packed</td>\n",
" <td>mnist_transfer_model</td>\n",
" <td>[u'y']</td>\n",
" <td>[u'x']</td>\n",
" <td>model_arch_library</td>\n",
" <td>2</td>\n",
" <td> loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']</td>\n",
" <td> batch_size=128, epochs=1 </td>\n",
" <td>5</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>5</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>madlib_keras</td>\n",
" <td>2344.43066406</td>\n",
" <td>2021-03-08 20:54:00.641208</td>\n",
" <td>2021-03-08 20:54:29.067638</td>\n",
" <td>[28.4263310432434]</td>\n",
" <td>1.18.0-dev</td>\n",
" <td>[5]</td>\n",
" <td>[u'text']</td>\n",
" <td>255.0</td>\n",
" <td>[u'accuracy']</td>\n",
" <td>categorical_crossentropy</td>\n",
" <td>0.6377363801</td>\n",
" <td>1.52131533623</td>\n",
" <td>[0.63773638010025]</td>\n",
" <td>[1.52131533622742]</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>[5]</td>\n",
" <td>[u'0', u'1', u'2', u'3', u'4']</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'train_gte5_packed', u'mnist_transfer_model', [u'y'], [u'x'], u'model_arch_library', 2, u\" loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']\", u' batch_size=128, epochs=1 ', 5, None, None, 5, None, None, u'madlib_keras', 2344.43066406, datetime.datetime(2021, 3, 8, 20, 54, 0, 641208), datetime.datetime(2021, 3, 8, 20, 54, 29, 67638), [28.4263310432434], u'1.18.0-dev', [5], [u'text'], 255.0, [u'accuracy'], u'categorical_crossentropy', 0.63773638010025, 1.52131533622742, [0.63773638010025], [1.52131533622742], None, None, None, None, [5], [u'0', u'1', u'2', u'3', u'4'])]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT * FROM mnist_transfer_model_summary;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Evaluate using test data"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>loss</th>\n",
" <th>metric</th>\n",
" <th>metrics_type</th>\n",
" <th>loss_type</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1.52041304111</td>\n",
" <td>0.625385701656</td>\n",
" <td>[u'accuracy']</td>\n",
" <td>categorical_crossentropy</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1.52041304111481, 0.625385701656342, [u'accuracy'], u'categorical_crossentropy')]"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS mnist_transfer_validate;\n",
"\n",
"SELECT madlib.madlib_keras_evaluate('mnist_transfer_model', -- model\n",
" 'test_gte5_packed', -- test table\n",
" 'mnist_transfer_validate' -- output table\n",
" );\n",
"\n",
"SELECT * FROM mnist_transfer_validate;"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.16"
}
},
"nbformat": 4,
"nbformat_minor": 1
}