blob: 86848f64d280f0398cd97cc0fc1d578bb03002cd [file] [log] [blame]
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Inference for CIFAR-10 dataset using predict BYOM\n",
"The predict BYOM function allows you to do inference using models that have not been trained with MADlib, but rather imported or created elsewhere. It was added in MADlib 1.17.\n",
"\n",
"In this workbook we train a model in Python using\n",
"https://keras.io/examples/cifar10_cnn/\n",
"and run inference on the validation set.\n",
"\n",
"## Table of contents\n",
"\n",
"<a href=\"#setup\">1. Setup</a>\n",
"\n",
"<a href=\"#train_model\">2. Train model in Python</a>\n",
"\n",
"<a href=\"#load_model\">3. Load model into table</a>\n",
"\n",
"<a href=\"#load_images\">4. Get validation data set and load into table</a>\n",
"\n",
"<a href=\"#inference\">5. Inference</a>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"setup\"></a>\n",
"# 1. Setup"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext sql"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Greenplum Database 5.x on GCP - via tunnel\n",
"%sql postgresql://gpadmin@localhost:8000/madlib\n",
" \n",
"# PostgreSQL local\n",
"#%sql postgresql://fmcquillan@localhost:5432/madlib"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>version</th>\n",
" </tr>\n",
" <tr>\n",
" <td>MADlib version: 1.18.0-dev, git revision: rel/v1.17.0-91-g16070e5, cmake configuration time: Mon Mar 8 16:58:24 UTC 2021, build type: release, build system: Linux-3.10.0-1160.11.1.el7.x86_64, C compiler: gcc 4.8.5, C++ compiler: g++ 4.8.5</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'MADlib version: 1.18.0-dev, git revision: rel/v1.17.0-91-g16070e5, cmake configuration time: Mon Mar 8 16:58:24 UTC 2021, build type: release, build system: Linux-3.10.0-1160.11.1.el7.x86_64, C compiler: gcc 4.8.5, C++ compiler: g++ 4.8.5',)]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%sql select madlib.version();\n",
"#%sql select version();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"train_model\"></a>\n",
"# 2. Train model in Python\n",
"\n",
"Train a model in Python using https://keras.io/examples/cifar10_cnn/\n",
"\n",
"Define model"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"x_train shape: (50000, 32, 32, 3)\n",
"50000 train samples\n",
"10000 test samples\n",
"WARNING:tensorflow:From /Users/fmcquillan/Library/Python/2.7/lib/python/site-packages/tensorflow/python/ops/init_ops.py:1251: calling __init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Call initializer instance with the dtype argument instead of passing it to the constructor\n"
]
}
],
"source": [
"from __future__ import print_function\n",
"from tensorflow import keras\n",
"from tensorflow.keras.datasets import cifar10\n",
"from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten\n",
"from tensorflow.keras.layers import Conv2D, MaxPooling2D\n",
"import os\n",
"\n",
"batch_size = 32\n",
"num_classes = 10\n",
"epochs = 25\n",
"data_augmentation = True\n",
"num_predictions = 20\n",
"#save_dir = os.path.join(os.getcwd(), 'saved_models')\n",
"#model_name = 'keras_cifar10_trained_model.h5'\n",
"\n",
"# The data, split between train and test sets:\n",
"(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n",
"print('x_train shape:', x_train.shape)\n",
"print(x_train.shape[0], 'train samples')\n",
"print(x_test.shape[0], 'test samples')\n",
"\n",
"# Convert class vectors to binary class matrices.\n",
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
"y_test = keras.utils.to_categorical(y_test, num_classes)\n",
"\n",
"model = Sequential()\n",
"model.add(Conv2D(32, (3, 3), padding='same',\n",
" input_shape=x_train.shape[1:]))\n",
"model.add(Activation('relu'))\n",
"model.add(Conv2D(32, (3, 3)))\n",
"model.add(Activation('relu'))\n",
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
"model.add(Dropout(0.25))\n",
"\n",
"model.add(Conv2D(64, (3, 3), padding='same'))\n",
"model.add(Activation('relu'))\n",
"model.add(Conv2D(64, (3, 3)))\n",
"model.add(Activation('relu'))\n",
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
"model.add(Dropout(0.25))\n",
"\n",
"model.add(Flatten())\n",
"model.add(Dense(512))\n",
"model.add(Activation('relu'))\n",
"model.add(Dropout(0.5))\n",
"model.add(Dense(num_classes))\n",
"model.add(Activation('softmax'))\n",
"\n",
"# initiate RMSprop optimizer\n",
"opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)\n",
"\n",
"# Let's train the model using RMSprop\n",
"model.compile(loss='categorical_crossentropy',\n",
" optimizer=opt,\n",
" metrics=['accuracy']);"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'{\"class_name\": \"Sequential\", \"keras_version\": \"2.2.4-tf\", \"config\": {\"layers\": [{\"class_name\": \"Conv2D\", \"config\": {\"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"dtype\": \"float32\", \"seed\": null}}, \"name\": \"conv2d\", \"kernel_constraint\": null, \"bias_regularizer\": null, \"bias_constraint\": null, \"dtype\": \"float32\", \"activation\": \"linear\", \"trainable\": true, \"data_format\": \"channels_last\", \"padding\": \"same\", \"strides\": [1, 1], \"dilation_rate\": [1, 1], \"kernel_regularizer\": null, \"filters\": 32, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {\"dtype\": \"float32\"}}, \"batch_input_shape\": [null, 32, 32, 3], \"use_bias\": true, \"activity_regularizer\": null, \"kernel_size\": [3, 3]}}, {\"class_name\": \"Activation\", \"config\": {\"dtype\": \"float32\", \"activation\": \"relu\", \"trainable\": true, \"name\": \"activation\"}}, {\"class_name\": \"Conv2D\", \"config\": {\"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"dtype\": \"float32\", \"seed\": null}}, \"name\": \"conv2d_1\", \"kernel_constraint\": null, \"bias_regularizer\": null, \"bias_constraint\": null, \"dtype\": \"float32\", \"activation\": \"linear\", \"trainable\": true, \"data_format\": \"channels_last\", \"padding\": \"valid\", \"strides\": [1, 1], \"dilation_rate\": [1, 1], \"kernel_regularizer\": null, \"filters\": 32, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {\"dtype\": \"float32\"}}, \"use_bias\": true, \"activity_regularizer\": null, \"kernel_size\": [3, 3]}}, {\"class_name\": \"Activation\", \"config\": {\"dtype\": \"float32\", \"activation\": \"relu\", \"trainable\": true, \"name\": \"activation_1\"}}, {\"class_name\": \"MaxPooling2D\", \"config\": {\"name\": \"max_pooling2d\", \"dtype\": \"float32\", \"trainable\": true, \"data_format\": \"channels_last\", \"pool_size\": [2, 2], \"padding\": \"valid\", \"strides\": [2, 2]}}, {\"class_name\": 
\"Dropout\", \"config\": {\"name\": \"dropout\", \"dtype\": \"float32\", \"trainable\": true, \"rate\": 0.25, \"seed\": null, \"noise_shape\": null}}, {\"class_name\": \"Conv2D\", \"config\": {\"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"dtype\": \"float32\", \"seed\": null}}, \"name\": \"conv2d_2\", \"kernel_constraint\": null, \"bias_regularizer\": null, \"bias_constraint\": null, \"dtype\": \"float32\", \"activation\": \"linear\", \"trainable\": true, \"data_format\": \"channels_last\", \"padding\": \"same\", \"strides\": [1, 1], \"dilation_rate\": [1, 1], \"kernel_regularizer\": null, \"filters\": 64, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {\"dtype\": \"float32\"}}, \"use_bias\": true, \"activity_regularizer\": null, \"kernel_size\": [3, 3]}}, {\"class_name\": \"Activation\", \"config\": {\"dtype\": \"float32\", \"activation\": \"relu\", \"trainable\": true, \"name\": \"activation_2\"}}, {\"class_name\": \"Conv2D\", \"config\": {\"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"dtype\": \"float32\", \"seed\": null}}, \"name\": \"conv2d_3\", \"kernel_constraint\": null, \"bias_regularizer\": null, \"bias_constraint\": null, \"dtype\": \"float32\", \"activation\": \"linear\", \"trainable\": true, \"data_format\": \"channels_last\", \"padding\": \"valid\", \"strides\": [1, 1], \"dilation_rate\": [1, 1], \"kernel_regularizer\": null, \"filters\": 64, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {\"dtype\": \"float32\"}}, \"use_bias\": true, \"activity_regularizer\": null, \"kernel_size\": [3, 3]}}, {\"class_name\": \"Activation\", \"config\": {\"dtype\": \"float32\", \"activation\": \"relu\", \"trainable\": true, \"name\": \"activation_3\"}}, {\"class_name\": \"MaxPooling2D\", \"config\": {\"name\": \"max_pooling2d_1\", \"dtype\": \"float32\", \"trainable\": true, \"data_format\": \"channels_last\", \"pool_size\": [2, 2], \"padding\": \"valid\", \"strides\": [2, 2]}}, 
{\"class_name\": \"Dropout\", \"config\": {\"name\": \"dropout_1\", \"dtype\": \"float32\", \"trainable\": true, \"rate\": 0.25, \"seed\": null, \"noise_shape\": null}}, {\"class_name\": \"Flatten\", \"config\": {\"dtype\": \"float32\", \"trainable\": true, \"name\": \"flatten\", \"data_format\": \"channels_last\"}}, {\"class_name\": \"Dense\", \"config\": {\"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"dtype\": \"float32\", \"seed\": null}}, \"name\": \"dense\", \"kernel_constraint\": null, \"bias_regularizer\": null, \"bias_constraint\": null, \"dtype\": \"float32\", \"activation\": \"linear\", \"trainable\": true, \"kernel_regularizer\": null, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {\"dtype\": \"float32\"}}, \"units\": 512, \"use_bias\": true, \"activity_regularizer\": null}}, {\"class_name\": \"Activation\", \"config\": {\"dtype\": \"float32\", \"activation\": \"relu\", \"trainable\": true, \"name\": \"activation_4\"}}, {\"class_name\": \"Dropout\", \"config\": {\"name\": \"dropout_2\", \"dtype\": \"float32\", \"trainable\": true, \"rate\": 0.5, \"seed\": null, \"noise_shape\": null}}, {\"class_name\": \"Dense\", \"config\": {\"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"dtype\": \"float32\", \"seed\": null}}, \"name\": \"dense_1\", \"kernel_constraint\": null, \"bias_regularizer\": null, \"bias_constraint\": null, \"dtype\": \"float32\", \"activation\": \"linear\", \"trainable\": true, \"kernel_regularizer\": null, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {\"dtype\": \"float32\"}}, \"units\": 10, \"use_bias\": true, \"activity_regularizer\": null}}, {\"class_name\": \"Activation\", \"config\": {\"dtype\": \"float32\", \"activation\": \"softmax\", \"trainable\": true, \"name\": \"activation_5\"}}], \"name\": \"sequential\"}, \"backend\": \"tensorflow\"}'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.to_json()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using real-time data augmentation.\n",
"Epoch 1/25\n",
"1563/1563 [==============================] - 88s 56ms/step - loss: 1.9260 - acc: 0.2931 - val_loss: 1.7065 - val_acc: 0.3839\n",
"Epoch 2/25\n",
"1563/1563 [==============================] - 95s 61ms/step - loss: 1.6191 - acc: 0.4075 - val_loss: 1.4352 - val_acc: 0.4829\n",
"Epoch 3/25\n",
"1563/1563 [==============================] - 94s 60ms/step - loss: 1.4935 - acc: 0.4605 - val_loss: 1.3301 - val_acc: 0.5153\n",
"Epoch 4/25\n",
"1563/1563 [==============================] - 94s 60ms/step - loss: 1.4039 - acc: 0.4978 - val_loss: 1.2473 - val_acc: 0.5525\n",
"Epoch 5/25\n",
"1563/1563 [==============================] - 93s 60ms/step - loss: 1.3317 - acc: 0.5238 - val_loss: 1.1748 - val_acc: 0.5796\n",
"Epoch 6/25\n",
"1563/1563 [==============================] - 98s 63ms/step - loss: 1.2711 - acc: 0.5486 - val_loss: 1.1139 - val_acc: 0.6057\n",
"Epoch 7/25\n",
"1563/1563 [==============================] - 97s 62ms/step - loss: 1.2252 - acc: 0.5654 - val_loss: 1.0854 - val_acc: 0.6185\n",
"Epoch 8/25\n",
"1563/1563 [==============================] - 101s 64ms/step - loss: 1.1810 - acc: 0.5810 - val_loss: 1.0498 - val_acc: 0.6252\n",
"Epoch 9/25\n",
"1563/1563 [==============================] - 99s 63ms/step - loss: 1.1507 - acc: 0.5926 - val_loss: 1.0188 - val_acc: 0.6408\n",
"Epoch 10/25\n",
"1563/1563 [==============================] - 99s 63ms/step - loss: 1.1101 - acc: 0.6078 - val_loss: 1.0167 - val_acc: 0.6456\n",
"Epoch 11/25\n",
"1563/1563 [==============================] - 97s 62ms/step - loss: 1.0870 - acc: 0.6156 - val_loss: 0.9796 - val_acc: 0.6538\n",
"Epoch 12/25\n",
"1563/1563 [==============================] - 98s 63ms/step - loss: 1.0580 - acc: 0.6249 - val_loss: 0.9458 - val_acc: 0.6663\n",
"Epoch 13/25\n",
"1563/1563 [==============================] - 106s 68ms/step - loss: 1.0387 - acc: 0.6324 - val_loss: 0.8871 - val_acc: 0.6869\n",
"Epoch 14/25\n",
"1563/1563 [==============================] - 106s 68ms/step - loss: 1.0234 - acc: 0.6391 - val_loss: 0.9362 - val_acc: 0.6731\n",
"Epoch 15/25\n",
"1563/1563 [==============================] - 106s 68ms/step - loss: 1.0014 - acc: 0.6486 - val_loss: 0.9410 - val_acc: 0.6711\n",
"Epoch 16/25\n",
"1563/1563 [==============================] - 107s 69ms/step - loss: 0.9868 - acc: 0.6559 - val_loss: 0.9640 - val_acc: 0.6688\n",
"Epoch 17/25\n",
"1563/1563 [==============================] - 102s 66ms/step - loss: 0.9695 - acc: 0.6608 - val_loss: 0.9420 - val_acc: 0.6724\n",
"Epoch 18/25\n",
"1563/1563 [==============================] - 113s 73ms/step - loss: 0.9630 - acc: 0.6624 - val_loss: 0.8186 - val_acc: 0.7190\n",
"Epoch 19/25\n",
"1563/1563 [==============================] - 129s 83ms/step - loss: 0.9508 - acc: 0.6675 - val_loss: 0.8731 - val_acc: 0.6972\n",
"Epoch 20/25\n",
"1563/1563 [==============================] - 188s 121ms/step - loss: 0.9413 - acc: 0.6729 - val_loss: 0.8053 - val_acc: 0.7184\n",
"Epoch 21/25\n",
"1563/1563 [==============================] - 139s 89ms/step - loss: 0.9326 - acc: 0.6745 - val_loss: 0.8060 - val_acc: 0.7158\n",
"Epoch 22/25\n",
"1563/1563 [==============================] - 139s 89ms/step - loss: 0.9195 - acc: 0.6828 - val_loss: 0.8315 - val_acc: 0.7116\n",
"Epoch 23/25\n",
"1563/1563 [==============================] - 349s 224ms/step - loss: 0.9152 - acc: 0.6835 - val_loss: 0.8245 - val_acc: 0.7126\n",
"Epoch 24/25\n",
"1563/1563 [==============================] - 371s 237ms/step - loss: 0.9035 - acc: 0.6880 - val_loss: 0.7956 - val_acc: 0.7262\n",
"Epoch 25/25\n",
"1563/1563 [==============================] - 361s 231ms/step - loss: 0.8991 - acc: 0.6885 - val_loss: 0.7894 - val_acc: 0.7285\n"
]
},
{
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x1449559d0>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10000/10000 [==============================] - 13s 1ms/sample - loss: 0.7894 - acc: 0.7285\n",
"Test loss: 0.789413549900055\n",
"Test accuracy: 0.7285\n"
]
}
],
"source": [
"x_train = x_train.astype('float32')\n",
"x_test = x_test.astype('float32')\n",
"x_train /= 255\n",
"x_test /= 255\n",
"\n",
"if not data_augmentation:\n",
" print('Not using data augmentation.')\n",
" model.fit(x_train, y_train,\n",
" batch_size=batch_size,\n",
" epochs=epochs,\n",
" validation_data=(x_test, y_test),\n",
" shuffle=True)\n",
"else:\n",
" print('Using real-time data augmentation.')\n",
" # This will do preprocessing and realtime data augmentation:\n",
" datagen = ImageDataGenerator(\n",
" featurewise_center=False, # set input mean to 0 over the dataset\n",
" samplewise_center=False, # set each sample mean to 0\n",
" featurewise_std_normalization=False, # divide inputs by std of the dataset\n",
" samplewise_std_normalization=False, # divide each input by its std\n",
" zca_whitening=False, # apply ZCA whitening\n",
" zca_epsilon=1e-06, # epsilon for ZCA whitening\n",
" rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180)\n",
" # randomly shift images horizontally (fraction of total width)\n",
" width_shift_range=0.1,\n",
" # randomly shift images vertically (fraction of total height)\n",
" height_shift_range=0.1,\n",
" shear_range=0., # set range for random shear\n",
" zoom_range=0., # set range for random zoom\n",
" channel_shift_range=0., # set range for random channel shifts\n",
" # set mode for filling points outside the input boundaries\n",
" fill_mode='nearest',\n",
" cval=0., # value used for fill_mode = \"constant\"\n",
" horizontal_flip=True, # randomly flip images\n",
" vertical_flip=False, # randomly flip images\n",
" # set rescaling factor (applied before any other transformation)\n",
" rescale=None,\n",
" # set function that will be applied on each input\n",
" preprocessing_function=None,\n",
" # image data format, either \"channels_first\" or \"channels_last\"\n",
" data_format=None,\n",
" # fraction of images reserved for validation (strictly between 0 and 1)\n",
" validation_split=0.0)\n",
"\n",
" # Compute quantities required for feature-wise normalization\n",
" # (std, mean, and principal components if ZCA whitening is applied).\n",
" datagen.fit(x_train)\n",
"\n",
" # Fit the model on the batches generated by datagen.flow().\n",
" model.fit_generator(datagen.flow(x_train, y_train,\n",
" batch_size=batch_size),\n",
" epochs=epochs,\n",
" validation_data=(x_test, y_test),\n",
" workers=1)\n",
"\n",
"# Save model and weights\n",
"#if not os.path.isdir(save_dir):\n",
"# os.makedirs(save_dir)\n",
"#model_path = os.path.join(save_dir, model_name)\n",
"#model.save(model_path)\n",
"#print('Saved trained model at %s ' % model_path)\n",
"\n",
"# Score trained model.\n",
"scores = model.evaluate(x_test, y_test, verbose=1)\n",
"print('Test loss:', scores[0])\n",
"print('Test accuracy:', scores[1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"load_model\"></a>\n",
"# 3. Load model into table\n",
"\n",
"Load the model architecture and weights into the model architecture table"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>model_id</th>\n",
" <th>name</th>\n",
" <th>description</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>CIFAR10 model</td>\n",
" <td>CNN model with weights trained on CIFAR10.</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, u'CIFAR10 model', u'CNN model with weights trained on CIFAR10.')]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import psycopg2 as p2\n",
"conn = p2.connect('postgresql://gpadmin@localhost:8000/madlib')\n",
"#conn = p2.connect('postgresql://fmcquillan@localhost:5432/madlib')\n",
"cur = conn.cursor()\n",
"\n",
"from tensorflow.keras.layers import *\n",
"from tensorflow.keras import Sequential\n",
"import numpy as np\n",
"\n",
"# get weights, flatten and serialize\n",
"weights = model.get_weights()\n",
"weights_flat = [w.flatten() for w in weights]\n",
"weights1d = np.concatenate(weights_flat).ravel()\n",
"weights_bytea = p2.Binary(weights1d.tostring())\n",
"\n",
"%sql DROP TABLE IF EXISTS model_arch_library_cifar10;\n",
"query = \"SELECT madlib.load_keras_model('model_arch_library_cifar10', %s,%s,%s,%s)\"\n",
"cur.execute(query,[model.to_json(), weights_bytea, \"CIFAR10 model\", \"CNN model with weights trained on CIFAR10.\"])\n",
"conn.commit()\n",
"\n",
"# check weights loaded OK\n",
"%sql SELECT model_id, name, description FROM model_arch_library_cifar10;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"load_images\"></a>\n",
"# 4. Get validation data set and load into table\n",
"\n",
"First set up image loader using the script called <em>madlib_image_loader.py</em> located at https://github.com/apache/madlib-site/tree/asf-site/community-artifacts/Deep-learning"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import os\n",
"madlib_site_dir = '/Users/fmcquillan/Documents/Product/MADlib/Demos/data'\n",
"sys.path.append(madlib_site_dir)\n",
"\n",
"# Import image loader module\n",
"from madlib_image_loader import ImageLoader, DbCredentials\n",
"\n",
"# Specify database credentials, for connecting to db\n",
"#db_creds = DbCredentials(user='fmcquillan',\n",
"# host='localhost',\n",
"# port='5432',\n",
"# password='')\n",
"\n",
"# Specify database credentials, for connecting to db\n",
"db_creds = DbCredentials(user='gpadmin', \n",
" db_name='madlib',\n",
" host='localhost',\n",
" port='8000',\n",
" password='')\n",
"\n",
"# Initialize ImageLoader (increase num_workers to run faster)\n",
"iloader = ImageLoader(num_workers=5, db_creds=db_creds)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next load CIFAR-10 data from Keras consisting of 50,000 32x32 color training images, labeled over 10 categories, and 10,000 test images."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"MainProcess: Connected to madlib db.\n",
"Executing: CREATE TABLE cifar_10_test_data (id SERIAL, x REAL[], y TEXT)\n",
"CREATE TABLE\n",
"Created table cifar_10_test_data in madlib db\n",
"Spawning 5 workers...\n",
"Initializing PoolWorker-1 [pid 34877]\n",
"PoolWorker-1: Created temporary directory /tmp/madlib_xKS6PcacbB\n",
"Initializing PoolWorker-2 [pid 34878]\n",
"PoolWorker-2: Created temporary directory /tmp/madlib_RB5gvLkpc2\n",
"Initializing PoolWorker-3 [pid 34879]\n",
"PoolWorker-3: Created temporary directory /tmp/madlib_hU0p2y5lZq\n",
"Initializing PoolWorker-4 [pid 34880]\n",
"PoolWorker-4: Created temporary directory /tmp/madlib_K4phT5brYw\n",
"Initializing PoolWorker-5 [pid 34881]\n",
"PoolWorker-5: Created temporary directory /tmp/madlib_dcgGz3psbs\n",
"PoolWorker-1: Connected to madlib db.\n",
"PoolWorker-2: Connected to madlib db.\n",
"PoolWorker-3: Connected to madlib db.\n",
"PoolWorker-4: Connected to madlib db.\n",
"PoolWorker-5: Connected to madlib db.\n",
"PoolWorker-1: Wrote 1000 images to /tmp/madlib_xKS6PcacbB/cifar_10_test_data0000.tmp\n",
"PoolWorker-3: Wrote 1000 images to /tmp/madlib_hU0p2y5lZq/cifar_10_test_data0000.tmp\n",
"PoolWorker-2: Wrote 1000 images to /tmp/madlib_RB5gvLkpc2/cifar_10_test_data0000.tmp\n",
"PoolWorker-4: Wrote 1000 images to /tmp/madlib_K4phT5brYw/cifar_10_test_data0000.tmp\n",
"PoolWorker-5: Wrote 1000 images to /tmp/madlib_dcgGz3psbs/cifar_10_test_data0000.tmp\n",
"PoolWorker-1: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-1: Wrote 1000 images to /tmp/madlib_xKS6PcacbB/cifar_10_test_data0001.tmp\n",
"PoolWorker-3: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-3: Wrote 1000 images to /tmp/madlib_hU0p2y5lZq/cifar_10_test_data0001.tmp\n",
"PoolWorker-5: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-4: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-2: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-5: Wrote 1000 images to /tmp/madlib_dcgGz3psbs/cifar_10_test_data0001.tmp\n",
"PoolWorker-4: Wrote 1000 images to /tmp/madlib_K4phT5brYw/cifar_10_test_data0001.tmp\n",
"PoolWorker-2: Wrote 1000 images to /tmp/madlib_RB5gvLkpc2/cifar_10_test_data0001.tmp\n",
"PoolWorker-1: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-3: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-5: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-4: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-2: Loaded 1000 images into cifar_10_test_data\n",
"PoolWorker-5: Removed temporary directory /tmp/madlib_dcgGz3psbs\n",
"PoolWorker-4: Removed temporary directory /tmp/madlib_K4phT5brYw\n",
"PoolWorker-3: Removed temporary directory /tmp/madlib_hU0p2y5lZq\n",
"PoolWorker-2: Removed temporary directory /tmp/madlib_RB5gvLkpc2\n",
"PoolWorker-1: Removed temporary directory /tmp/madlib_xKS6PcacbB\n",
"Done! Loaded 10000 images in 120.575361967s\n",
"5 workers terminated.\n"
]
}
],
"source": [
"from tensorflow.keras.datasets import cifar10\n",
"\n",
"# Load dataset into np array\n",
"(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n",
"\n",
"%sql DROP TABLE IF EXISTS cifar_10_test_data;\n",
"\n",
"# Save images to temporary directories and load into database\n",
"#iloader.load_dataset_from_np(x_train, y_train, 'cifar_10_train_data', append=False)\n",
"iloader.load_dataset_from_np(x_test, y_test, 'cifar_10_test_data', append=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"inference\"></a>\n",
"# 5. Inference"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"10 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>id</th>\n",
" <th>class_name</th>\n",
" <th>class_value</th>\n",
" <th>prob</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>dependent_var</td>\n",
" <td>3</td>\n",
" <td>0.41348055</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>dependent_var</td>\n",
" <td>8</td>\n",
" <td>0.9928235</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>dependent_var</td>\n",
" <td>1</td>\n",
" <td>0.52061397</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>dependent_var</td>\n",
" <td>0</td>\n",
" <td>0.619041</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>dependent_var</td>\n",
" <td>6</td>\n",
" <td>0.99125576</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>dependent_var</td>\n",
" <td>6</td>\n",
" <td>0.9728794</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>dependent_var</td>\n",
" <td>1</td>\n",
" <td>0.86263895</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>dependent_var</td>\n",
" <td>6</td>\n",
" <td>0.6395346</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>dependent_var</td>\n",
" <td>3</td>\n",
" <td>0.75210774</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>dependent_var</td>\n",
" <td>9</td>\n",
" <td>0.4934366</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, u'dependent_var', u'3', 0.41348055),\n",
" (2, u'dependent_var', u'8', 0.9928235),\n",
" (3, u'dependent_var', u'1', 0.52061397),\n",
" (4, u'dependent_var', u'0', 0.619041),\n",
" (5, u'dependent_var', u'6', 0.99125576),\n",
" (6, u'dependent_var', u'6', 0.9728794),\n",
" (7, u'dependent_var', u'1', 0.86263895),\n",
" (8, u'dependent_var', u'6', 0.6395346),\n",
" (9, u'dependent_var', u'3', 0.75210774),\n",
" (10, u'dependent_var', u'9', 0.4934366)]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS cifar10_predict_byom;\n",
"\n",
"SELECT madlib.madlib_keras_predict_byom('model_arch_library_cifar10', -- model arch table\n",
" 1, -- model arch id\n",
" 'cifar_10_test_data', -- test_table\n",
" 'id', -- id column\n",
" 'x', -- independent var\n",
" 'cifar10_predict_byom', -- output table\n",
" 'response', -- prediction type\n",
" FALSE, -- use gpus\n",
" NULL, -- class values\n",
" 255.0 -- normalizing const\n",
" );\n",
"SELECT * FROM cifar10_predict_byom ORDER BY id LIMIT 10;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Number of misclassifications:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <td>2715</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(2715L,)]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT COUNT(*) FROM cifar10_predict_byom JOIN cifar_10_test_data USING (id)\n",
"WHERE cifar10_predict_byom.class_value != cifar_10_test_data.y;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Predict accuracy. From https://keras.io/examples/cifar10_cnn/ the claimed accuracy is 75% on the validation set after 25 epochs. The test accuracy from the run above is slightly different, but the MADlib predict BYOM result matches it:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>test_accuracy_percent</th>\n",
" </tr>\n",
" <tr>\n",
" <td>72.85</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(Decimal('72.85'),)]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT round(count(*)*100.0/10000.0, 2) as test_accuracy_percent from\n",
" (select cifar_10_test_data.y as actual, cifar10_predict_byom.class_value as estimated\n",
" from cifar10_predict_byom inner join cifar_10_test_data\n",
" on cifar_10_test_data.id=cifar10_predict_byom.id) q\n",
"WHERE q.actual=q.estimated;"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.16"
}
},
"nbformat": 4,
"nbformat_minor": 2
}