| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Imports" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "%load_ext autoreload\n", |
| "%autoreload 2\n", |
| "%matplotlib inline\n", |
| "\n", |
| "import math\n", |
| "import multiprocessing as mp\n", |
| "import os\n", |
| "\n", |
| "import keras\n", |
| "import keras.backend as K\n", |
| "from keras.applications.resnet50 import ResNet50\n", |
| "from keras.callbacks import ModelCheckpoint, TensorBoard\n", |
| "from keras.initializers import VarianceScaling\n", |
| "from keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D, Input, Lambda\n", |
| "from keras.models import Model, load_model\n", |
| "from keras.optimizers import SGD\n", |
| "from keras.preprocessing.image import ImageDataGenerator\n", |
| "from keras.regularizers import l2\n", |
| "import matplotlib.pyplot as plt\n", |
| "import numpy as np\n", |
| "import pandas as pd\n", |
| "from PIL import Image\n", |
| "import tensorflow as tf\n", |
| "\n", |
| "# After move to Keras 2.0 API, need to check if this can still be used.\n", |
| "# from preprocessing.image import ImageDataGenerator # multiprocessing ImageDataGenerator\n", |
| "\n", |
| "plt.rcParams['figure.figsize'] = (10, 10)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Settings" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Experiment settings shared by the cells below.\n", |
| "# os.environ['CUDA_VISIBLE_DEVICES'] = \"\"\n", |
| "size = 224  # images are resized to size x size pixels\n", |
| "channels = 3\n", |
| "data_format = 'channels_last' # channels_first is too slow, prob due to unnecessary conversions\n", |
| "classes = 3\n", |
| "p = 0.01  # training sample fraction; also names the training image directory\n", |
| "val_p = 0.01  # validation sample fraction; also names the validation image directory\n", |
| "num_gpus = 4\n", |
| "batch_size = 32 * num_gpus # for 2 GPUs, 32/GPU has 1.2x systems speedup over 16/GPU\n", |
| "train_dir = \"train_updated_norm_v3\"\n", |
| "val_dir = \"val_updated_norm_v3\"\n", |
| "new_run = True  # True: start a new run directory; False: reuse an existing one\n", |
| "experiment_template = \"resnet50-{p}%-{num_gpus}-gpu-{batch_size}-batch-size-{train_dir}-data-{val_p}%-val-sanity\"\n", |
| "# Use round() rather than int(): int() truncates, so a product such as\n", |
| "# 0.29 * 100 == 28.999999999999996 would be labeled 28% instead of 29%.\n", |
| "experiment = experiment_template.format(p=round(p*100), val_p=round(val_p*100), num_gpus=num_gpus,\n", |
| "                                        batch_size=batch_size, train_dir=train_dir)\n", |
| "print(experiment)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Tell Keras which axis holds the channels, then build the matching\n", |
| "# per-image input shape (without the batch dimension).\n", |
| "K.set_image_data_format(data_format)\n", |
| "channels_first = data_format == 'channels_first'\n", |
| "input_shape = (channels, size, size) if channels_first else (size, size, channels)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Setup experiment directory" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "def get_run_dir(path, new_run):\n", |
| "    \"\"\"\n", |
| "    Create (if needed) and return the directory for this training run.\n", |
| "\n", |
| "    Runs are stored as subdirectories of `path` named 0, 1, 2, ...; the run\n", |
| "    number is derived from the number of entries already present in `path`.\n", |
| "\n", |
| "    # Arguments\n", |
| "        path: directory holding the run directories; created if missing.\n", |
| "        new_run: if True, use the next run number; otherwise reuse the\n", |
| "            latest existing run so that training can be continued.\n", |
| "    # Returns\n", |
| "        Path of the run directory, which exists on return.\n", |
| "    \"\"\"\n", |
| "    os.makedirs(path, exist_ok=True)\n", |
| "    num_experiments = len(os.listdir(path))\n", |
| "    if new_run:\n", |
| "        run = num_experiments  # run 0, 1, 2, ...\n", |
| "    else:\n", |
| "        # Latest existing run. `max` (not `min`) so that the result falls back\n", |
| "        # to run 0 only when no run exists yet, instead of always being 0 or -1.\n", |
| "        run = max(0, num_experiments - 1)  # continue training\n", |
| "    run_dir = os.path.join(path, str(run))\n", |
| "    os.makedirs(run_dir, exist_ok=True)\n", |
| "    return run_dir\n", |
| "\n", |
| "def get_experiment_dir(experiment, new_run):\n", |
| "    \"\"\"\n", |
| "    Return the run directory for `experiment`, creating it if necessary.\n", |
| "\n", |
| "    Runs live under experiments/keras/<experiment>/; `new_run` is passed\n", |
| "    through to `get_run_dir` to select a new or an existing run.\n", |
| "    \"\"\"\n", |
| "    return get_run_dir(os.path.join(\"experiments\", \"keras\", experiment), new_run)\n", |
| "\n", |
| "# Resolve (and create on disk) the directory used for this run's\n", |
| "# checkpoints, TensorBoard logs and saved model.\n", |
| "exp_dir = get_experiment_dir(experiment, new_run=new_run)\n", |
| "print(exp_dir)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Create train & val data generators" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "def preprocess_input(x):\n", |
| "    \"\"\"\n", |
| "    Zero-center a batch of RGB images in place and return it in BGR order.\n", |
| "\n", |
| "    Adapted from keras/applications/imagenet_utils.py\n", |
| "\n", |
| "    # Arguments\n", |
| "        x: input Numpy tensor, 4D of shape (N, H, W, C); modified in place.\n", |
| "    # Returns\n", |
| "        Preprocessed tensor: `x` with its channel axis reversed.\n", |
| "    \"\"\"\n", |
| "    # Mean pixel value per channel, based on means from a 50%,\n", |
| "    # evenly-distributed sample of the updated-data norm v3 images.\n", |
| "    # Means of the earlier datasets (norm, no-norm original), for reference:\n", |
| "    #   channel 0: 189.54944625, 194.27633667\n", |
| "    #   channel 1: 152.73427159, 145.3067627\n", |
| "    #   channel 2: 176.89543273, 181.27861023\n", |
| "    channel_means = (183.36777842, 138.81743141, 166.07406199)\n", |
| "    for channel, mean in enumerate(channel_means):\n", |
| "        x[:, :, :, channel] -= mean\n", |
| "    return x[:, :, :, ::-1]  # 'RGB'->'BGR' due to pretrained ResNet\n", |
| "\n", |
| "# Multi-GPU exploitation\n", |
| "def split(x, num_splits):\n", |
| "    \"\"\"\n", |
| "    Split a batch into `num_splits` equal-sized batches.\n", |
| "\n", |
| "    # Arguments\n", |
| "        x: batch to split along its first axis.\n", |
| "        num_splits: number of splits to produce.\n", |
| "    # Returns\n", |
| "        List of `num_splits` arrays with floor(len(x) / num_splits) examples each.\n", |
| "    \"\"\"\n", |
| "    per_split = math.floor(len(x) / num_splits)\n", |
| "    # `np.array_split` accepts uneven division (some chunks get one extra\n", |
| "    # example); trim every chunk to the common size, even if it means\n", |
| "    # throwing away a few examples.\n", |
| "    chunks = np.array_split(x, num_splits)\n", |
| "    return [chunk[:per_split] for chunk in chunks]\n", |
| "\n", |
| "def gen_preprocessed_batch(batch_generator, num_gpus):\n", |
| "    \"\"\"\n", |
| "    Yield preprocessed batches of x,y data, split into one piece per GPU.\n", |
| "\n", |
| "    # Arguments\n", |
| "        batch_generator: iterable of (xs, ys) batches.\n", |
| "        num_gpus: number of pieces every batch is split into.\n", |
| "    # Yields\n", |
| "        Tuple (x_splits, y_splits) of two lists holding `num_gpus` arrays each.\n", |
| "    \"\"\"\n", |
| "    for images, labels in batch_generator:\n", |
| "        # For tf aug experiments, split `images` directly instead of\n", |
| "        # passing them through `preprocess_input`.\n", |
| "        x_splits = split(preprocess_input(images), num_gpus)\n", |
| "        y_splits = split(labels, num_gpus)\n", |
| "        yield x_splits, y_splits" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Sanity check: display the image data format Keras currently reports.\n", |
| "K.image_data_format()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Image directories are laid out as images/<dataset name>/<sample fraction>.\n", |
| "save_dir_template = \"images/{stage}/{p}\"\n", |
| "train_save_dir = save_dir_template.format(stage=train_dir, p=p)\n", |
| "val_save_dir = save_dir_template.format(stage=val_dir, p=val_p)\n", |
| "print(train_save_dir, val_save_dir)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Create train & val image generators\n", |
| "#\n", |
| "# Disabled alternative based on the multiprocessing ImageDataGenerator:\n", |
| "#try:\n", |
| "#    # For interactive work, kill any existing pool.\n", |
| "#    pool.terminate()\n", |
| "#except:\n", |
| "#    pass\n", |
| "#pool = mp.Pool(processes=8)\n", |
| "#train_datagen = ImageDataGenerator(pool=pool, horizontal_flip=True, vertical_flip=True,\n", |
| "#                                   rotation_range=180, shear_range=0.1, fill_mode='reflect')\n", |
| "#val_datagen = ImageDataGenerator(pool=pool)\n", |
| "\n", |
| "# Training images are randomly flipped; validation images get no augmentation.\n", |
| "# Disabled options: samplewise_center=True, rotation_range=180, shear_range=0.1,\n", |
| "# fill_mode='reflect'\n", |
| "train_datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True)\n", |
| "val_datagen = ImageDataGenerator()\n", |
| "\n", |
| "# Both generators use the same batch size and resize images to (size, size).\n", |
| "flow_kwargs = dict(batch_size=batch_size, target_size=(size, size))\n", |
| "train_generator_orig = train_datagen.flow_from_directory(train_save_dir, **flow_kwargs)\n", |
| "val_generator_orig = val_datagen.flow_from_directory(val_save_dir, **flow_kwargs)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Create train & val preprocessed generators: every batch is mean-centered,\n", |
| "# converted to BGR and split into `num_gpus` equal pieces.\n", |
| "train_generator = gen_preprocessed_batch(train_generator_orig, num_gpus)\n", |
| "val_generator = gen_preprocessed_batch(val_generator_orig, num_gpus)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Get number of batches" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true, |
| "scrolled": false |
| }, |
| "outputs": [], |
| "source": [ |
| "# Number of examples.\n", |
| "tc = train_generator_orig.samples\n", |
| "vc = val_generator_orig.samples\n", |
| "\n", |
| "# Number of batches for multi-GPU exploitation.\n", |
| "# Note: Multi-GPU exploitation for data parallelism splits mini-batches\n", |
| "# into a set of micro-batches to be run in parallel on each GPU, but\n", |
| "# Keras will view the set of micro-batches as a single batch with\n", |
| "# multiple sources of inputs (i.e. Keras will view a set of examples\n", |
| "# being run in parallel as a single example with multiple sources of\n", |
| "# inputs). The counts below are therefore in units of full mini-batches.\n", |
| "train_batches = math.ceil(tc / batch_size)\n", |
| "val_batches = math.ceil(vc / batch_size)\n", |
| "\n", |
| "# Class counts and relative class frequencies (just for information)\n", |
| "train_class_counts = np.bincount(train_generator_orig.classes)\n", |
| "val_class_counts = np.bincount(val_generator_orig.classes)\n", |
| "train_class_freqs = train_class_counts / train_class_counts.sum()\n", |
| "val_class_freqs = val_class_counts / val_class_counts.sum()\n", |
| "\n", |
| "print(tc, vc)\n", |
| "print(train_batches, val_batches)\n", |
| "print(train_class_freqs, val_class_freqs)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Generate class weights for training" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Weight every class by (smallest class count / its own count): the rarest\n", |
| "# class gets weight 1 and more frequent classes get proportionally less.\n", |
| "class_counts = np.bincount(train_generator_orig.classes)\n", |
| "rarest_count = min(class_counts)\n", |
| "class_weights = {label: rarest_count / count\n", |
| "                 for label, count in zip(range(classes), class_counts)}\n", |
| "print(class_counts)\n", |
| "print(class_weights)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Plot random images (Optional)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "def show_random_image(save_dir):\n", |
| "    \"\"\"\n", |
| "    Display a randomly chosen image from a randomly chosen class directory.\n", |
| "\n", |
| "    Picks one of the subdirectories 1, 2 or 3 of `save_dir`, then one file\n", |
| "    inside it; prints the path of that file and plots the image.\n", |
| "    \"\"\"\n", |
| "    label = np.random.randint(1, 4)  # class directory name, 1..3 inclusive\n", |
| "    class_dir = os.path.join(save_dir, str(label))\n", |
| "    fnames = os.listdir(class_dir)\n", |
| "    fname = os.path.join(class_dir, fnames[np.random.randint(0, len(fnames))])\n", |
| "    print(fname)\n", |
| "    plt.imshow(Image.open(fname))\n", |
| "\n", |
| "# show_random_image(train_save_dir)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "def plot(gen):\n", |
| "    \"\"\"\n", |
| "    Show up to 36 images from the next batch of `gen` in a 6x6 grid.\n", |
| "\n", |
| "    # Arguments\n", |
| "        gen: iterator yielding (images, labels) batches; one batch is consumed.\n", |
| "\n", |
| "    Every image is labeled with its entry of the labels array.\n", |
| "    \"\"\"\n", |
| "    rows, cols = 6, 6\n", |
| "    fig, axes = plt.subplots(rows, cols)\n", |
| "    plt.setp(axes, xticks=[], yticks=[])\n", |
| "    plt.tight_layout()\n", |
| "    images, labels = next(gen)\n", |
| "    channels_first = K.image_data_format() == 'channels_first'\n", |
| "    # `axes.flat` walks the grid row by row, matching the order of the batch.\n", |
| "    for idx, cell in enumerate(axes.flat):\n", |
| "        if idx >= images.shape[0]:\n", |
| "            break  # batch holds fewer images than the grid has cells\n", |
| "        im = images[idx].astype(np.uint8)\n", |
| "        if channels_first:\n", |
| "            im = im.transpose(1, 2, 0)  # (C,H,W) -> (H,W,C)\n", |
| "        cell.imshow(im)\n", |
| "        cell.set_xlabel(labels[idx])\n", |
| "\n", |
| "# Preview one batch from each generator (each call consumes a batch).\n", |
| "plot(train_generator_orig)\n", |
| "plot(val_generator_orig)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": { |
| "collapsed": true |
| }, |
| "source": [ |
| "# Training\n", |
| "1. Setup ResNet50 pretrained model with new input & output layers.\n", |
| "2. Train new output layers (all others frozen).\n", |
| "3. Fine-tune the original layers (or some subset of them).\n", |
| "4. Profit." |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Setup training metrics & callbacks" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Setup training metrics & callbacks\n", |
| "# Careful, TensorBoard callback could OOM with large validation set\n", |
| "# TODO: Add input images to TensorBoard output (maybe as a separate callback)\n", |
| "# TODO: Monitor size of input queues with callbacks\n", |
| "# Checkpoint file names are filled in with the validation loss and the epoch number.\n", |
| "model_filename = os.path.join(exp_dir, \"{val_loss:.2f}-{epoch:02d}.hdf5\")\n", |
| "checkpointer = ModelCheckpoint(model_filename)\n", |
| "# TensorBoard logs are written to the same run directory as the checkpoints.\n", |
| "tensorboard = TensorBoard(log_dir=exp_dir, write_graph=False)\n", |
| "callbacks = [checkpointer, tensorboard]\n", |
| "# NOTE(review): the evaluation cell later rebinds `metrics` to a dict, so re-run\n", |
| "# this cell before re-running a compile cell after an evaluation.\n", |
| "metrics = ['accuracy'] #, fmeasure, precision, recall]" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Setup ResNet50 model" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "## Color augmentation\n", |
| "## TODO: Visualize this in TensorBoard with custom callback every ~100 iterations\n", |
| "#def preprocess(x):\n", |
| "# # import these inside this function so that future model loads\n", |
| "# # will not complain about `tf` not being defined\n", |
| "# import tensorflow as tf\n", |
| "# import keras.backend as K\n", |
| "# \n", |
| "# def augment(img):\n", |
| "# img = tf.image.random_brightness(img, max_delta=64/255)\n", |
| "# img = tf.image.random_saturation(img, lower=0, upper=0.25)\n", |
| "# img = tf.image.random_hue(img, max_delta=0.04)\n", |
| "# img = tf.image.random_contrast(img, lower=0, upper=0.75)\n", |
| "# return img\n", |
| "# \n", |
| "# # Fix dimensions for tf.image ops\n", |
| "# if K.image_data_format() == 'channels_first':\n", |
| "# x = tf.transpose(x, [0,2,3,1]) # (N,C,H,W) -> (N,H,W,C)\n", |
| "# \n", |
| "# # Augment during training.\n", |
| "# x = K.in_train_phase(tf.map_fn(augment, x, swap_memory=True), x)\n", |
| "# \n", |
| "# # Zero-center by subtracting mean pixel value per channel\n", |
| "# # based on means from a 50%, evenly-distributed sample.\n", |
| "# # Means: updated-data norm v3, norm, no-norm original\n", |
| "# x = x - [183.36777842, 138.81743141, 166.07406199]\n", |
| "# x = tf.reverse(x, axis=[-1])\n", |
| "# \n", |
| "# if K.image_data_format() == 'channels_first':\n", |
| "# x = tf.transpose(x, [0,3,1,2]) # (N,H,W,C) -> (N,C,H,W)\n", |
| "# return x" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true, |
| "scrolled": false |
| }, |
| "outputs": [], |
| "source": [ |
| "K.clear_session()\n", |
| "\n", |
| "# Create model by replacing classifier of ResNet50 model with new\n", |
| "# classifier specific to the breast cancer problem.\n", |
| "# The shared single-device model is defined under the CPU device scope.\n", |
| "with tf.device(\"/cpu\"):\n", |
| "    inputs = Input(shape=input_shape)\n", |
| "    # A `Lambda(preprocess)` layer could be applied to `inputs` here.\n", |
| "    resnet50_base = ResNet50(include_top=False, input_shape=input_shape, input_tensor=inputs) #weights=None)\n", |
| "    # could also use GlobalAveragePooling2D since output is (None, 1, 1, 2048)\n", |
| "    features = Flatten()(resnet50_base.output)\n", |
| "    features = Dropout(0.5)(features)\n", |
| "    # init Dense weights with Gaussian scaled by sqrt(1/fan_in)\n", |
| "    preds = Dense(classes, kernel_initializer=VarianceScaling(), activation=\"softmax\")(features)\n", |
| "    # Old API: Model(input=resnet50_base.input, output=preds, name=\"resnet50\")\n", |
| "    resnet50 = Model(inputs=inputs, outputs=preds, name=\"resnet50\")\n", |
| "\n", |
| "# Multi-GPU exploitation via a linear combination of GPU loss functions:\n", |
| "# one input and one output per GPU, all running the shared `resnet50` model.\n", |
| "ins = []\n", |
| "outs = []\n", |
| "for gpu in range(num_gpus):\n", |
| "    with tf.device(\"/gpu:{}\".format(gpu)):\n", |
| "        gpu_input = Input(shape=input_shape)  # split of batch\n", |
| "        ins.append(gpu_input)\n", |
| "        outs.append(resnet50(gpu_input))  # run split on shared model\n", |
| "model = Model(inputs=ins, outputs=outs)  # multi-GPU, data-parallel model\n", |
| "\n", |
| "# Freeze all pre-trained ResNet layers.\n", |
| "for layer in resnet50_base.layers:\n", |
| "    layer.trainable = False\n", |
| "\n", |
| "# Compile model.\n", |
| "#optim = SGD(lr=0.1, momentum=0.9, decay=0.99, nesterov=True)\n", |
| "#optim = keras.optimizers.RMSprop(lr=0.05)\n", |
| "optim = keras.optimizers.Adam(lr=0.001)\n", |
| "loss_weights = [1/num_gpus] * num_gpus  # equal weight for every per-GPU loss\n", |
| "model.compile(optimizer=optim, loss=\"categorical_crossentropy\",\n", |
| "              loss_weights=loss_weights, metrics=metrics)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true, |
| "scrolled": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Explore model\n", |
| "# for x in model.inputs + model.outputs + model.metrics_tensors + model.targets:\n", |
| "#     print(x.name, x.device) # check that tensor devices exploit multi-GPU\n", |
| "\n", |
| "# for i, layer in enumerate(resnet50.layers):\n", |
| "#     print(i, layer.name, layer.input_shape, layer.output_shape)\n", |
| "\n", |
| "# model.summary()\n", |
| "# `summary()` prints the table itself and returns None, so it is not wrapped\n", |
| "# in print(), which would add a stray None line to the output.\n", |
| "resnet50.summary()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true, |
| "scrolled": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Visualize Model\n", |
| "# Renders the graph of the single-device `resnet50` model as an inline SVG.\n", |
| "# NOTE(review): presumably requires pydot and the Graphviz `dot` program -- confirm.\n", |
| "from IPython.display import SVG\n", |
| "from keras.utils.vis_utils import model_to_dot\n", |
| "SVG(model_to_dot(resnet50).create(prog='dot', format='svg'))" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Train new softmax classifier" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true, |
| "scrolled": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Dual-GPU speedup: ~1.7-1.8x\n", |
| "# Keras device placement improvements (metrics, losses) (no val or callbacks, full model):\n", |
| "# batch_size=32, 2 gpus, 100 iters, no keras changes: 128s, 108s, 107s\n", |
| "# batch_size=32, 2 gpus, 100 iters, w/ keras changes: 94s, 75s, 75s\n", |
| "# batch_size=32, 1 gpu, 100 iters, w/ keras changes: 148s, 133s, 133s\n", |
| "# batch_size=64, 2 gpus, 50 iters, w/ keras changes: 93s, 74s, 75s\n", |
| "# batch_size=128, 2 gpus, 25 iters, w/ keras changes: 90s, 73s, 74s\n", |
| "#\n", |
| "# Stage 1: train with every ResNet50 base layer frozen, so only the new\n", |
| "# classifier layers are updated.\n", |
| "# NOTE(review): `model` has one output per GPU; confirm that Keras applies a\n", |
| "# class-index `class_weight` dict to every output instead of looking it up by\n", |
| "# output name (which would silently drop the weights).\n", |
| "epochs = 4\n", |
| "hist1 = model.fit_generator(train_generator, steps_per_epoch=train_batches,\n", |
| " validation_data=val_generator, validation_steps=val_batches,\n", |
| " epochs=epochs, class_weight=class_weights, callbacks=callbacks) #,\n", |
| " #max_q_size=8, nb_worker=1, pickle_safe=False)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Fine-tune model" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true, |
| "scrolled": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Explore model\n", |
| "# for x in model.inputs + model.outputs + model.metrics_tensors + model.targets:\n", |
| "#     print(x.name, x.device) # check that tensor devices exploit multi-GPU\n", |
| "\n", |
| "# List the layers of the ResNet50 base with their indices; fine-tuning\n", |
| "# selects the layers to unfreeze by index.\n", |
| "for idx, base_layer in enumerate(resnet50_base.layers):\n", |
| "    print(idx, base_layer.name, base_layer.input_shape, base_layer.output_shape)\n", |
| "\n", |
| "# print(model.summary())\n", |
| "# print(model.get_layer(\"resnet50\").summary())" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Unfreeze some subset of the model and fine-tune by training slowly with low lr.\n", |
| "# Index of the first ResNet50 base layer to unfreeze; use 154 to unfreeze the\n", |
| "# final 2 residual blocks + exit flow.\n", |
| "first_trainable_layer = 164\n", |
| "for layer in resnet50_base.layers[first_trainable_layer:]:\n", |
| "    layer.trainable = True\n", |
| "#    if hasattr(layer, 'W_regularizer'):\n", |
| "#        layer.W_regularizer = l2(1e-4)\n", |
| "\n", |
| "# Compile again with a low learning rate for fine-tuning.\n", |
| "optim = SGD(lr=0.0001, momentum=0.9)\n", |
| "# optim = keras.optimizers.Adam(lr=0.001)\n", |
| "loss_weights = [1/num_gpus] * num_gpus  # equal weight for every per-GPU loss\n", |
| "model.compile(optimizer=optim, loss=\"categorical_crossentropy\",\n", |
| "              loss_weights=loss_weights, metrics=metrics)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# `summary()` prints the table itself and returns None; wrapping it in\n", |
| "# print() would add a stray None line to the output.\n", |
| "model.summary()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# model.load_weights(os.path.join(\"experiments/keras/resnet50-100%-2-gpu-64-batch-size/0\", \"5.08-08.hdf5\"))" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Stage 2: resume at the epoch where the previous stage stopped and train for\n", |
| "# 20 more epochs. Note that every run of this cell advances `epochs` by 20.\n", |
| "initial_epoch = epochs\n", |
| "epochs = initial_epoch + 20\n", |
| "hist2 = model.fit_generator(train_generator, steps_per_epoch=train_batches,\n", |
| " validation_data=val_generator, validation_steps=val_batches,\n", |
| " epochs=epochs, initial_epoch=initial_epoch,\n", |
| " class_weight=class_weights, callbacks=callbacks) #,\n", |
| " #max_q_size=8, nb_worker=1, pickle_safe=False)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Evaluate model on validation set" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Evaluate on the validation generator; Keras returns one value per entry of\n", |
| "# `model.metrics_names` (total loss, then per-output losses and metrics).\n", |
| "raw_metrics = model.evaluate_generator(val_generator, steps=val_batches) #,\n", |
| "                                       #max_q_size=8, nb_worker=1, pickle_safe=False)\n", |
| "labeled_metrics = list(zip(model.metrics_names, raw_metrics))\n", |
| "losses = [v for k,v in labeled_metrics if k == \"loss\"]\n", |
| "accuracies = [v for k,v in labeled_metrics if k.endswith(\"acc\")]\n", |
| "# \"loss\" is the model's total loss. The model is compiled with loss_weights of\n", |
| "# 1/num_gpus per output, so this value is already the mean of the per-GPU\n", |
| "# losses and must not be divided by num_gpus a second time.\n", |
| "loss = sum(losses) / len(losses)\n", |
| "# One accuracy is reported per GPU output; average them.\n", |
| "acc = sum(accuracies) / num_gpus\n", |
| "# NOTE: this rebinds `metrics` (previously the list passed to `model.compile`).\n", |
| "metrics = {\"loss\": loss, \"acc\": acc}\n", |
| "print(labeled_metrics)\n", |
| "print(metrics)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Save model" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Save the multi-GPU model into the run directory. The file name carries the\n", |
| "# accuracy and loss stored in the `metrics` dict by the evaluation cell.\n", |
| "filename = \"{acc:.5}_acc_{loss:.5}_loss_model.hdf5\".format(**metrics)\n", |
| "fullpath = os.path.join(exp_dir, filename)\n", |
| "model.save(fullpath)\n", |
| "print(\"Saved model file to {}\".format(fullpath))" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Cleanup" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# # Stop processes cleanly. Otherwise, zombie processes will\n", |
| "# # persist and hold onto GPU memory.\n", |
| "# try:\n", |
| "# pool.terminate()\n", |
| "# except:\n", |
| "# pass\n", |
| "# for p in mp.active_children():\n", |
| "# p.terminate()\n", |
| "# mp.active_children()" |
| ] |
| } |
| ], |
| "metadata": { |
| "anaconda-cloud": {}, |
| "kernelspec": { |
| "display_name": "Python 3", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.6.2" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 1 |
| } |