| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Imports" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "%load_ext autoreload\n", |
| "%autoreload 2\n", |
| "%matplotlib inline\n", |
| "\n", |
| "import math\n", |
| "import multiprocessing as mp\n", |
| "import os\n", |
| "\n", |
| "import keras\n", |
| "import keras.backend as K\n", |
| "from keras.applications.resnet50 import ResNet50\n", |
| "from keras.callbacks import ModelCheckpoint, TensorBoard\n", |
| "from keras.initializers import VarianceScaling\n", |
| "from keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D, Input, Lambda, merge\n", |
| "from keras.models import Model, load_model\n", |
| "from keras.optimizers import SGD\n", |
| "# from keras.preprocessing.image import ImageDataGenerator\n", |
| "from keras.regularizers import l2\n", |
| "from keras.utils import to_categorical\n", |
| "import matplotlib.pyplot as plt\n", |
| "import numpy as np\n", |
| "import pandas as pd\n", |
| "from PIL import Image\n", |
| "import tensorflow as tf\n", |
| "\n", |
| "# After move to Keras 2.0 API, need to check if this can still be used.\n", |
| "from preprocessing.image_eval import ImageDataGenerator # multiprocessing ImageDataGenerator\n", |
| "\n", |
| "plt.rcParams['figure.figsize'] = (10, 10)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Settings" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# NOTE: Need to update the following for each model\n", |
| "# 1. train & val data dirs\n", |
| "# 2. train & val data percentages\n", |
| "# 3. experiment directory\n", |
| "# 4. model file\n", |
| "# 5. preprocessing channel means" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "#os.environ['CUDA_VISIBLE_DEVICES'] = \"\"\n", |
| "def as_percent(fraction):\n", |
| "    \"\"\"Return `fraction` as a whole-number percentage, e.g. 0.29 -> 29.\n", |
| "\n", |
| "    Rounds instead of truncating: `int(0.29 * 100)` is 28, because the\n", |
| "    product is 28.999999999999996 in floating point.\n", |
| "    \"\"\"\n", |
| "    return int(round(fraction * 100))\n", |
| "\n", |
| "size = 224  # image height & width fed to the network\n", |
| "channels = 3\n", |
| "classes = 3\n", |
| "p = 0.01 # 0.01  (training-data fraction; part of the image & experiment paths)\n", |
| "val_p = 0.01 #0.01  (validation-data fraction)\n", |
| "num_gpus = 4\n", |
| "batch_size = 32 * num_gpus # for 2 GPUs, 32/GPU has 1.2x systems speedup over 16/GPU\n", |
| "train_dir = \"train_updated_norm_v3\"\n", |
| "val_dir = \"val_updated_norm_v3\"\n", |
| "run = 13\n", |
| "# exp_dir = \"experiments/keras/resnet50-1%-4-gpu-128-batch-size-updated-norm-v3-data-1%-val-sanity/4\"\n", |
| "experiment_template = \"resnet50-{p}%-{num_gpus}-gpu-{batch_size}-batch-size-{train_dir}-data-{val_p}%-val-sanity/{run}\"\n", |
| "experiment = experiment_template.format(p=as_percent(p), val_p=as_percent(val_p), num_gpus=num_gpus,\n", |
| "                                        batch_size=batch_size, train_dir=train_dir, run=run)\n", |
| "model_file = \"0.38936_acc_0.27847_loss_model.hdf5\"\n", |
| "exp_dir = os.path.join(\"experiments\", \"keras\", experiment)\n", |
| "# experiment_name = model_file.replace(\"/\", \"_\")[:-5]\n", |
| "print(exp_dir)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# os.makedirs(os.path.join(\"results\", experiment_name), exist_ok=True)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Load model" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Load the checkpoint `model_file` from the experiment directory.\n", |
| "model_path = os.path.join(exp_dir, model_file)\n", |
| "model = load_model(model_path)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# `summary()` prints the table itself and returns None, so wrapping it in\n", |
| "# `print(...)` only appends a stray \"None\" line to the output.\n", |
| "model.summary()\n", |
| "model.get_layer(\"resnet50\").summary()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Visualize Model: render the layer graph of the \"resnet50\" tower with\n", |
| "# Graphviz `dot` and show it inline as an SVG.\n", |
| "from IPython.display import SVG\n", |
| "from keras.utils.vis_utils import model_to_dot\n", |
| "tower_graph = model_to_dot(model.get_layer(\"resnet50\"))\n", |
| "SVG(tower_graph.create(prog='dot', format='svg'))" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Note: the `model` loaded above is already compiled and usable as-is.\n", |
| "# However, it may have been built for multi-GPU training, in which case\n", |
| "# it still expects multiple parallel inputs at eval time. Worse, the\n", |
| "# device settings are not retained, so every tower would run on one\n", |
| "# device. The fix: extract a single tower, rewrap it in a fresh\n", |
| "# multi-GPU block, and recompile.\n", |
| "\n", |
| "# Extract single tower\n", |
| "resnet50 = model.get_layer(\"resnet50\")\n", |
| "#model.save(\"resnet50-100%-4-gpu-128-batch-size-updated-norm-v3-data-1%-val-dropout_0_1.56-19_NO_GPU_TOWERS.hdf5\")\n", |
| "\n", |
| "# Data-parallel wrapper: each GPU gets its own input and runs the shared\n", |
| "# tower on it; the overall loss is a linear combination of the GPU losses.\n", |
| "ins, outs = [], []\n", |
| "for i in range(num_gpus):\n", |
| "    with tf.device(\"/gpu:{}\".format(i)):\n", |
| "        x = Input(shape=(size, size, channels))  # this GPU's split of the batch\n", |
| "        out = resnet50(x)  # run the split on the shared model\n", |
| "        ins.append(x)\n", |
| "        outs.append(out)\n", |
| "model = Model(inputs=ins, outputs=outs)  # multi-GPU, data-parallel model\n", |
| "\n", |
| "# Compile model, weighting every GPU's loss equally.\n", |
| "metrics = ['accuracy']\n", |
| "gpu_loss_weights = [1/num_gpus]*num_gpus\n", |
| "model.compile(optimizer=\"sgd\", loss=\"categorical_crossentropy\",\n", |
| "              loss_weights=gpu_loss_weights, metrics=metrics)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# # Explore model\n", |
| "# List every input / output / metric / target tensor with its device,\n", |
| "# to check that the tensors exploit multi-GPU placement.\n", |
| "model_tensors = model.inputs + model.outputs + model.metrics_tensors + model.targets\n", |
| "for tensor in model_tensors:\n", |
| "    print(tensor.name, tensor.device)\n", |
| "\n", |
| "# print(model.summary())\n", |
| "\n", |
| "# print(resnet50.summary())" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Create train & val data generators" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Image directories are laid out as images/<dataset dir>/<data fraction>.\n", |
| "save_dir_template = \"images/{stage}/{p}\"\n", |
| "train_save_dir = save_dir_template.format(stage=train_dir, p=p)\n", |
| "val_save_dir = save_dir_template.format(stage=val_dir, p=val_p)\n", |
| "print(train_save_dir, val_save_dir)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "def preprocess_input(x, channel_means=(183.36777842, 138.81743141, 166.07406199)):\n", |
| "    \"\"\"\n", |
| "    Preprocesses a tensor encoding a batch of images.\n", |
| "\n", |
| "    Adapted from keras/applications/imagenet_utils.py\n", |
| "\n", |
| "    The input is left untouched: the work is done on a floating-point\n", |
| "    copy, so preprocessing the same batch twice gives the same result\n", |
| "    and integer (e.g. uint8) images are accepted.\n", |
| "\n", |
| "    # Arguments\n", |
| "        x: input Numpy tensor, 4D of shape (N, H, W, C), channels in RGB order.\n", |
| "        channel_means: per-channel (R, G, B) means to subtract. The default\n", |
| "            is the updated-data norm v3 mean, taken from a 50%,\n", |
| "            evenly-distributed sample. Means of the earlier datasets:\n", |
| "            norm: (189.54944625, 152.73427159, 176.89543273)\n", |
| "            no-norm original: (194.27633667, 145.3067627, 181.27861023)\n", |
| "    # Returns\n", |
| "        New preprocessed tensor: zero-centered per channel, channels in BGR order.\n", |
| "    \"\"\"\n", |
| "    x = np.asarray(x)\n", |
| "    # `astype` always copies, so the caller's array is never modified; the\n", |
| "    # dtype is promoted to at least float32 so the subtraction cannot fail\n", |
| "    # or wrap around on integer images.\n", |
| "    x = x.astype(np.result_type(x.dtype, np.float32))\n", |
| "    # Zero-center by subtracting the mean pixel value per channel\n", |
| "    # (broadcasts over the trailing channel axis).\n", |
| "    x -= np.asarray(channel_means, dtype=x.dtype)\n", |
| "    x = x[:, :, :, ::-1]  # 'RGB'->'BGR'\n", |
| "    return x\n", |
| "\n", |
| "# Multi-GPU exploitation\n", |
| "def split(x, num_splits):\n", |
| "    \"\"\"Split batch into K equal-sized batches.\"\"\"\n", |
| "    # Every piece is cut down to floor(len(x) / K) items, so pieces come\n", |
| "    # out evenly sized even if that means throwing away a few examples.\n", |
| "    per_split = math.floor(len(x) / num_splits)\n", |
| "    pieces = np.array_split(x, num_splits)\n", |
| "    return [piece[:per_split] for piece in pieces]\n", |
| "\n", |
| "def gen_preprocessed_batch(batch_generator, num_gpus):\n", |
| "    \"\"\"Yield (x, y, filenames) batches, each split into `num_gpus` pieces.\n", |
| "\n", |
| "    The images are run through `preprocess_input` before being split.\n", |
| "    \"\"\"\n", |
| "    # Earlier variants, for generators that yield only (xs, ys):\n", |
| "#     for xs, ys in batch_generator:\n", |
| "#         yield split(preprocess_input(xs), num_gpus), split(ys, num_gpus)\n", |
| "#         yield split(xs, num_gpus), split(ys, num_gpus)  for tf aug experiments\n", |
| "    for images, labels, names in batch_generator:\n", |
| "        image_splits = split(preprocess_input(images), num_gpus)\n", |
| "        label_splits = split(labels, num_gpus)\n", |
| "        name_splits = split(names, num_gpus)\n", |
| "        yield image_splits, label_splits, name_splits" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Create train & val image generators\n", |
| "try:\n", |
| "    # For interactive work, kill any existing pool.\n", |
| "    pool.terminate()\n", |
| "except NameError:\n", |
| "    # First run in this kernel: no pool exists yet. Any other failure\n", |
| "    # while terminating is a real problem and is allowed to surface.\n", |
| "    pass\n", |
| "pool = mp.Pool(processes=8)\n", |
| "train_datagen = ImageDataGenerator(pool=pool) #, horizontal_flip=True, vertical_flip=True,\n", |
| "#                                   rotation_range=180, shear_range=0.1, fill_mode='reflect')\n", |
| "val_datagen = ImageDataGenerator(pool=pool)\n", |
| "#train_datagen = ImageDataGenerator()\n", |
| "#val_datagen = ImageDataGenerator()\n", |
| "train_generator_orig = train_datagen.flow_from_directory(train_save_dir, batch_size=batch_size, target_size=(size, size))\n", |
| "val_generator_orig = val_datagen.flow_from_directory(val_save_dir, batch_size=batch_size, target_size=(size, size))" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Create train & val preprocessed generators: wrap each raw generator so\n", |
| "# that its batches are preprocessed and split across the GPUs.\n", |
| "train_generator, val_generator = (\n", |
| "    gen_preprocessed_batch(raw_generator, num_gpus)\n", |
| "    for raw_generator in (train_generator_orig, val_generator_orig)\n", |
| ")" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Get number of samples" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "scrolled": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Number of examples.\n", |
| "tc, vc = train_generator_orig.nb_sample, val_generator_orig.nb_sample\n", |
| "#tc = train_generator_orig.samples\n", |
| "#vc = val_generator_orig.samples\n", |
| "\n", |
| "# Number of batches for multi-GPU exploitation.\n", |
| "# Note: data parallelism splits each mini-batch into a set of\n", |
| "# micro-batches that run in parallel, one per GPU. Keras, however, sees\n", |
| "# that set as a single batch with multiple input sources (i.e. a set of\n", |
| "# examples run in parallel looks like one example with several inputs).\n", |
| "train_batches = int(math.ceil(tc / batch_size))\n", |
| "val_batches = int(math.ceil(vc / batch_size))\n", |
| "\n", |
| "# Class counts (just for information)\n", |
| "train_class_counts = np.bincount(train_generator_orig.classes)\n", |
| "val_class_counts = np.bincount(val_generator_orig.classes)\n", |
| "train_class_fractions = train_class_counts / np.sum(train_class_counts)\n", |
| "val_class_fractions = val_class_counts / np.sum(val_class_counts)\n", |
| "\n", |
| "print(tc, vc)\n", |
| "print(train_batches, val_batches)\n", |
| "print(train_class_fractions, val_class_fractions)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Generate class weights for training" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Weight each class by (smallest class count / its own count), so the\n", |
| "# rarest class gets weight 1 and more frequent classes get less.\n", |
| "class_counts = np.bincount(train_generator_orig.classes)\n", |
| "relative_weights = min(class_counts) / class_counts\n", |
| "class_weights = dict(zip(range(classes), relative_weights))\n", |
| "print(class_counts)\n", |
| "print(class_weights)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Plot random images (Optional)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "def show_random_image(save_dir, num_classes=3):\n", |
| "    \"\"\"Display one randomly chosen image from a randomly chosen class directory.\n", |
| "\n", |
| "    # Arguments\n", |
| "        save_dir: directory holding one sub-directory per class, named\n", |
| "            \"1\" .. str(num_classes).\n", |
| "        num_classes: number of class sub-directories to pick from.\n", |
| "    # Raises\n", |
| "        ValueError: if the chosen class directory contains no files.\n", |
| "    \"\"\"\n", |
| "    c = np.random.randint(1, num_classes + 1)  # upper bound is exclusive\n", |
| "    class_dir = os.path.join(save_dir, str(c))\n", |
| "    files = os.listdir(class_dir)\n", |
| "    if not files:\n", |
| "        raise ValueError(\"No images found in {}\".format(class_dir))\n", |
| "    i = np.random.randint(0, len(files))\n", |
| "    fname = os.path.join(class_dir, files[i])\n", |
| "    print(fname)\n", |
| "    img = Image.open(fname)\n", |
| "    plt.imshow(img)\n", |
| "\n", |
| "# show_random_image(train_save_dir)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "def plot(gen):\n", |
| "    \"\"\"Show up to 36 images of the next batch from `gen` on a 6x6 grid.\n", |
| "\n", |
| "    Each image is labelled (x-axis label) with its entry of the batch's y.\n", |
| "    \"\"\"\n", |
| "    rows, cols = 6, 6\n", |
| "    fig, axes = plt.subplots(rows, cols)\n", |
| "    plt.setp(axes, xticks=[], yticks=[])\n", |
| "    plt.tight_layout()\n", |
| "    images, labels, _ = next(gen)\n", |
| "    num_images = images.shape[0]\n", |
| "    for idx, axis in enumerate(axes.flat):  # row-major, same order as i*c + j\n", |
| "        if idx >= num_images:\n", |
| "            break  # batch smaller than the grid: leave the rest empty\n", |
| "        im = images[idx].astype(np.uint8)\n", |
| "        if K.image_data_format() == 'channels_first':\n", |
| "            im = im.transpose(1,2,0)  # (C,H,W) -> (H,W,C)\n", |
| "        axis.imshow(im)\n", |
| "        axis.set_xlabel(labels[idx])\n", |
| "\n", |
| "plot(train_generator_orig)\n", |
| "plot(val_generator_orig)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Evaluate previous model checkpoint" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# NOTE: We could call the `model.evaluate*` methods,\n", |
| "# but that would not allow us to create contingency\n", |
| "# matrices. Instead, we repeatedly loop over batches\n", |
| "# of data, collecting both the true labels and\n", |
| "# predictions. Then, we can compute any metrics\n", |
| "# desired, including 3x3 contingency matrices." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# def extract_metrics(model, raw_metrics):\n", |
| "# labeled_metrics = list(zip(model.metrics_names, raw_metrics))\n", |
| "# losses = [v for k,v in labeled_metrics if k == \"loss\"]\n", |
| "# accuracies = [v for k,v in labeled_metrics if k.endswith(\"acc\")]\n", |
| "# loss = sum(losses) / num_gpus\n", |
| "# acc = sum(accuracies) / num_gpus\n", |
| "# metrics = {\"loss\": loss, \"acc\": acc}\n", |
| "# return labeled_metrics, metrics\n", |
| "\n", |
| "# raw_metrics = model.evaluate_generator(val_generator, val_samples=32,\n", |
| "# max_q_size=8, nb_worker=1, pickle_safe=False)\n", |
| "\n", |
| "# labeled_metrics, metrics = extract_metrics(model, raw_metrics)\n", |
| "# print(labeled_metrics)\n", |
| "# print(metrics)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Get predictions\n", |
| "# Get predictions\n", |
| "# Each entry pairs a dataset's name, data fraction and sample count with\n", |
| "# the generator of that SAME dataset. (The \"train\" entry used to be paired\n", |
| "# with `val_generator`, so validation batches were reported as train.)\n", |
| "for dataset in [(\"train\", p, tc, train_generator)]: #, (\"val\", val_p, vc, val_generator)]:\n", |
| "    name, perc, count, gen = dataset\n", |
| "\n", |
| "    ys = []\n", |
| "    preds = []\n", |
| "    fnames = []\n", |
| "    # Only full batches are evaluated; a trailing partial batch is skipped.\n", |
| "    batches = math.floor(count / batch_size)\n", |
| "    for i in range(batches):\n", |
| "        # Get batch.\n", |
| "#         x, y = next(gen)\n", |
| "        x, y, fname = next(gen)\n", |
| "\n", |
| "        # Get predictions\n", |
| "        pred = model.predict(x)\n", |
| "\n", |
| "        # Store y and predictions\n", |
| "        ys.extend(y)  # y is always a list of parallel batches, even if only 1 batch\n", |
| "        if isinstance(pred, list):\n", |
| "            preds.extend(pred)\n", |
| "        else:\n", |
| "            preds.append(pred)\n", |
| "        fnames.extend(fname)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Create DataFrames\n", |
| "# (This cell runs at top level on the lists collected by the prediction\n", |
| "# loop, so it is no longer indented like a loop body.)\n", |
| "class_labels = list(range(1, classes + 1))  # class labels are 1-based\n", |
| "y = np.concatenate(ys)\n", |
| "pred = np.concatenate(preds)\n", |
| "fname = np.concatenate(fnames)\n", |
| "y_df = pd.DataFrame(y, columns=class_labels)\n", |
| "pred_df = pd.DataFrame(pred, columns=class_labels)\n", |
| "fname_df = pd.DataFrame(np.atleast_2d(fname).T, columns=[\"filenames\"])\n", |
| "\n", |
| "# Create class, prediction, slide_num DataFrames\n", |
| "y_class_df = y_df.idxmax(axis=1)\n", |
| "pred_class_df = pred_df.idxmax(axis=1)\n", |
| "y_class_df.name = \"actual\"\n", |
| "pred_class_df.name = \"predicted\"\n", |
| "# Raw string: `\\d` is not a valid Python string escape. The dot before\n", |
| "# `jpeg` is escaped so that it only matches a literal '.'.\n", |
| "slide_info_df = fname_df.filenames.str.extract(r'(?P<class>\\d)/\\d+_(?P<slide_num>\\d+)_\\d+\\.jpeg', expand=True)\n", |
| "slide_info_df[\"class\"] = slide_info_df[\"class\"].astype(int)\n", |
| "slide_info_df[\"slide_num\"] = slide_info_df[\"slide_num\"].astype(int)\n", |
| "df = pd.concat([fname_df, slide_info_df, y_class_df, pred_class_df], axis=1)\n", |
| "\n", |
| "# sanity check: the class parsed from the file path must equal the label\n", |
| "assert np.allclose(df[\"class\"], df.actual)\n", |
| "\n", |
| "# Create Contingency matrix\n", |
| "contingency_mat = pd.crosstab(df.actual, df.predicted)\n", |
| "\n", |
| "# # Save DataFrames\n", |
| "# y_df.to_csv(os.path.join(exp_dir, \"{model_ck}-{perc}%-{data}-y_df.csv\".format(model_ck=model_file[:-5], perc=100*perc, data=name)), header=True)\n", |
| "# pred_df.to_csv(os.path.join(exp_dir, \"{model_ck}-{perc}%-{data}-pred_df.csv\".format(model_ck=model_file[:-5], perc=100*perc, data=name)), header=True)\n", |
| "# df.to_csv(os.path.join(exp_dir, \"{model_ck}-{perc}%-{data}-df.csv\".format(model_ck=model_file[:-5], perc=100*perc, data=name)), header=True)\n", |
| "\n", |
| "# # Save results\n", |
| "# with open(os.path.join(exp_dir, \"{model_ck}-{perc}%-{data}-results.txt\".format(model_ck=model_file[:-5], perc=100*perc, data=name)), 'w') as f:\n", |
| "#     print(\"Dataset: {}\".format(name), file=f)\n", |
| "#     print(\"Number of samples: {}\".format(len(y_df)), file=f)\n", |
| "#     print(contingency_mat, file=f)\n", |
| "#     print(\"Accuracy: {}\".format(np.mean(np.equal(y_class, pred_class))), file=f)\n", |
| "print(\"Number of samples: {}\".format(len(y_df)))\n", |
| "print(contingency_mat)\n", |
| "print(\"Accuracy: {}\".format(np.mean(np.equal(y_class_df, pred_class_df))))" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "len(y_df), len(pred_df), len(fname_df), len(df)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "df" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Keep only the columns needed for the per-slide aggregation.\n", |
| "slide_columns = [\"slide_num\", \"actual\", \"predicted\"]\n", |
| "df2 = df.loc[:, slide_columns]\n", |
| "df2" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Average the per-image labels of each slide, then round the mean\n", |
| "# prediction to the nearest class with Python's built-in `round`.\n", |
| "# NOTE(review): built-in `round` rounds exact halves to the even integer.\n", |
| "df3 = (\n", |
| "    df2.groupby(\"slide_num\")\n", |
| "    .mean()\n", |
| "    .assign(predicted_round=lambda slide_means: slide_means.predicted.map(round))\n", |
| ")\n", |
| "df3" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Slide-level accuracy: fraction of slides whose rounded mean prediction\n", |
| "# equals the actual class. The vectorized mean replaces a Python-level\n", |
| "# `sum(...) / len(...)` and gives NaN rather than ZeroDivisionError when\n", |
| "# there are no slides.\n", |
| "(df3.actual == df3.predicted_round).mean()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "pd.crosstab(df3.actual, df3.predicted_round)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "scrolled": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# Descriptive statistics of the actual / predicted labels, per slide.\n", |
| "group_keys = [\"slide_num\"] #, \"predicted\"]\n", |
| "gb = df2.groupby(group_keys)\n", |
| "gb.describe()" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "---" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Read in predictions + true DataFrames and extract metrics" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# # Read DataFrames\n", |
| "# y_df = pd.read_csv(os.path.join(exp_dir, \"{}-y_df.csv\".format(model_file[:-5])), index_col=0)\n", |
| "# pred_df = pd.read_csv(os.path.join(exp_dir, \"{}-pred_df.csv\".format(model_file[:-5])), index_col=0)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# # Create Contingency matrix\n", |
| "# y_class = y_df.idxmax(axis=1)\n", |
| "# pred_class = pred_df.idxmax(axis=1)\n", |
| "# y_class.name = \"Actual\"\n", |
| "# pred_class.name = \"Predicted\"\n", |
| "# contingency_mat = pd.crosstab(y_class, pred_class)\n", |
| "\n", |
| "# print(\"Number of samples: {}\".format(len(y_df)))\n", |
| "# print(contingency_mat)\n", |
| "# print(\"Accuracy: {}\".format(np.mean(np.equal(y_class, pred_class))))" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true, |
| "scrolled": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# # --- Alternate approach with NumPy arrays only\n", |
| "# y_c = np.argmax(y, axis=1) + 1\n", |
| "# pred_c = np.argmax(pred, axis=1) + 1\n", |
| "# y_actu = pd.Series(y_c, name=\"Actual\")\n", |
| "# y_pred = pd.Series(pred_c, name=\"Predicted\")\n", |
| "# contingency_mat = pd.crosstab(y_actu, y_pred)\n", |
| "\n", |
| "# print(\"Number of samples: {}\".format(len(y_c)))\n", |
| "# print(contingency_mat)\n", |
| "# print(\"Accuracy: {}\".format(np.mean(np.equal(y_c, pred_c))))" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": { |
| "collapsed": true |
| }, |
| "source": [ |
| "---" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Sample images + predictions & write to disk" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "# path_template = os.path.join(\"visualize\", \"{dataset}\", \"Pred_{pred}-Actual_{actual}\")\n", |
| "# for dataset in [\"train\", \"val\"]:\n", |
| "# for i in range(3):\n", |
| "# for j in range(3):\n", |
| "# os.makedirs(path_template.format(dataset=dataset, pred=i+1, actual=j+1), exist_ok=True)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true, |
| "scrolled": false |
| }, |
| "outputs": [], |
| "source": [ |
| "# filename_template = os.path.join(path_template, \"{hash}.jpeg\")\n", |
| "# batches = 8\n", |
| "\n", |
| "# for dataset in [(\"train\", train_generator_orig), (\"val\", val_generator_orig)]:\n", |
| "# name, gen = dataset\n", |
| "# print(name)\n", |
| " \n", |
| "# for i in range(batches):\n", |
| "# # Get batch.\n", |
| "# x_orig, y_orig = next(gen)\n", |
| "# x = preprocess_input(np.copy(x_orig))\n", |
| "# y = y_orig\n", |
| "\n", |
| "# # Get predictions\n", |
| "# raw_preds = model.predict(x)\n", |
| "# raw_metrics = model.evaluate(x, y)\n", |
| "# labeled_metrics, metrics = extract_metrics(model, raw_metrics)\n", |
| "\n", |
| "# # Create contingency matrix\n", |
| "# y = np.argmax(y, axis=1)+1\n", |
| "# preds = np.argmax(raw_preds, axis=1)+1\n", |
| "# y_actu = pd.Series(y, name=\"Actual\")\n", |
| "# y_pred = pd.Series(preds, name=\"Predicted\")\n", |
| "# contingency_mat = pd.crosstab(y_actu, y_pred)\n", |
| "\n", |
| "# # # Output images in directories based on misclassification.\n", |
| "# # def plot(x, y):\n", |
| "# # r, c = 6, 6\n", |
| "# # fig, ax = plt.subplots(r, c)\n", |
| "# # plt.setp(ax, xticks=[], yticks=[])\n", |
| "# # plt.tight_layout()\n", |
| "# # batch_size = x.shape[0]\n", |
| "# # for i in range(r):\n", |
| "# # for j in range(c):\n", |
| "# # if i*c + j < batch_size:\n", |
| "# # ax[i][j].imshow(x[i*c + j].astype(np.uint8))\n", |
| "# # ax[i][j].set_xlabel(\"{preds}-{y}\".format(y=y[i*c + j], preds=preds[i*c + j]))\n", |
| "\n", |
| "# # plot(x_orig, y)\n", |
| "# # plt.show()\n", |
| "\n", |
| "# for n in range(x_orig.shape[0]):\n", |
| "# img = Image.fromarray(x_orig[n].astype(np.uint8), 'RGB')\n", |
| "# filename = filename_template.format(dataset=name, pred=preds[n], actual=y[n], hash=np.random.randint(1e6))\n", |
| "# img.save(filename)\n", |
| "\n", |
| "# print(contingency_mat)\n", |
| "# print(np.mean(y==preds))\n", |
| "# print(labeled_metrics)\n", |
| "# print(metrics)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "---" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Predict" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Grab one raw (not preprocessed) training batch and show its first image.\n", |
| "# The filenames get a real name: assigning to `_` would shadow IPython's\n", |
| "# automatic last-output variable for the rest of the session.\n", |
| "x, label, sample_filenames = next(train_generator_orig)\n", |
| "Image.fromarray(x[0].astype(np.uint8))" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Predict on a COPY of the first image. `reshape` alone would return a\n", |
| "# view into `x`, so preprocessing could alter the batch shown above and\n", |
| "# re-running this cell would give a different prediction each time.\n", |
| "# The shape comes from the settings instead of hard-coded 224 / 3.\n", |
| "single_image = np.copy(x[0]).reshape(1, size, size, channels)\n", |
| "preds = resnet50.predict(preprocess_input(single_image))" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Compare the label of the sampled image with the model's output for it.\n", |
| "actual_label, predicted_output = label[0], preds[0]\n", |
| "print(\"Actual: {}\".format(actual_label))\n", |
| "print(\"Pred: {}\".format(predicted_output))" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "---" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Cleanup" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Stop processes cleanly. Otherwise, zombie processes will\n", |
| "# persist and hold onto GPU memory.\n", |
| "try:\n", |
| "    pool.terminate()\n", |
| "except NameError:\n", |
| "    pass  # no pool was ever created in this kernel\n", |
| "# The loop variable is deliberately not `p`: that name holds the\n", |
| "# training-data fraction from the settings cell, and overwriting it with\n", |
| "# a process object would break any cell re-run after cleanup.\n", |
| "for child in mp.active_children():\n", |
| "    child.terminate()\n", |
| "mp.active_children()" |
| ] |
| } |
| ], |
| "metadata": { |
| "anaconda-cloud": {}, |
| "kernelspec": { |
| "display_name": "Python 3", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.6.1" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 1 |
| } |