projects/breast_cancer/MachineLearning-Keras-Eval.ipynb - systemds - Git at Google

 {
  "cells": [
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Imports"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "%load_ext autoreload\n",
     "%autoreload 2\n",
     "%matplotlib inline\n",
     "\n",
     "import math\n",
     "import multiprocessing as mp\n",
     "import os\n",
     "\n",
     "import keras\n",
     "import keras.backend as K\n",
     "from keras.applications.resnet50 import ResNet50\n",
     "from keras.callbacks import ModelCheckpoint, TensorBoard\n",
     "from keras.initializers import VarianceScaling\n",
     "from keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D, Input, Lambda, merge\n",
     "from keras.models import Model, load_model\n",
     "from keras.optimizers import SGD\n",
     "# from keras.preprocessing.image import ImageDataGenerator\n",
     "from keras.regularizers import l2\n",
     "from keras.utils import to_categorical\n",
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "import pandas as pd\n",
     "from PIL import Image\n",
     "import tensorflow as tf\n",
     "\n",
     "# After move to Keras 2.0 API, need to check if this can still be used.\n",
     "from preprocessing.image_eval import ImageDataGenerator  # multiprocessing ImageDataGenerator\n",
     "\n",
     "plt.rcParams['figure.figsize'] = (10, 10)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Settings"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# NOTE: Need to update the following for each model\n",
     "# 1. train & val data dirs\n",
     "# 2. train & val data percentages\n",
     "# 3. experiment directory\n",
     "# 4. model file\n",
     "# 5. preprocessing channel means"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "#os.environ['CUDA_VISIBLE_DEVICES'] = \"\"\n",
     "size = 224\n",
     "channels = 3\n",
     "classes = 3\n",
     "p = 0.01  # 0.01\n",
     "val_p = 0.01  #0.01\n",
     "num_gpus = 4\n",
     "batch_size = 32 * num_gpus  # for 2 GPUs, 32/GPU has 1.2x systems speedup over 16/GPU\n",
     "train_dir = \"train_updated_norm_v3\"\n",
     "val_dir = \"val_updated_norm_v3\"\n",
     "run = 13\n",
     "# exp_dir = \"experiments/keras/resnet50-1%-4-gpu-128-batch-size-updated-norm-v3-data-1%-val-sanity/4\"\n",
     "experiment_template = \"resnet50-{p}%-{num_gpus}-gpu-{batch_size}-batch-size-{train_dir}-data-{val_p}%-val-sanity/{run}\"\n",
     "experiment = experiment_template.format(p=int(p*100), val_p=int(val_p*100), num_gpus=num_gpus,\n",
     "                                        batch_size=batch_size, train_dir=train_dir, run=run)\n",
     "model_file = \"0.38936_acc_0.27847_loss_model.hdf5\"\n",
     "exp_dir = os.path.join(\"experiments\", \"keras\", experiment)\n",
     "# experiment_name = model_file.replace(\"/\", \"_\")[:-5]\n",
     "print(exp_dir)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# os.makedirs(os.path.join(\"results\", experiment_name), exist_ok=True)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Load model"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "model = load_model(os.path.join(exp_dir, model_file))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "print(model.summary())\n",
     "print(model.get_layer(\"resnet50\").summary())"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Visualize Model\n",
     "from IPython.display import SVG\n",
     "from keras.utils.vis_utils import model_to_dot\n",
     "SVG(model_to_dot(model.get_layer(\"resnet50\")).create(prog='dot', format='svg'))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# Note: previous `model` is already compiled and ready to go.\n",
     "# However, it may have been built for multi-GPU training, so it\n",
     "# would still require multiple parallel inputs at eval time.\n",
     "# Even worse, the device settings will not be retained, so all\n",
     "# towers would be run on one device.  To fix this, we can extract\n",
     "# a single tower, rewrap in a multi-GPU block, and recompile.\n",
     "\n",
     "# Extract single tower\n",
     "resnet50 = model.get_layer(\"resnet50\")\n",
     "#model.save(\"resnet50-100%-4-gpu-128-batch-size-updated-norm-v3-data-1%-val-dropout_0_1.56-19_NO_GPU_TOWERS.hdf5\")\n",
     "\n",
     "# Multi-GPU exploitation via a linear combination of GPU loss functions.\n",
     "ins = []\n",
     "outs = []\n",
     "for i in range(num_gpus):\n",
     "  with tf.device(\"/gpu:{}\".format(i)):\n",
     "    x = Input(shape=(size,size,channels))  # split of batch\n",
     "    out = resnet50(x)  # run split on shared model\n",
     "    ins.append(x)\n",
     "    outs.append(out)\n",
     "model = Model(inputs=ins, outputs=outs)  # multi-GPU, data-parallel model\n",
     "\n",
     "# Compile model.\n",
     "metrics = ['accuracy']\n",
     "model.compile(optimizer=\"sgd\", loss=\"categorical_crossentropy\",\n",
     "              loss_weights=[1/num_gpus]*num_gpus, metrics=metrics)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# # Explore model\n",
     "for x in model.inputs + model.outputs + model.metrics_tensors + model.targets:\n",
     "  print(x.name, x.device)  # check that tensor devices exploit multi-GPU\n",
     "\n",
     "# print(model.summary())\n",
     "\n",
     "# print(resnet50.summary())"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Create train & val data generators"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "train_save_dir = \"images/{stage}/{p}\".format(stage=train_dir, p=p)\n",
     "val_save_dir = \"images/{stage}/{p}\".format(stage=val_dir, p=val_p)\n",
     "print(train_save_dir, val_save_dir)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "def preprocess_input(x):\n",
     "  \"\"\"\n",
     "  Preprocesses a tensor encoding a batch of images.\n",
     "\n",
     "  Adapted from keras/applications/imagenet_utils.py\n",
     "\n",
     "  # Arguments\n",
     "      x: input Numpy tensor, 4D of shape (N, H, W, C).\n",
     "  # Returns\n",
     "      Preprocessed tensor.\n",
     "  \"\"\"\n",
     "  # Zero-center by subtracting mean pixel value per channel\n",
     "  # based on means from a 50%, evenly-distributed sample.\n",
     "  # Means: updated-data norm v3, norm, no-norm original\n",
     "  x[:, :, :, 0] -= 183.36777842  #189.54944625  #194.27633667\n",
     "  x[:, :, :, 1] -= 138.81743141  #152.73427159  #145.3067627\n",
     "  x[:, :, :, 2] -= 166.07406199  #176.89543273  #181.27861023 \n",
     "  x = x[:, :, :, ::-1]  # 'RGB'->'BGR'\n",
     "  return x\n",
     "\n",
     "# Multi-GPU exploitation\n",
     "def split(x, num_splits):\n",
     "  \"\"\"Split batch into K equal-sized batches.\"\"\"\n",
     "  # Split tensors evenly, even if it means throwing away a few examples.\n",
     "  samples = math.floor(len(x) / num_splits)\n",
     "  x_splits = [arr[:samples] for arr in np.array_split(x, num_splits)]\n",
     "  return x_splits\n",
     "\n",
     "def gen_preprocessed_batch(batch_generator, num_gpus):\n",
     "  \"\"\"Yield preprocessed batches of x,y data.\"\"\"\n",
     "#   for xs, ys in batch_generator:\n",
     "#     yield split(preprocess_input(xs), num_gpus), split(ys, num_gpus)\n",
     "#     yield split(xs, num_gpus), split(ys, num_gpus)  for tf aug experiments\n",
     "  for xs, ys, filenames in batch_generator:\n",
     "    yield split(preprocess_input(xs), num_gpus), split(ys, num_gpus), split(filenames, num_gpus)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Create train & val image generators\n",
     "try:\n",
     "  # For interactive work, kill any existing pool.\n",
     "  pool.terminate()\n",
     "except:\n",
     "  pass\n",
     "pool = mp.Pool(processes=8)\n",
     "train_datagen = ImageDataGenerator(pool=pool) #, horizontal_flip=True, vertical_flip=True,\n",
     "#                                    rotation_range=180, shear_range=0.1, fill_mode='reflect')\n",
     "val_datagen = ImageDataGenerator(pool=pool)\n",
     "#train_datagen = ImageDataGenerator()\n",
     "#val_datagen = ImageDataGenerator()\n",
     "train_generator_orig = train_datagen.flow_from_directory(train_save_dir, batch_size=batch_size, target_size=(size, size))\n",
     "val_generator_orig = val_datagen.flow_from_directory(val_save_dir, batch_size=batch_size, target_size=(size, size))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# Create train & val preprocessed generators\n",
     "train_generator = gen_preprocessed_batch(train_generator_orig, num_gpus)\n",
     "val_generator = gen_preprocessed_batch(val_generator_orig, num_gpus)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Get number of samples"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "scrolled": true
    },
    "outputs": [],
    "source": [
     "# Number of examples.\n",
     "tc = train_generator_orig.nb_sample\n",
     "vc = val_generator_orig.nb_sample\n",
     "#tc = train_generator_orig.samples\n",
     "#vc = val_generator_orig.samples\n",
     "\n",
     "# Number of batches for multi-GPU exploitation.\n",
     "# Note: Multi-GPU exploitation for data parallelism splits mini-batches\n",
     "# into a set of micro-batches to be run in parallel on each GPU, but\n",
     "# Keras will view the set of micro-batches as a single batch with\n",
     "# multiple sources of inputs (i.e. Keras will view a set of examples\n",
     "# being run in parallel as a single example with multiple sources of\n",
     "# inputs).\n",
     "train_batches = int(math.ceil(tc/batch_size))\n",
     "val_batches = int(math.ceil(vc/batch_size))\n",
     "\n",
     "# Class counts (just for information)\n",
     "train_class_counts = np.bincount(train_generator_orig.classes)\n",
     "val_class_counts = np.bincount(val_generator_orig.classes)\n",
     "\n",
     "print(tc, vc)\n",
     "print(train_batches, val_batches)\n",
     "print(train_class_counts / np.sum(train_class_counts), val_class_counts / np.sum(val_class_counts))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Generate class weights for training"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "class_counts = np.bincount(train_generator_orig.classes)\n",
     "class_weights = dict(zip(range(classes), min(class_counts) / class_counts))\n",
     "print(class_counts)\n",
     "print(class_weights)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Plot random images (Optional)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "def show_random_image(save_dir):\n",
     "  c = np.random.randint(1, 4)\n",
     "  class_dir = os.path.join(save_dir, str(c))\n",
     "  files = os.listdir(class_dir)\n",
     "  i = np.random.randint(0, len(files))\n",
     "  fname = os.path.join(class_dir, files[i])\n",
     "  print(fname)\n",
     "  img = Image.open(fname)\n",
     "  plt.imshow(img)\n",
     "\n",
     "# show_random_image(train_save_dir)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "def plot(gen):\n",
     "  r, c = 6, 6\n",
     "  fig, ax = plt.subplots(r, c)\n",
     "  plt.setp(ax, xticks=[], yticks=[])\n",
     "  plt.tight_layout()\n",
     "  x, y, fname = next(gen)\n",
     "  batch_size = x.shape[0]\n",
     "  for i in range(r):\n",
     "    for j in range(c):\n",
     "      if i*c + j < batch_size:\n",
     "        im = x[i*c + j].astype(np.uint8)\n",
     "        if K.image_data_format() == 'channels_first':\n",
     "          im = im.transpose(1,2,0)  # (C,H,W) -> (H,W,C)\n",
     "        ax[i][j].imshow(im)\n",
     "        ax[i][j].set_xlabel(y[i*c + j])\n",
     "\n",
     "plot(train_generator_orig)\n",
     "plot(val_generator_orig)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Evaluate previous model checkpoint"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# NOTE: We could call the `model.evaluate*` methods,\n",
     "# but that would not allow us to create contingency\n",
     "# matrices.  Instead, we repeatedly loop over batches\n",
     "# of data, collecting both the true labels and\n",
     "# predictions.  Then, we can compute any metrics\n",
     "# desired, including 3x3 contingency matrices."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# def extract_metrics(model, raw_metrics):\n",
     "#   labeled_metrics = list(zip(model.metrics_names, raw_metrics))\n",
     "#   losses = [v for k,v in labeled_metrics if k == \"loss\"]\n",
     "#   accuracies = [v for k,v in labeled_metrics if k.endswith(\"acc\")]\n",
     "#   loss = sum(losses) / num_gpus\n",
     "#   acc = sum(accuracies) / num_gpus\n",
     "#   metrics = {\"loss\": loss, \"acc\": acc}\n",
     "#   return labeled_metrics, metrics\n",
     "\n",
     "# raw_metrics = model.evaluate_generator(val_generator, val_samples=32,\n",
     "#                                        max_q_size=8, nb_worker=1, pickle_safe=False)\n",
     "\n",
     "# labeled_metrics, metrics = extract_metrics(model, raw_metrics)\n",
     "# print(labeled_metrics)\n",
     "# print(metrics)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Get predictions\n",
     "for dataset in [(\"train\", p, tc, val_generator)]:  #, (\"val\", val_p, vc, val_generator)]:\n",
     "  name, perc, count, gen = dataset\n",
     "\n",
     "  ys = []\n",
     "  preds = []\n",
     "  fnames = []\n",
     "  batches = math.floor(count / batch_size)\n",
     "  for i in range(batches):\n",
     "    # Get batch.\n",
     "#     x, y = next(gen)\n",
     "    x, y, fname = next(gen)\n",
     "\n",
     "    # Get predictions\n",
     "    pred = model.predict(x)\n",
     "\n",
     "    # Store y and predictions\n",
     "    ys.extend(y)  # y is always a list of parallel batches, even if only 1 batch\n",
     "    if isinstance(pred, list):\n",
     "      preds.extend(pred)\n",
     "    else:\n",
     "      preds.append(pred)\n",
     "    fnames.extend(fname)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "  # Create DataFrames\n",
     "  y = np.concatenate(ys)\n",
     "  pred = np.concatenate(preds)\n",
     "  fname = np.concatenate(fnames)\n",
     "  y_df = pd.DataFrame(y, columns=[1,2,3])\n",
     "  pred_df = pd.DataFrame(pred, columns=[1,2,3])\n",
     "  fname_df = pd.DataFrame(np.atleast_2d(fname).T, columns=[\"filenames\"])\n",
     "\n",
     "  # Create class, prediction, slide_num DataFrames\n",
     "  y_class_df = y_df.idxmax(axis=1)\n",
     "  pred_class_df = pred_df.idxmax(axis=1)\n",
     "  y_class_df.name = \"actual\"\n",
     "  pred_class_df.name = \"predicted\"\n",
     "  slide_info_df = fname_df.filenames.str.extract('(?P<class>\\d)\\/\\d+_(?P<slide_num>\\d+)_\\d+.jpeg', expand=True)\n",
     "  slide_info_df[\"class\"] = slide_info_df[\"class\"].astype(int)\n",
     "  slide_info_df[\"slide_num\"] = slide_info_df[\"slide_num\"].astype(int)\n",
     "  df = pd.concat([fname_df, slide_info_df, y_class_df, pred_class_df], axis=1)\n",
     "  \n",
     "  # sanity check\n",
     "  assert np.allclose(df[\"class\"], df.actual)\n",
     "  \n",
     "  # Create Contingency matrix\n",
     "  contingency_mat = pd.crosstab(df.actual, df.predicted)\n",
     "\n",
     "#   # Save DataFrames\n",
     "#   y_df.to_csv(os.path.join(exp_dir, \"{model_ck}-{perc}%-{data}-y_df.csv\".format(model_ck=model_file[:-5], perc=100*perc, data=name)), header=True)\n",
     "#   pred_df.to_csv(os.path.join(exp_dir, \"{model_ck}-{perc}%-{data}-pred_df.csv\".format(model_ck=model_file[:-5], perc=100*perc, data=name)), header=True)\n",
     "#   df.to_csv(os.path.join(exp_dir, \"{model_ck}-{perc}%-{data}-df.csv\".format(model_ck=model_file[:-5], perc=100*perc, data=name)), header=True)\n",
     "\n",
     "#   # Save results\n",
     "#   with open(os.path.join(exp_dir, \"{model_ck}-{perc}%-{data}-results.txt\".format(model_ck=model_file[:-5], perc=100*perc, data=name)), 'w') as f:\n",
     "#     print(\"Dataset: {}\".format(name), file=f)\n",
     "#     print(\"Number of samples: {}\".format(len(y_df)), file=f)\n",
     "#     print(contingency_mat, file=f)\n",
     "#     print(\"Accuracy: {}\".format(np.mean(np.equal(y_class, pred_class))), file=f)\n",
     "  print(\"Number of samples: {}\".format(len(y_df)))\n",
     "  print(contingency_mat)\n",
     "  print(\"Accuracy: {}\".format(np.mean(np.equal(y_class_df, pred_class_df))))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "len(y_df), len(pred_df), len(fname_df), len(df)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "df"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "df2 = df.loc[:, [\"slide_num\", \"actual\", \"predicted\"]]\n",
     "df2"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "df3 = df2.groupby(\"slide_num\").mean()\n",
     "df3[\"predicted_round\"] = df3.predicted.map(round)\n",
     "df3"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "sum(df3.actual == df3.predicted_round) / len(df3)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "pd.crosstab(df3.actual, df3.predicted_round)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "scrolled": true
    },
    "outputs": [],
    "source": [
     "gb = df2.groupby([\"slide_num\"])  #, \"predicted\"])\n",
     "gb.describe()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "---"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Read in predictions + true DataFrames and extract metrics"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# # Read DataFrames\n",
     "# y_df = pd.read_csv(os.path.join(exp_dir, \"{}-y_df.csv\".format(model_file[:-5])), index_col=0)\n",
     "# pred_df = pd.read_csv(os.path.join(exp_dir, \"{}-pred_df.csv\".format(model_file[:-5])), index_col=0)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# # Create Contingency matrix\n",
     "# y_class = y_df.idxmax(axis=1)\n",
     "# pred_class = pred_df.idxmax(axis=1)\n",
     "# y_class.name = \"Actual\"\n",
     "# pred_class.name = \"Predicted\"\n",
     "# contingency_mat = pd.crosstab(y_class, pred_class)\n",
     "\n",
     "# print(\"Number of samples: {}\".format(len(y_df)))\n",
     "# print(contingency_mat)\n",
     "# print(\"Accuracy: {}\".format(np.mean(np.equal(y_class, pred_class))))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true,
     "scrolled": true
    },
    "outputs": [],
    "source": [
     "# # --- Alternate approach with NumPy arrays only\n",
     "# y_c = np.argmax(y, axis=1) + 1\n",
     "# pred_c = np.argmax(pred, axis=1) + 1\n",
     "# y_actu = pd.Series(y_c, name=\"Actual\")\n",
     "# y_pred = pd.Series(pred_c, name=\"Predicted\")\n",
     "# contingency_mat = pd.crosstab(y_actu, y_pred)\n",
     "\n",
     "# print(\"Number of samples: {}\".format(len(y_c)))\n",
     "# print(contingency_mat)\n",
     "# print(\"Accuracy: {}\".format(np.mean(np.equal(y_c, pred_c))))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {
     "collapsed": true
    },
    "source": [
     "---"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Sample images + predictions & write to disk"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "# path_template = os.path.join(\"visualize\", \"{dataset}\", \"Pred_{pred}-Actual_{actual}\")\n",
     "# for dataset in [\"train\", \"val\"]:\n",
     "#   for i in range(3):\n",
     "#     for j in range(3):\n",
     "#       os.makedirs(path_template.format(dataset=dataset, pred=i+1, actual=j+1), exist_ok=True)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true,
     "scrolled": false
    },
    "outputs": [],
    "source": [
     "# filename_template = os.path.join(path_template, \"{hash}.jpeg\")\n",
     "# batches = 8\n",
     "\n",
     "# for dataset in [(\"train\", train_generator_orig), (\"val\", val_generator_orig)]:\n",
     "#   name, gen = dataset\n",
     "#   print(name)\n",
     "  \n",
     "#   for i in range(batches):\n",
     "#     # Get batch.\n",
     "#     x_orig, y_orig = next(gen)\n",
     "#     x = preprocess_input(np.copy(x_orig))\n",
     "#     y = y_orig\n",
     "\n",
     "#     # Get predictions\n",
     "#     raw_preds = model.predict(x)\n",
     "#     raw_metrics = model.evaluate(x, y)\n",
     "#     labeled_metrics, metrics = extract_metrics(model, raw_metrics)\n",
     "\n",
     "#     # Create contingency matrix\n",
     "#     y = np.argmax(y, axis=1)+1\n",
     "#     preds = np.argmax(raw_preds, axis=1)+1\n",
     "#     y_actu = pd.Series(y, name=\"Actual\")\n",
     "#     y_pred = pd.Series(preds, name=\"Predicted\")\n",
     "#     contingency_mat = pd.crosstab(y_actu, y_pred)\n",
     "\n",
     "# #     # Output images in directories based on misclassification.\n",
     "# #     def plot(x, y):\n",
     "# #       r, c = 6, 6\n",
     "# #       fig, ax = plt.subplots(r, c)\n",
     "# #       plt.setp(ax, xticks=[], yticks=[])\n",
     "# #       plt.tight_layout()\n",
     "# #       batch_size = x.shape[0]\n",
     "# #       for i in range(r):\n",
     "# #         for j in range(c):\n",
     "# #           if i*c + j < batch_size:\n",
     "# #             ax[i][j].imshow(x[i*c + j].astype(np.uint8))\n",
     "# #             ax[i][j].set_xlabel(\"{preds}-{y}\".format(y=y[i*c + j], preds=preds[i*c + j]))\n",
     "\n",
     "# #     plot(x_orig, y)\n",
     "# #     plt.show()\n",
     "\n",
     "#     for n in range(x_orig.shape[0]):\n",
     "#       img = Image.fromarray(x_orig[n].astype(np.uint8), 'RGB')\n",
     "#       filename = filename_template.format(dataset=name, pred=preds[n], actual=y[n], hash=np.random.randint(1e6))\n",
     "#       img.save(filename)\n",
     "\n",
     "#     print(contingency_mat)\n",
     "#     print(np.mean(y==preds))\n",
     "#     print(labeled_metrics)\n",
     "#     print(metrics)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "---"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Predict"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "x, label, _ = (next(train_generator_orig))\n",
     "Image.fromarray((x[0]).astype(np.uint8))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "preds = resnet50.predict(preprocess_input(x[0].reshape(1, 224, 224, 3)))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "print(\"Actual: {}\".format(label[0]))\n",
     "print(\"Pred:   {}\".format(preds[0]))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "---"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Cleanup"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Stop processes cleanly.  Otherwise, zombie processes will\n",
     "# persist and hold onto GPU memory.\n",
     "try:\n",
     "    pool.terminate()\n",
     "except:\n",
     "    pass\n",
     "for p in mp.active_children():\n",
     "  p.terminate()\n",
     "mp.active_children()"
    ]
   }
  ],
  "metadata": {
   "anaconda-cloud": {},
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
     "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.6.1"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 1
 }