integration-tests/examples/test_templates/tensor/template_preparation_tensor.ipynb - incubator-datalab - Git at Google

 {
  "cells": [
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "import os, cv2, random\n",
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
     "%matplotlib inline \n",
     "from keras.models import Sequential, load_model\n",
     "from keras.layers import Dropout, Flatten, Convolution2D, MaxPooling2D, Dense, Activation\n",
     "from keras.optimizers import Adam\n",
     "from keras.callbacks import Callback, EarlyStopping\n",
     "from keras.callbacks import BaseLogger, TensorBoard"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Constants definition"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "TRAIN_DIR = '/home/dlab-user/train/'\n",
     "TEST_DIR = '/home/dlab-user/test/'\n",
     "ROWS = 128\n",
     "COLS = 128\n",
     "CHANNELS = 3\n",
     "TRAIN_IMAGES_COUNT = 1000\n",
     "PATH_TO_LOGS = '/home/dlab-user/logs'"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Reading and adjusting images for training"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Sample a labelled subset from TRAIN_DIR. Shuffle before truncating:\n",
     "# os.listdir() returns names in arbitrary order, so slicing first could\n",
     "# yield a subset dominated by a single class.\n",
     "all_images = [os.path.join(TRAIN_DIR, name) for name in os.listdir(TRAIN_DIR)]\n",
     "random.shuffle(all_images)\n",
     "all_images = all_images[:TRAIN_IMAGES_COUNT]\n",
     "\n",
     "# Hold out the last 10% of the sampled images as a labelled test split.\n",
     "# (The files under TEST_DIR are not used by this notebook.)\n",
     "test_coeff = int(len(all_images) * .9)\n",
     "train_images, test_images = all_images[:test_coeff], all_images[test_coeff:]\n",
     "\n",
     "def read_image(file_path):\n",
     "    \"\"\"Load an image from disk and resize it to the network input size.\n",
     "\n",
     "    Returns an array of shape (ROWS, COLS, CHANNELS) in OpenCV's BGR channel\n",
     "    order. Raises IOError if the file cannot be read or decoded.\n",
     "    \"\"\"\n",
     "    img = cv2.imread(file_path, cv2.IMREAD_COLOR)\n",
     "    if img is None:\n",
     "        # cv2.imread signals failure by returning None instead of raising.\n",
     "        raise IOError('Cannot read image: {}'.format(file_path))\n",
     "    # cv2.resize expects dsize as (width, height), i.e. (COLS, ROWS).\n",
     "    resized = cv2.resize(img, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)\n",
     "    return resized.reshape(ROWS, COLS, CHANNELS)\n",
     "\n",
     "def prepare_data(images):\n",
     "    \"\"\"Load every path in `images` into a single uint8 array.\n",
     "\n",
     "    The result has shape (len(images), ROWS, COLS, CHANNELS); row k holds\n",
     "    the image returned by read_image(images[k]).\n",
     "    \"\"\"\n",
     "    batch = np.empty((len(images), ROWS, COLS, CHANNELS), dtype=np.uint8)\n",
     "    for index, path in enumerate(images):\n",
     "        batch[index] = read_image(path)\n",
     "    return batch\n",
     "\n",
     "train = prepare_data(train_images)\n",
     "test = prepare_data(test_images)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Image counts"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "print(\"Train shape: {}\".format(np.shape(train)))\n",
     "print(\"Test shape: {}\".format(np.shape(test)))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Assigning labels to training and test images"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# A file is labelled 1 when its base name contains 'dog', otherwise 0.\n",
     "labels = [1 if 'dog' in path.split(\"/\")[-1] else 0 for path in train_images]\n",
     "labels_test = [1 if 'dog' in path.split(\"/\")[-1] else 0 for path in test_images]"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Building a convnet"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "optimizer = Adam(lr=1e-6)\n",
     "objective = 'binary_crossentropy'\n",
     "\n",
     "def build_model():\n",
     "    \"\"\"Build and compile the binary image classifier.\n",
     "\n",
     "    Three blocks of two 3x3 'same'-padded ReLU convolutions (32, 64 and 128\n",
     "    filters), each followed by 2x2 max pooling, then a 256-unit dense layer\n",
     "    with 50% dropout and a single sigmoid output. The model is compiled with\n",
     "    the module-level `objective` and `optimizer` and tracks accuracy.\n",
     "    \"\"\"\n",
     "    model = Sequential()\n",
     "\n",
     "    for block_index, filters in enumerate((32, 64, 128)):\n",
     "        # Only the very first layer declares the input shape. It must match the\n",
     "        # arrays built by prepare_data, hence CHANNELS rather than a literal 3.\n",
     "        first_layer_kwargs = {'input_shape': (ROWS, COLS, CHANNELS)} if block_index == 0 else {}\n",
     "        model.add(Convolution2D(filters, 3, 3, border_mode='same', activation='relu', **first_layer_kwargs))\n",
     "        model.add(Convolution2D(filters, 3, 3, border_mode='same', activation='relu'))\n",
     "        model.add(MaxPooling2D(pool_size=(2, 2)))\n",
     "\n",
     "    model.add(Flatten())\n",
     "    model.add(Dense(256, activation='relu'))\n",
     "    model.add(Dropout(0.5))\n",
     "\n",
     "    model.add(Dense(1))\n",
     "    model.add(Activation('sigmoid'))\n",
     "\n",
     "    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])\n",
     "    return model\n",
     "\n",
     "\n",
     "model = build_model()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Training the model"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "This block takes about 2.5-3 hours to execute when training on the whole dataset of 22500 images"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "nb_epoch = 10\n",
     "batch_size = 16\n",
     "\n",
     "class LossHistory(Callback):\n",
     "    \"\"\"Keras callback that records training and validation loss after each epoch.\"\"\"\n",
     "\n",
     "    def on_train_begin(self, logs=None):\n",
     "        # Start from empty histories every time training begins.\n",
     "        self.losses = []\n",
     "        self.val_losses = []\n",
     "\n",
     "    def on_epoch_end(self, batch, logs=None):\n",
     "        # Keras passes the epoch index as the first argument of on_epoch_end;\n",
     "        # the parameter name `batch` is kept only for backward compatibility.\n",
     "        logs = logs or {}\n",
     "        self.losses.append(logs.get('loss'))\n",
     "        self.val_losses.append(logs.get('val_loss'))\n",
     "\n",
     "early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')\n",
     "\n",
     "def train_and_test_model():\n",
     "    \"\"\"Fit the global `model` on the training split and predict on the test split.\n",
     "\n",
     "    Returns a (predictions, history) tuple, where `history` is the LossHistory\n",
     "    callback holding the per-epoch training and validation losses.\n",
     "    \"\"\"\n",
     "    history = LossHistory()\n",
     "    tensorboard = TensorBoard(log_dir=PATH_TO_LOGS)\n",
     "    # fit() is documented to take NumPy arrays, so convert the label list explicitly.\n",
     "    model.fit(train, np.asarray(labels), batch_size=batch_size, nb_epoch=nb_epoch,\n",
     "              validation_split=0.25, verbose=2, shuffle=True,\n",
     "              callbacks=[history, early_stopping, tensorboard])\n",
     "\n",
     "    predictions = model.predict(test, verbose=2)\n",
     "    return predictions, history\n",
     "\n",
     "predictions, history = train_and_test_model()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Saving the model and weights"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "path_to_model = '/home/dlab-user/model_1000.json'\n",
     "path_to_weights = '/home/dlab-user/weigths_1000.h5'\n",
     "\n",
     "model.save(path_to_model)\n",
     "model.save_weights(path_to_weights)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Plotting learning curves"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "loss = history.losses\n",
     "val_loss = history.val_losses\n",
     "\n",
     "# Draw both curves on an explicit Axes rather than the implicit pyplot state.\n",
     "fig, ax = plt.subplots()\n",
     "ax.plot(loss, 'blue', label='Training Loss')\n",
     "ax.plot(val_loss, 'green', label='Validation Loss')\n",
     "ax.set_xlabel('Epochs')\n",
     "ax.set_ylabel('Loss')\n",
     "ax.set_title('VGG-16 Loss Trend')\n",
     "ax.set_xticks(range(0, len(loss), 2))  # one tick on every second epoch\n",
     "ax.legend()\n",
     "plt.show()"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
    "display_name": "Python 2",
    "language": "python",
    "name": "KERNEL_NAME"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
     "version": 2
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
    "version": "2.7.13"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import os, cv2, random\n",
	"import numpy as np\n",
	"import matplotlib.pyplot as plt\n",
	"%matplotlib inline \n",
	"from keras.models import Sequential, load_model\n",
	"from keras.layers import Dropout, Flatten, Convolution2D, MaxPooling2D, Dense, Activation\n",
	"from keras.optimizers import Adam\n",
	"from keras.callbacks import Callback, EarlyStopping\n",
	"from keras.callbacks import BaseLogger, TensorBoard"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Constants definition"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"TRAIN_DIR = '/home/dlab-user/train/'\n",
	"TEST_DIR = '/home/dlab-user/test/'\n",
	"ROWS = 128\n",
	"COLS = 128\n",
	"CHANNELS = 3\n",
	"TRAIN_IMAGES_COUNT = 1000\n",
	"PATH_TO_LOGS = '/home/dlab-user/logs'"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Reading and adjusting images for training"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Sample a labelled subset from TRAIN_DIR. Shuffle before truncating:\n",
	"# os.listdir() returns names in arbitrary order, so slicing first could\n",
	"# yield a subset dominated by a single class.\n",
	"all_images = [os.path.join(TRAIN_DIR, name) for name in os.listdir(TRAIN_DIR)]\n",
	"random.shuffle(all_images)\n",
	"all_images = all_images[:TRAIN_IMAGES_COUNT]\n",
	"\n",
	"# Hold out the last 10% of the sampled images as a labelled test split.\n",
	"# (The files under TEST_DIR are not used by this notebook.)\n",
	"test_coeff = int(len(all_images) * .9)\n",
	"train_images, test_images = all_images[:test_coeff], all_images[test_coeff:]\n",
	"\n",
	"def read_image(file_path):\n",
	"    \"\"\"Load an image from disk and resize it to the network input size.\n",
	"\n",
	"    Returns an array of shape (ROWS, COLS, CHANNELS) in OpenCV's BGR channel\n",
	"    order. Raises IOError if the file cannot be read or decoded.\n",
	"    \"\"\"\n",
	"    img = cv2.imread(file_path, cv2.IMREAD_COLOR)\n",
	"    if img is None:\n",
	"        # cv2.imread signals failure by returning None instead of raising.\n",
	"        raise IOError('Cannot read image: {}'.format(file_path))\n",
	"    # cv2.resize expects dsize as (width, height), i.e. (COLS, ROWS).\n",
	"    resized = cv2.resize(img, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)\n",
	"    return resized.reshape(ROWS, COLS, CHANNELS)\n",
	"\n",
	"def prepare_data(images):\n",
	"    \"\"\"Load every path in `images` into a single uint8 array.\n",
	"\n",
	"    The result has shape (len(images), ROWS, COLS, CHANNELS); row k holds\n",
	"    the image returned by read_image(images[k]).\n",
	"    \"\"\"\n",
	"    batch = np.empty((len(images), ROWS, COLS, CHANNELS), dtype=np.uint8)\n",
	"    for index, path in enumerate(images):\n",
	"        batch[index] = read_image(path)\n",
	"    return batch\n",
	"\n",
	"train = prepare_data(train_images)\n",
	"test = prepare_data(test_images)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Image counts"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"print(\"Train shape: {}\".format(np.shape(train)))\n",
	"print(\"Test shape: {}\".format(np.shape(test)))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Assigning labels to training and test images"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# A file is labelled 1 when its base name contains 'dog', otherwise 0.\n",
	"labels = [1 if 'dog' in path.split(\"/\")[-1] else 0 for path in train_images]\n",
	"labels_test = [1 if 'dog' in path.split(\"/\")[-1] else 0 for path in test_images]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Building a convnet"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"optimizer = Adam(lr=1e-6)\n",
	"objective = 'binary_crossentropy'\n",
	"\n",
	"def build_model():\n",
	"    \"\"\"Build and compile the binary image classifier.\n",
	"\n",
	"    Three blocks of two 3x3 'same'-padded ReLU convolutions (32, 64 and 128\n",
	"    filters), each followed by 2x2 max pooling, then a 256-unit dense layer\n",
	"    with 50% dropout and a single sigmoid output. The model is compiled with\n",
	"    the module-level `objective` and `optimizer` and tracks accuracy.\n",
	"    \"\"\"\n",
	"    model = Sequential()\n",
	"\n",
	"    for block_index, filters in enumerate((32, 64, 128)):\n",
	"        # Only the very first layer declares the input shape. It must match the\n",
	"        # arrays built by prepare_data, hence CHANNELS rather than a literal 3.\n",
	"        first_layer_kwargs = {'input_shape': (ROWS, COLS, CHANNELS)} if block_index == 0 else {}\n",
	"        model.add(Convolution2D(filters, 3, 3, border_mode='same', activation='relu', **first_layer_kwargs))\n",
	"        model.add(Convolution2D(filters, 3, 3, border_mode='same', activation='relu'))\n",
	"        model.add(MaxPooling2D(pool_size=(2, 2)))\n",
	"\n",
	"    model.add(Flatten())\n",
	"    model.add(Dense(256, activation='relu'))\n",
	"    model.add(Dropout(0.5))\n",
	"\n",
	"    model.add(Dense(1))\n",
	"    model.add(Activation('sigmoid'))\n",
	"\n",
	"    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])\n",
	"    return model\n",
	"\n",
	"\n",
	"model = build_model()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Training the model"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"This block takes about 2.5-3 hours to execute when training on the whole dataset of 22500 images"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"nb_epoch = 10\n",
	"batch_size = 16\n",
	"\n",
	"class LossHistory(Callback):\n",
	"    \"\"\"Keras callback that records training and validation loss after each epoch.\"\"\"\n",
	"\n",
	"    def on_train_begin(self, logs=None):\n",
	"        # Start from empty histories every time training begins.\n",
	"        self.losses = []\n",
	"        self.val_losses = []\n",
	"\n",
	"    def on_epoch_end(self, batch, logs=None):\n",
	"        # Keras passes the epoch index as the first argument of on_epoch_end;\n",
	"        # the parameter name `batch` is kept only for backward compatibility.\n",
	"        logs = logs or {}\n",
	"        self.losses.append(logs.get('loss'))\n",
	"        self.val_losses.append(logs.get('val_loss'))\n",
	"\n",
	"early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')\n",
	"\n",
	"def train_and_test_model():\n",
	"    \"\"\"Fit the global `model` on the training split and predict on the test split.\n",
	"\n",
	"    Returns a (predictions, history) tuple, where `history` is the LossHistory\n",
	"    callback holding the per-epoch training and validation losses.\n",
	"    \"\"\"\n",
	"    history = LossHistory()\n",
	"    tensorboard = TensorBoard(log_dir=PATH_TO_LOGS)\n",
	"    # fit() is documented to take NumPy arrays, so convert the label list explicitly.\n",
	"    model.fit(train, np.asarray(labels), batch_size=batch_size, nb_epoch=nb_epoch,\n",
	"              validation_split=0.25, verbose=2, shuffle=True,\n",
	"              callbacks=[history, early_stopping, tensorboard])\n",
	"\n",
	"    predictions = model.predict(test, verbose=2)\n",
	"    return predictions, history\n",
	"\n",
	"predictions, history = train_and_test_model()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Saving the model and weights"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"path_to_model = '/home/dlab-user/model_1000.json'\n",
	"path_to_weights = '/home/dlab-user/weigths_1000.h5'\n",
	"\n",
	"model.save(path_to_model)\n",
	"model.save_weights(path_to_weights)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Plotting learning curves"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"loss = history.losses\n",
	"val_loss = history.val_losses\n",
	"\n",
	"# Draw both curves on an explicit Axes rather than the implicit pyplot state.\n",
	"fig, ax = plt.subplots()\n",
	"ax.plot(loss, 'blue', label='Training Loss')\n",
	"ax.plot(val_loss, 'green', label='Validation Loss')\n",
	"ax.set_xlabel('Epochs')\n",
	"ax.set_ylabel('Loss')\n",
	"ax.set_title('VGG-16 Loss Trend')\n",
	"ax.set_xticks(range(0, len(loss), 2))  # one tick on every second epoch\n",
	"ax.legend()\n",
	"plt.show()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "KERNEL_NAME"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.13"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}