{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Quick Setup - Warning: Deprecated"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create a SystemDS MLContext object\n",
"from systemds import MLContext, dml\n",
"ml = MLContext(sc)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download Data - MNIST"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The MNIST dataset contains labeled images of handwritten digits, where each example is a 28x28 pixel image of grayscale values in the range [0,255] stretched out as 784 pixels, and each label is one of 10 possible digits in [0,9]. Here, we download 60,000 training examples, and 10,000 test examples, where the format is \"label, pixel_1, pixel_2, ..., pixel_n\"."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%sh\n",
"mkdir -p data/mnist/\n",
"cd data/mnist/\n",
"curl -O https://pjreddie.com/media/files/mnist_train.csv\n",
"curl -O https://pjreddie.com/media/files/mnist_test.csv"
]
},
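{
"cell_type": "markdown",
"metadata": {},
"source": [
"As an optional sanity check (not part of the original example), we can peek at the first row of the downloaded training CSV to confirm the \"label, pixel_1, pixel_2, ..., pixel_n\" layout, i.e. 1 + 784 = 785 columns per row. This assumes the download above placed the files under `data/mnist/`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check on the downloaded CSV layout.\n",
"# Assumes the curl commands above placed the files under data/mnist/.\n",
"with open(\"data/mnist/mnist_train.csv\") as f:\n",
"    first_row = f.readline().strip().split(\",\")\n",
"print(\"Columns per row:\", len(first_row))  # expected: 1 label + 784 pixels = 785\n",
"print(\"Label of first example:\", first_row[0])"
]
},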
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## SystemDS \"LeNet\" Neural Network"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"script_string = \"\"\"\n",
"source(\"nn/examples/mnist_lenet.dml\") as mnist_lenet\n",
"\n",
"# Read training data\n",
"data = read($data, format=\"csv\")\n",
"n = nrow(data)\n",
"\n",
"# Extract images and labels\n",
"images = data[,2:ncol(data)]\n",
"labels = data[,1]\n",
"\n",
"# Scale images to [-1,1], and one-hot encode the labels\n",
"images = (images / 255.0) * 2 - 1\n",
"labels = table(seq(1, n), labels+1, n, 10)\n",
"\n",
"# Split into training (55,000 examples) and validation (5,000 examples)\n",
"X = images[5001:nrow(images),]\n",
"X_val = images[1:5000,]\n",
"y = labels[5001:nrow(images),]\n",
"y_val = labels[1:5000,]\n",
"\n",
"# Train\n",
"epochs = 10\n",
"[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win, epochs)\n",
"\"\"\"\n",
"script = (dml(script_string).input(\"$data\", \"data/mnist/mnist_train.csv\")\n",
" .input(C=1, Hin=28, Win=28)\n",
" .output(\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"))\n",
"W1, b1, W2, b2, W3, b3, W4, b4 = (ml.execute(script)\n",
" .get(\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"))"
]
},
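{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick check after training (a sketch, not part of the original example), the returned parameter objects can be inspected from Python. This assumes they expose a `toNumPy()` conversion, as in the SystemML-style MLContext Python API that this deprecated setup mirrors."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: inspect the shapes of the trained parameters.\n",
"# Assumes W1..b4 expose a toNumPy() conversion, as in the\n",
"# SystemML-style MLContext Python API.\n",
"params = [(\"W1\", W1), (\"b1\", b1), (\"W2\", W2), (\"b2\", b2),\n",
"          (\"W3\", W3), (\"b3\", b3), (\"W4\", W4), (\"b4\", b4)]\n",
"for name, mat in params:\n",
"    print(name, mat.toNumPy().shape)"
]
},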
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Compute Test Accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"script_string = \"\"\"\n",
"source(\"nn/examples/mnist_lenet.dml\") as mnist_lenet\n",
"\n",
"# Read test data\n",
"data = read($data, format=\"csv\")\n",
"n = nrow(data)\n",
"\n",
"# Extract images and labels\n",
"X_test = data[,2:ncol(data)]\n",
"y_test = data[,1]\n",
"\n",
"# Scale images to [-1,1], and one-hot encode the labels\n",
"X_test = (X_test / 255.0) * 2 - 1\n",
"y_test = table(seq(1, n), y_test+1, n, 10)\n",
"\n",
"# Eval on test set\n",
"probs = mnist_lenet::predict(X_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)\n",
"[loss, accuracy] = mnist_lenet::eval(probs, y_test)\n",
"\n",
"print(\"Test Accuracy: \" + accuracy)\n",
"\"\"\"\n",
"script = dml(script_string).input(**{\"$data\": \"data/mnist/mnist_train.csv\",\n",
" \"C\": 1, \"Hin\": 28, \"Win\": 28,\n",
" \"W1\": W1, \"b1\": b1,\n",
" \"W2\": W2, \"b2\": b2,\n",
" \"W3\": W3, \"b3\": b3,\n",
" \"W4\": W4, \"b4\": b4})\n",
"ml.execute(script)"
]
},
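{
"cell_type": "markdown",
"metadata": {},
"source": [
"The script above only prints the accuracy. To also get the value back as a Python number, one option (a sketch, assuming the same `.output()`/`.get()` MLContext calls used in the training cell also work for scalar script variables) is to register `accuracy` as a script output:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: register \"accuracy\" as a script output and fetch it into Python.\n",
"# Assumes the .output()/.get() calls from the training cell also work for scalars.\n",
"script = (dml(script_string).input(**{\"$data\": \"data/mnist/mnist_test.csv\",\n",
"                                      \"C\": 1, \"Hin\": 28, \"Win\": 28,\n",
"                                      \"W1\": W1, \"b1\": b1,\n",
"                                      \"W2\": W2, \"b2\": b2,\n",
"                                      \"W3\": W3, \"b3\": b3,\n",
"                                      \"W4\": W4, \"b4\": b4})\n",
"                            .output(\"accuracy\"))\n",
"test_accuracy = ml.execute(script).get(\"accuracy\")\n",
"print(\"Test accuracy as a Python value:\", test_accuracy)"
]
},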
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Extract Model Into Spark DataFrames For Future Use"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"W1_df = W1.toDF()\n",
"b1_df = b1.toDF()\n",
"W2_df = W2.toDF()\n",
"b2_df = b2.toDF()\n",
"W3_df = W3.toDF()\n",
"b3_df = b3.toDF()\n",
"W4_df = W4.toDF()\n",
"b4_df = b4.toDF()\n",
"W1_df, b1_df, W2_df, b2_df, W3_df, b3_df, W4_df, b4_df"
]
}
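,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To actually reuse the model later, the extracted DataFrames can be persisted, for example with Spark's Parquet writer. This is a sketch; the output directory `data/mnist/model/` is an arbitrary example path, not part of the original notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: persist the extracted parameters with Spark's DataFrame writer.\n",
"# The output directory below is an arbitrary example path.\n",
"params = {\"W1\": W1_df, \"b1\": b1_df, \"W2\": W2_df, \"b2\": b2_df,\n",
"          \"W3\": W3_df, \"b3\": b3_df, \"W4\": W4_df, \"b4\": b4_df}\n",
"for name, df in params.items():\n",
"    df.write.mode(\"overwrite\").parquet(\"data/mnist/model/\" + name)"
]
}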
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 + Spark 2.x + SystemDS",
"language": "python",
"name": "pyspark3_2.x"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 1
}