blob: f0939f7679e0446bd4d30f82964163ba14d6580f [file] [log] [blame]
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Quick Setup - Warning: Deprecated"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# Entry point for running DML scripts: wrap the Spark context in a SystemDS MLContext.\n",
"# NOTE(review): `sc` is not defined anywhere in this notebook -- presumably the kernel\n",
"# (e.g. a PySpark session) provides it; confirm before running on a plain Python kernel.\n",
"from systemds import MLContext, dml\n",
"\n",
"ml = MLContext(sc)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download Data - MNIST"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The MNIST dataset contains labeled images of handwritten digits. Each example is a 28x28 pixel image of grayscale values in the range [0,255], flattened into a row of 784 pixel values, and each label is one of the 10 possible digits in [0,9]. Here, we download 60,000 training examples and 10,000 test examples as CSV files, where each row has the format \"label, pixel_1, pixel_2, ..., pixel_n\"."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%sh\n",
"# Fetch the MNIST train/test CSVs into data/mnist/.\n",
"# Stop at the first failing command so a failed `cd` cannot send the downloads\n",
"# to the wrong directory and a failed download is reported instead of ignored.\n",
"set -e\n",
"mkdir -p data/mnist/\n",
"cd data/mnist/\n",
"# -f: fail on HTTP errors instead of saving the error page as a .csv file\n",
"# -L: follow redirects\n",
"# -O: keep the remote file name\n",
"curl -fLO https://pjreddie.com/media/files/mnist_train.csv\n",
"curl -fLO https://pjreddie.com/media/files/mnist_test.csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## SystemDS Softmax Model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# DML program: read the training CSV, preprocess it, and fit the softmax model.\n",
"training = \"\"\"\n",
"source(\"nn/examples/mnist_softmax.dml\") as mnist_softmax\n",
"\n",
"# Read training data\n",
"data = read($data, format=\"csv\")\n",
"n = nrow(data)\n",
"\n",
"# Extract images and labels\n",
"images = data[,2:ncol(data)]\n",
"labels = data[,1]\n",
"\n",
"# Scale images to [0,1], and one-hot encode the labels\n",
"images = images / 255.0\n",
"labels = table(seq(1, n), labels+1, n, 10)\n",
"\n",
"# Split into training (55,000 examples) and validation (5,000 examples)\n",
"X = images[5001:nrow(images),]\n",
"X_val = images[1:5000,]\n",
"y = labels[5001:nrow(images),]\n",
"y_val = labels[1:5000,]\n",
"\n",
"# Train\n",
"epochs = 1\n",
"[W, b] = mnist_softmax::train(X, y, X_val, y_val, epochs)\n",
"\"\"\"\n",
"\n",
"# Bind the training CSV to $data and request the learned parameters W and b back.\n",
"script = dml(training)\n",
"script = script.input(\"$data\", \"data/mnist/mnist_train.csv\")\n",
"script = script.output(\"W\", \"b\")\n",
"\n",
"# Run the script, then unpack the two requested outputs for the following cells.\n",
"results = ml.execute(script)\n",
"W, b = results.get(\"W\", \"b\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Compute Test Accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# DML program: read the test CSV, preprocess it, and evaluate the trained model.\n",
"testing = \"\"\"\n",
"source(\"nn/examples/mnist_softmax.dml\") as mnist_softmax\n",
"\n",
"# Read test data\n",
"data = read($data, format=\"csv\")\n",
"n = nrow(data)\n",
"\n",
"# Extract images and labels\n",
"X_test = data[,2:ncol(data)]\n",
"y_test = data[,1]\n",
"\n",
"# Scale images to [0,1], and one-hot encode the labels\n",
"X_test = X_test / 255.0\n",
"y_test = table(seq(1, n), y_test+1, n, 10)\n",
"\n",
"# Eval on test set\n",
"probs = mnist_softmax::predict(X_test, W, b)\n",
"[loss, accuracy] = mnist_softmax::eval(probs, y_test)\n",
"\n",
"print(\"Test Accuracy: \" + accuracy)\n",
"\"\"\"\n",
"\n",
"# Bind the test CSV to $data and pass the trained W and b in as script inputs.\n",
"script = dml(testing)\n",
"script = script.input(\"$data\", \"data/mnist/mnist_test.csv\", W=W, b=b)\n",
"\n",
"# The DML program prints the test accuracy itself; no outputs are requested.\n",
"ml.execute(script)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Extract Model Into Spark DataFrames For Future Use"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Convert the trained parameters with toDF() and display both results.\n",
"W_df, b_df = W.toDF(), b.toDF()\n",
"(W_df, b_df)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 1
}