{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SystemML PySpark Recommendation Demo"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"%matplotlib inline\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['figure.figsize'] = (10, 6)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"\r",
" 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r",
" 7 11.2M 7 916k 0 0 1051k 0 0:00:10 --:--:-- 0:00:10 1051k\r",
" 17 11.2M 17 2064k 0 0 1159k 0 0:00:09 0:00:01 0:00:08 1159k\r",
" 29 11.2M 29 3424k 0 0 1234k 0 0:00:09 0:00:02 0:00:07 1234k\r",
" 39 11.2M 39 4564k 0 0 1207k 0 0:00:09 0:00:03 0:00:06 1207k\r",
" 44 11.2M 44 5118k 0 0 1073k 0 0:00:10 0:00:04 0:00:06 1073k\r",
" 47 11.2M 47 5527k 0 0 951k 0 0:00:12 0:00:05 0:00:07 933k\r",
" 50 11.2M 50 5804k 0 0 857k 0 0:00:13 0:00:06 0:00:07 749k\r",
" 53 11.2M 53 6142k 0 0 790k 0 0:00:14 0:00:07 0:00:07 544k\r",
" 58 11.2M 58 6715k 0 0 765k 0 0:00:15 0:00:08 0:00:07 431k\r",
" 68 11.2M 68 7932k 0 0 811k 0 0:00:14 0:00:09 0:00:05 562k\r",
" 83 11.2M 83 9585k 0 0 890k 0 0:00:12 0:00:10 0:00:02 818k\r",
" 96 11.2M 96 10.8M 0 0 941k 0 0:00:12 0:00:11 0:00:01 1055k\r",
"100 11.2M 100 11.2M 0 0 955k 0 0:00:12 0:00:12 --:--:-- 1254k\n"
]
}
],
"source": [
"%%sh\n",
"# Download the amazon0601 dataset from SNAP and unpack it.\n",
"# The work is skipped when the unpacked file already exists, so this cell can be\n",
"# re-run (e.g. Restart & Run All) without gunzip failing on an existing output file.\n",
"set -e  # stop at the first failing command instead of carrying on\n",
"if [ ! -f amazon0601.txt ]; then\n",
"    # -f: fail on HTTP errors instead of saving the error page; -L: follow redirects\n",
"    curl -fLO http://snap.stanford.edu/data/amazon0601.txt.gz\n",
"    gunzip amazon0601.txt.gz\n",
"fi"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total number of products: 500\n"
]
}
],
"source": [
"# Load data: one (prod_i, prod_j) pair per input line\n",
"import pyspark.sql.functions as F\n",
"\n",
"dataPath = \"amazon0601.txt\"\n",
"# Exclusive upper bound on the product IDs that are kept. Working on a subset keeps\n",
"# the demo within memory constraints; raise the limit to use more of the data.\n",
"PRODUCT_ID_LIMIT = 500\n",
"\n",
"# Lines starting with '#' are skipped; every other line is split on a tab and its\n",
"# first two fields become (prod_i, prod_j), with x_ij fixed to 1.0.\n",
"# NOTE: `sc` is not defined in this notebook -- presumably the SparkContext that the\n",
"# PySpark kernel provides; confirm for your environment.\n",
"X_train = (sc.textFile(dataPath)\n",
"             .filter(lambda l: not l.startswith(\"#\"))\n",
"             .map(lambda l: l.split(\"\\t\"))\n",
"             .map(lambda prods: (int(prods[0]), int(prods[1]), 1.0))\n",
"             .toDF((\"prod_i\", \"prod_j\", \"x_ij\"))\n",
"             .filter(\"prod_i < {0} AND prod_j < {0}\".format(PRODUCT_ID_LIMIT))\n",
"             .cache())\n",
"\n",
"# Compute both column maxima with one aggregation instead of two separate queries\n",
"max_prod_i, max_prod_j = X_train.agg(F.max(\"prod_i\"), F.max(\"prod_j\")).first()\n",
"numProducts = max(max_prod_i, max_prod_j) + 1 # 0-based indexing\n",
"print(\"Total number of products: {}\".format(numProducts))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SystemML - Poisson Nonnegative Matrix Factorization (PNMF)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create SystemML MLContext\n",
"from SystemML import MLContext\n",
"ml = MLContext(sc)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Define PNMF kernel in SystemML's DSL using the R-like syntax for PNMF\n",
"pnmf = \"\"\"\n",
"# data & args\n",
"X = read($X)\n",
"X = X+1 # change product IDs to be 1-based, rather than 0-based\n",
"V = table(X[,1], X[,2])\n",
"size = ifdef($size, -1)\n",
"if(size > -1) {\n",
" V = V[1:size,1:size]\n",
"}\n",
"max_iteration = as.integer($maxiter)\n",
"rank = as.integer($rank)\n",
"\n",
"n = nrow(V)\n",
"m = ncol(V)\n",
"range = 0.01\n",
"W = Rand(rows=n, cols=rank, min=0, max=range, pdf=\"uniform\")\n",
"H = Rand(rows=rank, cols=m, min=0, max=range, pdf=\"uniform\")\n",
"losses = matrix(0, rows=max_iteration, cols=1)\n",
"\n",
"# run PNMF\n",
"i=1\n",
"while(i <= max_iteration) {\n",
" # update params\n",
" H = (H * (t(W) %*% (V/(W%*%H))))/t(colSums(W)) \n",
" W = (W * ((V/(W%*%H)) %*% t(H)))/t(rowSums(H))\n",
" \n",
" # compute loss\n",
" losses[i,] = -1 * (sum(V*log(W%*%H)) - as.scalar(colSums(W)%*%rowSums(H)))\n",
" i = i + 1;\n",
"}\n",
"\n",
"# write outputs\n",
"write(losses, $lossout)\n",
"write(W, $Wout)\n",
"write(H, $Hout)\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Execute the PNMF script through SystemML on Spark\n",
"# Keys match the $X, $maxiter and $rank arguments used inside the script\n",
"script_args = {\"X\": X_train, \"maxiter\": 100, \"rank\": 10}\n",
"# Names of the script variables to retrieve once it has run\n",
"requested_outputs = [\"W\", \"H\", \"losses\"]\n",
"\n",
"# presumably clears state left over from an earlier run -- confirm against the MLContext API\n",
"ml.reset()\n",
"outputs = ml.executeScript(pnmf, script_args, requested_outputs)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.text.Text at 0x111c89050>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAngAAAGJCAYAAAAZsU4bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xu4XVV97//3NzcSICEkECMJEJSkDQXFqGAR67b8qqKt\npKeCwXrPaXsMCuqprXg8JdqeavFoU63w/LRpBApSRD1KpSge3SoIGqEYBSzbC5CLCZgLoJKQy/f8\nMeciM8t932tlXfb79TzzWXONeRuTpXk+e8wxxozMRJIkSd1jQqsrIEmSpMYy4EmSJHUZA54kSVKX\nMeBJkiR1GQOeJElSlzHgSZIkdRkDniSNUkScEBGPNnpfSRorA56kpomI+yPiVxHxaET8LCLWRMSh\n5bbeiHg8IuZV9j8rIn5ad/zOiJhVd97/iIh9EXFc+f2TEbGrvM5j5ee5dcccW9n2WHn8Lyplzx/p\n/WXmTzNzRqP3HamIuCoi/qoZ55bUmQx4kpopgZeXwWYJ8BzgPZVtvwD+Zz/HVNd/CpxfK4iIk4Fp\n/ez3d5k5IzOnl5+fPuCkmesr26aXx5xSKbu1vvIR4b+RkjqS/3hJarYAyMyfAf8OnFzZ9hHg/Ig4\nYZDjrwJeX/n+euCKBtUrDigoWsL+MSL+PSIeA86MiD8oWwwfKVsU31PZ/+kRsa/y/ZsRsTIibi1b\nBW+MiJkj3bfc/saIeCAiHoqIiyNifUT8zohvMuLMiFgbEdsj4vaIOK2ybXlE/LS8/o8i4ryyfGFE\nfD0idpTX/5eRXldSaxnwJB0UEXEs8DLgzkrxRuATwPsGOfR2YHpE/EbZovYq4F+oC2cNdD5wSdnK\ndzvwGPDqzDwC+APgwoh4WWX/+vc9ng+8FpgDHA68Y6T7RsQpwD8A5wHzgKOBp4z0RiJiNvBvwAeB\n2cA/AjdGxBERMR34EHBW2cL6fGBdeej/Av4tM2cC84GPjfTaklrLgCep2f5PRGwDvgF8DXh/3fYP\nAL8fEYsHOUetFe/3gHuBTf3s886I2Fa2VD00hvp+LjO/A5CZT2Rmb2beW37/PvCvwAsHOX51Zv4k\nM3cCnwZOHcW+ryzr8e3M3E3xWHs0gfYPgB9k5nWZuS8z/wX4CfDycvs+4JSIOCQzt2TmD8vy3cCC\niDim/G9w2yiuLamFDHiSmu2czJyVmSdk5lszc1d1Y2b+nKJl6a8HOce/AK8G3gBcOcA+Hyyvc2Rm\nzhlDfddXv0TEb0fE18pHlTuA5cBRgxy/ubL+K4qWuZHue0y1Hpn5K2D7MOpe7xjggbqyB4B5mfkY\nRQviW4DNEfGFiFhY7vMOYArw3Yj4XkS8bhTXltRCBjxJzTaclqf/DbwIeHZ/GzPzQYrBFmcDn21c\n1fpV/xj1UxSta/PKR5arad7j4ZqfUTwaBSAiDgOOHMV5NgEL6sqOo3g0TmZ+KTN/D5gL/Bj4/8vy\nLZn5J5l5DEUA/HhEHD+K60tqEQOepJbLzEcoQt5fDLLbm4DfzczHD06tnnQ4sD0zd0fE84BlddtH\nEvaGu++ngaURcVpETKboo1gfPOtNjohDKstkiv53J0XEuRExMSJeDTwd+GJEzI2I34+IacAe4JfA\nXoBy/2PK8z5C8Sh37wjuU1KLGfAkNdNgoaR+20cogkb99CfFSjGP3J39bWtg3forezPwgYh4BHgX\nRR+8gY4Zqk7D2rfs6/d24HqK1raHga3AroGOAd5N8Zi3tnypfPz9irLePwcuopi25hFgIvBOila+\nh4HfBi4oz3U6sLYcSXw9sCIzNwxxb5LaSGSO9d/IQU4esRr4fWBLZj6jLHsuxYisyRQdeVdk5nfL\nbRdT/JW+B7goM79cli8BPglMBW7MzLeV5VMo+uM8m+Ifr1eVj3IkqWuUI153AMdl5sZW10dS+2t2\nC94a4CV1ZZcC78nMZwGXUAzfJyJOopgSYDFF
P5vLIqL2OONyYHlmLgIWRUTtnMuBbZm5EFhVnluS\nOl45/960iDgc+DBwh+FO0nA1NeBl5i38+sivnwFHlOszKTv7UjxGuDYz92Tm/UAfcFpEzAWmZ+ba\ncr8rgaXl+jnsn/D0euCsht+EJLXGH1I8Pn2QYmDE+YPvLkn7TWrBNd8F3BoRH6LocHxGWT4PqM61\ntLEs2wNU+35sKMtrx6wHyMy95azrszJzWxPrL0lNl5lvouiyIkkj1opBFquBt2bmcRSdiP+5gedu\n9tQFkiRJba8VLXinl/MukZnXR8Q/leUbgWMr+80vywYqrx6zKSImAjMGar2LiOaNJpEkSWqwzBx1\nw9XBaMGrf6F3X0S8ECAizqLoawfwBWBZREwpXzx+IvCdzNwMPFLOBxXA64DPV46pvYT8XOCrg1Uk\nM106dLnkkktaXgcXf7vxuPj7de7ib9fZy1g1tQUvIq4BeoDZEfEgxajZP6UYITsF2Fl+JzPviYjr\ngHvYP31K7Q4v4MBpUm4qy1cDV0VEH8UcUfUTkEqSJI07TQ14mfnqATadPsD+7+fXX0ROZt4BnNJP\n+S6KqVUkSZJU8k0W6gg9PT2troJGyd+us/n7dS5/u/GtqW+yaCcRkePlXiVJUmeLCLLNB1lIkiTp\nIDLgSZIkdRkDniRJUpcx4EmSJHUZA54kSVKXMeBJkiR1GQOeJElSlzHgSZIkdRkDniRJUpcx4EmS\nJHUZA54kSVKXMeBJkiR1GQOeJElSlzHgSZIkdRkDniRJUpcx4EmSJHUZA54kSVKXMeBJkiR1GQOe\nJElSlxlXAS+z1TWQJElqvnEV8H7841bXQJIkqfnGVcC7445W10CSJKn5DHiSJEldxoAnSZLUZSLH\nyciDiMiZM5Nt2yCi1bWRJEkaWESQmaNOLOOqBe/ww+EnP2l1LSRJkpprXAW8Zz/bx7SSJKn7GfAk\nSZK6jAFPkiSpy4yrQRabNyeLF8PWrQ60kCRJ7ctBFiPwlKfAoYfCT3/a6ppIkiQ1T1MDXkSsjogt\nEbGuUnZtRNxZLj+NiDsr2y6OiL6IuDciXlwpXxIR6yLivohYVSmfUp6vLyJui4jjhqqTj2klSVK3\na3YL3hrgJdWCzFyWmUsycwnwGeCzABGxGDgPWAycDVwW8eSD1MuB5Zm5CFgUEbVzLge2ZeZCYBVw\n6VAVMuBJkqRu19SAl5m3ANsH2eU84Jpy/Rzg2szck5n3A33AaRExF5iemWvL/a4EllaOuaJcvx44\na6g6GfAkSVK3a1kfvIh4AbA5M2tTD88D1ld22ViWzQM2VMo3lGUHHJOZe4EdETFrsOvWAt44GVsi\nSZLGoVYOsjgf+FSDzznkaJO5c2HaNHjggQZfWZIkqU1MasVFI2Ii8F+AJZXijcCxle/zy7KByqvH\nbCrPOSMztw103ZUrVwLFK8uuuKKHSy7pGdN9SJIkNUJvby+9vb0NO1/T58GLiAXADZl5SqXspcBf\nZuaLKmUnAVcDp1M8er0ZWJiZGRG3AxcCa4EvAh/JzJsiYgVwcmauiIhlwNLMXDZAPbJ2r+99L+za\nBX/7t42/X0mSpLFq63nwIuIa4FsUI18fjIg3lpteRd3j2cy8B7gOuAe4EViR+9PnBcBq4D6gLzNv\nKstXA0dFRB/wNuBdw6mXAy0kSVI3G1dvsqjd66ZN8IxnwMMP+0YLSZLUftq6Ba9dHXMMTJ4MDz7Y\n6ppIkiQ13rgMeOBjWkmS1L0MeJIkSV3GgCdJktRlxn3AGydjTCRJ0jgybgPeMcfAxImwYcPQ+0qS\nJHWScRvwIuCEExxJK0mSus+4DXgAT30q/Oxnra6FJElSYxnwDHiSJKnLGPAMeJIkqcsY8Ax4kiSp\ny4zrgDd3Lmze3OpaSJIkNda4Dni24EmSpG5kwDPgSZKkLhM5Tl7lEBFZf69798LUqfD44zBpUosq\nJkmSVCci
yMwY7fHjugVv4kQ46ijYsqXVNZEkSWqccR3wwMe0kiSp+4z7gOdIWkmS1G3GfcCzBU+S\nJHUbA54BT5IkdRkDngFPkiR1GQOeAU+SJHUZA54BT5IkdRkD3lMdRStJkrrLuH6TBcDOnXDEEcVn\njHq+aEmSpMbxTRZjNHUqHHoobNvW6ppIkiQ1xrgPeGA/PEmS1F0MeBjwJElSdzHgYcCTJEndxYCH\nAU+SJHUXAx4wd65TpUiSpO5hwMMWPEmS1F0MeBjwJElSd2lqwIuI1RGxJSLW1ZW/NSLujYjvR8QH\nKuUXR0Rfue3FlfIlEbEuIu6LiFWV8ikRcW15zG0Rcdxo6mnAkyRJ3aTZLXhrgJdUCyKiB/gD4JTM\nPAX432X5YuA8YDFwNnBZxJPvlrgcWJ6Zi4BFEVE753JgW2YuBFYBl46mkgY8SZLUTZoa8DLzFmB7\nXfGbgQ9k5p5yn5+X5ecA12bmnsy8H+gDTouIucD0zFxb7nclsLRyzBXl+vXAWaOp54wZsHcv/OIX\nozlakiSpvbSiD94i4Hci4vaI+FpEPLssnwesr+y3sSybB2yolG8oyw44JjP3AjsiYtZIKxThSFpJ\nktQ9WhHwJgFHZubzgL8APt3Ac4/6pbw+ppUkSd1iUguuuR74LEBmro2IvRExm6LFrjpIYn5ZthE4\ntp9yKts2RcREYEZmbhvowitXrnxyvaenh56enie/G/AkSVKr9Pb20tvb27DzRWY27GT9XiBiAXBD\nOaCCiPhTYF5mXhIRi4CbM/P4iDgJuBo4neLR683AwszMiLgduBBYC3wR+Ehm3hQRK4CTM3NFRCwD\nlmbmsgHqkYPd61vfCieeCBdd1KAblyRJGqWIIDNH/WSyqS14EXEN0APMjogHgUuAfwbWRMT3gV3A\n6wAy856IuA64B9gNrKgksguATwJTgRsz86ayfDVwVUT0AVuBfsPdcNiCJ0mSukXTW/DaxVAteGvW\nQG8vXHHFgLtIkiQdFGNtwfNNFiVH0UqSpG5hwCv5iFaSJHULA17JgCdJkrqFffBK+/bB1KnF2yym\nTDmIFZMkSapjH7wGmTABjj4atmxpdU0kSZLGxoBX4WNaSZLUDQx4FY6klSRJ3cCAV2ELniRJ6gYG\nvAoDniRJ6gYGvAoDniRJ6gYGvAoDniRJ6gYGvAoDniRJ6gYGvIq5cw14kiSp8/kmi4pdu2D6dNi5\ns5j4WJIkqRV8k0UDHXJIEfC2bm11TSRJkkbPgFfHfniSJKnTGfDqGPAkSVKnM+DVMeBJkqROZ8Cr\nY8CTJEmdzoBXZ+5c2Ly51bWQJEkaPQNeHVvwJElSpzPg1bEFT5IkdToDXp05c+Dhh1tdC0mSpNEz\n4NU5+mh46KFW10KSJGn0fFVZnb17izda7NwJkyYdhIpJkiTV8VVlDTZxIhx5pK8rkyRJncuA1w/7\n4UmSpE5mwOuH/fAkSVInM+D14+ijbcGTJEmdy4DXDwOeJEnqZAa8ftgHT5IkdTIDXj/sgydJkjqZ\nAa8fPqKVJEmdrKkBLyJWR8SWiFhXKbskIjZExJ3l8tLKtosjoi8i7o2IF1fKl0TEuoi4LyJWVcqn\nRMS15TG3RcRxjai3AU+SJHWyZrfgrQFe0k/5hzNzSbncBBARi4HzgMXA2cBlEVGbwflyYHlmLgIW\nRUTtnMuBbZm5EFgFXNqIStsHT5IkdbKmBrzMvAXY3s+m/l69cQ5wbWbuycz7gT7gtIiYC0zPzLXl\nflcCSyvHXFGuXw+c1Yh62wdPkiR1slb1wXtLRNwVEf8UEUeUZfOA9ZV9NpZl84ANlfINZdkBx2Tm\nXmBHRMwaa+Vmz4YdO4r30kqSJHWaVgS8y4CnZeapwGbgQw0896hfyls1cSLMnOn7aCVJUmeadLAv\nmJnV3m2fAG4o1zcCx1a2zS/LBiqvHrMpIiYCMzJz20DXXrly5ZPrPT099P
T0DFjPWj+8OXMGvx9J\nkqSx6u3tpbe3t2Hni8xs2Mn6vUDEAuCGzDyl/D43MzeX628HnpuZr46Ik4CrgdMpHr3eDCzMzIyI\n24ELgbXAF4GPZOZNEbECODkzV0TEMmBpZi4boB45knt94Qth5Up40YtGdduSJEmjFhFk5qifTDa1\nBS8irgF6gNkR8SBwCfCiiDgV2AfcD/wZQGbeExHXAfcAu4EVlUR2AfBJYCpwY23kLbAauCoi+oCt\nQL/hbjScKkWSJHWqprfgtYuRtuC9+c1w8slwwQVNrJQkSVI/xtqC55ssBuBceJIkqVMZ8AbgXHiS\nJKlTGfAGYB88SZLUqQx4AzDgSZKkTmXAG4B98CRJUqcy4A3APniSJKlTOU3KAPbsgalTYdeu4tVl\nkiRJB4vTpDTJpElwxBGwbcAXn0mSJLUnA94g7IcnSZI6kQFvEPbDkyRJnciANwinSpEkSZ3IgDcI\nA54kSepEBrxBGPAkSVInMuANYs4c++BJkqTOY8AbhC14kiSpExnwBmHAkyRJnciANwgDniRJ6kQG\nvEHYB0+SJHUi30U7iN27Ydo0eOIJmGAUliRJB4nvom2iyZNhxgzfRytJkjqLAW8I9sOTJEmdxoA3\nBPvhSZKkTmPAG4IteJIkqdMY8IZgwJMkSZ3GgDcEA54kSeo0Brwh2AdPkiR1GgPeEGzBkyRJncaA\nNwQDniRJ6jQGvCEY8CRJUqcZVsCLiKdHxCHlek9EXBgRM5tbtfZgHzxJktRphtuC9xlgb0ScCHwc\nOBa4pmm1aiNHHVW8qmzfvlbXRJIkaXiGG/D2ZeYe4A+Bj2bmO4GnNq9a7WPyZDj8cNi+vdU1kSRJ\nGp7hBrzdEXE+8Hrg38qyyc2pUvuxH54kSeokww14bwR+G/hfmfnTiDgBuGqogyJidURsiYh1/Wz7\n7xGxLyJmVcoujoi+iLg3Il5cKV8SEesi4r6IWFUpnxIR15bH3BYRxw3zfkbEfniSJKmTDCvgZeY9\nmXlhZn4qIo4Epmfm3w3j0DXAS+oLI2I+8HvAA5WyxcB5wGLgbOCyiIhy8+XA8sxcBCyKiNo5lwPb\nMnMhsAq4dDj3M1K24EmSpE4y3FG0vRExo2xtuxP4RER8eKjjMvMWoL/ea38PvLOu7Bzg2szck5n3\nA33AaRExlyJQri33uxJYWjnminL9euCs4dzPSBnwJElSJxnuI9ojMvNR4L8AV2bm6cD/N5oLRsQr\ngPWZ+f26TfOA9ZXvG8uyecCGSvmGsuyAYzJzL7Cj+si3UQx4kiSpk0wa7n4R8VSKR6j/Y7QXi4hp\nwLspHs82Qwy2ceXKlU+u9/T00NPTM6yTzpkDP/rRWKolSZI0sN7eXnp7ext2vuEGvPcBXwJuzcy1\nEfE0ikeoI/V0YAHwvbJ/3Xzgzog4jaLFrjpIYn5ZtpFi3r36cirbNkXERGBGZm4b6OLVgDcSRx8N\nt902qkMlSZKGVN/w9N73vndM5xvuIItPZ+YzMvPN5fefZOYfDfMaUS5k5g8yc25mPi0zT6B43Pqs\nzHwI+ALwqnJk7AnAicB3MnMz8EhEnFaGwtcBny/P/QWKqVsAzgW+Osw6jYiPaCVJUicZ7iCL+RHx\nuYh4qFw+U46EHeq4a4BvUYx8fTAi3li3S7I//N0DXAfcA9wIrMjMLPe7AFgN3Af0ZeZNZflq4KiI\n6APeBrxrOPczUgY8SZLUSWJ/hhpkp4ibKV5NVpv77jXAH2dms/rSNVxE5HDutT+bNsGSJbB5c4Mr\nJUmS1I+IIDMHHVsw6PHDDHh3ZeapQ5W1s7EEvCeegMMOg127YMJwxx1LkiSN0lgD3nDjytaIeE1E\nTCyX1wBbR3vRTjNlShHwduxodU0kSZKGNtyA9yaKKVI2Az8DXgm8oUl1akv2w5MkSZ1iuKNoH8jM\nV2Tm0Zk5JzOXAsMdRdsVfB+tJEnqFG
PpUfaOhtWiA8yZA1u2tLoWkiRJQxtLwBt1x79OdPzx8OCD\nra6FJEnS0MYS8EY3JLVDLVgA99/f6lpIkiQNbdBXlUXEY/Qf5AKY1pQatakFC+CrTXlPhiRJUmMN\nGvAyc/rBqki7swVPkiR1CqftHaZawBvlXMmSJEkHjQFvmGbOLN5isX17q2siSZI0OAPeCPiYVpIk\ndQID3ggY8CRJUicw4I2AAU+SJHUCA94IHH+8AU+SJLU/A94I2IInSZI6gQFvBAx4kiSpExjwRsC5\n8CRJUicw4I3AzJkQATt2tLomkiRJAzPgjUCEj2klSVL7M+CNkAFPkiS1OwPeCBnwJElSuzPgjZAB\nT5IktTsD3ggZ8CRJUrsz4I2QAU+SJLU7A94IOReeJElqdwa8EZo5s/h0LjxJktSuDHgj5Fx4kiSp\n3RnwRsGAJ0mS2pkBbxQMeJIkqZ0Z8Ebh+OMNeJIkqX0Z8EZhwQJ44IFW10KSJKl/TQ14EbE6IrZE\nxLpK2fsi4nsRcVdEfCUi5le2XRwRfRFxb0S8uFK+JCLWRcR9EbGqUj4lIq4tj7ktIo5r5v3U+IhW\nkiS1s2a34K0BXlJXdmlmPjMzTwU+D1wCEBEnAecBi4GzgcsiIspjLgeWZ+YiYFFE1M65HNiWmQuB\nVcClTb2bkgFPkiS1s6YGvMy8BdheV/aLytfDgK3l+iuAazNzT2beD/QBp0XEXGB6Zq4t97sSWFqu\nnwNcUa5fD5zV8Jvox5FHwr59zoUnSZLaU0v64EXE30TEg8AbgPeXxfOA9ZXdNpZl84ANlfINZdkB\nx2TmXmBHRMxqXs0LzoUnSZLaWUsCXma+JzOPo3iEu2qo/Ucght6lMQx4kiSpXU1q8fWvAW4s1zcC\nx1a2zS/LBiqvHrMpIiYCMzJz20AXW7ly5ZPrPT099PT0jLriBjxJktQovb299Pb2Nux8kZkNO1m/\nF4hYANyQmaeU30/MzB+V628FTsvM15aDLK4GTqd49HozsDAzMyJuBy4E1gJfBD6SmTdFxArg5Mxc\nERHLgKWZuWyAemQj7/XDH4YHH4RVjWx/lCRJAiKCzBz1k8mmtuBFxDVADzC77HN3CfDyiPgNYA/w\nE+DNAJl5T0RcB9wD7AZWVBLZBcAnganAjZl5U1m+GrgqIvooBmv0G+6aYcEC+MY3DtbVJEmShq/p\nLXjtotEteHfeCW96E9x1V8NOKUmSBIy9Bc83WYySffAkSVK7MuCNknPhSZKkdmXAGyXnwpMkSe3K\ngDcGBjxJktSODHhjsGABPPBAq2shSZJ0IAPeGNiCJ0mS2pEBbwyOP96AJ0mS2o8BbwxswZMkSe3I\niY7H4NFH4ZhjYPt2mDy5oaeWJEnjmBMdt9CMGXDCCbBuXatrIkmStJ8Bb4zOOAO+9a1W10KSJGk/\nA94YGfAkSVK7MeCN0RlnwK23troWkiRJ+xnwxujEE+Hxx2H9+lbXRJIkqWDAG6OIohXvtttaXRNJ\nkqSCAa8B7IcnSZLaiQGvAQx4kiSpnTjRcQM8/jgcdRQ8/DAcemhTLiFJksYRJzpuA9Omwcknw3e/\n2+qaSJIkGfAaxse0kiSpXRjwGsSAJ0mS2oV98BpkwwY49dSiH16M+om5JEmSffDaxvz5cNhh0NfX\n6ppIkqTxzoDXQD6mlSRJ7cCA10AGPEmS1A4MeA1kwJMkSe3AQRYNtHs3zJoF69fDzJlNvZQkSepi\nDrJoI5Mnw3OeA7ff3uqaSJKk8cyA12A+ppUkSa1mwGswA54kSWo1++A12LZtsGBB8TlpUtMvJ0mS\nupB98NrMrFnFpMc/+EGrayJJksYrA14TnHEGfPObra6FJEkar5oa8CJidURsiYh1lbJLI+LeiLgr\nIj4TETMq2y6OiL5y+4sr5UsiYl1E3BcRqyrlUyLi2vKY2yLiuGbez3C9/OVw/fWtroUkSRqvmt2C\ntw
Z4SV3Zl4HfysxTgT7gYoCIOAk4D1gMnA1cFhG1Z8+XA8szcxGwKCJq51wObMvMhcAq4NJm3sxw\nvexlcPfdcP/9ra6JJEkaj5oa8DLzFmB7XdlXMnNf+fV2YH65/grg2szck5n3U4S/0yJiLjA9M9eW\n+10JLC3XzwGuKNevB85qyo2M0CGHwLnnwjXXtLomkiRpPGp1H7w3ATeW6/OA9ZVtG8uyecCGSvmG\nsuyAYzJzL7AjImY1s8LD9ZrXwFVXwTgZpCxJktpIywJeRPwPYHdmfqqRp23gucbkjDPgiSfgjjta\nXRNJkjTetGSmtoh4A/Ay4HcrxRuBYyvf55dlA5VXj9kUEROBGZm5baDrrly58sn1np4eenp6RnsL\nQ4rY34r3nOc07TKSJKkL9Pb20tvb27DzNX2i44hYANyQmaeU318KfAj4nczcWtnvJOBq4HSKR683\nAwszMyPiduBCYC3wReAjmXlTRKwATs7MFRGxDFiamcsGqMdBmei4qq8PzjwTNmwo3lMrSZI0HG09\n0XFEXAN8i2Lk64MR8Ubgo8DhwM0RcWdEXAaQmfcA1wH3UPTLW1FJZBcAq4H7gL7MvKksXw0cFRF9\nwNuAdzXzfkZq4UJ42tPg5ptbXRNJkjSe+KqyJvvYx+CWW+BTjexpKEmSutpYW/AMeE3285/D058O\n69fDjBlD7y9JktTWj2gFRx0FPT3w2c+2uiaSJGm8MOAdBK99bTGaVpIk6WDwEe1BsHMnHHMMrFsH\n8+cPvb8kSRrffETbAaZOhT/6I19dJkmSDg4D3kFSe0w7ThpMJUlSCxnwDpIzz4Rdu6CBk1RLkiT1\ny4B3kEyYAO95D1xyia14kiSpuQx4B9GrXw2bN8NXv9rqmkiSpG5mwDuIJk2Cv/orWLnSVjxJktQ8\nBryD7Pzz4eGH4f/+31bXRJIkdSsD3kE2cWLRimdfPEmS1CwGvBZ41atg+3a4+eZW10SSJHUjA14L\n2IonSZKayYDXIueeC48+Cl/6UqtrIkmSuo0Br0UmTixa8GzFkyRJjWbAa6FXvhJ++Uu46aZW10SS\nJHUTA14LTZgAf/3X8Bd/Abt3t7o2kiSpWxjwWmzpUpg/H1atanVNJElSt4gcJx3AIiLb9V5//GM4\n/XS44w44/vhW10aSJLVaRJCZMdrjbcFrA09/Orz97fCWtzjgQpIkjZ0Br028851FS97nP9/qmkiS\npE7nI9o28vWvw2tfC3ffDdOnt7o2kiSpVcb6iNaA12be8AaYPRs+9KFW10SSJLWKAW+YOiXgPfww\nnHxy8YZKLH7kAAAPGklEQVSLU09tdW0kSVIrOMiiyxx9NPzt38Kf/Rns3dvq2kiSpE5kwGtDb3wj\nTJsG73tfq2siSZI6kQGvDU2YAP/6r7BmDXzmM62ujSRJ6jT2wWtjd9wBL30pfOUr8Mxntro2kiTp\nYLEPXhd79rPhox8tXmf28MOtro0kSeoUtuB1gHe/G269FW6+GaZMaXVtJElSszlNyjB1csDbtw/O\nOQfmz4fLL291bSRJUrP5iHYcmDABrr66eNPFxz7W6tpIkqR219SAFxGrI2JLRKyrlL0yIn4QEXsj\nYknd/hdHRF9E3BsRL66UL4mIdRFxX0SsqpRPiYhry2Nui4jjmnk/rTRjBnzhC/DBD8LFFztHniRJ\nGlizW/DWAC+pK/s+8IfA16uFEbEYOA9YDJwNXBYRtabJy4HlmbkIWBQRtXMuB7Zl5kJgFXBpU+6i\nTZx4IqxdC9/+Nrz85bBtW6trJEmS2lFTA15m3gJsryv7z8zsA+qfK58DXJuZezLzfqAPOC0i5gLT\nM3Ntud+VwNLKMVeU69cDZzX+LtrL0UfDl78MJ50Ez30urFs39DGSJGl8aac+ePOA9ZXvG8uyecCG\nSvmGsuyAYzJzL7AjImY1v6qtNWkSfPjDxZsuzjoLrruu1TWSJEnt
pJ0CXiOMerRJJ/rjPy5a8/7y\nL4vXm23a1OoaSZKkdjCp1RWo2AgcW/k+vywbqLx6zKaImAjMyMwBe6atXLnyyfWenh56enoaUe+W\netaz4K674P3vh1NOgQsvhD//czjssFbXTJIkDVdvby+9vb0NO1/T58GLiAXADZl5Sl3514A/z8w7\nyu8nAVcDp1M8er0ZWJiZGRG3AxcCa4EvAh/JzJsiYgVwcmauiIhlwNLMXDZAPTp2Hrzhuv/+YoTt\nN78Jf/M38LrXFVOsSJKkztLWEx1HxDVADzAb2AJcQjHo4qPAUcAO4K7MPLvc/2KKkbG7gYsy88tl\n+bOBTwJTgRsz86Ky/BDgKuBZwFZgWTlAo7+6dH3Aq7n9dnjHO+BXv4K3vAWWLYPDD291rSRJ0nC1\ndcBrJ+Mp4AFkwr//O3z84/CNb8C558Kf/mnxfltJktTeDHjDNN4CXtWmTbBmDXziEzBrVtGi9zu/\nA0uW+G5bSZLakQFvmMZzwKvZtw++8hW44Yain96Pf1zMpfeCF8CZZ8IzngFz5kCMq7HIkiS1HwPe\nMBnwft2OHfCtbxVh79Zb4e67i0e7v/mbxbJ4cfH2jGOPhfnzi/DnoA1JkprPgDdMBrzhefhh+OEP\n4d57i8++Pti4ETZsgEcegac+tQh78+cXwa8W/o49Fo47zhZASZIawYA3TAa8sdu5s+jPt359Efjq\nPx94AHbtKlr9qsuCBXDMMTBvHsyY0eq7kCSp/RnwhsmAd3Ds2FH07evrgx/9qPhcv75oBdy4sWjd\nO+aYYpk9G448slhmzSo+Z84sJmk+/PADP6dNg0MPLT59TCxJ6nYGvGEy4LVeJjz2WBH0Nm2Cbdtg\n+/Ziqa0/8gj88pfwi18US239V7+Cxx8vWhEnTy6CXjX0VT8POwymTy+Www8vPmfMKELkrFlFsJw9\nu1ifMcNHypKk9mPAGyYDXnfILB4DP/74/tBXXf/Vr/aHw8ce2788+mgRILduLcLk1q3FsnPn/sA3\nezYcddT+lsWZMw9cZs2CpzylWA49tNX/JSRJ3cyAN0wGPPXniSf2h72f/3z/544dv75s3QpbthTL\npEn7w968efsHmlQHoDz1qcV+kiSNlAFvmAx4apTao+Za2KuNMq4fePLww0WL4Pz5+0PgvHn7B5zU\n+iL6mFiSVM+AN0wGPB1se/bA5s0HBsBNm/b3QawNPMksWgLnzDlwOfLIol/h1Kn7+xxOm1b0QZw0\n6deXiROLpbo+cWIxKKW2TJxYhMkJE/r/hOKzv/Xa9/r1obbXr0uShmbAGyYDntrVY4/BQw8Vy5Yt\n+9d37Njfx3Dnzv3re/bsX3bv3r++d2+xVNf37du/VL9nHvi5b19Rl8xiqV+vfa9fH2p7f+rD40iX\n+nPUn3egc9fCb3WZPLkI0IccUnzWlmqIrobmgcJvfwF2oFA71L6Dhej6e6oF89p6faCvD/D164Mt\n1WPrl+ofE/X/nQb6Xv/fvf579Y8MSQa8YTPgSa3VX3gc6VI7tvrZ33p/y759B4bjPXuKPpi7dhUB\neufO/QN4du/eH5ZrgXnPnv7Db3//rAz0T81Q+w4Woge7r2pQrw/1/e033P/e/Z27+sdD/X+f+vXa\nHyHVstofJbXy2vd9+wYOhP21RlcD7FB/DPQXkIe7rb9z1rYPtj7SgN9fEJ80qXhf+OTJxVJdr4Xk\n2vohh+z/Q6X6Wd2/dkztj5Xh1G2w+xvsj4LqHwcDbR9Ji3+7hP+x1mOg+5w9u799DXjDYsCTpPZV\nC4z1AbG29BdeBwurMPAfFcPd1t85a9sHWx9JwK/de3+huhaEn3ii+Kxfr22vldf+SKl+1rbXB+vh\n1m2g+xvsD4LqevWPjPr7G+q/51B1O9jGWo/B7nPr1l/ff6wBzzF+kqSWmzChaKGS1Bi+E0CSJKnL\nGPAkSZK6jAFPkiSpyxjwJEmS
uowBT5IkqcsY8CRJkrqMAU+SJKnLGPAkSZK6jAFPkiSpyxjwJEmS\nuowBT5IkqcsY8CRJkrqMAU+SJKnLGPAkSZK6jAFPkiSpyxjwJEmSuowBT5IkqcsY8CRJkrpMUwNe\nRKyOiC0Rsa5SdmREfDki/jMivhQRR1S2XRwRfRFxb0S8uFK+JCLWRcR9EbGqUj4lIq4tj7ktIo5r\n5v1IkiR1gma34K0BXlJX9i7gK5n5G8BXgYsBIuIk4DxgMXA2cFlERHnM5cDyzFwELIqI2jmXA9sy\ncyGwCri0mTej1unt7W11FTRK/nadzd+vc/nbjW9NDXiZeQuwva74HOCKcv0KYGm5/grg2szck5n3\nA33AaRExF5iemWvL/a6sHFM91/XAWQ2/CbUF/6HqXP52nc3fr3P5241vreiDNycztwBk5mZgTlk+\nD1hf2W9jWTYP2FAp31CWHXBMZu4FdkTErOZVXZIkqf21wyCLbOC5YuhdJEmSultkNjJf9XOBiOOB\nGzLzGeX3e4GezNxSPn79WmYujoh3AZmZf1fudxNwCfBAbZ+yfBnwwsx8c22fzPx2REwEfpaZc369\nFhARzb1RSZKkBsrMUTdcTWpkRQYQHNiy9gXgDcDfAa8HPl8pvzoi/p7i0euJwHcyMyPikYg4DVgL\nvA74SOWY1wPfBs6lGLTRr7H8R5IkSeokTW3Bi4hrgB5gNrCFokXu/wCfBo6laJ07LzN3lPtfTDEy\ndjdwUWZ+uSx/NvBJYCpwY2ZeVJYfAlwFPAvYCiwrB2hIkiSNW01/RCtJkqSDqx0GWTRdRLw0In5Y\nTpT8l62ujwYWEfMj4qsRcXdEfD8iLizLB5wgW+0lIiZExJ0R8YXyu79dh4iIIyLi0+Vk83dHxOn+\nfp2jfFnA3eWLAa4uXwbg79emGvUyiIF0fcCLiAnAP1JMuPxbwPkR8ZutrZUGsQd4R2b+FvDbwAXl\n79XvBNlqSxcB91S++9t1jn+g6AazGHgm8EP8/TpCOaDxT4BnlYMaJwHn4+/Xzhr1Moh+dX3AA04D\n+jLzgczcDVxLMUGy2lBmbs7Mu8r1XwD3AvMZeIJstZGImA+8DPinSrG/XQeIiBnACzJzDUA56fwj\n+Pt1ikeBJ4DDImISMI1iPll/vzbViJdBDHb+8RDw6idQrk6UrDYWEQuAU4HbgacMMEG22svfA+/k\nwPkt/e06wwnAzyNiTfmI/eMRcSj+fh0hM7cDHwIepAh2j2TmV/D36zQjfRnEgMZDwFMHiojDKV4/\nd1HZklc/GsjRQW0mIl4ObClbYAd7dOBv154mAUuAj2XmEuCXFI+L/P9eB4iIpwFvB44HjqFoyftj\n/P063ah/r/EQ8DYCx1W+zy/L1KbKxwvXA1dlZm2exC0R8ZRy+1zgoVbVTwN6PvCKiPgJ8CngdyPi\nKmCzv11H2ACsz8zvlt8/QxH4/P9eZ3gOcGtmbitf3fk54Az8/TrNQL/XRorp5WqGzDLjIeCtBU6M\niOMjYgqwjGKCZLWvfwbuycx/qJTVJsiGAyfIVpvIzHdn5nGZ+TSK/599NTNfC9yAv13bKx8LrY+I\nRWXRWcDd+P+9TvGfwPMiYmrZ+f4sisFO/n7tbaCXQcCvvwxiWTky+gTKl0EMeuLxMA9eRLyUYnTY\nBGB1Zn6gxVXSACLi+cA3gO9TNE0n8G6K/yFfRz8TZKv9RMQLgf+ema+IiFn423WEiHgmxQCZycBP\ngDcCE/H36wgR8U6KcLAX+A/gvwLT8fdrS416GcSA5x8PAU+SJGk8GQ+PaCVJksYVA54kSVKXMeBJ\nkiR1GQOeJElSlzHgSZIkdRkDniRJUpcx4EkaVyLisfLz+Ig4v8Hnvrju+y2NPL8kDZcBT9J4U5v8\n8wTg1SM5MCImDrHLuw+4UOaZIzm/JDWKAU/SePV+4MyIuDMiLoqICRFxaUR8OyLuiog/geKtHB
Hx\njYj4PMWru4iIz0XE2oj4fkT817Ls/cC08nxXlWWP1S4WER8s9/9eRJxXOffXIuLTEXFv7ThJGqtJ\nra6AJLXIuyhfpwZQBrodmXl6+d7qWyOi9iqgZwG/lZkPlt/fmJk7ImIqsDYiPpOZF0fEBZm5pHKN\nLM/9R8AzMvOUiJhTHvP1cp9TgZOAzeU1z8jMbzXzxiV1P1vwJKnwYuB1EfEfwLeBWcDCctt3KuEO\n4G0RcRdwOzC/st9Ang98CiAzHwJ6gedWzv2zLN4beRewYOy3Imm8swVPkgoBvDUzbz6gMOKFwC/r\nvv8ucHpm7oqIrwFTK+cY7rVqdlXW9+K/y5IawBY8SeNNLVw9BkyvlH8JWBERkwAiYmFEHNrP8UcA\n28tw95vA8yrbnqgdX3etbwKvKvv5HQ28APhOA+5FkvrlX4qSxpvaKNp1wL7ykewnM/MfImIBcGdE\nBPAQsLSf428C/ltE3A38J3BbZdvHgXURcUdmvrZ2rcz8XEQ8D/gesA94Z2Y+FBGLB6ibJI1JFN0+\nJEmS1C18RCtJktRlDHiSJEldxoAnSZLUZQx4kiRJXcaAJ0mS1GUMeJIkSV3GgCdJktRlDHiSJEld\n5v8B1ALYNEsawJYAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x111b9b390>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot training loss over time\n",
"# NOTE: `sqlContext` is not defined in this notebook -- presumably provided by the\n",
"# PySpark kernel; confirm for your environment.\n",
"losses = outputs.getDF(sqlContext, \"losses\")\n",
"# Collect the rows in ID order and unpack them on the driver. This avoids\n",
"# DataFrame.map, which PySpark 2.0+ no longer provides.\n",
"# r[0] and r[1] are presumably the ID and the loss value -- confirm against the schema.\n",
"xy = [(r[0], r[1]) for r in losses.sort(losses.ID).collect()]\n",
"x, y = zip(*xy)\n",
"plt.plot(x, y)\n",
"plt.xlabel('Iteration')\n",
"plt.ylabel('Loss')\n",
"# The trailing ';' keeps the Text object returned by plt.title out of the cell output\n",
"plt.title('PNMF Training Loss');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}