blob: 0c47c6636fbb1098424b96e4e0ab21f37c62bcb0 [file] [log] [blame]
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SystemML PySpark Recommendation Demo"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"%matplotlib inline\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from systemml import MLContext, dml # pip install systemml\n",
"plt.rcParams['figure.figsize'] = (10, 6)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"\r",
" 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r",
" 1 11.2M 1 151k 0 0 851k 0 0:00:13 --:--:-- 0:00:13 848k\r",
" 16 11.2M 16 1887k 0 0 1601k 0 0:00:07 0:00:01 0:00:06 1600k\r",
" 31 11.2M 31 3662k 0 0 1685k 0 0:00:06 0:00:02 0:00:04 1685k\r",
" 44 11.2M 44 5135k 0 0 1615k 0 0:00:07 0:00:03 0:00:04 1614k\r",
" 61 11.2M 61 7038k 0 0 1686k 0 0:00:06 0:00:04 0:00:02 1685k\r",
" 76 11.2M 76 8816k 0 0 1703k 0 0:00:06 0:00:05 0:00:01 1734k\r",
" 90 11.2M 90 10.1M 0 0 1687k 0 0:00:06 0:00:06 --:--:-- 1708k\r",
"100 11.2M 100 11.2M 0 0 1710k 0 0:00:06 0:00:06 --:--:-- 1722k\n"
]
}
],
"source": [
"%%sh\n",
"# Download and unpack the dataset.\n",
"# The whole step is skipped when the extracted file is already present so that the\n",
"# cell can be re-run: gunzip will not overwrite an existing amazon0601.txt and\n",
"# exits with a non-zero status instead, which makes the cell fail.\n",
"if [ ! -f amazon0601.txt ]; then\n",
"    curl -O http://snap.stanford.edu/data/amazon0601.txt.gz\n",
"    gunzip amazon0601.txt.gz\n",
"fi"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total number of products: 500\n"
]
}
],
"source": [
"# Load data\n",
"import pyspark.sql.functions as F\n",
"dataPath = \"amazon0601.txt\"\n",
"PRODUCT_ID_LIMIT = 500  # keep only pairs whose two product IDs are both below this (memory constraints)\n",
"\n",
"# Lines starting with '#' are skipped; every remaining line is split on tabs and its\n",
"# first two fields become one (prod_i, prod_j, 1.0) row.\n",
"X_train = (sc.textFile(dataPath)\n",
"    .filter(lambda l: not l.startswith(\"#\"))\n",
"    .map(lambda l: l.split(\"\\t\"))\n",
"    .map(lambda prods: (int(prods[0]), int(prods[1]), 1.0))\n",
"    .toDF((\"prod_i\", \"prod_j\", \"x_ij\"))\n",
"    .filter((F.col(\"prod_i\") < PRODUCT_ID_LIMIT) & (F.col(\"prod_j\") < PRODUCT_ID_LIMIT))\n",
"    .cache())\n",
"\n",
"# Largest ID in each column, computed in a single aggregation instead of two separate jobs\n",
"max_prod_i, max_prod_j = X_train.agg(F.max(\"prod_i\"), F.max(\"prod_j\")).first()\n",
"numProducts = max(max_prod_i, max_prod_j) + 1 # 0-based indexing\n",
"print(\"Total number of products: {}\".format(numProducts))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SystemML - Poisson Nonnegative Matrix Factorization (PNMF)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Start a SystemML MLContext on top of the SparkContext `sc`.\n",
"# NOTE(review): `sc` is not created anywhere in this notebook -- presumably it is\n",
"# injected by the PySpark kernel/shell; confirm before running elsewhere.\n",
"ml = MLContext(sc)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Define the PNMF kernel in DML, SystemML's DSL with R-like syntax.\n",
"# The script reads X, rank and max_iter, and produces W, H and losses.\n",
"pnmf = \"\"\"\n",
"# data & args\n",
"X = X+1 # change product IDs to be 1-based, rather than 0-based\n",
"V = table(X[,1], X[,2])\n",
"size = ifdef($size, -1)\n",
"if(size > -1) {\n",
" V = V[1:size,1:size]\n",
"}\n",
"\n",
"n = nrow(V)\n",
"m = ncol(V)\n",
"range = 0.01\n",
"W = Rand(rows=n, cols=rank, min=0, max=range, pdf=\"uniform\")\n",
"H = Rand(rows=rank, cols=m, min=0, max=range, pdf=\"uniform\")\n",
"losses = matrix(0, rows=max_iter, cols=1)\n",
"\n",
"# run PNMF\n",
"i=1\n",
"while(i <= max_iter) {\n",
" # update params\n",
" H = (H * (t(W) %*% (V/(W%*%H))))/t(colSums(W)) \n",
" W = (W * ((V/(W%*%H)) %*% t(H)))/t(rowSums(H))\n",
" \n",
" # compute loss\n",
" losses[i,] = -1 * (sum(V*log(W%*%H)) - as.scalar(colSums(W)%*%rowSums(H)))\n",
" i = i + 1;\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Execute the PNMF script through SystemML on Spark:\n",
"# bind the script's inputs, declare the variables to fetch, then run it.\n",
"script = (dml(pnmf)\n",
"          .input(X=X_train, max_iter=100, rank=10)\n",
"          .output(\"W\", \"H\", \"losses\"))\n",
"W, H, losses = (ml.execute(script)\n",
"                  .get(\"W\", \"H\", \"losses\"))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.text.Text at 0x10ba407b8>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAngAAAGJCAYAAAAZsU4bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xm8XWV99/3PLwMzJCGSBBIggMxCFR+minqEMloFW6Vg\nLQhorTi1PreP0vpIrI+3Q0urlgoOKYIyieCt+FAGbzlWrAjIpEIgyHgCJIAJ85Dk/O4/1jpmZbPP\nyT7n7J09nM/79Vqvvda1pmud7Y5frrWua0VmIkmSpN4xqd0VkCRJUnMZ8CRJknqMAU+SJKnHGPAk\nSZJ6jAFPkiSpxxjwJEmSeowBT5LGICJ2iIinmr2tJDWDAU9SS0TE/RHxXEQ8FRGPRMR/RMQm5br+\niHg+IuZWtj8kIu6r2f+FiNiy5ri3RsRgRGxXLn8rIl4sz/N0+fmOmn22rax7utz/mUrZ60Z7fZl5\nX2Zu0extRysivh0Rn2rFsSV1LwOepFZJ4M1lsNkH2Bf4ZGXdM8D/W2ef6vx9wPFDBRHxKmCjOtt9\nITO3yMzNy89L1jpo5kOVdZuX++xVKft5beUjwn8fJXUt/wGT1EoBkJmPAP8JvKqy7ivA8RGx4wj7\nfxs4sbJ8InBuk+oVaxUULWFnRsR/RsTTwEER8ZaIuKVs5bs/Ij5Z2X6niBisLP8sIhZExM/L7a+I\niOmj3bZcf1JEPBARyyLitIh4KCLeMOqLjDgoIm6MiOURcX1E7FdZd0pE3Fee/56IOLYs3zkifhoR\nK8rzf2e055XUfgY8SS0XEdsCRwE3V4qXAN8APj3CrtcDm0fErmWL2rHAd6gJZ010PHB62cp3PfA0\n8JdlK+RbgA9HxFGV7Wvf9Xg88FfALGAz4KOj3TYi9gK+THGtc4GtgNmjvZCImAn8CPgnYCbwb8AV\nETEtIjYHzgAOKa/tdcDt5a6fBX6UmdOBecC/j/bcktrPgCeplf5XRPwe+C/gWuBzNes/D/xpROw+\nwjGGWvEOBRYBD9fZ5mMR8fuypWrZOOr7/cy8ASAzX8rM/sy8o1z+NXAx8MYR9l+Ymfdm5gvAJcCr\nx7Dt28t6/DIzV1Lc1h5LoH0L8JvM/G5mDmbm+cC9wJvL9YPAXhGxYWYuzcxFZflKYH5EbFP+DX4x\nhnNLajMDnqRWOjozt8zMHTLzQ5n5YnVlZj4OnAl8ZoRjfAd4J/Bu4Lxhtvmn8jwzMnPWOOr7UHUh\nIg6MiGvLW5UrgFOAV4yw/6OV+ecoWuZGu+021Xpk5nPA8gbqXmsb4IGasgeAuZn5NEUL4geBRyPi\nhxGxc7nNR4ENgJsi4raIOGEM55bUZgY8Sa3USMvTPwNvAl5bb2VmPkjR2eJI4LLmVa2u2tuoF1K0\nrs0tb1kupHW3h4c8QnFrFICI2BSYMYbjPAzMrynbjuLWOJl5VWYeCswBfgd8rSxfmpnvzcxtKALg\n1yNi+zGcX1IbGfAktVVmPkkR8v6fETY7GTg4M59fP7X6g82A5Zm5MiIOAI6rWT+asNfotpcAx0TE\nfhExFfhHXh48a02NiA0r01SK5+/2iIh3RMTkiHgnsBPFc3hzIuJPI2JjYBXwbPlJuf025XGfpLiV\nu3oU1ympAxjwJLXKSKGkdt1XKAJG7fAnxUwxjtzN9dY1sW71yt4PfD4ingQ+QfEM3nD7rKtODW1b\nPuv3d8D3KFrbHgOeAF4cbh/g7ylu8w5NV5W3v99a1vtx4CMUw9asACYDH6No5XsMOJCitQ5gf+DG\nsifx94BTM3NgHdcmqcNE5nj/nRzh4BELgT8Flmbm3mXZHwFnU4xltRL4QGbeWK77CsVtmGeBd2fm\nrWX5icA/UPyj+NnMPK8s3wf4VnmsKzLzb1t2MZLUBmWP1xXAdpm5pN31kdQdWt2Cdw5weE3ZFymG\nIXgNcHq5TDn0wE6ZuTPwPooQSETMAD5FMUjq
/sDpETGtPNZZwHsycxdgl4ioPZckdZ1y/L2NI2Iz\n4F+AXxnuJI1GSwNeZl7Hy3t/DQJDAW065QO/FLcSziv3+yUwLSJmUwTEqzPzyfLWwtXAERExB9h8\naEiDct9jWnYxkrT+vI3i9umDFB0jjh95c0la25Q2nPPvgKsi4gyKh47/uCyfy9pDFAyUZbXlSyrl\nA3W2l6SulpknU3QskaQxaUcni/cDH8nM7SjC3n+U5bU9zILimbt6Pc9GKpckSZrQ2tGCd2JmfgQg\nM78XEd8syweAbSvbzaO4RTEA9NWUXzvC9nVFhOFPkiR1jcwc87ib66MFr/al3ksi4o0AEXEIsLgs\n/yFwQll+ALAiM5cCVwGHlu9PnEHxuqKrMvNR4KlyrKgo9/3BSBXJTKcunE4//fS218HJ72+iTn5/\n3Tv53XX3NF4tbcGLiAsoWt9mRsSDFL1m3wt8JSImAy8Afw2QmVdExFERcQ/FMCknleXLI+IzwE0U\nt2A/nUVnC4BTWXuYlCtbeT2SJEndoKUBLzPfOcyq/2uY7T84TPm3KIJcbfmvgL3GWD1JkqSe5Jss\n1PH6+vraXQWNg99fd/P7615+dxNbS99k0UkiIifKtUqSpO4WEWSHd7KQJEnSemTAkyRJ6jEGPEmS\npB5jwJMkSeoxBjxJkqQeY8CTJEnqMQY8SZKkHmPAkyRJ6jEGPEmSpB5jwJMkSeoxBjxJkqQeY8CT\nJEnqMQY8SZKkHmPAkyRJ6jEGPEmSpB5jwJMkSeoxBjxJkqQeM2ED3gMPwNe/3u5aSJIkNd+EDXir\nVsFnP9vuWkiSJDXfhA14O+wAv/89rFjR7ppIkiQ114QNeJMmwateBb/+dbtrIkmS1FwTNuAB7L03\n3H57u2shSZLUXBM64O21lwFPkiT1ngkd8GzBkyRJvSgys911WC8iImuvdfly2G47ePLJ4pk8SZKk\nThARZGaMdf8JHWtmzCim++5rd00kSZKaZ0IHPPA2rSRJ6j0GvL0dKkWSJPUWA54teJIkqccY8Ax4\nkiSpx0zoXrRQvJN2iy3gscdg003bUDFJkqQaHd2LNiIWRsTSiLi9UnZRRNxcTvdFxM2VdadFxOKI\nuDMiDquUHxERiyLi7oj4eKV8fkRcHxF3RcSFETFltHWcMgV22w1++9vxXKkkSVLnaPUt2nOAw6sF\nmXlcZu6TmfsAlwKXAUTE7sCxwO7AkcBXozAJOLM8zp7A8RGxW3m4LwBnZOauwArglLFU0tu0kiSp\nl7Q04GXmdcDyETY5FrignD8auCgzV2Xm/cBiYL9yWpyZD2TmSuCicluAgylCIsC5wNvGUk8DniRJ\n6iVt62QREa8HHs3Me8uiucBDlU2WlGW15QPA3IiYCSzPzMFK+TZjqYsBT5Ik9ZJ29qI9Hriwslzv\nQcJcR3ntujH1GBkKeBOkv4kkSepxo+6U0AwRMRn4M2CfSvEAsG1leR7wMEWI2662PDMfj4jpETGp\nbMUb2n5YCxYs+MN8X18ffX19AMyaBRtsAEuWwLx5Y70qSZKksenv76e/v79px2v5MCkRMR+4PDP3\nqpQdAXw8M99UKdsDOB/Yn+K27DXAzhStjHcBhwCPADcAx2Xmooi4GLgsMy+OiLOA2zLz7GHqUXeY\nlCGHHQZ/+7dw1FHjuVpJkqTx6/RhUi4A/hvYJSIejIiTylV/wdq3Z8nMO4DvAncAVwCnZmE18EHg\nauC3FB0xFpW7fQL4aETcDWwJLBxrXX0OT5Ik9YoJP9DxkPPOgyuvhAsuGHYTSZKk9aKjW/C6iS14\nkiSpV9iCV3rxRZg+HVasgA03XI8VkyRJqmELXpNsuCHstBPceWe7ayJJkjQ+BrwKb9NKkqReYMCr\nMOBJkqReYMCrMOBJkqReYMCr2GsvA54kSep+BryKefOK3rRLl7a7JpIkSWNnwKuI8DatJEnqfga8\nGrvsAvfe
2+5aSJIkjZ0Br8acOfDoo+2uhSRJ0tgZ8GoY8CRJUrcz4NUw4EmSpG5nwKthwJMkSd3O\ngFfDgCdJkrqdAa/G7NlFwMtsd00kSZLGxoBXY7PNYMoUeOqpdtdEkiRpbAx4dXibVpIkdTMDXh0G\nPEmS1M0MeHUY8CRJUjcz4NVhwJMkSd3MgFfHnDmwdGm7ayFJkjQ2Brw6bMGTJEndzIBXhwFPkiR1\nMwNeHQY8SZLUzQx4dRjwJElSN4ucIO/kiohs9FpXroRNNoEXXoDJk1tcMUmSpBoRQWbGWPe3Ba+O\nqVNh+nR44ol210SSJGn0DHjD8DatJEnqVga8YRjwJElStzLgDcOAJ0mSupUBbxgGPEmS1K0MeMMw\n4EmSpG7V0oAXEQsjYmlE3F5T/qGIWBQRv46Iz1fKT4uIxRFxZ0QcVik/otz+7oj4eKV8fkRcHxF3\nRcSFETGlWXU34EmSpG7V6ha8c4DDqwUR0Qe8BXhVZu4F/HNZvjtwLLA7cCTw1ShMAs4sj7MncHxE\n7FYe7gvAGZm5K7ACOKVZFTfgSZKkbtXSgJeZ1wHLa4rfD3w+M1eV2zxelh8NXJSZqzLzfmAxsF85\nLc7MBzJzJXBRuS3AwcCl5fy5wNuaVXcDniRJ6lbteAZvF+AN5a3VayPitWX5XOChynZLyrLa8gFg\nbkTMBJZn5mClfJtmVdKAJ0mSulXTnlkb5TmnZ+YBEbEvcAmwI1DvdRxJ/RCa5fa1+zTtvWszZsAz\nz8CLL8KGGzbrqJIkSa3XjoD3EHAZQGbeGBGry9a4AWC7ynbzgIcpQtzLyjPz8YiYHhGTyla8oe2H\ntWDBgj/M9/X10dfXN+y2kybB7NmwdClst92wm0mSJI1bf38//f39TTteZDat0av+CSLmA5eXHSqI\niL8G5mbm6RGxC3BNZm4fEXsA5wP7U9yWvQbYmaIF7y7gEOAR4AbguMxcFBEXA5dl5sURcRZwW2ae\nPUw9crTXuu++8O//DvvtN+rLliRJGrOIIDPr3d1sSEtb8CLiAqAPmBkRDwKnA/8BnBMRvwZeBE4A\nyMw7IuK7wB3ASuDUMpGtjogPAldThL2FmbmoPMUngIsi4jPALcDCZtbf5/AkSVI3ankLXqcYSwve\ne99btOL99V+3qFKSJEl1jLcFzzdZjMAWPEmS1I0MeCMw4EmSpG5kwBuBAU+SJHUjA94IDHiSJKkb\nGfBGYMCTJEndyIA3gtmzi4A3QToaS5KkHmHAG8Fmm8HkyfD00+2uiSRJUuMMeOvgbVpJktRtDHjr\nYMCTJEndxoC3DgY8SZLUbQx462DAkyRJ3caAtw4GPEmS1G0MeOtgwJMkSd3GgLcOBjxJktRtDHjr\nYMCTJEndxoC3DgY8SZLUbSInyHu4IiLHcq0rV8Imm8ALLxRvtZAkSWq1iCAzY6z724K3DlOnwvTp\n8MQT7a6JJElSYwx4DfA2rSRJ6iYGvAYY8CRJUjcx4DXAgCdJkrqJAa8BBjxJktRNDHgNMOBJkqRu\nYsBrgAFPkiR1EwNeAwx4kiSpmxjwGmDAkyRJ3cSA14A5c+CRR9pdC0mSpMYY8BowYwY8+yy8+GK7\nayJJkrRuBrwGTJoEW20Fy5a1uyaSJEnrZsBr0OzZsHRpu2shSZK0bga8BhnwJElStzDgNciAJ0mS\nuoUBr0EGPEmS1C1aGvAiYmFELI2I2ytlp0fEQETcXE5HVNadFhGLI+LOiDisUn5ERCyKiLsj4uOV\n8vkRcX1E3BURF0bElFZdiwFPkiR1i1a34J0DHF6n/F8yc59yuhIgInYHjgV2B44EvhqFScCZ5XH2\nBI6PiN3K43wBOCMzdwVWAKe06kIc7FiSJHWLlga8zLwOWF5nVdQpOxq4KDNXZeb9wGJgv3JanJkP\nZOZK4KJyW4CDgUvL+XOBtzWx+muxBU+SJHWLdj2D94GIuDUivhkR08qyuc
BDlW2WlGW15QPA3IiY\nCSzPzMFK+TatqrABT5IkdYt2BLyvAjtl5quBR4EzyvJ6rXq5jvLaddmsStYy4EmSpG7Rsk4Jw8nM\nxyqL3wAuL+cHgG0r6+YBD1OEuO1qyzPz8YiYHhGTyla8oe2HtWDBgj/M9/X10dfX13C9Z86Ep56C\nlSth6tSGd5MkSVqn/v5++vv7m3a8yGxZo1dxgoj5wOWZuVe5PCczHy3n/w7YNzPfGRF7AOcD+1Pc\nlr0G2JmilfEu4BDgEeAG4LjMXBQRFwOXZebFEXEWcFtmnj1MPXK817r11nDTTTB37rgOI0mSNKKI\nIDPr3cVsSEtb8CLiAqAPmBkRDwKnA2+KiFcDg8D9wPsAMvOOiPgucAewEji1TGSrI+KDwNUUYW9h\nZi4qT/EJ4KKI+AxwC7CwldczdJvWgCdJkjpZy1vwOkUzWvCOOAI+8hE48sgmVUqSJKmO8bbg+SaL\nUZg927HwJElS5zPgjYI9aSVJUjcw4I2CAU+SJHUDA94oGPAkSVI3MOCNggFPkiR1AwPeKBjwJElS\nNzDgjYIBT5IkdQPHwRuF1atho43gued8XZkkSWodx8FbjyZPLt5J+9hj695WkiSpXQx4o+RtWkmS\n1OkMeKNkwJMkSZ3OgDdKBjxJktTpDHijZMCTJEmdzoA3SgY8SZLU6Qx4ozRnjgFPkiR1NgPeKNmC\nJ0mSOp0Bb5Rmz4ZHH213LSRJkoZnwBslW/AkSVKn81Vlo7RqFWy8MTz/PEyZ0oSKSZIk1fBVZevZ\nlCkwYwY8/ni7ayJJklSfAW8MvE0rSZI6mQFvDAx4kiSpkxnwxsCx8CRJUicz4I2BLXiSJKmTGfDG\nwLHwJElSJzPgjYEteJIkqZMZ8MbAgCdJkjqZAW8MDHiSJKmTGfDGwIAnSZI6WUMBLyJ2iogNy/m+\niPhwRExvbdU611ZbwRNPwOrV7a6JJEnSyzXagncpsDoiXgl8HdgWuKBltepwU6fC9OlFyJMkSeo0\njQa8wcxcBbwN+LfM/Biwdeuq1fm8TStJkjpVowFvZUQcD5wI/Kgsm9qaKnUHx8KTJEmdqtGAdxJw\nIPDZzLwvInYAvrOunSJiYUQsjYjb66z7HxExGBFbVsq+EhGLI+LWiHh1pfzEiLg7Iu6KiBMq5ftE\nxO3lui81eC1NYQueJEnqVA0FvMy8IzM/nJkXRsQMYPPM/HwDu54DHF5bGBHzgD8BHqiUHQnslJk7\nA+8Dzi7LZwCfAvYF9gdOj4hp5W5nAe/JzF2AXSLiZedqFQOeJEnqVI32ou2PiC3K1rabgW9ExL+s\na7/MvA5YXmfVvwIfqyk7Gjiv3O+XwLSImE0REK/OzCczcwVwNXBERMyhCJo3lPufBxzTyPU0gwFP\nkiR1qkZv0U7LzKeAPwPOy8z9KVrgRi0i3gI8lJm/rlk1F3iosjxQltWWL6mUD9TZfr0w4EmSpE41\npdHtImJr4FjgH8Z6sojYuNz/0Hqr6yxnnXLWUT6sBQsW/GG+r6+Pvr6+kTYf0Zw5BjxJktQc/f39\n9Pf3N+14jQa8fwSuAn6emTdGxI7A4jGcbydgPnBbRAQwD7g5IvajaIHbtrLtPODhsryvpvzaEbYf\nVjXgjZcteJIkqVlqG54+/elPj+t4jXayuCQz987M95fL92bmnzd4jignMvM3mTknM3fMzB0oQtpr\nMnMZ8EPgBICIOABYkZlLKYLloRExrexwcShwVWY+CjwVEfuVYfEE4AeNXvh4GfAkSVKnarSTxbyI\n+H5ELCuHPbm07Am7rv0uAP6boofrgxFxUs0mf7jVmplXAPdFxD3A14BTy/LlwGeAm4BfAp8uO1tQ\nbrMQuBtYnJlXNnI9zTBrFjz2GAwOrq8zSpIkNSYyR3xsrdgo4hqKV5N9uyx6F/CXmVnvWbqOFBHZ\nyLWOxsyZsGhR8W5aSZKkZokIMrNef4
OGNNqLdqvMPCczV5XTt4AJH2u8TStJkjpRowHv8Yh4V0RM\nLqd3AU+0smLdwIAnSZI6UaMB72SKIVIeBR4B3k7x+rIJzaFSJElSJ2q0F+2DmfnWzNwqM2dl5jEU\ngx5PaLbgSZKkTtRoC149H21aLbrU1lvDwyOOvCdJkrT+jSfgjblnR6/YYQe4995210KSJGlt4wl4\nzR1zpAvttBP87nftroUkSdLaRhwHLyKepn6QC2DjzGz0VWdt14px8FasgG23haeegpjw7ZmSJKlZ\nxjsO3ogBLTM3H+uBJ4Lp02GDDeDxxx3sWJIkdY7x3KIVsOOO3qaVJEmdxYA3Tj6HJ0mSOo0Bb5wM\neJIkqdMY8MZpp50cKkWSJHUWA944+QyeJEnqNAa8cfIWrSRJ6jQjjoPXS1oxDh7A4CBsuik88QRs\nsknTDy9Jkiag8Y6DZwveOE2aBPPnw333tbsmkiRJBQNeE/gcniRJ6iQGvCbwOTxJktRJDHhNYMCT\nJEmdxIDXBI6FJ0mSOokBrwl8Bk+SJHUSh0lpguefhxkz4NlnYfLklpxCkiRNIA6T0gE23hhe8QoY\nGGh3TSRJkgx4TWNHC0mS1CkMeE2y4452tJAkSZ3BgNcktuBJkqROYcBrEgOeJEnqFAa8JjHgSZKk\nTmHAaxKfwZMkSZ3CgNckM2fC4CD8/vftrokkSZroDHhNEuFtWkmS1BlaGvAiYmFELI2I2ytl/xgR\nt0XELRFxZUTMqaz7SkQsjohbI+LVlfITI+LuiLgrIk6olO8TEbeX677UymtphAFPkiR1gla34J0D\nHF5T9sXM/KPMfA3w/wOnA0TEUcBOmbkz8D7g7LJ8BvApYF9gf+D0iJhWHuss4D2ZuQuwS0TUnmu9\n8jk8SZLUCVoa8DLzOmB5TdkzlcVNgcFy/q3AeeU2vwSmRcRsioB4dWY+mZkrgKuBI8qWv80z84Zy\n//OAY1p2MQ2wBU+SJHWCtjyDFxH/X0Q8CLyTonUOYC7wUGWzgbKstnxJpXygzvZtY8CTJEmdoC0B\nLzM/mZnbAecDHyqLo2azALJOOesobxsDniRJ6gRT2nz+C4EfAQsoWuC2raybBzxclvfVlF87wvbD\nWrBgwR/m+/r66OvrG3bbsZg3D5YtgxdegI02auqhJUlSD+vv76e/v79px4vM1jZ6RcR84PLM3Ktc\nfmVm3lPOfwh4fWYeW3ay+EBmvjkiDgC+lJkHlJ0sbgL2oWhxvAl4bWauiIhfUrQA3kjRYeMrmXnl\nMPXIVl8rwM47w+WXw267tfxUkiSpR0UEmVnvbmVDWtqCFxEXULS+zSyfuTsdeHNE7AqsBh4A/gYg\nM6+IiKMi4h7gWeCksnx5RHyGItgl8OmyswXAqcC3gI2AK4YLd+vT0G1aA54kSWqXlrfgdYr11YL3\ngQ/ArrvChz/c8lNJkqQeNd4WPN9k0WSOhSdJktrNgNdk9qSVJEntZsBrMgOeJElqN5/Ba7JnnoGt\ntoJnn4VJxmdJkjQGPoPXYTbbDLbcEh54oN01kSRJE5UBrwUOOAB+8Yt210KSJE1UBrwWOOgg+NnP\n2l0LSZI0URnwWuCgg+C669pdC0mSNFHZyaIFVq2CGTPgwQeLT0mSpNGwk0UHmjIF9t8ffv7zdtdE\nkiRNRAa8Fnn9671NK0mS2sOA1yJ2tJAkSe3iM3gt8uyzMGsWPP44bLzxejutJEnqAT6D16E23RT2\n3BNuvLHdNZEkSRONAa+FHC5FkiS1gwGvhexoIUmS2sFn8Fpo2TLYZRd44gmYPHm9nlqSJHUxn8Hr\nYLNmwZw58JvftLsmkiRpIjHgtZjDpUiSpPXNgNdiPocnSZLWNwNeiw214E2QRx0lSVIHMOC12I47\nwuAg3H9/u2siSZImCgNei0V4m1aSJK1fBrz1wI4WkiRpfTLgrQe24EmSpPXJgY7Xg9WrYcst4Xe/\ng1
e8oi1VkCRJXcSBjrvA5Mlw4IHw85+3uyaSJGkiMOCtJz6HJ0mS1hcD3npy2GHwgx84Hp4kSWo9\nA956su++sMEGtuJJkqTWM+CtJxFwyimwcGG7ayJJknqdvWjXo8ceg513hgcegGnT2loVSZLUwexF\n20W22goOOQQuvrjdNZEkSb2spQEvIhZGxNKIuL1S9sWIuDMibo2ISyNii8q60yJicbn+sEr5ERGx\nKCLujoiPV8rnR8T1EXFXRFwYEVNaeT3N4G1aSZLUaq1uwTsHOLym7Gpgz8x8NbAYOA0gIvYAjgV2\nB44EvhqFScCZ5XH2BI6PiN3KY30BOCMzdwVWAKe0+HrG7fDDYckS+M1v2l0TSZLUq1oa8DLzOmB5\nTdmPM3OwXLwemFfOvxW4KDNXZeb9FOFvv3JanJkPZOZK4CLg6HKfg4FLy/lzgbe16lqaZfJkOPFE\nW/EkSVLrtPsZvJOBK8r5ucBDlXVLyrLa8gFgbkTMBJZXwuIAsE1rq9scJ58M558PL73U7ppIkqRe\n1LaAFxH/AKzMzAuHiupslusor13XFV2Cd9oJ9twTfvjDdtdEkiT1orZ0SoiIE4GjKG6xDhkAtq0s\nzwMepghx29WWZ+bjETE9IiaVrXhD2w9rwYIFf5jv6+ujr69vHFcxPkOdLd7+9rZVQZIkdYj+/n76\n+/ubdryWj4MXEfOByzNzr3L5COAM4A2Z+URluz2A84H9KW7LXgPsTNHKeBdwCPAIcANwXGYuioiL\ngcsy8+KIOAu4LTPPHqYebR8Hr+q552DePLjtNth223VvL0mSJo6OHgcvIi4A/hvYJSIejIiTgH8D\nNgOuiYibI+KrAJl5B/Bd4A6K5/JOzcJq4IMUvW9/S9ERY1F5ik8AH42Iu4Etga7purDJJvAXfwHn\nntvumkiSpF7jmyza6Kab4Nhj4Z57YFK7u7tIkqSO0dEteBrZa18LW2xhZwtJktRctuC12U9+Au9+\ndzHw8RZbrHNzSZI0AYy3Bc+A1wHe+16YOhW++tV210SSJHUCA16DOjngrVgBr3oVXHABvOEN7a6N\nJElqN5/B6wHTp8OZZ8J73gPPP9/u2kiSpG5nC14HOfbY4i0Xn/tcu2siSZLayVu0DeqGgLd0Key9\nN1x5JbzmNe2ujSRJahdv0faQ2bPhi1+Ek0+GlSvbXRtJktStDHgd5oQTYNYs+Od/bndNJElSt5rS\n7gpobRHwta/BgQfC7rvDMce0u0aSJKnbGPA60Pz58KMfwZFHwqabwqGHtrtGkiSpm3iLtkO99rVw\n2WXwzndXb0zOAAAQmElEQVTCdde1uzaSJKmbGPA62EEHFYMf/9mfwa9+1e7aSJKkbmHA63CHHgrf\n+Aa8+c3w29+2uzaSJKkb+AxeFzj6aHjmGTjsMLjmGthjj3bXSJIkdTJb8LrEX/5l8YaLN7wB/vVf\nYfXqdtdIkiR1Kt9k0WXuuQdOOqmYP+cceOUr21sfSZLUfL7JYoJ55Suhvx/+/M/hgAPgzDNhcLDd\ntZIkSZ3EFrwudtddRWveBhsUt28PPLDdNZIkSc1gC94Etuuu8LOfwXHHFdOf/EnRutdjOVaSJI2S\nLXg9YuVK+M534H/+T5g9Gz75STj88OLVZ5IkqbuMtwXPgNdjVq2CSy6Bz34WpkyB97+/6IG72Wbt\nrpkkSWqUAa9BEyXgDRkchB//GM46C3760+KVZ+9/P+y5Z7trJkmS1sWA16CJFvCqHnqoeBvGN79Z\n9MI9/ng45hjYeut210ySJNVjwGvQRA54Q1auhB/9CL73PbjiCth9d3jb24rJ8fQkSeocBrwGGfDW\n9tJLcO218P3vww9+ADNmwMEHw5veBG98I7ziFe2uoSRJE5cBr0EGvOENDsLNNxdDrFx7LVx3HWy/\nfRH2Dj20+Nx003bXUpKkicOA1yADXuNWrYJf/aoIe1ddVcwfeCAc
eWQx7bKLw69IktRKBrwGGfDG\n7qmn4H//7+K5vf/8T5g6FQ45ZM0tXTtrSJLUXAa8BhnwmiMT7rgDfvKTooWvv78YWPlNb4L99y86\nbuy2G2yxRbtrKklS9zLgNciA1xqrV8NttxWB75ZbYNGi4h2506YVQW/33YtbukPT9tvD5MntrrUk\nSZ3NgNcgA976MzgIAwNw553FtHgx3H13MS1bBjvsUAzLstNOa0/bbw8bbtju2kuS1H4GvAYZ8DrD\n88/D735XhL7f/W7taWCgGK5l661fPm2zzZppzpziOUBJknpVRwe8iFgI/CmwNDP3LsveDiwAdgf2\nzcybK9ufBpwMrAI+kplXl+VHAF8CJgELM/MLZfl84CJgBnAz8FeZuWqYuhjwOtyqVUUL3yOPrD09\n/PDan8uWwfTpRdgbCn/Vz623LkLgnDmw0UbtvipJkkav0wPeQcAzwHmVgLcrMAh8DfgfQwEvInYH\nLgD2BeYBPwZ2BgK4GzgEeBi4ETguMxdFxMXA9zLzkog4C7g1M782TF0MeD1i9eq1g2BtAHz00eJz\n6VLYZJMi6M2aVQzePHNmMQ3Nb7EFbL752tNQmc8KSpLaZbwBb0ozK1MrM6+LiO1ryu4CiHjZSGpH\nAxeVLXD3R8RiYD+KgLc4Mx8o97uo3HYRcDBwfLn/uRQtg3UDnnrH5MlrWupGkgnLl69p9XviCXj8\n8eJzYKDoHPL002tPzzxTDAvzzDPF4M7TphWthdOnF4Fw1qz60+zZsOWWMGnS+vkbSJI0kpYGvFGa\nC/yisrykLAvgoUr5ALBfRMwElmfmYKV8m/VRUXWHiCJ0bbkl7Lnn6PYdHCwC34oV8OSTRVB84oki\nKC5bVvQU/tnPilbCZcuKz6efLloGZ89e02K41VZrPmfOhM02K4Lj0LTJJsW08cbFZECUJDVDJwW8\nes2QSfHcXb3yqLOP92DVFJMmFa1306Y1vs9LL8FjjxVh77HHitbCoc9bbikC4jPPwLPPrj099xy8\n8EIxTZ1aBL2NNloT/qrTxhvXXx7aZ+hzaH7DDetPG2xQTFOnrpk3XEpS7+ikgDcAbFtZnkfxzF0A\n29WWZ+bjETE9IiaVrXhD2w9rwYIFf5jv6+ujr6+vOTWXKELS3LnFNBaZRUh8/vk103PPrT09++ya\n8ur65cuL5RdeWPvzxRfXnl54oTjHypXF59D8iy8WLZ6TJ8OUKS//rFc2efLa05QpRWAc+hya33DD\nInAOfVYDaDWg1gbY6nw1sPqaPEm9qL+/n/7+/qYdr+XDpJQ9XS/PzL1qyq+l6GTxq3J5D+B8YH+K\nW7PXUHSymATcRdHJ4hHgBtbuZHFZZl5cdrK4LTPPHqYedrKQRjA4WPRkXr16zefKlcVntaz2s7p+\n1apin5Ur18y/9NKaFsqhkFkbYqthtV64re5fDYzVoFmdjyhaJBudarefPPnl89WwOzQNtYBW61Tb\nSlptLa3WceiYtcuj+WzkWoa2NxhL3aXTe9FeAPQBM4GlwOnAcuDfgFcAKyh6vh5Zbn8acAqwkpcP\nk/Jl1gyT8vmyfAfWDJNyC/CuzFw5TF0MeFKXGxxcExJffPHlQXPVqmKbwcGiRXRofvXqtZeHK8ss\nyqvbDM1Xg21tgK22kA59DgXdoZbSl15au65Dx6/OD/dZW1Y71V7L0HVUrwfWhL2hFtbqtPHG9XuU\nb7MNzJtXtEzPm1dMm2/e3v8dSBNBRwe8TmLAkzSRDYW9oZbZ2um5517eq/zJJ4te6AMDsGRJ8Tkw\nUITEoccRttlmzeemm67dernhhkV4rNc6Wm0RHWrdHGmqtnRKE4EBr0EGPEkav8wi+C1Zsvb08MPF\nrfWhFs1qy+VQ62JtS+RQS2j19n71dn+1tXQonMLwz4TWeza0ejs7ov4Ea39Wb2fXztduN3RLvPaz\n9jZ87S354dbX3lavdwu+9vzD
XUe9z0bm1/V3Ge7vONL62r9/7d9suEcLqvPDfbfrenRhuO+okfrV\nWx7pf0vV9bV/39Ey4DXIgCdJ3W8oIA73TGjts6HVQJn58gnW/qz+30TtfL3tqo8D1HsMoPZW+7pu\n0VfLh85TLa+ef6TrqPfZyPy6/i71zt3o+up3UG+++lnv7zHc37D2s/Z7qD66UO989eo0Un3rLdeu\nq1UvxFaX63Vae+ghA15DDHiSJGl9Gi4A1j4vW2/afvsOfpOFJEnSRFV7y3998nFVSZKkHmPAkyRJ\n6jEGPEmSpB5jwJMkSeoxBjxJkqQeY8CTJEnqMQY8SZKkHmPAkyRJ6jEGPEmSpB5jwJMkSeoxBjxJ\nkqQeY8CTJEnqMQY8SZKkHmPAkyRJ6jEGPEmSpB5jwJMkSeoxBjxJkqQeY8CTJEnqMQY8SZKkHmPA\nkyRJ6jEGPEmSpB5jwJMkSeoxBjxJkqQeY8CTJEnqMQY8SZKkHmPAkyRJ6jEGPEmSpB5jwJMkSeox\nBjxJkqQe09KAFxELI2JpRNxeKZsREVdHxF0RcVVETKus+0pELI6IWyPi1ZXyEyPi7nKfEyrl+0TE\n7eW6L7XyWiRJkrpFq1vwzgEOryn7BPDjzNwV+AlwGkBEHAnslJk7A+8Dzi7LZwCfAvYF9gdOr4TC\ns4D3ZOYuwC4RUXsu9YD+/v52V0Hj4PfX3fz+upff3cTW0oCXmdcBy2uKjwbOLefPLZeHys8r9/sl\nMC0iZlMExKsz88nMXAFcDRwREXOAzTPzhnL/84BjWnYxahv/kepufn/dze+ve/ndTWzteAZvVmYu\nBcjMR4FZZflc4KHKdgNlWW35kkr5QJ3tJUmSJrRO6mQRdZazTjnrKJckSZrQIrO1mSgitgcuz8y9\ny+U7gb7MXFreZr02M3ePiLPL+YvL7RYBbwTeVG7/N2X52cC1wE+H9i3LjwPemJnvH6Yehj9JktQ1\nMrNeY1ZDpjSzIsMI1m5t+yHwbuAL5ecPKuUfAC6OiAOAFWUIvAr4bNmxYhJwKPCJzFwREU9FxH7A\njcAJwFeGq8R4/kiSJEndpKUBLyIuAPqAmRHxIHA68Hngkog4GXgQeAdAZl4REUdFxD3As8BJZfny\niPgMcBPFLdhPl50tAE4FvgVsBFyRmVe28nokSZK6Qctv0UqSJGn96qROFi0REUdExKJyMOSPt7s+\nGllEzIuIn0TEHRHx64j4cFk+7ADZ6iwRMSkibo6IH5bL8yPi+vK7uzAi1sejIRqDiJgWEZdExJ0R\n8duI2N/fXneIiL+LiN+Ug/+fHxEb+NvrXM16EcRIejrgRcQk4EyKsfT2BI6PiN3aWyutwyrgo5m5\nB3Ag8IHyO6s7QLY60keAOyrLXwDOKL+7FcApbamVGvFlisdddgf+CFiEv72OFxHbAB8C9ik7NE4B\njsffXicb94sg1qWnAx6wH7A4Mx/IzJXARawZWFkdKDMfzcxby/lngDuBebx8gGwHte5AETEPOAr4\nZqX4YODScv5c4G3ru15at4jYHHh9Zp4DkJmrMvNJ/O11i8nApmUr3cbAwxSjUPjb60BNehHEiHo9\n4A03eLK6QETMB14NXA/Mrhkge6v21Uwj+FfgY5RjUkbETGB5Zg6W6weAbdpUN41sR+DxiDinvMX+\n9YjYBH97HS8zHwbOoOi4uAR4EriZYjQKf3vdo9EXQQy98GFEvR7wHAy5S0XEZsD3gI+ULXl+bx0u\nIt4MLC1bYId+e7XDJIHfZaeaAuwD/Htm7kMxmsEn8PvqeBExnaKVZ3uKELcpcGSdTf0uu9OYskyv\nB7wBYLvK8jyKZmt1sPIWw/eAb2fm0DiJS4eapMsBspe1q34a1uuAt0bEvcCFFLdmv0RxO2Ho3xp/\ng51rAHgoM28qly+lCHz+9jrfnwD3ZubvM3M18H3gj4Hp/va6ynC/tQFg28p2DX2XvR7wbgReGR
Hb\nR8QGwHEUAyqrs/0HcEdmfrlSNjRANsCJrBkgWx0iM/8+M7fLzB0pfms/ycx3Ubx55h3lZn53Haq8\nNfRQROxSFh0C/BZ/e93gQeCAiNgoIoI1352/vc423Isg4OUvgjgBoPoiiHUevNfHwYuIIyh6hk0C\nFmbm59tcJY0gIl4H/Bfwa4om6AT+HrgB+C7Ff8U8CLyjMuC1OkxEvBH4vzPzrRGxA0UHpxnALcC7\nyk5P6jAR8UcUHWSmAvdSDDg/GX97HS8iTqf4D6uVFL+z91C09Pjb60DVF0EASyleBPG/gEuo81uL\niDOBIyhfBJGZN6/zHL0e8CRJkiaaXr9FK0mSNOEY8CRJknqMAU+SJKnHGPAkSZJ6jAFPkiSpxxjw\nJEmSeowBT9KEEhFPl5/bR8TxTT72aTXL1zXz+JLUKAOepIlmaPDPHYB3jmbHymufhvP3a50o86DR\nHF+SmsWAJ2mi+hxwUETcHBEfiYhJEfHFiPhlRNwaEe+F4q0cEfFfEfED4I6y7PsRcWNE/Doi3lOW\nfQ7YuDzet8uyp4dOFhH/VG5/W0QcWzn2tRFxSUTcObSfJI3XlHZXQJLa5BOUr1MDKAPdiszcv3x3\n9c8j4upy29cAe2bmg+XySZm5IiI2Am6MiEsz87SI+EBm7lM5R5bH/nNg78zcKyJmlfv8tNzm1cAe\nwKPlOf84M/+7lRcuqffZgidJhcOAEyLiFuCXwJbAzuW6GyrhDuBvI+JW4HqK933uzMheB1wIkJnL\ngH5g38qxH8nivZG3AvPHfymSJjpb8CSpEMCHMvOatQoj3kjxgu/q8sHA/pn5YkRcC2xUOcZwxx5u\n+cXK/Gr8d1lSE9iCJ2miGQpXTwObV8qvAk6NiCkAEbFzRGxSZ/9pwPIy3O0GHFBZ99LQ/jXn+i/g\nL8rn/LYCXg/c0IRrkaS6/C9FSRPNUC/a24HV5S3Zb2XmlyNiPnBzRASwDDimzv5XAn8TEb8F7gJ+\nUVn3deD2iPhVZv7V0Lky8/sRcQBwGzAIfCwzl0XE7sPUTZLGJYrHPiRJktQrvEUrSZLUYwx4kiRJ\nPcaAJ0mS1GMMeJIkST3GgCdJktRjDHiSJEk9xoAnSZLUYwx4kiRJPeb/AMtx625KlXCeAAAAAElF\nTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x10b9beeb8>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot training loss over time\n",
"# Spark 2.x DataFrames have no .map(); going through .rdd works on both 1.x and 2.x.\n",
"# Rows are sorted by \"__INDEX\" and their first two fields become the (x, y) points.\n",
"xy = losses.toDF().sort(\"__INDEX\").rdd.map(lambda r: (r[0], r[1])).collect()\n",
"x, y = zip(*xy)\n",
"plt.plot(x, y)\n",
"plt.xlabel('Iteration')\n",
"plt.ylabel('Loss')\n",
"plt.title('PNMF Training Loss')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}