blob: fe03a274cfd11c83d2a3bb8c103a6b7d9dec7f31 [file] [log] [blame]
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Mini-batch preprocessor\n",
"\n",
"The mini-batch preprocessor is a utility that prepares input data for use by models that support mini-batch as an optimization option. (This is currently only the case for Neural Networks.) It is effectively a packing operation that builds arrays of dependent and independent variables from the source data table.\n",
"\n",
"The mini-batch preprocessor was added in MADlib 1.14."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.\n",
" \"You should import from traitlets.config instead.\", ShimWarning)\n",
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n",
" warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n"
]
}
],
"source": [
"%load_ext sql"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"u'Connected: gpadmin@madlib'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Greenplum Database 5.4.0 on GCP (demo machine)\n",
"%sql postgresql://gpadmin@35.184.253.255:5432/madlib\n",
" \n",
"# PostgreSQL local\n",
"#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
"\n",
"# Greenplum Database 4.3.10.0\n",
"#%sql postgresql://gpdbchina@10.194.10.68:61000/madlib"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>version</th>\n",
" </tr>\n",
" <tr>\n",
" <td>MADlib version: 1.14-dev, git revision: rc/1.13-rc1-66-g4cced1b, cmake configuration time: Mon Apr 23 16:26:17 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'MADlib version: 1.14-dev, git revision: rc/1.13-rc1-66-g4cced1b, cmake configuration time: Mon Apr 23 16:26:17 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7',)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%sql select madlib.version();\n",
"#%sql select version();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1. Load data\n",
"Based on the well known iris dataset."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"Done.\n",
"52 rows affected.\n",
"52 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>id</th>\n",
" <th>attributes</th>\n",
" <th>class_text</th>\n",
" <th>class</th>\n",
" <th>state</th>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>[Decimal('5.0'), Decimal('3.2'), Decimal('1.2'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>[Decimal('5.5'), Decimal('3.5'), Decimal('1.3'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>[Decimal('4.9'), Decimal('3.1'), Decimal('1.5'), Decimal('0.1')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>[Decimal('4.4'), Decimal('3.0'), Decimal('1.3'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>[Decimal('5.1'), Decimal('3.4'), Decimal('1.5'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>[Decimal('5.0'), Decimal('3.5'), Decimal('1.3'), Decimal('0.3')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>[Decimal('4.5'), Decimal('2.3'), Decimal('1.3'), Decimal('0.3')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>[Decimal('4.4'), Decimal('3.2'), Decimal('1.3'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>[Decimal('5.0'), Decimal('3.5'), Decimal('1.6'), Decimal('0.6')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>[Decimal('5.1'), Decimal('3.8'), Decimal('1.9'), Decimal('0.4')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>[Decimal('4.8'), Decimal('3.0'), Decimal('1.4'), Decimal('0.3')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>[Decimal('5.1'), Decimal('3.8'), Decimal('1.6'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>[Decimal('5.7'), Decimal('2.8'), Decimal('4.5'), Decimal('1.3')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>[Decimal('6.3'), Decimal('3.3'), Decimal('4.7'), Decimal('1.6')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>[Decimal('4.9'), Decimal('2.4'), Decimal('3.3'), Decimal('1.0')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>[Decimal('6.6'), Decimal('2.9'), Decimal('4.6'), Decimal('1.3')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>[Decimal('5.2'), Decimal('2.7'), Decimal('3.9'), Decimal('1.4')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>[Decimal('5.0'), Decimal('2.0'), Decimal('3.5'), Decimal('1.0')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>[Decimal('5.9'), Decimal('3.0'), Decimal('4.2'), Decimal('1.5')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>20</td>\n",
" <td>[Decimal('6.0'), Decimal('2.2'), Decimal('4.0'), Decimal('1.0')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>21</td>\n",
" <td>[Decimal('6.1'), Decimal('2.9'), Decimal('4.7'), Decimal('1.4')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>22</td>\n",
" <td>[Decimal('5.6'), Decimal('2.9'), Decimal('3.6'), Decimal('1.3')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>23</td>\n",
" <td>[Decimal('6.7'), Decimal('3.1'), Decimal('4.4'), Decimal('1.4')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>24</td>\n",
" <td>[Decimal('5.6'), Decimal('3.0'), Decimal('4.5'), Decimal('1.5')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>25</td>\n",
" <td>[Decimal('5.8'), Decimal('2.7'), Decimal('4.1'), Decimal('1.0')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>26</td>\n",
" <td>[Decimal('6.2'), Decimal('2.2'), Decimal('4.5'), Decimal('1.5')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>27</td>\n",
" <td>[Decimal('5.6'), Decimal('2.5'), Decimal('3.9'), Decimal('1.1')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <td>28</td>\n",
" <td>[Decimal('5.0'), Decimal('3.4'), Decimal('1.5'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>29</td>\n",
" <td>[Decimal('4.4'), Decimal('2.9'), Decimal('1.4'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>30</td>\n",
" <td>[Decimal('4.9'), Decimal('3.1'), Decimal('1.5'), Decimal('0.1')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>31</td>\n",
" <td>[Decimal('5.4'), Decimal('3.7'), Decimal('1.5'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>32</td>\n",
" <td>[Decimal('4.8'), Decimal('3.4'), Decimal('1.6'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>33</td>\n",
" <td>[Decimal('4.8'), Decimal('3.0'), Decimal('1.4'), Decimal('0.1')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>34</td>\n",
" <td>[Decimal('4.3'), Decimal('3.0'), Decimal('1.1'), Decimal('0.1')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>35</td>\n",
" <td>[Decimal('5.8'), Decimal('4.0'), Decimal('1.2'), Decimal('0.2')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>36</td>\n",
" <td>[Decimal('5.7'), Decimal('4.4'), Decimal('1.5'), Decimal('0.4')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>37</td>\n",
" <td>[Decimal('5.4'), Decimal('3.9'), Decimal('1.3'), Decimal('0.4')]</td>\n",
" <td>Iris_setosa</td>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>38</td>\n",
" <td>[Decimal('6.0'), Decimal('2.9'), Decimal('4.5'), Decimal('1.5')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>39</td>\n",
" <td>[Decimal('5.7'), Decimal('2.6'), Decimal('3.5'), Decimal('1.0')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>40</td>\n",
" <td>[Decimal('5.5'), Decimal('2.4'), Decimal('3.8'), Decimal('1.1')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>41</td>\n",
" <td>[Decimal('5.5'), Decimal('2.4'), Decimal('3.7'), Decimal('1.0')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>42</td>\n",
" <td>[Decimal('5.8'), Decimal('2.7'), Decimal('3.9'), Decimal('1.2')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>43</td>\n",
" <td>[Decimal('6.0'), Decimal('2.7'), Decimal('5.1'), Decimal('1.6')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>44</td>\n",
" <td>[Decimal('5.4'), Decimal('3.0'), Decimal('4.5'), Decimal('1.5')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>45</td>\n",
" <td>[Decimal('6.0'), Decimal('3.4'), Decimal('4.5'), Decimal('1.6')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>46</td>\n",
" <td>[Decimal('6.7'), Decimal('3.1'), Decimal('4.7'), Decimal('1.5')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>47</td>\n",
" <td>[Decimal('6.3'), Decimal('2.3'), Decimal('4.4'), Decimal('1.3')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>48</td>\n",
" <td>[Decimal('5.6'), Decimal('3.0'), Decimal('4.1'), Decimal('1.3')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>49</td>\n",
" <td>[Decimal('5.5'), Decimal('2.5'), Decimal('4.0'), Decimal('1.3')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>50</td>\n",
" <td>[Decimal('5.5'), Decimal('2.6'), Decimal('4.4'), Decimal('1.2')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>51</td>\n",
" <td>[Decimal('6.1'), Decimal('3.0'), Decimal('4.6'), Decimal('1.4')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
" <tr>\n",
" <td>52</td>\n",
" <td>[Decimal('5.8'), Decimal('2.6'), Decimal('4.0'), Decimal('1.2')]</td>\n",
" <td>Iris_versicolor</td>\n",
" <td>2</td>\n",
" <td>Tennessee</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(1, [Decimal('5.0'), Decimal('3.2'), Decimal('1.2'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
" (2, [Decimal('5.5'), Decimal('3.5'), Decimal('1.3'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
" (3, [Decimal('4.9'), Decimal('3.1'), Decimal('1.5'), Decimal('0.1')], u'Iris_setosa', 1, u'Alaska'),\n",
" (4, [Decimal('4.4'), Decimal('3.0'), Decimal('1.3'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
" (5, [Decimal('5.1'), Decimal('3.4'), Decimal('1.5'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
" (6, [Decimal('5.0'), Decimal('3.5'), Decimal('1.3'), Decimal('0.3')], u'Iris_setosa', 1, u'Alaska'),\n",
" (7, [Decimal('4.5'), Decimal('2.3'), Decimal('1.3'), Decimal('0.3')], u'Iris_setosa', 1, u'Alaska'),\n",
" (8, [Decimal('4.4'), Decimal('3.2'), Decimal('1.3'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
" (9, [Decimal('5.0'), Decimal('3.5'), Decimal('1.6'), Decimal('0.6')], u'Iris_setosa', 1, u'Alaska'),\n",
" (10, [Decimal('5.1'), Decimal('3.8'), Decimal('1.9'), Decimal('0.4')], u'Iris_setosa', 1, u'Alaska'),\n",
" (11, [Decimal('4.8'), Decimal('3.0'), Decimal('1.4'), Decimal('0.3')], u'Iris_setosa', 1, u'Alaska'),\n",
" (12, [Decimal('5.1'), Decimal('3.8'), Decimal('1.6'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
" (13, [Decimal('5.7'), Decimal('2.8'), Decimal('4.5'), Decimal('1.3')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (14, [Decimal('6.3'), Decimal('3.3'), Decimal('4.7'), Decimal('1.6')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (15, [Decimal('4.9'), Decimal('2.4'), Decimal('3.3'), Decimal('1.0')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (16, [Decimal('6.6'), Decimal('2.9'), Decimal('4.6'), Decimal('1.3')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (17, [Decimal('5.2'), Decimal('2.7'), Decimal('3.9'), Decimal('1.4')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (18, [Decimal('5.0'), Decimal('2.0'), Decimal('3.5'), Decimal('1.0')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (19, [Decimal('5.9'), Decimal('3.0'), Decimal('4.2'), Decimal('1.5')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (20, [Decimal('6.0'), Decimal('2.2'), Decimal('4.0'), Decimal('1.0')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (21, [Decimal('6.1'), Decimal('2.9'), Decimal('4.7'), Decimal('1.4')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (22, [Decimal('5.6'), Decimal('2.9'), Decimal('3.6'), Decimal('1.3')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (23, [Decimal('6.7'), Decimal('3.1'), Decimal('4.4'), Decimal('1.4')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (24, [Decimal('5.6'), Decimal('3.0'), Decimal('4.5'), Decimal('1.5')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (25, [Decimal('5.8'), Decimal('2.7'), Decimal('4.1'), Decimal('1.0')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (26, [Decimal('6.2'), Decimal('2.2'), Decimal('4.5'), Decimal('1.5')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (27, [Decimal('5.6'), Decimal('2.5'), Decimal('3.9'), Decimal('1.1')], u'Iris_versicolor', 2, u'Alaska'),\n",
" (28, [Decimal('5.0'), Decimal('3.4'), Decimal('1.5'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (29, [Decimal('4.4'), Decimal('2.9'), Decimal('1.4'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (30, [Decimal('4.9'), Decimal('3.1'), Decimal('1.5'), Decimal('0.1')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (31, [Decimal('5.4'), Decimal('3.7'), Decimal('1.5'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (32, [Decimal('4.8'), Decimal('3.4'), Decimal('1.6'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (33, [Decimal('4.8'), Decimal('3.0'), Decimal('1.4'), Decimal('0.1')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (34, [Decimal('4.3'), Decimal('3.0'), Decimal('1.1'), Decimal('0.1')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (35, [Decimal('5.8'), Decimal('4.0'), Decimal('1.2'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (36, [Decimal('5.7'), Decimal('4.4'), Decimal('1.5'), Decimal('0.4')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (37, [Decimal('5.4'), Decimal('3.9'), Decimal('1.3'), Decimal('0.4')], u'Iris_setosa', 1, u'Tennessee'),\n",
" (38, [Decimal('6.0'), Decimal('2.9'), Decimal('4.5'), Decimal('1.5')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (39, [Decimal('5.7'), Decimal('2.6'), Decimal('3.5'), Decimal('1.0')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (40, [Decimal('5.5'), Decimal('2.4'), Decimal('3.8'), Decimal('1.1')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (41, [Decimal('5.5'), Decimal('2.4'), Decimal('3.7'), Decimal('1.0')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (42, [Decimal('5.8'), Decimal('2.7'), Decimal('3.9'), Decimal('1.2')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (43, [Decimal('6.0'), Decimal('2.7'), Decimal('5.1'), Decimal('1.6')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (44, [Decimal('5.4'), Decimal('3.0'), Decimal('4.5'), Decimal('1.5')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (45, [Decimal('6.0'), Decimal('3.4'), Decimal('4.5'), Decimal('1.6')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (46, [Decimal('6.7'), Decimal('3.1'), Decimal('4.7'), Decimal('1.5')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (47, [Decimal('6.3'), Decimal('2.3'), Decimal('4.4'), Decimal('1.3')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (48, [Decimal('5.6'), Decimal('3.0'), Decimal('4.1'), Decimal('1.3')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (49, [Decimal('5.5'), Decimal('2.5'), Decimal('4.0'), Decimal('1.3')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (50, [Decimal('5.5'), Decimal('2.6'), Decimal('4.4'), Decimal('1.2')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (51, [Decimal('6.1'), Decimal('3.0'), Decimal('4.6'), Decimal('1.4')], u'Iris_versicolor', 2, u'Tennessee'),\n",
" (52, [Decimal('5.8'), Decimal('2.6'), Decimal('4.0'), Decimal('1.2')], u'Iris_versicolor', 2, u'Tennessee')]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS iris_data;\n",
"\n",
"CREATE TABLE iris_data(\n",
" id serial,\n",
" attributes numeric[],\n",
" class_text varchar,\n",
" class integer,\n",
" state varchar\n",
");\n",
"\n",
"INSERT INTO iris_data(id, attributes, class_text, class, state) VALUES\n",
"(1,ARRAY[5.0,3.2,1.2,0.2],'Iris_setosa',1,'Alaska'),\n",
"(2,ARRAY[5.5,3.5,1.3,0.2],'Iris_setosa',1,'Alaska'),\n",
"(3,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Alaska'),\n",
"(4,ARRAY[4.4,3.0,1.3,0.2],'Iris_setosa',1,'Alaska'),\n",
"(5,ARRAY[5.1,3.4,1.5,0.2],'Iris_setosa',1,'Alaska'),\n",
"(6,ARRAY[5.0,3.5,1.3,0.3],'Iris_setosa',1,'Alaska'),\n",
"(7,ARRAY[4.5,2.3,1.3,0.3],'Iris_setosa',1,'Alaska'),\n",
"(8,ARRAY[4.4,3.2,1.3,0.2],'Iris_setosa',1,'Alaska'),\n",
"(9,ARRAY[5.0,3.5,1.6,0.6],'Iris_setosa',1,'Alaska'),\n",
"(10,ARRAY[5.1,3.8,1.9,0.4],'Iris_setosa',1,'Alaska'),\n",
"(11,ARRAY[4.8,3.0,1.4,0.3],'Iris_setosa',1,'Alaska'),\n",
"(12,ARRAY[5.1,3.8,1.6,0.2],'Iris_setosa',1,'Alaska'),\n",
"(13,ARRAY[5.7,2.8,4.5,1.3],'Iris_versicolor',2,'Alaska'),\n",
"(14,ARRAY[6.3,3.3,4.7,1.6],'Iris_versicolor',2,'Alaska'),\n",
"(15,ARRAY[4.9,2.4,3.3,1.0],'Iris_versicolor',2,'Alaska'),\n",
"(16,ARRAY[6.6,2.9,4.6,1.3],'Iris_versicolor',2,'Alaska'),\n",
"(17,ARRAY[5.2,2.7,3.9,1.4],'Iris_versicolor',2,'Alaska'),\n",
"(18,ARRAY[5.0,2.0,3.5,1.0],'Iris_versicolor',2,'Alaska'),\n",
"(19,ARRAY[5.9,3.0,4.2,1.5],'Iris_versicolor',2,'Alaska'),\n",
"(20,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'Alaska'),\n",
"(21,ARRAY[6.1,2.9,4.7,1.4],'Iris_versicolor',2,'Alaska'),\n",
"(22,ARRAY[5.6,2.9,3.6,1.3],'Iris_versicolor',2,'Alaska'),\n",
"(23,ARRAY[6.7,3.1,4.4,1.4],'Iris_versicolor',2,'Alaska'),\n",
"(24,ARRAY[5.6,3.0,4.5,1.5],'Iris_versicolor',2,'Alaska'),\n",
"(25,ARRAY[5.8,2.7,4.1,1.0],'Iris_versicolor',2,'Alaska'),\n",
"(26,ARRAY[6.2,2.2,4.5,1.5],'Iris_versicolor',2,'Alaska'),\n",
"(27,ARRAY[5.6,2.5,3.9,1.1],'Iris_versicolor',2,'Alaska'),\n",
"(28,ARRAY[5.0,3.4,1.5,0.2],'Iris_setosa',1,'Tennessee'),\n",
"(29,ARRAY[4.4,2.9,1.4,0.2],'Iris_setosa',1,'Tennessee'),\n",
"(30,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Tennessee'),\n",
"(31,ARRAY[5.4,3.7,1.5,0.2],'Iris_setosa',1,'Tennessee'),\n",
"(32,ARRAY[4.8,3.4,1.6,0.2],'Iris_setosa',1,'Tennessee'),\n",
"(33,ARRAY[4.8,3.0,1.4,0.1],'Iris_setosa',1,'Tennessee'),\n",
"(34,ARRAY[4.3,3.0,1.1,0.1],'Iris_setosa',1,'Tennessee'),\n",
"(35,ARRAY[5.8,4.0,1.2,0.2],'Iris_setosa',1,'Tennessee'),\n",
"(36,ARRAY[5.7,4.4,1.5,0.4],'Iris_setosa',1,'Tennessee'),\n",
"(37,ARRAY[5.4,3.9,1.3,0.4],'Iris_setosa',1,'Tennessee'),\n",
"(38,ARRAY[6.0,2.9,4.5,1.5],'Iris_versicolor',2,'Tennessee'),\n",
"(39,ARRAY[5.7,2.6,3.5,1.0],'Iris_versicolor',2,'Tennessee'),\n",
"(40,ARRAY[5.5,2.4,3.8,1.1],'Iris_versicolor',2,'Tennessee'),\n",
"(41,ARRAY[5.5,2.4,3.7,1.0],'Iris_versicolor',2,'Tennessee'),\n",
"(42,ARRAY[5.8,2.7,3.9,1.2],'Iris_versicolor',2,'Tennessee'),\n",
"(43,ARRAY[6.0,2.7,5.1,1.6],'Iris_versicolor',2,'Tennessee'),\n",
"(44,ARRAY[5.4,3.0,4.5,1.5],'Iris_versicolor',2,'Tennessee'),\n",
"(45,ARRAY[6.0,3.4,4.5,1.6],'Iris_versicolor',2,'Tennessee'),\n",
"(46,ARRAY[6.7,3.1,4.7,1.5],'Iris_versicolor',2,'Tennessee'),\n",
"(47,ARRAY[6.3,2.3,4.4,1.3],'Iris_versicolor',2,'Tennessee'),\n",
"(48,ARRAY[5.6,3.0,4.1,1.3],'Iris_versicolor',2,'Tennessee'),\n",
"(49,ARRAY[5.5,2.5,4.0,1.3],'Iris_versicolor',2,'Tennessee'),\n",
"(50,ARRAY[5.5,2.6,4.4,1.2],'Iris_versicolor',2,'Tennessee'),\n",
"(51,ARRAY[6.1,3.0,4.6,1.4],'Iris_versicolor',2,'Tennessee'),\n",
"(52,ARRAY[5.8,2.6,4.0,1.2],'Iris_versicolor',2,'Tennessee');\n",
"\n",
"SELECT * FROM iris_data ORDER BY id;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2. Run preprocessor \n",
"\n",
"Run the preprocessor to generate the packed output table:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"2 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>__id__</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" </tr>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>[[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]</td>\n",
" <td>[[-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>[[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]</td>\n",
" <td>[[-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025]]</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(0L, [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]], [[-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597]]),\n",
" (1L, [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]], [[-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025]])]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;\n",
"\n",
"SELECT madlib.minibatch_preprocessor('iris_data', -- Source table\n",
" 'iris_data_packed', -- Output table\n",
" 'class_text', -- Dependent variable\n",
" 'attributes' -- Independent variables\n",
" );\n",
"\n",
"SELECT * FROM iris_data_packed ORDER BY __id__;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For small datasets like in this example, buffer size is mainly determined by the number of segments in the database. For a Greenplum database with 2 segments, there will be 2 rows with a buffer size of 26. For PostgresSQL, there would be only one row with a buffer size of 52 since it is a single node database. For larger data sets, other factors go into computing buffers size besides number of segments. \n",
"\n",
"Also, note above that the dependent variable has been one-hot encoded since it is categorical. Here is a sample of the packed output table\n",
"\n",
"Review the output summary table:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>output_table</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>dependent_vartype</th>\n",
" <th>buffer_size</th>\n",
" <th>class_values</th>\n",
" <th>num_rows_processed</th>\n",
" <th>num_missing_rows_skipped</th>\n",
" <th>grouping_cols</th>\n",
" </tr>\n",
" <tr>\n",
" <td>iris_data</td>\n",
" <td>iris_data_packed</td>\n",
" <td>class_text</td>\n",
" <td>attributes</td>\n",
" <td>character varying</td>\n",
" <td>26</td>\n",
" <td>[u'Iris_setosa', u'Iris_versicolor']</td>\n",
" <td>52</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'iris_data', u'iris_data_packed', u'class_text', u'attributes', u'character varying', 26, [u'Iris_setosa', u'Iris_versicolor'], 52, 0, None)]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT * FROM iris_data_packed_summary;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Review the output standardization table:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" </tr>\n",
" <tr>\n",
" <td>[5.45961538462, 2.99807692308, 3.025, 0.851923076923]</td>\n",
" <td>[0.598799958695, 0.498262513686, 1.41840579525, 0.550346179381]</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[([5.45961538462, 2.99807692308, 3.025, 0.851923076923], [0.598799958695, 0.498262513686, 1.41840579525, 0.550346179381])]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT * FROM iris_data_packed_standardization;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3. Change buffer size \n",
"\n",
"Generally the default buffer size will work well, but if you have occasion to change it:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"6 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>__id__</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" </tr>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>[[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]]</td>\n",
" <td>[[-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>[[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]</td>\n",
" <td>[[0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>[[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]</td>\n",
" <td>[[-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]]</td>\n",
" <td>[[0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>[[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]</td>\n",
" <td>[[0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>[[1.0, 0.0], [1.0, 0.0]]</td>\n",
" <td>[[-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848]]</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(0L, [[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], [[-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597]]),\n",
" (1L, [[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]], [[0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597]]),\n",
" (2L, [[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]], [[-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202]]),\n",
" (3L, [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], [[0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828]]),\n",
" (4L, [[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]], [[0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025]]),\n",
" (5L, [[1.0, 0.0], [1.0, 0.0]], [[-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848]])]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;\n",
"\n",
"SELECT madlib.minibatch_preprocessor('iris_data', -- Source table\n",
" 'iris_data_packed', -- Output table\n",
" 'class_text', -- Dependent variable\n",
" 'attributes', -- Independent variables\n",
" NULL, -- Grouping\n",
" 10 -- Buffer size\n",
" );\n",
"\n",
"SELECT * FROM iris_data_packed ORDER BY __id__;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Review the output summary data:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>output_table</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>dependent_vartype</th>\n",
" <th>buffer_size</th>\n",
" <th>class_values</th>\n",
" <th>num_rows_processed</th>\n",
" <th>num_missing_rows_skipped</th>\n",
" <th>grouping_cols</th>\n",
" </tr>\n",
" <tr>\n",
" <td>iris_data</td>\n",
" <td>iris_data_packed</td>\n",
" <td>class_text</td>\n",
" <td>attributes</td>\n",
" <td>character varying</td>\n",
" <td>10</td>\n",
" <td>[u'Iris_setosa', u'Iris_versicolor']</td>\n",
" <td>52</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'iris_data', u'iris_data_packed', u'class_text', u'attributes', u'character varying', 10, [u'Iris_setosa', u'Iris_versicolor'], 52, 0, None)]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT * FROM iris_data_packed_summary;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 4. Grouping\n",
"\n",
"Run the preprocessor with grouping by state:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"5 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>__id__</th>\n",
" <th>state</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" </tr>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Alaska</td>\n",
" <td>[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]</td>\n",
" <td>[[1.26030711687938, -1.615325368523, 1.10943660794792, 1.24354000843452], [1.10129640587123, -0.126074175104234, 1.2524188915498, 1.05700900716934], [0.306242850830503, -0.977074857057813, 0.680489757142278, 0.497416003373807], [0.942285694863087, -1.615325368523, 0.751980898943218, 0.310885002108629], [0.783274983854942, 0.0866759953841608, 0.894963182545097, 1.24354000843452], [-0.806832126226518, 0.299426165872556, -1.03529764608027, -1.36789400927797], [-0.488810704210227, 1.78867735929132, -0.963806504279335, -1.18136300801279], [-1.60188568126725, 0.512176336360951, -1.17827992968215, -1.18136300801279], [-0.965842837234665, 0.0866759953841608, -1.10678878788121, -0.994832006747614], [-0.647821415218373, 1.15042684782613, -1.17827992968215, -0.994832006747614], [-0.647821415218373, -2.04082570949979, 0.394525189938519, 0.310885002108629], [2.05536067192011, 0.299426165872556, 1.03794546614698, 1.05700900716934], [-0.647821415218373, 0.512176336360951, -1.24977107148309, -1.18136300801279]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Alaska</td>\n",
" <td>[[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]</td>\n",
" <td>[[1.41931782788752, 0.724926506849345, 1.2524188915498, 1.4300710096997], [-0.647821415218373, 1.15042684782613, -0.963806504279335, -0.435239002952081], [0.624264272846795, -0.551574516081023, 0.823472040744157, 0.310885002108629], [-1.4428749702591, -1.4025751980346, -1.17827992968215, -0.994832006747614], [0.306242850830503, -0.126074175104234, 0.466016331739459, 0.870478005904162], [1.89634996091196, -0.126074175104234, 1.18092774974886, 0.870478005904162], [-0.32979999320208, -0.551574516081023, 0.680489757142278, 1.05700900716934], [0.46525356183865, -0.338824345592629, 1.10943660794792, 0.870478005904162], [0.306242850830503, 0.0866759953841608, 1.10943660794792, 1.24354000843452], [-0.488810704210227, 0.93767667733774, -1.03529764608027, -1.18136300801279], [-0.488810704210227, 1.78867735929132, -0.749333078876516, -0.808301005482437], [0.147232139822357, 1.15042684782613, -1.17827992968215, -1.18136300801279], [-1.60188568126725, 0.0866759953841608, -1.17827992968215, -1.18136300801279]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Alaska</td>\n",
" <td>[[0.0, 1.0]]</td>\n",
" <td>[[-0.806832126226518, -1.18982502754621, 0.25154290633664, 0.310885002108629]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Tennessee</td>\n",
" <td>[[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]]</td>\n",
" <td>[[-0.0286196553591748, -1.22176567731394, 0.412632633639227, 0.22669394242252], [-0.207492501354026, 1.25994585473, -1.12079945083087, -1.19014319771823], [0.507998882625381, -0.839963903153331, 0.621737008794241, 0.580903227457708], [-0.922983885333435, 0.687243193489089, -1.12079945083087, -1.19014319771823], [1.04461742060994, -0.0763603548321211, 1.03994575910427, 0.935112512492896], [2.11785449657905, 0.114540532248182, 1.10964721748927, 1.11221715501049], [0.507998882625381, -0.649063016073029, 0.552035550409236, 0.580903227457708], [-1.99622096130255, -0.267261241912424, -1.19050090921588, -1.19014319771823], [1.40236311259964, -1.41266656439424, 0.90054284233426, 0.758007869975302], [0.32912603663053, 2.59625206429212, -1.12079945083087, -0.835933912683043], [-0.207492501354026, 1.6417476288906, -1.26020236760088, -0.835933912683043], [-2.1750938072974, -0.0763603548321211, -1.39960528437089, -1.36724784023582], [-0.0286196553591748, -1.22176567731394, 0.482334092024232, 0.403798584940115]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Tennessee</td>\n",
" <td>[[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]]</td>\n",
" <td>[[0.865744574615085, 0.687243193489089, 0.970244300719264, 1.28932179752808], [-0.0286196553591748, -0.839963903153331, 0.90054284233426, 0.580903227457708], [-1.28072957732314, 0.687243193489089, -1.05109799244587, -1.19014319771823], [-1.10185673132829, 0.114540532248182, -1.12079945083087, -1.36724784023582], [-0.0286196553591748, -1.03086479023363, 0.621737008794241, 0.758007869975302], [-0.207492501354026, -0.0763603548321211, 0.970244300719264, 1.11221715501049], [0.865744574615085, -0.649063016073029, 1.38845305102929, 1.28932179752808], [0.150253190635677, -0.0763603548321211, 0.691438467179245, 0.758007869975302], [0.32912603663053, -0.839963903153331, 0.273229716869218, 0.22669394242252], [-1.28072957732314, -0.0763603548321211, -1.19050090921588, -1.36724784023582], [0.507998882625381, 1.8326485159709, -1.32990382598589, -1.19014319771823], [0.865744574615085, -0.267261241912424, 0.970244300719264, 1.11221715501049]]</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(0L, u'Alaska', [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]], [[1.26030711687938, -1.615325368523, 1.10943660794792, 1.24354000843452], [1.10129640587123, -0.126074175104234, 1.2524188915498, 1.05700900716934], [0.306242850830503, -0.977074857057813, 0.680489757142278, 0.497416003373807], [0.942285694863087, -1.615325368523, 0.751980898943218, 0.310885002108629], [0.783274983854942, 0.0866759953841608, 0.894963182545097, 1.24354000843452], [-0.806832126226518, 0.299426165872556, -1.03529764608027, -1.36789400927797], [-0.488810704210227, 1.78867735929132, -0.963806504279335, -1.18136300801279], [-1.60188568126725, 0.512176336360951, -1.17827992968215, -1.18136300801279], [-0.965842837234665, 0.0866759953841608, -1.10678878788121, -0.994832006747614], [-0.647821415218373, 1.15042684782613, -1.17827992968215, -0.994832006747614], [-0.647821415218373, -2.04082570949979, 0.394525189938519, 0.310885002108629], [2.05536067192011, 0.299426165872556, 1.03794546614698, 1.05700900716934], [-0.647821415218373, 0.512176336360951, -1.24977107148309, -1.18136300801279]]),\n",
" (1L, u'Alaska', [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]], [[1.41931782788752, 0.724926506849345, 1.2524188915498, 1.4300710096997], [-0.647821415218373, 1.15042684782613, -0.963806504279335, -0.435239002952081], [0.624264272846795, -0.551574516081023, 0.823472040744157, 0.310885002108629], [-1.4428749702591, -1.4025751980346, -1.17827992968215, -0.994832006747614], [0.306242850830503, -0.126074175104234, 0.466016331739459, 0.870478005904162], [1.89634996091196, -0.126074175104234, 1.18092774974886, 0.870478005904162], [-0.32979999320208, -0.551574516081023, 0.680489757142278, 1.05700900716934], [0.46525356183865, -0.338824345592629, 1.10943660794792, 0.870478005904162], [0.306242850830503, 0.0866759953841608, 1.10943660794792, 1.24354000843452], [-0.488810704210227, 0.93767667733774, -1.03529764608027, -1.18136300801279], [-0.488810704210227, 1.78867735929132, -0.749333078876516, -0.808301005482437], [0.147232139822357, 1.15042684782613, -1.17827992968215, -1.18136300801279], [-1.60188568126725, 0.0866759953841608, -1.17827992968215, -1.18136300801279]]),\n",
" (2L, u'Alaska', [[0.0, 1.0]], [[-0.806832126226518, -1.18982502754621, 0.25154290633664, 0.310885002108629]]),\n",
" (0L, u'Tennessee', [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], [[-0.0286196553591748, -1.22176567731394, 0.412632633639227, 0.22669394242252], [-0.207492501354026, 1.25994585473, -1.12079945083087, -1.19014319771823], [0.507998882625381, -0.839963903153331, 0.621737008794241, 0.580903227457708], [-0.922983885333435, 0.687243193489089, -1.12079945083087, -1.19014319771823], [1.04461742060994, -0.0763603548321211, 1.03994575910427, 0.935112512492896], [2.11785449657905, 0.114540532248182, 1.10964721748927, 1.11221715501049], [0.507998882625381, -0.649063016073029, 0.552035550409236, 0.580903227457708], [-1.99622096130255, -0.267261241912424, -1.19050090921588, -1.19014319771823], [1.40236311259964, -1.41266656439424, 0.90054284233426, 0.758007869975302], [0.32912603663053, 2.59625206429212, -1.12079945083087, -0.835933912683043], [-0.207492501354026, 1.6417476288906, -1.26020236760088, -0.835933912683043], [-2.1750938072974, -0.0763603548321211, -1.39960528437089, -1.36724784023582], [-0.0286196553591748, -1.22176567731394, 0.482334092024232, 0.403798584940115]]),\n",
" (1L, u'Tennessee', [[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], [[0.865744574615085, 0.687243193489089, 0.970244300719264, 1.28932179752808], [-0.0286196553591748, -0.839963903153331, 0.90054284233426, 0.580903227457708], [-1.28072957732314, 0.687243193489089, -1.05109799244587, -1.19014319771823], [-1.10185673132829, 0.114540532248182, -1.12079945083087, -1.36724784023582], [-0.0286196553591748, -1.03086479023363, 0.621737008794241, 0.758007869975302], [-0.207492501354026, -0.0763603548321211, 0.970244300719264, 1.11221715501049], [0.865744574615085, -0.649063016073029, 1.38845305102929, 1.28932179752808], [0.150253190635677, -0.0763603548321211, 0.691438467179245, 0.758007869975302], [0.32912603663053, -0.839963903153331, 0.273229716869218, 0.22669394242252], [-1.28072957732314, -0.0763603548321211, -1.19050090921588, -1.36724784023582], [0.507998882625381, 1.8326485159709, -1.32990382598589, -1.19014319771823], [0.865744574615085, -0.267261241912424, 0.970244300719264, 1.11221715501049]])]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;\n",
"\n",
"SELECT madlib.minibatch_preprocessor('iris_data', -- Source table\n",
" 'iris_data_packed', -- Output table\n",
" 'class_text', -- Dependent variable\n",
" 'attributes', -- Independent variables\n",
" 'state' -- Grouping\n",
" );\n",
"\n",
"SELECT * FROM iris_data_packed ORDER BY state, __id__;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Review the output summary table:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>output_table</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>dependent_vartype</th>\n",
" <th>buffer_size</th>\n",
" <th>class_values</th>\n",
" <th>num_rows_processed</th>\n",
" <th>num_missing_rows_skipped</th>\n",
" <th>grouping_cols</th>\n",
" </tr>\n",
" <tr>\n",
" <td>iris_data</td>\n",
" <td>iris_data_packed</td>\n",
" <td>class_text</td>\n",
" <td>attributes</td>\n",
" <td>character varying</td>\n",
" <td>13</td>\n",
" <td>[u'Iris_setosa', u'Iris_versicolor']</td>\n",
" <td>52</td>\n",
" <td>0</td>\n",
" <td>state</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'iris_data', u'iris_data_packed', u'class_text', u'attributes', u'character varying', 13, [u'Iris_setosa', u'Iris_versicolor'], 52, 0, u'state')]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT * FROM iris_data_packed_summary;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Review the output standardization table:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>state</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" </tr>\n",
" <tr>\n",
" <td>Alaska</td>\n",
" <td>[5.40740740740741, 2.95925925925926, 2.94814814814815, 0.833333333333333]</td>\n",
" <td>[0.628888452645665, 0.470034875978888, 1.39877469405147, 0.536103914747325]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Tennessee</td>\n",
" <td>[5.516, 3.04, 3.108, 0.872]</td>\n",
" <td>[0.55905634778617, 0.523832034148353, 1.43469021046357, 0.564637937088893]</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'Alaska', [5.40740740740741, 2.95925925925926, 2.94814814814815, 0.833333333333333], [0.628888452645665, 0.470034875978888, 1.39877469405147, 0.536103914747325]),\n",
" (u'Tennessee', [5.516, 3.04, 3.108, 0.872], [0.55905634778617, 0.523832034148353, 1.43469021046357, 0.564637937088893])]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT * FROM iris_data_packed_standardization ORDER BY state;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 5. Integer dependent variable for classification\n",
"\n",
"If the depedent variable is scalar integer, and you have not already encoded it, you can ask the preprocessor to encode it for you:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done.\n",
"1 rows affected.\n",
"2 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>__id__</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" </tr>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]</td>\n",
" <td>[[0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059]]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]]</td>\n",
" <td>[[0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789]]</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(0L, [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]], [[0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059]]),\n",
" (1L, [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]], [[0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789]])]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;\n",
"\n",
"SELECT madlib.minibatch_preprocessor('iris_data', -- Source table\n",
" 'iris_data_packed', -- Output table\n",
" 'class', -- Integer dependent variable\n",
" 'attributes', -- Independent variables\n",
" NULL, -- Grouping\n",
" NULL, -- Buffer size\n",
" TRUE -- Encode scalar int dependent variable\n",
" );\n",
"\n",
"SELECT * FROM iris_data_packed ORDER BY __id__;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Review output summary table:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>source_table</th>\n",
" <th>output_table</th>\n",
" <th>dependent_varname</th>\n",
" <th>independent_varname</th>\n",
" <th>dependent_vartype</th>\n",
" <th>buffer_size</th>\n",
" <th>class_values</th>\n",
" <th>num_rows_processed</th>\n",
" <th>num_missing_rows_skipped</th>\n",
" <th>grouping_cols</th>\n",
" </tr>\n",
" <tr>\n",
" <td>iris_data</td>\n",
" <td>iris_data_packed</td>\n",
" <td>class</td>\n",
" <td>attributes</td>\n",
" <td>integer</td>\n",
" <td>26</td>\n",
" <td>[1, 2]</td>\n",
" <td>52</td>\n",
" <td>0</td>\n",
" <td>None</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'iris_data', u'iris_data_packed', u'class', u'attributes', u'integer', 26, [1, 2], 52, 0, None)]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%sql\n",
"SELECT * FROM iris_data_packed_summary;"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}