| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Column and vector operations\n", |
| "\n", |
| "Column and vector operations were added in 1.15.\n", |
| "\n", |
| "* cols2vec\n", |
| "* vec2cols\n", |
| "* drop columns" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "metadata": { |
| "scrolled": true |
| }, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.\n", |
| " \"You should import from traitlets.config instead.\", ShimWarning)\n", |
| "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n", |
| " warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%load_ext sql" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 26, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": [ |
| "u'Connected: gpadmin@madlib'" |
| ] |
| }, |
| "execution_count": 26, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Greenplum Database 5.4.0 on GCP (demo machine)\n", |
| "%sql postgresql://gpadmin@35.184.253.255:5432/madlib\n", |
| " \n", |
| "# PostgreSQL local\n", |
| "#%sql postgresql://fmcquillan@localhost:5432/madlib" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 27, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>version</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>MADlib version: 1.15-dev, git revision: rc/1.14-rc1-52-g1a7c756, cmake configuration time: Tue Jul 31 20:31:52 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'MADlib version: 1.15-dev, git revision: rc/1.14-rc1-52-g1a7c756, cmake configuration time: Tue Jul 31 20:31:52 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7',)]" |
| ] |
| }, |
| "execution_count": 27, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%sql select madlib.version();\n", |
| "#%sql select version();" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Cols2vec\n", |
| "\n", |
| "Convert feature columns in a table into an array in a single column.\n", |
| "\n", |
| "Given a table with a number of feature columns, this function will create an output table that contains the feature columns in an array. A summary table will also be created that contains the names of the features combined into array, so that this process can be reversed using the function vec2cols." |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 1. Load sample data" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 28, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "Done.\n", |
| "14 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>OUTLOOK</th>\n", |
| " <th>temperature</th>\n", |
| " <th>humidity</th>\n", |
| " <th>Temp_Humidity</th>\n", |
| " <th>clouds_airquality</th>\n", |
| " <th>windy</th>\n", |
| " <th>class</th>\n", |
| " <th>observation_weight</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>sunny</td>\n", |
| " <td>85.0</td>\n", |
| " <td>85.0</td>\n", |
| " <td>[85.0, 85.0]</td>\n", |
| " <td>[u'none', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>sunny</td>\n", |
| " <td>80.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[80.0, 90.0]</td>\n", |
| " <td>[u'none', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>overcast</td>\n", |
| " <td>83.0</td>\n", |
| " <td>78.0</td>\n", |
| " <td>[83.0, 78.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>rain</td>\n", |
| " <td>70.0</td>\n", |
| " <td>96.0</td>\n", |
| " <td>[70.0, 96.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>rain</td>\n", |
| " <td>68.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[68.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>rain</td>\n", |
| " <td>65.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[65.0, 70.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>overcast</td>\n", |
| " <td>64.0</td>\n", |
| " <td>65.0</td>\n", |
| " <td>[64.0, 65.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>sunny</td>\n", |
| " <td>72.0</td>\n", |
| " <td>95.0</td>\n", |
| " <td>[72.0, 95.0]</td>\n", |
| " <td>[u'high', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>sunny</td>\n", |
| " <td>69.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[69.0, 70.0]</td>\n", |
| " <td>[u'high', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>rain</td>\n", |
| " <td>75.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[75.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>sunny</td>\n", |
| " <td>75.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[75.0, 70.0]</td>\n", |
| " <td>[u'none', u'good']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>overcast</td>\n", |
| " <td>72.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[72.0, 90.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>overcast</td>\n", |
| " <td>81.0</td>\n", |
| " <td>75.0</td>\n", |
| " <td>[81.0, 75.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>rain</td>\n", |
| " <td>71.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[71.0, 80.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, u'sunny', 85.0, 85.0, [85.0, 85.0], [u'none', u'unhealthy'], False, u\"Don't Play\", 5.0),\n", |
| " (2, u'sunny', 80.0, 90.0, [80.0, 90.0], [u'none', u'moderate'], True, u\"Don't Play\", 5.0),\n", |
| " (3, u'overcast', 83.0, 78.0, [83.0, 78.0], [u'low', u'moderate'], False, u'Play', 1.5),\n", |
| " (4, u'rain', 70.0, 96.0, [70.0, 96.0], [u'low', u'moderate'], False, u'Play', 1.0),\n", |
| " (5, u'rain', 68.0, 80.0, [68.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0),\n", |
| " (6, u'rain', 65.0, 70.0, [65.0, 70.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0),\n", |
| " (7, u'overcast', 64.0, 65.0, [64.0, 65.0], [u'medium', u'moderate'], True, u'Play', 1.5),\n", |
| " (8, u'sunny', 72.0, 95.0, [72.0, 95.0], [u'high', u'unhealthy'], False, u\"Don't Play\", 5.0),\n", |
| " (9, u'sunny', 69.0, 70.0, [69.0, 70.0], [u'high', u'good'], False, u'Play', 5.0),\n", |
| " (10, u'rain', 75.0, 80.0, [75.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0),\n", |
| " (11, u'sunny', 75.0, 70.0, [75.0, 70.0], [u'none', u'good'], True, u'Play', 5.0),\n", |
| " (12, u'overcast', 72.0, 90.0, [72.0, 90.0], [u'medium', u'moderate'], True, u'Play', 1.5),\n", |
| " (13, u'overcast', 81.0, 75.0, [81.0, 75.0], [u'medium', u'moderate'], False, u'Play', 1.5),\n", |
| " (14, u'rain', 71.0, 80.0, [71.0, 80.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0)]" |
| ] |
| }, |
| "execution_count": 28, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS golf CASCADE;\n", |
| "\n", |
| "CREATE TABLE golf (\n", |
| " id integer NOT NULL,\n", |
| " \"OUTLOOK\" text,\n", |
| " temperature double precision,\n", |
| " humidity double precision,\n", |
| " \"Temp_Humidity\" double precision[],\n", |
| " clouds_airquality text[],\n", |
| " windy boolean,\n", |
| " class text,\n", |
| " observation_weight double precision\n", |
| ");\n", |
| "\n", |
| "INSERT INTO golf VALUES\n", |
| "(1,'sunny', 85, 85, ARRAY[85, 85],ARRAY['none', 'unhealthy'], 'false','Don''t Play', 5.0),\n", |
| "(2, 'sunny', 80, 90, ARRAY[80, 90], ARRAY['none', 'moderate'], 'true', 'Don''t Play', 5.0),\n", |
| "(3, 'overcast', 83, 78, ARRAY[83, 78], ARRAY['low', 'moderate'], 'false', 'Play', 1.5),\n", |
| "(4, 'rain', 70, 96, ARRAY[70, 96], ARRAY['low', 'moderate'], 'false', 'Play', 1.0),\n", |
| "(5, 'rain', 68, 80, ARRAY[68, 80], ARRAY['medium', 'good'], 'false', 'Play', 1.0),\n", |
| "(6, 'rain', 65, 70, ARRAY[65, 70], ARRAY['low', 'unhealthy'], 'true', 'Don''t Play', 1.0),\n", |
| "(7, 'overcast', 64, 65, ARRAY[64, 65], ARRAY['medium', 'moderate'], 'true', 'Play', 1.5),\n", |
| "(8, 'sunny', 72, 95, ARRAY[72, 95], ARRAY['high', 'unhealthy'], 'false', 'Don''t Play', 5.0),\n", |
| "(9, 'sunny', 69, 70, ARRAY[69, 70], ARRAY['high', 'good'], 'false', 'Play', 5.0),\n", |
| "(10, 'rain', 75, 80, ARRAY[75, 80], ARRAY['medium', 'good'], 'false', 'Play', 1.0),\n", |
| "(11, 'sunny', 75, 70, ARRAY[75, 70], ARRAY['none', 'good'], 'true', 'Play', 5.0),\n", |
| "(12, 'overcast', 72, 90, ARRAY[72, 90], ARRAY['medium', 'moderate'], 'true', 'Play', 1.5),\n", |
| "(13, 'overcast', 81, 75, ARRAY[81, 75], ARRAY['medium', 'moderate'], 'false', 'Play', 1.5),\n", |
| "(14, 'rain', 71, 80, ARRAY[71, 80], ARRAY['low', 'unhealthy'], 'true', 'Don''t Play', 1.0);\n", |
| "\n", |
| "SELECT * FROM golf ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 2. Create feature array\n", |
| "Combine the temperature and humidity columns into a single array feature." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 5, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>feature_vector</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[85.0, 85.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[68.0, 80.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[69.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[81.0, 75.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[80.0, 90.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[65.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[75.0, 80.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[71.0, 80.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[83.0, 78.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[64.0, 65.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[75.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[70.0, 96.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[72.0, 95.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>[72.0, 90.0]</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[([85.0, 85.0],),\n", |
| " ([68.0, 80.0],),\n", |
| " ([69.0, 70.0],),\n", |
| " ([81.0, 75.0],),\n", |
| " ([80.0, 90.0],),\n", |
| " ([65.0, 70.0],),\n", |
| " ([75.0, 80.0],),\n", |
| " ([71.0, 80.0],),\n", |
| " ([83.0, 78.0],),\n", |
| " ([64.0, 65.0],),\n", |
| " ([75.0, 70.0],),\n", |
| " ([70.0, 96.0],),\n", |
| " ([72.0, 95.0],),\n", |
| " ([72.0, 90.0],)]" |
| ] |
| }, |
| "execution_count": 5, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;\n", |
| "\n", |
| "SELECT madlib.cols2vec(\n", |
| " 'golf',\n", |
| " 'cols2vec_result',\n", |
| " 'temperature, humidity'\n", |
| ");\n", |
| "\n", |
| "SELECT * FROM cols2vec_result;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "View the summary table:" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 8, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>source_table</th>\n", |
| " <th>list_of_features</th>\n", |
| " <th>list_of_features_to_exclude</th>\n", |
| " <th>feature_names</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>golf</td>\n", |
| " <td>temperature, humidity</td>\n", |
| " <td>None</td>\n", |
| " <td>[u'temperature', u'humidity']</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'golf', u'temperature, humidity', u'None', [u'temperature', u'humidity'])]" |
| ] |
| }, |
| "execution_count": 8, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "SELECT * FROM cols2vec_result_summary;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 3. Create feature array and keep some source table columns\n", |
| "Combine the temperature and humidity columns and keep 2 other columns from source_table." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 7, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>OUTLOOK</th>\n", |
| " <th>feature_vector</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[85.0, 85.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[80.0, 90.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>overcast</td>\n", |
| " <td>[83.0, 78.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>rain</td>\n", |
| " <td>[70.0, 96.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>rain</td>\n", |
| " <td>[68.0, 80.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>rain</td>\n", |
| " <td>[65.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>overcast</td>\n", |
| " <td>[64.0, 65.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[72.0, 95.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[69.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>rain</td>\n", |
| " <td>[75.0, 80.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[75.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>overcast</td>\n", |
| " <td>[72.0, 90.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>overcast</td>\n", |
| " <td>[81.0, 75.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>rain</td>\n", |
| " <td>[71.0, 80.0]</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, u'sunny', [85.0, 85.0]),\n", |
| " (2, u'sunny', [80.0, 90.0]),\n", |
| " (3, u'overcast', [83.0, 78.0]),\n", |
| " (4, u'rain', [70.0, 96.0]),\n", |
| " (5, u'rain', [68.0, 80.0]),\n", |
| " (6, u'rain', [65.0, 70.0]),\n", |
| " (7, u'overcast', [64.0, 65.0]),\n", |
| " (8, u'sunny', [72.0, 95.0]),\n", |
| " (9, u'sunny', [69.0, 70.0]),\n", |
| " (10, u'rain', [75.0, 80.0]),\n", |
| " (11, u'sunny', [75.0, 70.0]),\n", |
| " (12, u'overcast', [72.0, 90.0]),\n", |
| " (13, u'overcast', [81.0, 75.0]),\n", |
| " (14, u'rain', [71.0, 80.0])]" |
| ] |
| }, |
| "execution_count": 7, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;\n", |
| "\n", |
| "SELECT madlib.cols2vec(\n", |
| " 'golf',\n", |
| " 'cols2vec_result',\n", |
| " 'temperature, humidity',\n", |
| " NULL,\n", |
| " 'id, \"OUTLOOK\"'\n", |
| ");\n", |
| "\n", |
| "SELECT * FROM cols2vec_result ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "View the summary table:" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 9, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>source_table</th>\n", |
| " <th>list_of_features</th>\n", |
| " <th>list_of_features_to_exclude</th>\n", |
| " <th>feature_names</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>golf</td>\n", |
| " <td>temperature, humidity</td>\n", |
| " <td>None</td>\n", |
| " <td>[u'temperature', u'humidity']</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'golf', u'temperature, humidity', u'None', [u'temperature', u'humidity'])]" |
| ] |
| }, |
| "execution_count": 9, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "SELECT * FROM cols2vec_result_summary;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 4. Exclude all columns that are not double precision\n", |
| "Combine all columns, excluding all columns that are not of type double precision" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 10, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>OUTLOOK</th>\n", |
| " <th>feature_vector</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[85.0, 85.0, 5.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[80.0, 90.0, 5.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>overcast</td>\n", |
| " <td>[83.0, 78.0, 1.5]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>rain</td>\n", |
| " <td>[70.0, 96.0, 1.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>rain</td>\n", |
| " <td>[68.0, 80.0, 1.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>rain</td>\n", |
| " <td>[65.0, 70.0, 1.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>overcast</td>\n", |
| " <td>[64.0, 65.0, 1.5]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[72.0, 95.0, 5.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[69.0, 70.0, 5.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>rain</td>\n", |
| " <td>[75.0, 80.0, 1.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>sunny</td>\n", |
| " <td>[75.0, 70.0, 5.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>overcast</td>\n", |
| " <td>[72.0, 90.0, 1.5]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>overcast</td>\n", |
| " <td>[81.0, 75.0, 1.5]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>rain</td>\n", |
| " <td>[71.0, 80.0, 1.0]</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, u'sunny', [85.0, 85.0, 5.0]),\n", |
| " (2, u'sunny', [80.0, 90.0, 5.0]),\n", |
| " (3, u'overcast', [83.0, 78.0, 1.5]),\n", |
| " (4, u'rain', [70.0, 96.0, 1.0]),\n", |
| " (5, u'rain', [68.0, 80.0, 1.0]),\n", |
| " (6, u'rain', [65.0, 70.0, 1.0]),\n", |
| " (7, u'overcast', [64.0, 65.0, 1.5]),\n", |
| " (8, u'sunny', [72.0, 95.0, 5.0]),\n", |
| " (9, u'sunny', [69.0, 70.0, 5.0]),\n", |
| " (10, u'rain', [75.0, 80.0, 1.0]),\n", |
| " (11, u'sunny', [75.0, 70.0, 5.0]),\n", |
| " (12, u'overcast', [72.0, 90.0, 1.5]),\n", |
| " (13, u'overcast', [81.0, 75.0, 1.5]),\n", |
| " (14, u'rain', [71.0, 80.0, 1.0])]" |
| ] |
| }, |
| "execution_count": 10, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;\n", |
| "\n", |
| "SELECT madlib.cols2vec(\n", |
| " 'golf',\n", |
| " 'cols2vec_result',\n", |
| " '*',\n", |
| " '\"OUTLOOK\", \"Temp_Humidity\", clouds_airquality, windy, class, id',\n", |
| " 'id, \"OUTLOOK\"'\n", |
| ");\n", |
| "\n", |
| "SELECT * FROM cols2vec_result ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 11, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>source_table</th>\n", |
| " <th>list_of_features</th>\n", |
| " <th>list_of_features_to_exclude</th>\n", |
| " <th>feature_names</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>golf</td>\n", |
| " <td>*</td>\n", |
| " <td>\"OUTLOOK\", \"Temp_Humidity\", clouds_airquality, windy, class, id</td>\n", |
| " <td>[u'temperature', u'humidity', u'observation_weight']</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'golf', u'*', u'\"OUTLOOK\", \"Temp_Humidity\", clouds_airquality, windy, class, id', [u'temperature', u'humidity', u'observation_weight'])]" |
| ] |
| }, |
| "execution_count": 11, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "SELECT * FROM cols2vec_result_summary;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 5. Keep all columns from source table\n", |
| "Combine the temperature and humidity columns, exclude windy, and keep all of the columns from the source table." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 12, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>OUTLOOK</th>\n", |
| " <th>temperature</th>\n", |
| " <th>humidity</th>\n", |
| " <th>Temp_Humidity</th>\n", |
| " <th>clouds_airquality</th>\n", |
| " <th>windy</th>\n", |
| " <th>class</th>\n", |
| " <th>observation_weight</th>\n", |
| " <th>feature_vector</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>sunny</td>\n", |
| " <td>85.0</td>\n", |
| " <td>85.0</td>\n", |
| " <td>[85.0, 85.0]</td>\n", |
| " <td>[u'none', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>[85.0, 85.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>sunny</td>\n", |
| " <td>80.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[80.0, 90.0]</td>\n", |
| " <td>[u'none', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>[80.0, 90.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>overcast</td>\n", |
| " <td>83.0</td>\n", |
| " <td>78.0</td>\n", |
| " <td>[83.0, 78.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " <td>[83.0, 78.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>rain</td>\n", |
| " <td>70.0</td>\n", |
| " <td>96.0</td>\n", |
| " <td>[70.0, 96.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>[70.0, 96.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>rain</td>\n", |
| " <td>68.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[68.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>[68.0, 80.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>rain</td>\n", |
| " <td>65.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[65.0, 70.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>[65.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>overcast</td>\n", |
| " <td>64.0</td>\n", |
| " <td>65.0</td>\n", |
| " <td>[64.0, 65.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " <td>[64.0, 65.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>sunny</td>\n", |
| " <td>72.0</td>\n", |
| " <td>95.0</td>\n", |
| " <td>[72.0, 95.0]</td>\n", |
| " <td>[u'high', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>[72.0, 95.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>sunny</td>\n", |
| " <td>69.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[69.0, 70.0]</td>\n", |
| " <td>[u'high', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>[69.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>rain</td>\n", |
| " <td>75.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[75.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>[75.0, 80.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>sunny</td>\n", |
| " <td>75.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[75.0, 70.0]</td>\n", |
| " <td>[u'none', u'good']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>[75.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>overcast</td>\n", |
| " <td>72.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[72.0, 90.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " <td>[72.0, 90.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>overcast</td>\n", |
| " <td>81.0</td>\n", |
| " <td>75.0</td>\n", |
| " <td>[81.0, 75.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " <td>[81.0, 75.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>rain</td>\n", |
| " <td>71.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[71.0, 80.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>[71.0, 80.0]</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, u'sunny', 85.0, 85.0, [85.0, 85.0], [u'none', u'unhealthy'], False, u\"Don't Play\", 5.0, [85.0, 85.0]),\n", |
| " (2, u'sunny', 80.0, 90.0, [80.0, 90.0], [u'none', u'moderate'], True, u\"Don't Play\", 5.0, [80.0, 90.0]),\n", |
| " (3, u'overcast', 83.0, 78.0, [83.0, 78.0], [u'low', u'moderate'], False, u'Play', 1.5, [83.0, 78.0]),\n", |
| " (4, u'rain', 70.0, 96.0, [70.0, 96.0], [u'low', u'moderate'], False, u'Play', 1.0, [70.0, 96.0]),\n", |
| " (5, u'rain', 68.0, 80.0, [68.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0, [68.0, 80.0]),\n", |
| " (6, u'rain', 65.0, 70.0, [65.0, 70.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0, [65.0, 70.0]),\n", |
| " (7, u'overcast', 64.0, 65.0, [64.0, 65.0], [u'medium', u'moderate'], True, u'Play', 1.5, [64.0, 65.0]),\n", |
| " (8, u'sunny', 72.0, 95.0, [72.0, 95.0], [u'high', u'unhealthy'], False, u\"Don't Play\", 5.0, [72.0, 95.0]),\n", |
| " (9, u'sunny', 69.0, 70.0, [69.0, 70.0], [u'high', u'good'], False, u'Play', 5.0, [69.0, 70.0]),\n", |
| " (10, u'rain', 75.0, 80.0, [75.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0, [75.0, 80.0]),\n", |
| " (11, u'sunny', 75.0, 70.0, [75.0, 70.0], [u'none', u'good'], True, u'Play', 5.0, [75.0, 70.0]),\n", |
| " (12, u'overcast', 72.0, 90.0, [72.0, 90.0], [u'medium', u'moderate'], True, u'Play', 1.5, [72.0, 90.0]),\n", |
| " (13, u'overcast', 81.0, 75.0, [81.0, 75.0], [u'medium', u'moderate'], False, u'Play', 1.5, [81.0, 75.0]),\n", |
| " (14, u'rain', 71.0, 80.0, [71.0, 80.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0, [71.0, 80.0])]" |
| ] |
| }, |
| "execution_count": 12, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;\n", |
| "\n", |
| "SELECT madlib.cols2vec(\n", |
| " 'golf',\n", |
| " 'cols2vec_result',\n", |
| " 'windy, temperature, humidity',\n", |
| " 'windy',\n", |
| " '*'\n", |
| ");\n", |
| "\n", |
| "SELECT * FROM cols2vec_result ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "View summary table:" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 14, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>source_table</th>\n", |
| " <th>list_of_features</th>\n", |
| " <th>list_of_features_to_exclude</th>\n", |
| " <th>feature_names</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>golf</td>\n", |
| " <td>windy, temperature, humidity</td>\n", |
| " <td>windy</td>\n", |
| " <td>[u'temperature', u'humidity']</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'golf', u'windy, temperature, humidity', u'windy', [u'temperature', u'humidity'])]" |
| ] |
| }, |
| "execution_count": 14, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "SELECT * FROM cols2vec_result_summary;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "This also shows that you can exclude features in 'list_of_features_to_exclude' that are in the list of 'list_of_features'. This can be useful if the 'list_of_features' is generated from an expression or subquery." |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 6. Boolean casting\n", |
| "\n", |
| "Type casting works as per regular rules of the underlying database. E.g, combining integer and double precisions columns will create a double precision feature vector. For Boolean, do an explicit cast to the target type:" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 30, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>feature_vector</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>[u'false', u\"Don't Play\"]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>[u'true', u\"Don't Play\"]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>[u'false', u'Play']</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>[u'false', u'Play']</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>[u'false', u'Play']</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>[u'true', u\"Don't Play\"]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>[u'true', u'Play']</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>[u'false', u\"Don't Play\"]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>[u'false', u'Play']</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>[u'false', u'Play']</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>[u'true', u'Play']</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>[u'true', u'Play']</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>[u'false', u'Play']</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>[u'true', u\"Don't Play\"]</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, [u'false', u\"Don't Play\"]),\n", |
| " (2, [u'true', u\"Don't Play\"]),\n", |
| " (3, [u'false', u'Play']),\n", |
| " (4, [u'false', u'Play']),\n", |
| " (5, [u'false', u'Play']),\n", |
| " (6, [u'true', u\"Don't Play\"]),\n", |
| " (7, [u'true', u'Play']),\n", |
| " (8, [u'false', u\"Don't Play\"]),\n", |
| " (9, [u'false', u'Play']),\n", |
| " (10, [u'false', u'Play']),\n", |
| " (11, [u'true', u'Play']),\n", |
| " (12, [u'true', u'Play']),\n", |
| " (13, [u'false', u'Play']),\n", |
| " (14, [u'true', u\"Don't Play\"])]" |
| ] |
| }, |
| "execution_count": 30, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;\n", |
| "\n", |
| "SELECT madlib.cols2vec(\n", |
| " 'golf',\n", |
| " 'cols2vec_result',\n", |
| " 'windy::TEXT, class',\n", |
| " NULL,\n", |
| " 'id'\n", |
| ");\n", |
| "\n", |
| "SELECT * FROM cols2vec_result ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Vec2cols\n", |
| "\n", |
| "Converts a feature array in a single column into multiple columns. This process can be used to reverse the function cols2vec.\n", |
| "\n", |
| "Given a table with a column of type array, this function will create an output table that splits this array into multiple columns, one per array element. It includes the option to name the new feature columns, and to include columns from the original table in the output." |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 1. Load sample data" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 18, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "Done.\n", |
| "14 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>OUTLOOK</th>\n", |
| " <th>temperature</th>\n", |
| " <th>humidity</th>\n", |
| " <th>Temp_Humidity</th>\n", |
| " <th>clouds_airquality</th>\n", |
| " <th>windy</th>\n", |
| " <th>class</th>\n", |
| " <th>observation_weight</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>sunny</td>\n", |
| " <td>85.0</td>\n", |
| " <td>85.0</td>\n", |
| " <td>[85.0, 85.0]</td>\n", |
| " <td>[u'none', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>sunny</td>\n", |
| " <td>80.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[80.0, 90.0]</td>\n", |
| " <td>[u'none', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>overcast</td>\n", |
| " <td>83.0</td>\n", |
| " <td>78.0</td>\n", |
| " <td>[83.0, 78.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>rain</td>\n", |
| " <td>70.0</td>\n", |
| " <td>96.0</td>\n", |
| " <td>[70.0, 96.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>rain</td>\n", |
| " <td>68.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[68.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>rain</td>\n", |
| " <td>65.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[65.0, 70.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>overcast</td>\n", |
| " <td>64.0</td>\n", |
| " <td>65.0</td>\n", |
| " <td>[64.0, 65.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>sunny</td>\n", |
| " <td>72.0</td>\n", |
| " <td>95.0</td>\n", |
| " <td>[72.0, 95.0]</td>\n", |
| " <td>[u'high', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>sunny</td>\n", |
| " <td>69.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[69.0, 70.0]</td>\n", |
| " <td>[u'high', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>rain</td>\n", |
| " <td>75.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[75.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>sunny</td>\n", |
| " <td>75.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[75.0, 70.0]</td>\n", |
| " <td>[u'none', u'good']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>overcast</td>\n", |
| " <td>72.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[72.0, 90.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>overcast</td>\n", |
| " <td>81.0</td>\n", |
| " <td>75.0</td>\n", |
| " <td>[81.0, 75.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>rain</td>\n", |
| " <td>71.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[71.0, 80.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, u'sunny', 85.0, 85.0, [85.0, 85.0], [u'none', u'unhealthy'], False, u\"Don't Play\", 5.0),\n", |
| " (2, u'sunny', 80.0, 90.0, [80.0, 90.0], [u'none', u'moderate'], True, u\"Don't Play\", 5.0),\n", |
| " (3, u'overcast', 83.0, 78.0, [83.0, 78.0], [u'low', u'moderate'], False, u'Play', 1.5),\n", |
| " (4, u'rain', 70.0, 96.0, [70.0, 96.0], [u'low', u'moderate'], False, u'Play', 1.0),\n", |
| " (5, u'rain', 68.0, 80.0, [68.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0),\n", |
| " (6, u'rain', 65.0, 70.0, [65.0, 70.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0),\n", |
| " (7, u'overcast', 64.0, 65.0, [64.0, 65.0], [u'medium', u'moderate'], True, u'Play', 1.5),\n", |
| " (8, u'sunny', 72.0, 95.0, [72.0, 95.0], [u'high', u'unhealthy'], False, u\"Don't Play\", 5.0),\n", |
| " (9, u'sunny', 69.0, 70.0, [69.0, 70.0], [u'high', u'good'], False, u'Play', 5.0),\n", |
| " (10, u'rain', 75.0, 80.0, [75.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0),\n", |
| " (11, u'sunny', 75.0, 70.0, [75.0, 70.0], [u'none', u'good'], True, u'Play', 5.0),\n", |
| " (12, u'overcast', 72.0, 90.0, [72.0, 90.0], [u'medium', u'moderate'], True, u'Play', 1.5),\n", |
| " (13, u'overcast', 81.0, 75.0, [81.0, 75.0], [u'medium', u'moderate'], False, u'Play', 1.5),\n", |
| " (14, u'rain', 71.0, 80.0, [71.0, 80.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0)]" |
| ] |
| }, |
| "execution_count": 18, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS golf CASCADE;\n", |
| "\n", |
| "CREATE TABLE golf (\n", |
| " id integer NOT NULL,\n", |
| " \"OUTLOOK\" text,\n", |
| " temperature double precision,\n", |
| " humidity double precision,\n", |
| " \"Temp_Humidity\" double precision[],\n", |
| " clouds_airquality text[],\n", |
| " windy boolean,\n", |
| " class text,\n", |
| " observation_weight double precision\n", |
| ");\n", |
| "\n", |
| "INSERT INTO golf VALUES\n", |
| "(1,'sunny', 85, 85, ARRAY[85, 85],ARRAY['none', 'unhealthy'], 'false','Don''t Play', 5.0),\n", |
| "(2, 'sunny', 80, 90, ARRAY[80, 90], ARRAY['none', 'moderate'], 'true', 'Don''t Play', 5.0),\n", |
| "(3, 'overcast', 83, 78, ARRAY[83, 78], ARRAY['low', 'moderate'], 'false', 'Play', 1.5),\n", |
| "(4, 'rain', 70, 96, ARRAY[70, 96], ARRAY['low', 'moderate'], 'false', 'Play', 1.0),\n", |
| "(5, 'rain', 68, 80, ARRAY[68, 80], ARRAY['medium', 'good'], 'false', 'Play', 1.0),\n", |
| "(6, 'rain', 65, 70, ARRAY[65, 70], ARRAY['low', 'unhealthy'], 'true', 'Don''t Play', 1.0),\n", |
| "(7, 'overcast', 64, 65, ARRAY[64, 65], ARRAY['medium', 'moderate'], 'true', 'Play', 1.5),\n", |
| "(8, 'sunny', 72, 95, ARRAY[72, 95], ARRAY['high', 'unhealthy'], 'false', 'Don''t Play', 5.0),\n", |
| "(9, 'sunny', 69, 70, ARRAY[69, 70], ARRAY['high', 'good'], 'false', 'Play', 5.0),\n", |
| "(10, 'rain', 75, 80, ARRAY[75, 80], ARRAY['medium', 'good'], 'false', 'Play', 1.0),\n", |
| "(11, 'sunny', 75, 70, ARRAY[75, 70], ARRAY['none', 'good'], 'true', 'Play', 5.0),\n", |
| "(12, 'overcast', 72, 90, ARRAY[72, 90], ARRAY['medium', 'moderate'], 'true', 'Play', 1.5),\n", |
| "(13, 'overcast', 81, 75, ARRAY[81, 75], ARRAY['medium', 'moderate'], 'false', 'Play', 1.5),\n", |
| "(14, 'rain', 71, 80, ARRAY[71, 80], ARRAY['low', 'unhealthy'], 'true', 'Don''t Play', 1.0);\n", |
| "\n", |
| "SELECT * FROM golf ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 2. Split array into columns\n", |
| "Split the column \"clouds_airquality\" into new columns called \"clouds\" and \"air_quality\". Also keep columns id and \"OUTLOOK\" from the source table." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 19, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>OUTLOOK</th>\n", |
| " <th>clouds</th>\n", |
| " <th>air_quality</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>sunny</td>\n", |
| " <td>none</td>\n", |
| " <td>unhealthy</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>sunny</td>\n", |
| " <td>none</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>overcast</td>\n", |
| " <td>low</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>rain</td>\n", |
| " <td>low</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>rain</td>\n", |
| " <td>medium</td>\n", |
| " <td>good</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>rain</td>\n", |
| " <td>low</td>\n", |
| " <td>unhealthy</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>overcast</td>\n", |
| " <td>medium</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>sunny</td>\n", |
| " <td>high</td>\n", |
| " <td>unhealthy</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>sunny</td>\n", |
| " <td>high</td>\n", |
| " <td>good</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>rain</td>\n", |
| " <td>medium</td>\n", |
| " <td>good</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>sunny</td>\n", |
| " <td>none</td>\n", |
| " <td>good</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>overcast</td>\n", |
| " <td>medium</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>overcast</td>\n", |
| " <td>medium</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>rain</td>\n", |
| " <td>low</td>\n", |
| " <td>unhealthy</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, u'sunny', u'none', u'unhealthy'),\n", |
| " (2, u'sunny', u'none', u'moderate'),\n", |
| " (3, u'overcast', u'low', u'moderate'),\n", |
| " (4, u'rain', u'low', u'moderate'),\n", |
| " (5, u'rain', u'medium', u'good'),\n", |
| " (6, u'rain', u'low', u'unhealthy'),\n", |
| " (7, u'overcast', u'medium', u'moderate'),\n", |
| " (8, u'sunny', u'high', u'unhealthy'),\n", |
| " (9, u'sunny', u'high', u'good'),\n", |
| " (10, u'rain', u'medium', u'good'),\n", |
| " (11, u'sunny', u'none', u'good'),\n", |
| " (12, u'overcast', u'medium', u'moderate'),\n", |
| " (13, u'overcast', u'medium', u'moderate'),\n", |
| " (14, u'rain', u'low', u'unhealthy')]" |
| ] |
| }, |
| "execution_count": 19, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS vec2cols_result;\n", |
| "\n", |
| "SELECT madlib.vec2cols(\n", |
| " 'golf', -- source table\n", |
| " 'vec2cols_result', -- output table\n", |
| " 'clouds_airquality', -- column with array entries to split\n", |
| " ARRAY['clouds', 'air_quality'], -- feature names\n", |
| " 'id, \"OUTLOOK\", ' -- columns to keep from source table\n", |
| ");\n", |
| "\n", |
| "SELECT * FROM vec2cols_result ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 3. Auto-generate feature names\n", |
| "Similar to the previous example, except now we keep all columns from source table and do not specify the feature names, so that default names are created." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 20, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>OUTLOOK</th>\n", |
| " <th>temperature</th>\n", |
| " <th>humidity</th>\n", |
| " <th>Temp_Humidity</th>\n", |
| " <th>clouds_airquality</th>\n", |
| " <th>windy</th>\n", |
| " <th>class</th>\n", |
| " <th>observation_weight</th>\n", |
| " <th>f1</th>\n", |
| " <th>f2</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>sunny</td>\n", |
| " <td>85.0</td>\n", |
| " <td>85.0</td>\n", |
| " <td>[85.0, 85.0]</td>\n", |
| " <td>[u'none', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>none</td>\n", |
| " <td>unhealthy</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>sunny</td>\n", |
| " <td>80.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[80.0, 90.0]</td>\n", |
| " <td>[u'none', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>none</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>overcast</td>\n", |
| " <td>83.0</td>\n", |
| " <td>78.0</td>\n", |
| " <td>[83.0, 78.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " <td>low</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>rain</td>\n", |
| " <td>70.0</td>\n", |
| " <td>96.0</td>\n", |
| " <td>[70.0, 96.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>low</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>rain</td>\n", |
| " <td>68.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[68.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>medium</td>\n", |
| " <td>good</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>rain</td>\n", |
| " <td>65.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[65.0, 70.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>low</td>\n", |
| " <td>unhealthy</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>overcast</td>\n", |
| " <td>64.0</td>\n", |
| " <td>65.0</td>\n", |
| " <td>[64.0, 65.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " <td>medium</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>sunny</td>\n", |
| " <td>72.0</td>\n", |
| " <td>95.0</td>\n", |
| " <td>[72.0, 95.0]</td>\n", |
| " <td>[u'high', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>high</td>\n", |
| " <td>unhealthy</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>sunny</td>\n", |
| " <td>69.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[69.0, 70.0]</td>\n", |
| " <td>[u'high', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>high</td>\n", |
| " <td>good</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>rain</td>\n", |
| " <td>75.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[75.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>medium</td>\n", |
| " <td>good</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>sunny</td>\n", |
| " <td>75.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[75.0, 70.0]</td>\n", |
| " <td>[u'none', u'good']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " <td>none</td>\n", |
| " <td>good</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>overcast</td>\n", |
| " <td>72.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[72.0, 90.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " <td>medium</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>overcast</td>\n", |
| " <td>81.0</td>\n", |
| " <td>75.0</td>\n", |
| " <td>[81.0, 75.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " <td>medium</td>\n", |
| " <td>moderate</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>rain</td>\n", |
| " <td>71.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[71.0, 80.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " <td>low</td>\n", |
| " <td>unhealthy</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, u'sunny', 85.0, 85.0, [85.0, 85.0], [u'none', u'unhealthy'], False, u\"Don't Play\", 5.0, u'none', u'unhealthy'),\n", |
| " (2, u'sunny', 80.0, 90.0, [80.0, 90.0], [u'none', u'moderate'], True, u\"Don't Play\", 5.0, u'none', u'moderate'),\n", |
| " (3, u'overcast', 83.0, 78.0, [83.0, 78.0], [u'low', u'moderate'], False, u'Play', 1.5, u'low', u'moderate'),\n", |
| " (4, u'rain', 70.0, 96.0, [70.0, 96.0], [u'low', u'moderate'], False, u'Play', 1.0, u'low', u'moderate'),\n", |
| " (5, u'rain', 68.0, 80.0, [68.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0, u'medium', u'good'),\n", |
| " (6, u'rain', 65.0, 70.0, [65.0, 70.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0, u'low', u'unhealthy'),\n", |
| " (7, u'overcast', 64.0, 65.0, [64.0, 65.0], [u'medium', u'moderate'], True, u'Play', 1.5, u'medium', u'moderate'),\n", |
| " (8, u'sunny', 72.0, 95.0, [72.0, 95.0], [u'high', u'unhealthy'], False, u\"Don't Play\", 5.0, u'high', u'unhealthy'),\n", |
| " (9, u'sunny', 69.0, 70.0, [69.0, 70.0], [u'high', u'good'], False, u'Play', 5.0, u'high', u'good'),\n", |
| " (10, u'rain', 75.0, 80.0, [75.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0, u'medium', u'good'),\n", |
| " (11, u'sunny', 75.0, 70.0, [75.0, 70.0], [u'none', u'good'], True, u'Play', 5.0, u'none', u'good'),\n", |
| " (12, u'overcast', 72.0, 90.0, [72.0, 90.0], [u'medium', u'moderate'], True, u'Play', 1.5, u'medium', u'moderate'),\n", |
| " (13, u'overcast', 81.0, 75.0, [81.0, 75.0], [u'medium', u'moderate'], False, u'Play', 1.5, u'medium', u'moderate'),\n", |
| " (14, u'rain', 71.0, 80.0, [71.0, 80.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0, u'low', u'unhealthy')]" |
| ] |
| }, |
| "execution_count": 20, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS vec2cols_result;\n", |
| "SELECT madlib.vec2cols(\n", |
| " 'golf', -- source table\n", |
| " 'vec2cols_result', -- output table\n", |
| " 'clouds_airquality', -- column with array entries to split\n", |
| " NULL, -- feature names\n", |
| " '*' -- columns to keep from source table\n", |
| ");\n", |
| "SELECT * FROM vec2cols_result ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 4. Get feature names from cols2vec summary table\n", |
| "Now let's run cols2vec then reverse it using vec2cols. In this case we will get feature names from the cols2vec summary table.\n", |
| "\n", |
| "First run cols2vec:" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 21, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>temperature</th>\n", |
| " <th>humidity</th>\n", |
| " <th>feature_vector</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>85.0</td>\n", |
| " <td>85.0</td>\n", |
| " <td>[85.0, 85.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>80.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[80.0, 90.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>83.0</td>\n", |
| " <td>78.0</td>\n", |
| " <td>[83.0, 78.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>70.0</td>\n", |
| " <td>96.0</td>\n", |
| " <td>[70.0, 96.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>68.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[68.0, 80.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>65.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[65.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>64.0</td>\n", |
| " <td>65.0</td>\n", |
| " <td>[64.0, 65.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>72.0</td>\n", |
| " <td>95.0</td>\n", |
| " <td>[72.0, 95.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>69.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[69.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>75.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[75.0, 80.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>75.0</td>\n", |
| " <td>70.0</td>\n", |
| " <td>[75.0, 70.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>72.0</td>\n", |
| " <td>90.0</td>\n", |
| " <td>[72.0, 90.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>81.0</td>\n", |
| " <td>75.0</td>\n", |
| " <td>[81.0, 75.0]</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>71.0</td>\n", |
| " <td>80.0</td>\n", |
| " <td>[71.0, 80.0]</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, 85.0, 85.0, [85.0, 85.0]),\n", |
| " (2, 80.0, 90.0, [80.0, 90.0]),\n", |
| " (3, 83.0, 78.0, [83.0, 78.0]),\n", |
| " (4, 70.0, 96.0, [70.0, 96.0]),\n", |
| " (5, 68.0, 80.0, [68.0, 80.0]),\n", |
| " (6, 65.0, 70.0, [65.0, 70.0]),\n", |
| " (7, 64.0, 65.0, [64.0, 65.0]),\n", |
| " (8, 72.0, 95.0, [72.0, 95.0]),\n", |
| " (9, 69.0, 70.0, [69.0, 70.0]),\n", |
| " (10, 75.0, 80.0, [75.0, 80.0]),\n", |
| " (11, 75.0, 70.0, [75.0, 70.0]),\n", |
| " (12, 72.0, 90.0, [72.0, 90.0]),\n", |
| " (13, 81.0, 75.0, [81.0, 75.0]),\n", |
| " (14, 71.0, 80.0, [71.0, 80.0])]" |
| ] |
| }, |
| "execution_count": 21, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;\n", |
| "SELECT madlib.cols2vec(\n", |
| " 'golf',\n", |
| " 'cols2vec_result',\n", |
| " 'temperature, humidity',\n", |
| " NULL,\n", |
| " 'id, temperature, humidity'\n", |
| ");\n", |
| "SELECT * FROM cols2vec_result ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "View the summary table with the feature_names dictionary:" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 22, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>source_table</th>\n", |
| " <th>list_of_features</th>\n", |
| " <th>list_of_features_to_exclude</th>\n", |
| " <th>feature_names</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>golf</td>\n", |
| " <td>temperature, humidity</td>\n", |
| " <td>None</td>\n", |
| " <td>[u'temperature', u'humidity']</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'golf', u'temperature, humidity', u'None', [u'temperature', u'humidity'])]" |
| ] |
| }, |
| "execution_count": 22, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "SELECT * FROM cols2vec_result_summary;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "Now use feature_names from the summary table above to name the columns of the split array:" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 23, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>temperature</th>\n", |
| " <th>humidity</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>85.0</td>\n", |
| " <td>85.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>80.0</td>\n", |
| " <td>90.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>83.0</td>\n", |
| " <td>78.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>70.0</td>\n", |
| " <td>96.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>68.0</td>\n", |
| " <td>80.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>65.0</td>\n", |
| " <td>70.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>64.0</td>\n", |
| " <td>65.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>72.0</td>\n", |
| " <td>95.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>69.0</td>\n", |
| " <td>70.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>75.0</td>\n", |
| " <td>80.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>75.0</td>\n", |
| " <td>70.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>72.0</td>\n", |
| " <td>90.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>81.0</td>\n", |
| " <td>75.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>71.0</td>\n", |
| " <td>80.0</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, 85.0, 85.0),\n", |
| " (2, 80.0, 90.0),\n", |
| " (3, 83.0, 78.0),\n", |
| " (4, 70.0, 96.0),\n", |
| " (5, 68.0, 80.0),\n", |
| " (6, 65.0, 70.0),\n", |
| " (7, 64.0, 65.0),\n", |
| " (8, 72.0, 95.0),\n", |
| " (9, 69.0, 70.0),\n", |
| " (10, 75.0, 80.0),\n", |
| " (11, 75.0, 70.0),\n", |
| " (12, 72.0, 90.0),\n", |
| " (13, 81.0, 75.0),\n", |
| " (14, 71.0, 80.0)]" |
| ] |
| }, |
| "execution_count": 23, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS vec2cols_result;\n", |
| "SELECT madlib.vec2cols(\n", |
| " 'cols2vec_result', -- source table containing the feature vector\n", |
| " 'vec2cols_result', -- output table\n", |
| " 'feature_vector', -- column with array entries to split\n", |
| " (SELECT feature_names from cols2vec_result_summary), -- feature_names from summary table of cols2vec\n", |
| " 'id' -- columns to keep from source table\n", |
| ");\n", |
| "SELECT * FROM vec2cols_result ORDER BY id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "This is the same as the format of the original 'golf' dataset that we started with." |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Drop columns\n", |
| "Drop some columns from the source table." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 24, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>Temp_Humidity</th>\n", |
| " <th>clouds_airquality</th>\n", |
| " <th>windy</th>\n", |
| " <th>class</th>\n", |
| " <th>observation_weight</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>[85.0, 85.0]</td>\n", |
| " <td>[u'none', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>[80.0, 90.0]</td>\n", |
| " <td>[u'none', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>[83.0, 78.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>[70.0, 96.0]</td>\n", |
| " <td>[u'low', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>[68.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>[65.0, 70.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>7</td>\n", |
| " <td>[64.0, 65.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>[72.0, 95.0]</td>\n", |
| " <td>[u'high', u'unhealthy']</td>\n", |
| " <td>False</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>[69.0, 70.0]</td>\n", |
| " <td>[u'high', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>[75.0, 80.0]</td>\n", |
| " <td>[u'medium', u'good']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>[75.0, 70.0]</td>\n", |
| " <td>[u'none', u'good']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>5.0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>[72.0, 90.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>True</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>[81.0, 75.0]</td>\n", |
| " <td>[u'medium', u'moderate']</td>\n", |
| " <td>False</td>\n", |
| " <td>Play</td>\n", |
| " <td>1.5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>[71.0, 80.0]</td>\n", |
| " <td>[u'low', u'unhealthy']</td>\n", |
| " <td>True</td>\n", |
| " <td>Don't Play</td>\n", |
| " <td>1.0</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, [85.0, 85.0], [u'none', u'unhealthy'], False, u\"Don't Play\", 5.0),\n", |
| " (2, [80.0, 90.0], [u'none', u'moderate'], True, u\"Don't Play\", 5.0),\n", |
| " (3, [83.0, 78.0], [u'low', u'moderate'], False, u'Play', 1.5),\n", |
| " (4, [70.0, 96.0], [u'low', u'moderate'], False, u'Play', 1.0),\n", |
| " (5, [68.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0),\n", |
| " (6, [65.0, 70.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0),\n", |
| " (7, [64.0, 65.0], [u'medium', u'moderate'], True, u'Play', 1.5),\n", |
| " (8, [72.0, 95.0], [u'high', u'unhealthy'], False, u\"Don't Play\", 5.0),\n", |
| " (9, [69.0, 70.0], [u'high', u'good'], False, u'Play', 5.0),\n", |
| " (10, [75.0, 80.0], [u'medium', u'good'], False, u'Play', 1.0),\n", |
| " (11, [75.0, 70.0], [u'none', u'good'], True, u'Play', 5.0),\n", |
| " (12, [72.0, 90.0], [u'medium', u'moderate'], True, u'Play', 1.5),\n", |
| " (13, [81.0, 75.0], [u'medium', u'moderate'], False, u'Play', 1.5),\n", |
| " (14, [71.0, 80.0], [u'low', u'unhealthy'], True, u\"Don't Play\", 1.0)]" |
| ] |
| }, |
| "execution_count": 24, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS dropcols_out;\n", |
| "\n", |
| "SELECT madlib.dropcols(\n", |
| " 'golf',\n", |
| " 'dropcols_out',\n", |
| " '\"OUTLOOK\", temperature, humidity'\n", |
| ");\n", |
| "\n", |
| "SELECT * FROM dropcols_out ORDER BY id;" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 2", |
| "language": "python", |
| "name": "python2" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 2 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython2", |
| "version": "2.7.12" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 1 |
| } |