| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Weakly connected components\n", |
| "\n", |
| "Weakly connected components was added in MADlib 1.12. This notebook also includes helper functions based on the WCC output." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.\n", |
| " \"You should import from traitlets.config instead.\", ShimWarning)\n", |
| "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n", |
| " warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%load_ext sql" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": [ |
| "u'Connected: gpdbchina@madlib'" |
| ] |
| }, |
| "execution_count": 2, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Greenplum 4.3.10.0\n", |
| "%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n", |
| " \n", |
| "# PostgreSQL local\n", |
| "#%sql postgresql://fmcquillan@localhost:5432/madlib\n", |
| "\n", |
| "# Greenplum 4.2.3.0\n", |
| "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>version</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>MADlib version: 1.12-dev, git revision: rc/v1.9alpha-rc1-195-g85e89ef, cmake configuration time: Wed Jul 26 23:21:17 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'MADlib version: 1.12-dev, git revision: rc/v1.9alpha-rc1-195-g85e89ef, cmake configuration time: Wed Jul 26 23:21:17 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0',)]" |
| ] |
| }, |
| "execution_count": 3, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%sql select madlib.version();\n", |
| "#%sql select version();" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 1. Create vertex and edge tables" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 103, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "Done.\n", |
| "Done.\n", |
| "14 rows affected.\n", |
| "18 rows affected.\n", |
| "18 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>src</th>\n", |
| " <th>dest</th>\n", |
| " <th>user_id</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>0</td>\n", |
| " <td>1</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>0</td>\n", |
| " <td>2</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>2</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>3</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>5</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>6</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>0</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>6</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>3</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>11</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>12</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>12</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>13</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>13</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>10</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>15</td>\n", |
| " <td>14</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>15</td>\n", |
| " <td>16</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(0, 1, 1),\n", |
| " (0, 2, 1),\n", |
| " (1, 2, 1),\n", |
| " (1, 3, 1),\n", |
| " (2, 3, 1),\n", |
| " (2, 5, 1),\n", |
| " (2, 6, 1),\n", |
| " (3, 0, 1),\n", |
| " (5, 6, 1),\n", |
| " (6, 3, 1),\n", |
| " (10, 11, 2),\n", |
| " (10, 12, 2),\n", |
| " (11, 12, 2),\n", |
| " (11, 13, 2),\n", |
| " (12, 13, 2),\n", |
| " (13, 10, 2),\n", |
| " (15, 14, 2),\n", |
| " (15, 16, 2)]" |
| ] |
| }, |
| "execution_count": 103, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql \n", |
| "DROP TABLE IF EXISTS vertex, edge;\n", |
| "\n", |
| "CREATE TABLE vertex(\n", |
| " id INTEGER\n", |
| ");\n", |
| "\n", |
| "CREATE TABLE edge(\n", |
| " src INTEGER,\n", |
| " dest INTEGER,\n", |
| " user_id INTEGER\n", |
| ");\n", |
| "\n", |
| "INSERT INTO vertex VALUES\n", |
| "(0),\n", |
| "(1),\n", |
| "(2),\n", |
| "(3),\n", |
| "(4),\n", |
| "(5),\n", |
| "(6),\n", |
| "(10),\n", |
| "(11),\n", |
| "(12),\n", |
| "(13),\n", |
| "(14),\n", |
| "(15),\n", |
| "(16);\n", |
| "\n", |
| "INSERT INTO edge VALUES\n", |
| "(0, 1, 1),\n", |
| "(0, 2, 1),\n", |
| "(1, 2, 1),\n", |
| "(1, 3, 1),\n", |
| "(2, 3, 1),\n", |
| "(2, 5, 1),\n", |
| "(2, 6, 1),\n", |
| "(3, 0, 1),\n", |
| "(5, 6, 1),\n", |
| "(6, 3, 1),\n", |
| "(10, 11, 2),\n", |
| "(10, 12, 2),\n", |
| "(11, 12, 2),\n", |
| "(11, 13, 2),\n", |
| "(12, 13, 2),\n", |
| "(13, 10, 2),\n", |
| "(15, 16, 2),\n", |
| "(15, 14, 2);\n", |
| "\n", |
| "SELECT * FROM edge ORDER BY src, dest;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 2. Find all weakly connected components" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 104, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "14 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>component_id</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>0</td>\n", |
| " <td>0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>4</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>10</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>10</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>10</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>10</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>14</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>15</td>\n", |
| " <td>14</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>16</td>\n", |
| " <td>14</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(0, 0),\n", |
| " (1, 0),\n", |
| " (2, 0),\n", |
| " (3, 0),\n", |
| " (5, 0),\n", |
| " (6, 0),\n", |
| " (4, 4),\n", |
| " (10, 10),\n", |
| " (11, 10),\n", |
| " (12, 10),\n", |
| " (13, 10),\n", |
| " (14, 14),\n", |
| " (15, 14),\n", |
| " (16, 14)]" |
| ] |
| }, |
| "execution_count": 104, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS wcc_out, wcc_out_summary;\n", |
| "\n", |
| "SELECT madlib.weakly_connected_components(\n", |
| " 'vertex', -- Vertex table\n", |
| " 'id', -- Vertix id column\n", |
| " 'edge', -- Edge table\n", |
| " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n", |
| " 'wcc_out'); -- Output table of weakly connected components\n", |
| "\n", |
| "SELECT * FROM wcc_out ORDER BY component_id, id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 3. Weakly connected components with grouping" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 105, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "13 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>component_id</th>\n", |
| " <th>user_id</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>0</td>\n", |
| " <td>0</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>0</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>0</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>0</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>0</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>10</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>10</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>12</td>\n", |
| " <td>10</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>13</td>\n", |
| " <td>10</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>14</td>\n", |
| " <td>14</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>15</td>\n", |
| " <td>14</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>16</td>\n", |
| " <td>14</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(0, 0, 1),\n", |
| " (1, 0, 1),\n", |
| " (2, 0, 1),\n", |
| " (3, 0, 1),\n", |
| " (5, 0, 1),\n", |
| " (6, 0, 1),\n", |
| " (10, 10, 2),\n", |
| " (11, 10, 2),\n", |
| " (12, 10, 2),\n", |
| " (13, 10, 2),\n", |
| " (14, 14, 2),\n", |
| " (15, 14, 2),\n", |
| " (16, 14, 2)]" |
| ] |
| }, |
| "execution_count": 105, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS wcc_out, wcc_out_summary;\n", |
| "\n", |
| "SELECT madlib.weakly_connected_components(\n", |
| " 'vertex', -- Vertex table\n", |
| " 'id', -- Vertix id column\n", |
| " 'edge', -- Edge table\n", |
| " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n", |
| " 'wcc_out', -- Output table of weakly connected components\n", |
| " 'user_id'); -- Grouping column name\n", |
| "\n", |
| "SELECT * FROM wcc_out ORDER BY component_id, id, user_id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "Note that vertex '4' is not identified as a separate component in the above result. This is because disconnected nodes cannot be assigned to a particular group with the current graph representation in MADlib." |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 4. Largest connected component" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 106, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "2 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>user_id</th>\n", |
| " <th>component_id</th>\n", |
| " <th>num_vertices</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " <td>6</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>10</td>\n", |
| " <td>4</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, 0, 6L), (2, 10, 4L)]" |
| ] |
| }, |
| "execution_count": 106, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql \n", |
| "DROP TABLE IF EXISTS largest_cpt_table;\n", |
| "\n", |
| "SELECT madlib.graph_wcc_largest_cpt(\n", |
| " 'wcc_out', -- WCC output table\n", |
| " 'largest_cpt_table'); -- output table containing largest component ID\n", |
| "\n", |
| "SELECT * FROM largest_cpt_table ORDER BY component_id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 5. Histogram of number of vertices per connected component" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 107, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "3 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>user_id</th>\n", |
| " <th>component_id</th>\n", |
| " <th>num_vertices</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " <td>6</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>10</td>\n", |
| " <td>4</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>14</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, 0, 6L), (2, 10, 4L), (2, 14, 3L)]" |
| ] |
| }, |
| "execution_count": 107, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS histogram_table; \n", |
| "\n", |
| "SELECT madlib.graph_wcc_histogram(\n", |
| " 'wcc_out', -- WCC's output table\n", |
| " 'histogram_table'); -- output table containing the histogram of vertices\n", |
| "\n", |
| "SELECT * FROM histogram_table ORDER BY component_id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 6. Check if two vertices belong to the same component" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 108, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>user_id</th>\n", |
| " <th>component_id</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>14</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(2, 14)]" |
| ] |
| }, |
| "execution_count": 108, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS vc_table;\n", |
| "\n", |
| "SELECT madlib.graph_wcc_vertex_check(\n", |
| " 'wcc_out', -- WCC output table\n", |
| " '14,15', -- Pair of vertex IDs\n", |
| " 'vc_table'); -- output table containing components that contain the two vertices\n", |
| "\n", |
| "SELECT * FROM vc_table ORDER BY component_id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 7. Find all reachable vertices from a source vertex" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 109, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "5 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>user_id</th>\n", |
| " <th>component_id</th>\n", |
| " <th>dest</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " <td>5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " <td>6</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, 0, 1), (1, 0, 2), (1, 0, 3), (1, 0, 5), (1, 0, 6)]" |
| ] |
| }, |
| "execution_count": 109, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS reach_table;\n", |
| "\n", |
| "SELECT madlib.graph_wcc_reachable_vertices(\n", |
| " 'wcc_out', -- WCC output table\n", |
| " '0', -- source vertex\n", |
| " 'reach_table'); -- output table containing all vertices reachable from source vertex\n", |
| "\n", |
| "SELECT * FROM reach_table ORDER BY component_id, dest;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 8. Count of connected components" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 110, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "2 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>user_id</th>\n", |
| " <th>num_components</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(1, 1L), (2, 2L)]" |
| ] |
| }, |
| "execution_count": 110, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS count_table;\n", |
| "\n", |
| "SELECT madlib.graph_wcc_num_cpts(\n", |
| " 'wcc_out', -- WCC output table\n", |
| " 'count_table'); -- output table containing number of components per group\n", |
| "\n", |
| "SELECT * FROM count_table;" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 2", |
| "language": "python", |
| "name": "python2" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 2 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython2", |
| "version": "2.7.12" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 1 |
| } |