| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Breadth first search\n", |
| "\n", |
| "Breadth first search was added in MADlib 1.12." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 21, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "The sql extension is already loaded. To reload it, use:\n", |
| " %reload_ext sql\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%load_ext sql" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 22, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": [ |
| "u'Connected: gpdbchina@madlib'" |
| ] |
| }, |
| "execution_count": 22, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Greenplum 4.3.10.0\n", |
| "%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n", |
| " \n", |
| "# PostgreSQL local\n", |
| "#%sql postgresql://fmcquillan@localhost:5432/madlib\n", |
| "\n", |
| "# Greenplum 4.2.3.0\n", |
| "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 23, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>version</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>MADlib version: 1.12-dev, git revision: rel/v1.11-29-g8c9b955, cmake configuration time: Thu Jul 13 00:17:54 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'MADlib version: 1.12-dev, git revision: rel/v1.11-29-g8c9b955, cmake configuration time: Thu Jul 13 00:17:54 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0',)]" |
| ] |
| }, |
| "execution_count": 23, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%sql select madlib.version();\n", |
| "#%sql select version();" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 1. Create vertex and edge tables" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 85, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "Done.\n", |
| "Done.\n", |
| "12 rows affected.\n", |
| "11 rows affected.\n", |
| "11 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>src</th>\n", |
| " <th>dest</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>0</td>\n", |
| " <td>5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>6</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>4</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>9</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>10</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>11</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(0, 5),\n", |
| " (1, 0),\n", |
| " (1, 3),\n", |
| " (2, 6),\n", |
| " (3, 4),\n", |
| " (3, 5),\n", |
| " (4, 2),\n", |
| " (8, 9),\n", |
| " (9, 10),\n", |
| " (9, 11),\n", |
| " (10, 8)]" |
| ] |
| }, |
| "execution_count": 85, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql \n", |
| "DROP TABLE IF EXISTS vertex, edge;\n", |
| "\n", |
| "CREATE TABLE vertex(\n", |
| " id INTEGER\n", |
| " );\n", |
| "\n", |
| "CREATE TABLE edge(\n", |
| " src INTEGER,\n", |
| " dest INTEGER\n", |
| " );\n", |
| "\n", |
| "INSERT INTO vertex VALUES\n", |
| "(0),\n", |
| "(1),\n", |
| "(2),\n", |
| "(3),\n", |
| "(4),\n", |
| "(5),\n", |
| "(6),\n", |
| "(7),\n", |
| "(8),\n", |
| "(9),\n", |
| "(10),\n", |
| "(11);\n", |
| "\n", |
| "INSERT INTO edge VALUES\n", |
| "(0, 5),\n", |
| "(1, 0),\n", |
| "(1, 3),\n", |
| "(2, 6),\n", |
| "(3, 4),\n", |
| "(3, 5),\n", |
| "(4, 2),\n", |
| "(8, 9),\n", |
| "(9, 10),\n", |
| "(9, 11),\n", |
| "(10, 8);\n", |
| "\n", |
| "SELECT * FROM edge ORDER BY src, dest;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 2. Traverse undirected graph from vertex 3" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 86, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "7 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>dist</th>\n", |
| " <th>parent</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>0</td>\n", |
| " <td>None</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>0</td>\n", |
| " <td>2</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>2</td>\n", |
| " <td>4</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>6</td>\n", |
| " <td>3</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(3, 0, None),\n", |
| " (1, 1, 3),\n", |
| " (4, 1, 3),\n", |
| " (5, 1, 3),\n", |
| " (0, 2, 1),\n", |
| " (2, 2, 4),\n", |
| " (6, 3, 2)]" |
| ] |
| }, |
| "execution_count": 86, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS out, out_summary;\n", |
| "\n", |
| "SELECT madlib.graph_bfs(\n", |
| " 'vertex', -- Vertex table\n", |
| " NULL, -- Vertix id column (NULL means use default naming)\n", |
| " 'edge', -- Edge table\n", |
| " NULL, -- Edge arguments (NULL means use default naming)\n", |
| " 3, -- Source vertex for BFS\n", |
| " 'out'); -- Output table of nodes reachable from source_vertex\n", |
| " -- Default values used for the other arguments\n", |
| "\n", |
| "SELECT * FROM out ORDER BY dist,id;" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 88, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>vertex_table</th>\n", |
| " <th>vertex_id</th>\n", |
| " <th>edge_table</th>\n", |
| " <th>edge_args</th>\n", |
| " <th>source_vertex</th>\n", |
| " <th>out_table</th>\n", |
| " <th>max_distance</th>\n", |
| " <th>directed</th>\n", |
| " <th>grouping_cols</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>vertex</td>\n", |
| " <td>NULL</td>\n", |
| " <td>edge</td>\n", |
| " <td>NULL</td>\n", |
| " <td>3</td>\n", |
| " <td>out</td>\n", |
| " <td>None</td>\n", |
| " <td>None</td>\n", |
| " <td>NULL</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'vertex', u'NULL', u'edge', u'NULL', 3, u'out', None, None, u'NULL')]" |
| ] |
| }, |
| "execution_count": 88, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "SELECT * FROM out_summary;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 3. Use max_distance to limit the search distance" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 87, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "6 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>id</th>\n", |
| " <th>dist</th>\n", |
| " <th>parent</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>0</td>\n", |
| " <td>None</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>5</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>0</td>\n", |
| " <td>2</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>2</td>\n", |
| " <td>4</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(3, 0, None), (1, 1, 3), (4, 1, 3), (5, 1, 3), (0, 2, 1), (2, 2, 4)]" |
| ] |
| }, |
| "execution_count": 87, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS out_max, out_max_summary;\n", |
| "\n", |
| "SELECT madlib.graph_bfs(\n", |
| " 'vertex', -- Vertex table\n", |
| " NULL, -- Vertix id column (NULL means use default naming)\n", |
| " 'edge', -- Edge table\n", |
| " NULL, -- Edge arguments (NULL means use default naming)\n", |
| " 3, -- Source vertex for BFS\n", |
| " 'out_max', -- Output table of nodes reachable from source_vertex\n", |
| " 2); -- Maximum distance to traverse from source_vertex \n", |
| " -- Default values used for the other arguments\n", |
| " \n", |
| "SELECT * FROM out_max ORDER BY dist,id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 4. Use different column naming\n", |
| "\n", |
| "Now let's do an example using different column names in the tables (i.e., not the defaults). Create the vertex and edge tables:" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 89, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "12 rows affected.\n", |
| "11 rows affected.\n", |
| "11 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>n1</th>\n", |
| " <th>n2</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>0</td>\n", |
| " <td>5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>2</td>\n", |
| " <td>6</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>4</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>3</td>\n", |
| " <td>5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>4</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>9</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>10</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>11</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(0, 5),\n", |
| " (1, 0),\n", |
| " (1, 3),\n", |
| " (2, 6),\n", |
| " (3, 4),\n", |
| " (3, 5),\n", |
| " (4, 2),\n", |
| " (8, 9),\n", |
| " (9, 10),\n", |
| " (9, 11),\n", |
| " (10, 8)]" |
| ] |
| }, |
| "execution_count": 89, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS vertex_alt, edge_alt;\n", |
| "CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;\n", |
| "CREATE TABLE edge_alt AS SELECT src AS n1, dest AS n2 FROM edge;\n", |
| "SELECT * FROM edge_alt ORDER BY n1, n2;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 5. Run BFS from vertex 8" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 90, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "4 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>v_id</th>\n", |
| " <th>dist</th>\n", |
| " <th>parent</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>0</td>\n", |
| " <td>None</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>1</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>1</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>2</td>\n", |
| " <td>9</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(8, 0, None), (9, 1, 8), (10, 1, 8), (11, 2, 9)]" |
| ] |
| }, |
| "execution_count": 90, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS out_alt, out_alt_summary;\n", |
| "\n", |
| "SELECT madlib.graph_bfs(\n", |
| " 'vertex_alt', -- Vertex table\n", |
| " 'v_id', -- Vertex id column (NULL means use default naming)\n", |
| " 'edge_alt', -- Edge table\n", |
| " 'src=n1, dest=n2', -- Edge arguments (NULL means use default naming)\n", |
| " 8, -- Source vertex for BFS\n", |
| " 'out_alt'); -- Output table of nodes reachable from source_vertex\n", |
| "\n", |
| "SELECT * FROM out_alt ORDER BY v_id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 6. Directed graph\n", |
| "\n", |
| "Now we show an example where the graph is treated as a directed graph." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 91, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "4 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>v_id</th>\n", |
| " <th>dist</th>\n", |
| " <th>parent</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>8</td>\n", |
| " <td>0</td>\n", |
| " <td>None</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>9</td>\n", |
| " <td>1</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>10</td>\n", |
| " <td>2</td>\n", |
| " <td>9</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>11</td>\n", |
| " <td>2</td>\n", |
| " <td>9</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(8, 0, None), (9, 1, 8), (10, 2, 9), (11, 2, 9)]" |
| ] |
| }, |
| "execution_count": 91, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS out_alt_dir, out_alt_dir_summary;\n", |
| "\n", |
| "SELECT madlib.graph_bfs(\n", |
| " 'vertex_alt', -- Vertex table\n", |
| " 'v_id', -- Vertex id column (NULL means use default naming)\n", |
| " 'edge_alt', -- Edge table\n", |
| " 'src=n1, dest=n2', -- Edge arguments (NULL means use default naming)\n", |
| " 8, -- Source vertex for BFS\n", |
| " 'out_alt_dir', -- Output table of nodes reachable from source_vertex\n", |
| " NULL, -- Maximum distance to traverse from source_vertex\n", |
| " TRUE); -- Flag for specifying directed graph\n", |
| "\n", |
| "SELECT * FROM out_alt_dir ORDER BY v_id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "Notice that, with the graph being treated as directed, the parent of v_id=10 is now vertex 9 and not 8 as in the undirected case." |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 7. Grouping\n", |
| "\n", |
| "Create a graph with 2 groups." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 92, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "Done.\n", |
| "15 rows affected.\n", |
| "15 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>g1</th>\n", |
| " <th>g2</th>\n", |
| " <th>src</th>\n", |
| " <th>dest</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>1</td>\n", |
| " <td>0</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>2</td>\n", |
| " <td>6</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>3</td>\n", |
| " <td>4</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>3</td>\n", |
| " <td>5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>4</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>8</td>\n", |
| " <td>9</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>9</td>\n", |
| " <td>10</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>9</td>\n", |
| " <td>11</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>10</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>0</td>\n", |
| " <td>5</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>202</td>\n", |
| " <td>c</td>\n", |
| " <td>9</td>\n", |
| " <td>10</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>202</td>\n", |
| " <td>c</td>\n", |
| " <td>9</td>\n", |
| " <td>11</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>202</td>\n", |
| " <td>c</td>\n", |
| " <td>10</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>202</td>\n", |
| " <td>c</td>\n", |
| " <td>8</td>\n", |
| " <td>9</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(100, u'a', 1, 0),\n", |
| " (100, u'a', 1, 3),\n", |
| " (100, u'a', 2, 6),\n", |
| " (100, u'a', 3, 4),\n", |
| " (100, u'a', 3, 5),\n", |
| " (100, u'a', 4, 2),\n", |
| " (100, u'a', 8, 9),\n", |
| " (100, u'a', 9, 10),\n", |
| " (100, u'a', 9, 11),\n", |
| " (100, u'a', 10, 8),\n", |
| " (100, u'a', 0, 5),\n", |
| " (202, u'c', 9, 10),\n", |
| " (202, u'c', 9, 11),\n", |
| " (202, u'c', 10, 8),\n", |
| " (202, u'c', 8, 9)]" |
| ] |
| }, |
| "execution_count": 92, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS edge_gr;\n", |
| "\n", |
| "CREATE TABLE edge_gr(\n", |
| " g1 INTEGER,\n", |
| " g2 TEXT,\n", |
| " src INTEGER,\n", |
| " dest INTEGER\n", |
| " );\n", |
| "\n", |
| "INSERT INTO edge_gr VALUES\n", |
| "(100, 'a', 0, 5),\n", |
| "(100, 'a', 1, 0),\n", |
| "(100, 'a', 1, 3),\n", |
| "(100, 'a', 2, 6),\n", |
| "(100, 'a', 3, 4),\n", |
| "(100, 'a', 3, 5),\n", |
| "(100, 'a', 4, 2),\n", |
| "(100, 'a', 8, 9),\n", |
| "(100, 'a', 9, 10),\n", |
| "(100, 'a', 9, 11),\n", |
| "(100, 'a', 10, 8),\n", |
| "(202, 'c', 8, 9),\n", |
| "(202, 'c', 9, 10),\n", |
| "(202, 'c', 9, 11),\n", |
| "(202, 'c', 10, 8);\n", |
| "\n", |
| "SELECT * FROM edge_gr ORDER BY g1, g2;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# 8. Run BFS for all groups" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 93, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "8 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>g1</th>\n", |
| " <th>g2</th>\n", |
| " <th>id</th>\n", |
| " <th>dist</th>\n", |
| " <th>parent</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>8</td>\n", |
| " <td>0</td>\n", |
| " <td>None</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>9</td>\n", |
| " <td>1</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>10</td>\n", |
| " <td>1</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>11</td>\n", |
| " <td>2</td>\n", |
| " <td>9</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>202</td>\n", |
| " <td>c</td>\n", |
| " <td>8</td>\n", |
| " <td>0</td>\n", |
| " <td>None</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>202</td>\n", |
| " <td>c</td>\n", |
| " <td>9</td>\n", |
| " <td>1</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>202</td>\n", |
| " <td>c</td>\n", |
| " <td>10</td>\n", |
| " <td>1</td>\n", |
| " <td>8</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>202</td>\n", |
| " <td>c</td>\n", |
| " <td>11</td>\n", |
| " <td>2</td>\n", |
| " <td>9</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(100, u'a', 8, 0, None),\n", |
| " (100, u'a', 9, 1, 8),\n", |
| " (100, u'a', 10, 1, 8),\n", |
| " (100, u'a', 11, 2, 9),\n", |
| " (202, u'c', 8, 0, None),\n", |
| " (202, u'c', 9, 1, 8),\n", |
| " (202, u'c', 10, 1, 8),\n", |
| " (202, u'c', 11, 2, 9)]" |
| ] |
| }, |
| "execution_count": 93, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS out_gr, out_gr_summary;\n", |
| "\n", |
| "SELECT madlib.graph_bfs(\n", |
| " 'vertex', -- Vertex table\n", |
| " NULL, -- Vertex id column (NULL means use default naming)\n", |
| " 'edge_gr', -- Edge table\n", |
| " NULL, -- Edge arguments (NULL means use default naming)\n", |
| " 8, -- Source vertex for BFS\n", |
| " 'out_gr', -- Output table of nodes reachable from source_vertex\n", |
| " NULL, -- Maximum distance to traverse from source_vertex\n", |
| " NULL, -- Flag for specifying directed graph\n", |
| " 'g1,g2' -- Grouping columns\n", |
| ");\n", |
| "\n", |
| "SELECT * FROM out_gr ORDER BY g1,g2,dist,id;" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "If source_vertex is not present in a group, then that group will not appear in the output table:" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 94, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Done.\n", |
| "1 rows affected.\n", |
| "7 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>g1</th>\n", |
| " <th>g2</th>\n", |
| " <th>id</th>\n", |
| " <th>dist</th>\n", |
| " <th>parent</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>3</td>\n", |
| " <td>0</td>\n", |
| " <td>None</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>1</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>4</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>5</td>\n", |
| " <td>1</td>\n", |
| " <td>3</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>0</td>\n", |
| " <td>2</td>\n", |
| " <td>1</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>2</td>\n", |
| " <td>2</td>\n", |
| " <td>4</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>100</td>\n", |
| " <td>a</td>\n", |
| " <td>6</td>\n", |
| " <td>3</td>\n", |
| " <td>2</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(100, u'a', 3, 0, None),\n", |
| " (100, u'a', 1, 1, 3),\n", |
| " (100, u'a', 4, 1, 3),\n", |
| " (100, u'a', 5, 1, 3),\n", |
| " (100, u'a', 0, 2, 1),\n", |
| " (100, u'a', 2, 2, 4),\n", |
| " (100, u'a', 6, 3, 2)]" |
| ] |
| }, |
| "execution_count": 94, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%sql\n", |
| "DROP TABLE IF EXISTS out_gr, out_gr_summary;\n", |
| "\n", |
| "SELECT madlib.graph_bfs(\n", |
| " 'vertex', -- Vertex table\n", |
| " NULL, -- Vertex id column (NULL means use default naming)\n", |
| " 'edge_gr', -- Edge table\n", |
| " NULL, -- Edge arguments (NULL means use default naming)\n", |
| " 3, -- Source vertex for BFS\n", |
| " 'out_gr', -- Output table of nodes reachable from source_vertex\n", |
| " NULL, -- Maximum distance to traverse from source_vertex\n", |
| " NULL, -- Flag for specifying directed graph\n", |
| " 'g1,g2' -- Grouping columns\n", |
| ");\n", |
| "\n", |
| "SELECT * FROM out_gr ORDER BY g1,g2,dist,id;" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 2", |
| "language": "python", |
| "name": "python2" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 2 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython2", |
| "version": "2.7.12" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 1 |
| } |