src/ports/postgres/modules/graph/apsp.sql_in - madlib - Git at Google

 /* ----------------------------------------------------------------------- *//**
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
  *
  * @file apsp.sql_in
  *
  * @brief SQL functions for graph analytics
  * @date Nov 2016
  *
  * @sa Provides all pairs shortest path algorithm.
  *
  *//* ----------------------------------------------------------------------- */
 m4_include(`SQLCommon.m4')

 /**
 @addtogroup grp_apsp

 <div class="toc"><b>Contents</b>
 <ul>
 <li><a href="#apsp">APSP</a></li>
 <li><a href="#examples">Examples</a></li>
 <li><a href="#notes">Notes</a></li>
 <li><a href="#literature">Literature</a></li>
 </ul>
 </div>

 @brief Finds the shortest paths between every vertex pair in a given graph.

 The all pairs shortest paths (APSP) algorithm finds the length (summed weights)
 of the shortest paths between all pairs of vertices, such that the sum of the
 weights of the path edges is minimized.

 @warning APSP is an expensive algorithm for run-time
 because it finds the shortest path between all nodes
 in the graph.  It is recommended that you start with a
 small graph to get a sense of run-time for your use case,
 then increase size carefully from there.  The worst case run-time
 for this implementation is O(V^2 * E) where V is the
 number of vertices and E is the number of edges.  In
 practice, run-time will be generally be
 much less than this, but it depends on the graph.
 On a Greenplum cluster, the edge table should be distributed
 by the source vertex id column for better performance.

 @anchor apsp
 @par APSP
 <pre class="syntax">
 graph_apsp( vertex_table,
             vertex_id,
             edge_table,
             edge_args,
             out_table,
             grouping_cols
           )
 </pre>

 \b Arguments
 <dl class="arglist">
 <dt>vertex_table</dt>
 <dd>TEXT. Name of the table containing the vertex data for the graph. Must
 contain the column specified in the 'vertex_id' parameter below.</dd>

 <dt>vertex_id</dt>
 <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
 vertex ids.  The vertex ids can be of type INTEGER or BIGINT with no duplicates.
 They do not need to be contiguous.</dd>

 <dt>edge_table</dt>
 <dd>TEXT. Name of the table containing the edge data. The edge table must
 contain columns for source vertex, destination vertex and edge weight.
 Column naming convention is described below in the 'edge_args' parameter.</dd>

 <dt>edge_args</dt>
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
   - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the
   edge table. Default column name is 'src'.
   - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids
   in the edge table. Default column name is 'dest'.
   - weight (FLOAT8): Name of the column containing the edge weights in the
   edge table. Default column name is 'weight'.</dd>

 <dt>out_table</dt>
 <dd>TEXT. Name of the table to store the result of APSP.
 It contains a row for every vertex of every group and have
 the following columns (in addition to the grouping columns):
   - source_vertex: The id for the source vertex. Will use the input edge
   column 'src' for column naming.
   - dest_vertex: The id for the destination vertex. Will use the input
   edge column 'dest' for column naming.
   - weight: The total weight of the shortest path from the source vertex to
   the destination vertex. Will use the input parameter 'weight' for column
   naming.
   - parent: The parent of the destination vertex in the shortest path from
   source. Parent will equal dest_vertex if there are no
   intermediate vertices. Will use 'parent' for column naming.

 A summary table named <out_table>_summary is also created. This is an internal
 table that keeps a record of the input parameters and is used by the path
 retrieval function described below.
 </dd>

 <dt>grouping_cols (optional)</dt>
 <dd>TEXT, default = NULL. List of columns used to group the input into discrete
 subgraphs. These columns must exist in the edge table. When this value is null,
 no grouping is used and a single APSP result is generated. </dd>
 </dl>

 @par Path Retrieval

 The path retrieval function returns the shortest path from the
 source vertex to a specified desination vertex.

 <pre class="syntax">
 graph_apsp_get_path( apsp_table,
                      source_vertex,
                      dest_vertex,
                      path_table
                     )
 </pre>

 \b Arguments
 <dl class="arglist">
 <dt>apsp_table</dt>
 <dd>TEXT. Name of the table that contains the APSP output.</dd>

 <dt>source_vertex</dt>
 <dd>INTEGER or BIGINT. The vertex that will be the source of the desired path.</dd>

 <dt>dest_vertex</dt>
 <dd>INTEGER or BIGINT. The vertex that will be the destination of the desired path.</dd>

 <dt>path_table</dt>
 <dd>TEXT. Name of the output table that contains the path.
 It contains a row for every group and has the following columns:
   - grouping_cols: The grouping columns given in the creation of the APSP
   table. If there are no grouping columns, these columns will not exist and the
   table will have a single row.
   - path (ARRAY): The shortest path from the source vertex to the destination
   vertex.
 </dd>

 </dl>

 @anchor examples
 @examp

 -# Create vertex and edge tables to represent the graph:
 <pre class="syntax">
 DROP TABLE IF EXISTS vertex, edge;
 CREATE TABLE vertex(
         id INTEGER
         );
 CREATE TABLE edge(
         src INTEGER,
         dest INTEGER,
         weight FLOAT8
         );
 INSERT INTO vertex VALUES
 (0),
 (1),
 (2),
 (3),
 (4),
 (5),
 (6),
 (7);
 INSERT INTO edge VALUES
 (0, 1, 1.0),
 (0, 2, 1.0),
 (0, 4, 10.0),
 (1, 2, 2.0),
 (1, 3, 10.0),
 (2, 3, 1.0),
 (2, 5, 1.0),
 (2, 6, 3.0),
 (3, 0, 1.0),
 (4, 0, -2.0),
 (5, 6, 1.0),
 (6, 7, 1.0);
 </pre>

 -# Calculate the shortest paths:
 <pre class="syntax">
 DROP TABLE IF EXISTS out, out_summary;
 SELECT madlib.graph_apsp(
                          'vertex',      -- Vertex table
                          NULL,          -- Vertix id column (NULL means use default naming)
                          'edge',        -- Edge table
                          NULL,          -- Edge arguments (NULL means use default naming)
                          'out');        -- Output table of shortest paths
 SELECT * FROM out ORDER BY src,dest;
 </pre>
 <pre class="result">
  src | dest |  weight  | parent
 -----+------+----------+--------
    0 |    0 |        0 |      0
    0 |    1 |        1 |      1
    0 |    2 |        1 |      2
    0 |    3 |        2 |      2
    0 |    4 |       10 |      4
    0 |    5 |        2 |      2
    0 |    6 |        3 |      5
    0 |    7 |        4 |      6
    1 |    0 |        4 |      3
    1 |    1 |        0 |      1
    1 |    2 |        2 |      2
    1 |    3 |        3 |      2
    1 |    4 |       14 |      0
    1 |    5 |        3 |      2
    1 |    6 |        4 |      5
    1 |    7 |        5 |      6
 (showing only 16 of 64 rows)
 </pre>

 -# Get the shortest path from vertex 0 to vertex 5:
 <pre class="syntax">
 DROP TABLE IF EXISTS out_path;
 SELECT madlib.graph_apsp_get_path('out',0,5,'out_path');
 SELECT * FROM out_path;
 </pre>
 <pre class="result">
   path
 \---------
  {0,2,5}
 </pre>

 -# Now let's do a similar example except using
 different column names in the tables (i.e., not the defaults).
 Create the vertex and edge tables:
 <pre class="syntax">
 DROP TABLE IF EXISTS vertex_alt, edge_alt;
 CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;
 CREATE TABLE edge_alt AS SELECT src AS e_src, dest, weight AS e_weight FROM edge;
 </pre>

 -# Calculate the shortest paths:
 <pre class="syntax">
 DROP TABLE IF EXISTS out_alt, out_alt_summary;
 SELECT madlib.graph_apsp(
                          'vertex_alt',                  -- Vertex table
                          'v_id',                        -- Vertex id column
                          'edge_alt',                    -- Edge table
                          'src=e_src, weight=e_weight',  -- Edge arguments
                          'out_alt');                    -- Output table of shortest paths
 SELECT * FROM out_alt ORDER BY e_src, dest;
 </pre>
 <pre class="result">
  e_src | dest | e_weight | parent
 -------+------+----------+--------
      0 |    0 |        0 |      0
      0 |    1 |        1 |      1
      0 |    2 |        1 |      2
      0 |    3 |        2 |      2
      0 |    4 |       10 |      4
      0 |    5 |        2 |      2
      0 |    6 |        3 |      5
      0 |    7 |        4 |      6
      1 |    0 |        4 |      3
      1 |    1 |        0 |      1
      1 |    2 |        2 |      2
      1 |    3 |        3 |      2
      1 |    4 |       14 |      0
      1 |    5 |        3 |      2
      1 |    6 |        4 |      5
      1 |    7 |        5 |      6
 (showing only 16 of 64 rows)
 </pre>

 -# Create a graph with 2 groups and find APSP for each group:
 <pre class="syntax">
 DROP TABLE IF EXISTS edge_gr;
 CREATE TABLE edge_gr AS
 (
   SELECT *, 0 AS grp FROM edge
   UNION
   SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6
 );
 INSERT INTO edge_gr VALUES
 (4,5,-20,1);
 </pre>

 -# Find APSP for all groups:
 <pre class="syntax">
 DROP TABLE IF EXISTS out_gr, out_gr_summary;
 SELECT madlib.graph_apsp(
                          'vertex',      -- Vertex table
                          NULL,          -- Vertex id column (NULL means use default naming)
                          'edge_gr',     -- Edge table
                          NULL,          -- Edge arguments (NULL means use default naming)
                          'out_gr',      -- Output table of shortest paths
                          'grp'          -- Grouping columns
 );
 SELECT * FROM out_gr WHERE src < 2 ORDER BY grp,src,dest;
 </pre>
 <pre class="result">
  grp | src | dest | weight | parent
 -----+-----+------+--------+--------
    0 |   0 |    0 |      0 |      0
    0 |   0 |    1 |      1 |      1
    0 |   0 |    2 |      1 |      2
    0 |   0 |    3 |      2 |      2
    0 |   0 |    4 |     10 |      4
    0 |   0 |    5 |      2 |      2
    0 |   0 |    6 |      4 |      2
    0 |   0 |    7 |      5 |      6
    0 |   1 |    0 |      4 |      3
    0 |   1 |    1 |      0 |      1
    0 |   1 |    2 |      2 |      2
    0 |   1 |    3 |      3 |      2
    0 |   1 |    4 |     14 |      0
    0 |   1 |    5 |      3 |      2
    0 |   1 |    6 |      4 |      5
    0 |   1 |    7 |      5 |      6
    1 |   0 |    0 |      0 |      0
    1 |   0 |    1 |      1 |      1
    1 |   0 |    2 |      1 |      2
    1 |   0 |    3 |      2 |      2
    1 |   0 |    4 |     10 |      4
    1 |   0 |    5 |    -10 |      4
    1 |   1 |    0 |      4 |      3
    1 |   1 |    1 |      0 |      1
    1 |   1 |    2 |      2 |      2
    1 |   1 |    3 |      3 |      2
    1 |   1 |    4 |     14 |      0
    1 |   1 |    5 |     -6 |      4
 (28 rows)
 </pre>

 -# Find the path from vertex 0 to vertex 5 in every group
 <pre class="syntax">
 DROP TABLE IF EXISTS out_gr_path;
 SELECT madlib.graph_apsp_get_path('out_gr',0,5,'out_gr_path');
 SELECT * FROM out_gr_path ORDER BY grp;
 </pre>
 <pre class="result">
  grp |  path
 -----+---------
    0 | {0,2,5}
    1 | {0,4,5}
 </pre>

 @anchor notes
 @par Notes

 1. Graphs with negative edges are supported but graphs with negative cycles are not.

 2. The implementation for APSP is analogous to a matrix multiplication operation.
 Please refer to the MADlib design document and references [1] and [2]
 for more details.

 3. Also see the Grail project [3] for more background on graph analytics processing
 in relational databases.

 @anchor literature
 @par Literature

 [1] http://www.columbia.edu/~cs2035/courses/ieor6614.S11/apsp.pdf

 [2] http://users.cecs.anu.edu.au/~Alistair.Rendell/Teaching/apac_comp3600/module4/all_pairs_shortest_paths.xhtml

 [3] The case against specialized graph analytics engines, J. Fan, G. Soosai Raj,
 and J. M. Patel. CIDR 2015. http://cidrdb.org/cidr2015/Papers/CIDR15_Paper20.pdf
 */

 -------------------------------------------------------------------------


 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp(
     vertex_table            TEXT,
     vertex_id               TEXT,
     edge_table              TEXT,
     edge_args               TEXT,
     out_table               TEXT,
     grouping_cols           TEXT

 ) RETURNS VOID AS $$
     PythonFunction(graph, apsp, graph_apsp)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
 -------------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp(
     vertex_table            TEXT,
     vertex_id               TEXT,
     edge_table              TEXT,
     edge_args               TEXT,
     out_table               TEXT

 ) RETURNS VOID AS $$
      SELECT MADLIB_SCHEMA.graph_apsp($1, $2, $3, $4, $5, NULL);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
 -------------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp_get_path(
     apsp_table             TEXT,
     source_vertex          BIGINT,
     dest_vertex            BIGINT,
     path_table             TEXT

 ) RETURNS VOID AS $$
     PythonFunction(graph, apsp, graph_apsp_get_path)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
 -------------------------------------------------------------------------------

 -- Online help
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp(
     message VARCHAR
 ) RETURNS VARCHAR AS $$
     PythonFunction(graph, apsp, graph_apsp_help)
 $$ LANGUAGE plpythonu IMMUTABLE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

 --------------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp()
 RETURNS VARCHAR AS $$
     SELECT MADLIB_SCHEMA.graph_apsp('');
 $$ LANGUAGE sql IMMUTABLE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
 --------------------------------------------------------------------------------
	/* ----------------------------------------------------------------------- //*
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*
	*
	* @file apsp.sql_in
	*
	* @brief SQL functions for graph analytics
	* @date Nov 2016
	*
	* @sa Provides all pairs shortest path algorithm.
	*
	// ----------------------------------------------------------------------- */
	m4_include(`SQLCommon.m4')

	/**
	@addtogroup grp_apsp

	<div class="toc"><b>Contents</b>
	<ul>
	<li><a href="#apsp">APSP</a></li>
	<li><a href="#examples">Examples</a></li>
	<li><a href="#notes">Notes</a></li>
	<li><a href="#literature">Literature</a></li>
	</ul>
	</div>

	@brief Finds the shortest paths between every vertex pair in a given graph.

	The all pairs shortest paths (APSP) algorithm finds the length (summed weights)
	of the shortest paths between all pairs of vertices, such that the sum of the
	weights of the path edges is minimized.

	@warning APSP is an expensive algorithm for run-time
	because it finds the shortest path between all nodes
	in the graph. It is recommended that you start with a
	small graph to get a sense of run-time for your use case,
	then increase size carefully from there. The worst case run-time
	for this implementation is O(V^2 * E) where V is the
	number of vertices and E is the number of edges. In
	practice, run-time will be generally be
	much less than this, but it depends on the graph.
	On a Greenplum cluster, the edge table should be distributed
	by the source vertex id column for better performance.

	@anchor apsp
	@par APSP
	<pre class="syntax">
	graph_apsp( vertex_table,
	vertex_id,
	edge_table,
	edge_args,
	out_table,
	grouping_cols
	)
	</pre>

	\b Arguments
	<dl class="arglist">
	<dt>vertex_table</dt>
	<dd>TEXT. Name of the table containing the vertex data for the graph. Must
	contain the column specified in the 'vertex_id' parameter below.</dd>

	<dt>vertex_id</dt>
	<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
	vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates.
	They do not need to be contiguous.</dd>

	<dt>edge_table</dt>
	<dd>TEXT. Name of the table containing the edge data. The edge table must
	contain columns for source vertex, destination vertex and edge weight.
	Column naming convention is described below in the 'edge_args' parameter.</dd>

	<dt>edge_args</dt>
	<dd>TEXT. A comma-delimited string containing multiple named arguments of
	the form "name=value". The following parameters are supported for
	this string argument:
	- src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the
	edge table. Default column name is 'src'.
	- dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids
	in the edge table. Default column name is 'dest'.
	- weight (FLOAT8): Name of the column containing the edge weights in the
	edge table. Default column name is 'weight'.</dd>

	<dt>out_table</dt>
	<dd>TEXT. Name of the table to store the result of APSP.
	It contains a row for every vertex of every group and have
	the following columns (in addition to the grouping columns):
	- source_vertex: The id for the source vertex. Will use the input edge
	column 'src' for column naming.
	- dest_vertex: The id for the destination vertex. Will use the input
	edge column 'dest' for column naming.
	- weight: The total weight of the shortest path from the source vertex to
	the destination vertex. Will use the input parameter 'weight' for column
	naming.
	- parent: The parent of the destination vertex in the shortest path from
	source. Parent will equal dest_vertex if there are no
	intermediate vertices. Will use 'parent' for column naming.

	A summary table named <out_table>_summary is also created. This is an internal
	table that keeps a record of the input parameters and is used by the path
	retrieval function described below.
	</dd>

	<dt>grouping_cols (optional)</dt>
	<dd>TEXT, default = NULL. List of columns used to group the input into discrete
	subgraphs. These columns must exist in the edge table. When this value is null,
	no grouping is used and a single APSP result is generated. </dd>
	</dl>

	@par Path Retrieval

	The path retrieval function returns the shortest path from the
	source vertex to a specified desination vertex.

	<pre class="syntax">
	graph_apsp_get_path( apsp_table,
	source_vertex,
	dest_vertex,
	path_table
	)
	</pre>

	\b Arguments
	<dl class="arglist">
	<dt>apsp_table</dt>
	<dd>TEXT. Name of the table that contains the APSP output.</dd>

	<dt>source_vertex</dt>
	<dd>INTEGER or BIGINT. The vertex that will be the source of the desired path.</dd>

	<dt>dest_vertex</dt>
	<dd>INTEGER or BIGINT. The vertex that will be the destination of the desired path.</dd>

	<dt>path_table</dt>
	<dd>TEXT. Name of the output table that contains the path.
	It contains a row for every group and has the following columns:
	- grouping_cols: The grouping columns given in the creation of the APSP
	table. If there are no grouping columns, these columns will not exist and the
	table will have a single row.
	- path (ARRAY): The shortest path from the source vertex to the destination
	vertex.
	</dd>

	</dl>

	@anchor examples
	@examp

	-# Create vertex and edge tables to represent the graph:
	<pre class="syntax">
	DROP TABLE IF EXISTS vertex, edge;
	CREATE TABLE vertex(
	id INTEGER
	);
	CREATE TABLE edge(
	src INTEGER,
	dest INTEGER,
	weight FLOAT8
	);
	INSERT INTO vertex VALUES
	(0),
	(1),
	(2),
	(3),
	(4),
	(5),
	(6),
	(7);
	INSERT INTO edge VALUES
	(0, 1, 1.0),
	(0, 2, 1.0),
	(0, 4, 10.0),
	(1, 2, 2.0),
	(1, 3, 10.0),
	(2, 3, 1.0),
	(2, 5, 1.0),
	(2, 6, 3.0),
	(3, 0, 1.0),
	(4, 0, -2.0),
	(5, 6, 1.0),
	(6, 7, 1.0);
	</pre>

	-# Calculate the shortest paths:
	<pre class="syntax">
	DROP TABLE IF EXISTS out, out_summary;
	SELECT madlib.graph_apsp(
	'vertex', -- Vertex table
	NULL, -- Vertix id column (NULL means use default naming)
	'edge', -- Edge table
	NULL, -- Edge arguments (NULL means use default naming)
	'out'); -- Output table of shortest paths
	SELECT * FROM out ORDER BY src,dest;
	</pre>
	<pre class="result">
	src \| dest \| weight \| parent
	-----+------+----------+--------
	0 \| 0 \| 0 \| 0
	0 \| 1 \| 1 \| 1
	0 \| 2 \| 1 \| 2
	0 \| 3 \| 2 \| 2
	0 \| 4 \| 10 \| 4
	0 \| 5 \| 2 \| 2
	0 \| 6 \| 3 \| 5
	0 \| 7 \| 4 \| 6
	1 \| 0 \| 4 \| 3
	1 \| 1 \| 0 \| 1
	1 \| 2 \| 2 \| 2
	1 \| 3 \| 3 \| 2
	1 \| 4 \| 14 \| 0
	1 \| 5 \| 3 \| 2
	1 \| 6 \| 4 \| 5
	1 \| 7 \| 5 \| 6
	(showing only 16 of 64 rows)
	</pre>

	-# Get the shortest path from vertex 0 to vertex 5:
	<pre class="syntax">
	DROP TABLE IF EXISTS out_path;
	SELECT madlib.graph_apsp_get_path('out',0,5,'out_path');
	SELECT * FROM out_path;
	</pre>
	<pre class="result">
	path
	\---------
	{0,2,5}
	</pre>

	-# Now let's do a similar example except using
	different column names in the tables (i.e., not the defaults).
	Create the vertex and edge tables:
	<pre class="syntax">
	DROP TABLE IF EXISTS vertex_alt, edge_alt;
	CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;
	CREATE TABLE edge_alt AS SELECT src AS e_src, dest, weight AS e_weight FROM edge;
	</pre>

	-# Calculate the shortest paths:
	<pre class="syntax">
	DROP TABLE IF EXISTS out_alt, out_alt_summary;
	SELECT madlib.graph_apsp(
	'vertex_alt', -- Vertex table
	'v_id', -- Vertex id column
	'edge_alt', -- Edge table
	'src=e_src, weight=e_weight', -- Edge arguments
	'out_alt'); -- Output table of shortest paths
	SELECT * FROM out_alt ORDER BY e_src, dest;
	</pre>
	<pre class="result">
	e_src \| dest \| e_weight \| parent
	-------+------+----------+--------
	0 \| 0 \| 0 \| 0
	0 \| 1 \| 1 \| 1
	0 \| 2 \| 1 \| 2
	0 \| 3 \| 2 \| 2
	0 \| 4 \| 10 \| 4
	0 \| 5 \| 2 \| 2
	0 \| 6 \| 3 \| 5
	0 \| 7 \| 4 \| 6
	1 \| 0 \| 4 \| 3
	1 \| 1 \| 0 \| 1
	1 \| 2 \| 2 \| 2
	1 \| 3 \| 3 \| 2
	1 \| 4 \| 14 \| 0
	1 \| 5 \| 3 \| 2
	1 \| 6 \| 4 \| 5
	1 \| 7 \| 5 \| 6
	(showing only 16 of 64 rows)
	</pre>

	-# Create a graph with 2 groups and find APSP for each group:
	<pre class="syntax">
	DROP TABLE IF EXISTS edge_gr;
	CREATE TABLE edge_gr AS
	(
	SELECT *, 0 AS grp FROM edge
	UNION
	SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6
	);
	INSERT INTO edge_gr VALUES
	(4,5,-20,1);
	</pre>

	-# Find APSP for all groups:
	<pre class="syntax">
	DROP TABLE IF EXISTS out_gr, out_gr_summary;
	SELECT madlib.graph_apsp(
	'vertex', -- Vertex table
	NULL, -- Vertex id column (NULL means use default naming)
	'edge_gr', -- Edge table
	NULL, -- Edge arguments (NULL means use default naming)
	'out_gr', -- Output table of shortest paths
	'grp' -- Grouping columns
	);
	SELECT * FROM out_gr WHERE src < 2 ORDER BY grp,src,dest;
	</pre>
	<pre class="result">
	grp \| src \| dest \| weight \| parent
	-----+-----+------+--------+--------
	0 \| 0 \| 0 \| 0 \| 0
	0 \| 0 \| 1 \| 1 \| 1
	0 \| 0 \| 2 \| 1 \| 2
	0 \| 0 \| 3 \| 2 \| 2
	0 \| 0 \| 4 \| 10 \| 4
	0 \| 0 \| 5 \| 2 \| 2
	0 \| 0 \| 6 \| 4 \| 2
	0 \| 0 \| 7 \| 5 \| 6
	0 \| 1 \| 0 \| 4 \| 3
	0 \| 1 \| 1 \| 0 \| 1
	0 \| 1 \| 2 \| 2 \| 2
	0 \| 1 \| 3 \| 3 \| 2
	0 \| 1 \| 4 \| 14 \| 0
	0 \| 1 \| 5 \| 3 \| 2
	0 \| 1 \| 6 \| 4 \| 5
	0 \| 1 \| 7 \| 5 \| 6
	1 \| 0 \| 0 \| 0 \| 0
	1 \| 0 \| 1 \| 1 \| 1
	1 \| 0 \| 2 \| 1 \| 2
	1 \| 0 \| 3 \| 2 \| 2
	1 \| 0 \| 4 \| 10 \| 4
	1 \| 0 \| 5 \| -10 \| 4
	1 \| 1 \| 0 \| 4 \| 3
	1 \| 1 \| 1 \| 0 \| 1
	1 \| 1 \| 2 \| 2 \| 2
	1 \| 1 \| 3 \| 3 \| 2
	1 \| 1 \| 4 \| 14 \| 0
	1 \| 1 \| 5 \| -6 \| 4
	(28 rows)
	</pre>

	-# Find the path from vertex 0 to vertex 5 in every group
	<pre class="syntax">
	DROP TABLE IF EXISTS out_gr_path;
	SELECT madlib.graph_apsp_get_path('out_gr',0,5,'out_gr_path');
	SELECT * FROM out_gr_path ORDER BY grp;
	</pre>
	<pre class="result">
	grp \| path
	-----+---------
	0 \| {0,2,5}
	1 \| {0,4,5}
	</pre>

	@anchor notes
	@par Notes

	1. Graphs with negative edges are supported but graphs with negative cycles are not.

	2. The implementation for APSP is analogous to a matrix multiplication operation.
	Please refer to the MADlib design document and references [1] and [2]
	for more details.

	3. Also see the Grail project [3] for more background on graph analytics processing
	in relational databases.

	@anchor literature
	@par Literature

	[1] http://www.columbia.edu/~cs2035/courses/ieor6614.S11/apsp.pdf

	[2] http://users.cecs.anu.edu.au/~Alistair.Rendell/Teaching/apac_comp3600/module4/all_pairs_shortest_paths.xhtml

	[3] The case against specialized graph analytics engines, J. Fan, G. Soosai Raj,
	and J. M. Patel. CIDR 2015. http://cidrdb.org/cidr2015/Papers/CIDR15_Paper20.pdf
	*/

	-------------------------------------------------------------------------


	CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp(
	vertex_table TEXT,
	vertex_id TEXT,
	edge_table TEXT,
	edge_args TEXT,
	out_table TEXT,
	grouping_cols TEXT

	) RETURNS VOID AS $$
	PythonFunction(graph, apsp, graph_apsp)
	$$ LANGUAGE plpythonu VOLATILE
	m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
	-------------------------------------------------------------------------------

	CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp(
	vertex_table TEXT,
	vertex_id TEXT,
	edge_table TEXT,
	edge_args TEXT,
	out_table TEXT

	) RETURNS VOID AS $$
	SELECT MADLIB_SCHEMA.graph_apsp($1, $2, $3, $4, $5, NULL);
	$$ LANGUAGE sql VOLATILE
	m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
	-------------------------------------------------------------------------------

	CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp_get_path(
	apsp_table TEXT,
	source_vertex BIGINT,
	dest_vertex BIGINT,
	path_table TEXT

	) RETURNS VOID AS $$
	PythonFunction(graph, apsp, graph_apsp_get_path)
	$$ LANGUAGE plpythonu VOLATILE
	m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
	-------------------------------------------------------------------------------

	-- Online help
	CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp(
	message VARCHAR
	) RETURNS VARCHAR AS $$
	PythonFunction(graph, apsp, graph_apsp_help)
	$$ LANGUAGE plpythonu IMMUTABLE
	m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

	--------------------------------------------------------------------------------

	CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_apsp()
	RETURNS VARCHAR AS $$
	SELECT MADLIB_SCHEMA.graph_apsp('');
	$$ LANGUAGE sql IMMUTABLE
	m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
	--------------------------------------------------------------------------------