| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| * |
| * @file sssp.sql_in |
| * |
| * @brief SQL functions for graph analytics |
| * @date Nov 2016 |
| * |
| * @sa Provides single source shortest path algorithm. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| m4_include(`SQLCommon.m4') |
| |
| |
| /** |
| @addtogroup grp_sssp |
| |
| <div class="toc"><b>Contents</b> |
| <ul> |
| <li><a href="#sssp">SSSP</a></li> |
| <li><a href="#notes">Notes</a></li> |
| <li><a href="#examples">Examples</a></li> |
| <li><a href="#literature">Literature</a></li> |
| </ul> |
| </div> |
| |
| @brief Finds the shortest path from a single source vertex to every other vertex in a given graph. |
| |
| Given a graph and a source vertex, the single source shortest path (SSSP) algorithm |
| finds a path from the source vertex to every other vertex in the graph, |
| such that the sum of the weights of the path edges is minimized. |
| |
| @anchor sssp |
| @par SSSP |
| <pre class="syntax"> |
| graph_sssp( vertex_table, |
| vertex_id, |
| edge_table, |
| edge_args, |
| source_vertex, |
| out_table, |
| grouping_cols |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>vertex_table</dt> |
| <dd>TEXT. Name of the table containing the vertex data for the graph. Must contain the |
| column specified in the 'vertex_id' parameter below.</dd> |
| |
| <dt>vertex_id</dt> |
| <dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing |
| vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates. |
| They do not need to be contiguous.</dd> |
| |
| <dt>edge_table</dt> |
| <dd>TEXT. Name of the table containing the edge data. The edge table must |
| contain columns for source vertex, destination vertex and edge weight. |
| Column naming convention is described below in the 'edge_args' parameter.</dd> |
| |
| <dt>edge_args</dt> |
| <dd>TEXT. A comma-delimited string containing multiple named arguments of |
| the form "name=value". The following parameters are supported for |
| this string argument: |
| - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'. |
| - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'. |
| - weight (FLOAT8): Name of the column containing the edge weights in the edge table. Default column name is 'weight'.</dd> |
| |
| <dt>source_vertex</dt> |
| <dd>INTEGER or BIGINT. The source vertex id for the algorithm to start. This vertex id must |
| exist in the 'vertex_id' column of 'vertex_table'.</dd> |
| |
| <dt>out_table</dt> |
| <dd>TEXT. Name of the table to store the result of SSSP. |
| It contains a row for every vertex of every group and have |
| the following columns (in addition to the grouping columns): |
| - vertex_id : The id for the destination. Will use the input parameter 'vertex_id' for column naming. |
| - weight : The total weight of the shortest path from the source vertex to this particular vertex. |
| Will use the input parameter 'weight' for column naming. |
| - parent : The parent of this vertex in the shortest path from source. Will use 'parent' for column naming. |
| |
| A summary table named <out_table>_summary is also created. This is an internal table that keeps a record of the input parameters and is used by the path function described below. |
| </dd> |
| |
| <dt>grouping_cols (optional)</dt> |
| <dd>TEXT, default = NULL. List of columns used to group the input into discrete subgraphs. These columns must exist in the edge table. When this value is null, no grouping is used and a single SSSP result is generated. </dd> |
| </dl> |
| |
| @note On a Greenplum cluster, the edge table should be distributed |
| by the source vertex id column for better performance. |
| |
| @par Path Retrieval |
| |
| The path retrieval function returns the shortest path from the |
| source vertex to a specified desination vertex. |
| |
| <pre class="syntax"> |
| graph_sssp_get_path( sssp_table, |
| dest_vertex, |
| path_table |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>sssp_table</dt> |
| <dd>TEXT. Name of the table that contains the SSSP output.</dd> |
| |
| <dt>dest_vertex</dt> |
| <dd>INTEGER or BIGINT. The vertex that will be the destination of the desired path.</dd> |
| |
| <dt>path_table</dt> |
| <dd>TEXT. Name of the output table that contains the path. |
| It contains a row for every group and has the following columns: |
| - grouping_cols : The grouping columns given in the creation of the SSSP table. If there are no grouping columns, these columns will not exist and the table will have a single row. |
| - path (ARRAY) : The shortest path from the source vertex (as specified in the SSSP execution) to the destination vertex. |
| </dd> |
| |
| </dl> |
| |
| @anchor notes |
| @par Notes |
| |
| The Bellman-Ford algorithm [1] is used to implement SSSP. This algorithm allows |
| negative edges but not negative cycles. In the case of graphs with |
| negative cycles, an error will be given and no output table will be generated. |
| |
| Also see the Grail project [2] for more background on graph analytics processing |
| in relational databases. |
| |
| @anchor examples |
| @examp |
| |
| -# Create vertex and edge tables to represent the graph: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS vertex, edge; |
| CREATE TABLE vertex( |
| id INTEGER |
| ); |
| CREATE TABLE edge( |
| src INTEGER, |
| dest INTEGER, |
| weight FLOAT8 |
| ); |
| INSERT INTO vertex VALUES |
| (0), |
| (1), |
| (2), |
| (3), |
| (4), |
| (5), |
| (6), |
| (7); |
| INSERT INTO edge VALUES |
| (0, 1, 1.0), |
| (0, 2, 1.0), |
| (0, 4, 10.0), |
| (1, 2, 2.0), |
| (1, 3, 10.0), |
| (2, 3, 1.0), |
| (2, 5, 1.0), |
| (2, 6, 3.0), |
| (3, 0, 1.0), |
| (4, 0, -2.0), |
| (5, 6, 1.0), |
| (6, 7, 1.0); |
| </pre> |
| |
| -# Calculate the shortest paths from vertex 0: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS out, out_summary; |
| SELECT madlib.graph_sssp( |
| 'vertex', -- Vertex table |
| NULL, -- Vertix id column (NULL means use default naming) |
| 'edge', -- Edge table |
| NULL, -- Edge arguments (NULL means use default naming) |
| 0, -- Source vertex for path calculation |
| 'out'); -- Output table of shortest paths |
| SELECT * FROM out ORDER BY id; |
| </pre> |
| <pre class="result"> |
| id | weight | parent |
| ----+--------+-------- |
| 0 | 0 | 0 |
| 1 | 1 | 0 |
| 2 | 1 | 0 |
| 3 | 2 | 2 |
| 4 | 10 | 0 |
| 5 | 2 | 2 |
| 6 | 3 | 5 |
| 7 | 4 | 6 |
| (8 rows) |
| </pre> |
| |
| -# Get the shortest path to vertex 5: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS out_path; |
| SELECT madlib.graph_sssp_get_path('out',5,'out_path'); |
| SELECT * FROM out_path; |
| </pre> |
| <pre class="result"> |
| path |
| \--------- |
| {0,2,5} |
| </pre> |
| |
| -# Now let's do a similar example except using |
| different column names in the tables (i.e., not the defaults). |
| Create the vertex and edge tables: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS vertex_alt, edge_alt; |
| CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex; |
| CREATE TABLE edge_alt AS SELECT src AS e_src, dest, weight AS e_weight FROM edge; |
| </pre> |
| |
| -# Get the shortest path from vertex 1: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS out_alt, out_alt_summary; |
| SELECT madlib.graph_sssp( |
| 'vertex_alt', -- Vertex table |
| 'v_id', -- Vertex id column (NULL means use default naming) |
| 'edge_alt', -- Edge table |
| 'src=e_src, weight=e_weight', -- Edge arguments (NULL means use default naming) |
| 1, -- Source vertex for path calculation |
| 'out_alt'); -- Output table of shortest paths |
| SELECT * FROM out_alt ORDER BY v_id; |
| </pre> |
| <pre class="result"> |
| v_id | e_weight | parent |
| ------+----------+-------- |
| 0 | 4 | 3 |
| 1 | 0 | 1 |
| 2 | 2 | 1 |
| 3 | 3 | 2 |
| 4 | 14 | 0 |
| 5 | 3 | 2 |
| 6 | 4 | 5 |
| 7 | 5 | 6 |
| (8 rows) |
| </pre> |
| |
| -# Create a graph with 2 groups: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS edge_gr; |
| CREATE TABLE edge_gr AS |
| ( |
| SELECT *, 0 AS grp FROM edge |
| UNION |
| SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6 |
| ); |
| INSERT INTO edge_gr VALUES |
| (4,5,-20,1); |
| </pre> |
| |
| -# Find SSSP for all groups |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS out_gr, out_gr_summary; |
| SELECT madlib.graph_sssp( |
| 'vertex', -- Vertex table |
| NULL, -- Vertex id column (NULL means use default naming) |
| 'edge_gr', -- Edge table |
| NULL, -- Edge arguments (NULL means use default naming) |
| 0, -- Source vertex for path calculation |
| 'out_gr', -- Output table of shortest paths |
| 'grp' -- Grouping columns |
| ); |
| SELECT * FROM out_gr ORDER BY grp,id; |
| </pre> |
| <pre class="result"> |
| grp | id | weight | parent |
| -----+----+--------+-------- |
| 0 | 0 | 0 | 0 |
| 0 | 1 | 1 | 0 |
| 0 | 2 | 1 | 0 |
| 0 | 3 | 2 | 2 |
| 0 | 4 | 10 | 0 |
| 0 | 5 | 2 | 2 |
| 0 | 6 | 3 | 5 |
| 0 | 7 | 4 | 6 |
| 1 | 0 | 0 | 0 |
| 1 | 1 | 1 | 0 |
| 1 | 2 | 1 | 0 |
| 1 | 3 | 2 | 2 |
| 1 | 4 | 10 | 0 |
| 1 | 5 | -10 | 4 |
| </pre> |
| |
| -# Find the path to vertex 5 in every group |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS out_gr_path; |
| SELECT madlib.graph_sssp_get_path('out_gr',5,'out_gr_path'); |
| SELECT * FROM out_gr_path ORDER BY grp; |
| </pre> |
| <pre class="result"> |
| grp | path |
| -----+--------- |
| 0 | {0,2,5} |
| 1 | {0,4,5} |
| </pre> |
| |
| @anchor literature |
| @par Literature |
| |
| [1] Bellman–Ford algorithm. https://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm |
| |
| [2] The case against specialized graph analytics engines, J. Fan, G. Soosai Raj, |
| and J. M. Patel. CIDR 2015. http://cidrdb.org/cidr2015/Papers/CIDR15_Paper20.pdf |
| */ |
| |
| ------------------------------------------------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp( |
| vertex_table TEXT, |
| vertex_id TEXT, |
| edge_table TEXT, |
| edge_args TEXT, |
| source_vertex BIGINT, |
| out_table TEXT, |
| grouping_cols TEXT |
| |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, sssp, graph_sssp) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp( |
| vertex_table TEXT, |
| vertex_id TEXT, |
| edge_table TEXT, |
| edge_args TEXT, |
| source_vertex BIGINT, |
| out_table TEXT |
| |
| ) RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.graph_sssp($1, $2, $3, $4, $5, $6, NULL); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp_get_path( |
| sssp_table TEXT, |
| dest_vertex BIGINT, |
| path_table TEXT |
| |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, sssp, graph_sssp_get_path) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| |
| -- Online help |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp( |
| message VARCHAR |
| ) RETURNS VARCHAR AS $$ |
| PythonFunction(graph, sssp, graph_sssp_help) |
| $$ LANGUAGE plpythonu IMMUTABLE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); |
| |
| ------------------------------------------------------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp() |
| RETURNS VARCHAR AS $$ |
| SELECT MADLIB_SCHEMA.graph_sssp(''); |
| $$ LANGUAGE sql IMMUTABLE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); |
| ------------------------------------------------------------------------------- |