blob: 699d348b9b31ed64cb31097d367f664f687ba1f8 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*
* @file graph.sql_in
*
* @brief SQL functions for graph analytics
* @date June 2017
*
* @sa Provides various graph algorithms.
*
*//* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
/**
@addtogroup grp_wcc
<div class="toc"><b>Contents</b>
<ul>
<li><a href="#wcc">Weakly Connected Components</a></li>
<li><a href="#rlcc">Retrieve Largest Connected Component</a></li>
<li><a href="#hist">Build Histogram</a></li>
<li><a href="#samecpt">Check Vertices in Same Connected Component</a></li>
<li><a href="#reach">Retrieve Reachable Vertices</a></li>
<li><a href="#count">Count Connected Components</a></li>
<li><a href="#examples">Examples</a></li>
<li><a href="#notes">Notes</a></li>
</ul>
</div>
@brief Find all weakly connected components of a graph.
Given a directed graph, a weakly connected component (WCC) is a subgraph of
the original graph where all vertices are connected to each other by some path,
ignoring the direction of edges. In case of an undirected graph, a weakly
connected component is also a strongly connected component. This module also
includes a number of helper functions that operate on the WCC output.
@note MADlib graph functions might create a large number of subtransactions with very large graphs. Since the cache for subtransaction metadata is limited, running them in conjunction with other processes might degrade the performance of the whole database. The warm start feature of WCC can be used to limit the number of subtransactions in a given session to ensure that the performance is preserved. While the ideal iteration limit is dependent on the graph as well as the rest of the system processes, it is advised to start with 40 and adjust down if the problem persists.
@anchor wcc
@par Weakly Connected Components
<pre class="syntax">
weakly_connected_components( vertex_table,
vertex_id,
edge_table,
edge_args,
out_table,
grouping_cols,
iteration_limit,
warm_start
)
</pre>
\b Arguments
<dl class="arglist">
<dt>vertex_table</dt>
<dd>TEXT. Name of the table containing the vertex data for the graph. Must contain the
column specified in the 'vertex_id' parameter below.</dd>
<dt>vertex_id</dt>
<dd>TEXT, default = 'id'. Name of the column(s) in 'vertex_table' containing
vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates.
They do not need to be contiguous. If multiple columns are used as vertex ids,
they are passed in the following format: [<vertex_id1>,<vertex_id2>,...]</dd>
<dt>edge_table</dt>
<dd>TEXT. Name of the table containing the edge data. The edge table must
contain columns for source vertex and destination vertex.</dd>
<dt>edge_args</dt>
<dd>TEXT. A comma-delimited string containing multiple named arguments of
the form "name=value". The following parameters are supported for
this string argument:
- src (INTEGER or BIGINT): Name of the column(s) containing the source vertex ids in the edge table. Default column name is 'src'.
- dest (INTEGER or BIGINT): Name of the column(s) containing the destination vertex ids in the edge table. Default column name is 'dest'.</dd>
<dt>out_table</dt>
<dd>TEXT. Name of the table to store the component ID associated with each vertex.
It will contain a row for every vertex from 'vertex_table' with
the following columns:
- vertex_id : The id of a vertex. Will use the input parameter 'vertex_id'
for column naming. If multiple columns are used for identifying vertices,
this column will be an array named "id".
- component_id : Component that the vertex belongs to.
We use the convention where 'component_id' is the id of
the first vertex in a particular group. It means that component ids
are generally not contiguous.
- grouping_cols : Grouping column (if any) values associated with the vertex_id.
A summary table named <out_table>_summary is also created. This is an internal
table that keeps a record of some of the input parameters and is used by the
weakly connected component helper functions.
</dd>
<dt>grouping_cols (optional)</dt>
<dd>TEXT, default: NULL. A single column or a list of comma-separated
columns that divides the input data into discrete groups, which are
treated independently as separate graphs.
When this value is NULL, no grouping is used and
weakly connected components are generated for all data
(single graph).
@note Expressions are not currently supported for 'grouping_cols'.</dd>
<dt>iteration_limit (optional)</dt>
<dd>INTEGER, default: NULL. Maximum number of iterations to run wcc. This
parameter is used to stop wcc early to limit the number of subtransactions
created by wcc. For such subtx issues, it is advised to set this parameter to
40. A wcc run that stopped early by this parameter can resume its progress by
using the warm_start parameter.
An additional table named <out_table>_message is also created. This table is
necessary in case the iteration_limit is reached and there are still vertices to
update. It gets used when the wcc continues the process via warm_start and
gets dropped when the wcc determines there are no more updates necessary.
The user might determine if the wcc is completed or not by checking the
nodes_to_update column of <out_table>_summary table where 0 means wcc is
complete. </dd>
<dt>warm_start (optional)</dt>
<dd>BOOLEAN, default: NULL. If True, wcc will look for the <out_table>_message
table and continue using it and the partial output from <out_table> to continue
the wcc process.
</dd>
</dl>
@note On a Greenplum cluster, the edge table should be distributed
by the source vertex id column for better performance.
In addition, the user should note that this
function creates a duplicate of the edge table (on Greenplum cluster) for
better performance.
@anchor rlcc
@par Retrieve Largest Connected Component
The largest connected component retrieval function finds the largest weakly
connected component(s) in a graph. If weakly connected components was run with
grouping, the largest connected components are computed for each group.
<pre class="syntax">
graph_wcc_largest_cpt( wcc_table,
largest_cpt_table
)
</pre>
\b Arguments
<dl class="arglist">
<dt>wcc_table</dt>
<dd>TEXT. Name of the table that contains the output of weakly connected
components.</dd>
<dt>largest_cpt_table</dt>
<dd>TEXT. Name of the output table that contains the largest component's
information. It contains one or more rows for every group and has the following
columns:
- grouping_cols: The grouping columns given in the creation of wcc_table.
If there are no grouping columns, this column is not created.
- component_id: The ID of the largest component. Recall that we use the
convention where 'component_id' is the id of the first vertex in a
particular group. It means that component ids are generally not contiguous.
If there are multiple components of the same size, a row is created for each
component. If grouping_cols is specified, the largest
component is computed for each group.
- num_vertices: Number of vertices in the largest component.
</dd>
</dl>
@anchor hist
@par Retrieve Histogram of Vertices Per Connected Component
This function creates a histogram of the number of vertices
per connected component.
<pre class="syntax">
graph_wcc_histogram( wcc_table,
histogram_table
)
</pre>
\b Arguments
<dl class="arglist">
<dt>wcc_table</dt>
<dd>TEXT. Name of the table that contains the output of weakly connected
components.</dd>
<dt>histogram_table</dt>
<dd>TEXT. Name of the output table that contains the number of vertices per
component. A row is created for every comoponent in every group
if grouping_cols was specified when running weakly connected components.
The output table has the following columns:
- grouping_cols: The grouping columns given during the creation of the
wcc_table. If there are no grouping columns, this column
is not created.
- component_id: The ID of the component.
- num_vertices: Number of vertices in the component specified by the
component_id column.
</dd>
</dl>
@anchor samecpt
@par Check if Two Vertices Belong to the Same Component
This function determines if two vertices belong to the same component.
<pre class="syntax">
graph_wcc_vertex_check( wcc_table,
vertex_pair,
pair_table
)
</pre>
\b Arguments
<dl class="arglist">
<dt>wcc_table</dt>
<dd>TEXT. Name of the table that contains the output of weakly connected
components.</dd>
<dt>vertex_pair</dt>
<dd>BIGINT[]. A pair of vertex IDs separated by a comma. If multiple
columns are used for identifying vertices, a 2D array will be required for this
parameter.</dd>
<dt>pair_table</dt>
<dd>TEXT. Name of the output table that specifies if the two vertices in
vertex_pair belong to the same component. If wcc_table was generated using
grouping_cols, all the components in all groups are considered. The output
table has the following columns:
- component_id: Component ID that contains both the vertices in vertex_pair.
- grouping_cols: The grouping columns given in the creation of wcc_table. If
there are no grouping columns, this column is not created.
</dd>
</dl>
@anchor reach
@par Retrieve All Vertices Reachable from a Vertex
This function finds all the vertices that can be reached from a given vertex
via weakly connected paths.
<pre class="syntax">
graph_wcc_reachable_vertices( wcc_table,
src,
reachable_vertices_table
)
</pre>
\b Arguments
<dl class="arglist">
<dt>wcc_table</dt>
<dd>TEXT. Name of the table that contains the output of weakly connected
components.</dd>
<dt>src</dt>
<dd>BIGINT or BIGINT[]. The vertex ID from which all reachable vertices have to be found.</dd>
<dt>reachable_vertices_table</dt>
<dd>TEXT. Name of the output table that contains the list of vertices that are
reachable from the src vertex. The output table has the following columns:
- grouping_cols : The grouping columns given in the creation of wcc_table. If
there are no grouping columns, this column is not created.
- component_id : The ID of the component that both the src and dest vertices
belong to.
- dest : Vertex ID that is reachable from the src vertex.
Reachability is computed with regard to a component.
</dd>
</dl>
@anchor count
@par Count of Connected Components
This function finds the total number of components in the input graph.
<pre class="syntax">
graph_wcc_num_cpts( wcc_table,
count_table
)
</pre>
\b Arguments
<dl class="arglist">
<dt>wcc_table</dt>
<dd>TEXT. Name of the table that contains the output of weakly connected
components.</dd>
<dt>count_table</dt>
<dd>TEXT. Name of the output table that contains the total number of components
per group in the graph, if there are any grouping_cols in wcc_table. The output
table has the following columns:
- grouping_cols : The grouping columns given in the creation of wcc_table.
If there are no grouping columns, this column is not created,
and count is with regard to the entire graph.
- num_components : Count of weakly connected components in a graph, or the
number of components within a group if grouping_cols is defined.
</dd>
</dl>
@anchor examples
@examp
<a href="example/madlib_wcc_example.sql">Download the example sql file here.</a>
-# Create vertex and edge tables to represent the graph:
<pre class="syntax">
DROP TABLE IF EXISTS vertex, edge;
CREATE TABLE vertex(
node_id INTEGER
);
CREATE TABLE edge(
conn_src INTEGER,
conn_dest INTEGER,
user_id INTEGER
);
INSERT INTO vertex VALUES
(0),
(1),
(2),
(3),
(4),
(5),
(6),
(10),
(11),
(12),
(13),
(14),
(15),
(16);
INSERT INTO edge VALUES
(0, 1, 1),
(0, 2, 1),
(1, 2, 1),
(1, 3, 1),
(2, 3, 1),
(2, 5, 1),
(2, 6, 1),
(3, 0, 1),
(5, 6, 1),
(6, 3, 1),
(10, 11, 2),
(10, 12, 2),
(11, 12, 2),
(11, 13, 2),
(12, 13, 2),
(13, 10, 2),
(15, 16, 2),
(15, 14, 2);
</pre>
-# Find all the weakly connected components in the graph:
<pre class="syntax">
DROP TABLE IF EXISTS wcc_out, wcc_out_summary;
SELECT madlib.weakly_connected_components(
'vertex', -- Vertex table
'node_id', -- Vertex id column
'edge', -- Edge table
'src=conn_src, dest=conn_dest', -- Comma delimted string of edge arguments
'wcc_out'); -- Output table of weakly connected components
SELECT * FROM wcc_out ORDER BY component_id, id;
</pre>
<pre class="result">
node_id | component_id
---------+--------------
0 | 0
1 | 0
2 | 0
3 | 0
5 | 0
6 | 0
4 | 4
10 | 10
11 | 10
12 | 10
13 | 10
14 | 14
15 | 14
16 | 14
(14 rows)
</pre>
-# Now get the weakly connected components associated with each 'user_id'
using the grouping feature:
<pre class="syntax">
DROP TABLE IF EXISTS wcc_out, wcc_out_summary;
SELECT madlib.weakly_connected_components(
'vertex', -- Vertex table
'node_id', -- Vertex id column
'edge', -- Edge table
'src=conn_src, dest=conn_dest', -- Comma delimted string of edge arguments
'wcc_out', -- Output table of weakly connected components
'user_id'); -- Grouping column name
SELECT * FROM wcc_out ORDER BY user_id, component_id, id;
</pre>
<pre class="result">
node_id | component_id | user_id
---------+--------------+---------
0 | 0 | 1
1 | 0 | 1
2 | 0 | 1
3 | 0 | 1
5 | 0 | 1
6 | 0 | 1
10 | 10 | 2
11 | 10 | 2
12 | 10 | 2
13 | 10 | 2
14 | 14 | 2
15 | 14 | 2
16 | 14 | 2
(13 rows)
</pre>
Note that vertex 4 is not identified as a separate component
above. This is because there is no entry in the
edge table for vertex 4 indicating which group it belongs to
(though you could do that if you wanted to).
-# Retrieve the largest connected component:
<pre class="syntax">
DROP TABLE IF EXISTS largest_cpt_table;
SELECT madlib.graph_wcc_largest_cpt(
'wcc_out', -- WCC output table
'largest_cpt_table'); -- output table containing largest component ID
SELECT * FROM largest_cpt_table ORDER BY component_id;
</pre>
<pre class="result">
user_id | component_id | num_vertices
---------+--------------+--------------
1 | 0 | 6
2 | 10 | 4
(2 rows)
</pre>
-# Retrieve histogram of the number of vertices per
connected component:
<pre class="syntax">
DROP TABLE IF EXISTS histogram_table;
SELECT madlib.graph_wcc_histogram(
'wcc_out', -- WCC output table
'histogram_table'); -- output table containing the histogram of vertices
SELECT * FROM histogram_table ORDER BY component_id;
</pre>
<pre class="result">
user_id | component_id | num_vertices
---------+--------------+--------------
1 | 0 | 6
2 | 10 | 4
2 | 14 | 3
(3 rows)
</pre>
-# Check if two vertices belong to the same component:
<pre class="syntax">
DROP TABLE IF EXISTS vc_table;
SELECT madlib.graph_wcc_vertex_check(
'wcc_out', -- WCC output table
'14,15', -- Pair of vertex IDs
'vc_table'); -- output table containing components that contain the two vertices
SELECT * FROM vc_table ORDER BY component_id;
</pre>
<pre class="result">
user_id | component_id
---------+--------------
2 | 14
(1 row)
</pre>
-# Retrieve all vertices reachable from a vertex
<pre class="syntax">
DROP TABLE IF EXISTS reach_table;
SELECT madlib.graph_wcc_reachable_vertices(
'wcc_out', -- WCC output table
'0', -- source vertex
'reach_table'); -- output table containing all vertices reachable from source vertex
SELECT * FROM reach_table ORDER BY component_id, dest;
</pre>
<pre class="result">
user_id | component_id | dest
---------+--------------+------
1 | 0 | 1
1 | 0 | 2
1 | 0 | 3
1 | 0 | 5
1 | 0 | 6
(5 rows)
</pre>
-# Count of connected components:
<pre class="syntax">
DROP TABLE IF EXISTS count_table;
SELECT madlib.graph_wcc_num_cpts(
'wcc_out', -- WCC output table
'count_table'); -- output table containing number of components per group
SELECT * FROM count_table;
</pre>
<pre class="result">
user_id | num_components
---------+----------------
1 | 1
2 | 2
(2 rows)
</pre>
-# Create vertex and edge tables with multiple column ids to represent the graph:
<pre class="syntax">
DROP TABLE IF EXISTS vertex_multicol_wcc, edge_multicol_wcc;
CREATE TABLE vertex_multicol_wcc(
node_id_major BIGINT,
node_id_minor BIGINT
);
CREATE TABLE edge_multicol_wcc(
conn_src_major BIGINT,
conn_dest_major BIGINT,
user_id_major BIGINT,
conn_src_minor BIGINT,
conn_dest_minor BIGINT,
user_id_minor BIGINT
);
INSERT INTO vertex_multicol_wcc VALUES
(0, 0),
(1, 1),
(2, 2),
(3, 3),
(4, 4),
(5, 5),
(6, 6);
INSERT INTO edge_multicol_wcc VALUES
(0, 1, 1, 0, 1, 1),
(0, 2, 1, 0, 2, 1),
(0, 4, 1, 0, 4, 1),
(1, 2, 1, 1, 2, 1),
(1, 3, 1, 1, 3, 1),
(2, 3, 1, 2, 3, 1),
(2, 5, 1, 2, 5, 1),
(2, 6, 1, 2, 6, 1),
(3, 0, 1, 3, 0, 1),
(4, 0, 1, 4, 0, 1),
(5, 6, 1, 5, 6, 1),
(6, 3, 1, 6, 3, 1),
(0, 1, 2, 0, 1, 2),
(0, 2, 2, 0, 2, 2),
(0, 4, 2, 0, 4, 2),
(1, 2, 2, 1, 2, 2),
(1, 3, 2, 1, 3, 2),
(2, 3, 2, 2, 3, 2),
(3, 0, 2, 3, 0, 2),
(4, 0, 2, 4, 0, 2),
(5, 6, 2, 5, 6, 2),
(6, 3, 2, 6, 3, 2);
</pre>
-# Find all the weakly connected components in the graph:
<pre class="syntax">
DROP TABLE IF EXISTS wcc_multicol_out, wcc_multicol_out_summary;
SELECT madlib.weakly_connected_components(
'vertex_multicol_wcc', -- Vertex table
'[node_id_major,node_id_minor]', -- Vertex id column
'edge_multicol_wcc', -- Edge table
'src=[conn_src_major,conn_src_minor], dest=[conn_dest_major,conn_dest_minor]', -- Comma delimted string of edge arguments
'wcc_multicol_out', -- Output table of weakly connected components
'user_id_major,user_id_minor'); -- Grouping column name
SELECT * FROM wcc_multicol_out ORDER BY user_id_major, user_id_minor, component_id, id;
</pre>
<pre class="result">
id | component_id | user_id_major | user_id_minor
-------+--------------+---------------+---------------
{0,0} | 3 | 1 | 1
{1,1} | 3 | 1 | 1
{2,2} | 3 | 1 | 1
{3,3} | 3 | 1 | 1
{4,4} | 3 | 1 | 1
{5,5} | 3 | 1 | 1
{6,6} | 3 | 1 | 1
{0,0} | 3 | 2 | 2
{1,1} | 3 | 2 | 2
{2,2} | 3 | 2 | 2
{3,3} | 3 | 2 | 2
{4,4} | 3 | 2 | 2
{5,5} | 3 | 2 | 2
{6,6} | 3 | 2 | 2
(14 rows)
</pre>
-# Use iteration limit and warm start in a bash loop to avoid subtx limitations
<pre class="syntax">
#!/bin/bash
psql madlib -c "DROP TABLE IF EXISTS wcc_out, wcc_out_summary, wcc_out_message;"
\# Run wcc for 2 iterations
psql madlib -c "SELECT madlib.weakly_connected_components(
'vertex',
'node_id',
'edge',
'src=conn_src,dest=conn_dest',
'wcc_out',
'user_id',
2);" # Number of iterations
\# Check if wcc is done
mynode=$(psql madlib -tc "SELECT nodes_to_update FROM wcc_out_summary")
echo $mynode
\# While there are remaining nodes to update
while [ $mynode -ne 0 ]
do
\# Run WCC with warm start to continue building on the previous output tables
psql madlib -c "SELECT madlib.weakly_connected_components(
'vertex',
'node_id',
'edge',
'src=conn_src,dest=conn_dest',
'wcc_out',
'user_id',
2,
True);" # Warm start
mynode=$(psql madlib -tc "SELECT nodes_to_update FROM wcc_out_summary")
echo $mynode
done
</pre>
@anchor notes
@par Notes
1. On a Greenplum cluster, the edge table should be distributed
by the source vertex id column for better performance.
*/
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components(
vertex_table TEXT,
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
out_table TEXT,
grouping_cols TEXT,
iteration_limit INTEGER,
warm_start BOOLEAN
) RETURNS VOID AS $$
PythonFunction(graph, wcc, wcc)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components(
vertex_table TEXT,
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
out_table TEXT,
grouping_cols TEXT,
iteration_limit INTEGER
) RETURNS VOID AS $$
PythonFunction(graph, wcc, wcc)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components(
vertex_table TEXT,
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
out_table TEXT,
grouping_cols TEXT
) RETURNS VOID AS $$
PythonFunction(graph, wcc, wcc)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components(
vertex_table TEXT,
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
out_table TEXT
) RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.weakly_connected_components($1, $2, $3, $4, $5, NULL);
$$ LANGUAGE sql VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
-- HELPER functions
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_largest_cpt(
wcc_table TEXT,
largest_cpt_table TEXT
) RETURNS VOID AS $$
PythonFunction(graph, wcc, graph_wcc_largest_cpt)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_histogram(
wcc_table TEXT,
histogram_table TEXT
) RETURNS VOID AS $$
PythonFunction(graph, wcc, graph_wcc_histogram)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_vertex_check(
wcc_table TEXT,
vertex_pair BIGINT[],
pair_table TEXT
) RETURNS VOID AS $$
PythonFunction(graph, wcc, graph_wcc_vertex_check)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_reachable_vertices(
wcc_table TEXT,
src BIGINT,
reachable_vertices_table TEXT
) RETURNS VOID AS $$
PythonFunction(graph, wcc, graph_wcc_reachable_vertices)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_reachable_vertices(
wcc_table TEXT,
src BIGINT[],
reachable_vertices_table TEXT
) RETURNS VOID AS $$
PythonFunction(graph, wcc, graph_wcc_reachable_vertices)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_num_cpts(
wcc_table TEXT,
count_table TEXT
) RETURNS VOID AS $$
PythonFunction(graph, wcc, graph_wcc_num_cpts)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
-------------------------------------------------------------------------
-- Online help
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components(
message VARCHAR
) RETURNS VARCHAR AS $$
PythonFunction(graph, wcc, wcc_help)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
-------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components()
RETURNS VARCHAR AS $$
SELECT MADLIB_SCHEMA.weakly_connected_components('');
$$ LANGUAGE sql IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
-------------------------------------------------------------------------------