| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| * |
| * @file graph.sql_in |
| * |
| * @brief SQL functions for graph analytics |
| * @date June 2017 |
| * |
| * @sa Provides various graph algorithms. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| m4_include(`SQLCommon.m4') |
| |
| |
| /** |
| @addtogroup grp_wcc |
| |
| <div class="toc"><b>Contents</b> |
| <ul> |
| <li><a href="#wcc">Weakly Connected Components</a></li> |
| <li><a href="#rlcc">Retrieve Largest Connected Component</a></li> |
| <li><a href="#hist">Build Histogram</a></li> |
| <li><a href="#samecpt">Check Vertices in Same Connected Component</a></li> |
| <li><a href="#reach">Retrieve Reachable Vertices</a></li> |
| <li><a href="#count">Count Connected Components</a></li> |
| <li><a href="#examples">Examples</a></li> |
| <li><a href="#notes">Notes</a></li> |
| </ul> |
| </div> |
| |
| @brief Find all weakly connected components of a graph. |
| |
| Given a directed graph, a weakly connected component (WCC) is a subgraph of |
| the original graph where all vertices are connected to each other by some path, |
| ignoring the direction of edges. In case of an undirected graph, a weakly |
| connected component is also a strongly connected component. This module also |
| includes a number of helper functions that operate on the WCC output. |
| |
| @note MADlib graph functions might create a large number of subtransactions with very large graphs. Since the cache for subtransaction metadata is limited, running them in conjunction with other processes might degrade the performance of the whole database. The warm start feature of WCC can be used to limit the number of subtransactions in a given session to ensure that the performance is preserved. While the ideal iteration limit is dependent on the graph as well as the rest of the system processes, it is advised to start with 40 and adjust down if the problem persists. |
| |
| @anchor wcc |
| @par Weakly Connected Components |
| <pre class="syntax"> |
| weakly_connected_components( vertex_table, |
| vertex_id, |
| edge_table, |
| edge_args, |
| out_table, |
| grouping_cols, |
| iteration_limit, |
| warm_start |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>vertex_table</dt> |
| <dd>TEXT. Name of the table containing the vertex data for the graph. Must contain the |
| column specified in the 'vertex_id' parameter below.</dd> |
| |
| <dt>vertex_id</dt> |
| <dd>TEXT, default = 'id'. Name of the column(s) in 'vertex_table' containing |
| vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates. |
| They do not need to be contiguous. If multiple columns are used as vertex ids, |
| they are passed in the following format: [<vertex_id1>,<vertex_id2>,...]</dd> |
| |
| <dt>edge_table</dt> |
| <dd>TEXT. Name of the table containing the edge data. The edge table must |
| contain columns for source vertex and destination vertex.</dd> |
| |
| <dt>edge_args</dt> |
| <dd>TEXT. A comma-delimited string containing multiple named arguments of |
| the form "name=value". The following parameters are supported for |
| this string argument: |
| - src (INTEGER or BIGINT): Name of the column(s) containing the source vertex ids in the edge table. Default column name is 'src'. |
| - dest (INTEGER or BIGINT): Name of the column(s) containing the destination vertex ids in the edge table. Default column name is 'dest'.</dd> |
| |
| <dt>out_table</dt> |
| <dd>TEXT. Name of the table to store the component ID associated with each vertex. |
| It will contain a row for every vertex from 'vertex_table' with |
| the following columns: |
| - vertex_id : The id of a vertex. Will use the input parameter 'vertex_id' |
| for column naming. If multiple columns are used for identifying vertices, |
| this column will be an array named "id". |
| - component_id : Component that the vertex belongs to. |
| We use the convention where 'component_id' is the id of |
| the first vertex in a particular group. It means that component ids |
| are generally not contiguous. |
| - grouping_cols : Grouping column (if any) values associated with the vertex_id. |
| |
| A summary table named <out_table>_summary is also created. This is an internal |
| table that keeps a record of some of the input parameters and is used by the |
| weakly connected component helper functions. |
| </dd> |
| |
| <dt>grouping_cols (optional)</dt> |
| <dd>TEXT, default: NULL. A single column or a list of comma-separated |
| columns that divides the input data into discrete groups, which are |
| treated independently as separate graphs. |
| When this value is NULL, no grouping is used and |
| weakly connected components are generated for all data |
| (single graph). |
| @note Expressions are not currently supported for 'grouping_cols'.</dd> |
| |
| <dt>iteration_limit (optional)</dt> |
| <dd>INTEGER, default: NULL. Maximum number of iterations to run wcc. This |
| parameter is used to stop wcc early to limit the number of subtransactions |
| created by wcc. For such subtx issues, it is advised to set this parameter to |
| 40. A wcc run that stopped early by this parameter can resume its progress by |
| using the warm_start parameter. |
| An additional table named <out_table>_message is also created. This table is |
| necessary in case the iteration_limit is reached and there are still vertices to |
| update. It gets used when the wcc continues the process via warm_start and |
| gets dropped when the wcc determines there are no more updates necessary. |
| The user might determine if the wcc is completed or not by checking the |
| nodes_to_update column of <out_table>_summary table where 0 means wcc is |
| complete. </dd> |
| |
| <dt>warm_start (optional)</dt> |
| <dd>BOOLEAN, default: NULL. If True, wcc will look for the <out_table>_message |
| table and continue using it and the partial output from <out_table> to continue |
| the wcc process. |
| </dd> |
| |
| </dl> |
| |
| @note On a Greenplum cluster, the edge table should be distributed |
| by the source vertex id column for better performance. |
| In addition, the user should note that this |
| function creates a duplicate of the edge table (on Greenplum cluster) for |
| better performance. |
| |
| @anchor rlcc |
| @par Retrieve Largest Connected Component |
| |
| The largest connected component retrieval function finds the largest weakly |
| connected component(s) in a graph. If weakly connected components was run with |
| grouping, the largest connected components are computed for each group. |
| |
| <pre class="syntax"> |
| graph_wcc_largest_cpt( wcc_table, |
| largest_cpt_table |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>wcc_table</dt> |
| <dd>TEXT. Name of the table that contains the output of weakly connected |
| components.</dd> |
| |
| <dt>largest_cpt_table</dt> |
| <dd>TEXT. Name of the output table that contains the largest component's |
| information. It contains one or more rows for every group and has the following |
| columns: |
| - grouping_cols: The grouping columns given in the creation of wcc_table. |
| If there are no grouping columns, this column is not created. |
| - component_id: The ID of the largest component. Recall that we use the |
| convention where 'component_id' is the id of the first vertex in a |
| particular group. It means that component ids are generally not contiguous. |
| If there are multiple components of the same size, a row is created for each |
| component. If grouping_cols is specified, the largest |
| component is computed for each group. |
| - num_vertices: Number of vertices in the largest component. |
| </dd> |
| </dl> |
| |
| @anchor hist |
| @par Retrieve Histogram of Vertices Per Connected Component |
| |
| This function creates a histogram of the number of vertices |
| per connected component. |
| |
| <pre class="syntax"> |
| graph_wcc_histogram( wcc_table, |
| histogram_table |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>wcc_table</dt> |
| <dd>TEXT. Name of the table that contains the output of weakly connected |
| components.</dd> |
| |
| <dt>histogram_table</dt> |
| <dd>TEXT. Name of the output table that contains the number of vertices per |
| component. A row is created for every comoponent in every group |
| if grouping_cols was specified when running weakly connected components. |
| The output table has the following columns: |
| - grouping_cols: The grouping columns given during the creation of the |
| wcc_table. If there are no grouping columns, this column |
| is not created. |
| - component_id: The ID of the component. |
| - num_vertices: Number of vertices in the component specified by the |
| component_id column. |
| |
| </dd> |
| </dl> |
| |
| @anchor samecpt |
| @par Check if Two Vertices Belong to the Same Component |
| |
| This function determines if two vertices belong to the same component. |
| |
| <pre class="syntax"> |
| graph_wcc_vertex_check( wcc_table, |
| vertex_pair, |
| pair_table |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>wcc_table</dt> |
| <dd>TEXT. Name of the table that contains the output of weakly connected |
| components.</dd> |
| |
| <dt>vertex_pair</dt> |
| <dd>BIGINT[]. A pair of vertex IDs separated by a comma. If multiple |
| columns are used for identifying vertices, a 2D array will be required for this |
| parameter.</dd> |
| |
| <dt>pair_table</dt> |
| <dd>TEXT. Name of the output table that specifies if the two vertices in |
| vertex_pair belong to the same component. If wcc_table was generated using |
| grouping_cols, all the components in all groups are considered. The output |
| table has the following columns: |
| - component_id: Component ID that contains both the vertices in vertex_pair. |
| - grouping_cols: The grouping columns given in the creation of wcc_table. If |
| there are no grouping columns, this column is not created. |
| |
| </dd> |
| </dl> |
| |
| @anchor reach |
| @par Retrieve All Vertices Reachable from a Vertex |
| |
| This function finds all the vertices that can be reached from a given vertex |
| via weakly connected paths. |
| |
| <pre class="syntax"> |
| graph_wcc_reachable_vertices( wcc_table, |
| src, |
| reachable_vertices_table |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>wcc_table</dt> |
| <dd>TEXT. Name of the table that contains the output of weakly connected |
| components.</dd> |
| |
| <dt>src</dt> |
| <dd>BIGINT or BIGINT[]. The vertex ID from which all reachable vertices have to be found.</dd> |
| |
| <dt>reachable_vertices_table</dt> |
| <dd>TEXT. Name of the output table that contains the list of vertices that are |
| reachable from the src vertex. The output table has the following columns: |
| - grouping_cols : The grouping columns given in the creation of wcc_table. If |
| there are no grouping columns, this column is not created. |
| - component_id : The ID of the component that both the src and dest vertices |
| belong to. |
| - dest : Vertex ID that is reachable from the src vertex. |
| Reachability is computed with regard to a component. |
| |
| </dd> |
| </dl> |
| |
| @anchor count |
| @par Count of Connected Components |
| |
| This function finds the total number of components in the input graph. |
| |
| <pre class="syntax"> |
| graph_wcc_num_cpts( wcc_table, |
| count_table |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>wcc_table</dt> |
| <dd>TEXT. Name of the table that contains the output of weakly connected |
| components.</dd> |
| |
| <dt>count_table</dt> |
| <dd>TEXT. Name of the output table that contains the total number of components |
| per group in the graph, if there are any grouping_cols in wcc_table. The output |
| table has the following columns: |
| - grouping_cols : The grouping columns given in the creation of wcc_table. |
| If there are no grouping columns, this column is not created, |
| and count is with regard to the entire graph. |
| - num_components : Count of weakly connected components in a graph, or the |
| number of components within a group if grouping_cols is defined. |
| </dd> |
| |
| </dl> |
| |
| @anchor examples |
| @examp |
| |
| <a href="example/madlib_wcc_example.sql">Download the example sql file here.</a> |
| |
| -# Create vertex and edge tables to represent the graph: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS vertex, edge; |
| CREATE TABLE vertex( |
| node_id INTEGER |
| ); |
| CREATE TABLE edge( |
| conn_src INTEGER, |
| conn_dest INTEGER, |
| user_id INTEGER |
| ); |
| INSERT INTO vertex VALUES |
| (0), |
| (1), |
| (2), |
| (3), |
| (4), |
| (5), |
| (6), |
| (10), |
| (11), |
| (12), |
| (13), |
| (14), |
| (15), |
| (16); |
| INSERT INTO edge VALUES |
| (0, 1, 1), |
| (0, 2, 1), |
| (1, 2, 1), |
| (1, 3, 1), |
| (2, 3, 1), |
| (2, 5, 1), |
| (2, 6, 1), |
| (3, 0, 1), |
| (5, 6, 1), |
| (6, 3, 1), |
| (10, 11, 2), |
| (10, 12, 2), |
| (11, 12, 2), |
| (11, 13, 2), |
| (12, 13, 2), |
| (13, 10, 2), |
| (15, 16, 2), |
| (15, 14, 2); |
| </pre> |
| |
| -# Find all the weakly connected components in the graph: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS wcc_out, wcc_out_summary; |
| SELECT madlib.weakly_connected_components( |
| 'vertex', -- Vertex table |
| 'node_id', -- Vertex id column |
| 'edge', -- Edge table |
| 'src=conn_src, dest=conn_dest', -- Comma delimted string of edge arguments |
| 'wcc_out'); -- Output table of weakly connected components |
| SELECT * FROM wcc_out ORDER BY component_id, id; |
| </pre> |
| <pre class="result"> |
| node_id | component_id |
| ---------+-------------- |
| 0 | 0 |
| 1 | 0 |
| 2 | 0 |
| 3 | 0 |
| 5 | 0 |
| 6 | 0 |
| 4 | 4 |
| 10 | 10 |
| 11 | 10 |
| 12 | 10 |
| 13 | 10 |
| 14 | 14 |
| 15 | 14 |
| 16 | 14 |
| (14 rows) |
| </pre> |
| |
| -# Now get the weakly connected components associated with each 'user_id' |
| using the grouping feature: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS wcc_out, wcc_out_summary; |
| SELECT madlib.weakly_connected_components( |
| 'vertex', -- Vertex table |
| 'node_id', -- Vertex id column |
| 'edge', -- Edge table |
| 'src=conn_src, dest=conn_dest', -- Comma delimted string of edge arguments |
| 'wcc_out', -- Output table of weakly connected components |
| 'user_id'); -- Grouping column name |
| SELECT * FROM wcc_out ORDER BY user_id, component_id, id; |
| </pre> |
| <pre class="result"> |
| node_id | component_id | user_id |
| ---------+--------------+--------- |
| 0 | 0 | 1 |
| 1 | 0 | 1 |
| 2 | 0 | 1 |
| 3 | 0 | 1 |
| 5 | 0 | 1 |
| 6 | 0 | 1 |
| 10 | 10 | 2 |
| 11 | 10 | 2 |
| 12 | 10 | 2 |
| 13 | 10 | 2 |
| 14 | 14 | 2 |
| 15 | 14 | 2 |
| 16 | 14 | 2 |
| (13 rows) |
| </pre> |
| Note that vertex 4 is not identified as a separate component |
| above. This is because there is no entry in the |
| edge table for vertex 4 indicating which group it belongs to |
| (though you could do that if you wanted to). |
| |
| -# Retrieve the largest connected component: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS largest_cpt_table; |
| SELECT madlib.graph_wcc_largest_cpt( |
| 'wcc_out', -- WCC output table |
| 'largest_cpt_table'); -- output table containing largest component ID |
| SELECT * FROM largest_cpt_table ORDER BY component_id; |
| </pre> |
| <pre class="result"> |
| user_id | component_id | num_vertices |
| ---------+--------------+-------------- |
| 1 | 0 | 6 |
| 2 | 10 | 4 |
| (2 rows) |
| </pre> |
| |
| -# Retrieve histogram of the number of vertices per |
| connected component: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS histogram_table; |
| SELECT madlib.graph_wcc_histogram( |
| 'wcc_out', -- WCC output table |
| 'histogram_table'); -- output table containing the histogram of vertices |
| SELECT * FROM histogram_table ORDER BY component_id; |
| </pre> |
| <pre class="result"> |
| user_id | component_id | num_vertices |
| ---------+--------------+-------------- |
| 1 | 0 | 6 |
| 2 | 10 | 4 |
| 2 | 14 | 3 |
| (3 rows) |
| </pre> |
| |
| -# Check if two vertices belong to the same component: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS vc_table; |
| SELECT madlib.graph_wcc_vertex_check( |
| 'wcc_out', -- WCC output table |
| '14,15', -- Pair of vertex IDs |
| 'vc_table'); -- output table containing components that contain the two vertices |
| SELECT * FROM vc_table ORDER BY component_id; |
| </pre> |
| <pre class="result"> |
| user_id | component_id |
| ---------+-------------- |
| 2 | 14 |
| (1 row) |
| </pre> |
| |
| -# Retrieve all vertices reachable from a vertex |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS reach_table; |
| SELECT madlib.graph_wcc_reachable_vertices( |
| 'wcc_out', -- WCC output table |
| '0', -- source vertex |
| 'reach_table'); -- output table containing all vertices reachable from source vertex |
| SELECT * FROM reach_table ORDER BY component_id, dest; |
| </pre> |
| <pre class="result"> |
| user_id | component_id | dest |
| ---------+--------------+------ |
| 1 | 0 | 1 |
| 1 | 0 | 2 |
| 1 | 0 | 3 |
| 1 | 0 | 5 |
| 1 | 0 | 6 |
| (5 rows) |
| </pre> |
| |
| -# Count of connected components: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS count_table; |
| SELECT madlib.graph_wcc_num_cpts( |
| 'wcc_out', -- WCC output table |
| 'count_table'); -- output table containing number of components per group |
| SELECT * FROM count_table; |
| </pre> |
| <pre class="result"> |
| user_id | num_components |
| ---------+---------------- |
| 1 | 1 |
| 2 | 2 |
| (2 rows) |
| </pre> |
| |
| -# Create vertex and edge tables with multiple column ids to represent the graph: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS vertex_multicol_wcc, edge_multicol_wcc; |
| CREATE TABLE vertex_multicol_wcc( |
| node_id_major BIGINT, |
| node_id_minor BIGINT |
| ); |
| CREATE TABLE edge_multicol_wcc( |
| conn_src_major BIGINT, |
| conn_dest_major BIGINT, |
| user_id_major BIGINT, |
| conn_src_minor BIGINT, |
| conn_dest_minor BIGINT, |
| user_id_minor BIGINT |
| ); |
| INSERT INTO vertex_multicol_wcc VALUES |
| (0, 0), |
| (1, 1), |
| (2, 2), |
| (3, 3), |
| (4, 4), |
| (5, 5), |
| (6, 6); |
| INSERT INTO edge_multicol_wcc VALUES |
| (0, 1, 1, 0, 1, 1), |
| (0, 2, 1, 0, 2, 1), |
| (0, 4, 1, 0, 4, 1), |
| (1, 2, 1, 1, 2, 1), |
| (1, 3, 1, 1, 3, 1), |
| (2, 3, 1, 2, 3, 1), |
| (2, 5, 1, 2, 5, 1), |
| (2, 6, 1, 2, 6, 1), |
| (3, 0, 1, 3, 0, 1), |
| (4, 0, 1, 4, 0, 1), |
| (5, 6, 1, 5, 6, 1), |
| (6, 3, 1, 6, 3, 1), |
| (0, 1, 2, 0, 1, 2), |
| (0, 2, 2, 0, 2, 2), |
| (0, 4, 2, 0, 4, 2), |
| (1, 2, 2, 1, 2, 2), |
| (1, 3, 2, 1, 3, 2), |
| (2, 3, 2, 2, 3, 2), |
| (3, 0, 2, 3, 0, 2), |
| (4, 0, 2, 4, 0, 2), |
| (5, 6, 2, 5, 6, 2), |
| (6, 3, 2, 6, 3, 2); |
| </pre> |
| |
| -# Find all the weakly connected components in the graph: |
| <pre class="syntax"> |
| DROP TABLE IF EXISTS wcc_multicol_out, wcc_multicol_out_summary; |
| SELECT madlib.weakly_connected_components( |
| 'vertex_multicol_wcc', -- Vertex table |
| '[node_id_major,node_id_minor]', -- Vertex id column |
| 'edge_multicol_wcc', -- Edge table |
| 'src=[conn_src_major,conn_src_minor], dest=[conn_dest_major,conn_dest_minor]', -- Comma delimted string of edge arguments |
| 'wcc_multicol_out', -- Output table of weakly connected components |
| 'user_id_major,user_id_minor'); -- Grouping column name |
| SELECT * FROM wcc_multicol_out ORDER BY user_id_major, user_id_minor, component_id, id; |
| </pre> |
| <pre class="result"> |
| id | component_id | user_id_major | user_id_minor |
| -------+--------------+---------------+--------------- |
| {0,0} | 3 | 1 | 1 |
| {1,1} | 3 | 1 | 1 |
| {2,2} | 3 | 1 | 1 |
| {3,3} | 3 | 1 | 1 |
| {4,4} | 3 | 1 | 1 |
| {5,5} | 3 | 1 | 1 |
| {6,6} | 3 | 1 | 1 |
| {0,0} | 3 | 2 | 2 |
| {1,1} | 3 | 2 | 2 |
| {2,2} | 3 | 2 | 2 |
| {3,3} | 3 | 2 | 2 |
| {4,4} | 3 | 2 | 2 |
| {5,5} | 3 | 2 | 2 |
| {6,6} | 3 | 2 | 2 |
| (14 rows) |
| </pre> |
| |
| -# Use iteration limit and warm start in a bash loop to avoid subtx limitations |
| <pre class="syntax"> |
| #!/bin/bash |
| psql madlib -c "DROP TABLE IF EXISTS wcc_out, wcc_out_summary, wcc_out_message;" |
| \# Run wcc for 2 iterations |
| psql madlib -c "SELECT madlib.weakly_connected_components( |
| 'vertex', |
| 'node_id', |
| 'edge', |
| 'src=conn_src,dest=conn_dest', |
| 'wcc_out', |
| 'user_id', |
| 2);" # Number of iterations |
| \# Check if wcc is done |
| mynode=$(psql madlib -tc "SELECT nodes_to_update FROM wcc_out_summary") |
| echo $mynode |
| \# While there are remaining nodes to update |
| while [ $mynode -ne 0 ] |
| do |
| \# Run WCC with warm start to continue building on the previous output tables |
| psql madlib -c "SELECT madlib.weakly_connected_components( |
| 'vertex', |
| 'node_id', |
| 'edge', |
| 'src=conn_src,dest=conn_dest', |
| 'wcc_out', |
| 'user_id', |
| 2, |
| True);" # Warm start |
| mynode=$(psql madlib -tc "SELECT nodes_to_update FROM wcc_out_summary") |
| echo $mynode |
| done |
| </pre> |
| |
| @anchor notes |
| @par Notes |
| |
| 1. On a Greenplum cluster, the edge table should be distributed |
| by the source vertex id column for better performance. |
| |
| */ |
| |
| ------------------------------------------------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components( |
| vertex_table TEXT, |
| vertex_id TEXT, |
| edge_table TEXT, |
| edge_args TEXT, |
| out_table TEXT, |
| grouping_cols TEXT, |
| iteration_limit INTEGER, |
| warm_start BOOLEAN |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, wcc, wcc) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components( |
| vertex_table TEXT, |
| vertex_id TEXT, |
| edge_table TEXT, |
| edge_args TEXT, |
| out_table TEXT, |
| grouping_cols TEXT, |
| iteration_limit INTEGER |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, wcc, wcc) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components( |
| vertex_table TEXT, |
| vertex_id TEXT, |
| edge_table TEXT, |
| edge_args TEXT, |
| out_table TEXT, |
| grouping_cols TEXT |
| |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, wcc, wcc) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components( |
| vertex_table TEXT, |
| vertex_id TEXT, |
| edge_table TEXT, |
| edge_args TEXT, |
| out_table TEXT |
| |
| ) RETURNS VOID AS $$ |
| SELECT MADLIB_SCHEMA.weakly_connected_components($1, $2, $3, $4, $5, NULL); |
| $$ LANGUAGE sql VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| -- HELPER functions |
| ------------------------------------------------------------------------- |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_largest_cpt( |
| wcc_table TEXT, |
| largest_cpt_table TEXT |
| |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, wcc, graph_wcc_largest_cpt) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_histogram( |
| wcc_table TEXT, |
| histogram_table TEXT |
| |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, wcc, graph_wcc_histogram) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_vertex_check( |
| wcc_table TEXT, |
| vertex_pair BIGINT[], |
| pair_table TEXT |
| |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, wcc, graph_wcc_vertex_check) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_reachable_vertices( |
| wcc_table TEXT, |
| src BIGINT, |
| reachable_vertices_table TEXT |
| |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, wcc, graph_wcc_reachable_vertices) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_reachable_vertices( |
| wcc_table TEXT, |
| src BIGINT[], |
| reachable_vertices_table TEXT |
| |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, wcc, graph_wcc_reachable_vertices) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_num_cpts( |
| wcc_table TEXT, |
| count_table TEXT |
| |
| ) RETURNS VOID AS $$ |
| PythonFunction(graph, wcc, graph_wcc_num_cpts) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); |
| ------------------------------------------------------------------------- |
| |
| ------------------------------------------------------------------------- |
| |
| -- Online help |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components( |
| message VARCHAR |
| ) RETURNS VARCHAR AS $$ |
| PythonFunction(graph, wcc, wcc_help) |
| $$ LANGUAGE plpythonu IMMUTABLE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); |
| |
| ------------------------------------------------------------------------------- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weakly_connected_components() |
| RETURNS VARCHAR AS $$ |
| SELECT MADLIB_SCHEMA.weakly_connected_components(''); |
| $$ LANGUAGE sql IMMUTABLE |
| m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); |
| ------------------------------------------------------------------------------- |
| |