blob: 50afaf35811d8615f07fbec39c97bd39d6da6159 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* @file closeness.sql_in
*
* @brief SQL functions for graph analytics
* @date Jun 2017
*
* @sa Provides analytical measures for graphs
*
*//* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
/**
@addtogroup grp_graph_closeness
@brief Computes the closeness centrality value of each node in the graph.
<div class="toc"><b>Contents</b>
<ul>
<li><a href="#closeness">Closeness</a></li>
<li><a href="#examples">Examples</a></li>
</ul>
</div>
The Closeness function returns various closeness centrality measures and the
k-degree for given subset of vertices. The closeness measures are the inverse of
the sum, the inverse of the average, and the sum of inverses of the shortest
distances to all reachable target vertices (excluding the source vertex).
@note The closeness measures require a valid output from a prior APSP run - both
the APSP table and the associated output summary table. APSP is a
computationally expensive algorithm because it finds the shortest path between
all nodes in the graph. The worst case run-time for this implementation is O(V^2
* E) where V is the number of vertices and E is the number of edges. In
practice, run-time will be generally be much less than this, depending on the
graph.
@anchor closeness
@par Closeness
<pre class="syntax">
graph_closeness( apsp_table,
output_table,
vertex_filter_expr
)
</pre>
\b Arguments
<dl class="arglist">
<dt>apsp_table</dt>
<dd>TEXT. Name of the output table generated by a prior run of all pairs shortest path (APSP).
</dd>
<dt>out_table</dt>
<dd>TEXT. Name of the table to store the closeness measures.
It contains a row for every vertex of every group and have
the following columns (in addition to the grouping columns):
- inverse_sum_dist: Inverse of the sum of shortest distances to all reachable
vertices.
- inverse_average_dist: Inverse of the average of shortest distances to all
reachable vertices.
- sum_inverse_dist: Sum of the inverse of shortest distances to all reachable
vertices.
- k_degree: Total number of reachable vertices.
</dd>
<dt>vertex_filter_expr (optional)</dt>
<dd>TEXT, default = NULL. Valid PostgreSQL expression that describes the
vertices to generate closeness measures for. If this parameter is not
specified, closeness measures are generated for all vertices in the apsp table.
You can think of this input parameter as being like a WHERE clause.
Some example inputs:
- If you want a short list of vertices, say 1, 2 and 3:
<pre>vertex_id IN (1, 2, 3)</pre>
- If you want a range of vertices between 1000 and 2000:
<pre>vertix_id BETWEEN 1000 AND 2000</pre>
- If you want a set of vertices from a separate table satisfying to a condition
<pre>EXISTS (SELECT vertex_id FROM vertices_of_interest
WHERE vertex_id > 5000 AND condition = 'xyz')
</pre>
</dd>
</dl>
@anchor examples
@examp
-# Create vertex and edge tables to represent the graph:
<pre class="syntax">
DROP TABLE IF EXISTS vertex, edge;
CREATE TABLE vertex(
id INTEGER,
name TEXT
);
CREATE TABLE edge(
src_id INTEGER,
dest_id INTEGER,
edge_weight FLOAT8
);
INSERT INTO vertex VALUES
(0, 'A'),
(1, 'B'),
(2, 'C'),
(3, 'D'),
(4, 'E'),
(5, 'F'),
(6, 'G'),
(7, 'H');
INSERT INTO edge VALUES
(0, 1, 1.0),
(0, 2, 1.0),
(0, 4, 10.0),
(1, 2, 2.0),
(1, 3, 10.0),
(2, 3, 1.0),
(2, 5, 1.0),
(2, 6, 3.0),
(3, 0, 1.0),
(4, 0, -2.0),
(5, 6, 1.0),
(6, 7, 1.0);
</pre>
-# Calculate the all-pair shortest paths:
<pre class="syntax">
DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
SELECT madlib.graph_apsp('vertex', -- Vertex table
'id', -- Vertix id column (NULL means use default naming)
'edge', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
-- Edge arguments (NULL means use default naming)
'out_apsp'); -- Output table of shortest paths
</pre>
-# Compute the closeness measure for all nodes:
<pre class="syntax">
DROP TABLE IF EXISTS out_closeness;
SELECT madlib.graph_closeness('out_apsp', 'out_closeness');
SELECT * FROM out_closeness;
</pre>
<pre class="result">
src_id | inverse_sum_dist | inverse_avg_dist | sum_inverse_dist | k_degree
\--------+--------------------+-------------------+------------------+----------
1 | 0.0285714285714286 | 0.2 | 1.93809523809524 | 7
3 | 0.0357142857142857 | 0.25 | 2.87424242424242 | 7
4 | -1 | -7 | -1 | 7
0 | 0.0434782608695652 | 0.304347826086957 | 3.68333333333333 | 7
6 | 1 | 1 | 1 | 1
2 | 0.0416666666666667 | 0.291666666666667 | 3.75 | 7
5 | 0.333333333333333 | 0.666666666666667 | 1.5 | 2
7 | [NULL] | [NULL] | 0 | 0
(8 rows)
</pre>
-# Create a graph with 2 groups and find APSP for each group:
<pre class="syntax">
DROP TABLE IF EXISTS edge_gr;
CREATE TABLE edge_gr AS
(
SELECT *, 0 AS grp FROM edge
UNION
SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
);
INSERT INTO edge_gr VALUES
(4,5,-20,1);
</pre>
-# Find APSP for all groups:
<pre class="syntax">
DROP TABLE IF EXISTS out_gr, out_gr_summary;
SELECT madlib.graph_apsp(
'vertex', -- Vertex table
NULL, -- Vertex id column (NULL means use default naming)
'edge_gr', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
'out_gr', -- Output table of shortest paths
'grp' -- Grouping columns
);
</pre>
-# Compute closeness measure for vertex 0 to vertex 5 in every group
<pre class="syntax">
DROP TABLE IF EXISTS out_gr_path;
SELECT madlib.graph_closeness('out_gr', 'out_gr_closeness', 'src_id >= 0 and src_id <=5');
SELECT * FROM out_gr_closeness ORDER BY grp;
</pre>
<pre class="result">
grp | src_id | inverse_sum_dist | inverse_avg_dist | sum_inverse_dist | k_degree
-------+----------+-----------------------+----------------------+---------------------+------------
0 | 0 | 0.0434782608695652 | 0.304347826086957 | 3.68333333333333 | 7
0 | 5 | 0.333333333333333 | 0.666666666666667 | 1.5 | 2
0 | 4 | -1 | -7 | -1 | 7
0 | 3 | 0.0357142857142857 | 0.25 | 2.87424242424242 | 7
0 | 1 | 0.0285714285714286 | 0.2 | 1.93809523809524 | 7
0 | 2 | 0.0416666666666667 | 0.291666666666667 | 3.75 | 7
1 | 3 | 0.142857142857143 | 0.714285714285714 | 1.97979797979798 | 5
1 | 5 | [NULL] | [NULL] | 0 | 0
1 | 0 | 0.25 | 1.25 | 2.5 | 5
1 | 1 | 0.0588235294117647 | 0.294117647058824 | 0.988095238095238 | 5
1 | 2 | 0.1 | 0.5 | 1.79166666666667 | 5
1 | 4 | -0.0416666666666667 | -0.208333333333333 | -2.55 | 5
(12 rows)
</pre>
*/
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_closeness(
apsp_table TEXT,
out_table TEXT,
vertex_filter_expr TEXT
) RETURNS VOID AS $$
PythonFunction(graph, measures, graph_closeness)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_closeness(
apsp_table TEXT,
out_table TEXT
) RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.graph_closeness($1, $2, NULL);
$$ LANGUAGE SQL VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------------
-- Online help
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_closeness(
message VARCHAR
) RETURNS VARCHAR AS $$
PythonFunction(graph, measures, graph_closeness_help)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
--------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_closeness()
RETURNS VARCHAR AS $$
SELECT MADLIB_SCHEMA.graph_closeness('');
$$ LANGUAGE sql IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
/**
@addtogroup grp_graph_diameter
@brief Computes the diameter of a graph.
<div class="toc"><b>Contents</b>
<ul>
<li><a href="#diameter">Diameter</a></li>
<li><a href="#examples">Examples</a></li>
</ul>
</div>
Diameter is defined as the longest of all shortest paths in a graph.
@note This function assumes a valid output from a prior APSP run - both the APSP
table and the associated output summary table. APSP is a computationally
expensive algorithm because it finds the shortest path between all nodes in the
graph. The worst case run-time for this implementation is O(V^2 * E) where V is
the number of vertices and E is the number of edges. In practice, run-time will
be generally be much less than this, depending on the graph.
@anchor diameter
@par Diameter
<pre class="syntax">
graph_diameter( apsp_table,
output_table
)
</pre>
\b Arguments
<dl class="arglist">
<dt>apsp_table</dt>
<dd>TEXT. Name of the output table generated by a prior run of all pairs shortest path (APSP).
</dd>
<dt>out_table</dt>
<dd>TEXT. Name of the table to store the diameter. It contains a row for every
group, the diameter value and the two vertices that are the farthest apart.
</dd>
</dl>
@anchor examples
@examp
-# Create vertex and edge tables to represent the graph:
<pre class="syntax">
DROP TABLE IF EXISTS vertex, edge;
CREATE TABLE vertex(
id INTEGER,
name TEXT
);
CREATE TABLE edge(
src_id INTEGER,
dest_id INTEGER,
edge_weight FLOAT8
);
INSERT INTO vertex VALUES
(0, 'A'),
(1, 'B'),
(2, 'C'),
(3, 'D'),
(4, 'E'),
(5, 'F'),
(6, 'G'),
(7, 'H');
INSERT INTO edge VALUES
(0, 1, 1.0),
(0, 2, 1.0),
(0, 4, 10.0),
(1, 2, 2.0),
(1, 3, 10.0),
(2, 3, 1.0),
(2, 5, 1.0),
(2, 6, 3.0),
(3, 0, 1.0),
(4, 0, -2.0),
(5, 6, 1.0),
(6, 7, 1.0);
</pre>
-# Calculate the all-pair shortest paths:
<pre class="syntax">
DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
SELECT madlib.graph_apsp('vertex', -- Vertex table
'id', -- Vertix id column (NULL means use default naming)
'edge', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
-- Edge arguments (NULL means use default naming)
'out_apsp'); -- Output table of shortest paths
</pre>
-# Compute the diameter measure for the graph:
<pre class="syntax">
DROP TABLE IF EXISTS out_diameter;
SELECT madlib.graph_diameter('out_apsp', 'out_diameter');
SELECT * FROM out_diameter;
</pre>
<pre class="result">
diameter | diameter_end_vertices
\---------+-----------------------
14 | {{1,4}}
(1 row)
</pre>
-# Create a graph with 2 groups and find APSP for each group:
<pre class="syntax">
DROP TABLE IF EXISTS edge_gr;
CREATE TABLE edge_gr AS
(
SELECT *, 0 AS grp FROM edge
UNION
SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
);
INSERT INTO edge_gr VALUES
(4,5,-20,1);
</pre>
-# Find APSP for all groups:
<pre class="syntax">
DROP TABLE IF EXISTS out_gr, out_gr_summary;
SELECT madlib.graph_apsp(
'vertex', -- Vertex table
NULL, -- Vertex id column (NULL means use default naming)
'edge_gr', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
'out_gr', -- Output table of shortest paths
'grp' -- Grouping columns
);
</pre>
-# Find the diameter of graph in every group
<pre class="syntax">
DROP TABLE IF EXISTS out_gr_path;
SELECT madlib.graph_diameter('out_gr', 'out_gr_diameter');
SELECT * FROM out_gr_diameter ORDER BY grp;
</pre>
<pre class="result">
grp | diameter | diameter_end_vertices
\------+------------+-------------------------
0 | 14 | {{1,4}}
1 | 14 | {{1,4}}
(2 rows)
</pre>
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_diameter(
apsp_table TEXT,
out_table TEXT
) RETURNS VOID AS $$
PythonFunction(graph, measures, graph_diameter)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------------
-- Online help
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_diameter(
message VARCHAR
) RETURNS VARCHAR AS $$
PythonFunction(graph, measures, graph_diameter_help)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
--------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_diameter()
RETURNS VARCHAR AS $$
SELECT MADLIB_SCHEMA.graph_diameter('');
$$ LANGUAGE sql IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
/**
@addtogroup grp_graph_avg_path_length
@brief Computes the average shortest-path length of a graph.
<div class="toc"><b>Contents</b>
<ul>
<li><a href="#avg_path_length">Average Path Length</a></li>
<li><a href="#examples">Examples</a></li>
</ul>
</div>
This function computes the average of the shortest paths between each pair of
vertices. Average path length is based on "reachable target vertices", so it
ignores infinite-length paths between vertices that are not connected. This means
the function will average the path lengths in each connected component. If the
user requires the average path length of a particular component, the
weakly_connected_components function may be used to isolate the relevant vertices.
@note This function assumes a valid output from a prior APSP run - both the APSP
table and the associated output summary table. APSP is a computationally
expensive algorithm because it finds the shortest path between all nodes in the
graph. The worst case run-time for this implementation is O(V^2 * E) where V is
the number of vertices and E is the number of edges. In practice, run-time will
be generally be much less than this, depending on the graph.
@anchor avg_path_length
@par Average Path Length
<pre class="syntax">
graph_avg_path_length( apsp_table,
output_table
)
</pre>
\b Arguments
<dl class="arglist">
<dt>apsp_table</dt>
<dd>TEXT. Name of the output table generated by a prior run of all pairs
shortest path (APSP).
</dd>
<dt>out_table</dt>
<dd>TEXT. Name of the table to store the average path length. It contains a row
for every group, and the average path value.
</dd>
</dl>
@anchor examples
@examp
-# Create vertex and edge tables to represent the graph:
<pre class="syntax">
DROP TABLE IF EXISTS vertex, edge;
CREATE TABLE vertex(
id INTEGER,
name TEXT
);
CREATE TABLE edge(
src_id INTEGER,
dest_id INTEGER,
edge_weight FLOAT8
);
INSERT INTO vertex VALUES
(0, 'A'),
(1, 'B'),
(2, 'C'),
(3, 'D'),
(4, 'E'),
(5, 'F'),
(6, 'G'),
(7, 'H');
INSERT INTO edge VALUES
(0, 1, 1.0),
(0, 2, 1.0),
(0, 4, 10.0),
(1, 2, 2.0),
(1, 3, 10.0),
(2, 3, 1.0),
(2, 5, 1.0),
(2, 6, 3.0),
(3, 0, 1.0),
(4, 0, -2.0),
(5, 6, 1.0),
(6, 7, 1.0);
</pre>
-# Calculate the all-pair shortest paths:
<pre class="syntax">
DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
SELECT madlib.graph_apsp('vertex', -- Vertex table
'id', -- Vertix id column (NULL means use default naming)
'edge', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
-- Edge arguments (NULL means use default naming)
'out_apsp'); -- Output table of shortest paths
</pre>
-# Compute the average path length measure:
<pre class="syntax">
DROP TABLE IF EXISTS out_avg_path_length;
SELECT madlib.graph_avg_path_length('out_apsp', 'out_avg_path_length');
SELECT * FROM out_avg_path_length;
</pre>
<pre class="result">
avg_path_length
\------------------
2.973684210526316
(1 row)
</pre>
-# Create a graph with 2 groups and find APSP for each group:
<pre class="syntax">
DROP TABLE IF EXISTS edge_gr;
CREATE TABLE edge_gr AS
(
SELECT *, 0 AS grp FROM edge
UNION
SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
);
INSERT INTO edge_gr VALUES
(4,5,-20,1);
</pre>
-# Find APSP for all groups:
<pre class="syntax">
DROP TABLE IF EXISTS out_gr, out_gr_summary;
SELECT madlib.graph_apsp(
'vertex', -- Vertex table
NULL, -- Vertex id column (NULL means use default naming)
'edge_gr', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
'out_gr', -- Output table of shortest paths
'grp' -- Grouping columns
);
</pre>
-# Find the average path length in every group
<pre class="syntax">
DROP TABLE IF EXISTS out_gr_path;
SELECT madlib.graph_avg_path_length('out_gr', 'out_gr_path');
SELECT * FROM out_gr_path ORDER BY grp;
</pre>
<pre class="result">
grp | avg_path_length
\-------+----------------------
0 | 2.973684210526316
1 | 0.56
(2 rows)
</pre>
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_avg_path_length(
apsp_table TEXT,
out_table TEXT
) RETURNS VOID AS $$
PythonFunction(graph, measures, graph_avg_path_length)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------------
-- Online help
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_avg_path_length(
message VARCHAR
) RETURNS VARCHAR AS $$
PythonFunction(graph, measures, graph_avg_path_length_help)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
--------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_avg_path_length()
RETURNS VARCHAR AS $$
SELECT MADLIB_SCHEMA.graph_avg_path_length('');
$$ LANGUAGE sql IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
/**
@addtogroup grp_graph_vertex_degrees
@brief Computes the degrees for each vertex
<div class="toc"><b>Contents</b>
<ul>
<li><a href="#degrees">In-out degrees</a></li>
<li><a href="#examples">Examples</a></li>
</ul>
</div>
This function computes the degree of each node. The node degree is the number of
edges adjacent to that node. The node in-degree is the number of edges pointing
in to the node and node out-degree is the number of edges pointing out of the
node.
@anchor degrees
@par In-out degrees
<pre class="syntax">
graph_vertex_degrees(
vertex_table,
vertex_id,
edge_table,
edge_args,
out_table,
grouping_cols
)
</pre>
\b Arguments
<dl class="arglist">
<dt>vertex_table</dt>
<dd>TEXT. Name of the table containing the vertex data for the graph. Must
contain the column specified in the 'vertex_id' parameter below.</dd>
<dt>vertex_id</dt>
<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
vertex ids. The vertex ids can be of type INTEGER or BIGINT with no duplicates.
They do not need to be contiguous.</dd>
<dt>edge_table</dt>
<dd>TEXT. Name of the table containing the edge data. The edge table must
contain columns for source vertex, destination vertex and edge weight.
Column naming convention is described below in the 'edge_args' parameter.</dd>
<dt>edge_args</dt>
<dd>TEXT. A comma-delimited string containing multiple named arguments of
the form "name=value". The following parameters are supported for
this string argument:
- src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the
edge table. Default column name is 'src'.
- dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids
in the edge table. Default column name is 'dest'.
- weight (FLOAT8): Name of the column containing the edge weights in the
edge table. Default column name is 'weight'.</dd>
<dt>out_table</dt>
<dd>TEXT. Name of the table to store the result.
It contains a row for every vertex of every group and has
the following columns (in addition to the grouping columns):
- vertex: The id for the source vertex. Will use the input vertex
column 'id' for column naming.
- indegree: Number of incoming edges to the vertex.
- outdegree: Number of outgoing edges from the vertex.
<dt>grouping_cols</dt>
<dd>TEXT, default = NULL. List of columns used to group the input into discrete
subgraphs. These columns must exist in the edge table. When this value is null,
no grouping is used and a single result is generated. </dd>
</dl>
@anchor examples
@examp
-# Create vertex and edge tables to represent the graph:
<pre class="syntax">
DROP TABLE IF EXISTS vertex, edge;
CREATE TABLE vertex(
id INTEGER,
name TEXT
);
CREATE TABLE edge(
src_id INTEGER,
dest_id INTEGER,
edge_weight FLOAT8
);
INSERT INTO vertex VALUES
(0, 'A'),
(1, 'B'),
(2, 'C'),
(3, 'D'),
(4, 'E'),
(5, 'F'),
(6, 'G'),
(7, 'H');
INSERT INTO edge VALUES
(0, 1, 1.0),
(0, 2, 1.0),
(0, 4, 10.0),
(1, 2, 2.0),
(1, 3, 10.0),
(2, 3, 1.0),
(2, 5, 1.0),
(2, 6, 3.0),
(3, 0, 1.0),
(4, 0, -2.0),
(5, 6, 1.0),
(6, 7, 1.0);
</pre>
-# Calculate the in-out degrees for each node:
<pre class="syntax">
DROP TABLE IF EXISTS degrees;
SELECT madlib.graph_vertex_degrees(
'vertex', -- Vertex table
'id', -- Vertix id column (NULL means use default naming)
'edge', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
'degrees'); -- Output table of shortest paths
SELECT * FROM degrees ORDER BY id;
</pre>
<pre class="result">
id | indegree | outdegree
\----+----------+-----------
0 | 2 | 3
1 | 1 | 2
2 | 2 | 3
3 | 2 | 1
4 | 1 | 1
5 | 1 | 1
6 | 2 | 1
7 | 1 | 0
</pre>
-# Create a graph with 2 groups and find degrees for each group:
<pre class="syntax">
DROP TABLE IF EXISTS edge_gr;
CREATE TABLE edge_gr AS
(
SELECT *, 0 AS grp FROM edge
UNION
SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
);
INSERT INTO edge_gr VALUES
(4,5,-20,1);
</pre>
-# Find in-out degrees for all groups:
<pre class="syntax">
DROP TABLE IF EXISTS out_gr;
SELECT madlib.graph_vertex_degrees(
'vertex', -- Vertex table
NULL, -- Vertex id column (NULL means use default naming)
'edge_gr', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
'out_gr', -- Output table of shortest paths
'grp' -- Grouping columns
);
SELECT * FROM out_gr WHERE id < 2 ORDER BY grp, id;
</pre>
<pre class="result">
grp | id | indegree | outdegree
\-------+------+------------+-------------
0 | 0 | 2 | 3
0 | 1 | 1 | 2
1 | 0 | 2 | 3
1 | 1 | 1 | 2
(4 rows)
</pre>
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_vertex_degrees(
vertex_table TEXT,
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
out_table TEXT,
grouping_cols TEXT
) RETURNS VOID AS $$
PythonFunction(graph, measures, graph_vertex_degrees)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_vertex_degrees(
vertex_table TEXT,
vertex_id TEXT,
edge_table TEXT,
edge_args TEXT,
out_table TEXT
) RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.graph_vertex_degrees($1, $2, $3, $4, $5, NULL);
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------------
-- Online help
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_vertex_degrees(
message VARCHAR
) RETURNS VARCHAR AS $$
PythonFunction(graph, measures, graph_vertex_degrees_help)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
--------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_vertex_degrees()
RETURNS VARCHAR AS $$
SELECT MADLIB_SCHEMA.graph_vertex_degrees('');
$$ LANGUAGE sql IMMUTABLE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');