blob: ea0bf80198cce614813d2b2ddabbe96ba41f7ea5 [file] [log] [blame]
<!-- HTML header for doxygen 1.8.4-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.13"/>
<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
<title>MADlib: Closeness</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtreedata.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { init_search(); });
</script>
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
jax: ["input/TeX","output/HTML-CSS"],
});
</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
<!-- hack in the navigation tree -->
<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
<!-- google analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-45382226-1', 'madlib.apache.org');
ga('send', 'pageview');
</script>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
<td style="padding-left: 0.5em;">
<div id="projectname">
<span id="projectnumber">1.18.0</span>
</div>
<div id="projectbrief">User Documentation for Apache MADlib</div>
</td>
<td> <div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.13 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<div id="nav-tree-contents">
<div id="nav-sync" class="sync"></div>
</div>
</div>
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<script type="text/javascript">
$(document).ready(function(){initNavTree('group__grp__graph__closeness.html','');});
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div class="header">
<div class="headertitle">
<div class="title">Closeness<div class="ingroups"><a class="el" href="group__grp__graph.html">Graph</a> &raquo; <a class="el" href="group__grp__graph__measures.html">Measures</a></div></div> </div>
</div><!--header-->
<div class="contents">
<div class="toc"><b>Contents</b> <ul>
<li>
<a href="#closeness">Closeness</a> </li>
<li>
<a href="#examples">Examples</a> </li>
</ul>
</div><p>The Closeness function returns various closeness centrality measures and the k-degree for given subset of vertices. The closeness measures are the inverse of the sum, the inverse of the average, and the sum of inverses of the shortest distances to all reachable target vertices (excluding the source vertex).</p>
<dl class="section note"><dt>Note</dt><dd>The closeness measures require a valid output from a prior APSP run - both the APSP table and the associated output summary table. APSP is a computationally expensive algorithm because it finds the shortest path between all nodes in the graph. The worst case run-time for this implementation is O(V^2 E) where V is the number of vertices and E is the number of edges. In practice, run-time will be generally be much less than this, depending on the graph.</dd></dl>
<p><a class="anchor" id="closeness"></a></p><dl class="section user"><dt>Closeness</dt><dd><pre class="syntax">
graph_closeness( apsp_table,
output_table,
vertex_filter_expr
)
</pre></dd></dl>
<p><b>Arguments</b> </p><dl class="arglist">
<dt>apsp_table </dt>
<dd><p class="startdd">TEXT. Name of the output table generated by a prior run of all pairs shortest path (APSP). </p>
<p class="enddd"></p>
</dd>
<dt>out_table </dt>
<dd><p class="startdd">TEXT. Name of the table to store the closeness measures. It contains a row for every vertex of every group and have the following columns (in addition to the grouping columns):</p><ul>
<li>inverse_sum_dist: Inverse of the sum of shortest distances to all reachable vertices.</li>
<li>inverse_average_dist: Inverse of the average of shortest distances to all reachable vertices.</li>
<li>sum_inverse_dist: Sum of the inverse of shortest distances to all reachable vertices.</li>
<li>k_degree: Total number of reachable vertices. </li>
</ul>
<p class="enddd"></p>
</dd>
<dt>vertex_filter_expr (optional) </dt>
<dd><p class="startdd">TEXT, default = NULL. Valid PostgreSQL expression that describes the vertices to generate closeness measures for. If this parameter is not specified, closeness measures are generated for all vertices in the apsp table. You can think of this input parameter as being like a WHERE clause.</p>
<p>Some example inputs:</p><ul>
<li>If you want a short list of vertices, say 1, 2 and 3: <pre>vertex_id IN (1, 2, 3)</pre></li>
<li>If you want a range of vertices between 1000 and 2000: <pre>vertix_id BETWEEN 1000 AND 2000</pre></li>
<li>If you want a set of vertices from a separate table satisfying to a condition <pre>EXISTS (SELECT vertex_id FROM vertices_of_interest
WHERE vertex_id &gt; 5000 AND condition = 'xyz')
</pre></li>
</ul>
<p class="enddd"></p>
</dd>
</dl>
<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
<ol type="1">
<li>Create vertex and edge tables to represent the graph: <pre class="syntax">
DROP TABLE IF EXISTS vertex, edge;
CREATE TABLE vertex(
id INTEGER,
name TEXT
);
CREATE TABLE edge(
src_id INTEGER,
dest_id INTEGER,
edge_weight FLOAT8
);
INSERT INTO vertex VALUES
(0, 'A'),
(1, 'B'),
(2, 'C'),
(3, 'D'),
(4, 'E'),
(5, 'F'),
(6, 'G'),
(7, 'H');
INSERT INTO edge VALUES
(0, 1, 1.0),
(0, 2, 1.0),
(0, 4, 10.0),
(1, 2, 2.0),
(1, 3, 10.0),
(2, 3, 1.0),
(2, 5, 1.0),
(2, 6, 3.0),
(3, 0, 1.0),
(4, 0, -2.0),
(5, 6, 1.0),
(6, 7, 1.0);
</pre></li>
<li>Calculate the all-pair shortest paths: <pre class="syntax">
DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
SELECT madlib.graph_apsp('vertex', -- Vertex table
'id', -- Vertix id column (NULL means use default naming)
'edge', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
-- Edge arguments (NULL means use default naming)
'out_apsp'); -- Output table of shortest paths
</pre></li>
<li>Compute the closeness measure for all nodes: <pre class="syntax">
DROP TABLE IF EXISTS out_closeness;
SELECT madlib.graph_closeness('out_apsp', 'out_closeness');
SELECT * FROM out_closeness;
</pre> <pre class="result">
src_id | inverse_sum_dist | inverse_avg_dist | sum_inverse_dist | k_degree
--------+--------------------+-------------------+------------------+----------
1 | 0.0285714285714286 | 0.2 | 1.93809523809524 | 7
3 | 0.0357142857142857 | 0.25 | 2.87424242424242 | 7
4 | -1 | -7 | -1 | 7
0 | 0.0434782608695652 | 0.304347826086957 | 3.68333333333333 | 7
6 | 1 | 1 | 1 | 1
2 | 0.0416666666666667 | 0.291666666666667 | 3.75 | 7
5 | 0.333333333333333 | 0.666666666666667 | 1.5 | 2
7 | [NULL] | [NULL] | 0 | 0
(8 rows)
</pre></li>
<li>Create a graph with 2 groups and find APSP for each group: <pre class="syntax">
DROP TABLE IF EXISTS edge_gr;
CREATE TABLE edge_gr AS
(
SELECT *, 0 AS grp FROM edge
UNION
SELECT *, 1 AS grp FROM edge WHERE src_id &lt; 6 AND dest_id &lt; 6
);
INSERT INTO edge_gr VALUES
(4,5,-20,1);
</pre></li>
<li>Find APSP for all groups: <pre class="syntax">
DROP TABLE IF EXISTS out_gr, out_gr_summary;
SELECT madlib.graph_apsp(
'vertex', -- Vertex table
NULL, -- Vertex id column (NULL means use default naming)
'edge_gr', -- Edge table
'src=src_id, dest=dest_id, weight=edge_weight',
'out_gr', -- Output table of shortest paths
'grp' -- Grouping columns
);
</pre></li>
<li>Compute closeness measure for vertex 0 to vertex 5 in every group <pre class="syntax">
DROP TABLE IF EXISTS out_gr_path;
SELECT madlib.graph_closeness('out_gr', 'out_gr_closeness', 'src_id &gt;= 0 and src_id &lt;=5');
SELECT * FROM out_gr_closeness ORDER BY grp;
</pre> <pre class="result">
grp | src_id | inverse_sum_dist | inverse_avg_dist | sum_inverse_dist | k_degree
----&mdash;+-------&mdash;+--------------------&mdash;+-------------------&mdash;+------------------&mdash;+---------&mdash;
0 | 0 | 0.0434782608695652 | 0.304347826086957 | 3.68333333333333 | 7
0 | 5 | 0.333333333333333 | 0.666666666666667 | 1.5 | 2
0 | 4 | -1 | -7 | -1 | 7
0 | 3 | 0.0357142857142857 | 0.25 | 2.87424242424242 | 7
0 | 1 | 0.0285714285714286 | 0.2 | 1.93809523809524 | 7
0 | 2 | 0.0416666666666667 | 0.291666666666667 | 3.75 | 7
1 | 3 | 0.142857142857143 | 0.714285714285714 | 1.97979797979798 | 5
1 | 5 | [NULL] | [NULL] | 0 | 0
1 | 0 | 0.25 | 1.25 | 2.5 | 5
1 | 1 | 0.0588235294117647 | 0.294117647058824 | 0.988095238095238 | 5
1 | 2 | 0.1 | 0.5 | 1.79166666666667 | 5
1 | 4 | -0.0416666666666667 | -0.208333333333333 | -2.55 | 5
(12 rows)
</pre> </li>
</ol>
</div><!-- contents -->
</div><!-- doc-content -->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
<li class="footer">Generated on Wed Mar 31 2021 20:45:48 for MADlib by
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
</ul>
</div>
</body>
</html>