blob: e7f840a515d2a2757dd9e8c4ac46fe33a7052595 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.4"/>
<title>MADlib: kmeans.sql_in File Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<!-- Page start-up hooks: once the DOM is ready, initResizable is called; resizeHeight is
     registered on the window through jQuery's .load(). Neither function is defined in
     this file; presumably both come from resize.js, included above (TODO confirm). -->
<script type="text/javascript">
$(document).ready(initResizable);
$(window).load(resizeHeight);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/search.js"></script>
<!-- Once the DOM is ready, selects entry 0 of the search filter. The searchBox object is
     created by an inline script further down in the body, and index 0 is the "All" item
     of the MSearchSelectWindow list. -->
<script type="text/javascript">
$(document).ready(function() { searchBox.OnSelectItem(0); });
</script>
<!-- MathJax set-up: the text/x-mathjax-config block passes a configuration object to
     MathJax.Hub.Config that lists the tex2jax, AMSmath and AMSsymbols extensions and the
     TeX input / HTML-CSS output jax. The MathJax loader itself is included right after
     the block from ../mathjax/MathJax.js. The formulas written as \( ... \) and \[ ... \]
     in the page body are presumably what this renders (TODO confirm delimiters against
     the tex2jax defaults). -->
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
jax: ["input/TeX","output/HTML-CSS"],
});
</script><script src="../mathjax/MathJax.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">MADlib
&#160;<span id="projectnumber">1.0</span> <span style="font-size:10pt; font-style:italic"><a href="../latest/./kmeans_8sql__in.html"> A newer version is available</a></span>
</div>
<div id="projectbrief">User Documentation</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.4 -->
<!-- Creates the global searchBox object that the inline event handlers of the search
     field, the filter window and the close link call into. The SearchBox constructor is
     not defined in this file; presumably it is provided by search/search.js, and the
     meaning of its four arguments has to be looked up there (TODO confirm). -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<div id="navrow1" class="tabs">
<ul class="tablist">
<li><a href="index.html"><span>Main&#160;Page</span></a></li>
<li><a href="modules.html"><span>Modules</span></a></li>
<li>
<div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</li>
</ul>
</div>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<div id="nav-tree-contents">
<div id="nav-sync" class="sync"></div>
</div>
</div>
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<!-- Once the DOM is ready, calls initNavTree with this page's file name and an empty
     second argument. initNavTree is not defined in this file; presumably it comes from
     navtree.js and fills the nav-tree container above (TODO confirm). -->
<script type="text/javascript">
$(document).ready(function(){initNavTree('kmeans_8sql__in.html','');});
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Groups</a></div>
<!-- iframe showing the search results (closed by default).
     The title attribute gives the frame an accessible name for assistive technology;
     name and id are unchanged because the search script addresses the frame by them. -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults" title="Search results">
</iframe>
</div>
<div class="header">
<div class="summary">
<a href="#func-members">Functions</a> </div>
<div class="headertitle">
<div class="title">kmeans.sql_in File Reference</div> </div>
</div><!--header-->
<div class="contents">
<p>Set of functions for k-means clustering.
<a href="#details">More...</a></p>
<p><a href="kmeans_8sql__in_source.html">Go to the source code of this file.</a></p>
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
Functions</h2></td></tr>
<tr class="memitem:ae8bb21bf12220aa9de82792376afab7d"><td class="memItemLeft" align="right" valign="top">kmeans_result&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="kmeans_8sql__in.html#ae8bb21bf12220aa9de82792376afab7d">kmeans</a> (varchar rel_source, varchar expr_point, float8[][] initial_centroids, varchar fn_dist=&quot;squared_dist_norm2&quot;, varchar agg_centroid=&quot;avg&quot;, integer max_num_iterations=20, float8 min_frac_reassigned=0.001)</td></tr>
<tr class="memdesc:ae8bb21bf12220aa9de82792376afab7d"><td class="mdescLeft">&#160;</td><td class="mdescRight">Perform Lloyd's k-means local-search heuristic. <a href="#ae8bb21bf12220aa9de82792376afab7d">More...</a><br/></td></tr>
<tr class="separator:ae8bb21bf12220aa9de82792376afab7d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:af0d5172211c83d4de4d70a84555aa68e"><td class="memItemLeft" align="right" valign="top">float8[][]&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="kmeans_8sql__in.html#af0d5172211c83d4de4d70a84555aa68e">kmeanspp_seeding</a> (varchar rel_source, varchar expr_point, integer k, varchar fn_dist=&quot;squared_dist_norm2&quot;, float8[][] initial_centroids=NULL)</td></tr>
<tr class="memdesc:af0d5172211c83d4de4d70a84555aa68e"><td class="mdescLeft">&#160;</td><td class="mdescRight">k-Means++ Seeding <a href="#af0d5172211c83d4de4d70a84555aa68e">More...</a><br/></td></tr>
<tr class="separator:af0d5172211c83d4de4d70a84555aa68e"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac6c26c8e6b4643acfa79a87bd3ab0fe4"><td class="memItemLeft" align="right" valign="top">kmeans_result&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="kmeans_8sql__in.html#ac6c26c8e6b4643acfa79a87bd3ab0fe4">kmeanspp</a> (varchar rel_source, varchar expr_point, integer k, varchar fn_dist=&quot;squared_dist_norm2&quot;, varchar agg_centroid=&quot;avg&quot;, integer max_num_iterations=20, float8 min_frac_reassigned=0.001)</td></tr>
<tr class="memdesc:ac6c26c8e6b4643acfa79a87bd3ab0fe4"><td class="mdescLeft">&#160;</td><td class="mdescRight">Run k-Means++. <a href="#ac6c26c8e6b4643acfa79a87bd3ab0fe4">More...</a><br/></td></tr>
<tr class="separator:ac6c26c8e6b4643acfa79a87bd3ab0fe4"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a01e02736e6d156240b15f7d6dae092c3"><td class="memItemLeft" align="right" valign="top">float8[][]&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="kmeans_8sql__in.html#a01e02736e6d156240b15f7d6dae092c3">kmeans_random_seeding</a> (varchar rel_source, varchar expr_point, integer k, float8[][] initial_centroids=NULL)</td></tr>
<tr class="memdesc:a01e02736e6d156240b15f7d6dae092c3"><td class="mdescLeft">&#160;</td><td class="mdescRight">k-Means Random Seeding <a href="#a01e02736e6d156240b15f7d6dae092c3">More...</a><br/></td></tr>
<tr class="separator:a01e02736e6d156240b15f7d6dae092c3"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aeec5efd06aca50f4830aa10d522dc5ed"><td class="memItemLeft" align="right" valign="top">kmeans_result&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="kmeans_8sql__in.html#aeec5efd06aca50f4830aa10d522dc5ed">kmeans_random</a> (varchar rel_source, varchar expr_point, integer k, varchar fn_dist=&quot;squared_dist_norm2&quot;, varchar agg_centroid=&quot;avg&quot;, integer max_num_iterations=20, float8 min_frac_reassigned=0.001)</td></tr>
<tr class="memdesc:aeec5efd06aca50f4830aa10d522dc5ed"><td class="mdescLeft">&#160;</td><td class="mdescRight">Run k-Means with random seeding. <a href="#aeec5efd06aca50f4830aa10d522dc5ed">More...</a><br/></td></tr>
<tr class="separator:aeec5efd06aca50f4830aa10d522dc5ed"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a6e1a47f006bc0576f56eabcd6903086f"><td class="memItemLeft" align="right" valign="top">kmeans_result&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="kmeans_8sql__in.html#a6e1a47f006bc0576f56eabcd6903086f">kmeans</a> (varchar rel_source, varchar expr_point, varchar rel_initial_centroids, varchar expr_centroid, varchar fn_dist=&quot;squared_dist_norm2&quot;, varchar agg_centroid=&quot;avg&quot;, integer max_num_iterations=20, float8 min_frac_reassigned=0.001)</td></tr>
<tr class="memdesc:a6e1a47f006bc0576f56eabcd6903086f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Perform Lloyd's k-means local-search heuristic, but with initial centroids stored in a table. <a href="#a6e1a47f006bc0576f56eabcd6903086f">More...</a><br/></td></tr>
<tr class="separator:a6e1a47f006bc0576f56eabcd6903086f"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a71e7675758c99acbe7785819b6a85a8f"><td class="memItemLeft" align="right" valign="top">float8&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="kmeans_8sql__in.html#a71e7675758c99acbe7785819b6a85a8f">simple_silhouette</a> (varchar rel_source, varchar expr_point, float8[][] centroids, varchar fn_dist=&quot;dist_norm2&quot;)</td></tr>
<tr class="memdesc:a71e7675758c99acbe7785819b6a85a8f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Compute a simplified version of the silhouette coefficient. <a href="#a71e7675758c99acbe7785819b6a85a8f">More...</a><br/></td></tr>
<tr class="separator:a71e7675758c99acbe7785819b6a85a8f"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><dl class="section see"><dt>See Also</dt><dd>For a brief introduction to k-means clustering, see the module description <a class="el" href="group__grp__kmeans.html">k-Means Clustering</a>. </dd></dl>
<p>Definition in file <a class="el" href="kmeans_8sql__in_source.html">kmeans.sql_in</a>.</p>
</div><h2 class="groupheader">Function Documentation</h2>
<a class="anchor" id="ae8bb21bf12220aa9de82792376afab7d"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">kmeans_result kmeans </td>
<td>(</td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>rel_source</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>expr_point</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>initial_centroids</em>[][], </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>fn_dist</em> = <code>&quot;squared_dist_norm2&quot;</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>agg_centroid</em> = <code>&quot;avg&quot;</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">integer&#160;</td>
<td class="paramname"><em>max_num_iterations</em> = <code>20</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>min_frac_reassigned</em> = <code>0.001</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rel_source</td><td>Name of the relation containing input points </td></tr>
<tr><td class="paramname">expr_point</td><td>Expression evaluating to point coordinates for each tuple </td></tr>
<tr><td class="paramname">initial_centroids</td><td>Matrix containing the initial centroids as columns </td></tr>
<tr><td class="paramname">fn_dist</td><td>Name of a function with signature <code>DOUBLE PRECISION[] x DOUBLE PRECISION[] -&gt; DOUBLE PRECISION</code> that returns the distance between two points. The default is the <a class="el" href="linalg_8sql__in.html#a00a08e69f27524f2096032214e15b668">squared Euclidean distance</a>. </td></tr>
<tr><td class="paramname">agg_centroid</td><td>Name of an aggregate function with signature <code>DOUBLE PRECISION[] -&gt; DOUBLE PRECISION[]</code> that, for each group of points, returns a centroid. In order for Lloyd's local-search heuristic to provably converge and to return a local minimum, this centroid should minimize the sum of distances between each point in the group and the centroid. The default is the <a class="el" href="linalg_8sql__in.html#a1aa37f73fb1cd8d7d106aa518dd8c0b4">average (mean/barycenter in Euclidean space)</a>, which satisfies this property if <code>fn_dist = 'squared_dist_norm2'</code>. </td></tr>
<tr><td class="paramname">max_num_iterations</td><td>Maximum number of iterations </td></tr>
<tr><td class="paramname">min_frac_reassigned</td><td>Fraction of reassigned points below which convergence is assumed and the algorithm terminates </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>A composite value:<ul>
<li><code>centroids</code> - Matrix with \( k \) centroids as columns.</li>
<li><code>frac_reassigned</code> - Fraction of points that were assigned a different centroid in the last iteration.</li>
<li><code>num_iterations</code> - The number of iterations before the algorithm terminated </li>
</ul>
</dd></dl>
<p>Definition at line <a class="el" href="kmeans_8sql__in_source.html#l00348">348</a> of file <a class="el" href="kmeans_8sql__in_source.html">kmeans.sql_in</a>.</p>
</div>
</div>
<a class="anchor" id="a6e1a47f006bc0576f56eabcd6903086f"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">kmeans_result kmeans </td>
<td>(</td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>rel_source</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>expr_point</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>rel_initial_centroids</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>expr_centroid</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>fn_dist</em> = <code>&quot;squared_dist_norm2&quot;</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>agg_centroid</em> = <code>&quot;avg&quot;</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">integer&#160;</td>
<td class="paramname"><em>max_num_iterations</em> = <code>20</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>min_frac_reassigned</em> = <code>0.001</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>This is a shortcut for running k-means with initial centroids stored in a table (as opposed to an array of centroids). It is equivalent to </p>
<pre>SELECT <a class="el" href="kmeans_8sql__in.html#ae8bb21bf12220aa9de82792376afab7d">kmeans</a>(
rel_source,
expr_point,
(SELECT <a class="el" href="linalg_8sql__in.html#a9c439706f35d6cac89f151d553a5f111">matrix_agg</a>($expr_centroid) FROM $rel_initial_centroids),
fn_dist,
agg_centroid,
max_num_iterations,
min_frac_reassigned
)</pre><p> where <code>$expr_centroid</code> and <code>$rel_initial_centroids</code> denote textual substitutions. </p>
<p>Definition at line <a class="el" href="kmeans_8sql__in_source.html#l01093">1093</a> of file <a class="el" href="kmeans_8sql__in_source.html">kmeans.sql_in</a>.</p>
</div>
</div>
<a class="anchor" id="aeec5efd06aca50f4830aa10d522dc5ed"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">kmeans_result kmeans_random </td>
<td>(</td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>rel_source</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>expr_point</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">integer&#160;</td>
<td class="paramname"><em>k</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>fn_dist</em> = <code>&quot;squared_dist_norm2&quot;</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>agg_centroid</em> = <code>&quot;avg&quot;</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">integer&#160;</td>
<td class="paramname"><em>max_num_iterations</em> = <code>20</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>min_frac_reassigned</em> = <code>0.001</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>This is a shortcut for running k-means with random seeding. It is equivalent to </p>
<pre>SELECT <a class="el" href="kmeans_8sql__in.html#ae8bb21bf12220aa9de82792376afab7d">kmeans</a>(
rel_source,
expr_point,
<a class="el" href="kmeans_8sql__in.html#a01e02736e6d156240b15f7d6dae092c3">kmeans_random_seeding</a>(
rel_source,
expr_point,
k
),
fn_dist,
agg_centroid,
max_num_iterations,
min_frac_reassigned
)</pre>
<p>Definition at line <a class="el" href="kmeans_8sql__in_source.html#l00910">910</a> of file <a class="el" href="kmeans_8sql__in_source.html">kmeans.sql_in</a>.</p>
</div>
</div>
<a class="anchor" id="a01e02736e6d156240b15f7d6dae092c3"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">float8 [][] kmeans_random_seeding </td>
<td>(</td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>rel_source</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>expr_point</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">integer&#160;</td>
<td class="paramname"><em>k</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>initial_centroids</em>[][] = <code>NULL</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rel_source</td><td>Name of the relation containing input points </td></tr>
<tr><td class="paramname">expr_point</td><td>Expression evaluating to point coordinates for each tuple </td></tr>
<tr><td class="paramname">k</td><td>Number of centroids </td></tr>
<tr><td class="paramname">initial_centroids</td><td>A matrix containing up to \( k \) centroids as columns. <a class="el" href="kmeans_8sql__in.html#a01e02736e6d156240b15f7d6dae092c3" title="k-Means Random Seeding ">kmeans_random_seeding()</a> proceeds exactly as if these centroids had already been generated in previous iterations. This parameter may be NULL, in which case all \( k \) centroids will be generated. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>A matrix containing \( k \) centroids as columns </dd></dl>
<p>Definition at line <a class="el" href="kmeans_8sql__in_source.html#l00804">804</a> of file <a class="el" href="kmeans_8sql__in_source.html">kmeans.sql_in</a>.</p>
</div>
</div>
<a class="anchor" id="ac6c26c8e6b4643acfa79a87bd3ab0fe4"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">kmeans_result kmeanspp </td>
<td>(</td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>rel_source</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>expr_point</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">integer&#160;</td>
<td class="paramname"><em>k</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>fn_dist</em> = <code>&quot;squared_dist_norm2&quot;</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>agg_centroid</em> = <code>&quot;avg&quot;</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">integer&#160;</td>
<td class="paramname"><em>max_num_iterations</em> = <code>20</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>min_frac_reassigned</em> = <code>0.001</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>This is a shortcut for running k-means++. It is equivalent to </p>
<pre>SELECT <a class="el" href="kmeans_8sql__in.html#ae8bb21bf12220aa9de82792376afab7d">kmeans</a>(
rel_source,
expr_point,
<a class="el" href="kmeans_8sql__in.html#af0d5172211c83d4de4d70a84555aa68e">kmeanspp_seeding</a>(
rel_source,
expr_point,
k,
fn_dist
),
fn_dist,
agg_centroid,
max_num_iterations,
min_frac_reassigned
)</pre>
<p>Definition at line <a class="el" href="kmeans_8sql__in_source.html#l00668">668</a> of file <a class="el" href="kmeans_8sql__in_source.html">kmeans.sql_in</a>.</p>
</div>
</div>
<a class="anchor" id="af0d5172211c83d4de4d70a84555aa68e"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">float8 [][] kmeanspp_seeding </td>
<td>(</td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>rel_source</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>expr_point</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">integer&#160;</td>
<td class="paramname"><em>k</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>fn_dist</em> = <code>&quot;squared_dist_norm2&quot;</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>initial_centroids</em>[][] = <code>NULL</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rel_source</td><td>Name of the relation containing input points </td></tr>
<tr><td class="paramname">expr_point</td><td>Expression evaluating to point coordinates for each tuple </td></tr>
<tr><td class="paramname">k</td><td>Number of centroids </td></tr>
<tr><td class="paramname">fn_dist</td><td>Name of a function with signature <code>DOUBLE PRECISION[] x DOUBLE PRECISION[] -&gt; DOUBLE PRECISION</code> that returns the distance between two points </td></tr>
<tr><td class="paramname">initial_centroids</td><td>A matrix containing up to \( k \) centroids as columns. <a class="el" href="kmeans_8sql__in.html#af0d5172211c83d4de4d70a84555aa68e" title="k-Means++ Seeding ">kmeanspp_seeding()</a> proceeds exactly as if these centroids had already been generated in previous iterations. This parameter may be NULL, in which case all \( k \) centroids will be generated. </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>A matrix containing \( k \) centroids as columns </dd></dl>
<p>Definition at line <a class="el" href="kmeans_8sql__in_source.html#l00544">544</a> of file <a class="el" href="kmeans_8sql__in_source.html">kmeans.sql_in</a>.</p>
</div>
</div>
<a class="anchor" id="a71e7675758c99acbe7785819b6a85a8f"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">float8 simple_silhouette </td>
<td>(</td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>rel_source</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>expr_point</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">float8&#160;</td>
<td class="paramname"><em>centroids</em>[][], </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">varchar&#160;</td>
<td class="paramname"><em>fn_dist</em> = <code>&quot;dist_norm2&quot;</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">rel_source</td><td>Name of the relation containing input points </td></tr>
<tr><td class="paramname">expr_point</td><td>Expression evaluating to point coordinates \( x_i \) for each tuple </td></tr>
<tr><td class="paramname">centroids</td><td>Matrix \( M = (\vec{m_0} \dots \vec{m_{k-1}}) \in \mathbb{R}^{d \times k} \) with \( k \) columns, where column \( i \) contains the position of centroid \( i \). </td></tr>
<tr><td class="paramname">fn_dist</td><td>Name of a function with signature <code>DOUBLE PRECISION[] x DOUBLE PRECISION[] -&gt; DOUBLE PRECISION</code> that returns the distance between two points </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>For each point \( x_i \), let \( d_1( x_i ) \) and \( d_2( x_i ) \) be the distance to the closest and 2nd-closest centroid, respectively. If there is more than one closest centroid, then \( d_1( x_i ) = d_2( x_i )\). The return value is the average, over all points \( x_i \), of <p class="formulaDsp">
\[ \frac{d_2( x_i ) - d_1(x_i)}{d_2(x_i)}, \]
</p>
where 0/0 is interpreted as 0. Clearly, the simplified silhouette coefficient assumes values in \( [0,1] \). </dd></dl>
<p>Definition at line <a class="el" href="kmeans_8sql__in_source.html#l01230">1230</a> of file <a class="el" href="kmeans_8sql__in_source.html">kmeans.sql_in</a>.</p>
</div>
</div>
</div><!-- contents -->
</div><!-- doc-content -->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
<li class="navelem"><a class="el" href="dir_68267d1309a1af8e8297ef4c3efbcdba.html">src</a></li><li class="navelem"><a class="el" href="dir_efbcf68973d247bbf15f9eecae7f24e3.html">ports</a></li><li class="navelem"><a class="el" href="dir_a4a48839224ef8488facbffa8a397967.html">postgres</a></li><li class="navelem"><a class="el" href="dir_dc596537ad427a4d866006d1a3e1fe29.html">modules</a></li><li class="navelem"><a class="el" href="dir_73ccba3aa44ce35463f879b4ebbd3f46.html">kmeans</a></li><li class="navelem"><a class="el" href="kmeans_8sql__in.html">kmeans.sql_in</a></li>
<li class="footer">Generated on Tue Sep 10 2013 15:48:04 for MADlib by
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.4 </li>
</ul>
</div>
</body>
</html>