blob: 21794605c144409af920dc81224610fb3f73c76e [file] [log] [blame]
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_402) on Mon Apr 15 02:01:54 UTC 2024 -->
<title>KMeans (Spark 3.4.3 JavaDoc)</title>
<meta name="date" content="2024-04-15">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// Copy this page's title onto the parent document, unless the URL carries the
// 'is-external=true' marker. Any error raised while doing so is deliberately ignored.
try {
    if (location.href.indexOf('is-external=true') < 0) {
        parent.document.title = "KMeans (Spark 3.4.3 JavaDoc)";
    }
} catch (err) {
    // Intentionally empty: a failure to set the title is not reported.
}
//-->
// Per-row data for the Method Summary table: the keys are the ids of the table rows (i0..i20).
// NOTE(review): the values look like bit flags built from the keys of `tabs` below
// (1 = static, 2 = instance, 8 = concrete, so 9 = static + concrete and 10 = instance + concrete);
// the code that consumes them lives in script.js, which is not visible here - confirm there.
var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":9,"i8":9,"i9":10,"i10":10,"i11":10,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":9,"i19":9,"i20":9};
// Tab definitions: key -> [id of the caption span, tab label]. The ids (t0, t1, t2, t4)
// match the spans in the Method Summary table caption.
var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
// CSS class names that this page's markup uses for alternating table rows ...
var altColor = "altColor";
var rowColor = "rowColor";
// ... and for the inactive / active tabs in the table caption.
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../org/apache/spark/mllib/clustering/GaussianMixtureModel.html" title="class in org.apache.spark.mllib.clustering"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/spark/mllib/clustering/KMeans.html" target="_top">Frames</a></li>
<li><a href="KMeans.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the top navbar's "All Classes" list only when this page is the top-level
// window; hide it when the page is displayed inside a frame.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.spark.mllib.clustering</div>
<h2 title="Class KMeans" class="title">Class KMeans</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>Object</li>
<li>
<ul class="inheritance">
<li>org.apache.spark.mllib.clustering.KMeans</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Implemented Interfaces:</dt>
<dd>java.io.Serializable, org.apache.spark.internal.Logging</dd>
</dl>
<hr>
<br>
<pre>public class <span class="typeNameLabel">KMeans</span>
extends Object
implements scala.Serializable, org.apache.spark.internal.Logging</pre>
<div class="block">K-means clustering with a k-means++ like initialization mode
(the k-means|| algorithm by Bahmani et al).
<p>
This is an iterative algorithm that will make multiple passes over the data, so any RDDs given
to it should be cached by the user.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../serialized-form.html#org.apache.spark.mllib.clustering.KMeans">Serialized Form</a></dd>
</dl>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<ul class="blockList">
<li class="blockList"><a name="nested.classes.inherited.from.class.org.apache.spark.internal.Logging">
<!-- -->
</a>
<h3>Nested classes/interfaces inherited from interface&nbsp;org.apache.spark.internal.Logging</h3>
<code>org.apache.spark.internal.Logging.SparkShellLoggingFilter</code></li>
</ul>
</li>
</ul>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#KMeans--">KMeans</a></span>()</code>
<div class="block">Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20,
initializationMode: "k-means||", initializationSteps: 2, epsilon: 1e-4, seed: random,
distanceMeasure: "euclidean"}.</div>
</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#getDistanceMeasure--">getDistanceMeasure</a></span>()</code>
<div class="block">The distance measure used by the algorithm.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>double</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#getEpsilon--">getEpsilon</a></span>()</code>
<div class="block">The distance threshold within which we consider centers to have converged.</div>
</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#getInitializationMode--">getInitializationMode</a></span>()</code>
<div class="block">The initialization algorithm.</div>
</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#getInitializationSteps--">getInitializationSteps</a></span>()</code>
<div class="block">Number of steps for the k-means|| initialization mode.</div>
</td>
</tr>
<tr id="i4" class="altColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#getK--">getK</a></span>()</code>
<div class="block">Number of clusters to create (k).</div>
</td>
</tr>
<tr id="i5" class="rowColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#getMaxIterations--">getMaxIterations</a></span>()</code>
<div class="block">Maximum number of iterations allowed.</div>
</td>
</tr>
<tr id="i6" class="altColor">
<td class="colFirst"><code>long</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#getSeed--">getSeed</a></span>()</code>
<div class="block">The random seed for cluster initialization.</div>
</td>
</tr>
<tr id="i7" class="rowColor">
<td class="colFirst"><code>static String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#K_MEANS_PARALLEL--">K_MEANS_PARALLEL</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i8" class="altColor">
<td class="colFirst"><code>static String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#RANDOM--">RANDOM</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i9" class="rowColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#run-org.apache.spark.rdd.RDD-">run</a></span>(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../../org/apache/spark/mllib/linalg/Vector.html" title="interface in org.apache.spark.mllib.linalg">Vector</a>&gt;&nbsp;data)</code>
<div class="block">Train a K-means model on the given set of points; <code>data</code> should be cached for high
performance, because this is an iterative algorithm.</div>
</td>
</tr>
<tr id="i10" class="altColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#setDistanceMeasure-java.lang.String-">setDistanceMeasure</a></span>(String&nbsp;distanceMeasure)</code>
<div class="block">Set the distance measure used by the algorithm.</div>
</td>
</tr>
<tr id="i11" class="rowColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#setEpsilon-double-">setEpsilon</a></span>(double&nbsp;epsilon)</code>
<div class="block">Set the distance threshold within which we consider centers to have converged.</div>
</td>
</tr>
<tr id="i12" class="altColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#setInitializationMode-java.lang.String-">setInitializationMode</a></span>(String&nbsp;initializationMode)</code>
<div class="block">Set the initialization algorithm.</div>
</td>
</tr>
<tr id="i13" class="rowColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#setInitializationSteps-int-">setInitializationSteps</a></span>(int&nbsp;initializationSteps)</code>
<div class="block">Set the number of steps for the k-means|| initialization mode.</div>
</td>
</tr>
<tr id="i14" class="altColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#setInitialModel-org.apache.spark.mllib.clustering.KMeansModel-">setInitialModel</a></span>(<a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a>&nbsp;model)</code>
<div class="block">Set the initial starting point, bypassing the random initialization or k-means||.
The condition model.k == this.k must be met; failure results
in an IllegalArgumentException.</div>
</td>
</tr>
<tr id="i15" class="rowColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#setK-int-">setK</a></span>(int&nbsp;k)</code>
<div class="block">Set the number of clusters to create (k).</div>
</td>
</tr>
<tr id="i16" class="altColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#setMaxIterations-int-">setMaxIterations</a></span>(int&nbsp;maxIterations)</code>
<div class="block">Set maximum number of iterations allowed.</div>
</td>
</tr>
<tr id="i17" class="rowColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#setSeed-long-">setSeed</a></span>(long&nbsp;seed)</code>
<div class="block">Set the random seed for cluster initialization.</div>
</td>
</tr>
<tr id="i18" class="altColor">
<td class="colFirst"><code>static <a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#train-org.apache.spark.rdd.RDD-int-int-">train</a></span>(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../../org/apache/spark/mllib/linalg/Vector.html" title="interface in org.apache.spark.mllib.linalg">Vector</a>&gt;&nbsp;data,
int&nbsp;k,
int&nbsp;maxIterations)</code>
<div class="block">Trains a k-means model using specified parameters and the default values for unspecified.</div>
</td>
</tr>
<tr id="i19" class="rowColor">
<td class="colFirst"><code>static <a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#train-org.apache.spark.rdd.RDD-int-int-java.lang.String-">train</a></span>(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../../org/apache/spark/mllib/linalg/Vector.html" title="interface in org.apache.spark.mllib.linalg">Vector</a>&gt;&nbsp;data,
int&nbsp;k,
int&nbsp;maxIterations,
String&nbsp;initializationMode)</code>
<div class="block">Trains a k-means model using the given set of parameters.</div>
</td>
</tr>
<tr id="i20" class="altColor">
<td class="colFirst"><code>static <a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html#train-org.apache.spark.rdd.RDD-int-int-java.lang.String-long-">train</a></span>(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../../org/apache/spark/mllib/linalg/Vector.html" title="interface in org.apache.spark.mllib.linalg">Vector</a>&gt;&nbsp;data,
int&nbsp;k,
int&nbsp;maxIterations,
String&nbsp;initializationMode,
long&nbsp;seed)</code>
<div class="block">Trains a k-means model using the given set of parameters.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;Object</h3>
<code>equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.org.apache.spark.internal.Logging">
<!-- -->
</a>
<h3>Methods inherited from interface&nbsp;org.apache.spark.internal.Logging</h3>
<code>$init$, initializeForcefully, initializeLogIfNecessary, initializeLogIfNecessary, initializeLogIfNecessary$default$2, initLock, isTraceEnabled, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning, org$apache$spark$internal$Logging$$log__$eq, org$apache$spark$internal$Logging$$log_, uninitialize</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="KMeans--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>KMeans</h4>
<pre>public&nbsp;KMeans()</pre>
<div class="block">Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20,
initializationMode: "k-means||", initializationSteps: 2, epsilon: 1e-4, seed: random,
distanceMeasure: "euclidean"}.</div>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="RANDOM--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>RANDOM</h4>
<pre>public static&nbsp;String&nbsp;RANDOM()</pre>
</li>
</ul>
<a name="K_MEANS_PARALLEL--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>K_MEANS_PARALLEL</h4>
<pre>public static&nbsp;String&nbsp;K_MEANS_PARALLEL()</pre>
</li>
</ul>
<a name="train-org.apache.spark.rdd.RDD-int-int-java.lang.String-long-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>train</h4>
<pre>public static&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a>&nbsp;train(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../../org/apache/spark/mllib/linalg/Vector.html" title="interface in org.apache.spark.mllib.linalg">Vector</a>&gt;&nbsp;data,
int&nbsp;k,
int&nbsp;maxIterations,
String&nbsp;initializationMode,
long&nbsp;seed)</pre>
<div class="block">Trains a k-means model using the given set of parameters.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>data</code> - Training points as an <code>RDD</code> of <code>Vector</code> types.</dd>
<dd><code>k</code> - Number of clusters to create.</dd>
<dd><code>maxIterations</code> - Maximum number of iterations allowed.</dd>
<dd><code>initializationMode</code> - The initialization algorithm. This can either be "random" or
"k-means||". (default: "k-means||")</dd>
<dd><code>seed</code> - Random seed for cluster initialization. Default is to generate seed based
on system time.</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="train-org.apache.spark.rdd.RDD-int-int-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>train</h4>
<pre>public static&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a>&nbsp;train(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../../org/apache/spark/mllib/linalg/Vector.html" title="interface in org.apache.spark.mllib.linalg">Vector</a>&gt;&nbsp;data,
int&nbsp;k,
int&nbsp;maxIterations,
String&nbsp;initializationMode)</pre>
<div class="block">Trains a k-means model using the given set of parameters.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>data</code> - Training points as an <code>RDD</code> of <code>Vector</code> types.</dd>
<dd><code>k</code> - Number of clusters to create.</dd>
<dd><code>maxIterations</code> - Maximum number of iterations allowed.</dd>
<dd><code>initializationMode</code> - The initialization algorithm. This can either be "random" or
"k-means||". (default: "k-means||")</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="train-org.apache.spark.rdd.RDD-int-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>train</h4>
<pre>public static&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a>&nbsp;train(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../../org/apache/spark/mllib/linalg/Vector.html" title="interface in org.apache.spark.mllib.linalg">Vector</a>&gt;&nbsp;data,
int&nbsp;k,
int&nbsp;maxIterations)</pre>
<div class="block">Trains a k-means model using specified parameters and the default values for unspecified.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>data</code> - (undocumented)</dd>
<dd><code>k</code> - (undocumented)</dd>
<dd><code>maxIterations</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getK--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getK</h4>
<pre>public&nbsp;int&nbsp;getK()</pre>
<div class="block">Number of clusters to create (k).</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>It is possible for fewer than k clusters to
be returned, for example, if there are fewer than k distinct points to cluster.</dd>
</dl>
</li>
</ul>
<a name="setK-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setK</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a>&nbsp;setK(int&nbsp;k)</pre>
<div class="block">Set the number of clusters to create (k).</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>k</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>It is possible for fewer than k clusters to
be returned, for example, if there are fewer than k distinct points to cluster. Default: 2.</dd>
</dl>
</li>
</ul>
<a name="getMaxIterations--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getMaxIterations</h4>
<pre>public&nbsp;int&nbsp;getMaxIterations()</pre>
<div class="block">Maximum number of iterations allowed.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="setMaxIterations-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setMaxIterations</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a>&nbsp;setMaxIterations(int&nbsp;maxIterations)</pre>
<div class="block">Set maximum number of iterations allowed. Default: 20.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>maxIterations</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getInitializationMode--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getInitializationMode</h4>
<pre>public&nbsp;String&nbsp;getInitializationMode()</pre>
<div class="block">The initialization algorithm. This can be either "random" or "k-means||".</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="setInitializationMode-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setInitializationMode</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a>&nbsp;setInitializationMode(String&nbsp;initializationMode)</pre>
<div class="block">Set the initialization algorithm. This can be either "random" to choose random points as
initial cluster centers, or "k-means||" to use a parallel variant of k-means++
(Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>initializationMode</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getInitializationSteps--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getInitializationSteps</h4>
<pre>public&nbsp;int&nbsp;getInitializationSteps()</pre>
<div class="block">Number of steps for the k-means|| initialization mode.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="setInitializationSteps-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setInitializationSteps</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a>&nbsp;setInitializationSteps(int&nbsp;initializationSteps)</pre>
<div class="block">Set the number of steps for the k-means|| initialization mode. This is an advanced
setting -- the default of 2 is almost always enough. Default: 2.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>initializationSteps</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getEpsilon--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getEpsilon</h4>
<pre>public&nbsp;double&nbsp;getEpsilon()</pre>
<div class="block">The distance threshold within which we consider centers to have converged.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="setEpsilon-double-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setEpsilon</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a>&nbsp;setEpsilon(double&nbsp;epsilon)</pre>
<div class="block">Set the distance threshold within which we consider centers to have converged.
If all centers move less than this Euclidean distance, we stop iterating one run.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>epsilon</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getSeed--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getSeed</h4>
<pre>public&nbsp;long&nbsp;getSeed()</pre>
<div class="block">The random seed for cluster initialization.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="setSeed-long-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setSeed</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a>&nbsp;setSeed(long&nbsp;seed)</pre>
<div class="block">Set the random seed for cluster initialization.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>seed</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getDistanceMeasure--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getDistanceMeasure</h4>
<pre>public&nbsp;String&nbsp;getDistanceMeasure()</pre>
<div class="block">The distance measure used by the algorithm.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="setDistanceMeasure-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setDistanceMeasure</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a>&nbsp;setDistanceMeasure(String&nbsp;distanceMeasure)</pre>
<div class="block">Set the distance measure used by the algorithm.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>distanceMeasure</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="setInitialModel-org.apache.spark.mllib.clustering.KMeansModel-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setInitialModel</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeans.html" title="class in org.apache.spark.mllib.clustering">KMeans</a>&nbsp;setInitialModel(<a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a>&nbsp;model)</pre>
<div class="block">Set the initial starting point, bypassing the random initialization or k-means||.
The condition model.k == this.k must be met; failure results
in an IllegalArgumentException.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>model</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="run-org.apache.spark.rdd.RDD-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>run</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering">KMeansModel</a>&nbsp;run(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../../org/apache/spark/mllib/linalg/Vector.html" title="interface in org.apache.spark.mllib.linalg">Vector</a>&gt;&nbsp;data)</pre>
<div class="block">Train a K-means model on the given set of points; <code>data</code> should be cached for high
performance, because this is an iterative algorithm.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>data</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../org/apache/spark/mllib/clustering/GaussianMixtureModel.html" title="class in org.apache.spark.mllib.clustering"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../org/apache/spark/mllib/clustering/KMeansModel.html" title="class in org.apache.spark.mllib.clustering"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/spark/mllib/clustering/KMeans.html" target="_top">Frames</a></li>
<li><a href="KMeans.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the bottom navbar's "All Classes" list only when this page is the top-level
// window; hide it when the page is displayed inside a frame.
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<script defer="defer" type="text/javascript" src="../../../../../lib/jquery.js"></script><script defer="defer" type="text/javascript" src="../../../../../lib/api-javadocs.js"></script></body>
</html>