| <!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `/root/.cargo/git/checkouts/incubator-teaclave-crates-c8106113f74feefc/ede1f68/rusty-machine/src/learning/k_means.rs`."><meta name="keywords" content="rust, rustlang, rust-lang"><title>k_means.rs - source</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceSerif4-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../FiraSans-Regular.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../FiraSans-Medium.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceCodePro-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceSerif4-Bold.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceCodePro-Semibold.ttf.woff2"><link rel="stylesheet" href="../../../normalize.css"><link rel="stylesheet" href="../../../rustdoc.css" id="mainThemeStyle"><link rel="stylesheet" href="../../../ayu.css" disabled><link rel="stylesheet" href="../../../dark.css" disabled><link rel="stylesheet" href="../../../light.css" id="themeStyle"><script id="default-settings" ></script><script src="../../../storage.js"></script><script defer src="../../../source-script.js"></script><script defer src="../../../source-files.js"></script><script defer src="../../../main.js"></script><noscript><link rel="stylesheet" href="../../../noscript.css"></noscript><link rel="alternate icon" type="image/png" href="../../../favicon-16x16.png"><link rel="alternate icon" type="image/png" href="../../../favicon-32x32.png"><link rel="icon" type="image/svg+xml" href="../../../favicon.svg"></head><body class="rustdoc source"><!--[if lte IE 11]><div class="warning">This old browser is 
unsupported and will most likely display funky things.</div><![endif]--><nav class="sidebar"><a class="sidebar-logo" href="../../../rusty_machine/index.html"><div class="logo-container"><img class="rust-logo" src="../../../rust-logo.svg" alt="logo"></div></a></nav><main><div class="width-limiter"><nav class="sub"><a class="sub-logo-container" href="../../../rusty_machine/index.html"><img class="rust-logo" src="../../../rust-logo.svg" alt="logo"></a><form class="search-form"><div class="search-container"><span></span><input class="search-input" name="search" autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to search, ‘?’ for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../../../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../../../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../../../wheel.svg"></a></div></div></form></nav><section id="main-content" class="content"><div class="example-wrap"><pre class="src-line-numbers"><span id="1">1</span> |
| <span id="2">2</span> |
| <span id="3">3</span> |
| <span id="4">4</span> |
| <span id="5">5</span> |
| <span id="6">6</span> |
| <span id="7">7</span> |
| <span id="8">8</span> |
| <span id="9">9</span> |
| <span id="10">10</span> |
| <span id="11">11</span> |
| <span id="12">12</span> |
| <span id="13">13</span> |
| <span id="14">14</span> |
| <span id="15">15</span> |
| <span id="16">16</span> |
| <span id="17">17</span> |
| <span id="18">18</span> |
| <span id="19">19</span> |
| <span id="20">20</span> |
| <span id="21">21</span> |
| <span id="22">22</span> |
| <span id="23">23</span> |
| <span id="24">24</span> |
| <span id="25">25</span> |
| <span id="26">26</span> |
| <span id="27">27</span> |
| <span id="28">28</span> |
| <span id="29">29</span> |
| <span id="30">30</span> |
| <span id="31">31</span> |
| <span id="32">32</span> |
| <span id="33">33</span> |
| <span id="34">34</span> |
| <span id="35">35</span> |
| <span id="36">36</span> |
| <span id="37">37</span> |
| <span id="38">38</span> |
| <span id="39">39</span> |
| <span id="40">40</span> |
| <span id="41">41</span> |
| <span id="42">42</span> |
| <span id="43">43</span> |
| <span id="44">44</span> |
| <span id="45">45</span> |
| <span id="46">46</span> |
| <span id="47">47</span> |
| <span id="48">48</span> |
| <span id="49">49</span> |
| <span id="50">50</span> |
| <span id="51">51</span> |
| <span id="52">52</span> |
| <span id="53">53</span> |
| <span id="54">54</span> |
| <span id="55">55</span> |
| <span id="56">56</span> |
| <span id="57">57</span> |
| <span id="58">58</span> |
| <span id="59">59</span> |
| <span id="60">60</span> |
| <span id="61">61</span> |
| <span id="62">62</span> |
| <span id="63">63</span> |
| <span id="64">64</span> |
| <span id="65">65</span> |
| <span id="66">66</span> |
| <span id="67">67</span> |
| <span id="68">68</span> |
| <span id="69">69</span> |
| <span id="70">70</span> |
| <span id="71">71</span> |
| <span id="72">72</span> |
| <span id="73">73</span> |
| <span id="74">74</span> |
| <span id="75">75</span> |
| <span id="76">76</span> |
| <span id="77">77</span> |
| <span id="78">78</span> |
| <span id="79">79</span> |
| <span id="80">80</span> |
| <span id="81">81</span> |
| <span id="82">82</span> |
| <span id="83">83</span> |
| <span id="84">84</span> |
| <span id="85">85</span> |
| <span id="86">86</span> |
| <span id="87">87</span> |
| <span id="88">88</span> |
| <span id="89">89</span> |
| <span id="90">90</span> |
| <span id="91">91</span> |
| <span id="92">92</span> |
| <span id="93">93</span> |
| <span id="94">94</span> |
| <span id="95">95</span> |
| <span id="96">96</span> |
| <span id="97">97</span> |
| <span id="98">98</span> |
| <span id="99">99</span> |
| <span id="100">100</span> |
| <span id="101">101</span> |
| <span id="102">102</span> |
| <span id="103">103</span> |
| <span id="104">104</span> |
| <span id="105">105</span> |
| <span id="106">106</span> |
| <span id="107">107</span> |
| <span id="108">108</span> |
| <span id="109">109</span> |
| <span id="110">110</span> |
| <span id="111">111</span> |
| <span id="112">112</span> |
| <span id="113">113</span> |
| <span id="114">114</span> |
| <span id="115">115</span> |
| <span id="116">116</span> |
| <span id="117">117</span> |
| <span id="118">118</span> |
| <span id="119">119</span> |
| <span id="120">120</span> |
| <span id="121">121</span> |
| <span id="122">122</span> |
| <span id="123">123</span> |
| <span id="124">124</span> |
| <span id="125">125</span> |
| <span id="126">126</span> |
| <span id="127">127</span> |
| <span id="128">128</span> |
| <span id="129">129</span> |
| <span id="130">130</span> |
| <span id="131">131</span> |
| <span id="132">132</span> |
| <span id="133">133</span> |
| <span id="134">134</span> |
| <span id="135">135</span> |
| <span id="136">136</span> |
| <span id="137">137</span> |
| <span id="138">138</span> |
| <span id="139">139</span> |
| <span id="140">140</span> |
| <span id="141">141</span> |
| <span id="142">142</span> |
| <span id="143">143</span> |
| <span id="144">144</span> |
| <span id="145">145</span> |
| <span id="146">146</span> |
| <span id="147">147</span> |
| <span id="148">148</span> |
| <span id="149">149</span> |
| <span id="150">150</span> |
| <span id="151">151</span> |
| <span id="152">152</span> |
| <span id="153">153</span> |
| <span id="154">154</span> |
| <span id="155">155</span> |
| <span id="156">156</span> |
| <span id="157">157</span> |
| <span id="158">158</span> |
| <span id="159">159</span> |
| <span id="160">160</span> |
| <span id="161">161</span> |
| <span id="162">162</span> |
| <span id="163">163</span> |
| <span id="164">164</span> |
| <span id="165">165</span> |
| <span id="166">166</span> |
| <span id="167">167</span> |
| <span id="168">168</span> |
| <span id="169">169</span> |
| <span id="170">170</span> |
| <span id="171">171</span> |
| <span id="172">172</span> |
| <span id="173">173</span> |
| <span id="174">174</span> |
| <span id="175">175</span> |
| <span id="176">176</span> |
| <span id="177">177</span> |
| <span id="178">178</span> |
| <span id="179">179</span> |
| <span id="180">180</span> |
| <span id="181">181</span> |
| <span id="182">182</span> |
| <span id="183">183</span> |
| <span id="184">184</span> |
| <span id="185">185</span> |
| <span id="186">186</span> |
| <span id="187">187</span> |
| <span id="188">188</span> |
| <span id="189">189</span> |
| <span id="190">190</span> |
| <span id="191">191</span> |
| <span id="192">192</span> |
| <span id="193">193</span> |
| <span id="194">194</span> |
| <span id="195">195</span> |
| <span id="196">196</span> |
| <span id="197">197</span> |
| <span id="198">198</span> |
| <span id="199">199</span> |
| <span id="200">200</span> |
| <span id="201">201</span> |
| <span id="202">202</span> |
| <span id="203">203</span> |
| <span id="204">204</span> |
| <span id="205">205</span> |
| <span id="206">206</span> |
| <span id="207">207</span> |
| <span id="208">208</span> |
| <span id="209">209</span> |
| <span id="210">210</span> |
| <span id="211">211</span> |
| <span id="212">212</span> |
| <span id="213">213</span> |
| <span id="214">214</span> |
| <span id="215">215</span> |
| <span id="216">216</span> |
| <span id="217">217</span> |
| <span id="218">218</span> |
| <span id="219">219</span> |
| <span id="220">220</span> |
| <span id="221">221</span> |
| <span id="222">222</span> |
| <span id="223">223</span> |
| <span id="224">224</span> |
| <span id="225">225</span> |
| <span id="226">226</span> |
| <span id="227">227</span> |
| <span id="228">228</span> |
| <span id="229">229</span> |
| <span id="230">230</span> |
| <span id="231">231</span> |
| <span id="232">232</span> |
| <span id="233">233</span> |
| <span id="234">234</span> |
| <span id="235">235</span> |
| <span id="236">236</span> |
| <span id="237">237</span> |
| <span id="238">238</span> |
| <span id="239">239</span> |
| <span id="240">240</span> |
| <span id="241">241</span> |
| <span id="242">242</span> |
| <span id="243">243</span> |
| <span id="244">244</span> |
| <span id="245">245</span> |
| <span id="246">246</span> |
| <span id="247">247</span> |
| <span id="248">248</span> |
| <span id="249">249</span> |
| <span id="250">250</span> |
| <span id="251">251</span> |
| <span id="252">252</span> |
| <span id="253">253</span> |
| <span id="254">254</span> |
| <span id="255">255</span> |
| <span id="256">256</span> |
| <span id="257">257</span> |
| <span id="258">258</span> |
| <span id="259">259</span> |
| <span id="260">260</span> |
| <span id="261">261</span> |
| <span id="262">262</span> |
| <span id="263">263</span> |
| <span id="264">264</span> |
| <span id="265">265</span> |
| <span id="266">266</span> |
| <span id="267">267</span> |
| <span id="268">268</span> |
| <span id="269">269</span> |
| <span id="270">270</span> |
| <span id="271">271</span> |
| <span id="272">272</span> |
| <span id="273">273</span> |
| <span id="274">274</span> |
| <span id="275">275</span> |
| <span id="276">276</span> |
| <span id="277">277</span> |
| <span id="278">278</span> |
| <span id="279">279</span> |
| <span id="280">280</span> |
| <span id="281">281</span> |
| <span id="282">282</span> |
| <span id="283">283</span> |
| <span id="284">284</span> |
| <span id="285">285</span> |
| <span id="286">286</span> |
| <span id="287">287</span> |
| <span id="288">288</span> |
| <span id="289">289</span> |
| <span id="290">290</span> |
| <span id="291">291</span> |
| <span id="292">292</span> |
| <span id="293">293</span> |
| <span id="294">294</span> |
| <span id="295">295</span> |
| <span id="296">296</span> |
| <span id="297">297</span> |
| <span id="298">298</span> |
| <span id="299">299</span> |
| <span id="300">300</span> |
| <span id="301">301</span> |
| <span id="302">302</span> |
| <span id="303">303</span> |
| <span id="304">304</span> |
| <span id="305">305</span> |
| <span id="306">306</span> |
| <span id="307">307</span> |
| <span id="308">308</span> |
| <span id="309">309</span> |
| <span id="310">310</span> |
| <span id="311">311</span> |
| <span id="312">312</span> |
| <span id="313">313</span> |
| <span id="314">314</span> |
| <span id="315">315</span> |
| <span id="316">316</span> |
| <span id="317">317</span> |
| <span id="318">318</span> |
| <span id="319">319</span> |
| <span id="320">320</span> |
| <span id="321">321</span> |
| <span id="322">322</span> |
| <span id="323">323</span> |
| <span id="324">324</span> |
| <span id="325">325</span> |
| <span id="326">326</span> |
| <span id="327">327</span> |
| <span id="328">328</span> |
| <span id="329">329</span> |
| <span id="330">330</span> |
| <span id="331">331</span> |
| <span id="332">332</span> |
| <span id="333">333</span> |
| <span id="334">334</span> |
| <span id="335">335</span> |
| <span id="336">336</span> |
| <span id="337">337</span> |
| <span id="338">338</span> |
| <span id="339">339</span> |
| <span id="340">340</span> |
| <span id="341">341</span> |
| <span id="342">342</span> |
| <span id="343">343</span> |
| <span id="344">344</span> |
| <span id="345">345</span> |
| <span id="346">346</span> |
| <span id="347">347</span> |
| <span id="348">348</span> |
| <span id="349">349</span> |
| <span id="350">350</span> |
| <span id="351">351</span> |
| <span id="352">352</span> |
| <span id="353">353</span> |
| <span id="354">354</span> |
| <span id="355">355</span> |
| <span id="356">356</span> |
| <span id="357">357</span> |
| <span id="358">358</span> |
| <span id="359">359</span> |
| <span id="360">360</span> |
| <span id="361">361</span> |
| <span id="362">362</span> |
| <span id="363">363</span> |
| <span id="364">364</span> |
| <span id="365">365</span> |
| <span id="366">366</span> |
| <span id="367">367</span> |
| <span id="368">368</span> |
| <span id="369">369</span> |
| <span id="370">370</span> |
| <span id="371">371</span> |
| <span id="372">372</span> |
| <span id="373">373</span> |
| <span id="374">374</span> |
| <span id="375">375</span> |
| <span id="376">376</span> |
| <span id="377">377</span> |
| <span id="378">378</span> |
| <span id="379">379</span> |
| <span id="380">380</span> |
| <span id="381">381</span> |
| </pre><pre class="rust"><code><span class="doccomment">//! K-means Classification |
| //! |
| //! Provides an implementation of K-Means classification. |
| //! |
| //! # Usage |
| //! |
| //! ``` |
| //! use rusty_machine::linalg::Matrix; |
| //! use rusty_machine::learning::k_means::KMeansClassifier; |
| //! use rusty_machine::learning::UnSupModel; |
| //! |
| //! let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); |
| //! let test_inputs = Matrix::new(1, 2, vec![1.0, 3.5]); |
| //! |
| //! // Create model with k(=2) classes. |
| //! let mut model = KMeansClassifier::new(2); |
| //! |
| //! // Where inputs is a Matrix with features in columns. |
| //! model.train(&inputs).unwrap(); |
| //! |
| //! // Where test_inputs is a Matrix with features in columns. |
| //! let a = model.predict(&test_inputs).unwrap(); |
| //! ``` |
| //! |
| //! Additionally you can control the initialization |
| //! algorithm and max number of iterations. |
| //! |
| //! # Initializations |
| //! |
| //! Three initialization algorithms are supported. |
| //! |
| //! ## Forgy initialization |
| //! |
| //! Choose initial centroids randomly from the data. |
| //! |
| //! ## Random Partition initialization |
| //! |
| //! Randomly assign each data point to one of k clusters. |
| //! The initial centroids are the mean of the data in their class. |
| //! |
| //! ## K-means++ initialization |
| //! |
| //! The [k-means++](https://en.wikipedia.org/wiki/K-means%2B%2B) scheme. |
| |
| </span><span class="kw">use </span>linalg::{Matrix, MatrixSlice, Axes, Vector, BaseMatrix}; |
| <span class="kw">use </span>learning::{LearningResult, UnSupModel}; |
| <span class="kw">use </span>learning::error::{Error, ErrorKind}; |
| |
| <span class="kw">use </span>rand::{Rng, thread_rng}; |
| <span class="kw">use </span>libnum::abs; |
| |
| <span class="kw">use </span>std::fmt::Debug; |
| |
| <span class="doccomment">/// K-Means Classification model. |
| /// |
| /// Holds the fitted centroids as an `Option` (`None` until the model is trained). |
| /// Also stores the maximum number of iterations and the number of classes. |
| /// |
| /// # Usage |
| /// |
| /// This model is used through the `UnSupModel` trait. The model is |
| /// trained via the `train` function with a matrix containing rows of |
| /// feature vectors. |
| /// |
| /// The model will not check to ensure the data coming in is all valid. |
| /// This responsibility lies with the user (for now). |
| </span><span class="attribute">#[derive(Debug)] |
| </span><span class="kw">pub struct </span>KMeansClassifier<InitAlg: Initializer> { |
| <span class="doccomment">/// Max iterations of algorithm to run. |
| </span>iters: usize, |
| <span class="doccomment">/// The number of classes. |
| </span>k: usize, |
| <span class="doccomment">/// The fitted centroids. |
| </span>centroids: <span class="prelude-ty">Option</span><Matrix<f64>>, |
| <span class="doccomment">/// The initialization algorithm to use. |
| </span>init_algorithm: InitAlg, |
| } |
| |
| <span class="kw">impl</span><InitAlg: Initializer> UnSupModel<Matrix<f64>, Vector<usize>> <span class="kw">for </span>KMeansClassifier<InitAlg> { |
| <span class="doccomment">/// Predict classes from data. |
| /// |
| /// Model must be trained. |
| </span><span class="kw">fn </span>predict(<span class="kw-2">&</span><span class="self">self</span>, inputs: <span class="kw-2">&</span>Matrix<f64>) -> LearningResult<Vector<usize>> { |
| <span class="kw">if let </span><span class="prelude-val">Some</span>(<span class="kw-2">ref </span>centroids) = <span class="self">self</span>.centroids { |
| <span class="prelude-val">Ok</span>(KMeansClassifier::<InitAlg>::find_closest_centroids(centroids.as_slice(), inputs).<span class="number">0</span>) |
| } <span class="kw">else </span>{ |
| <span class="prelude-val">Err</span>(Error::new_untrained()) |
| } |
| } |
| |
| <span class="doccomment">/// Train the classifier using input data. |
| </span><span class="kw">fn </span>train(<span class="kw-2">&mut </span><span class="self">self</span>, inputs: <span class="kw-2">&</span>Matrix<f64>) -> LearningResult<()> { |
| <span class="self">self</span>.init_centroids(inputs)<span class="question-mark">?</span>; |
| <span class="kw">let </span><span class="kw-2">mut </span>cost = <span class="number">0.0</span>; |
| <span class="kw">let </span>eps = <span class="number">1e-14</span>; |
| |
| <span class="kw">for </span>_i <span class="kw">in </span><span class="number">0</span>..<span class="self">self</span>.iters { |
| <span class="kw">let </span>(idx, distances) = <span class="self">self</span>.get_closest_centroids(inputs)<span class="question-mark">?</span>; |
| <span class="self">self</span>.update_centroids(inputs, idx); |
| |
| <span class="kw">let </span>cost_i = distances.sum(); |
| <span class="kw">if </span>abs(cost - cost_i) < eps { |
| <span class="kw">break</span>; |
| } |
| |
| cost = cost_i; |
| } |
| |
| <span class="prelude-val">Ok</span>(()) |
| } |
| } |
| |
| <span class="kw">impl </span>KMeansClassifier<KPlusPlus> { |
| <span class="doccomment">/// Constructs untrained k-means classifier model. |
| /// |
| /// Requires number of classes to be specified. |
| /// Defaults to 100 iterations and kmeans++ initialization. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use rusty_machine::learning::k_means::KMeansClassifier; |
| /// |
| /// let model = KMeansClassifier::new(5); |
| /// ``` |
| </span><span class="kw">pub fn </span>new(k: usize) -> KMeansClassifier<KPlusPlus> { |
| KMeansClassifier { |
| iters: <span class="number">100</span>, |
| k: k, |
| centroids: <span class="prelude-val">None</span>, |
| init_algorithm: KPlusPlus, |
| } |
| } |
| } |
| |
| <span class="kw">impl</span><InitAlg: Initializer> KMeansClassifier<InitAlg> { |
| <span class="doccomment">/// Constructs untrained k-means classifier model. |
| /// |
| /// Requires number of classes, number of iterations, and |
| /// the initialization algorithm to use. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use rusty_machine::learning::k_means::{KMeansClassifier, Forgy}; |
| /// |
| /// let model = KMeansClassifier::new_specified(5, 42, Forgy); |
| /// ``` |
| </span><span class="kw">pub fn </span>new_specified(k: usize, iters: usize, algo: InitAlg) -> KMeansClassifier<InitAlg> { |
| KMeansClassifier { |
| iters: iters, |
| k: k, |
| centroids: <span class="prelude-val">None</span>, |
| init_algorithm: algo, |
| } |
| } |
| |
| <span class="doccomment">/// Get the number of classes. |
| </span><span class="kw">pub fn </span>k(<span class="kw-2">&</span><span class="self">self</span>) -> usize { |
| <span class="self">self</span>.k |
| } |
| |
| <span class="doccomment">/// Get the number of iterations. |
| </span><span class="kw">pub fn </span>iters(<span class="kw-2">&</span><span class="self">self</span>) -> usize { |
| <span class="self">self</span>.iters |
| } |
| |
| <span class="doccomment">/// Get the initialization algorithm. |
| </span><span class="kw">pub fn </span>init_algorithm(<span class="kw-2">&</span><span class="self">self</span>) -> <span class="kw-2">&</span>InitAlg { |
| <span class="kw-2">&</span><span class="self">self</span>.init_algorithm |
| } |
| |
| <span class="doccomment">/// Get the centroids `Option<Matrix<f64>>`. |
| </span><span class="kw">pub fn </span>centroids(<span class="kw-2">&</span><span class="self">self</span>) -> <span class="kw-2">&</span><span class="prelude-ty">Option</span><Matrix<f64>> { |
| <span class="kw-2">&</span><span class="self">self</span>.centroids |
| } |
| |
| <span class="doccomment">/// Set the number of iterations. |
| </span><span class="kw">pub fn </span>set_iters(<span class="kw-2">&mut </span><span class="self">self</span>, iters: usize) { |
| <span class="self">self</span>.iters = iters; |
| } |
| |
| <span class="doccomment">/// Initialize the centroids. |
| /// |
| /// Used internally within model. |
| </span><span class="kw">fn </span>init_centroids(<span class="kw-2">&mut </span><span class="self">self</span>, inputs: <span class="kw-2">&</span>Matrix<f64>) -> LearningResult<()> { |
| <span class="kw">if </span><span class="self">self</span>.k > inputs.rows() { |
| <span class="prelude-val">Err</span>(Error::new(ErrorKind::InvalidData, |
| <span class="macro">format!</span>(<span class="string">"Number of clusters ({0}) exceeds number of data points \ |
| ({1})."</span>, |
| <span class="self">self</span>.k, |
| inputs.rows()))) |
| } <span class="kw">else </span>{ |
| <span class="kw">let </span>centroids = <span class="self">self</span>.init_algorithm.init_centroids(<span class="self">self</span>.k, inputs)<span class="question-mark">?</span>; |
| |
| <span class="kw">if </span>centroids.rows() != <span class="self">self</span>.k { |
| <span class="prelude-val">Err</span>(Error::new(ErrorKind::InvalidState, |
| <span class="string">"Initial centroids must have exactly k rows."</span>)) |
| } <span class="kw">else if </span>centroids.cols() != inputs.cols() { |
| <span class="prelude-val">Err</span>(Error::new(ErrorKind::InvalidState, |
| <span class="string">"Initial centroids must have the same column count as inputs."</span>)) |
| } <span class="kw">else </span>{ |
| <span class="self">self</span>.centroids = <span class="prelude-val">Some</span>(centroids); |
| <span class="prelude-val">Ok</span>(()) |
| } |
| } |
| |
| } |
| |
| <span class="doccomment">/// Update the centroids by computing the mean of the points assigned to each class. |
| /// |
| /// Used internally within model. |
| </span><span class="kw">fn </span>update_centroids(<span class="kw-2">&mut </span><span class="self">self</span>, inputs: <span class="kw-2">&</span>Matrix<f64>, classes: Vector<usize>) { |
| <span class="kw">let </span><span class="kw-2">mut </span>new_centroids = Vec::with_capacity(<span class="self">self</span>.k * inputs.cols()); |
| |
| <span class="kw">let </span><span class="kw-2">mut </span>row_indexes = <span class="macro">vec!</span>[Vec::new(); <span class="self">self</span>.k]; |
| <span class="kw">for </span>(i, c) <span class="kw">in </span>classes.into_vec().into_iter().enumerate() { |
| row_indexes.get_mut(c <span class="kw">as </span>usize).map(|v| v.push(i)); |
| } |
| |
| <span class="kw">for </span>vec_i <span class="kw">in </span>row_indexes { |
| <span class="kw">let </span>mat_i = inputs.select_rows(<span class="kw-2">&</span>vec_i); |
| new_centroids.extend(mat_i.mean(Axes::Row).into_vec()); |
| } |
| |
| <span class="self">self</span>.centroids = <span class="prelude-val">Some</span>(Matrix::new(<span class="self">self</span>.k, inputs.cols(), new_centroids)); |
| } |
| |
| <span class="kw">fn </span>get_closest_centroids(<span class="kw-2">&</span><span class="self">self</span>, |
| inputs: <span class="kw-2">&</span>Matrix<f64>) |
| -> LearningResult<(Vector<usize>, Vector<f64>)> { |
| <span class="kw">if let </span><span class="prelude-val">Some</span>(<span class="kw-2">ref </span>c) = <span class="self">self</span>.centroids { |
| <span class="prelude-val">Ok</span>(KMeansClassifier::<InitAlg>::find_closest_centroids(c.as_slice(), inputs)) |
| } <span class="kw">else </span>{ |
| <span class="prelude-val">Err</span>(Error::new(ErrorKind::InvalidState, |
| <span class="string">"Centroids not correctly initialized."</span>)) |
| } |
| } |
| |
| <span class="doccomment">/// Find the centroid closest to each data point. |
| /// |
| /// Used internally within model. |
| /// Returns the index of the closest centroid and the distance to it. |
| </span><span class="kw">fn </span>find_closest_centroids(centroids: MatrixSlice<f64>, |
| inputs: <span class="kw-2">&</span>Matrix<f64>) |
| -> (Vector<usize>, Vector<f64>) { |
| <span class="kw">let </span><span class="kw-2">mut </span>idx = Vec::with_capacity(inputs.rows()); |
| <span class="kw">let </span><span class="kw-2">mut </span>distances = Vec::with_capacity(inputs.rows()); |
| |
| <span class="kw">for </span>i <span class="kw">in </span><span class="number">0</span>..inputs.rows() { |
| <span class="comment">// This works like repmat pulling out row i repeatedly. |
| </span><span class="kw">let </span>centroid_diff = centroids - inputs.select_rows(<span class="kw-2">&</span><span class="macro">vec!</span>[i; centroids.rows()]); |
| <span class="kw">let </span>dist = <span class="kw-2">&</span>centroid_diff.elemul(<span class="kw-2">&</span>centroid_diff).sum_cols(); |
| |
| <span class="comment">// Now take the argmin; its index identifies the closest centroid. |
| </span><span class="kw">let </span>(min_idx, min_dist) = dist.argmin(); |
| idx.push(min_idx); |
| distances.push(min_dist); |
| } |
| |
| (Vector::new(idx), Vector::new(distances)) |
| } |
| } |
| |
| <span class="doccomment">/// Trait for algorithms initializing the K-means centroids. |
| </span><span class="kw">pub trait </span>Initializer: Debug { |
| <span class="doccomment">/// Initialize the centroids for the initial state of the K-Means model. |
| /// |
| /// The `Matrix` returned must have `k` rows and the same column count as `inputs`. |
| </span><span class="kw">fn </span>init_centroids(<span class="kw-2">&</span><span class="self">self</span>, k: usize, inputs: <span class="kw-2">&</span>Matrix<f64>) -> LearningResult<Matrix<f64>>; |
| } |
| |
| <span class="doccomment">/// The Forgy initialization scheme. |
| </span><span class="attribute">#[derive(Debug)] |
| </span><span class="kw">pub struct </span>Forgy; |
| |
| <span class="kw">impl </span>Initializer <span class="kw">for </span>Forgy { |
| <span class="kw">fn </span>init_centroids(<span class="kw-2">&</span><span class="self">self</span>, k: usize, inputs: <span class="kw-2">&</span>Matrix<f64>) -> LearningResult<Matrix<f64>> { |
| <span class="kw">let </span><span class="kw-2">mut </span>random_choices = Vec::with_capacity(k); |
| <span class="kw">let </span><span class="kw-2">mut </span>rng = thread_rng(); |
| <span class="kw">while </span>random_choices.len() < k { |
| <span class="kw">let </span>r = rng.gen_range(<span class="number">0</span>..inputs.rows()); |
| |
| <span class="kw">if </span>!random_choices.contains(<span class="kw-2">&</span>r) { |
| random_choices.push(r); |
| } |
| } |
| |
| <span class="prelude-val">Ok</span>(inputs.select_rows(<span class="kw-2">&</span>random_choices)) |
| } |
| } |
| |
| <span class="doccomment">/// The Random Partition initialization scheme. |
| </span><span class="attribute">#[derive(Debug)] |
| </span><span class="kw">pub struct </span>RandomPartition; |
| |
| <span class="kw">impl </span>Initializer <span class="kw">for </span>RandomPartition { |
| <span class="kw">fn </span>init_centroids(<span class="kw-2">&</span><span class="self">self</span>, k: usize, inputs: <span class="kw-2">&</span>Matrix<f64>) -> LearningResult<Matrix<f64>> { |
| |
| <span class="comment">// Populate so we have something in each class. |
| </span><span class="kw">let </span><span class="kw-2">mut </span>random_assignments = (<span class="number">0</span>..k).map(|i| <span class="macro">vec!</span>[i]).collect::<Vec<Vec<usize>>>(); |
| <span class="kw">let </span><span class="kw-2">mut </span>rng = thread_rng(); |
| <span class="kw">for </span>i <span class="kw">in </span>k..inputs.rows() { |
| <span class="kw">let </span>idx = rng.gen_range(<span class="number">0</span>..k); |
| <span class="kw">unsafe </span>{ |
| random_assignments.get_unchecked_mut(idx).push(i); |
| } |
| } |
| |
| <span class="kw">let </span><span class="kw-2">mut </span>init_centroids = Vec::with_capacity(k * inputs.cols()); |
| |
| <span class="kw">for </span>vec_i <span class="kw">in </span>random_assignments { |
| <span class="kw">let </span>mat_i = inputs.select_rows(<span class="kw-2">&</span>vec_i); |
| init_centroids.extend_from_slice(<span class="kw-2">&*</span>mat_i.mean(Axes::Row).into_vec()); |
| } |
| |
| <span class="prelude-val">Ok</span>(Matrix::new(k, inputs.cols(), init_centroids)) |
| } |
| } |
| |
| <span class="doccomment">/// The K-means++ initialization scheme. |
| </span><span class="attribute">#[derive(Debug)] |
| </span><span class="kw">pub struct </span>KPlusPlus; |
| |
| <span class="kw">impl </span>Initializer <span class="kw">for </span>KPlusPlus { |
| <span class="kw">fn </span>init_centroids(<span class="kw-2">&</span><span class="self">self</span>, k: usize, inputs: <span class="kw-2">&</span>Matrix<f64>) -> LearningResult<Matrix<f64>> { |
| <span class="kw">let </span><span class="kw-2">mut </span>rng = thread_rng(); |
| |
| <span class="kw">let </span><span class="kw-2">mut </span>init_centroids = Vec::with_capacity(k * inputs.cols()); |
| <span class="kw">let </span>first_cen = rng.gen_range(<span class="number">0usize</span>..inputs.rows()); |
| |
| <span class="kw">unsafe </span>{ |
| init_centroids.extend_from_slice(inputs.row_unchecked(first_cen).raw_slice()); |
| } |
| |
| <span class="kw">for </span>i <span class="kw">in </span><span class="number">1</span>..k { |
| <span class="kw">unsafe </span>{ |
| <span class="kw">let </span>temp_centroids = MatrixSlice::from_raw_parts(init_centroids.as_ptr(), |
| i, |
| inputs.cols(), |
| inputs.cols()); |
| <span class="kw">let </span>(<span class="kw">_</span>, dist) = |
| KMeansClassifier::<KPlusPlus>::find_closest_centroids(temp_centroids, inputs); |
| |
| <span class="comment">// A relatively cheap way to validate our input data |
| </span><span class="kw">if </span>!dist.data().iter().all(|x| x.is_finite()) { |
| <span class="kw">return </span><span class="prelude-val">Err</span>(Error::new(ErrorKind::InvalidData, |
| <span class="string">"Input data led to invalid centroid distances during \ |
| initialization."</span>)); |
| } |
| |
| <span class="kw">let </span>next_cen = sample_discretely(<span class="kw-2">&</span>dist); |
| init_centroids.extend_from_slice(inputs.row_unchecked(next_cen).raw_slice()); |
| } |
| } |
| |
| <span class="prelude-val">Ok</span>(Matrix::new(k, inputs.cols(), init_centroids)) |
| } |
| } |
| |
| <span class="doccomment">/// Sample from an unnormalized distribution. |
| /// |
| /// The input to this function is assumed to have non-negative entries with a positive sum. |
| </span><span class="kw">fn </span>sample_discretely(unnorm_dist: <span class="kw-2">&</span>Vector<f64>) -> usize { |
| <span class="macro">assert!</span>(unnorm_dist.size() > <span class="number">0</span>, <span class="string">"No entries in distribution vector."</span>); |
| |
| <span class="kw">let </span>sum = unnorm_dist.sum(); |
| |
| <span class="kw">let </span>rand = thread_rng().gen_range(<span class="number">0.0f64</span>..sum); |
| |
| <span class="kw">let </span><span class="kw-2">mut </span>tempsum = <span class="number">0.0</span>; |
| <span class="kw">for </span>(i, p) <span class="kw">in </span>unnorm_dist.data().iter().enumerate() { |
| tempsum += <span class="kw-2">*</span>p; |
| |
| <span class="kw">if </span>rand < tempsum { |
| <span class="kw">return </span>i; |
| } |
| } |
| |
| <span class="macro">panic!</span>(<span class="string">"No random value was sampled! There may be more clusters than unique data points."</span>); |
| } |
| </code></pre></div> |
| </section></div></main><div id="rustdoc-vars" data-root-path="../../../" data-current-crate="rusty_machine" data-themes="ayu,dark,light" data-resource-suffix="" data-rustdoc-version="1.66.0-nightly (5c8bff74b 2022-10-21)" ></div></body></html> |